feat: data statistics and dataset division
This commit is contained in:
parent
212a1d6e38
commit
a2e7b94789
115
dane.ipynb
115
dane.ipynb
|
@ -2,18 +2,19 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from IPython.display import display,Markdown"
|
||||
"from IPython.display import display,Markdown\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -27,7 +28,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -47,7 +48,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -330,7 +331,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -448,7 +449,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -479,11 +480,107 @@
|
|||
"source": [
|
||||
"display(dataset.info())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### STD"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"y_train std: 0.49939397301167954\n",
|
||||
"y_val std: 0.4997839588710888\n",
|
||||
"y_test std: 0.4998194469400359\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### MEAN"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"y_train mean: 0.475249178684782\n",
|
||||
"y_val mean: 0.4835189309576837\n",
|
||||
"y_test mean: 0.4846325167037862\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### Count"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"y_train count: 35918\n",
|
||||
"y_val count: 4490\n",
|
||||
"y_test count: 4490\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create the train, validation, and test datasets with an 8:1:1 split\n",
|
||||
"X_train, X_val_test, y_train, y_valtest = train_test_split(dataset[\"text\"],dataset[\"Value\"],test_size=0.2, shuffle=True)\n",
|
||||
"X_test, X_val, y_test, y_val = train_test_split(X_val_test,y_valtest,test_size=0.5, shuffle=True)\n",
|
||||
"display(Markdown(\"### STD\"))\n",
|
||||
"print(f\"y_train std: {y_train.std()}\")\n",
|
||||
"print(f\"y_val std: {y_val.std()}\")\n",
|
||||
"print(f\"y_test std: {y_test.std()}\")\n",
|
||||
"\n",
|
||||
"display(Markdown(\"### MEAN\"))\n",
|
||||
"print(f\"y_train mean: {y_train.mean()}\")\n",
|
||||
"print(f\"y_val mean: {y_val.mean()}\")\n",
|
||||
"print(f\"y_test mean: {y_test.mean()}\")\n",
|
||||
"\n",
|
||||
"display(Markdown(\"### Count\"))\n",
|
||||
"print(f\"y_train count: {y_train.count()}\")\n",
|
||||
"print(f\"y_val count: {y_val.count()}\")\n",
|
||||
"print(f\"y_test count: {y_test.count()}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "deep",
|
||||
"display_name": "dl",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -502,7 +599,7 @@
|
|||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "1e61067c2f2e27a88e433eed08bcab15943261b719f4667f6d0d352911f3557f"
|
||||
"hash": "6e9239598a6712340c2b580c5c929949b8a813e86738fb7cf0a67c11d0863b74"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue