378 lines
145 KiB
Plaintext
378 lines
145 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "9102b9a5-ded2-43f5-8c10-b5ca14e150a1",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Importowanie bibliotek"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "e28d63b8-3d68-443f-a478-2047240f1e83",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import opendatasets as od\n",
|
||
|
"import numpy as np\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"from sklearn import preprocessing\n",
|
||
|
"import chardet"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "87a21ef6-e939-4e70-9c97-27250e75041c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Pobieranie zbioru danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "5d30b742-1c8a-4020-a578-de7eff3b532e",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Skipping, found downloaded files in \".\\lettuce-growth-days\" (use force=True to force download)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"od.download('https://www.kaggle.com/datasets/jjayfabor/lettuce-growth-days')\n",
|
||
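|
"# Kaggle credentials are intentionally NOT stored in this notebook: keep them in a local kaggle.json (excluded from version control) or enter them at the interactive prompt. Any API key that was previously committed here must be revoked and regenerated."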
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "dddc9962-111c-4157-8a50-911a43644642",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Czytanie zbioru danych z pliku csv"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "76e4acad-eb3c-467e-8794-ef168eee9764",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" Plant_ID Date Temperature (�C) Humidity (%) TDS Value (ppm) \\\n",
|
||
|
"0 1 8/3/2023 33.4 53 582 \n",
|
||
|
"1 1 8/4/2023 33.5 53 451 \n",
|
||
|
"2 1 8/5/2023 33.4 59 678 \n",
|
||
|
"3 1 8/6/2023 33.4 68 420 \n",
|
||
|
"4 1 8/7/2023 33.4 74 637 \n",
|
||
|
"\n",
|
||
|
" pH Level Growth Days Temperature (F) Humidity \n",
|
||
|
"0 6.4 1 92.12 0.53 \n",
|
||
|
"1 6.1 2 92.30 0.53 \n",
|
||
|
"2 6.4 3 92.12 0.59 \n",
|
||
|
"3 6.4 4 92.12 0.68 \n",
|
||
|
"4 6.5 5 92.12 0.74 \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"with open('./lettuce-growth-days/lettuce_dataset_updated.csv', 'rb') as f:\n",
|
||
|
" result = chardet.detect(f.read())\n",
|
||
|
"\n",
|
||
|
"dataset = pd.read_csv('./lettuce-growth-days/lettuce_dataset_updated.csv', encoding=result['encoding'])\n",
|
||
|
"length = len(dataset)\n",
|
||
|
"\n",
|
||
|
"print(dataset.head())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "7d2d136d-518f-499b-8e7c-47a69cca30b6",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Wyświetlenie informacji o zbiorze danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "012e41dc-ff15-4608-8758-08554f386c83",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
|
"RangeIndex: 3169 entries, 0 to 3168\n",
|
||
|
"Data columns (total 9 columns):\n",
|
||
|
" # Column Non-Null Count Dtype \n",
|
||
|
"--- ------ -------------- ----- \n",
|
||
|
" 0 Plant_ID 3169 non-null int64 \n",
|
||
|
" 1 Date 3169 non-null object \n",
|
||
|
" 2 Temperature (�C) 3169 non-null float64\n",
|
||
|
" 3 Humidity (%) 3169 non-null int64 \n",
|
||
|
" 4 TDS Value (ppm) 3169 non-null int64 \n",
|
||
|
" 5 pH Level 3169 non-null float64\n",
|
||
|
" 6 Growth Days 3169 non-null int64 \n",
|
||
|
" 7 Temperature (F) 3169 non-null float64\n",
|
||
|
" 8 Humidity 3169 non-null float64\n",
|
||
|
"dtypes: float64(4), int64(4), object(1)\n",
|
||
|
"memory usage: 222.9+ KB\n",
|
||
|
"None\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(dataset.info())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "b5c92a1b-9841-4d76-b07b-26cbf4ea9eea",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Sprawdzenie, czy w kolumnach występują brakujące wartości"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "48ec1cba-81c4-4d8b-8905-7870e18ecd24",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Plant_ID 0\n",
|
||
|
"Date 0\n",
|
||
|
"Temperature (�C) 0\n",
|
||
|
"Humidity (%) 0\n",
|
||
|
"TDS Value (ppm) 0\n",
|
||
|
"pH Level 0\n",
|
||
|
"Growth Days 0\n",
|
||
|
"Temperature (F) 0\n",
|
||
|
"Humidity 0\n",
|
||
|
"dtype: int64\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(dataset.isnull().sum())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "1c10d470-8da4-4fe7-bfce-1f9f25913894",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Statystyki zbioru"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "9eaed893-22cb-457e-bc22-5a4ad7b872d6",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" Plant_ID Temperature (�C) Humidity (%) TDS Value (ppm) \\\n",
|
||
|
"count 3169.000000 3169.000000 3169.000000 3169.000000 \n",
|
||
|
"mean 35.441780 28.142222 64.873462 598.045440 \n",
|
||
|
"std 20.243433 4.670521 8.988985 115.713047 \n",
|
||
|
"min 1.000000 18.000000 50.000000 400.000000 \n",
|
||
|
"25% 18.000000 23.600000 57.000000 498.000000 \n",
|
||
|
"50% 35.000000 30.200000 65.000000 593.000000 \n",
|
||
|
"75% 53.000000 31.500000 73.000000 699.000000 \n",
|
||
|
"max 70.000000 33.500000 80.000000 800.000000 \n",
|
||
|
"\n",
|
||
|
" pH Level Growth Days Temperature (F) Humidity \n",
|
||
|
"count 3169.000000 3169.000000 3169.000000 3169.000000 \n",
|
||
|
"mean 6.399211 23.141054 82.655999 0.648735 \n",
|
||
|
"std 0.234418 13.077107 8.406938 0.089890 \n",
|
||
|
"min 6.000000 1.000000 64.400000 0.500000 \n",
|
||
|
"25% 6.200000 12.000000 74.480000 0.570000 \n",
|
||
|
"50% 6.400000 23.000000 86.360000 0.650000 \n",
|
||
|
"75% 6.600000 34.000000 88.700000 0.730000 \n",
|
||
|
"max 6.800000 48.000000 92.300000 0.800000 \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(dataset.describe())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "0485737a-f8bc-4c39-88d9-62ba23b4de72",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Rozkład wartości poszczególnych parametrów"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "d89d3e2d-dad9-446f-bd8e-615c5cad3ffc",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAPdCAYAAACuupAFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1gUVxcH4N9sZekdRKqI2LsxxF6xG0vsCgY1MfaSRE3RaKKJxh5L8iWBWKLGEjX2hr1FY4kVQRSRJlKXsvV+fyAbl13assCC532efZa5c2fmzLKzZ+bOnRmOMcZACCGEEJPDq+wACCGEEKIfJWlCCCHERFGSJoQQQkwUJWlCCCHERFGSJoQQQkwUJWlCCCHERFGSJoQQQkwUJWlCCCHERFGSJoQQQkwUJWlishYsWACO4ypkWR07dkTHjh01w6dPnwbHcdi1a1eFLD84OBje3t4VsixDSaVSjBs3Dq6uruA4DtOnT6/skAxS0s/6yZMn4DgOYWFhmrKK/E4CwB9//AF7e3tIpVKjzO/IkSOwtLTEixcvjDI/Uv4oSZMKERYWBo7jNC8zMzO4ubkhMDAQa9asQWZmplGWExcXhwULFuDmzZtGmZ8xmXJsJbF48WKEhYVh4sSJ2Lx5M0aPHl1oXW9vb/Tp00fvuIreASpvixcvxt69e40+X5VKhfnz52PKlCmwtLTUlP/444/w8fGBvb09Ro8ejYyMDK3p1Go1mjVrhsWLF+vMs0ePHqhduzaWLFli9HhJOWGEVIDQ0FAGgC1cuJBt3ryZ/frrr2zx4sWse/fujOM45uXlxW7duqU1jUKhYDk5OaVazt9//80AsNDQ0FJNJ5PJmEwm0wyHh4czAGznzp2lmo+hscnlcpabm2u0ZZWH1q1bszZt2pSorpeXF+vdu7feceXx2ZZGST/r6Ohonf+Xvu+khYUFCwoKMnKUjP3555+M4zgWGxurKTt37hzjOI5NmzaNrV69mrm6urIJEyZoTbdx40bm4+NT6DquX7+emZubs4yMDKPHTIyPjqRJherZsydGjRqFsWPHYu7cuTh69ChOnDiBpKQk9OvXDzk5OZq6AoEAZmZm5RpPdnY2AEAkEkEkEpXrsooiFAohFosrbfklkZSUBFtb28oOo8zK8llXxHcyX2hoKNq0aYOaNWtqyg4cOICOHTti1apVmDp1KpYsWYL9+/drxqelpeHzzz/H999/X+g6Dho0CDKZDDt37iz3dSBlR0maVLrOnTvjiy++wNOnT7FlyxZNub7zf8ePH0fbtm1ha2sLS0tL+Pv7Y968eQDymlFbtWoFABg7dqymaT3/nGLHjh3RsGFDXL9+He3bt4e5ublm2oLnpPOpVCrMmzcPrq6usLCwQL9+/fDs2TOtOt7e3ggODtaZ9vV5FhebvvOkWVlZmDVrFjw8PCAWi+Hv74/vv/8erMCD6ziOw+TJk7F37140bNgQYrEYDRo0wJEjR/R/4AUkJSUhJCQELi4uMDMzQ5MmTfDbb79pxuc3T0dHR+PgwYOa2J88eVKi+ZdEYeeJ9X0H8td3586dqF+/PiQSCQICAvDvv/8CyGsOrl27NszMzNCxY0edOPUtKy0tDcHBwbCxsYGtrS2CgoKQlpZWbDwcxyErKwu//fab5nMJDg5GeHg4OI7Dn3/+qTOP33//HRzH4dKlS4V+Hrm5uThy5Ai6du2qVZ6TkwM7OzvNsL29vWZHMz++Ro0aYeDAgYXO29nZGY0bN8a+ffsKrUNMh6CyAyAEAEaPHo158+bh2LFjGD9+vN46d+/eRZ8+fdC4cWMsXLgQYrEYkZGRuHDhAgCgXr16WLhwIb788ktMmDAB7dq1AwC88847mnm8fPkSPXv2xLBhwzBq1Ci4uLgUGdc333wDjuPw6aefIikpCatWrULXrl1x8+ZNSCSSEq9fSWJ7HWMM/fr1Q3h4OEJCQtC0aVMcPXoUH3/8MZ4/f46VK1dq1T9//j
z27NmDjz76CFZWVlizZg0GDRqEmJgYODg4FBpXTk4OOnbsiMjISEyePBk+Pj7YuXMngoODkZaWhmnTpqFevXrYvHkzZsyYAXd3d8yaNQsA4OTkVOQ6KxQKJCcn65Snp6cXOV1JnDt3Dvv378ekSZMAAEuWLEGfPn3wySefYP369fjoo4+QmpqKpUuX4v3338epU6cKnRdjDP3798f58+fx4Ycfol69evjzzz8RFBRUbBybN2/GuHHj8NZbb2HChAkAAF9fX7z99tvw8PDA1q1bMWDAAK1ptm7dCl9fXwQEBBQ63+vXr0Mul6N58+Za5a1atcLPP/+MY8eOwcfHB8uXL8dbb70FALh37x42btyIq1evFht3ixYtyuU8OikHldzcTt4Q+eek//7770Lr2NjYsGbNmmmG58+fz17/iq5cuZIBYC9evCh0HkWd9+3QoQMDwDZu3Kh3XIcOHTTD+edNa9asqXXu7o8//mAA2OrVqzVlXl5ees9JFpxnUbEFBQUxLy8vzfDevXsZAPb1119r1Rs8eDDjOI5FRkZqygAwkUikVXbr1i0GgK1du1ZnWa9btWoVA8C2bNmiKZPL5SwgIIBZWlpqrXtR55kL8vLyYgCKfL1+Trrg+ucr+B3IX1+xWMyio6M1ZT/++CMDwFxdXbVinjt3LgOgVbewz3rp0qWaMqVSydq1a6fz/9IXT2HnpOfOncvEYjFLS0vTlCUlJTGBQMDmz5+vU/91P//8MwPA/v33X61ypVLJBg4cqPkMPTw82O3btxljjHXv3p19+OGHRc433+LFixkAlpiYWKL6pPJQczcxGZaWlkX28s4/H7pv3z6o1WqDliEWizF27NgS1x8zZgysrKw0w4MHD0aNGjVw6NAhg5ZfUocOHQKfz8fUqVO1ymfNmgXGGA4fPqxV3rVrV/j6+mqGGzduDGtrazx+/LjY5bi6umL48OGaMqFQiKlTp0IqleLMmTMGr0Pr1q1x/Phxndf3339v8DzzdenSRavJunXr1gDyzre+/v/KLy/qczh06BAEAgEmTpyoKePz+ZgyZUqZYhwzZgxkMplWL/YdO3ZAqVRi1KhRRU778uVLANBq2s6Pa/fu3Xj06BGuXbuGiIgINGrUCPv378fVq1exaNEiPH/+HH379oWbmxv69u2LuLg4nfnnz1dfSwcxLZSkicmQSqVaP7AFDR06FG3atMG4cePg4uKCYcOG4Y8//ihVwq5Zs2apOoj5+flpDXMch9q1axv1fKw+T58+hZubm87nUa9ePc3413l6eurMw87ODqmpqcUux8/PDzye9k9BYcspDUdHR3Tt2lXn1aJFC4Pnma/g+trY2AAAPDw89JYX9Tk8ffoUNWrU0LrMCQD8/f3LFGPdunXRqlUrbN26VVO2detWvP3226hdu3aJ5sEK9D/IV7t2bbRo0QJmZmaQy+WYNWsW5s+fD0dHRwwbNgwSiQR//fUXzMzMMGLEiELnW5HXfBPDUJImJiE2Nhbp6elF/nhJJBKcPXsWJ06cwOjRo3H79m0MHToU3bp1g0qlKtFySnMeuaQK+6EraUzGwOfz9ZYX9iNvakr7GRa2vqb2OYwZMwZnzpxBbGwsoqKicPny5WKPogFo+hEUt5MFACtXroRAIMDkyZPx7NkznD9/HkuXLkWLFi2wdOlSzfJflz9fR0dHA9aKVCRK0sQkbN68GQAQGBhYZD0ej4cuXbpgxYoVuHfvHr755hucOnUK4eHhAIx/ZPDo0SOtYcYYIiMjtZpa7ezs9PYELngUWprYvLy8EBcXp9P8/+DBA814Y/Dy8sKjR490WiOMvZzilPQzLA9eXl6Ij4/XuavXw4cPSzR9Uf/XYcOGgc/nY9u2bdi6dSuEQiGGDh1a7Dzr1q0LAIiOji6yXnx8PL7++mtNos5v2nZzc9N6f/78udZ00dHRcHR0LLbzH6l8lKRJpTt16hQWLVoEHx8fjBw5stB6KSkpOmVNmzYFAMhkMgCAhY
UFAOj9wTfEpk2btBLlrl27EB8fj549e2rKfH19cfnyZcjlck3ZgQMHdC7VKk1svXr1gkqlwg8//KBVvnLlSnAcp7X
|
||
|
"text/plain": [
|
||
|
"<Figure size 500x1000 with 7 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"\n",
|
||
|
"feature_to_plot = ['Humidity (%)','Temperature (�C)','TDS Value (ppm)','pH Level','Growth Days','Temperature (F)','Humidity']\n",
|
||
|
"\n",
|
||
|
"fig, axs = plt.subplots(len(feature_to_plot), figsize=(5, 10))\n",
|
||
|
"\n",
|
||
|
"for i, feature in enumerate(feature_to_plot):\n",
|
||
|
" sns.histplot(dataset[feature], ax=axs[i], kde=True)\n",
|
||
|
" axs[i].set_title(f'Distribution of {feature}')\n",
|
||
|
" axs[i].set_ylabel('Frequency')\n",
|
||
|
" \n",
|
||
|
"plt.tight_layout()\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "72d4c814-6138-4e81-a102-2dbb403bc0e7",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Normalizacja danych liczbowych do zakresu [0,1]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "1f23b28b-a5c0-49be-bc61-10a9199deb69",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" Plant_ID Date Temperature (�C) Humidity (%) TDS Value (ppm) \\\n",
|
||
|
"0 1 8/3/2023 0.993548 0.1 0.4550 \n",
|
||
|
"1 1 8/4/2023 1.000000 0.1 0.1275 \n",
|
||
|
"2 1 8/5/2023 0.993548 0.3 0.6950 \n",
|
||
|
"3 1 8/6/2023 0.993548 0.6 0.0500 \n",
|
||
|
"4 1 8/7/2023 0.993548 0.8 0.5925 \n",
|
||
|
"\n",
|
||
|
" pH Level Growth Days Temperature (F) Humidity \n",
|
||
|
"0 0.500 0.000000 0.993548 0.1 \n",
|
||
|
"1 0.125 0.021277 1.000000 0.1 \n",
|
||
|
"2 0.500 0.042553 0.993548 0.3 \n",
|
||
|
"3 0.500 0.063830 0.993548 0.6 \n",
|
||
|
"4 0.625 0.085106 0.993548 0.8 \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"columns = ['Humidity (%)','Temperature (�C)','TDS Value (ppm)','pH Level','Growth Days','Temperature (F)','Humidity']\n",
|
||
|
"\n",
|
||
|
"for col in columns:\n",
|
||
|
" dataset[col] = preprocessing.MinMaxScaler().fit_transform(dataset[col].values.reshape(-1, 1))\n",
|
||
|
"\n",
|
||
|
"print(dataset.head())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "bb40a889-90de-4a44-87ad-cb24bdceea01",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Podział danych na podzbiory train/dev/test"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"id": "bd8daa4c-baaf-4c17-a10a-36b18e891974",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
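|
"# Target split 60 / 20 / 20 (train / dev / test): hold out 20% for test first, then take 25% of the remaining 80% (= 20% overall) as dev\n",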
|
||
|
"X_train, X_test = train_test_split(dataset, train_size=0.8, random_state=1)\n",
|
||
|
"X_train, X_dev = train_test_split(X_train, test_size=0.25, random_state=1)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "c83c0db8-4d3b-4d2c-8b6e-9d2fbfed0ab2",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Rozmiar podzbiorów"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"id": "dca99c75-9b22-400f-9e89-a0a6e9a75573",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Set length: 3169\n",
|
||
|
"Train subset length: 1901 59.99 %\n",
|
||
|
"Dev subset length: 634 20.01 %\n",
|
||
|
"Test subset length: 634 20.01 %\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(\"Set length: \"+str(length))\n",
|
||
|
"print(\"Train subset length: \"+str(len(X_train))+\" \"+str(\"{:.2f}\".format(len(X_train)/length*100))+\" %\")\n",
|
||
|
"print(\"Dev subset length: \"+str(len(X_dev))+\" \"+str(\"{:.2f}\".format(len(X_dev)/length*100))+\" %\")\n",
|
||
|
"print(\"Test subset length: \"+str(len(X_test))+\" \"+str(\"{:.2f}\".format(len(X_test)/length*100))+\" %\")"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.10.0"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|