diff --git a/Jenkinsfile b/Jenkinsfile
index f00cddc..203182d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,6 +1,6 @@
 pipeline {
     agent any
-    options([parameters([string(defaultValue: '20000', description: '', name: 'CUTOFF', trim: false)])])
+    // options([parameters([string(defaultValue: '20000', description: '', name: 'CUTOFF', trim: false)])])
     stages {
         stage('sh: Shell Script') {
             steps {
@@ -8,7 +8,7 @@ pipeline {
                 "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
                     sh 'chmod +x script-zadanie-2-4.sh'
                     sh './script-zadanie-2-4.sh'
-                    sh 'head -n ${CUTOFF} train.csv >> train.csv'
+                    // sh 'head -n ${CUTOFF} train.csv >> train.csv'
                 }
                 archiveArtifacts 'train.csv'
                 archiveArtifacts 'test.csv'
diff --git a/ium01.ipynb b/ium01.ipynb
index 5d00540..ea89e92 100644
--- a/ium01.ipynb
+++ b/ium01.ipynb
@@ -1,381 +1 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
-  "colab": {
-   "name": "ium01.ipynb",
-   "provenance": [],
-   "collapsed_sections": [],
-   "toc_visible": true,
-   "mount_file_id": "1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM",
-   "authorship_tag": "ABX9TyOIuQ5zGfTk3BtU/LhkFVWV"
-  },
-  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "zn8GQjYWnbcX"
-   },
-   "source": [
-    "# Notebook for first substask of Inżynieria Uczenia Maszynowego class project.\n",
-    "This workbook downloads, normalizes and prints short summary of the dataset I will be working on and its subsets.\n",
-    "\n",
-    "Link to the dataset at Kaggle.com:\n",
-    "\n",
-    "https://www.kaggle.com/pcbreviglieri/smart-grid-stability"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Omh9bzNn7s0Z"
-   },
-   "source": [
-    "#### google colab related stuff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "Z14xGWuJnWwq"
-   },
-   "source": [
-    "from google.colab import drive\n",
-    "drive.mount('drive')"
-   ],
-   "execution_count": null,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "mROvxIELsVv1"
-   },
-   "source": [
-    "* Click in Colab GUI to allow Colab access and modify Google Drive files"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "hVfCOcburj5P",
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369081457,
-     "user_tz": -60,
-     "elapsed": 5742,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    }
-   },
-   "source": [
-    "!mkdir ~/.kaggle\n",
-    "!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n",
-    "!chmod +x ~/.kaggle/kaggle.json\n",
-    "!pip install -q kaggle"
-   ],
-   "execution_count": 2,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "EYeZaE3Cxf5i"
-   },
-   "source": [
-    "# script"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "SRF-igrsma-A"
-   },
-   "source": [
-    "download data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "3UjQJzTawfKH",
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369086975,
-     "user_tz": -60,
-     "elapsed": 1915,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    }
-   },
-   "source": [
-    "!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n",
-    "!unzip smart-grid-stability.zip >>/dev/null 2>&1"
-   ],
-   "execution_count": 3,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "mkK6wZ2zmhdQ"
-   },
-   "source": [
-    "read the data as pandas data frame"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "JcPbvjeixwQa",
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369395418,
-     "user_tz": -60,
-     "elapsed": 563,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    }
-   },
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "df = pd.read_csv('smart_grid_stability_augmented.csv')"
-   ],
-   "execution_count": 17,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "x81Ip-6fmnfr"
-   },
-   "source": [
-    "normalize values, so they are all between 0 and 1 (included)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "7QZX5c2ZMpTj",
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369401750,
-     "user_tz": -60,
-     "elapsed": 552,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    }
-   },
-   "source": [
-    "from sklearn import preprocessing\n",
-    "\n",
-    "scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])\n",
-    "df_norm_array = scaler.transform(df.iloc[:, 0:-1])\n",
-    "df_norm = pd.DataFrame(data=df_norm_array,\n",
-    "                       columns=df.columns[:-1])\n",
-    "df_norm['stabf'] = df['stabf']"
-   ],
-   "execution_count": 18,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "hjAT_K-Cmzhq"
-   },
-   "source": [
-    "divide the data into train, test and validation subsets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "MvI7kiL0UPc8",
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369417725,
-     "user_tz": -60,
-     "elapsed": 562,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    }
-   },
-   "source": [
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "train, testAndValid = train_test_split(\n",
-    "    df_norm,\n",
-    "    test_size=0.2,\n",
-    "    random_state=42,\n",
-    "    stratify=df_norm['stabf'])\n",
-    "\n",
-    "test, valid =train_test_split(\n",
-    "    testAndValid,\n",
-    "    test_size=0.5,\n",
-    "    random_state=42,\n",
-    "    stratify=testAndValid['stabf'])"
-   ],
-   "execution_count": 19,
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "FdUL87MgnE2G"
-   },
-   "source": [
-    "print short summary of the dataset and its subsets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "id": "WUrX63SGcHSB",
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "executionInfo": {
-     "status": "ok",
-     "timestamp": 1616369421560,
-     "user_tz": -60,
-     "elapsed": 854,
-     "user": {
-      "displayName": "jadenadjezioro",
-      "photoUrl": "",
-      "userId": "13576387580000290170"
-     }
-    },
-    "outputId": "8cffba3e-8ea5-48b1-c7ce-b8ba2b7229e7"
-   },
-   "source": [
-    "def namestr(obj, namespace):\n",
-    "  return [name for name in namespace if namespace[name] is obj]\n",
-    "\n",
-    "dataset = df_norm\n",
-    "for x in [dataset, train, test, valid]:\n",
-    "  print([q for q in namestr(x, globals()) if len(q) == max([len(w) for w in namestr(x, globals())])][-1]) \n",
-    "  print(\"size:\", len(x))\n",
-    "  print(x.describe(include='all'))\n",
-    "  print(\"class distribution\", x.value_counts('stabf'))\n",
-    "  print('===============================================================')"
-   ],
-   "execution_count": 20,
-   "outputs": [
-    {
-     "output_type": "stream",
-     "text": [
-      "dataset\n",
-      "size: 60000\n",
-      "                tau1          tau2  ...          stab     stabf\n",
-      "count   6.000000e+04  6.000000e+04  ...  6.000000e+04     60000\n",
-      "unique           NaN           NaN  ...           NaN         2\n",
-      "top              NaN           NaN  ...           NaN  unstable\n",
-      "freq             NaN           NaN  ...           NaN     38280\n",
-      "mean    1.476245e-16 -1.998105e-16  ...  3.981075e-17       NaN\n",
-      "std     1.000008e+00  1.000008e+00  ...  1.000008e+00       NaN\n",
-      "min    -1.731763e+00 -1.731999e+00  ... -2.613709e+00       NaN\n",
-      "25%    -8.660657e-01 -8.660215e-01  ... -8.475133e-01       NaN\n",
-      "50%     1.437170e-06 -7.028730e-06  ...  3.821538e-02       NaN\n",
-      "75%     8.659131e-01  8.659873e-01  ...  7.895385e-01       NaN\n",
-      "max     1.731859e+00  1.731991e+00  ...  2.537363e+00       NaN\n",
-      "\n",
-      "[11 rows x 14 columns]\n",
-      "class distribution stabf\n",
-      "unstable    38280\n",
-      "stable      21720\n",
-      "dtype: int64\n",
-      "===============================================================\n",
-      "train\n",
-      "size: 48000\n",
-      "                tau1          tau2  ...          stab     stabf\n",
-      "count   48000.000000  48000.000000  ...  48000.000000     48000\n",
-      "unique           NaN           NaN  ...           NaN         2\n",
-      "top              NaN           NaN  ...           NaN  unstable\n",
-      "freq             NaN           NaN  ...           NaN     30624\n",
-      "mean       -0.001546     -0.001068  ...     -0.000873       NaN\n",
-      "std         1.000934      0.999107  ...      0.999578       NaN\n",
-      "min        -1.731763     -1.731999  ...     -2.613709       NaN\n",
-      "25%        -0.868796     -0.864317  ...     -0.847686       NaN\n",
-      "50%        -0.001740     -0.005136  ...      0.036743       NaN\n",
-      "75%         0.868335      0.861387  ...      0.788993       NaN\n",
-      "max         1.731859      1.731991  ...      2.537363       NaN\n",
-      "\n",
-      "[11 rows x 14 columns]\n",
-      "class distribution stabf\n",
-      "unstable    30624\n",
-      "stable      17376\n",
-      "dtype: int64\n",
-      "===============================================================\n",
-      "test\n",
-      "size: 6000\n",
-      "               tau1         tau2  ...         stab     stabf\n",
-      "count   6000.000000  6000.000000  ...  6000.000000      6000\n",
-      "unique          NaN          NaN  ...          NaN         2\n",
-      "top             NaN          NaN  ...          NaN  unstable\n",
-      "freq            NaN          NaN  ...          NaN      3828\n",
-      "mean       0.023917     0.012911  ...     0.003546       NaN\n",
-      "std        0.998552     1.001761  ...     0.998815       NaN\n",
-      "min       -1.731763    -1.731184  ...    -2.613709       NaN\n",
-      "25%       -0.839910    -0.855393  ...    -0.847835       NaN\n",
-      "50%        0.042499     0.020595  ...     0.049834       NaN\n",
-      "75%        0.889110     0.902355  ...     0.794568       NaN\n",
-      "max        1.731686     1.731427  ...     2.537363       NaN\n",
-      "\n",
-      "[11 rows x 14 columns]\n",
-      "class distribution stabf\n",
-      "unstable    3828\n",
-      "stable      2172\n",
-      "dtype: int64\n",
-      "===============================================================\n",
-      "valid\n",
-      "size: 6000\n",
-      "               tau1         tau2  ...         stab     stabf\n",
-      "count   6000.000000  6000.000000  ...  6000.000000      6000\n",
-      "unique          NaN          NaN  ...          NaN         2\n",
-      "top             NaN          NaN  ...          NaN  unstable\n",
-      "freq            NaN          NaN  ...          NaN      3828\n",
-      "mean      -0.011551    -0.004364  ...     0.003435       NaN\n",
-      "std        0.993842     1.005519  ...     1.004786       NaN\n",
-      "min       -1.731763    -1.731999  ...    -2.613709       NaN\n",
-      "25%       -0.874471    -0.887753  ...    -0.844789       NaN\n",
-      "50%       -0.017244     0.017840  ...     0.039665       NaN\n",
-      "75%        0.825347     0.868048  ...     0.787678       NaN\n",
-      "max        1.731859     1.731991  ...     2.537363       NaN\n",
-      "\n",
-      "[11 rows x 14 columns]\n",
-      "class distribution stabf\n",
-      "unstable    3828\n",
-      "stable      2172\n",
-      "dtype: int64\n",
-      "===============================================================\n"
-     ],
-     "name": "stdout"
-    }
-   ]
-  }
- ]
-}
\ No newline at end of file
+{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ium01.ipynb","provenance":[],"collapsed_sections":["EYeZaE3Cxf5i"],"toc_visible":true,"mount_file_id":"1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM","authorship_tag":"ABX9TyPMsVZZ+JFELlXyuCddiOXa"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zn8GQjYWnbcX"},"source":["# Notebook for the first subtask of the Inżynieria Uczenia Maszynowego class project.\n","This workbook downloads, normalizes and prints a short summary of the dataset I will be working on and its subsets.\n","\n","Link to the dataset at Kaggle.com:\n","\n","https://www.kaggle.com/pcbreviglieri/smart-grid-stability"]},{"cell_type":"markdown","metadata":{"id":"Omh9bzNn7s0Z"},"source":["#### google colab related stuff"]},{"cell_type":"code","metadata":{"id":"Z14xGWuJnWwq","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616926547933,"user_tz":-120,"elapsed":1241,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"9cc0275b-0883-4494-97a7-29c5054727da"},"source":["from google.colab import drive\n","drive.mount('drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Drive already mounted at drive; to attempt to forcibly remount, call drive.mount(\"drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mROvxIELsVv1"},"source":["* Click in Colab GUI to allow Colab access and modify Google Drive files"]},{"cell_type":"code","metadata":{"id":"hVfCOcburj5P","executionInfo":{"status":"ok","timestamp":1616926567889,"user_tz":-120,"elapsed":4654,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}}},"source":["!mkdir ~/.kaggle\n","!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n","!chmod +x ~/.kaggle/kaggle.json\n","!pip install -q 
kaggle"],"execution_count":2,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EYeZaE3Cxf5i"},"source":["# script for lab IUM-01"]},{"cell_type":"markdown","metadata":{"id":"XspjcqV4U9tb"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"3UjQJzTawfKH"},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n","!unzip smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mkK6wZ2zmhdQ"},"source":["read the data as pandas data frame"]},{"cell_type":"code","metadata":{"id":"JcPbvjeixwQa"},"source":["import pandas as pd\n","\n","df = pd.read_csv('smart_grid_stability_augmented.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"x81Ip-6fmnfr"},"source":["standardize all columns except the class label stabf, so each has zero mean and unit variance"]},{"cell_type":"code","metadata":{"id":"7QZX5c2ZMpTj"},"source":["from sklearn import preprocessing\n","\n","scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])\n","df_norm_array = scaler.transform(df.iloc[:, 0:-1])\n","df_norm = pd.DataFrame(data=df_norm_array,\n","                       columns=df.columns[:-1])\n","df_norm['stabf'] = df['stabf']"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hjAT_K-Cmzhq"},"source":["divide the data into train, test and validation subsets"]},{"cell_type":"code","metadata":{"id":"MvI7kiL0UPc8"},"source":["from sklearn.model_selection import train_test_split\n","\n","train, testAndValid = train_test_split(\n","    df_norm,\n","    test_size=0.2,\n","    random_state=42,\n","    stratify=df_norm['stabf'])\n","\n","test, valid =train_test_split(\n","    testAndValid,\n","    test_size=0.5,\n","    random_state=42,\n","    stratify=testAndValid['stabf'])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FdUL87MgnE2G"},"source":["print short summary of the dataset and its 
subsets"]},{"cell_type":"code","metadata":{"id":"WUrX63SGcHSB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616369421560,"user_tz":-60,"elapsed":854,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"8cffba3e-8ea5-48b1-c7ce-b8ba2b7229e7"},"source":["def namestr(obj, namespace):\n","  return [name for name in namespace if namespace[name] is obj]\n","\n","dataset = df_norm\n","for x in [dataset, train, test, valid]:\n","  print([q for q in namestr(x, globals()) if len(q) == max([len(w) for w in namestr(x, globals())])][-1]) \n","  print(\"size:\", len(x))\n","  print(x.describe(include='all'))\n","  print(\"class distribution\", x.value_counts('stabf'))\n","  print('===============================================================')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["dataset\n","size: 60000\n","                tau1          tau2  ...          stab     stabf\n","count   6.000000e+04  6.000000e+04  ...  6.000000e+04     60000\n","unique           NaN           NaN  ...           NaN         2\n","top              NaN           NaN  ...           NaN  unstable\n","freq             NaN           NaN  ...           NaN     38280\n","mean    1.476245e-16 -1.998105e-16  ...  3.981075e-17       NaN\n","std     1.000008e+00  1.000008e+00  ...  1.000008e+00       NaN\n","min    -1.731763e+00 -1.731999e+00  ... -2.613709e+00       NaN\n","25%    -8.660657e-01 -8.660215e-01  ... -8.475133e-01       NaN\n","50%     1.437170e-06 -7.028730e-06  ...  3.821538e-02       NaN\n","75%     8.659131e-01  8.659873e-01  ...  7.895385e-01       NaN\n","max     1.731859e+00  1.731991e+00  ...  
2.537363e+00       NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable    38280\n","stable      21720\n","dtype: int64\n","===============================================================\n","train\n","size: 48000\n","                tau1          tau2  ...          stab     stabf\n","count   48000.000000  48000.000000  ...  48000.000000     48000\n","unique           NaN           NaN  ...           NaN         2\n","top              NaN           NaN  ...           NaN  unstable\n","freq             NaN           NaN  ...           NaN     30624\n","mean       -0.001546     -0.001068  ...     -0.000873       NaN\n","std         1.000934      0.999107  ...      0.999578       NaN\n","min        -1.731763     -1.731999  ...     -2.613709       NaN\n","25%        -0.868796     -0.864317  ...     -0.847686       NaN\n","50%        -0.001740     -0.005136  ...      0.036743       NaN\n","75%         0.868335      0.861387  ...      0.788993       NaN\n","max         1.731859      1.731991  ...      2.537363       NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable    30624\n","stable      17376\n","dtype: int64\n","===============================================================\n","test\n","size: 6000\n","               tau1         tau2  ...         stab     stabf\n","count   6000.000000  6000.000000  ...  6000.000000      6000\n","unique          NaN          NaN  ...          NaN         2\n","top             NaN          NaN  ...          NaN  unstable\n","freq            NaN          NaN  ...          NaN      3828\n","mean       0.023917     0.012911  ...     0.003546       NaN\n","std        0.998552     1.001761  ...     0.998815       NaN\n","min       -1.731763    -1.731184  ...    -2.613709       NaN\n","25%       -0.839910    -0.855393  ...    -0.847835       NaN\n","50%        0.042499     0.020595  ...     0.049834       NaN\n","75%        0.889110     0.902355  ...     
0.794568       NaN\n","max        1.731686     1.731427  ...     2.537363       NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable    3828\n","stable      2172\n","dtype: int64\n","===============================================================\n","valid\n","size: 6000\n","               tau1         tau2  ...         stab     stabf\n","count   6000.000000  6000.000000  ...  6000.000000      6000\n","unique          NaN          NaN  ...          NaN         2\n","top             NaN          NaN  ...          NaN  unstable\n","freq            NaN          NaN  ...          NaN      3828\n","mean      -0.011551    -0.004364  ...     0.003435       NaN\n","std        0.993842     1.005519  ...     1.004786       NaN\n","min       -1.731763    -1.731999  ...    -2.613709       NaN\n","25%       -0.874471    -0.887753  ...    -0.844789       NaN\n","50%       -0.017244     0.017840  ...     0.039665       NaN\n","75%        0.825347     0.868048  ...     0.787678       NaN\n","max        1.731859     1.731991  ...     
2.537363       NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable    3828\n","stable      2172\n","dtype: int64\n","===============================================================\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"66jPkCNlVGK9"},"source":["# script for lab IUM-03"]},{"cell_type":"markdown","metadata":{"id":"SRF-igrsma-A"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"IkZTO5PhVB7R","executionInfo":{"status":"ok","timestamp":1616926577004,"user_tz":-120,"elapsed":1881,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}}},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n","!unzip smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":3,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ijEeDXgpLYqk"},"source":["check how many data entries is in the dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p9ONnUUIW27z","executionInfo":{"status":"ok","timestamp":1616926972602,"user_tz":-120,"elapsed":732,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"cb078dbb-daa0-4ae6-ec08-9b84141c8507"},"source":["!wc -l smart_grid_stability_augmented.csv"],"execution_count":7,"outputs":[{"output_type":"stream","text":["60001 smart_grid_stability_augmented.csv\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"jQw4a7ZjMmXr"},"source":["take a look at the dataset to choose columns to keep"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":226},"id":"82msg0JoVZiK","executionInfo":{"status":"ok","timestamp":1616926656293,"user_tz":-120,"elapsed":1067,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"9c0243dd-f21d-4453-dd56-7410c4391cdd"},"source":["import pandas as pd\n","df = 
pd.read_csv('smart_grid_stability_augmented.csv')\n","df.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>tau1</th>\n","      <th>tau2</th>\n","      <th>tau3</th>\n","      <th>tau4</th>\n","      <th>p1</th>\n","      <th>p2</th>\n","      <th>p3</th>\n","      <th>p4</th>\n","      <th>g1</th>\n","      <th>g2</th>\n","      <th>g3</th>\n","      <th>g4</th>\n","      <th>stab</th>\n","      <th>stabf</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2.959060</td>\n","      <td>3.079885</td>\n","      <td>8.381025</td>\n","      <td>9.780754</td>\n","      <td>3.763085</td>\n","      <td>-0.782604</td>\n","      <td>-1.257395</td>\n","      <td>-1.723086</td>\n","      <td>0.650456</td>\n","      <td>0.859578</td>\n","      <td>0.887445</td>\n","      <td>0.958034</td>\n","      <td>0.055347</td>\n","      <td>unstable</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>9.304097</td>\n","      <td>4.902524</td>\n","      <td>3.047541</td>\n","      <td>1.369357</td>\n","      <td>5.067812</td>\n","      <td>-1.940058</td>\n","      <td>-1.872742</td>\n","      <td>-1.255012</td>\n","      <td>0.413441</td>\n","      <td>0.862414</td>\n","      <td>0.562139</td>\n","      <td>0.781760</td>\n","      <td>-0.005957</td>\n","      <td>stable</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>8.971707</td>\n","      <td>8.848428</td>\n","      <td>3.046479</td>\n","      <td>1.214518</td>\n","      <td>3.405158</td>\n","      
<td>-1.207456</td>\n","      <td>-1.277210</td>\n","      <td>-0.920492</td>\n","      <td>0.163041</td>\n","      <td>0.766689</td>\n","      <td>0.839444</td>\n","      <td>0.109853</td>\n","      <td>0.003471</td>\n","      <td>unstable</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0.716415</td>\n","      <td>7.669600</td>\n","      <td>4.486641</td>\n","      <td>2.340563</td>\n","      <td>3.963791</td>\n","      <td>-1.027473</td>\n","      <td>-1.938944</td>\n","      <td>-0.997374</td>\n","      <td>0.446209</td>\n","      <td>0.976744</td>\n","      <td>0.929381</td>\n","      <td>0.362718</td>\n","      <td>0.028871</td>\n","      <td>unstable</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>3.134112</td>\n","      <td>7.608772</td>\n","      <td>4.943759</td>\n","      <td>9.857573</td>\n","      <td>3.525811</td>\n","      <td>-1.125531</td>\n","      <td>-1.845975</td>\n","      <td>-0.554305</td>\n","      <td>0.797110</td>\n","      <td>0.455450</td>\n","      <td>0.656947</td>\n","      <td>0.820923</td>\n","      <td>0.049860</td>\n","      <td>unstable</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       tau1      tau2      tau3  ...        g4      stab     stabf\n","0  2.959060  3.079885  8.381025  ...  0.958034  0.055347  unstable\n","1  9.304097  4.902524  3.047541  ...  0.781760 -0.005957    stable\n","2  8.971707  8.848428  3.046479  ...  0.109853  0.003471  unstable\n","3  0.716415  7.669600  4.486641  ...  0.362718  0.028871  unstable\n","4  3.134112  7.608772  4.943759  ...  
0.820923  0.049860  unstable\n","\n","[5 rows x 14 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"KHBmXrjALz0W"},"source":["discard some of the columns; shuffle the data; divide into train, test and validation subsets and print the number of rows in each subset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tJo2DytZVyOo","executionInfo":{"status":"ok","timestamp":1616931887367,"user_tz":-120,"elapsed":2363,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"e61890d6-3f2f-4008-a83d-b7272aeecd11"},"source":["!sed 1d smart_grid_stability_augmented.csv | cut -f 1,5,9,13,14 -d \",\" | shuf | split -l 48000\n","!mv xaa train.csv\n","!mv xab toDivide\n","!split -l 6000 toDivide\n","!mv xaa test.csv\n","!mv xab valid.csv\n","!wc -l train.csv\n","!wc -l test.csv\n","!wc -l valid.csv\n"],"execution_count":34,"outputs":[{"output_type":"stream","text":["48000 train.csv\n","6000 test.csv\n","6000 valid.csv\n"],"name":"stdout"}]}]}
\ No newline at end of file