diff --git a/Jenkinsfile b/Jenkinsfile index f00cddc..203182d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,6 +1,6 @@ pipeline { agent any - options([parameters([string(defaultValue: '20000', description: '', name: 'CUTOFF', trim: false)])]) + // options([parameters([string(defaultValue: '20000', description: '', name: 'CUTOFF', trim: false)])]) stages { stage('sh: Shell Script') { steps { @@ -8,7 +8,7 @@ pipeline { "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) { sh 'chmod +x script-zadanie-2-4.sh' sh './script-zadanie-2-4.sh' - sh 'head -n ${CUTOFF} train.csv >> train.csv' + // sh 'head -n ${CUTOFF} train.csv >> train.csv' } archiveArtifacts 'train.csv' archiveArtifacts 'test.csv' diff --git a/ium01.ipynb b/ium01.ipynb index 5d00540..ea89e92 100644 --- a/ium01.ipynb +++ b/ium01.ipynb @@ -1,381 +1 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "ium01.ipynb", - "provenance": [], - "collapsed_sections": [], - "toc_visible": true, - "mount_file_id": "1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM", - "authorship_tag": "ABX9TyOIuQ5zGfTk3BtU/LhkFVWV" - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "zn8GQjYWnbcX" - }, - "source": [ - "# Notebook for first substask of Inżynieria Uczenia Maszynowego class project.\n", - "This workbook downloads, normalizes and prints short summary of the dataset I will be working on and its subsets.\n", - "\n", - "Link to the dataset at Kaggle.com:\n", - "\n", - "https://www.kaggle.com/pcbreviglieri/smart-grid-stability" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Omh9bzNn7s0Z" - }, - "source": [ - "#### google colab related stuff" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Z14xGWuJnWwq" - }, - "source": [ - "from google.colab import drive\n", - "drive.mount('drive')" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": 
"markdown", - "metadata": { - "id": "mROvxIELsVv1" - }, - "source": [ - "* Click in Colab GUI to allow Colab access and modify Google Drive files" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "hVfCOcburj5P", - "executionInfo": { - "status": "ok", - "timestamp": 1616369081457, - "user_tz": -60, - "elapsed": 5742, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - } - }, - "source": [ - "!mkdir ~/.kaggle\n", - "!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n", - "!chmod +x ~/.kaggle/kaggle.json\n", - "!pip install -q kaggle" - ], - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EYeZaE3Cxf5i" - }, - "source": [ - "# script" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SRF-igrsma-A" - }, - "source": [ - "download data" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3UjQJzTawfKH", - "executionInfo": { - "status": "ok", - "timestamp": 1616369086975, - "user_tz": -60, - "elapsed": 1915, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - } - }, - "source": [ - "!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n", - "!unzip smart-grid-stability.zip >>/dev/null 2>&1" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mkK6wZ2zmhdQ" - }, - "source": [ - "read the data as pandas data frame" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "JcPbvjeixwQa", - "executionInfo": { - "status": "ok", - "timestamp": 1616369395418, - "user_tz": -60, - "elapsed": 563, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - } - }, - "source": [ - "import pandas as pd\n", - "\n", - "df = pd.read_csv('smart_grid_stability_augmented.csv')" - ], - "execution_count": 17, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - 
"id": "x81Ip-6fmnfr" - }, - "source": [ - "normalize values, so they are all between 0 and 1 (included)" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "7QZX5c2ZMpTj", - "executionInfo": { - "status": "ok", - "timestamp": 1616369401750, - "user_tz": -60, - "elapsed": 552, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - } - }, - "source": [ - "from sklearn import preprocessing\n", - "\n", - "scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])\n", - "df_norm_array = scaler.transform(df.iloc[:, 0:-1])\n", - "df_norm = pd.DataFrame(data=df_norm_array,\n", - " columns=df.columns[:-1])\n", - "df_norm['stabf'] = df['stabf']" - ], - "execution_count": 18, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hjAT_K-Cmzhq" - }, - "source": [ - "divide the data into train, test and validation subsets" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "MvI7kiL0UPc8", - "executionInfo": { - "status": "ok", - "timestamp": 1616369417725, - "user_tz": -60, - "elapsed": 562, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - } - }, - "source": [ - "from sklearn.model_selection import train_test_split\n", - "\n", - "train, testAndValid = train_test_split(\n", - " df_norm,\n", - " test_size=0.2,\n", - " random_state=42,\n", - " stratify=df_norm['stabf'])\n", - "\n", - "test, valid =train_test_split(\n", - " testAndValid,\n", - " test_size=0.5,\n", - " random_state=42,\n", - " stratify=testAndValid['stabf'])" - ], - "execution_count": 19, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FdUL87MgnE2G" - }, - "source": [ - "print short summary of the dataset and its subsets" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "WUrX63SGcHSB", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "status": "ok", - "timestamp": 1616369421560, - "user_tz": -60, 
- "elapsed": 854, - "user": { - "displayName": "jadenadjezioro", - "photoUrl": "", - "userId": "13576387580000290170" - } - }, - "outputId": "8cffba3e-8ea5-48b1-c7ce-b8ba2b7229e7" - }, - "source": [ - "def namestr(obj, namespace):\n", - " return [name for name in namespace if namespace[name] is obj]\n", - "\n", - "dataset = df_norm\n", - "for x in [dataset, train, test, valid]:\n", - " print([q for q in namestr(x, globals()) if len(q) == max([len(w) for w in namestr(x, globals())])][-1]) \n", - " print(\"size:\", len(x))\n", - " print(x.describe(include='all'))\n", - " print(\"class distribution\", x.value_counts('stabf'))\n", - " print('===============================================================')" - ], - "execution_count": 20, - "outputs": [ - { - "output_type": "stream", - "text": [ - "dataset\n", - "size: 60000\n", - " tau1 tau2 ... stab stabf\n", - "count 6.000000e+04 6.000000e+04 ... 6.000000e+04 60000\n", - "unique NaN NaN ... NaN 2\n", - "top NaN NaN ... NaN unstable\n", - "freq NaN NaN ... NaN 38280\n", - "mean 1.476245e-16 -1.998105e-16 ... 3.981075e-17 NaN\n", - "std 1.000008e+00 1.000008e+00 ... 1.000008e+00 NaN\n", - "min -1.731763e+00 -1.731999e+00 ... -2.613709e+00 NaN\n", - "25% -8.660657e-01 -8.660215e-01 ... -8.475133e-01 NaN\n", - "50% 1.437170e-06 -7.028730e-06 ... 3.821538e-02 NaN\n", - "75% 8.659131e-01 8.659873e-01 ... 7.895385e-01 NaN\n", - "max 1.731859e+00 1.731991e+00 ... 2.537363e+00 NaN\n", - "\n", - "[11 rows x 14 columns]\n", - "class distribution stabf\n", - "unstable 38280\n", - "stable 21720\n", - "dtype: int64\n", - "===============================================================\n", - "train\n", - "size: 48000\n", - " tau1 tau2 ... stab stabf\n", - "count 48000.000000 48000.000000 ... 48000.000000 48000\n", - "unique NaN NaN ... NaN 2\n", - "top NaN NaN ... NaN unstable\n", - "freq NaN NaN ... NaN 30624\n", - "mean -0.001546 -0.001068 ... -0.000873 NaN\n", - "std 1.000934 0.999107 ... 
0.999578 NaN\n", - "min -1.731763 -1.731999 ... -2.613709 NaN\n", - "25% -0.868796 -0.864317 ... -0.847686 NaN\n", - "50% -0.001740 -0.005136 ... 0.036743 NaN\n", - "75% 0.868335 0.861387 ... 0.788993 NaN\n", - "max 1.731859 1.731991 ... 2.537363 NaN\n", - "\n", - "[11 rows x 14 columns]\n", - "class distribution stabf\n", - "unstable 30624\n", - "stable 17376\n", - "dtype: int64\n", - "===============================================================\n", - "test\n", - "size: 6000\n", - " tau1 tau2 ... stab stabf\n", - "count 6000.000000 6000.000000 ... 6000.000000 6000\n", - "unique NaN NaN ... NaN 2\n", - "top NaN NaN ... NaN unstable\n", - "freq NaN NaN ... NaN 3828\n", - "mean 0.023917 0.012911 ... 0.003546 NaN\n", - "std 0.998552 1.001761 ... 0.998815 NaN\n", - "min -1.731763 -1.731184 ... -2.613709 NaN\n", - "25% -0.839910 -0.855393 ... -0.847835 NaN\n", - "50% 0.042499 0.020595 ... 0.049834 NaN\n", - "75% 0.889110 0.902355 ... 0.794568 NaN\n", - "max 1.731686 1.731427 ... 2.537363 NaN\n", - "\n", - "[11 rows x 14 columns]\n", - "class distribution stabf\n", - "unstable 3828\n", - "stable 2172\n", - "dtype: int64\n", - "===============================================================\n", - "valid\n", - "size: 6000\n", - " tau1 tau2 ... stab stabf\n", - "count 6000.000000 6000.000000 ... 6000.000000 6000\n", - "unique NaN NaN ... NaN 2\n", - "top NaN NaN ... NaN unstable\n", - "freq NaN NaN ... NaN 3828\n", - "mean -0.011551 -0.004364 ... 0.003435 NaN\n", - "std 0.993842 1.005519 ... 1.004786 NaN\n", - "min -1.731763 -1.731999 ... -2.613709 NaN\n", - "25% -0.874471 -0.887753 ... -0.844789 NaN\n", - "50% -0.017244 0.017840 ... 0.039665 NaN\n", - "75% 0.825347 0.868048 ... 0.787678 NaN\n", - "max 1.731859 1.731991 ... 
2.537363 NaN\n", - "\n", - "[11 rows x 14 columns]\n", - "class distribution stabf\n", - "unstable 3828\n", - "stable 2172\n", - "dtype: int64\n", - "===============================================================\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ium01.ipynb","provenance":[],"collapsed_sections":["EYeZaE3Cxf5i"],"toc_visible":true,"mount_file_id":"1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM","authorship_tag":"ABX9TyPMsVZZ+JFELlXyuCddiOXa"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zn8GQjYWnbcX"},"source":["# Notebook for first subtask of Inżynieria Uczenia Maszynowego class project.\n","This workbook downloads, normalizes and prints a short summary of the dataset I will be working on and its subsets.\n","\n","Link to the dataset at Kaggle.com:\n","\n","https://www.kaggle.com/pcbreviglieri/smart-grid-stability"]},{"cell_type":"markdown","metadata":{"id":"Omh9bzNn7s0Z"},"source":["#### google colab related stuff"]},{"cell_type":"code","metadata":{"id":"Z14xGWuJnWwq","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616926547933,"user_tz":-120,"elapsed":1241,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"9cc0275b-0883-4494-97a7-29c5054727da"},"source":["from google.colab import drive\n","drive.mount('drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Drive already mounted at drive; to attempt to forcibly remount, call drive.mount(\"drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mROvxIELsVv1"},"source":["* Click in Colab GUI to allow Colab access and modify Google Drive
files"]},{"cell_type":"code","metadata":{"id":"hVfCOcburj5P","executionInfo":{"status":"ok","timestamp":1616926567889,"user_tz":-120,"elapsed":4654,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}}},"source":["!mkdir ~/.kaggle\n","!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n","!chmod +x ~/.kaggle/kaggle.json\n","!pip install -q kaggle"],"execution_count":2,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EYeZaE3Cxf5i"},"source":["# script for lab IUM-01"]},{"cell_type":"markdown","metadata":{"id":"XspjcqV4U9tb"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"3UjQJzTawfKH"},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n","!unzip smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mkK6wZ2zmhdQ"},"source":["read the data into a pandas DataFrame"]},{"cell_type":"code","metadata":{"id":"JcPbvjeixwQa"},"source":["import pandas as pd\n","\n","df = pd.read_csv('smart_grid_stability_augmented.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"x81Ip-6fmnfr"},"source":["standardize the feature columns so that each has zero mean and unit variance (values are not confined to the 0-1 range)"]},{"cell_type":"code","metadata":{"id":"7QZX5c2ZMpTj"},"source":["from sklearn import preprocessing\n","\n","scaler = preprocessing.StandardScaler().fit(df.iloc[:, 0:-1])\n","df_norm_array = scaler.transform(df.iloc[:, 0:-1])\n","df_norm = pd.DataFrame(data=df_norm_array,\n"," columns=df.columns[:-1])\n","df_norm['stabf'] = df['stabf']"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hjAT_K-Cmzhq"},"source":["divide the data into train, test and validation subsets"]},{"cell_type":"code","metadata":{"id":"MvI7kiL0UPc8"},"source":["from sklearn.model_selection import train_test_split\n","\n","train, testAndValid = train_test_split(\n"," df_norm,\n"," test_size=0.2,\n"," random_state=42,\n","
stratify=df_norm['stabf'])\n","\n","test, valid =train_test_split(\n"," testAndValid,\n"," test_size=0.5,\n"," random_state=42,\n"," stratify=testAndValid['stabf'])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FdUL87MgnE2G"},"source":["print short summary of the dataset and its subsets"]},{"cell_type":"code","metadata":{"id":"WUrX63SGcHSB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616369421560,"user_tz":-60,"elapsed":854,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"8cffba3e-8ea5-48b1-c7ce-b8ba2b7229e7"},"source":["def namestr(obj, namespace):\n"," return [name for name in namespace if namespace[name] is obj]\n","\n","dataset = df_norm\n","for x in [dataset, train, test, valid]:\n"," print([q for q in namestr(x, globals()) if len(q) == max([len(w) for w in namestr(x, globals())])][-1]) \n"," print(\"size:\", len(x))\n"," print(x.describe(include='all'))\n"," print(\"class distribution\", x.value_counts('stabf'))\n"," print('===============================================================')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["dataset\n","size: 60000\n"," tau1 tau2 ... stab stabf\n","count 6.000000e+04 6.000000e+04 ... 6.000000e+04 60000\n","unique NaN NaN ... NaN 2\n","top NaN NaN ... NaN unstable\n","freq NaN NaN ... NaN 38280\n","mean 1.476245e-16 -1.998105e-16 ... 3.981075e-17 NaN\n","std 1.000008e+00 1.000008e+00 ... 1.000008e+00 NaN\n","min -1.731763e+00 -1.731999e+00 ... -2.613709e+00 NaN\n","25% -8.660657e-01 -8.660215e-01 ... -8.475133e-01 NaN\n","50% 1.437170e-06 -7.028730e-06 ... 3.821538e-02 NaN\n","75% 8.659131e-01 8.659873e-01 ... 7.895385e-01 NaN\n","max 1.731859e+00 1.731991e+00 ... 
2.537363e+00 NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable 38280\n","stable 21720\n","dtype: int64\n","===============================================================\n","train\n","size: 48000\n"," tau1 tau2 ... stab stabf\n","count 48000.000000 48000.000000 ... 48000.000000 48000\n","unique NaN NaN ... NaN 2\n","top NaN NaN ... NaN unstable\n","freq NaN NaN ... NaN 30624\n","mean -0.001546 -0.001068 ... -0.000873 NaN\n","std 1.000934 0.999107 ... 0.999578 NaN\n","min -1.731763 -1.731999 ... -2.613709 NaN\n","25% -0.868796 -0.864317 ... -0.847686 NaN\n","50% -0.001740 -0.005136 ... 0.036743 NaN\n","75% 0.868335 0.861387 ... 0.788993 NaN\n","max 1.731859 1.731991 ... 2.537363 NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable 30624\n","stable 17376\n","dtype: int64\n","===============================================================\n","test\n","size: 6000\n"," tau1 tau2 ... stab stabf\n","count 6000.000000 6000.000000 ... 6000.000000 6000\n","unique NaN NaN ... NaN 2\n","top NaN NaN ... NaN unstable\n","freq NaN NaN ... NaN 3828\n","mean 0.023917 0.012911 ... 0.003546 NaN\n","std 0.998552 1.001761 ... 0.998815 NaN\n","min -1.731763 -1.731184 ... -2.613709 NaN\n","25% -0.839910 -0.855393 ... -0.847835 NaN\n","50% 0.042499 0.020595 ... 0.049834 NaN\n","75% 0.889110 0.902355 ... 0.794568 NaN\n","max 1.731686 1.731427 ... 2.537363 NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable 3828\n","stable 2172\n","dtype: int64\n","===============================================================\n","valid\n","size: 6000\n"," tau1 tau2 ... stab stabf\n","count 6000.000000 6000.000000 ... 6000.000000 6000\n","unique NaN NaN ... NaN 2\n","top NaN NaN ... NaN unstable\n","freq NaN NaN ... NaN 3828\n","mean -0.011551 -0.004364 ... 0.003435 NaN\n","std 0.993842 1.005519 ... 1.004786 NaN\n","min -1.731763 -1.731999 ... -2.613709 NaN\n","25% -0.874471 -0.887753 ... 
-0.844789 NaN\n","50% -0.017244 0.017840 ... 0.039665 NaN\n","75% 0.825347 0.868048 ... 0.787678 NaN\n","max 1.731859 1.731991 ... 2.537363 NaN\n","\n","[11 rows x 14 columns]\n","class distribution stabf\n","unstable 3828\n","stable 2172\n","dtype: int64\n","===============================================================\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"66jPkCNlVGK9"},"source":["# script for lab IUM-03"]},{"cell_type":"markdown","metadata":{"id":"SRF-igrsma-A"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"IkZTO5PhVB7R","executionInfo":{"status":"ok","timestamp":1616926577004,"user_tz":-120,"elapsed":1881,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}}},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n","!unzip smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":3,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ijEeDXgpLYqk"},"source":["check how many data entries are in the dataset"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p9ONnUUIW27z","executionInfo":{"status":"ok","timestamp":1616926972602,"user_tz":-120,"elapsed":732,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"cb078dbb-daa0-4ae6-ec08-9b84141c8507"},"source":["!wc -l smart_grid_stability_augmented.csv"],"execution_count":7,"outputs":[{"output_type":"stream","text":["60001 smart_grid_stability_augmented.csv\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"jQw4a7ZjMmXr"},"source":["take a look at the dataset to choose columns to
keep"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":226},"id":"82msg0JoVZiK","executionInfo":{"status":"ok","timestamp":1616926656293,"user_tz":-120,"elapsed":1067,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"9c0243dd-f21d-4453-dd56-7410c4391cdd"},"source":["import pandas as pd\n","df = pd.read_csv('smart_grid_stability_augmented.csv')\n","df.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
tau1tau2tau3tau4p1p2p3p4g1g2g3g4stabstabf
02.9590603.0798858.3810259.7807543.763085-0.782604-1.257395-1.7230860.6504560.8595780.8874450.9580340.055347unstable
19.3040974.9025243.0475411.3693575.067812-1.940058-1.872742-1.2550120.4134410.8624140.5621390.781760-0.005957stable
28.9717078.8484283.0464791.2145183.405158-1.207456-1.277210-0.9204920.1630410.7666890.8394440.1098530.003471unstable
30.7164157.6696004.4866412.3405633.963791-1.027473-1.938944-0.9973740.4462090.9767440.9293810.3627180.028871unstable
43.1341127.6087724.9437599.8575733.525811-1.125531-1.845975-0.5543050.7971100.4554500.6569470.8209230.049860unstable
\n","
"],"text/plain":[" tau1 tau2 tau3 ... g4 stab stabf\n","0 2.959060 3.079885 8.381025 ... 0.958034 0.055347 unstable\n","1 9.304097 4.902524 3.047541 ... 0.781760 -0.005957 stable\n","2 8.971707 8.848428 3.046479 ... 0.109853 0.003471 unstable\n","3 0.716415 7.669600 4.486641 ... 0.362718 0.028871 unstable\n","4 3.134112 7.608772 4.943759 ... 0.820923 0.049860 unstable\n","\n","[5 rows x 14 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"markdown","metadata":{"id":"KHBmXrjALz0W"},"source":["discard some of the columns; shuffle the data; divide into train, test and validations subsets and print number of rows of the subsets"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tJo2DytZVyOo","executionInfo":{"status":"ok","timestamp":1616931887367,"user_tz":-120,"elapsed":2363,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"e61890d6-3f2f-4008-a83d-b7272aeecd11"},"source":["!sed 1d smart_grid_stability_augmented.csv | cut -f 1,5,9,13,14 -d \",\" | shuf | split -l 48000\n","!mv xaa train.csv\n","!mv xab toDivide\n","!split -l 6000 toDivide\n","!mv xaa test.csv\n","!mv xab valid.csv\n","!wc -l train.csv\n","!wc -l test.csv\n","!wc -l valid.csv\n"],"execution_count":34,"outputs":[{"output_type":"stream","text":["48000 train.csv\n","6000 test.csv\n","6000 valid.csv\n"],"name":"stdout"}]}]} \ No newline at end of file