{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ium01.ipynb","provenance":[],"collapsed_sections":["EYeZaE3Cxf5i"],"toc_visible":true,"mount_file_id":"1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM","authorship_tag":"ABX9TyPMsVZZ+JFELlXyuCddiOXa"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zn8GQjYWnbcX"},"source":["# Notebook for the first subtask of the Inżynieria Uczenia Maszynowego class project.\n","This notebook downloads, normalizes and prints a short summary of the dataset I will be working on and its subsets.\n","\n","Link to the dataset at Kaggle.com:\n","\n","https://www.kaggle.com/pcbreviglieri/smart-grid-stability"]},{"cell_type":"markdown","metadata":{"id":"Omh9bzNn7s0Z"},"source":["#### google colab related stuff"]},{"cell_type":"code","metadata":{"id":"Z14xGWuJnWwq","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616926547933,"user_tz":-120,"elapsed":1241,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"9cc0275b-0883-4494-97a7-29c5054727da"},"source":["from google.colab import drive\n","drive.mount('drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Drive already mounted at drive; to attempt to forcibly remount, call drive.mount(\"drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mROvxIELsVv1"},"source":["* Click in the Colab GUI to allow Colab to access and modify Google Drive files"]},{"cell_type":"code","metadata":{"id":"hVfCOcburj5P","executionInfo":{"status":"ok","timestamp":1616926567889,"user_tz":-120,"elapsed":4654,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}}},"source":["# The Kaggle CLI reads its API token from ~/.kaggle/kaggle.json; chmod 600 keeps the token private to the owner.\n","!mkdir -p ~/.kaggle\n","!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n","!chmod 600 ~/.kaggle/kaggle.json\n","!pip install -q 
kaggle"],"execution_count":2,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EYeZaE3Cxf5i"},"source":["# script for lab IUM-01"]},{"cell_type":"markdown","metadata":{"id":"XspjcqV4U9tb"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"3UjQJzTawfKH"},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' >>/dev/null 2>&1\n","# -o overwrites previously extracted files, so re-running this cell never waits on a hidden overwrite prompt\n","!unzip -o smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mkK6wZ2zmhdQ"},"source":["read the data as a pandas DataFrame"]},{"cell_type":"code","metadata":{"id":"JcPbvjeixwQa"},"source":["import pandas as pd\n","\n","df = pd.read_csv('smart_grid_stability_augmented.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"x81Ip-6fmnfr"},"source":["normalize values so they are all between 0 and 1 (inclusive)"]},{"cell_type":"code","metadata":{"id":"7QZX5c2ZMpTj"},"source":["from sklearn import preprocessing\n","\n","# Min-max scaling maps each scaled column onto [0, 1] (the scaler's default range).\n","# Every column except the last one is scaled; the 'stabf' label is copied over unchanged.\n","scaler = preprocessing.MinMaxScaler().fit(df.iloc[:, 0:-1])\n","df_norm_array = scaler.transform(df.iloc[:, 0:-1])\n","df_norm = pd.DataFrame(data=df_norm_array,\n"," columns=df.columns[:-1])\n","df_norm['stabf'] = df['stabf']"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hjAT_K-Cmzhq"},"source":["divide the data into train, test and validation subsets"]},{"cell_type":"code","metadata":{"id":"MvI7kiL0UPc8"},"source":["from sklearn.model_selection import train_test_split\n","\n","# 80% of the rows go to train; the held-out 20% is then halved into test and validation.\n","# Both splits are stratified on 'stabf' and use a fixed seed for reproducibility.\n","train, testAndValid = train_test_split(\n"," df_norm,\n"," test_size=0.2,\n"," random_state=42,\n"," stratify=df_norm['stabf'])\n","\n","test, valid = train_test_split(\n"," testAndValid,\n"," test_size=0.5,\n"," random_state=42,\n"," stratify=testAndValid['stabf'])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FdUL87MgnE2G"},"source":["print short summary of the dataset and its 
subsets"]},{"cell_type":"code","metadata":{"id":"WUrX63SGcHSB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616369421560,"user_tz":-60,"elapsed"