{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ium01.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true,"mount_file_id":"1Z43Re5xIaiFOO8c1uCDSbP5Xf4BxmRqM","authorship_tag":"ABX9TyPFtl4TgeF3QUTlnbPWnS29"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","metadata":{"id":"zn8GQjYWnbcX"},"source":["# Notebook for the first subtask of the Inżynieria Uczenia Maszynowego class project.\n","This notebook downloads and normalizes the dataset I will be working on, then prints a short summary of the dataset and its subsets.\n","\n","Link to the dataset at Kaggle.com:\n","\n","https://www.kaggle.com/pcbreviglieri/smart-grid-stability"]},{"cell_type":"markdown","metadata":{"id":"Omh9bzNn7s0Z"},"source":["#### Google Colab related stuff"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Z14xGWuJnWwq","executionInfo":{"status":"ok","timestamp":1616345223048,"user_tz":-60,"elapsed":21202,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"d221b1c6-8331-4124-f2f2-52cfbaeb3283"},"source":["from google.colab import drive\n","drive.mount('drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /gdrive\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mROvxIELsVv1"},"source":["* Click in the Colab GUI to allow Colab to access and modify Google Drive files"]},{"cell_type":"code","metadata":{"id":"hVfCOcburj5P"},"source":["!mkdir -p ~/.kaggle\n","!cp drive/MyDrive/kaggle.json ~/.kaggle/.\n","!chmod 600 ~/.kaggle/kaggle.json\n","!pip install -q kaggle"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EYeZaE3Cxf5i"},"source":["# script"]},{"cell_type":"markdown","metadata":{"id":"SRF-igrsma-A"},"source":["download data"]},{"cell_type":"code","metadata":{"id":"3UjQJzTawfKH"},"source":["!kaggle datasets download -d 'pcbreviglieri/smart-grid-stability' 
>>/dev/null 2>&1\n","!unzip smart-grid-stability.zip >>/dev/null 2>&1"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"mkK6wZ2zmhdQ"},"source":["read the data into a pandas DataFrame"]},{"cell_type":"code","metadata":{"id":"JcPbvjeixwQa"},"source":["import pandas as pd\n","\n","df = pd.read_csv('smart_grid_stability_augmented.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"x81Ip-6fmnfr"},"source":["normalize values so that they all lie between 0 and 1 (inclusive)"]},{"cell_type":"code","metadata":{"id":"7QZX5c2ZMpTj"},"source":["from sklearn import preprocessing\n","\n","min_max_scaler = preprocessing.MinMaxScaler()\n","df_norm_array = min_max_scaler.fit_transform(df.iloc[:,0:-1])\n","df_norm = pd.DataFrame(data=df_norm_array,\n"," columns=df.columns[:-1])\n","df_norm['stabf'] = df['stabf']"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hjAT_K-Cmzhq"},"source":["divide the data into train, test and validation subsets"]},{"cell_type":"code","metadata":{"id":"MvI7kiL0UPc8"},"source":["from sklearn.model_selection import train_test_split\n","\n","train, testAndValid = train_test_split(\n"," df_norm,\n"," test_size=0.2,\n"," random_state=42,\n"," stratify=df_norm['stabf'])\n","\n","test, valid =train_test_split(\n"," testAndValid,\n"," test_size=0.5,\n"," random_state=42,\n"," stratify=testAndValid['stabf'])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FdUL87MgnE2G"},"source":["print a short summary of the dataset and its subsets"]},{"cell_type":"code","metadata":{"id":"WUrX63SGcHSB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1616349373305,"user_tz":-60,"elapsed":1041,"user":{"displayName":"jadenadjezioro","photoUrl":"","userId":"13576387580000290170"}},"outputId":"e87f1c78-fa00-42a4-c2a7-a54ce1d0ae5f"},"source":["def namestr(obj, namespace):\n"," return [name for name in namespace if 
namespace[name] is obj]\n","\n","dataset = df_norm\n","for x in [dat