add code for Zadanie 1
This commit is contained in:
commit
13abcc4551
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.idea
|
||||
*.iml
|
12
README.md
Normal file
12
README.md
Normal file
@ -0,0 +1,12 @@
|
||||
## Project for Inżynieria Uczenia Maszynowego class.
|
||||
|
||||
The scope of this project is to propose a classifier based on the Smart Grid Stability dataset:
|
||||
https://www.kaggle.com/pcbreviglieri/smart-grid-stability
|
||||
while using proper ML tools in a correct way.
|
||||
|
||||
### Zadanie 1
|
||||
script.sh downloads and unzips the dataset and executes python_script.py,
|
||||
which then normalizes the data, divides the dataset into train, test and validation subsets
|
||||
and prints a short summary of the dataset as well as its subsets.
|
||||
|
||||
ium01.ipynb is a notebook used to develop previously mentioned scripts.
|
1
ium01.ipynb
Normal file
1
ium01.ipynb
Normal file
File diff suppressed because one or more lines are too long
36
python_script.py
Normal file
36
python_script.py
Normal file
@ -0,0 +1,36 @@
|
||||
import pandas as pd
|
||||
|
||||
from sklearn import preprocessing
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
# Load the raw dataset; the path is resolved against the working directory.
df = pd.read_csv('smart_grid_stability_augmented.csv')

# Rescale every column except the last one with a MinMaxScaler.
# NOTE(review): the scaler is fitted on the full frame before the split, so
# its statistics also come from the rows that end up in test/valid --
# confirm this is intended.
min_max_scaler = preprocessing.MinMaxScaler()
df_norm_array = min_max_scaler.fit_transform(df.iloc[:, :-1])
df_norm = pd.DataFrame(df_norm_array, columns=df.columns[:-1])
# Copy the 'stabf' column over unscaled; it is the stratification key below.
df_norm['stabf'] = df['stabf']

# First split: 80% train, 20% held out, stratified on 'stabf'.
train, testAndValid = train_test_split(
    df_norm, test_size=0.2, random_state=42, stratify=df_norm['stabf'])

# Second split: halve the held-out part into test and validation subsets.
test, valid = train_test_split(
    testAndValid, test_size=0.5, random_state=42,
    stratify=testAndValid['stabf'])
|
||||
|
||||
|
||||
def namestr(obj, namespace):
    """Return every key of *namespace* whose value is the very object *obj*.

    Matching is by identity (``is``), not equality, and the keys are
    returned in the namespace's own iteration order.
    """
    matches = []
    for key in namespace:
        if namespace[key] is obj:
            matches.append(key)
    return matches
|
||||
|
||||
|
||||
# The full normalized frame is reported under the label "dataset".
dataset = df_norm

# Print a short summary of the full dataset and of each subset.
# Each frame is paired with its label explicitly instead of recovering the
# name by identity-searching globals(): that lookup also matched the loop
# variable and the 'df_norm' alias, and only produced the right label thanks
# to name length and dict insertion order.
for label, frame in (
    ("dataset", dataset),
    ("train", train),
    ("test", test),
    ("valid", valid),
):
    print(label)
    print("size:", len(frame))
    # include='all' makes describe() cover non-numeric columns as well.
    print(frame.describe(include='all'))
    print("class distribution", frame.value_counts('stabf'))
|
Loading…
Reference in New Issue
Block a user