diff --git a/.dvc/config b/.dvc/config index e69de29..c02d6a2 100644 --- a/.dvc/config +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = ium_ssh_remote +['remote "ium_ssh_remote"'] + url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d5caba4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/vgsales.csv +/10_x.csv +/10_y.csv +/vgsales_model_dvc.h5 diff --git a/DVC_prepare.py b/DVC_prepare.py new file mode 100644 index 0000000..71ba270 --- /dev/null +++ b/DVC_prepare.py @@ -0,0 +1,12 @@ +import pandas as pd + +vgsales=pd.read_csv('vgsales.csv') + +vgsales['Nintendo'] = vgsales['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0) + +Y = vgsales['Nintendo'] +X = vgsales.drop(['Rank','Name','Platform','Year','Genre','Publisher','Nintendo'],axis = 1) + + +X.to_csv(r'10_x.csv', index=False) +Y.to_csv(r'10_y.csv', index=False) \ No newline at end of file diff --git a/DVC_train.py b/DVC_train.py new file mode 100644 index 0000000..275bf93 --- /dev/null +++ b/DVC_train.py @@ -0,0 +1,42 @@ +import sys +from tensorflow.keras.backend import batch_dot, mean +import pandas as pd +import numpy as np +from six import int2byte +from sklearn import preprocessing +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras.layers import Input, Dense, Activation,Dropout +from tensorflow.keras.models import Model +from tensorflow.keras.callbacks import EarlyStopping +from tensorflow.keras.models import Sequential + + +X=pd.read_csv('10_x.csv') +Y=pd.read_csv('10_y.csv') + +X_train, X_test, y_train, y_test = train_test_split(X,Y , test_size=0.2,train_size=0.8, random_state=21) + +model = Sequential() +model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu')) +model.add(Dense(1,kernel_initializer='normal', activation='sigmoid')) + +early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10) + + +model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) + + +model.fit(X_train, y_train, epochs=15, batch_size=16, validation_data=(X_test, y_test)) + + +prediction = model.predict(X_test) + + +rmse = mean_squared_error(y_test, prediction) + + +model.save('vgsales_model_dvc.h5') \ No newline at end of file diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..8d83ed8 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,34 @@ +schema: '2.0' +stages: + prepare: + cmd: python3 DVC_prepare.py + deps: + - path: DVC_prepare.py + md5: 4a18290ac1a5c889a63ff2dee652dcef + size: 331 + - path: vgsales.csv + md5: 67fa6f5efdc100db4586aa80556e8620 + size: 1355781 + outs: + - path: 10_x.csv + md5: ac06118118e3e8dd519820c78d1ec868 + size: 384424 + - path: 10_y.csv + md5: e1543e161f0da6d5887c8c7baf0210c7 + size: 33205 + train: + cmd: python3 DVC_train.py + deps: + - path: 10_x.csv + md5: ac06118118e3e8dd519820c78d1ec868 + size: 384424 + - path: 10_y.csv + md5: e1543e161f0da6d5887c8c7baf0210c7 + size: 33205 + - path: DVC_train.py + md5: 5650806d29bdf745ee046411d3b75f1e + size: 1305 + outs: + - path: vgsales_model_dvc.h5 + md5: a2c4dae4a46d3554654cee5b4e4c42c7 + size: 25512 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..0f93ce3 --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,17 @@ +stages: + prepare: + cmd: python3 DVC_prepare.py + deps: + - DVC_prepare.py + - vgsales.csv + outs: + - 10_x.csv + - 10_y.csv + train: + cmd: python3 DVC_train.py + deps: + - 10_x.csv + - 10_y.csv + - DVC_train.py + outs: + - vgsales_model_dvc.h5 diff --git a/vgsales.csv.dvc b/vgsales.csv.dvc new file mode 100644 index 0000000..638c7b5 --- /dev/null +++ b/vgsales.csv.dvc @@ -0,0 +1,4 @@ +outs: +- md5: 67fa6f5efdc100db4586aa80556e8620 + size: 1355781 + path: vgsales.csv diff --git a/vgsales_model.h5 b/vgsales_model.h5 index 63aca41..eb3e11c 100644 Binary files a/vgsales_model.h5 and b/vgsales_model.h5 differ