zadania z dvc

This commit is contained in:
s434695 2021-06-11 23:09:34 +02:00
parent 986ce24801
commit c7a114129d
8 changed files with 117 additions and 0 deletions

View File

@ -0,0 +1,4 @@
[core]
remote = ium_ssh_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/vgsales.csv
/10_x.csv
/10_y.csv
/vgsales_model_dvc.h5

12
DVC_prepare.py Normal file
View File

@ -0,0 +1,12 @@
import pandas as pd
# Prepare stage: read the raw sales table, derive a binary "published by
# Nintendo" target, and write features and target to separate CSV files.
vgsales = pd.read_csv('vgsales.csv')

# 1 where Publisher equals 'Nintendo', 0 otherwise (missing publishers compare
# unequal and therefore become 0).
vgsales['Nintendo'] = (vgsales['Publisher'] == 'Nintendo').astype('int64')

# Columns excluded from the feature matrix: identifiers/descriptive fields plus
# the target itself, so the label does not leak into the features.
non_feature_columns = [
    'Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'Nintendo',
]

Y = vgsales['Nintendo']
X = vgsales.drop(columns=non_feature_columns)

# index=False keeps the row index out of the files.
X.to_csv('10_x.csv', index=False)
Y.to_csv('10_y.csv', index=False)

42
DVC_train.py Normal file
View File

@ -0,0 +1,42 @@
import sys
from tensorflow.keras.backend import batch_dot, mean
import pandas as pd
import numpy as np
from six import int2byte
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
# Train stage: fit a small feed-forward binary classifier on the prepared
# features/labels and save the trained model to 'vgsales_model_dvc.h5'.
X = pd.read_csv('10_x.csv')
Y = pd.read_csv('10_y.csv')

# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, train_size=0.8, random_state=21
)

# One hidden ReLU layer (9 units) followed by a single sigmoid output unit.
model = Sequential()
model.add(Dense(9, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The callback must be passed to fit() to have any effect; previously it was
# created but never used, so early stopping never ran.
# NOTE(review): the test split doubles as validation data here, so the
# early-stopping signal and the final metric come from the same rows.
model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)

prediction = model.predict(X_test)

# mean_squared_error returns the MSE; take the square root so that the value
# stored in `rmse` really is the root-mean-squared error between the sigmoid
# outputs and the labels from 10_y.csv.
rmse = np.sqrt(mean_squared_error(y_test, prediction))
print('Test RMSE:', rmse)

model.save('vgsales_model_dvc.h5')

34
dvc.lock Normal file
View File

@ -0,0 +1,34 @@
schema: '2.0'
stages:
prepare:
cmd: python3 DVC_prepare.py
deps:
- path: DVC_prepare.py
md5: 4a18290ac1a5c889a63ff2dee652dcef
size: 331
- path: vgsales.csv
md5: 67fa6f5efdc100db4586aa80556e8620
size: 1355781
outs:
- path: 10_x.csv
md5: ac06118118e3e8dd519820c78d1ec868
size: 384424
- path: 10_y.csv
md5: e1543e161f0da6d5887c8c7baf0210c7
size: 33205
train:
cmd: python3 DVC_train.py
deps:
- path: 10_x.csv
md5: ac06118118e3e8dd519820c78d1ec868
size: 384424
- path: 10_y.csv
md5: e1543e161f0da6d5887c8c7baf0210c7
size: 33205
- path: DVC_train.py
md5: 5650806d29bdf745ee046411d3b75f1e
size: 1305
outs:
- path: vgsales_model_dvc.h5
md5: a2c4dae4a46d3554654cee5b4e4c42c7
size: 25512

17
dvc.yaml Normal file
View File

@ -0,0 +1,17 @@
# DVC pipeline definition with two stages: prepare -> train.
stages:
# prepare: runs DVC_prepare.py on vgsales.csv and produces 10_x.csv / 10_y.csv.
prepare:
cmd: python3 DVC_prepare.py
deps:
- DVC_prepare.py
- vgsales.csv
outs:
- 10_x.csv
- 10_y.csv
# train: runs DVC_train.py on the prepare outputs and produces the saved model.
train:
cmd: python3 DVC_train.py
deps:
- 10_x.csv
- 10_y.csv
- DVC_train.py
outs:
- vgsales_model_dvc.h5

4
vgsales.csv.dvc Normal file
View File

@ -0,0 +1,4 @@
outs:
- md5: 67fa6f5efdc100db4586aa80556e8620
size: 1355781
path: vgsales.csv

Binary file not shown.