diff --git a/Jenkinsfile b/Jenkinsfile
index 197a084..1dbd804 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -29,7 +29,7 @@ pipeline {
         }
         stage('archiveArtifacts') {
             steps{
-                archiveArtifacts 'model1.h5'
+                archiveArtifacts 'vgsales_model.h5'
             }
         }
     }
diff --git a/train.py b/train.py
index 9c43cd7..c579a74 100755
--- a/train.py
+++ b/train.py
@@ -1,42 +1,48 @@
-#! /usr/bin/python3
-from tensorflow.keras.models import Sequential, load_model
-from tensorflow.keras.layers import Dense
-from sklearn.metrics import accuracy_score, classification_report
+import sys
 import pandas as pd
-from sklearn.model_selection import train_test_split
 import numpy as np
-import requests
-url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
-r = requests.get(url, allow_redirects=True)
+from sklearn import preprocessing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.layers import Input, Dense, Activation,Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.models import Sequential
 
-open('vgsales.csv', 'wb').write(r.content)
-df = pd.read_csv('vgsales.csv')
+# read the data from the files
+vgsales_train = pd.read_csv('train.csv')
+vgsales_test = pd.read_csv('test.csv')
+vgsales_dev = pd.read_csv('dev.csv')
 
+vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
 
+# split into X and y
+X_train = vgsales_train.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+y_train = vgsales_train[['Nintendo']]
+X_test = vgsales_test.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+y_test = vgsales_test[['Nintendo']]
 
-def regression_model():
-    model = Sequential()
-    model.add(Dense(16,activation = "relu", input_shape = (x_train.shape[1],)))
-    model.add(Dense(32,activation = "relu"))
-    model.add(Dense(1,activation = "relu"))
-
-    model.compile(optimizer = "adam", loss = "mean_squared_error")
-    return model
+print(X_train.shape[1])
+# keras model
+model = Sequential()
+model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
+model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
 
-df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
-df = df.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
-df
+early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
 
-y = df.Nintendo
+# compile the model
+model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
 
-df=((df-df.min())/(df.max()-df.min()))
+# model fit
+epochs = int(sys.argv[1])
+batch_size = int(sys.argv[2])
 
-x = df.drop(['Nintendo'],axis = 1)
+# train the model
+model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
 
-x_train, x_test, y_train, y_test = train_test_split(x,y , test_size=0.2,train_size=0.8, random_state=21)
-
-model = regression_model()
-model.fit(x_train, y_train, epochs = 600, verbose = 1)
-
-y_pred = model.predict(x_test)
-model.save('model1.h5')
\ No newline at end of file
+# save the model
+model.save('vgsales_model.h5')
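The rewritten train.py loads dev.csv but never uses it; the model is fitted on the train split, validated on the test split, and saved as vgsales_model.h5, which is the artifact the Jenkinsfile now archives. Below is a minimal sketch of how that saved model could be evaluated on the dev split. It is hypothetical and not part of this commit: it assumes dev.csv shares the schema of train.csv/test.csv, and it mirrors the script's preprocessing exactly, including the fact that the 'Nintendo' column is kept inside the feature matrix.

# Hypothetical evaluation sketch (not part of this commit). Assumes dev.csv has
# the same columns as train.csv/test.csv and that vgsales_model.h5 was produced
# by the training script above. The feature matrix mirrors the training code,
# i.e. the 'Nintendo' column is not dropped from the features.
import pandas as pd
from tensorflow.keras.models import load_model

vgsales_dev = pd.read_csv('dev.csv')
vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)

X_dev = vgsales_dev.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
y_dev = vgsales_dev[['Nintendo']]

model = load_model('vgsales_model.h5')
loss, accuracy = model.evaluate(X_dev, y_dev, verbose=0)
print(f'dev loss: {loss:.4f}, dev accuracy: {accuracy:.4f}')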
diff --git a/train2.py b/train2.py
new file mode 100644
index 0000000..c579a74
--- /dev/null
+++ b/train2.py
@@ -0,0 +1,48 @@
+import sys
+import pandas as pd
+import numpy as np
+from sklearn import preprocessing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.layers import Input, Dense, Activation,Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.models import Sequential
+
+# read the data from the files
+vgsales_train = pd.read_csv('train.csv')
+vgsales_test = pd.read_csv('test.csv')
+vgsales_dev = pd.read_csv('dev.csv')
+
+vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+
+# split into X and y
+X_train = vgsales_train.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+y_train = vgsales_train[['Nintendo']]
+X_test = vgsales_test.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+y_test = vgsales_test[['Nintendo']]
+
+print(X_train.shape[1])
+# keras model
+model = Sequential()
+model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
+model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
+
+early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
+
+# compile the model
+model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+# model fit
+epochs = int(sys.argv[1])
+batch_size = int(sys.argv[2])
+
+# train the model
+model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
+
+# save the model
+model.save('vgsales_model.h5')
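The new train2.py is byte-for-byte identical to the rewritten train.py (its blob, c579a74, matches the post-change blob of train.py). Both scripts read the number of epochs and the batch size from positional command-line arguments (for example, python3 train.py 100 32, where the values are only illustrative) and fail with an IndexError if the Jenkins job omits them; note also that early_stop is constructed but never passed to model.fit, so early stopping is not actually applied. A small, purely illustrative hardening sketch for the argument parsing follows; the default values are assumptions, not taken from the repository.

# Hypothetical hardening sketch (not part of this commit): fall back to
# assumed defaults when the positional arguments are missing.
import sys

DEFAULT_EPOCHS = 100      # assumed default, not from the repository
DEFAULT_BATCH_SIZE = 32   # assumed default, not from the repository

epochs = int(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_EPOCHS
batch_size = int(sys.argv[2]) if len(sys.argv) > 2 else DEFAULT_BATCH_SIZE

print(f'training with epochs={epochs}, batch_size={batch_size}')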