This commit is contained in:
michalzareba 2021-05-07 22:36:28 +02:00
parent 755bc2b67a
commit 87855b69ea
5 changed files with 92 additions and 23010 deletions

27
Jenkinsfile_training Normal file
View File

@ -0,0 +1,27 @@
// Training pipeline: copies the dataset artifacts produced by the
// 's430705-create-dataset' job, runs the training script inside the
// container built from the repository Dockerfile, and archives the model.
pipeline {
    agent { dockerfile true }
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
        // Declared so the training stage has a defined value to pass on.
        string(
            defaultValue: '0.001',
            description: 'Learning rate passed to lab06_training.py',
            name: 'LEARNING_RATE')
    }
    stages {
        stage('copyArtifacts') {
            steps {
                copyArtifacts fingerprintArtifacts: true, projectName: 's430705-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Sh script') {
            steps {
                // Build parameters are exported to the shell as environment
                // variables. The previous '${params.LEARNING_RATE}' inside a
                // single-quoted Groovy string was handed to the shell
                // verbatim, where it is not a valid expansion.
                sh 'python3 lab06_training.py "$LEARNING_RATE"'
            }
        }
        stage('Archive artifacts') {
            steps {
                // NOTE(review): if 'model_movies' is written as a directory,
                // confirm this pattern archives its contents
                // ('model_movies/**' may be needed).
                archiveArtifacts artifacts: 'model_movies'
            }
        }
    }
}

File diff suppressed because one or more lines are too long

50
lab06_training.py Normal file
View File

@ -0,0 +1,50 @@
import string
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import wget
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error
import sys

# Training script: fits a small dense regression network that predicts the
# "rating" column of train.csv and saves the trained model as 'model_movies'.
#
# Usage: python3 lab06_training.py [LEARNING_RATE]

# Keras' documented default for Adam; used when no learning rate is supplied,
# which keeps the behaviour of the former optimizer="adam" setting.
DEFAULT_LEARNING_RATE = 0.001

# Optional first CLI argument: the optimizer learning rate. A missing or
# blank argument falls back to the default instead of failing.
raw_learning_rate = sys.argv[1].strip() if len(sys.argv) > 1 else ""
learning_rate = float(raw_learning_rate) if raw_learning_rate else DEFAULT_LEARNING_RATE

movies_data = pd.read_csv('train.csv')
# Drop the first column — presumably a saved row index; verify against the
# job that produces train.csv.
movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
movies_data.dropna(inplace=True)

# Features are every remaining column except the regression target.
X = movies_data.drop("rating", axis=1)
Y = movies_data["rating"]

# Split set to train/test 8:2 ratio
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Set up model
model = Sequential()
model.add(Dense(8, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(3, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(1))
model.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse")

# Stop once the validation loss has not improved for 10 consecutive epochs.
early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)

model.fit(
    x=X_train,
    y=Y_train.values,
    validation_data=(X_test, Y_test.values),
    batch_size=128,
    epochs=400,
    callbacks=[early_stop],
)

model.save('model_movies')

View File

@ -37,10 +37,22 @@ movies_data["votes_number"] = (movies_data["votes_number"].str.replace(",", ""))
# Normalize number values # Normalize number values
scaler = preprocessing.MinMaxScaler() scaler = preprocessing.MinMaxScaler()
movies_data[["rating", "votes_number", "year", "runtime"]] = scaler.fit_transform( movies_data[["votes_number", "year", "runtime"]] = scaler.fit_transform(
movies_data[["rating", "votes_number", "year", "runtime"]] movies_data[["votes_number", "year", "runtime"]]
) )
drop_columns = [
"original_title",
"countries",
"genres",
"director",
"cast",
"release_date",
]
movies_data.drop(labels=drop_columns, axis=1, inplace=True)
# Split set to train/dev/test 6:2:2 ratio and save to .csv file # Split set to train/dev/test 6:2:2 ratio and save to .csv file
train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True) train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)
dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=True) dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=True)