lab06_01
This commit is contained in:
parent
755bc2b67a
commit
87855b69ea
27
Jenkinsfile_training
Normal file
27
Jenkinsfile_training
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
// Training pipeline: copies artifacts from the dataset job, runs the training
// script with a configurable learning rate, and archives the saved model.
pipeline {
    // Run inside a container built from the repository's Dockerfile.
    agent { dockerfile true }
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
        // Declared explicitly so the training stage always receives a value;
        // previously the stage referenced a parameter that was never defined.
        string(
            defaultValue: '0.001',
            description: 'Learning rate passed to lab06_training.py',
            name: 'LEARNING_RATE')
    }
    stages {
        stage('copyArtifacts') {
            steps {
                copyArtifacts fingerprintArtifacts: true, projectName: 's430705-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Sh script') {
            steps {
                // Single quotes are intentional: Groovy must NOT interpolate here.
                // Build parameters are exported to the shell as environment
                // variables, so the shell expands $LEARNING_RATE itself. This
                // avoids the "bad substitution" that '${params.LEARNING_RATE}'
                // caused and keeps user-supplied text out of the command string.
                sh 'python3 lab06_training.py "$LEARNING_RATE"'
            }
        }
        stage('Archive artifacts') {
            steps {
                archiveArtifacts artifacts: 'model_movies'
            }
        }
    }
}
|
23007
imdb_movies.csv
23007
imdb_movies.csv
File diff suppressed because one or more lines are too long
50
lab06_training.py
Normal file
50
lab06_training.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import string
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn import preprocessing
|
||||||
|
import wget
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from tensorflow.keras.models import Sequential
|
||||||
|
from tensorflow.keras.layers import Dense
|
||||||
|
from tensorflow.keras.optimizers import Adam
|
||||||
|
from tensorflow.keras.layers import Dropout
|
||||||
|
from tensorflow.keras.callbacks import EarlyStopping
|
||||||
|
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
||||||
|
|
||||||
|
|
||||||
|
# Matches the default step size of Keras' Adam optimizer, so running the script
# without an argument trains exactly as the previous optimizer="adam" did.
DEFAULT_LEARNING_RATE = 0.001


def parse_learning_rate(argv, default=DEFAULT_LEARNING_RATE):
    """Return the learning rate given as the first command-line argument.

    Args:
        argv: Full argument vector, program name first (like ``sys.argv``).
        default: Value returned when no argument, or a blank one, is supplied.

    Returns:
        The learning rate as a float.

    Raises:
        ValueError: If the argument is not a number, or is not a positive
            finite value.
    """
    raw = argv[1].strip() if len(argv) > 1 else ""
    if not raw:
        return default
    rate = float(raw)
    # The chained comparison also rejects NaN and infinity.
    if not 0 < rate < float("inf"):
        raise ValueError(
            f"learning rate must be a positive finite number, got {raw!r}"
        )
    return rate


def main(argv=None):
    """Train the rating regressor on ``train.csv`` and save it as ``model_movies``.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. An optional first
            argument sets the Adam learning rate.
    """
    if argv is None:
        # Local import keeps this block self-contained within the script.
        import sys

        argv = sys.argv
    learning_rate = parse_learning_rate(argv)

    movies_data = pd.read_csv('train.csv')
    # The first column is dropped before training (presumably a saved index
    # column -- confirm against the dataset-creation script).
    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
    movies_data.dropna(inplace=True)
    X = movies_data.drop("rating", axis=1)
    Y = movies_data["rating"]

    # Split set to train/test 8:2 ratio
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42
    )

    # Set up model
    model = Sequential()
    model.add(Dense(8, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    # Use the imported Adam class so the requested learning rate takes effect.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse")

    # Stop once validation loss has not improved for 10 consecutive epochs.
    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)

    model.fit(
        x=X_train,
        y=Y_train.values,
        # The held-out split doubles as the validation set for early stopping.
        validation_data=(X_test, Y_test.values),
        batch_size=128,
        epochs=400,
        callbacks=[early_stop],
    )

    model.save('model_movies')


if __name__ == "__main__":
    main()
|
16
script2.py
16
script2.py
@ -37,10 +37,22 @@ movies_data["votes_number"] = (movies_data["votes_number"].str.replace(",", ""))
|
|||||||
|
|
||||||
# Normalize number values
|
# Normalize number values
|
||||||
scaler = preprocessing.MinMaxScaler()
|
scaler = preprocessing.MinMaxScaler()
|
||||||
movies_data[["rating", "votes_number", "year", "runtime"]] = scaler.fit_transform(
|
movies_data[["votes_number", "year", "runtime"]] = scaler.fit_transform(
|
||||||
movies_data[["rating", "votes_number", "year", "runtime"]]
|
movies_data[["votes_number", "year", "runtime"]]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
drop_columns = [
|
||||||
|
"original_title",
|
||||||
|
"countries",
|
||||||
|
"genres",
|
||||||
|
"director",
|
||||||
|
"cast",
|
||||||
|
"release_date",
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
movies_data.drop(labels=drop_columns, axis=1, inplace=True)
|
||||||
|
|
||||||
# Split set to train/dev/test 6:2:2 ratio and save to .csv file
|
# Split set to train/dev/test 6:2:2 ratio and save to .csv file
|
||||||
train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)
|
train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)
|
||||||
dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=True)
|
dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user