Merge remote-tracking branch 'origin/master'
Some checks failed
s430705-training/pipeline/head There was a failure building this commit
Some checks failed
s430705-training/pipeline/head There was a failure building this commit
This commit is contained in:
commit
f2f21bc120
7
Jenkinsfile
vendored
7
Jenkinsfile
vendored
@ -26,7 +26,8 @@ pipeline{
|
|||||||
steps{
|
steps{
|
||||||
withEnv(["CUTOFF=${params.CUTOFF}"]) {
|
withEnv(["CUTOFF=${params.CUTOFF}"]) {
|
||||||
sh "chmod 777 ./script.sh"
|
sh "chmod 777 ./script.sh"
|
||||||
sh "./script.sh"
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
stage('Archive artifacts'){
|
stage('Archive artifacts'){
|
||||||
@ -34,7 +35,7 @@ pipeline{
|
|||||||
archiveArtifacts 'test.csv'
|
archiveArtifacts 'test.csv'
|
||||||
archiveArtifacts 'dev.csv'
|
archiveArtifacts 'dev.csv'
|
||||||
archiveArtifacts 'train.csv'
|
archiveArtifacts 'train.csv'
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}}
|
||||||
}
|
|
33
script2.py
33
script2.py
@ -21,15 +21,26 @@ movies_data.dropna(inplace=True)
|
|||||||
|
|
||||||
# Remove not interesting columns
|
# Remove not interesting columns
|
||||||
drop_columns = ["title_id", "certificate", "title", "plot"]
|
drop_columns = ["title_id", "certificate", "title", "plot"]
|
||||||
|
drop_columns2 = [
|
||||||
|
"original_title",
|
||||||
|
"countries",
|
||||||
|
"genres",
|
||||||
|
"director",
|
||||||
|
"cast",
|
||||||
|
"release_date",
|
||||||
|
|
||||||
|
]
|
||||||
|
drop_columns = drop_columns + drop_columns2
|
||||||
|
|
||||||
movies_data.drop(labels=drop_columns, axis=1, inplace=True)
|
movies_data.drop(labels=drop_columns, axis=1, inplace=True)
|
||||||
|
|
||||||
# Normalize data, lowercase str
|
# Normalize data, lowercase str
|
||||||
for column_name in ["original_title", "countries", "genres", "director", "cast"]:
|
# for column_name in ["original_title", "countries", "genres", "director", "cast"]:
|
||||||
movies_data[column_name] = (
|
# movies_data[column_name] = (
|
||||||
movies_data[column_name]
|
# movies_data[column_name]
|
||||||
.str.translate(str.maketrans("", "", string.punctuation))
|
# .str.translate(str.maketrans("", "", string.punctuation))
|
||||||
.str.lower()
|
# .str.lower()
|
||||||
)
|
# )
|
||||||
|
|
||||||
# Remove ',' from votes number and change type to int
|
# Remove ',' from votes number and change type to int
|
||||||
movies_data["votes_number"] = (movies_data["votes_number"].str.replace(",", "")).astype(
|
movies_data["votes_number"] = (movies_data["votes_number"].str.replace(",", "")).astype(
|
||||||
@ -42,17 +53,9 @@ movies_data[["votes_number", "year", "runtime"]] = scaler.fit_transform(
|
|||||||
movies_data[["votes_number", "year", "runtime"]]
|
movies_data[["votes_number", "year", "runtime"]]
|
||||||
)
|
)
|
||||||
|
|
||||||
drop_columns = [
|
|
||||||
"original_title",
|
|
||||||
"countries",
|
|
||||||
"genres",
|
|
||||||
"director",
|
|
||||||
"cast",
|
|
||||||
"release_date",
|
|
||||||
|
|
||||||
]
|
|
||||||
|
|
||||||
movies_data.drop(labels=drop_columns, axis=1, inplace=True)
|
#movies_data.drop(labels=drop_columns, axis=1, inplace=True)
|
||||||
|
|
||||||
# Split set to train/dev/test 6:2:2 ratio and save to .csv file
|
# Split set to train/dev/test 6:2:2 ratio and save to .csv file
|
||||||
train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)
|
train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)
|
||||||
|
Loading…
Reference in New Issue
Block a user