Merge remote-tracking branch 'origin/master'

2021-05-10 15:35:09 +02:00 · 2021-05-10 15:35:09 +02:00 · f2f21bc120
commit f2f21bc120
parent c03f0f9ed7 f594d97c59
2 changed files with 22 additions and 18 deletions
--- a/7
+++ b/7
@ -26,7 +26,8 @@ pipeline{
            steps{
            withEnv(["CUTOFF=${params.CUTOFF}"]) {
                sh "chmod 777 ./script.sh"
-                sh "./script.sh"
+
+
            }
        }}
        stage('Archive artifacts'){
@ -34,7 +35,7 @@ pipeline{
                archiveArtifacts 'test.csv'
                archiveArtifacts 'dev.csv'
                archiveArtifacts 'train.csv'
+
        }
    }
-    }
-}
+}}
--- a/script2.py
+++ b/script2.py
@ -21,15 +21,26 @@ movies_data.dropna(inplace=True)

 # Remove not interesting columns
 drop_columns = ["title_id", "certificate", "title", "plot"]
+drop_columns2 = [
+    "original_title",
+    "countries",
+    "genres",
+    "director",
+    "cast",
+    "release_date",
+
+]
+drop_columns = drop_columns + drop_columns2
+
 movies_data.drop(labels=drop_columns, axis=1, inplace=True)

 # Normalize data, lowercase str
-for column_name in ["original_title", "countries", "genres", "director", "cast"]:
-    movies_data[column_name] = (
-        movies_data[column_name]
-        .str.translate(str.maketrans("", "", string.punctuation))
-        .str.lower()
-    )
+# for column_name in ["original_title", "countries", "genres", "director", "cast"]:
+#     movies_data[column_name] = (
+#         movies_data[column_name]
+#         .str.translate(str.maketrans("", "", string.punctuation))
+#         .str.lower()
+#     )

 # Remove ',' from votes number and change type to int
 movies_data["votes_number"] = (movies_data["votes_number"].str.replace(",", "")).astype(
@ -42,17 +53,9 @@ movies_data[["votes_number", "year", "runtime"]] = scaler.fit_transform(
    movies_data[["votes_number", "year", "runtime"]]
 )

-drop_columns = [
-    "original_title",
-    "countries",
-    "genres",
-    "director",
-    "cast",
-    "release_date",

-]

-movies_data.drop(labels=drop_columns, axis=1, inplace=True)
+#movies_data.drop(labels=drop_columns, axis=1, inplace=True)

 # Split set to train/dev/test 6:2:2 ratio and save to .csv file
 train, dev = train_test_split(movies_data, train_size=0.6, test_size=0.4, shuffle=True)