Use cutoff

2023-04-19 20:33:37 +02:00 · 2023-04-19 20:33:37 +02:00 · 73b08f5234
commit 73b08f5234
parent 164c0c8ab3
2 changed files with 8 additions and 8 deletions
--- a/create-dataset/Jenkinsfile
+++ b/create-dataset/Jenkinsfile
@@ -10,7 +10,7 @@ pipeline {
                    properties([
                        parameters([
                            string(
-                              defaultValue: '1000',
+                              defaultValue: '0',
                              description: 'Maximum number of rows',
                              name: 'CUTOFF',
                              trim: false
--- a/create-dataset/main.py
+++ b/create-dataset/main.py
@@ -8,21 +8,21 @@ from datasets import load_dataset
 from sklearn.preprocessing import MinMaxScaler


-# Read CUTOFF
-
-cutoff = int(sys.argv[1])
-print(cutoff)
-
-
 # Load dataset

 data = load_dataset('mstz/spambase')
 data = pd.DataFrame(data['train'])


+# Read CUTOFF
+
+cutoff = int(sys.argv[1])
+cutoff = cutoff if cutoff > 0 else len(data)
+
+
 # Shuffle data

-data = data.sample(frac=1)
+data = data.sample(frac=1).head(cutoff)


 # Split data