Use cutoff

This commit is contained in:
Jan Świątek 2023-04-19 20:33:37 +02:00
parent 164c0c8ab3
commit 73b08f5234
2 changed files with 8 additions and 8 deletions

View File

@@ -10,7 +10,7 @@ pipeline {
properties([
parameters([
string(
-defaultValue: '1000',
+defaultValue: '0',
description: 'Maximum number of rows',
name: 'CUTOFF',
trim: false

View File

@@ -8,21 +8,21 @@ from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
-# Read CUTOFF
-cutoff = int(sys.argv[1])
-print(cutoff)
# Load dataset
data = load_dataset('mstz/spambase')
data = pd.DataFrame(data['train'])
+# Read CUTOFF
+cutoff = int(sys.argv[1])
+cutoff = cutoff if cutoff > 0 else len(data)
# Shuffle data
-data = data.sample(frac=1)
+data = data.sample(frac=1).head(cutoff)
# Split data