Use cutoff
This commit is contained in:
parent
164c0c8ab3
commit
73b08f5234
2
create-dataset/Jenkinsfile
vendored
2
create-dataset/Jenkinsfile
vendored
@ -10,7 +10,7 @@ pipeline {
|
||||
properties([
|
||||
parameters([
|
||||
string(
|
||||
defaultValue: '1000',
|
||||
defaultValue: '0',
|
||||
description: 'Maximum number of rows',
|
||||
name: 'CUTOFF',
|
||||
trim: false
|
||||
|
@ -8,21 +8,21 @@ from datasets import load_dataset
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
|
||||
|
||||
# Read CUTOFF
|
||||
|
||||
cutoff = int(sys.argv[1])
|
||||
print(cutoff)
|
||||
|
||||
|
||||
# Load dataset
|
||||
|
||||
data = load_dataset('mstz/spambase')
|
||||
data = pd.DataFrame(data['train'])
|
||||
|
||||
|
||||
# Read CUTOFF
|
||||
|
||||
cutoff = int(sys.argv[1])
|
||||
cutoff = cutoff if cutoff > 0 else len(data)
|
||||
|
||||
|
||||
# Shuffle data
|
||||
|
||||
data = data.sample(frac=1)
|
||||
data = data.sample(frac=1).head(cutoff)
|
||||
|
||||
|
||||
# Split data
|
||||
|
Loading…
Reference in New Issue
Block a user