Prepare train and test sets
This commit is contained in:
parent
9eb59b3cee
commit
60dbcba650
@ -1,6 +1,6 @@
|
||||
pipeline {
|
||||
agent {
|
||||
docker {image 'agakul/ium:3.0'}
|
||||
docker {image 'agakul/ium:4.0'}
|
||||
}
|
||||
stages {
|
||||
stage('Check out from version control') {
|
||||
@ -10,7 +10,8 @@ pipeline {
|
||||
}
|
||||
stage('Shell Script') {
|
||||
steps {
|
||||
sh 'ipython ./preparation.py'
|
||||
sh 'ipython ./prepare_datasets.py'
|
||||
archiveArtifacts artifacts: 'X_train.csv, X_test.csv, y_train.csv, y_test.csv '
|
||||
}
|
||||
}
|
||||
}
|
||||
|
50
prepare_datasets.py
Normal file
50
prepare_datasets.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Extract the dataset archive with the standard library rather than the IPython
# shell escape: the script then also runs under plain `python` and does not
# depend on an external `unzip` binary. Existing files are overwritten, as with
# `unzip -o`, and a missing or corrupt archive raises here instead of only
# surfacing later when the CSV cannot be read.
import zipfile

with zipfile.ZipFile('body-performance-data.zip') as archive:
    archive.extractall()
|
||||
|
||||
|
||||
# In[4]:
|
||||
|
||||
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
# In[21]:
|
||||
|
||||
|
||||
df = pd.read_csv('bodyPerformance.csv')
|
||||
|
||||
|
||||
# In[22]:
|
||||
|
||||
|
||||
cols = ['gender', 'height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']
|
||||
df = df[cols]
|
||||
|
||||
# male - 0, female - 1
|
||||
# Assign the encoded column back instead of calling replace(..., inplace=True)
# on the selected column: an in-place call on `df['gender']` acts on an
# intermediate object, which under pandas Copy-on-Write leaves `df` unchanged.
# `assign` returns a new frame, so it also avoids SettingWithCopyWarning.
df = df.assign(gender=df['gender'].replace({'M': 0, 'F': 1}))
|
||||
# Drop every row that has at least one missing value in the selected columns.
df = df.dropna(how='any')
|
||||
|
||||
|
||||
# In[23]:
|
||||
|
||||
|
||||
X = df[['height_cm', 'weight_kg', 'body fat_%', 'sit-ups counts', 'broad jump_cm']]
|
||||
y = df[['gender']]
|
||||
|
||||
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
|
||||
# In[24]:
|
||||
|
||||
|
||||
X_train.to_csv(r'X_train.csv', index=False)
|
||||
X_test.to_csv(r'X_test.csv', index=False)
|
||||
y_train.to_csv(r'y_train.csv', index=False)
|
||||
y_test.to_csv(r'y_test.csv', index=False)
|
||||
|
Loading…
Reference in New Issue
Block a user