4.3.1. v5
This commit is contained in:
parent
759ca86b6c
commit
f80e875766
@ -3,11 +3,4 @@ FROM ubuntu:latest
|
|||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y python3 python3-pip
|
apt-get install -y python3 python3-pip
|
||||||
|
|
||||||
RUN pip3 install pandas scikit-learn kaggle
|
RUN pip3 install pandas numpy
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
COPY create-dataset.py /app
|
|
||||||
COPY data/barcelona_weekends.csv /app
|
|
||||||
|
|
||||||
CMD ["python3", "create-dataset.py"]
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
parameters{
|
parameters{
|
||||||
string(
|
string(
|
||||||
defaultValue: 'piotrwrzodak',
|
defaultValue: 'piotrwrzodak',
|
||||||
@ -36,18 +38,9 @@ pipeline {
|
|||||||
sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
|
sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
|
||||||
sh 'unzip airbnb-prices-in-european-cities.zip -d data'
|
sh 'unzip airbnb-prices-in-european-cities.zip -d data'
|
||||||
sh 'ls'
|
sh 'ls'
|
||||||
}
|
sh 'python create-dataset.py'
|
||||||
}
|
archiveArtifacts artifacts: 'data/barcelona_weekends.train.csv, data/barcelona_weekends.dev.csv, data/barcelona_weekends.test.csv', fingerprint: true
|
||||||
}
|
}
|
||||||
stage('Docker') {
|
|
||||||
agent {
|
|
||||||
dockerfile {
|
|
||||||
filename 'Dockerfile'
|
|
||||||
reuseNode true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
steps {
|
|
||||||
archiveArtifacts artifacts: 'barcelona_weekends.train.csv, barcelona_weekends.dev.csv, barcelona_weekends.test.csv', fingerprint: true
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,16 +5,16 @@ import numpy as np
|
|||||||
|
|
||||||
cutoff = 10
|
cutoff = 10
|
||||||
|
|
||||||
data = pd.read_csv('./barcelona_weekends.csv')
|
data = pd.read_csv('./data/barcelona_weekends.csv')
|
||||||
data = data.sample(cutoff)
|
data = data.sample(cutoff)
|
||||||
data = data.iloc[:, 1:]
|
data = data.iloc[:, 1:]
|
||||||
|
|
||||||
train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42),
|
train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42),
|
||||||
[int(.6 * len(data)), int(.8 * len(data))])
|
[int(.6 * len(data)), int(.8 * len(data))])
|
||||||
|
|
||||||
train_set.to_csv('barcelona_weekends.train.csv', index=False)
|
train_set.to_csv('data/barcelona_weekends.train.csv', index=False)
|
||||||
dev_set.to_csv('barcelona_weekends.dev.csv', index=False)
|
dev_set.to_csv('data/barcelona_weekends.dev.csv', index=False)
|
||||||
test_set.to_csv('barcelona_weekends.test.csv', index=False)
|
test_set.to_csv('data/barcelona_weekends.test.csv', index=False)
|
||||||
|
|
||||||
|
|
||||||
check = pd.read_csv('./train.csv')
|
check = pd.read_csv('./train.csv')
|
||||||
|
Loading…
Reference in New Issue
Block a user