4.3.1. v5

This commit is contained in:
piotrwrzodak 2023-05-11 22:42:00 +02:00
parent 759ca86b6c
commit f80e875766
3 changed files with 10 additions and 24 deletions

View File

@@ -3,11 +3,4 @@ FROM ubuntu:latest
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y python3 python3-pip apt-get install -y python3 python3-pip
RUN pip3 install pandas scikit-learn kaggle RUN pip3 install pandas numpy
WORKDIR /app
COPY create-dataset.py /app
COPY data/barcelona_weekends.csv /app
CMD ["python3", "create-dataset.py"]

View File

@@ -1,5 +1,7 @@
pipeline { pipeline {
agent any agent {
dockerfile true
}
parameters{ parameters{
string( string(
defaultValue: 'piotrwrzodak', defaultValue: 'piotrwrzodak',
@@ -36,19 +38,10 @@ pipeline {
sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities' sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
sh 'unzip airbnb-prices-in-european-cities.zip -d data' sh 'unzip airbnb-prices-in-european-cities.zip -d data'
sh 'ls' sh 'ls'
sh 'python create-dataset.py'
archiveArtifacts artifacts: 'data/barcelona_weekends.train.csv, data/barcelona_weekends.dev.csv, data/barcelona_weekends.test.csv', fingerprint: true
} }
} }
} }
stage('Docker') {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
archiveArtifacts artifacts: 'barcelona_weekends.train.csv, barcelona_weekends.dev.csv, barcelona_weekends.test.csv', fingerprint: true
}
}
} }
} }

View File

@@ -5,16 +5,16 @@ import numpy as np
cutoff = 10 cutoff = 10
data = pd.read_csv('./barcelona_weekends.csv') data = pd.read_csv('./data/barcelona_weekends.csv')
data = data.sample(cutoff) data = data.sample(cutoff)
data = data.iloc[:, 1:] data = data.iloc[:, 1:]
train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42), train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42),
[int(.6 * len(data)), int(.8 * len(data))]) [int(.6 * len(data)), int(.8 * len(data))])
train_set.to_csv('barcelona_weekends.train.csv', index=False) train_set.to_csv('data/barcelona_weekends.train.csv', index=False)
dev_set.to_csv('barcelona_weekends.dev.csv', index=False) dev_set.to_csv('data/barcelona_weekends.dev.csv', index=False)
test_set.to_csv('barcelona_weekends.test.csv', index=False) test_set.to_csv('data/barcelona_weekends.test.csv', index=False)
check = pd.read_csv('./train.csv') check = pd.read_csv('./train.csv')