Add a Jenkins pipeline that downloads the Airbnb prices dataset from Kaggle,
lists the workspace inside a Dockerfile-built agent and archives the
barcelona_weekends train/dev/test CSV files. Rename the CSV files written by
create-dataset.py so they match the archived artifact names.

Review notes:
  * The Docker stage used "sh: 'ls -a'". That is a Groovy label followed by a
    string literal, not a call to the sh step, so the listing never ran.
    It is now "sh 'ls -a'".
  * create-dataset.py still read './train.csv' after the output files were
    renamed; it now reads the train file that the script actually writes.
  * Indentation of the Jenkinsfile and the blank context lines of the
    create-dataset.py hunk were restored by inference. Check them against
    the target files before applying. The index hashes are those of the
    original change.

diff --git a/Jenkinsfile-create-dataset-docker b/Jenkinsfile-create-dataset-docker
new file mode 100644
index 0000000..296f6da
--- /dev/null
+++ b/Jenkinsfile-create-dataset-docker
@@ -0,0 +1,50 @@
+pipeline {  // fetch the Kaggle dataset, then archive the dataset splits
+    agent any
+    parameters{
+        string(
+            defaultValue: 'piotrwrzodak',
+            description: 'Kaggle username',
+            name: 'KAGGLE_USERNAME',
+            trim: false
+        )
+        password(
+            defaultValue: '',
+            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
+            name: 'KAGGLE_KEY'
+        )
+        string(
+            defaultValue: '100',
+            description: 'CUTOFF',
+            name: 'CUTOFF',  // NOTE(review): not referenced by any stage in this file
+            trim: false
+        )
+    }
+    stages {
+        stage('checkout: Check out from version control') {
+            steps {
+                checkout scm
+            }
+        }
+        stage('Build') {  // download and unpack the dataset using the Kaggle parameters
+            steps {
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
+                    sh 'kaggle datasets download -d thedevastator/airbnb-prices-in-european-cities'
+                    sh 'unzip airbnb-prices-in-european-cities.zip -d data'
+                }
+            }
+        }
+        stage('Docker') {  // runs inside an image built from ium_z444510/Dockerfile
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    dir 'ium_z444510'
+                    reuseNode true  // stay on the node and workspace used by the earlier stages
+                }
+            }
+            steps {
+                sh 'ls -a'  // list the workspace before archiving
+                archiveArtifacts artifacts: 'barcelona_weekends.train.csv, barcelona_weekends.dev.csv, barcelona_weekends.test.csv', fingerprint: true
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/create-dataset.py b/create-dataset.py
index 665aa2f..d93da1e 100644
--- a/create-dataset.py
+++ b/create-dataset.py
@@ -12,9 +12,9 @@ data = data.iloc[:, 1:]
 
 train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42), [int(.6 * len(data)), int(.8 * len(data))])
 
-train_set.to_csv('train.csv', index=False)
-dev_set.to_csv('dev.csv', index=False)
-test_set.to_csv('test.csv', index=False)
+train_set.to_csv('barcelona_weekends.train.csv', index=False)
+dev_set.to_csv('barcelona_weekends.dev.csv', index=False)
+test_set.to_csv('barcelona_weekends.test.csv', index=False)
 
-check = pd.read_csv('./train.csv')
+check = pd.read_csv('./barcelona_weekends.train.csv')