# Review notes. Patch tools skip text that precedes the first "diff --git"
# header, so these lines are commentary and not part of the change.
#
# What the patch does:
#   * Dockerfile:  replaces the python:3.9 image that downloaded the dataset with
#                  Kaggle credentials by an Ubuntu image that installs pandas,
#                  numpy and scikit-learn and copies Steam-200k.csv and kagle.py.
#   * Jenkinsfile: drops the Kaggle parameters/environment, builds the agent from
#                  the Dockerfile, and runs `python3 kagle.py` in a single stage.
#   * kagle.py:    comments out the Kaggle download and unzip calls.
#
# Revised relative to the patch as received:
#   * One patch line per text line (the patch had been flattened onto two lines).
#   * Dockerfile added lines: base image pinned, apt-get with cleanup in one
#     layer, a single pip layer without cache, `scikit-learn` instead of the
#     deprecated `sklearn` alias. The hunk header is now +1,12 accordingly.
#
# NOTE(review): whitespace was lost when the patch was flattened. Jenkinsfile
# indentation is reconstructed at four spaces per level, and the kagle.py hunk is
# read with "import os" as the hunk heading and one blank line on each side of
# the os.system calls. Confirm against the repository before applying;
# `git apply --ignore-whitespace` may be needed.
# NOTE(review): the Dockerfile "index" line is carried over unchanged, so its
# post-image hash does not describe the revised added lines.
diff --git a/Dockerfile b/Dockerfile
index 39d484a..a7bcc46 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,14 +1,12 @@
-FROM python:3.9
-
-
-RUN pip install --user kaggle
-RUN pip install --user pandas
-
-ADD . .
-ARG KAGGLE_USERNAME
-ARG KAGGLE_KEY
-
-RUN chmod a+x *.sh
-RUN ./dataset_download.sh
-
-CMD python kagle.py
\ No newline at end of file
+# Pin the release: ubuntu:latest changes over time, and newer Ubuntu releases
+# refuse system-wide `pip3 install` (PEP 668).
+FROM ubuntu:22.04
+WORKDIR /ium
+# Refresh the index and install in one layer, then drop the apt lists.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+# The distribution is named scikit-learn; "sklearn" on PyPI is a deprecated alias.
+RUN pip3 install --no-cache-dir numpy pandas scikit-learn
+COPY ./Steam-200k.csv ./
+COPY ./kagle.py ./
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index e39a7e1..a4d181c 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,34 +1,13 @@
 pipeline {
-    parameters {
-        string(
-            defaultValue: 'szymonjadczak',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password(
-            defaultValue: '',
-            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
-            name: 'KAGGLE_KEY'
-        )
-    }
-    agent {
-        dockerfile {
-            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s444386-create-dataset"
+    agent {
+        dockerfile true
         }
-    }
-    environment {
-        KAGGLE_USERNAME="$params.KAGGLE_USERNAME"
-        KAGGLE_KEY="$params.KAGGLE_KEY"
-        CUTOFF="$params.CUTOFF"
-    }
-    stages {
-        stage('Stage 1') {
-            steps {
-                echo 'Hello world!!!'
-                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s444386/ium_444386']]])
-                archiveArtifacts 'Steam-200k.csv'
-            }
-        }
-    }
+    stages {
+        stage('Stage 1') {
+            steps {
+                sh 'chmod u+x ./kagle.py'
+                sh 'python3 kagle.py'
+            }
+        }
+    }
 }
diff --git a/kagle.py b/kagle.py
index 1bf202e..7cb7eca 100644
--- a/kagle.py
+++ b/kagle.py
@@ -2,8 +2,8 @@ import os
 import pandas as pd
 from sklearn.model_selection import train_test_split
 
-os.system("kaggle datasets download -d tamber/steam-video-games")
-os.system("unzip -o steam-video-games.zip")
+#os.system("kaggle datasets download -d tamber/steam-video-games")
+#os.system("unzip -o steam-video-games.zip")
 
 steam=pd.read_csv('Steam-200k.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
 steam.isnull().values.any()