zadanie doker

This commit is contained in:
szymonj98 2022-04-03 14:40:45 +02:00
parent 3dbda7a2cd
commit 5a29b56c27
3 changed files with 20 additions and 47 deletions

View File

@@ -1,14 +1,8 @@
FROM python:3.9 FROM ubuntu:latest
WORKDIR /ium
RUN apt update && apt install -y python3-pip
RUN pip install --user kaggle RUN pip3 install pandas
RUN pip install --user pandas RUN pip3 install numpy
RUN pip3 install sklearn
ADD . . COPY ./Steam-200k.csv ./
ARG KAGGLE_USERNAME COPY ./kagle.py ./
ARG KAGGLE_KEY
RUN chmod a+x *.sh
RUN ./dataset_download.sh
CMD python kagle.py

41
Jenkinsfile vendored
View File

@@ -1,34 +1,13 @@
pipeline { pipeline {
parameters { agent {
string( dockerfile true
defaultValue: 'szymonjadczak',
description: 'Kaggle username',
name: 'KAGGLE_USERNAME',
trim: false
)
password(
defaultValue: '',
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
name: 'KAGGLE_KEY'
)
}
agent {
dockerfile {
additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s444386-create-dataset"
} }
} stages {
environment { stage('Stage 1') {
KAGGLE_USERNAME="$params.KAGGLE_USERNAME" steps {
KAGGLE_KEY="$params.KAGGLE_KEY" sh 'chmod u+x ./kagle.py'
CUTOFF="$params.CUTOFF" sh 'python3 kagle.py'
} }
stages { }
stage('Stage 1') { }
steps {
echo 'Hello world!!!'
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s444386/ium_444386']]])
archiveArtifacts 'Steam-200k.csv'
}
}
}
} }

View File

@@ -2,8 +2,8 @@ import os
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
os.system("kaggle datasets download -d tamber/steam-video-games") #os.system("kaggle datasets download -d tamber/steam-video-games")
os.system("unzip -o steam-video-games.zip") #os.system("unzip -o steam-video-games.zip")
steam=pd.read_csv('Steam-200k.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed']) steam=pd.read_csv('Steam-200k.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
steam.isnull().values.any() steam.isnull().values.any()