download-dataset with docker
This commit is contained in:
parent
f9e12e8e3c
commit
87211b61b9
19
Dockerfile
Normal file
19
Dockerfile
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Nasz obraz będzie dziedziczył z obrazu Ubuntu w wersji latest
|
||||||
|
FROM ubuntu:latest
|
||||||
|
|
||||||
|
# Instalujemy niezbędne zależności. Zwróć uwagę na flagę "-y" (assume yes)
|
||||||
|
RUN apt update && apt install -y python3 \
|
||||||
|
python3-pip \
|
||||||
|
vim
|
||||||
|
|
||||||
|
ENV CUTOFF=${CUTOFF}
|
||||||
|
ENV KAGGLE_USERNAME=${KAGGLE_USERNAME}
|
||||||
|
ENV KAGGLE_KEY=${KAGGLE_KEY}
|
||||||
|
|
||||||
|
# Stwórzmy w kontenerze (jeśli nie istnieje) katalog /app i przejdźmy do niego (wszystkie kolejne polecenia RUN, CMD, ENTRYPOINT, COPY i ADD będą w nim wykonywane)
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Skopiujmy zawartość bieżącego katalogu (w tym nasz skrypt i requirements.txt) do katalogu /app w kontenerze
|
||||||
|
COPY . /app/
|
||||||
|
|
||||||
|
RUN python3 -m pip install -r requirements.txt
|
10
Jenkinsfile
vendored
10
Jenkinsfile
vendored
@ -1,4 +1,5 @@
|
|||||||
node {
|
node {
|
||||||
|
docker.image('s444452/ium:1.0').inside {
|
||||||
stage('Preparation') {
|
stage('Preparation') {
|
||||||
properties([
|
properties([
|
||||||
parameters([
|
parameters([
|
||||||
@ -14,7 +15,7 @@ node {
|
|||||||
name: 'KAGGLE_KEY'
|
name: 'KAGGLE_KEY'
|
||||||
),
|
),
|
||||||
string(
|
string(
|
||||||
defaultValue: "1000",
|
defaultValue: "10000",
|
||||||
description: 'Determine the size of dataset',
|
description: 'Determine the size of dataset',
|
||||||
name: 'CUTOFF'
|
name: 'CUTOFF'
|
||||||
)
|
)
|
||||||
@ -28,11 +29,12 @@ node {
|
|||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
||||||
"KAGGLE_KEY=${params.KAGGLE_KEY}","CUTOFF=${params.CUTOFF}"]) {
|
"KAGGLE_KEY=${params.KAGGLE_KEY}","CUTOFF=${params.CUTOFF}"]) {
|
||||||
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
||||||
sh "chmod u+x ./download_dataset.sh"
|
sh "chmod u+x ./lab2_data.py"
|
||||||
sh "./download_dataset.sh $CUTOFF"
|
sh "./lab2_data.py"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Archive artifacts') {
|
stage('Archive artifacts') {
|
||||||
archiveArtifacts 'dataset.csv'
|
archiveArtifacts 'fake_job_postings.csv'
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
4
figlet-loop.sh
Normal file
4
figlet-loop.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
while read line; do
|
||||||
|
figlet "$line"
|
||||||
|
done
|
@ -6,9 +6,7 @@ from sklearn.model_selection import train_test_split
|
|||||||
|
|
||||||
def download_and_save_dataset():
|
def download_and_save_dataset():
|
||||||
api.authenticate()
|
api.authenticate()
|
||||||
api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction',
|
api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', unzip=True)
|
||||||
path='./data',
|
|
||||||
unzip=True)
|
|
||||||
|
|
||||||
|
|
||||||
def split_dataset(data: DataFrame):
|
def split_dataset(data: DataFrame):
|
||||||
@ -26,7 +24,7 @@ def split_dataset(data: DataFrame):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
# download_and_save_dataset()
|
# download_and_save_dataset()
|
||||||
df = read_csv('./data/fake_job_postings.csv')
|
df = read_csv('./fake_job_postings.csv')
|
||||||
print(df.describe(include='all'))
|
print(df.describe(include='all'))
|
||||||
print(df.shape)
|
print(df.shape)
|
||||||
x_train, x_val, x_test, y_train, y_val, y_test = split_dataset(df)
|
x_train, x_val, x_test, y_train, y_val, y_test = split_dataset(df)
|
||||||
|
19
requirements.txt
Normal file
19
requirements.txt
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
certifi==2021.10.8
|
||||||
|
charset-normalizer==2.0.12
|
||||||
|
idna==3.3
|
||||||
|
joblib==1.1.0
|
||||||
|
kaggle==1.5.12
|
||||||
|
numpy==1.22.3
|
||||||
|
pandas==1.4.1
|
||||||
|
python-dateutil==2.8.2
|
||||||
|
python-slugify==6.1.1
|
||||||
|
pytz==2022.1
|
||||||
|
requests==2.27.1
|
||||||
|
scikit-learn==1.0.2
|
||||||
|
scipy==1.8.0
|
||||||
|
six==1.16.0
|
||||||
|
sklearn==0.0
|
||||||
|
text-unidecode==1.3
|
||||||
|
threadpoolctl==3.1.0
|
||||||
|
tqdm==4.63.1
|
||||||
|
urllib3==1.26.9
|
Loading…
Reference in New Issue
Block a user