Added dataset to the repository because of an unresolved problem with Kaggle permissions in Jenkins :(
This commit is contained in:
parent
a030c6fb81
commit
23acd59d7a
21
Dockerfile
21
Dockerfile
@ -1,31 +1,20 @@
|
|||||||
FROM ubuntu:latest
|
FROM ubuntu:latest
|
||||||
|
FROM python:3.8
|
||||||
RUN apt update
|
RUN apt update
|
||||||
|
|
||||||
|
|
||||||
# Stwórzmy w kontenerze (jeśli nie istnieje) katalog /app i przejdźmy do niego (wszystkie kolejne polecenia RUN, CMD, ENTRYPOINT, COPY i ADD będą w nim wykonywane)
|
# Stwórzmy w kontenerze (jeśli nie istnieje) katalog /app i przejdźmy do niego (wszystkie kolejne polecenia RUN, CMD, ENTRYPOINT, COPY i ADD będą w nim wykonywane)
|
||||||
COPY ./requirements.txt .
|
COPY ./requirements.txt .
|
||||||
RUN apt-get update
|
RUN apt-get update
|
||||||
RUN apt-get install -y python3-pip
|
RUN pip3 install -r requirements.txt
|
||||||
RUN pip3 install --upgrade pip
|
RUN apt-get install zip unzip --yes
|
||||||
RUN pip3 install pandas
|
|
||||||
RUN pip3 install numpy
|
|
||||||
RUN pip install kaggle
|
|
||||||
RUN apt-get install zip unzip
|
|
||||||
|
|
||||||
ARG CUTOFF
|
|
||||||
ARG KAGGLE_USERNAME
|
|
||||||
ARG KAGGLE_KEY
|
|
||||||
ENV CUTOFF=${CUTOFF}
|
|
||||||
ENV KAGGLE_USERNAME=${KAGGLE_USERNAME}
|
|
||||||
ENV KAGGLE_KEY=${KAGGLE_KEY}
|
|
||||||
|
|
||||||
# Skopiujmy nasz skrypt do katalogu /app w kontenerze
|
|
||||||
RUN mkdir /data
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./process_data.sh .
|
COPY ./process_data.sh .
|
||||||
COPY ./download_data_and_process.py .
|
COPY ./download_data_and_process.py .
|
||||||
COPY ./stats.py .
|
COPY ./stats.py .
|
||||||
|
COPY ./real-or-fake-fake-jobposting-prediction.zip .
|
||||||
RUN chmod +x process_data.sh
|
RUN chmod +x process_data.sh
|
||||||
RUN ./process_data.sh
|
CMD python3 download_data_and_process.py
|
||||||
|
4
Jenkinsfile
vendored
4
Jenkinsfile
vendored
@ -34,10 +34,6 @@ pipeline {
|
|||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
||||||
"KAGGLE_KEY=${params.KAGGLE_KEY}",
|
"KAGGLE_KEY=${params.KAGGLE_KEY}",
|
||||||
"CUTOFF=${params.CUTOFF}"]) {
|
"CUTOFF=${params.CUTOFF}"]) {
|
||||||
sh './process_data.sh'
|
|
||||||
sh 'ls'
|
|
||||||
sh 'wc data_train.csv'
|
|
||||||
sh 'cat column_titles.csv'
|
|
||||||
sh 'python3 ./download_data_and_process.py'
|
sh 'python3 ./download_data_and_process.py'
|
||||||
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
|
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
rc = subprocess.call("./process_data.sh")
|
||||||
# import kaggle
|
# import kaggle
|
||||||
|
|
||||||
# kaggle.api.authenticate()
|
# kaggle.api.authenticate()
|
||||||
@ -8,9 +12,6 @@ import numpy as np
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
data=pd.read_csv('fake_job_postings.csv')
|
data=pd.read_csv('fake_job_postings.csv')
|
||||||
data = data.replace(np.nan, '', regex=True)
|
data = data.replace(np.nan, '', regex=True)
|
||||||
|
|
||||||
|
@ -1,14 +1,8 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
echo "Download data from kaggle"
|
echo "Download data from kaggle"
|
||||||
echo $KAGGLE_USERNAME
|
echo $KAGGLE_USERNAME
|
||||||
kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction
|
# kaggle datasets download -d shivamb/real-or-fake-fake-jobposting-prediction
|
||||||
unzip -o real-or-fake-fake-jobposting-prediction.zip
|
unzip -o real-or-fake-fake-jobposting-prediction.zip
|
||||||
ls
|
|
||||||
wc fake_job_postings.csv
|
|
||||||
cp fake_job_postings.csv /data/fake_job_postings.csv
|
|
||||||
cd data
|
|
||||||
ls
|
|
||||||
wc fake_job_postings.csv
|
|
||||||
echo "Save column titles"
|
echo "Save column titles"
|
||||||
head -n 1 fake_job_postings.csv > column_titles.csv
|
head -n 1 fake_job_postings.csv > column_titles.csv
|
||||||
tail -n +2 fake_job_postings.csv > data_not_shuf.csv
|
tail -n +2 fake_job_postings.csv > data_not_shuf.csv
|
||||||
|
BIN
real-or-fake-fake-jobposting-prediction.zip
Normal file
BIN
real-or-fake-fake-jobposting-prediction.zip
Normal file
Binary file not shown.
@ -1,2 +1,3 @@
|
|||||||
pandas==1.1.5
|
pandas
|
||||||
numpy==1.22.3
|
numpy
|
||||||
|
kaggle
|
Loading…
Reference in New Issue
Block a user