This commit is contained in:
Mikołaj Pokrywka 2022-05-03 15:30:24 +02:00
parent bbfd0dfe1d
commit e03a936f90
3 changed files with 21 additions and 26 deletions

View File

@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
WORKDIR /app WORKDIR /app
COPY ./deepl.py . COPY ./deepl.py .
COPY ./stare_zadania/process_data.sh .
COPY ./stare_zadania/download_data_and_process.py .
COPY ./stats.py .
COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
CMD python3 deepl.py CMD python3 deepl.py

30
Jenkinsfile vendored
View File

@ -1,25 +1,12 @@
pipeline { pipeline {
agent { agent {
dockerfile { dockerfile true
true
}
} }
parameters { parameters {
string ( string (
defaultValue: 'mikolajpokrywka', defaultValue: '10',
description: 'Kaggle username', description: 'Epochs number',
name: 'KAGGLE_USERNAME', name: 'EPOCH',
trim: false
)
password(
defaultValue: '',
description: 'Kaggle token',
name: 'KAGGLE_KEY'
)
string (
defaultValue: '17000',
description: 'cut data',
name: 'CUTOFF',
trim: false trim: false
) )
} }
@ -31,11 +18,10 @@ pipeline {
} }
stage('bash script') { stage('bash script') {
steps { steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", withEnv(["EPOCH=${params.CUTOFF}"]) {
"KAGGLE_KEY=${params.KAGGLE_KEY}", copyArtifacts filter: '*', projectName: 's444463-create-dataset'
"CUTOFF=${params.CUTOFF}"]) { sh 'python3 ./deepl.py $EPOCHS'
sh 'python3 ./download_data_and_process.py' archiveArtifacts artifacts: "model"
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
} }
} }
} }

View File

@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from torch import nn from torch import nn
from torch import optim from torch import optim
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import sys
def convert_text_to_model_form(text): def convert_text_to_model_form(text):
@ -18,9 +20,11 @@ def convert_text_to_model_form(text):
if __name__ == "__main__": if __name__ == "__main__":
kaggle.api.authenticate() epochs = int(sys.argv[1])
kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
unzip=True) # kaggle.api.authenticate()
# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
# unzip=True)
data = pd.read_csv('fake_job_postings.csv', engine='python') data = pd.read_csv('fake_job_postings.csv', engine='python')
# data = data.replace(np.nan, '', regex=True) # data = data.replace(np.nan, '', regex=True)
@ -79,7 +83,6 @@ if __name__ == "__main__":
test_losses = [] test_losses = []
test_accuracies = [] test_accuracies = []
epochs = 50
for e in range(epochs): for e in range(epochs):
optimizer.zero_grad() optimizer.zero_grad()