This commit is contained in:
Mikołaj Pokrywka 2022-05-03 15:30:24 +02:00
parent bbfd0dfe1d
commit e03a936f90
3 changed files with 21 additions and 26 deletions

View File

@@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
WORKDIR /app
COPY ./deepl.py .
COPY ./stare_zadania/process_data.sh .
COPY ./stare_zadania/download_data_and_process.py .
COPY ./stats.py .
COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
CMD python3 deepl.py

30
Jenkinsfile vendored
View File

@@ -1,25 +1,12 @@
pipeline {
agent {
dockerfile {
true
}
dockerfile true
}
parameters {
string (
defaultValue: 'mikolajpokrywka',
description: 'Kaggle username',
name: 'KAGGLE_USERNAME',
trim: false
)
password(
defaultValue: '',
description: 'Kaggle token',
name: 'KAGGLE_KEY'
)
string (
defaultValue: '17000',
description: 'cut data',
name: 'CUTOFF',
defaultValue: '10',
description: 'Epochs number',
name: 'EPOCH',
trim: false
)
}
@@ -31,11 +18,10 @@ pipeline {
}
stage('bash script') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}",
"CUTOFF=${params.CUTOFF}"]) {
sh 'python3 ./download_data_and_process.py'
archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
// Run the training script with the epoch count selected for this build.
// The value comes from the 'EPOCH' build parameter; the 'CUTOFF' parameter
// was replaced by 'EPOCH', so reading params.CUTOFF would export "null".
withEnv(["EPOCH=${params.EPOCH}"]) {
    // Copy every artifact (filter '*') from the 's444463-create-dataset' project
    // into the workspace before training.
    copyArtifacts filter: '*', projectName: 's444463-create-dataset'
    // Single-quoted on purpose: the shell (not Groovy) expands $EPOCH from the
    // environment exported by withEnv. The name must match the exported variable,
    // otherwise deepl.py receives no argument and int(sys.argv[1]) fails.
    sh 'python3 ./deepl.py $EPOCH'
    archiveArtifacts artifacts: "model"
}
}
}

View File

@@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from torch import nn
from torch import optim
import matplotlib.pyplot as plt
import sys
def convert_text_to_model_form(text):
@@ -18,9 +20,11 @@ def convert_text_to_model_form(text):
if __name__ == "__main__":
kaggle.api.authenticate()
kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
unzip=True)
epochs = int(sys.argv[1])
# kaggle.api.authenticate()
# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
# unzip=True)
data = pd.read_csv('fake_job_postings.csv', engine='python')
# data = data.replace(np.nan, '', regex=True)
@@ -79,7 +83,6 @@ if __name__ == "__main__":
test_losses = []
test_accuracies = []
epochs = 50
for e in range(epochs):
optimizer.zero_grad()