Mikołaj Pokrywka 2022-05-03 15:30:24 +02:00
parent bbfd0dfe1d
commit 954bb281a1
3 changed files with 20 additions and 23 deletions

Dockerfile

@@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
 WORKDIR /app
 COPY ./deepl.py .
+COPY ./stare_zadania/process_data.sh .
+COPY ./stare_zadania/download_data_and_process.py .
+COPY ./stats.py .
+COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
 CMD python3 deepl.py
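With the Kaggle download removed from the pipeline, the image now carries the dataset archive itself. A minimal sketch of how the bundled zip could be unpacked before deepl.py reads fake_job_postings.csv; the extraction step is an assumption, it is not shown in this commit (the copied process_data.sh may already do this):

import zipfile

# Assumed helper: extract the archive copied into /app so deepl.py can read
# fake_job_postings.csv without calling the Kaggle API.
with zipfile.ZipFile("real-or-fake-fake-jobposting-prediction.zip") as zf:
    zf.extractall(".")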

Jenkinsfile

@@ -6,20 +6,9 @@ pipeline {
     }
     parameters {
         string (
-            defaultValue: 'mikolajpokrywka',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password(
-            defaultValue: '',
-            description: 'Kaggle token',
-            name: 'KAGGLE_KEY'
-        )
-        string (
-            defaultValue: '17000',
-            description: 'cut data',
-            name: 'CUTOFF',
+            defaultValue: '10',
+            description: 'Epochs number',
+            name: 'EPOCH',
             trim: false
         )
     }
@@ -31,11 +20,10 @@ pipeline {
         }
         stage('bash script') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                        "KAGGLE_KEY=${params.KAGGLE_KEY}",
-                        "CUTOFF=${params.CUTOFF}"]) {
-                    sh 'python3 ./download_data_and_process.py'
-                    archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
+                withEnv(["EPOCH=${params.CUTOFF}"]) {
+                    copyArtifacts filter: '*', projectName: 's444463-create-dataset'
+                    sh 'python3 ./deepl.py $EPOCHS'
+                    archiveArtifacts artifacts: "model"
                 }
            }
        }
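Note that the new stage still reads params.CUTOFF into the EPOCH environment variable and passes $EPOCHS (trailing S) on the command line, so the names are not yet consistent. A hedged sketch of how deepl.py could resolve the epoch count regardless of which of the two arrives; the fallback logic and the default of 10 are assumptions, not part of this commit:

import os
import sys

def resolve_epochs(argv=None, default=10):
    # Prefer the positional argument passed by `python3 ./deepl.py $EPOCHS`,
    # then the EPOCH variable set via withEnv, then a default.
    argv = sys.argv if argv is None else argv
    if len(argv) > 1 and argv[1].strip():
        return int(argv[1])
    return int(os.environ.get("EPOCH", default))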

deepl.py

@@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from torch import nn
 from torch import optim
 import matplotlib.pyplot as plt
+import sys
+
 
 def convert_text_to_model_form(text):
@@ -18,9 +20,11 @@ def convert_text_to_model_form(text):
 
 if __name__ == "__main__":
-    kaggle.api.authenticate()
-    kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
-                                      unzip=True)
+    epochs = int(sys.argv[1])
+
+    # kaggle.api.authenticate()
+    # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
+    #                                   unzip=True)
 
     data = pd.read_csv('fake_job_postings.csv', engine='python')
     # data = data.replace(np.nan, '', regex=True)
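The Kaggle calls are only commented out, not deleted. One hedged way to keep them usable for local runs without putting credentials back into the Jenkins job; the DOWNLOAD_DATA flag is an assumption introduced here, not part of the commit:

import os

if os.environ.get("DOWNLOAD_DATA") == "1":
    # Opt-in local fallback: re-enable the original Kaggle download when
    # KAGGLE_USERNAME/KAGGLE_KEY are configured in the environment.
    import kaggle
    kaggle.api.authenticate()
    kaggle.api.dataset_download_files(
        'shivamb/real-or-fake-fake-jobposting-prediction', path='.', unzip=True)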
@@ -79,7 +83,6 @@ if __name__ == "__main__":
     test_losses = []
     test_accuracies = []
 
-    epochs = 50
     for e in range(epochs):
         optimizer.zero_grad()
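With the hard-coded epochs = 50 gone, the loop length now comes entirely from the EPOCH build parameter. A self-contained sketch of such an epoch loop; the model, loss, and tensors below are stand-ins with assumed names, since the real definitions in deepl.py are not shown in this diff:

import torch
from torch import nn, optim

# Stand-in model and data so the loop runs on its own; in deepl.py these come
# from the TF-IDF features and the network defined earlier.
model = nn.Sequential(nn.Linear(100, 2))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
x_train = torch.randn(64, 100)
y_train = torch.randint(0, 2, (64,))

epochs = 10  # in deepl.py this comes from sys.argv[1]
for e in range(epochs):
    optimizer.zero_grad()
    output = model(x_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()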