wip
parent bbfd0dfe1d
commit 954bb281a1
@@ -12,4 +12,10 @@ RUN apt-get install zip unzip --yes
 WORKDIR /app
 
 COPY ./deepl.py .
+
+COPY ./stare_zadania/process_data.sh .
+COPY ./stare_zadania/download_data_and_process.py .
+COPY ./stats.py .
+COPY ./stare_zadania/real-or-fake-fake-jobposting-prediction.zip .
+
 CMD python3 deepl.py
Jenkinsfile (vendored): 26 lines changed
@@ -6,20 +6,9 @@ pipeline {
     }
     parameters {
         string (
-            defaultValue: 'mikolajpokrywka',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password(
-            defaultValue: '',
-            description: 'Kaggle token',
-            name: 'KAGGLE_KEY'
-        )
-        string (
-            defaultValue: '17000',
-            description: 'cut data',
-            name: 'CUTOFF',
+            defaultValue: '10',
+            description: 'Epochs number',
+            name: 'EPOCH',
             trim: false
         )
     }
@@ -31,11 +20,10 @@ pipeline {
         }
         stage('bash script') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                         "KAGGLE_KEY=${params.KAGGLE_KEY}",
-                         "CUTOFF=${params.CUTOFF}"]) {
-                    sh 'python3 ./download_data_and_process.py'
-                    archiveArtifacts artifacts: "data_test.csv, data_dev.csv, data_train.csv, column_titles.csv, data.csv"
+                withEnv(["EPOCH=${params.CUTOFF}"]) {
+                    copyArtifacts filter: '*', projectName: 's444463-create-dataset'
+                    sh 'python3 ./deepl.py $EPOCHS'
+                    archiveArtifacts artifacts: "model"
                 }
             }
         }
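The new archiveArtifacts artifacts: "model" step assumes deepl.py writes its trained network to a file named model in the workspace. A minimal sketch of producing such a file with torch.save follows; the placeholder architecture is an assumption for illustration, since the commit does not show that part of the script. (Note that withEnv exports EPOCH, seeded from params.CUTOFF, while the sh step expands $EPOCHS.)

import torch
from torch import nn

# Minimal sketch only: the real network and training loop live in deepl.py.
# This just shows a state dict being written to a file named "model",
# which is the artifact name the Jenkinsfile archives.
model = nn.Sequential(nn.Linear(500, 2))  # placeholder architecture (assumption)

torch.save(model.state_dict(), "model")   # file name matches archiveArtifacts artifacts: "model"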
deepl.py: 11 lines changed
@@ -9,6 +9,8 @@ from sklearn.feature_extraction.text import TfidfVectorizer
 from torch import nn
 from torch import optim
 import matplotlib.pyplot as plt
+import sys
+
 
 
 def convert_text_to_model_form(text):
@@ -18,9 +20,11 @@ def convert_text_to_model_form(text):
 
 
 if __name__ == "__main__":
-    kaggle.api.authenticate()
-    kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
-                                      unzip=True)
+    epochs = int(sys.argv[1])
+
+    # kaggle.api.authenticate()
+    # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
+    #                                   unzip=True)
 
     data = pd.read_csv('fake_job_postings.csv', engine='python')
     # data = data.replace(np.nan, '', regex=True)
@@ -79,7 +83,6 @@ if __name__ == "__main__":
     test_losses = []
     test_accuracies = []
 
-    epochs = 50
     for e in range(epochs):
         optimizer.zero_grad()
 
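For context, a minimal sketch of the command-line handling that this change relies on (deepl.py now reads the epoch count from sys.argv[1]); the helper name and the fallback default below are illustrative assumptions, not part of the commit:

import sys

def read_epochs(argv, default=10):
    """Return the epoch count passed on the command line, e.g. `python3 ./deepl.py 10`.

    Falls back to `default` when no argument is given (the fallback is an
    assumption for illustration; the commit itself reads sys.argv[1] directly).
    """
    if len(argv) > 1:
        return int(argv[1])
    return default

if __name__ == "__main__":
    epochs = read_epochs(sys.argv)
    print(f"training for {epochs} epochs")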