This commit is contained in:
Mikołaj Pokrywka 2022-05-15 20:28:52 +02:00
parent 7eddb939fc
commit 0c67053401
128 changed files with 335 additions and 18 deletions

View File

@ -12,5 +12,6 @@ RUN apt-get install zip unzip --yes
WORKDIR /app WORKDIR /app
COPY ./deepl.py . COPY ./deepl.py .
COPY ./MLProject .
# CMD python3 deepl.py # CMD python3 deepl.py

12
MLProject Normal file
View File

@ -0,0 +1,12 @@
name: s444463
docker_env:
image: ium_444463
entry_points:
main:
parameters:
EPOCH: {type: int, default: 10}
command: "python3 ./deepl.py {EPOCH}"
eval:
command: "python3 evaluation.py"

View File

@ -1,3 +1,4 @@
import json
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import scipy import scipy
@ -10,40 +11,33 @@ from torch import nn
from torch import optim from torch import optim
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import sys import sys
from sacred import Experiment import mlflow
from sacred.observers import FileStorageObserver from mlflow.models import infer_signature
from sacred.observers import MongoObserver
ex = Experiment()
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
vectorizer = TfidfVectorizer()
@ex.config
def my_config():
epochs = 10
mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment("s444463")
def convert_text_to_model_form(text): def convert_text_to_model_form(text):
a = vectorizer.transform([text]) a = vectorizer.transform([text])
b = torch.tensor(scipy.sparse.csr_matrix.todense(a)).float() b = torch.tensor(scipy.sparse.csr_matrix.todense(a)).float()
return b return b
@ex.automain
def my_main(epochs, _run): def train(epochs):
epochs = int(epochs)
# print(sys.argv[1]) # print(sys.argv[1])
# print(type(sys.argv[1])) # print(type(sys.argv[1]))
# print(sys.argv[1]) # print(sys.argv[1])
# epochs = int(sys.argv[1]) # epochs = int(sys.argv[1])
# epochs=10 # epochs=10
mlflow.log_param("epochs", epochs)
# kaggle.api.authenticate() # kaggle.api.authenticate()
# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.', # kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='.',
# unzip=True) # unzip=True)
data = pd.read_csv('fake_job_postings.csv', engine='python') data = pd.read_csv('fake_job_postings.csv', engine='python')
# data = data.replace(np.nan, '', regex=True) # data = data.replace(np.nan, '', regex=True)
data = data[["company_profile", "fraudulent"]] data = data[["company_profile", "fraudulent"]]
data = data.dropna() data = data.dropna()
@ -72,12 +66,17 @@ def my_main(epochs, _run):
y_dev = np.array(y_dev) y_dev = np.array(y_dev)
y_test = np.array(y_test) y_test = np.array(y_test)
vectorizer = TfidfVectorizer()
company_profile = vectorizer.fit_transform(company_profile) company_profile = vectorizer.fit_transform(company_profile)
x_train = vectorizer.transform(x_train) x_train = vectorizer.transform(x_train)
x_dev = vectorizer.transform(x_dev) x_dev = vectorizer.transform(x_dev)
x_test = vectorizer.transform(x_test) x_test = vectorizer.transform(x_test)
siganture = json.dumps({"input:": 'tfidf vectorized company profile', "output:": "0 = ok, 1 = fake job"})
input_example = x_train[:20]
x_train = torch.tensor(scipy.sparse.csr_matrix.todense(x_train)).float() x_train = torch.tensor(scipy.sparse.csr_matrix.todense(x_train)).float()
x_dev = torch.tensor(scipy.sparse.csr_matrix.todense(x_dev)).float() x_dev = torch.tensor(scipy.sparse.csr_matrix.todense(x_dev)).float()
x_test = torch.tensor(scipy.sparse.csr_matrix.todense(x_test)).float() x_test = torch.tensor(scipy.sparse.csr_matrix.todense(x_test)).float()
@ -184,9 +183,18 @@ def my_main(epochs, _run):
f.close() f.close()
torch.save(model, 'model') torch.save(model, 'model')
ex.add_artifact("model") mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
mlflow.pytorch.save_model(model, "model", signature=siganture, input_example=input_example)
print(sys.argv[1])
print(type(sys.argv[1]))
print(sys.argv[1])
epochs = int(sys.argv[1])
if __name__ == "__main__":
train(epochs)
# plt.figure(figsize=(12, 5)) # plt.figure(figsize=(12, 5))
# ax = plt.subplot(121) # ax = plt.subplot(121)
# plt.xlabel('epochs') # plt.xlabel('epochs')

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/1be012972380454a86074e53f6007c86/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 1be012972380454a86074e53f6007c86
run_uuid: 1be012972380454a86074e53f6007c86
source_name: ''
source_type: 4
source_version: ''
start_time: 1652635708730
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
10

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/2cf44da0b5e8422d9254cbf8cd7f31ed/artifacts
end_time: 1652637872270
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 2cf44da0b5e8422d9254cbf8cd7f31ed
run_uuid: 2cf44da0b5e8422d9254cbf8cd7f31ed
source_name: ''
source_type: 4
source_version: ''
start_time: 1652637426164
status: 4
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
8

View File

@ -0,0 +1 @@
sha256:8c88d085e9b52a5042a701abf99841314d4ae732f3a2d340d431cf1b03ddd6a1

View File

@ -0,0 +1 @@
s444463:7eddb93

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
local

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/3ee2870794ef4f4dafb2444c0b888336/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 3ee2870794ef4f4dafb2444c0b888336
run_uuid: 3ee2870794ef4f4dafb2444c0b888336
source_name: ''
source_type: 4
source_version: ''
start_time: 1652638803535
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
8

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/4aeccdef06344ac6ba7d5958ff7f9578/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 4aeccdef06344ac6ba7d5958ff7f9578
run_uuid: 4aeccdef06344ac6ba7d5958ff7f9578
source_name: ''
source_type: 4
source_version: ''
start_time: 1652636332050
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
10

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/55b8688df78d406f8681acfb00187d19/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 55b8688df78d406f8681acfb00187d19
run_uuid: 55b8688df78d406f8681acfb00187d19
source_name: ''
source_type: 4
source_version: ''
start_time: 1652635729200
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
10

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/63016f8e28f84c44867231bf97181432/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 63016f8e28f84c44867231bf97181432
run_uuid: 63016f8e28f84c44867231bf97181432
source_name: ''
source_type: 4
source_version: ''
start_time: 1652635790126
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
10

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/6c7ffdfb78ef42b99ea3361d144ed394/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 6c7ffdfb78ef42b99ea3361d144ed394
run_uuid: 6c7ffdfb78ef42b99ea3361d144ed394
source_name: ''
source_type: 4
source_version: ''
start_time: 1652635689676
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
10

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/7bb429bfa99a4de4b5d23833129db58a/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 7bb429bfa99a4de4b5d23833129db58a
run_uuid: 7bb429bfa99a4de4b5d23833129db58a
source_name: ''
source_type: 4
source_version: ''
start_time: 1652636503389
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
12

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/82516964547444549542e17cd65c49f6/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 82516964547444549542e17cd65c49f6
run_uuid: 82516964547444549542e17cd65c49f6
source_name: ''
source_type: 4
source_version: ''
start_time: 1652636362120
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
11

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
docker

View File

@ -0,0 +1 @@
7eddb939fc735f54ddeed50dc19390ea59c4d4df

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

View File

@ -0,0 +1 @@
file:///home/mikolaj/ai_tech/ium_444463

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
mikolaj

View File

@ -0,0 +1,15 @@
artifact_uri: ./mlruns/0/833ad1985768424296dd74a44f2e1487/artifacts
end_time: null
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
name: ''
run_id: 833ad1985768424296dd74a44f2e1487
run_uuid: 833ad1985768424296dd74a44f2e1487
source_name: ''
source_type: 4
source_version: ''
start_time: 1652636704373
status: 1
tags: []
user_id: mikolaj

View File

@ -0,0 +1 @@
12

View File

@ -0,0 +1 @@
git@git.wmi.amu.edu.pl:s444463/ium_444463.git

Some files were not shown because too many files have changed in this diff Show More