From b77042df83edd46866eda890d4e2e16cbf4d798e Mon Sep 17 00:00:00 2001
From: Dominik Strzako
Date: Tue, 8 Jun 2021 00:28:23 +0200
Subject: [PATCH] dvc jenkins

---
Review notes (ignored by git am/apply):
- Zadanie_10_Split.py no longer leaks the 'quality' label into the features.
- Zadanie_10_Train.py reads the input width from the data, uses
  Adam(learning_rate=...) and closes sample.txt via a context manager.
- The blob ids on the "index" lines and the md5/size entries in dvc.lock
  predate these fixes; run `dvc repro` after applying to refresh dvc.lock.

 .gitignore          |  3 +++
 Jenkinsfile_dvc     | 17 ++++++++++++++++
 Zadanie_10_Split.py | 32 ++++++++++++++++++++++++++++++
 Zadanie_10_Train.py | 47 +++++++++++++++++++++++++++++++++++++++++++++
 dvc.lock            | 32 ++++++++++++++++++++++++++++++
 dvc.yaml            | 17 ++++++++++++++++
 6 files changed, 148 insertions(+)
 create mode 100644 Jenkinsfile_dvc
 create mode 100644 Zadanie_10_Split.py
 create mode 100644 Zadanie_10_Train.py
 create mode 100644 dvc.lock
 create mode 100644 dvc.yaml

diff --git a/.gitignore b/.gitignore
index 8a3fa50..063c393 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
 /winequality-red.csv
+/10_x.csv
+/10_y.csv
+/sample.txt
diff --git a/Jenkinsfile_dvc b/Jenkinsfile_dvc
new file mode 100644
index 0000000..8b393e1
--- /dev/null
+++ b/Jenkinsfile_dvc
@@ -0,0 +1,17 @@
+pipeline {
+    agent {docker { image 'snowycocoon/ium_434788:3'}}
+    stages {
+        stage('Test') {
+            steps {
+                sh 'echo hi'
+            }
+        }
+    }
+    post {
+        success {
+            mail body: 'SUCCESS',
+            subject: 's434788 DVC',
+            to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
\ No newline at end of file
diff --git a/Zadanie_10_Split.py b/Zadanie_10_Split.py
new file mode 100644
index 0000000..fb74375
--- /dev/null
+++ b/Zadanie_10_Split.py
@@ -0,0 +1,32 @@
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+
+import numpy as np
+import pandas as pd
+
+wine=pd.read_csv('winequality-red.csv')
+# Separate the target column from the feature columns.
+y = wine['quality']
+x = wine.drop('quality', axis=1)
+
+citricacid = x['fixed acidity'] * x['citric acid']
+citric_acidity = pd.DataFrame(citricacid, columns=['citric_accidity'])
+density_acidity = x['fixed acidity'] * x['density']
+density_acidity = pd.DataFrame(density_acidity, columns=['density_acidity'])
+# Join onto x, not wine: wine still holds 'quality', which would leak the label.
+x = x.join(citric_acidity).join(density_acidity)
+# Bin quality into two classes: (2, 5] -> 'bad', (5, 8] -> 'nice'.
+bins = (2, 5, 8)
+labels = ['bad', 'nice']
+y = pd.cut(y, bins = bins, labels = labels)
+
+enc = LabelEncoder()
+yenc = enc.fit_transform(y)
+
+scale = StandardScaler()
+scaled_x = scale.fit_transform(x)
+
+df_x = pd.DataFrame(scaled_x)
+df_y = pd.DataFrame(yenc)
+# Persist features and encoded labels for the training stage.
+df_x.to_csv(r'10_x.csv', index=False)
+df_y.to_csv(r'10_y.csv', index=False)
\ No newline at end of file
diff --git a/Zadanie_10_Train.py b/Zadanie_10_Train.py
new file mode 100644
index 0000000..ef47596
--- /dev/null
+++ b/Zadanie_10_Train.py
@@ -0,0 +1,47 @@
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.optimizers import Adam
+
+
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+
+import numpy as np
+import pandas as pd
+
+x=pd.read_csv('10_x.csv')
+y=pd.read_csv('10_y.csv')
+
+x_train, x_test, y_train, y_test = train_test_split(x,y , test_size=0.2,train_size=0.8, random_state=21)
+# The input width is read from the data instead of being hard-coded.
+NeuralModel = Sequential([
+    Dense(128, activation='relu', input_shape=(x.shape[1],)),
+    Dense(32, activation='relu'),
+    Dense(64, activation='relu'),
+    Dense(64, activation='relu'),
+    Dense(64, activation='relu'),
+    Dense(1, activation='sigmoid')
+])
+
+
+#https://keras.io/api/losses/
+#https://keras.io/api/optimizers/
+#https://keras.io/api/metrics/
+
+opt = Adam(learning_rate=0.0003)
+NeuralModel.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy','AUC'])
+NeuralModel.fit(x_train, y_train, batch_size= 16, epochs = 16) #verbose = 1
+# Round the sigmoid outputs to 0/1 before scoring.
+y_pred = NeuralModel.predict(x_test)
+y_pred = np.around(y_pred, decimals=0)
+results = accuracy_score(y_test,y_pred)
+
+# The context manager closes the report file even if the write fails.
+with open("sample.txt", "w") as text_file:
+    text_file.write(f"accuracy: {results}")
+
+print(f"accuracy: {results}")
+
+# The target is binary: quality is binned into 'bad' and 'nice' in Zadanie_10_Split.py.
+# An accuracy of exactly 1 usually indicates the label leaked into the features.
\ No newline at end of file
diff --git a/dvc.lock b/dvc.lock
new file mode 100644
index 0000000..64ed1f0
--- /dev/null
+++ b/dvc.lock
@@ -0,0 +1,32 @@
+split_model:
+  cmd: python3 Zadanie_10_Split.py
+  deps:
+  - path: Zadanie_10_Split.py
+    md5: 2d95e0e1afc997823fc613788e2fbe16
+    size: 864
+  - path: winequality-red.csv
+    md5: 6a883fd98624e18c0b7ce251f4fab4fb
+    size: 100951
+  outs:
+  - path: 10_x.csv
+    md5: bcfb4f34de770b22e1065b9b2c133e16
+    size: 443481
+  - path: 10_y.csv
+    md5: 7d1dc704bd48248f8a51c771674e2ad8
+    size: 4800
+train_model:
+  cmd: python3 Zadanie_10_Train.py
+  deps:
+  - path: 10_x.csv
+    md5: bcfb4f34de770b22e1065b9b2c133e16
+    size: 443481
+  - path: 10_y.csv
+    md5: 7d1dc704bd48248f8a51c771674e2ad8
+    size: 4800
+  - path: Zadanie_10_Train.py
+    md5: 0d0aff9e327292b07cb5110c576f7efe
+    size: 1541
+  outs:
+  - path: sample.txt
+    md5: 98937548d721445b2095fb13deb756d7
+    size: 13
diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
index 0000000..2242f82
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,17 @@
+stages:
+  split_model:
+    cmd: python3 Zadanie_10_Split.py
+    deps:
+    - winequality-red.csv
+    - Zadanie_10_Split.py
+    outs:
+    - 10_x.csv
+    - 10_y.csv
+  train_model:
+    cmd: python3 Zadanie_10_Train.py
+    deps:
+    - Zadanie_10_Train.py
+    - 10_x.csv
+    - 10_y.csv
+    outs:
+    - sample.txt
\ No newline at end of file