IUM_08
This commit is contained in:
parent
0dbf6f1959
commit
d739f275e8
10
mlflow_project/MLproject
Normal file
10
mlflow_project/MLproject
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# MLflow project definition for the training script in this directory.
name: MLflow_s464979

# Conda environment specification used to build the run environment.
conda_env: conda.yaml

entry_points:
  # Select this entry point by name, e.g.
  #   mlflow run . -e optimal_parameters -P epochs=20 -P batch_size=32
  optimal_parameters:
    parameters:
      # Number of training epochs, substituted for {epochs} in the command.
      epochs:
        type: int
        default: 20
      # Mini-batch size, substituted for {batch_size} in the command.
      batch_size:
        type: int
        default: 32
    # Both values are passed positionally; the script reads them from sys.argv.
    command: 'python mlflow_training_model.py {epochs} {batch_size}'
|
11
mlflow_project/conda.yaml
Normal file
11
mlflow_project/conda.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Conda environment referenced by the MLproject file (conda_env: conda.yaml).
name: MLflow_s464979

channels:
  - defaults

dependencies:
  - python=3.10
  - pip
  # Packages below are installed with pip inside the conda environment.
  # NOTE(review): none of them is version-pinned, so rebuilt environments
  # may pick up different releases over time.
  - pip:
      - mlflow
      - tensorflow
      - pandas
      - scikit-learn
|
53
mlflow_project/mlflow_training_model.py
Normal file
53
mlflow_project/mlflow_training_model.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import tensorflow as tf
|
||||||
|
import sys
|
||||||
|
import mlflow
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
# Runs at import time: every MLflow tracking call made by this script is sent
# to the tracking server addressed by this URI.
mlflow.set_tracking_uri("http://localhost:5000")
|
||||||
|
|
||||||
|
# Columns used as model inputs, and the column the label is derived from.
_FEATURE_COLUMNS = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
_TARGET_COLUMN = 'review_overall'
# An overall score at or above this value is treated as the positive class.
_POSITIVE_THRESHOLD = 3


def _load_features_and_labels(csv_path):
    """Read a beer-review CSV and return ``(features, labels)`` as NumPy arrays.

    ``features`` holds the four ``review_*`` input columns as float32 and
    ``labels`` holds 0/1 integers (1 when ``review_overall`` >= 3).
    Assumes the feature columns are numeric scores; the float cast raises
    otherwise.
    """
    frame = pd.read_csv(csv_path)
    features = frame[_FEATURE_COLUMNS].to_numpy(dtype='float32')
    labels = (frame[_TARGET_COLUMN] >= _POSITIVE_THRESHOLD).astype(int).to_numpy()
    return features, labels


def _parse_hyperparameters(argv):
    """Return ``(epochs, batch_size)`` parsed from the command-line arguments.

    Exits with a usage message when either value is missing or not an integer.
    """
    try:
        return int(argv[1]), int(argv[2])
    except (IndexError, ValueError) as err:
        raise SystemExit(
            "usage: python mlflow_training_model.py <epochs> <batch_size>"
        ) from err


def main():
    """Train a binary beer-review classifier and record the run in MLflow.

    Reads ``epochs`` and ``batch_size`` from ``sys.argv[1]`` and
    ``sys.argv[2]``, trains on ``./beer_reviews_train.csv``, evaluates on
    ``./beer_reviews_test.csv`` and logs both hyperparameters plus the test
    ``accuracy`` to the active MLflow run.
    """
    # Parse the arguments first so a bad invocation fails before any data is
    # loaded or a run is opened.
    epochs, batch_size = _parse_hyperparameters(sys.argv)

    # The inputs are fed to the network directly. A text tokenizer applied to
    # the DataFrame would iterate over its column labels instead of its rows,
    # producing four samples regardless of how many reviews there are.
    X_train, y_train = _load_features_and_labels('./beer_reviews_train.csv')

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

        # Log the hyperparameters before training so they are kept even if
        # fitting fails part-way through.
        print(epochs)
        print(batch_size)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)

        model = tf.keras.Sequential([
            tf.keras.Input(shape=(len(_FEATURE_COLUMNS),)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid'),
        ])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        # Labels are binarised exactly like the test labels, which is what
        # binary cross-entropy with a sigmoid output expects.
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                  validation_split=0.1)

        # Test data goes through the same preparation as the training data.
        X_test, y_test_binary = _load_features_and_labels('./beer_reviews_test.csv')
        predictions = model.predict(X_test).flatten()

        accuracy = accuracy_score(y_test_binary, predictions.round().astype(int))
        mlflow.log_metric("accuracy", accuracy)
|
||||||
|
|
||||||
|
# Start training only when this file is executed as a script (as the MLproject
# command does), not when it is imported.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user