diff --git a/.gitignore b/.gitignore
index 207d123..9eec80d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,14 +1,4 @@
-# ---> JupyterNotebooks
-# gitignore template for Jupyter Notebooks
-# website: http://jupyter.org/
-
-.ipynb_checkpoints
-*/.ipynb_checkpoints/*
-
-# IPython
-profile_default/
-ipython_config.py
-
-# Remove previous ipynb_checkpoints
-# git rm -r .ipynb_checkpoints/
-
+.ipynb_checkpoints/
+my_runs
+mlruns
+IUM_08/examples
diff --git a/IUM_02.Dane.ipynb b/IUM_02.Dane.ipynb
index 147ec44..05107a4 100644
--- a/IUM_02.Dane.ipynb
+++ b/IUM_02.Dane.ipynb
@@ -243,7 +243,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 61,
"metadata": {
"slideshow": {
"slide_type": "slide"
@@ -260,7 +260,7 @@
}
],
"source": [
- "# Żeby poniższa komenda zadziałała, musisz posiadać plik /.kaggle/kaggle.json, zawierający Kaggle API token.\n",
+ "# Żeby poniższa komenda zadziałała, musisz posiadać plik ~/.kaggle/kaggle.json, zawierający Kaggle API token.\n",
"# Instrukcje: https://www.kaggle.com/docs/api\n",
"!kaggle datasets download -d uciml/iris"
]
@@ -1022,7 +1022,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
"source": [
"### Przykładowy podział z pomocą standardowych narzędzi Bash"
]
@@ -1260,7 +1264,11 @@
},
{
"cell_type": "markdown",
- "metadata": {},
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
"source": [
"### Podział z pomocą sckikit learn\n",
"- Do podziału możemy też użyć biblioteki https://scikit-learn.org/"
@@ -1269,7 +1277,11 @@
{
"cell_type": "code",
"execution_count": 45,
- "metadata": {},
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
"outputs": [
{
"data": {
@@ -1294,7 +1306,11 @@
{
"cell_type": "code",
"execution_count": 46,
- "metadata": {},
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
"outputs": [
{
"data": {
@@ -1318,7 +1334,10 @@
"cell_type": "code",
"execution_count": 48,
"metadata": {
- "scrolled": true
+ "scrolled": true,
+ "slideshow": {
+ "slide_type": "slide"
+ }
},
"outputs": [
{
@@ -1345,7 +1364,10 @@
"cell_type": "code",
"execution_count": 49,
"metadata": {
- "scrolled": true
+ "scrolled": true,
+ "slideshow": {
+ "slide_type": "slide"
+ }
},
"outputs": [
{
diff --git a/IUM_08.MLFlow.ipynb b/IUM_08.MLFlow.ipynb
new file mode 100644
index 0000000..8b58ef8
--- /dev/null
+++ b/IUM_08.MLFlow.ipynb
@@ -0,0 +1,1080 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# MLflow\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ " ## MLflow\n",
+ " - https://mlflow.org/\n",
+ " - Narzędzie podobne do omawianego na poprzednich zajęciach Sacred\n",
+ " - Nieco inne podejście: mniej ingerencji w istniejący kod\n",
+ " - Bardziej kompleksowe rozwiązanie: 4 komponenty, pierwszy z nich ma funkcjonalność podobną do Sacred\n",
+ " - Działa \"z każdym\" językiem. A tak naprawdę: Python, R, Java + CLI API + REST API\n",
+ " - Popularna wśród pracodawców - wyniki wyszukiwania ofert pracy: 20 ofert (https://pl.indeed.com/), 36 ofert (linkedin). Sacred: 0\n",
+ " - Integracja z licznymi bibliotekami / chmurami\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "## Komponenty\n",
+ "\n",
+ "MLflow składa się z czterech niezależnych komponentów:\n",
+ " - **MLflow Tracking** - pozwala śledzić zmiany parametrów, kodu, środowiska i ich wpływ na metryki. Jest to funkcjonalność bardzo zbliżona do tej, którą zapewnia Sacred\n",
+ " - **MLflow Projects** - umożliwia \"pakowanie\" kodu eksperymentów w taki sposób, żeby mogły być w łatwy sposób zreprodukowane przez innych\n",
+ " - **MLflow Models** - ułatwia \"pakowanie\" modeli uczenia maszynowego\n",
+ " - **MLflow Registry** - zapewnia centralne miejsce do przechowywania i współdzielenia modeli. Zapewnia narzędzia do wersjonowania i śledzenia pochodzenia tych modeli.\n",
+ " \n",
+ "Komponenty te mogą być używane razem bądź oddzielnie."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "## MLflow Tracking - przykład\n",
+ "(poniższe przykłady kodu trenującego pochodzą z tutoriala MLflow: https://mlflow.org/docs/latest/tutorials-and-examples/tutorial.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%%capture null\n",
+ "# The PyPI distribution is called scikit-learn; `sklearn` is only the import name.\n",
+ "%pip install mlflow scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting IUM_08/examples/sklearn_elasticnet_wine/train.py\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile IUM_08/examples/sklearn_elasticnet_wine/train.py\n",
+ "# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality\n",
+ "# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.\n",
+ "# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.\n",
+ "\n",
+ "import os\n",
+ "import warnings\n",
+ "import sys\n",
+ "\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import ElasticNet\n",
+ "from urllib.parse import urlparse\n",
+ "import mlflow\n",
+ "import mlflow.sklearn\n",
+ "\n",
+ "import logging\n",
+ "\n",
+ "logging.basicConfig(level=logging.WARN)\n",
+ "logger = logging.getLogger(__name__)\n",
+ "\n",
+ "\n",
+ "def eval_metrics(actual, pred):\n",
+ "    \"\"\"Return the (rmse, mae, r2) regression metrics of pred measured against actual.\"\"\"\n",
+ "    root_mse = np.sqrt(mean_squared_error(actual, pred))\n",
+ "    abs_err, r_squared = mean_absolute_error(actual, pred), r2_score(actual, pred)\n",
+ "    return root_mse, abs_err, r_squared\n",
+ "\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ "    warnings.filterwarnings(\"ignore\")\n",
+ "    np.random.seed(40)\n",
+ "\n",
+ "    # Read the wine-quality csv file from the URL\n",
+ "    csv_url = (\n",
+ "        \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n",
+ "    )\n",
+ "    try:\n",
+ "        data = pd.read_csv(csv_url, sep=\";\")\n",
+ "    except Exception as e:\n",
+ "        logger.exception(\n",
+ "            \"Unable to download training & test CSV, check your internet connection. Error: %s\", e\n",
+ "        )\n",
+ "        # Without the data nothing below can run, so stop here instead of failing later with a NameError\n",
+ "        raise\n",
+ "\n",
+ "    # Split the data into training and test sets. (0.75, 0.25) split.\n",
+ "    train, test = train_test_split(data)\n",
+ "\n",
+ "    # The predicted column is \"quality\" which is a scalar from [3, 9]\n",
+ "    train_x = train.drop([\"quality\"], axis=1)\n",
+ "    test_x = test.drop([\"quality\"], axis=1)\n",
+ "    train_y = train[[\"quality\"]]\n",
+ "    test_y = test[[\"quality\"]]\n",
+ "\n",
+ "    # Hyperparameters come from the command line (alpha, then l1_ratio); each defaults to 0.5\n",
+ "    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5\n",
+ "    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5\n",
+ "\n",
+ "    with mlflow.start_run():\n",
+ "        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n",
+ "        lr.fit(train_x, train_y)\n",
+ "\n",
+ "        predicted_qualities = lr.predict(test_x)\n",
+ "\n",
+ "        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)\n",
+ "\n",
+ "        print(\"Elasticnet model (alpha=%f, l1_ratio=%f):\" % (alpha, l1_ratio))\n",
+ "        print(\" RMSE: %s\" % rmse)\n",
+ "        print(\" MAE: %s\" % mae)\n",
+ "        print(\" R2: %s\" % r2)\n",
+ "\n",
+ "        mlflow.log_param(\"alpha\", alpha)\n",
+ "        mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
+ "        mlflow.log_metric(\"rmse\", rmse)\n",
+ "        mlflow.log_metric(\"r2\", r2)\n",
+ "        mlflow.log_metric(\"mae\", mae)\n",
+ "\n",
+ "        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
+ "\n",
+ "        # Model registry does not work with file store\n",
+ "        if tracking_url_type_store != \"file\":\n",
+ "\n",
+ "            # Register the model\n",
+ "            # There are other ways to use the Model Registry, which depends on the use case,\n",
+ "            # please refer to the doc for more information:\n",
+ "            # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
+ "            mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\")\n",
+ "        else:\n",
+ "            mlflow.sklearn.log_model(lr, \"model\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.500000):\r\n",
+ " RMSE: 0.7931640229276851\r\n",
+ " MAE: 0.6271946374319586\r\n",
+ " R2: 0.10862644997792614\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "### Wytrenujmy model z domyślnymi wartościami parametrów\n",
+ "! cd ./IUM_08/examples/; python sklearn_elasticnet_wine/train.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7128829045893679\n",
+ " MAE: 0.5462202174984664\n",
+ " R2: 0.2799376066653344\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7268133518615142\n",
+ " MAE: 0.5586842416161892\n",
+ " R2: 0.251521166881557\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7347397539240514\n",
+ " MAE: 0.5657315547549873\n",
+ " R2: 0.23510678899596094\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7410782793160982\n",
+ " MAE: 0.5712718681984227\n",
+ " R2: 0.22185255063708875\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7460550348172179\n",
+ " MAE: 0.576381895873763\n",
+ " R2: 0.21136606570632266\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7510866447955419\n",
+ " MAE: 0.5815681289333974\n",
+ " R2: 0.20069264568704714\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7560654760040749\n",
+ " MAE: 0.5868129921328281\n",
+ " R2: 0.19006056603695476\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7609263702116827\n",
+ " MAE: 0.5919470003487062\n",
+ " R2: 0.17961256649282442\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.100000):\n",
+ " RMSE: 0.7656313758553691\n",
+ " MAE: 0.5969367233859049\n",
+ " R2: 0.16943586313742276\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7201489594275661\n",
+ " MAE: 0.5525324524014098\n",
+ " R2: 0.26518433811823017\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7336400911821402\n",
+ " MAE: 0.5643841279275428\n",
+ " R2: 0.23739466063584158\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7397486012946922\n",
+ " MAE: 0.5704931175017443\n",
+ " R2: 0.22464242411894242\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7468093030485085\n",
+ " MAE: 0.5777243300021722\n",
+ " R2: 0.2097706278632726\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7543919979968401\n",
+ " MAE: 0.5857669727382302\n",
+ " R2: 0.19364204365178095\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7622123676513404\n",
+ " MAE: 0.5938629318868578\n",
+ " R2: 0.17683724501340814\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7700845840888665\n",
+ " MAE: 0.6024685725504659\n",
+ " R2: 0.15974600028150265\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7778880968569085\n",
+ " MAE: 0.6105907461474273\n",
+ " R2: 0.14263059582492588\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.200000):\n",
+ " RMSE: 0.7855450337039626\n",
+ " MAE: 0.6182359127922239\n",
+ " R2: 0.1256689455181047\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7260299544064643\n",
+ " MAE: 0.5571534327625295\n",
+ " R2: 0.2531337966130104\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7357092639331829\n",
+ " MAE: 0.5667609266233857\n",
+ " R2: 0.23308686049079996\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7443224557281489\n",
+ " MAE: 0.5754825491733004\n",
+ " R2: 0.2150247343683439\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7545302211047864\n",
+ " MAE: 0.5862255018460154\n",
+ " R2: 0.19334652749043568\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7657094552843393\n",
+ " MAE: 0.597876674089536\n",
+ " R2: 0.16926645189778677\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7774287676055035\n",
+ " MAE: 0.6102458961382884\n",
+ " R2: 0.14364282001967787\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7876149030178985\n",
+ " MAE: 0.6208628759605734\n",
+ " R2: 0.12105524358911324\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.300000):\n",
+ " RMSE: 0.7972426725990548\n",
+ " MAE: 0.6310633254738363\n",
+ " R2: 0.09943554388738107\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.300000):\n",
+ " RMSE: 0.806653553139972\n",
+ " MAE: 0.6407940021176486\n",
+ " R2: 0.07804901733081859\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7301757756825391\n",
+ " MAE: 0.5603782497631705\n",
+ " R2: 0.24457984004307665\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7383379454127179\n",
+ " MAE: 0.5696920200435643\n",
+ " R2: 0.22759672468382497\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7501603725852\n",
+ " MAE: 0.5818749078280213\n",
+ " R2: 0.2026629101382652\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7644619587468349\n",
+ " MAE: 0.5966303605775048\n",
+ " R2: 0.17197111491474282\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7794144864140182\n",
+ " MAE: 0.6125287339702588\n",
+ " R2: 0.1392625955410326\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.400000):\n",
+ " RMSE: 0.7928446872861473\n",
+ " MAE: 0.626666444473971\n",
+ " R2: 0.10934405701835759\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.400000):\n",
+ " RMSE: 0.8064523157995205\n",
+ " MAE: 0.6407990295001776\n",
+ " R2: 0.07850896155515663\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.400000):\n",
+ " RMSE: 0.8200264141399415\n",
+ " MAE: 0.6539313398770489\n",
+ " R2: 0.04722706260889009\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.400000):\n",
+ " RMSE: 0.8317936823364004\n",
+ " MAE: 0.6647839366878934\n",
+ " R2: 0.01968654319755092\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.500000):\n",
+ " RMSE: 0.7308996187375898\n",
+ " MAE: 0.5615486628017713\n",
+ " R2: 0.2430813606733676\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.500000):\n",
+ " RMSE: 0.7415652207304311\n",
+ " MAE: 0.573067857646195\n",
+ " R2: 0.22082961765864062\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.500000):\n",
+ " RMSE: 0.7573787958793151\n",
+ " MAE: 0.5893143148791096\n",
+ " R2: 0.18724431943947983\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.500000):\n",
+ " RMSE: 0.7759342885655987\n",
+ " MAE: 0.6090076377075831\n",
+ " R2: 0.14693206734185604\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.500000):\n",
+ " RMSE: 0.7931640229276851\n",
+ " MAE: 0.6271946374319586\n",
+ " R2: 0.10862644997792614\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.500000):\n",
+ " RMSE: 0.8112953030727291\n",
+ " MAE: 0.645693705089251\n",
+ " R2: 0.06740807086129252\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.500000):\n",
+ " RMSE: 0.8298921852578498\n",
+ " MAE: 0.6629780128961713\n",
+ " R2: 0.024163452726365775\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.500000):\n",
+ " RMSE: 0.8320198635059106\n",
+ " MAE: 0.6657357030427604\n",
+ " R2: 0.019153337439844154\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.500000):\n",
+ " RMSE: 0.8323808561832262\n",
+ " MAE: 0.6669472047761406\n",
+ " R2: 0.0183020229672054\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.600000):\n",
+ " RMSE: 0.7317723392279818\n",
+ " MAE: 0.5627373693033669\n",
+ " R2: 0.24127270524006605\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.600000):\n",
+ " RMSE: 0.7454324777911233\n",
+ " MAE: 0.5772117261484206\n",
+ " R2: 0.21268169183406394\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.600000):\n",
+ " RMSE: 0.7661028672396263\n",
+ " MAE: 0.5984406933733759\n",
+ " R2: 0.16841259155853305\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.600000):\n",
+ " RMSE: 0.787179486885359\n",
+ " MAE: 0.6210967388389844\n",
+ " R2: 0.12202678676193257\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.600000):\n",
+ " RMSE: 0.809739471626647\n",
+ " MAE: 0.6442565454817458\n",
+ " R2: 0.07098152823463388\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.600000):\n",
+ " RMSE: 0.8317884179944764\n",
+ " MAE: 0.6647524814105722\n",
+ " R2: 0.019698951776764728\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.600000):\n",
+ " RMSE: 0.8321519738036909\n",
+ " MAE: 0.6662086037874676\n",
+ " R2: 0.018841829895677176\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.600000):\n",
+ " RMSE: 0.8326350511178233\n",
+ " MAE: 0.6676630843299566\n",
+ " R2: 0.01770234373563795\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.600000):\n",
+ " RMSE: 0.8332048101440411\n",
+ " MAE: 0.6690717294644856\n",
+ " R2: 0.016357542209390563\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.700000):\n",
+ " RMSE: 0.7327938109945942\n",
+ " MAE: 0.5640101718105491\n",
+ " R2: 0.23915303116151632\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.700000):\n",
+ " RMSE: 0.7499835110445395\n",
+ " MAE: 0.5819389930665501\n",
+ " R2: 0.20303883413454027\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.700000):\n",
+ " RMSE: 0.7747136483567111\n",
+ " MAE: 0.6079678532556209\n",
+ " R2: 0.14961391810397695\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.700000):\n",
+ " RMSE: 0.8004478857657858\n",
+ " MAE: 0.6350378679245181\n",
+ " R2: 0.09217977708630032\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.700000):\n",
+ " RMSE: 0.829586285479097\n",
+ " MAE: 0.6627028304266674\n",
+ " R2: 0.024882710417618137\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.700000):\n",
+ " RMSE: 0.8321502650365332\n",
+ " MAE: 0.6662000872414003\n",
+ " R2: 0.018845859373919027\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.700000):\n",
+ " RMSE: 0.832725785743381\n",
+ " MAE: 0.667898097502809\n",
+ " R2: 0.017488244494447747\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.700000):\n",
+ " RMSE: 0.8331825395236181\n",
+ " MAE: 0.6692175076829847\n",
+ " R2: 0.016410124803194592\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.700000):\n",
+ " RMSE: 0.8331069437643933\n",
+ " MAE: 0.6697424890266508\n",
+ " R2: 0.016588601539516357\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.800000):\n",
+ " RMSE: 0.7339712501091269\n",
+ " MAE: 0.5654097809725043\n",
+ " R2: 0.23670603806205326\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.800000):\n",
+ " RMSE: 0.7552646505492441\n",
+ " MAE: 0.5873472009739388\n",
+ " R2: 0.19177543499093674\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.800000):\n",
+ " RMSE: 0.7836957692333741\n",
+ " MAE: 0.6176788505535867\n",
+ " R2: 0.12978065429593022\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8160164529135189\n",
+ " MAE: 0.650349905850893\n",
+ " R2: 0.05652247327326554\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8320145539945119\n",
+ " MAE: 0.6657081587004348\n",
+ " R2: 0.019165855890777572\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8326325509502465\n",
+ " MAE: 0.6676500690618903\n",
+ " R2: 0.01770824285088779\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8331830329685253\n",
+ " MAE: 0.6692142378162035\n",
+ " R2: 0.016408959758236752\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8330972295348316\n",
+ " MAE: 0.669813814205792\n",
+ " R2: 0.016611535037920344\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.800000):\n",
+ " RMSE: 0.8330208354420413\n",
+ " MAE: 0.6704133670619602\n",
+ " R2: 0.016791878033996177\n",
+ "Elasticnet model (alpha=0.100000, l1_ratio=0.900000):\n",
+ " RMSE: 0.735314956888905\n",
+ " MAE: 0.566974647785579\n",
+ " R2: 0.23390870203034675\n",
+ "Elasticnet model (alpha=0.200000, l1_ratio=0.900000):\n",
+ " RMSE: 0.7613249071370938\n",
+ " MAE: 0.593613372674502\n",
+ " R2: 0.1787529818606436\n",
+ "Elasticnet model (alpha=0.300000, l1_ratio=0.900000):\n",
+ " RMSE: 0.7940027723712206\n",
+ " MAE: 0.6284316436541582\n",
+ " R2: 0.10674024649047587\n",
+ "Elasticnet model (alpha=0.400000, l1_ratio=0.900000):\n",
+ " RMSE: 0.831784893250733\n",
+ " MAE: 0.6647313794016759\n",
+ " R2: 0.019707259905588637\n",
+ "Elasticnet model (alpha=0.500000, l1_ratio=0.900000):\n",
+ " RMSE: 0.8323747376136406\n",
+ " MAE: 0.6669171677143245\n",
+ " R2: 0.018316455219614114\n",
+ "Elasticnet model (alpha=0.600000, l1_ratio=0.900000):\n",
+ " RMSE: 0.8332063354920289\n",
+ " MAE: 0.6690618761753936\n",
+ " R2: 0.01635394069773599\n",
+ "Elasticnet model (alpha=0.700000, l1_ratio=0.900000):\n",
+ " RMSE: 0.8331078270287657\n",
+ " MAE: 0.6697360518827573\n",
+ " R2: 0.016586516302516174\n",
+ "Elasticnet model (alpha=0.800000, l1_ratio=0.900000):\n",
+ " RMSE: 0.8330212125502486\n",
+ " MAE: 0.6704102143580977\n",
+ " R2: 0.016790987837928095\n",
+ "Elasticnet model (alpha=0.900000, l1_ratio=0.900000):\n",
+ " RMSE: 0.8329464950658837\n",
+ " MAE: 0.6710843636018047\n",
+ " R2: 0.01696735695860563\n"
+ ]
+ }
+ ],
+ "source": [
+ "### I jeszcze raz, tym razem ze zmienionymi wartościami parametrów\n",
+ "! cd ./IUM_08/examples/; for l in {1..9}; do for a in {1..9}; do python sklearn_elasticnet_wine/train.py 0.$a 0.$l; done; done"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "total 16\r\n",
+ "drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 15918a3901854356933736dfc0935807\r\n",
+ "drwxrwxr-x 6 tomek tomek 4096 maj 2 16:36 23ae1069b29e4955ac9f3536c71e7ac2\r\n",
+ "drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 b7ddb17a37404d7898e105afa5c20287\r\n",
+ "-rw-rw-r-- 1 tomek tomek 151 maj 2 16:36 meta.yaml\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "### Informacje o przebiegach eksperymentu zostały zapisane w katalogu mlruns\n",
+ "! ls -l IUM_08/examples/mlruns/0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[2021-05-10 12:21:16 +0200] [20029] [INFO] Starting gunicorn 20.1.0\n",
+ "[2021-05-10 12:21:16 +0200] [20029] [INFO] Listening at: http://127.0.0.1:5000 (20029)\n",
+ "[2021-05-10 12:21:16 +0200] [20029] [INFO] Using worker: sync\n",
+ "[2021-05-10 12:21:16 +0200] [20030] [INFO] Booting worker with pid: 20030\n",
+ "^C\n",
+ "[2021-05-10 12:22:32 +0200] [20029] [INFO] Handling signal: int\n",
+ "[2021-05-10 12:22:32 +0200] [20030] [INFO] Worker exiting (pid: 20030)\n"
+ ]
+ }
+ ],
+ "source": [
+ "### Możemy je obejrzeć w przeglądarce uruchamiając interfejs webowy:\n",
+ "### (powinniśmy to wywołać w normalnej konsoli, w jupyter będziemy mieli zablokowany kernel)\n",
+ "! cd IUM_08/examples/; mlflow ui"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Wygląd interfejsu webowego\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Porównywanie wyników\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "## Logowanie\n",
+ " - logowania metryk i parametrów można dokonać m.in. poprzez wywołania Python-owego API: `mlflow.log_param()` i `mlflow.log_metric()`. Więcej dostępnych funkcji: [link](https://mlflow.org/docs/latest/tracking.html#logging-functions)\n",
+ " - wywołania te muszą nastąpić po wykonaniu [`mlflow.start_run()`](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.start_run), najlepiej wewnątrz bloku:\n",
+ "```python\n",
+ "with mlflow.start_run():\n",
+ "\n",
+ "    #[...]\n",
+ "\n",
+ "    mlflow.log_param(\"alpha\", alpha)\n",
+ "    mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
+ "```\n",
+ " - jest też możliwość automatycznego logowania dla wybranych bibliotek: https://mlflow.org/docs/latest/tracking.html#automatic-logging"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# MLflow Projects\n",
+ " - MLflow projects to zestaw konwencji i kilku narzędzi\n",
+ " - ułatwiają one uruchamianie eksperymentów"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Konfiguracja projektu\n",
+ " - W pliku `MLproject` zapisuje się konfigurację projektu ([specyfikacja](https://mlflow.org/docs/latest/projects.html))\n",
+ " - Zawiera ona:\n",
+ "   - odnośnik do środowiska, w którym ma być wywołany eksperyment [szczegóły](https://mlflow.org/docs/latest/projects.html#specifying-an-environment):\n",
+ "     - nazwa obrazu Docker\n",
+ "     - albo ścieżka do pliku conda.yaml definiującego środowisko wykonania Conda\n",
+ "   - parametry, z którymi można wywołać eksperyment\n",
+ "   - polecenia służące do wywołania eksperymentu"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting IUM_08/examples/sklearn_elasticnet_wine/MLproject\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile IUM_08/examples/sklearn_elasticnet_wine/MLproject\n",
+ "name: tutorial\n",
+ "\n",
+ "conda_env: conda.yaml  # path to the conda.yaml file that defines the environment\n",
+ "\n",
+ "#docker_env:\n",
+ "#  image: mlflow-docker-example-environment\n",
+ "\n",
+ "entry_points:\n",
+ "  main:\n",
+ "    parameters:\n",
+ "      alpha: {type: float, default: 0.5}\n",
+ "      l1_ratio: {type: float, default: 0.1}\n",
+ "    command: \"python train.py {alpha} {l1_ratio}\"\n",
+ "  test:\n",
+ "    parameters:\n",
+ "      cutoff: {type: float, default: 0}\n",
+ "    command: \"python test.py {cutoff}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Środowisko Conda\n",
+ " \n",
+ " - https://docs.conda.io\n",
+ " - Składnia plików conda.yaml definiujących środowisko: https://docs.conda.io/projects/conda/en/4.6.1/user-guide/tasks/manage-environments.html#create-env-file-manually\n",
+ " - Składnia YAML: [przystępnie](https://learnxinyminutes.com/docs/yaml/), [oficjalnie](https://yaml.org/spec/1.2/spec.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting IUM_08/examples/sklearn_elasticnet_wine/conda.yaml\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile IUM_08/examples/sklearn_elasticnet_wine/conda.yaml\n",
+ "name: tutorial\n",
+ "channels:\n",
+ "  - defaults\n",
+ "dependencies:\n",
+ "  - python=3.6  # these dependencies are installed with conda install\n",
+ "  - pip\n",
+ "  - pip:  # and the ones nested below with pip install\n",
+ "    - scikit-learn==0.23.2\n",
+ "    - mlflow>=1.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Środowisko docker\n",
+ "- zamiast środowiska Conda możemy również podać nazwę obrazu docker, w którym ma być wywołany eksperyment.\n",
+ "- obraz będzie szukany lokalnie a następnie na DockerHub, lub w innym repozytorium dockera\n",
+ "- składnia specyfikacji ścieżki jest taka sama jak w przypadku poleceń dockera, np. docker pull [link](https://docs.docker.com/engine/reference/commandline/pull/#pull-from-a-different-registry)\n",
+ "- Można również podać katalogi do podmontowania wewnątrz kontenera oraz wartości zmiennych środowiskowych do ustawienia w kontenerze:\n",
+ "```yaml\n",
+ "docker_env:\n",
+ " image: mlflow-docker-example-environment\n",
+ " volumes: [\"/local/path:/container/mount/path\"]\n",
+ " environment: [[\"NEW_ENV_VAR\", \"new_var_value\"], \"VAR_TO_COPY_FROM_HOST_ENVIRONMENT\"]\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Parametry\n",
+ " - Specyfikacja parametrów w pliku MLproject pozwala na ich walidację i używanie wartości domyślnych\n",
+ " - Dostępne typy:\n",
+ "   - String\n",
+ "   - Float - dowolna liczba (MLflow waliduje, czy podana wartość jest liczbą)\n",
+ "   - Path - pozwala podawać ścieżki względne (przekształca je na bezwzględne) do plików lokalnych albo do plików zdalnych (np. do s3://) - zostaną wtedy ściągnięte lokalnie\n",
+ "   - URI - podobnie jak path, ale do rozproszonych systemów plików\n",
+ "\n",
+ "- [Składnia](https://mlflow.org/docs/latest/projects.html#specifying-parameters)\n",
+ " \n",
+ "```yaml\n",
+ "parameter_name: {type: data_type, default: value}  # Short syntax\n",
+ "\n",
+ "parameter_name:     # Long syntax\n",
+ "  type: data_type\n",
+ "  default: value\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Uruchamianie projektu\n",
+ " - Projekt możemy uruchomić przy pomocy polecenia `mlflow run` ([dokumentacja](https://mlflow.org/docs/latest/cli.html#mlflow-run))\n",
+ " - Spowoduje to przygotowanie środowiska i uruchomienie eksperymentu wewnątrz środowiska\n",
+ " - domyślnie zostanie uruchomione polecenie zdefiniowane w \"entry point\" `main`. Żeby uruchomić inny \"entry point\", możemy użyć parametru `-e`, np:\n",
+ " ```bash\n",
+ " mlflow run sklearn_elasticnet_wine -e test\n",
+ " ```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2021/05/10 12:39:32 INFO mlflow.utils.conda: === Creating conda environment mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 ===\n",
+ "Collecting package metadata (repodata.json): done\n",
+ "Solving environment: done\n",
+ "Preparing transaction: done\n",
+ "Verifying transaction: done\n",
+ "Executing transaction: done\n",
+ "Installing pip dependencies: / Ran pip subprocess with arguments:\n",
+ "['/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/bin/python', '-m', 'pip', 'install', '-U', '-r', '/home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt']\n",
+ "Pip subprocess output:\n",
+ "Collecting scikit-learn==0.23.2\n",
+ " Using cached scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl (6.8 MB)\n",
+ "Collecting mlflow>=1.0\n",
+ " Downloading mlflow-1.17.0-py3-none-any.whl (14.2 MB)\n",
+ "Collecting joblib>=0.11\n",
+ " Using cached joblib-1.0.1-py3-none-any.whl (303 kB)\n",
+ "Collecting scipy>=0.19.1\n",
+ " Using cached scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)\n",
+ "Requirement already satisfied: numpy>=1.13.3 in /home/tomek/.local/lib/python3.6/site-packages (from scikit-learn==0.23.2->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 1)) (1.15.4)\n",
+ "Collecting threadpoolctl>=2.0.0\n",
+ " Using cached threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n",
+ "Collecting pandas\n",
+ " Using cached pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)\n",
+ "Collecting pyyaml\n",
+ " Using cached PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (640 kB)\n",
+ "Collecting gunicorn\n",
+ " Using cached gunicorn-20.1.0-py3-none-any.whl (79 kB)\n",
+ "Collecting Flask\n",
+ " Using cached Flask-1.1.2-py2.py3-none-any.whl (94 kB)\n",
+ "Collecting alembic<=1.4.1\n",
+ " Using cached alembic-1.4.1-py2.py3-none-any.whl\n",
+ "Collecting prometheus-flask-exporter\n",
+ " Downloading prometheus_flask_exporter-0.18.2.tar.gz (22 kB)\n",
+ "Collecting entrypoints\n",
+ " Using cached entrypoints-0.3-py2.py3-none-any.whl (11 kB)\n",
+ "Collecting databricks-cli>=0.8.7\n",
+ " Using cached databricks_cli-0.14.3-py3-none-any.whl\n",
+ "Collecting requests>=2.17.3\n",
+ " Using cached requests-2.25.1-py2.py3-none-any.whl (61 kB)\n",
+ "Collecting docker>=4.0.0\n",
+ " Using cached docker-5.0.0-py2.py3-none-any.whl (146 kB)\n",
+ "Collecting sqlalchemy\n",
+ " Downloading SQLAlchemy-1.4.14-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n",
+ "Collecting cloudpickle\n",
+ " Using cached cloudpickle-1.6.0-py3-none-any.whl (23 kB)\n",
+ "Collecting pytz\n",
+ " Using cached pytz-2021.1-py2.py3-none-any.whl (510 kB)\n",
+ "Collecting protobuf>=3.6.0\n",
+ " Downloading protobuf-3.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\n",
+ "Collecting click>=7.0\n",
+ " Using cached click-7.1.2-py2.py3-none-any.whl (82 kB)\n",
+ "Collecting sqlparse>=0.3.1\n",
+ " Using cached sqlparse-0.4.1-py3-none-any.whl (42 kB)\n",
+ "Collecting querystring-parser\n",
+ " Using cached querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n",
+ "Collecting gitpython>=2.1.0\n",
+ " Using cached GitPython-3.1.14-py3-none-any.whl (159 kB)\n",
+ "Collecting Mako\n",
+ " Using cached Mako-1.1.4-py2.py3-none-any.whl (75 kB)\n",
+ "Collecting python-editor>=0.3\n",
+ " Using cached python_editor-1.0.4-py3-none-any.whl (4.9 kB)\n",
+ "Collecting python-dateutil\n",
+ " Using cached python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)\n",
+ "Collecting tabulate>=0.7.7\n",
+ " Using cached tabulate-0.8.9-py3-none-any.whl (25 kB)\n",
+ "Requirement already satisfied: six>=1.10.0 in /home/tomek/.local/lib/python3.6/site-packages (from databricks-cli>=0.8.7->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (1.12.0)\n",
+ "Collecting websocket-client>=0.32.0\n",
+ " Downloading websocket_client-0.59.0-py2.py3-none-any.whl (67 kB)\n",
+ "Collecting gitdb<5,>=4.0.1\n",
+ " Using cached gitdb-4.0.7-py3-none-any.whl (63 kB)\n",
+ "Collecting smmap<5,>=3.0.1\n",
+ " Using cached smmap-4.0.0-py2.py3-none-any.whl (24 kB)\n",
+ "Collecting idna<3,>=2.5\n",
+ " Using cached idna-2.10-py2.py3-none-any.whl (58 kB)\n",
+ "Collecting chardet<5,>=3.0.2\n",
+ " Using cached chardet-4.0.0-py2.py3-none-any.whl (178 kB)\n",
+ "Collecting urllib3<1.27,>=1.21.1\n",
+ " Using cached urllib3-1.26.4-py2.py3-none-any.whl (153 kB)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from requests>=2.17.3->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (2020.12.5)\n",
+ "Collecting greenlet!=0.4.17\n",
+ " Downloading greenlet-1.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (155 kB)\n",
+ "Collecting importlib-metadata\n",
+ " Using cached importlib_metadata-4.0.1-py3-none-any.whl (16 kB)\n",
+ "Collecting itsdangerous>=0.24\n",
+ " Using cached itsdangerous-1.1.0-py2.py3-none-any.whl (16 kB)\n",
+ "Collecting Werkzeug>=0.15\n",
+ " Using cached Werkzeug-1.0.1-py2.py3-none-any.whl (298 kB)\n",
+ "Collecting Jinja2>=2.10.1\n",
+ " Using cached Jinja2-2.11.3-py2.py3-none-any.whl (125 kB)\n",
+ "Collecting MarkupSafe>=0.23\n",
+ " Using cached MarkupSafe-1.1.1-cp36-cp36m-manylinux2010_x86_64.whl (32 kB)\n",
+ "Requirement already satisfied: setuptools>=3.0 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from gunicorn->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (52.0.0.post20210125)\n",
+ "Collecting typing-extensions>=3.6.4\n",
+ " Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)\n",
+ "Collecting zipp>=0.5\n",
+ " Using cached zipp-3.4.1-py3-none-any.whl (5.2 kB)\n",
+ "Collecting prometheus_client\n",
+ " Using cached prometheus_client-0.10.1-py2.py3-none-any.whl (55 kB)\n",
+ "Building wheels for collected packages: prometheus-flask-exporter\n",
+ " Building wheel for prometheus-flask-exporter (setup.py): started\n",
+ " Building wheel for prometheus-flask-exporter (setup.py): finished with status 'done'\n",
+ " Created wheel for prometheus-flask-exporter: filename=prometheus_flask_exporter-0.18.2-py3-none-any.whl size=17399 sha256=84da5903cdaabc8f667b7b2e3d5f63a3021cab3d4f4fc1981d9d2a3ab5264738\n",
+ " Stored in directory: /home/tomek/.cache/pip/wheels/15/77/e8/3ca90b66243b0b58d5a5323a3da02cc8c5daf1de7a65141701\n",
+ "Successfully built prometheus-flask-exporter\n",
+ "Installing collected packages: zipp, typing-extensions, MarkupSafe, Werkzeug, urllib3, smmap, Jinja2, itsdangerous, importlib-metadata, idna, greenlet, click, chardet, websocket-client, tabulate, sqlalchemy, requests, pytz, python-editor, python-dateutil, prometheus-client, Mako, gitdb, Flask, threadpoolctl, sqlparse, scipy, querystring-parser, pyyaml, protobuf, prometheus-flask-exporter, pandas, joblib, gunicorn, gitpython, entrypoints, docker, databricks-cli, cloudpickle, alembic, scikit-learn, mlflow\n",
+ "Successfully installed Flask-1.1.2 Jinja2-2.11.3 Mako-1.1.4 MarkupSafe-1.1.1 Werkzeug-1.0.1 alembic-1.4.1 chardet-4.0.0 click-7.1.2 cloudpickle-1.6.0 databricks-cli-0.14.3 docker-5.0.0 entrypoints-0.3 gitdb-4.0.7 gitpython-3.1.14 greenlet-1.1.0 gunicorn-20.1.0 idna-2.10 importlib-metadata-4.0.1 itsdangerous-1.1.0 joblib-1.0.1 mlflow-1.17.0 pandas-1.1.5 prometheus-client-0.10.1 prometheus-flask-exporter-0.18.2 protobuf-3.16.0 python-dateutil-2.8.1 python-editor-1.0.4 pytz-2021.1 pyyaml-5.4.1 querystring-parser-1.2.4 requests-2.25.1 scikit-learn-0.23.2 scipy-1.5.4 smmap-4.0.0 sqlalchemy-1.4.14 sqlparse-0.4.1 tabulate-0.8.9 threadpoolctl-2.1.0 typing-extensions-3.10.0.0 urllib3-1.26.4 websocket-client-0.59.0 zipp-3.4.1\n",
+ "\n",
+ "done\n",
+ "#\n",
+ "# To activate this environment, use\n",
+ "#\n",
+ "# $ conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29\n",
+ "#\n",
+ "# To deactivate an active environment, use\n",
+ "#\n",
+ "# $ conda deactivate\n",
+ "\n",
+ "2021/05/10 12:40:17 INFO mlflow.projects.utils: === Created directory /tmp/tmpgvcpfml8 for downloading remote URIs passed to arguments of type 'path' ===\n",
+ "2021/05/10 12:40:17 INFO mlflow.projects.backend.local: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 1>&2 && python train.py 0.42 0.1' in run with ID 'b9b3795a2898495d95c650bafc0dcc76' === \n",
+ "ERROR:__main__:Unable to download training & test CSV, check your internet connection. Error: \n",
+ "Traceback (most recent call last):\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1349, in do_open\n",
+ " encode_chunked=req.has_header('Transfer-encoding'))\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1287, in request\n",
+ " self._send_request(method, url, body, headers, encode_chunked)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1333, in _send_request\n",
+ " self.endheaders(body, encode_chunked=encode_chunked)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1282, in endheaders\n",
+ " self._send_output(message_body, encode_chunked=encode_chunked)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1042, in _send_output\n",
+ " self.send(msg)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 980, in send\n",
+ " self.connect()\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 952, in connect\n",
+ " (self.host,self.port), self.timeout, self.source_address)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 724, in create_connection\n",
+ " raise err\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 713, in create_connection\n",
+ " sock.connect(sa)\n",
+ "TimeoutError: [Errno 110] Connection timed out\n",
+ "\n",
+ "During handling of the above exception, another exception occurred:\n",
+ "\n",
+ "Traceback (most recent call last):\n",
+ " File \"train.py\", line 40, in \n",
+ " data = pd.read_csv(csv_url, sep=\";\")\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 688, in read_csv\n",
+ " return _read(filepath_or_buffer, kwds)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 437, in _read\n",
+ " filepath_or_buffer, encoding, compression\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 183, in get_filepath_or_buffer\n",
+ " req = urlopen(filepath_or_buffer)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 137, in urlopen\n",
+ " return urllib.request.urlopen(*args, **kwargs)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 223, in urlopen\n",
+ " return opener.open(url, data, timeout)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 526, in open\n",
+ " response = self._open(req, data)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 544, in _open\n",
+ " '_open', req)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 504, in _call_chain\n",
+ " result = func(*args)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1377, in http_open\n",
+ " return self.do_open(http.client.HTTPConnection, req)\n",
+ " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1351, in do_open\n",
+ " raise URLError(err)\n",
+ "urllib.error.URLError: \n",
+ "Traceback (most recent call last):\n",
+ " File \"train.py\", line 47, in \n",
+ " train, test = train_test_split(data)\n",
+ "NameError: name 'data' is not defined\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2021/05/10 12:42:29 ERROR mlflow.cli: === Run (ID 'b9b3795a2898495d95c650bafc0dcc76') failed ===\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "!cd IUM_08/examples/; mlflow run sklearn_elasticnet_wine -P alpha=0.42"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# Zadania [10 pkt] (do 16 V 12:00)\n",
+ "1. Dodaj do swojego projektu logowanie parametrów i metryk za pomocą MLflow (polecenia `mlflow.log_param` i `mlflow.log_metric`)\n",
+ "2. Dodaj plik `MLproject` definiujący polecenia do trenowania i testowania, ich parametry wywołania oraz środowisko (użyj zdefiniowanego wcześniej obrazu Docker)"
+ ]
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Slideshow",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/IUM_08/compare-metrics.png b/IUM_08/compare-metrics.png
new file mode 100644
index 0000000..bb64e52
Binary files /dev/null and b/IUM_08/compare-metrics.png differ
diff --git a/IUM_08/mlflowui.png b/IUM_08/mlflowui.png
new file mode 100644
index 0000000..893fac8
Binary files /dev/null and b/IUM_08/mlflowui.png differ
diff --git a/img/expcontrol/mlflow-logo-d.png b/img/expcontrol/mlflow-logo-d.png
new file mode 100644
index 0000000..1d6a56b
Binary files /dev/null and b/img/expcontrol/mlflow-logo-d.png differ