From aa4f26b92001290a87406511bf496a469df7bb60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Zi=C4=99tkiewicz?= Date: Mon, 17 May 2021 12:52:53 +0200 Subject: [PATCH] =?UTF-8?q?Zaj=C4=99cia=2008,=20cz=C4=99=C5=9B=C4=87=202.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- IUM_08.MLFlow.ipynb | 721 ++++++++++++++++++++++++++++++++------------ 1 file changed, 521 insertions(+), 200 deletions(-) diff --git a/IUM_08.MLFlow.ipynb b/IUM_08.MLFlow.ipynb index 8b58ef8..09c9cc2 100644 --- a/IUM_08.MLFlow.ipynb +++ b/IUM_08.MLFlow.ipynb @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 1, "metadata": { "slideshow": { "slide_type": "slide" @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, "metadata": { "slideshow": { "slide_type": "slide" @@ -117,6 +117,7 @@ "logging.basicConfig(level=logging.WARN)\n", "logger = logging.getLogger(__name__)\n", "\n", + "#mlflow.set_tracking_uri(\"http://localhost:5001\")\n", "\n", "def eval_metrics(actual, pred):\n", " rmse = np.sqrt(mean_squared_error(actual, pred))\n", @@ -173,6 +174,9 @@ " mlflow.log_metric(\"rmse\", rmse)\n", " mlflow.log_metric(\"r2\", r2)\n", " mlflow.log_metric(\"mae\", mae)\n", + " \n", + " # Infer model signature to log it\n", + " signature = mlflow.models.signature.infer_signature(train_x, lr.predict(train_x))\n", "\n", " tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n", "\n", @@ -183,14 +187,14 @@ " # There are other ways to use the Model Registry, which depends on the use case,\n", " # please refer to the doc for more information:\n", " # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n", - " mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\")\n", + " mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\", signature=signature)\n", " else:\n", - " mlflow.sklearn.log_model(lr, 
\"model\")" + " mlflow.sklearn.log_model(lr, \"model\", signature=signature)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": { "slideshow": { "slide_type": "slide" @@ -215,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 4, "metadata": { "slideshow": { "slide_type": "slide" @@ -566,7 +570,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 32, "metadata": { "slideshow": { "slide_type": "slide" @@ -578,21 +582,21 @@ "output_type": "stream", "text": [ "total 16\r\n", - "drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 15918a3901854356933736dfc0935807\r\n", - "drwxrwxr-x 6 tomek tomek 4096 maj 2 16:36 23ae1069b29e4955ac9f3536c71e7ac2\r\n", - "drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 b7ddb17a37404d7898e105afa5c20287\r\n", - "-rw-rw-r-- 1 tomek tomek 151 maj 2 16:36 meta.yaml\r\n" + "drwxrwxr-x 6 tomek tomek 4096 maj 17 08:43 375cde31bdd44a45a91fd7cee92ebcda\r\n", + "drwxrwxr-x 6 tomek tomek 4096 maj 17 10:38 b395b55b47fc43de876b67f5a4a5dae9\r\n", + "drwxrwxr-x 6 tomek tomek 4096 maj 17 09:15 b3ead42eca964113b29e7e5f8bcb7bb7\r\n", + "-rw-rw-r-- 1 tomek tomek 151 maj 17 08:43 meta.yaml\r\n" ] } ], "source": [ "### Informacje o przebieagach eksperymentu zostały zapisane w katalogu mlruns\n", - "! ls -l IUM_08/examples/mlruns/0" + "! 
ls -l IUM_08/examples/mlruns/0 | head" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 33, "metadata": { "slideshow": { "slide_type": "slide" @@ -603,13 +607,45 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2021-05-10 12:21:16 +0200] [20029] [INFO] Starting gunicorn 20.1.0\n", - "[2021-05-10 12:21:16 +0200] [20029] [INFO] Listening at: http://127.0.0.1:5000 (20029)\n", - "[2021-05-10 12:21:16 +0200] [20029] [INFO] Using worker: sync\n", - "[2021-05-10 12:21:16 +0200] [20030] [INFO] Booting worker with pid: 20030\n", - "^C\n", - "[2021-05-10 12:22:32 +0200] [20029] [INFO] Handling signal: int\n", - "[2021-05-10 12:22:32 +0200] [20030] [INFO] Worker exiting (pid: 20030)\n" + "total 20\r\n", + "drwxrwxr-x 3 tomek tomek 4096 maj 17 08:43 artifacts\r\n", + "-rw-rw-r-- 1 tomek tomek 423 maj 17 08:43 meta.yaml\r\n", + "drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 metrics\r\n", + "drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 params\r\n", + "drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 tags\r\n" + ] + } + ], + "source": [ + "! 
ls -l IUM_08/examples/mlruns/0/375cde31bdd44a45a91fd7cee92ebcda" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2021-05-16 17:58:43 +0200] [118029] [INFO] Starting gunicorn 20.1.0\n", + "[2021-05-16 17:58:43 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n", + "[2021-05-16 17:58:43 +0200] [118029] [ERROR] Retrying in 1 second.\n", + "[2021-05-16 17:58:44 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n", + "[2021-05-16 17:58:44 +0200] [118029] [ERROR] Retrying in 1 second.\n", + "[2021-05-16 17:58:45 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n", + "[2021-05-16 17:58:45 +0200] [118029] [ERROR] Retrying in 1 second.\n", + "[2021-05-16 17:58:46 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n", + "[2021-05-16 17:58:46 +0200] [118029] [ERROR] Retrying in 1 second.\n", + "[2021-05-16 17:58:47 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n", + "[2021-05-16 17:58:47 +0200] [118029] [ERROR] Retrying in 1 second.\n", + "[2021-05-16 17:58:48 +0200] [118029] [ERROR] Can't connect to ('127.0.0.1', 5000)\n", + "Running the mlflow server failed. 
Please see the logs above for details.\n" ] } ], @@ -698,7 +734,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": { "slideshow": { "slide_type": "slide" @@ -751,7 +787,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": { "slideshow": { "slide_type": "slide" @@ -846,7 +882,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "slide" @@ -857,183 +893,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "2021/05/10 12:39:32 INFO mlflow.utils.conda: === Creating conda environment mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 ===\n", - "Collecting package metadata (repodata.json): done\n", - "Solving environment: done\n", - "Preparing transaction: done\n", - "Verifying transaction: done\n", - "Executing transaction: done\n", - "Installing pip dependencies: / Ran pip subprocess with arguments:\n", - "['/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/bin/python', '-m', 'pip', 'install', '-U', '-r', '/home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt']\n", - "Pip subprocess output:\n", - "Collecting scikit-learn==0.23.2\n", - " Using cached scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl (6.8 MB)\n", - "Collecting mlflow>=1.0\n", - " Downloading mlflow-1.17.0-py3-none-any.whl (14.2 MB)\n", - "Collecting joblib>=0.11\n", - " Using cached joblib-1.0.1-py3-none-any.whl (303 kB)\n", - "Collecting scipy>=0.19.1\n", - " Using cached scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)\n", - "Requirement already satisfied: numpy>=1.13.3 in /home/tomek/.local/lib/python3.6/site-packages (from scikit-learn==0.23.2->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 1)) (1.15.4)\n", - "Collecting threadpoolctl>=2.0.0\n", - " Using cached 
threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n", - "Collecting pandas\n", - " Using cached pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)\n", - "Collecting pyyaml\n", - " Using cached PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (640 kB)\n", - "Collecting gunicorn\n", - " Using cached gunicorn-20.1.0-py3-none-any.whl (79 kB)\n", - "Collecting Flask\n", - " Using cached Flask-1.1.2-py2.py3-none-any.whl (94 kB)\n", - "Collecting alembic<=1.4.1\n", - " Using cached alembic-1.4.1-py2.py3-none-any.whl\n", - "Collecting prometheus-flask-exporter\n", - " Downloading prometheus_flask_exporter-0.18.2.tar.gz (22 kB)\n", - "Collecting entrypoints\n", - " Using cached entrypoints-0.3-py2.py3-none-any.whl (11 kB)\n", - "Collecting databricks-cli>=0.8.7\n", - " Using cached databricks_cli-0.14.3-py3-none-any.whl\n", - "Collecting requests>=2.17.3\n", - " Using cached requests-2.25.1-py2.py3-none-any.whl (61 kB)\n", - "Collecting docker>=4.0.0\n", - " Using cached docker-5.0.0-py2.py3-none-any.whl (146 kB)\n", - "Collecting sqlalchemy\n", - " Downloading SQLAlchemy-1.4.14-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n", - "Collecting cloudpickle\n", - " Using cached cloudpickle-1.6.0-py3-none-any.whl (23 kB)\n", - "Collecting pytz\n", - " Using cached pytz-2021.1-py2.py3-none-any.whl (510 kB)\n", - "Collecting protobuf>=3.6.0\n", - " Downloading protobuf-3.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\n", - "Collecting click>=7.0\n", - " Using cached click-7.1.2-py2.py3-none-any.whl (82 kB)\n", - "Collecting sqlparse>=0.3.1\n", - " Using cached sqlparse-0.4.1-py3-none-any.whl (42 kB)\n", - "Collecting querystring-parser\n", - " Using cached querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n", - "Collecting gitpython>=2.1.0\n", - " Using cached GitPython-3.1.14-py3-none-any.whl (159 kB)\n", - "Collecting Mako\n", - " Using cached Mako-1.1.4-py2.py3-none-any.whl (75 kB)\n", - 
"Collecting python-editor>=0.3\n", - " Using cached python_editor-1.0.4-py3-none-any.whl (4.9 kB)\n", - "Collecting python-dateutil\n", - " Using cached python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)\n", - "Collecting tabulate>=0.7.7\n", - " Using cached tabulate-0.8.9-py3-none-any.whl (25 kB)\n", - "Requirement already satisfied: six>=1.10.0 in /home/tomek/.local/lib/python3.6/site-packages (from databricks-cli>=0.8.7->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (1.12.0)\n", - "Collecting websocket-client>=0.32.0\n", - " Downloading websocket_client-0.59.0-py2.py3-none-any.whl (67 kB)\n", - "Collecting gitdb<5,>=4.0.1\n", - " Using cached gitdb-4.0.7-py3-none-any.whl (63 kB)\n", - "Collecting smmap<5,>=3.0.1\n", - " Using cached smmap-4.0.0-py2.py3-none-any.whl (24 kB)\n", - "Collecting idna<3,>=2.5\n", - " Using cached idna-2.10-py2.py3-none-any.whl (58 kB)\n", - "Collecting chardet<5,>=3.0.2\n", - " Using cached chardet-4.0.0-py2.py3-none-any.whl (178 kB)\n", - "Collecting urllib3<1.27,>=1.21.1\n", - " Using cached urllib3-1.26.4-py2.py3-none-any.whl (153 kB)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from requests>=2.17.3->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (2020.12.5)\n", - "Collecting greenlet!=0.4.17\n", - " Downloading greenlet-1.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (155 kB)\n", - "Collecting importlib-metadata\n", - " Using cached importlib_metadata-4.0.1-py3-none-any.whl (16 kB)\n", - "Collecting itsdangerous>=0.24\n", - " Using cached itsdangerous-1.1.0-py2.py3-none-any.whl (16 kB)\n", - "Collecting Werkzeug>=0.15\n", - " Using cached Werkzeug-1.0.1-py2.py3-none-any.whl (298 
kB)\n", - "Collecting Jinja2>=2.10.1\n", - " Using cached Jinja2-2.11.3-py2.py3-none-any.whl (125 kB)\n", - "Collecting MarkupSafe>=0.23\n", - " Using cached MarkupSafe-1.1.1-cp36-cp36m-manylinux2010_x86_64.whl (32 kB)\n", - "Requirement already satisfied: setuptools>=3.0 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from gunicorn->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (52.0.0.post20210125)\n", - "Collecting typing-extensions>=3.6.4\n", - " Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)\n", - "Collecting zipp>=0.5\n", - " Using cached zipp-3.4.1-py3-none-any.whl (5.2 kB)\n", - "Collecting prometheus_client\n", - " Using cached prometheus_client-0.10.1-py2.py3-none-any.whl (55 kB)\n", - "Building wheels for collected packages: prometheus-flask-exporter\n", - " Building wheel for prometheus-flask-exporter (setup.py): started\n", - " Building wheel for prometheus-flask-exporter (setup.py): finished with status 'done'\n", - " Created wheel for prometheus-flask-exporter: filename=prometheus_flask_exporter-0.18.2-py3-none-any.whl size=17399 sha256=84da5903cdaabc8f667b7b2e3d5f63a3021cab3d4f4fc1981d9d2a3ab5264738\n", - " Stored in directory: /home/tomek/.cache/pip/wheels/15/77/e8/3ca90b66243b0b58d5a5323a3da02cc8c5daf1de7a65141701\n", - "Successfully built prometheus-flask-exporter\n", - "Installing collected packages: zipp, typing-extensions, MarkupSafe, Werkzeug, urllib3, smmap, Jinja2, itsdangerous, importlib-metadata, idna, greenlet, click, chardet, websocket-client, tabulate, sqlalchemy, requests, pytz, python-editor, python-dateutil, prometheus-client, Mako, gitdb, Flask, threadpoolctl, sqlparse, scipy, querystring-parser, pyyaml, protobuf, prometheus-flask-exporter, pandas, joblib, gunicorn, gitpython, entrypoints, docker, databricks-cli, cloudpickle, alembic, 
scikit-learn, mlflow\n", - "Successfully installed Flask-1.1.2 Jinja2-2.11.3 Mako-1.1.4 MarkupSafe-1.1.1 Werkzeug-1.0.1 alembic-1.4.1 chardet-4.0.0 click-7.1.2 cloudpickle-1.6.0 databricks-cli-0.14.3 docker-5.0.0 entrypoints-0.3 gitdb-4.0.7 gitpython-3.1.14 greenlet-1.1.0 gunicorn-20.1.0 idna-2.10 importlib-metadata-4.0.1 itsdangerous-1.1.0 joblib-1.0.1 mlflow-1.17.0 pandas-1.1.5 prometheus-client-0.10.1 prometheus-flask-exporter-0.18.2 protobuf-3.16.0 python-dateutil-2.8.1 python-editor-1.0.4 pytz-2021.1 pyyaml-5.4.1 querystring-parser-1.2.4 requests-2.25.1 scikit-learn-0.23.2 scipy-1.5.4 smmap-4.0.0 sqlalchemy-1.4.14 sqlparse-0.4.1 tabulate-0.8.9 threadpoolctl-2.1.0 typing-extensions-3.10.0.0 urllib3-1.26.4 websocket-client-0.59.0 zipp-3.4.1\n", - "\n", - "done\n", - "#\n", - "# To activate this environment, use\n", - "#\n", - "# $ conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29\n", - "#\n", - "# To deactivate an active environment, use\n", - "#\n", - "# $ conda deactivate\n", - "\n", - "2021/05/10 12:40:17 INFO mlflow.projects.utils: === Created directory /tmp/tmpgvcpfml8 for downloading remote URIs passed to arguments of type 'path' ===\n", - "2021/05/10 12:40:17 INFO mlflow.projects.backend.local: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 1>&2 && python train.py 0.42 0.1' in run with ID 'b9b3795a2898495d95c650bafc0dcc76' === \n", - "ERROR:__main__:Unable to download training & test CSV, check your internet connection. 
Error: \n", - "Traceback (most recent call last):\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1349, in do_open\n", - " encode_chunked=req.has_header('Transfer-encoding'))\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1287, in request\n", - " self._send_request(method, url, body, headers, encode_chunked)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1333, in _send_request\n", - " self.endheaders(body, encode_chunked=encode_chunked)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1282, in endheaders\n", - " self._send_output(message_body, encode_chunked=encode_chunked)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1042, in _send_output\n", - " self.send(msg)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 980, in send\n", - " self.connect()\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 952, in connect\n", - " (self.host,self.port), self.timeout, self.source_address)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 724, in create_connection\n", - " raise err\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 713, in create_connection\n", - " sock.connect(sa)\n", - "TimeoutError: [Errno 110] Connection timed out\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File 
\"train.py\", line 40, in \n", - " data = pd.read_csv(csv_url, sep=\";\")\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 688, in read_csv\n", - " return _read(filepath_or_buffer, kwds)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 437, in _read\n", - " filepath_or_buffer, encoding, compression\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 183, in get_filepath_or_buffer\n", - " req = urlopen(filepath_or_buffer)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 137, in urlopen\n", - " return urllib.request.urlopen(*args, **kwargs)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 223, in urlopen\n", - " return opener.open(url, data, timeout)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 526, in open\n", - " response = self._open(req, data)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 544, in _open\n", - " '_open', req)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 504, in _call_chain\n", - " result = func(*args)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1377, in http_open\n", - " return self.do_open(http.client.HTTPConnection, req)\n", - " File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", 
line 1351, in do_open\n", - " raise URLError(err)\n", - "urllib.error.URLError: \n", - "Traceback (most recent call last):\n", - " File \"train.py\", line 47, in \n", - " train, test = train_test_split(data)\n", - "NameError: name 'data' is not defined\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2021/05/10 12:42:29 ERROR mlflow.cli: === Run (ID 'b9b3795a2898495d95c650bafc0dcc76') failed ===\r\n" + "2021/05/16 17:59:10 INFO mlflow.projects.utils: === Created directory /tmp/tmprq4mdosv for downloading remote URIs passed to arguments of type 'path' ===\n", + "2021/05/16 17:59:10 INFO mlflow.projects.backend.local: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 1>&2 && python train.py 0.42 0.1' in run with ID '1860d321ea1545ff8866e4ba199d1712' === \n", + "Elasticnet model (alpha=0.420000, l1_ratio=0.100000):\n", + " RMSE: 0.7420620899060748\n", + " MAE: 0.5722846717246247\n", + " R2: 0.21978513651550236\n", + "2021/05/16 17:59:19 INFO mlflow.projects: === Run (ID '1860d321ea1545ff8866e4ba199d1712') succeeded ===\n" ] } ], @@ -1053,6 +919,461 @@ "1. Dodaj do swojego projektu logowanie parametrów i metryk za pomocą MLflow (polecenia `mlflow.log_param` i `mlflow.log_metric`\n", "2. 
Dodaj plik MLProject definiujący polecenia do trenowania i testowania, ich parametry wywołania oraz środowisko (użyj zdefiniowanego wcześniej obrazu Docker)" ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## MLflow Models\n", + "\n", + "MLflow Models to konwencja zapisu modeli, która ułatwia potem ich załadowanie i użycie" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Rodzaje modeli (\"flavors\") wspierane przez MLflow:\n", + "\n", + " - Python Function (python_function)\n", + " - PyTorch (pytorch)\n", + " - TensorFlow (tensorflow)\n", + " - Keras (keras)\n", + " - Scikit-learn (sklearn)\n", + " - Spacy(spaCy)\n", + " - ONNX (onnx)\n", + " - R Function (crate)\n", + " - H2O (h2o)\n", + " - MLeap (mleap)\n", + " - Spark MLlib (spark)\n", + " - MXNet Gluon (gluon)\n", + " - XGBoost (xgboost)\n", + " - LightGBM (lightgbm)\n", + " - CatBoost (catboost)\n", + " - Fastai(fastai)\n", + " - Statsmodels (statsmodels)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Zapisywanie modelu\n", + "Model ML można zapisać w MLflow przy pomocy jednej z dwóch funkcji z pakietu odpowiadającego używanej przez nas bibliotece:\n", + " - `save_model()` - zapisuje model na dysku\n", + " - `log_model()` - zapisuje model razem z innymi informacjami (metrykami, parametrami). 
W zależności od ustawień [\"tracking_uri\"](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.set_tracking_uri) może być to lokalny folder w `mlruns/ ` lub ścieżka na zdalnym serwerze MLflow\n", + "\n", + "```Python\n", + " mlflow.sklearn.save_model(lr, \"my_model\")\n", + "```\n", + "\n", + "```Python\n", + " mlflow.keras.save_model(lr, \"my_model\")\n", + "```\n", + "\n", + "Wywołanie tej funkcji spowoduje stworzenie katalogu \"my_model\" zawierającego:\n", + " - plik *MLmodel* zawierający informacje o sposobach, w jakie model można załadować (\"flavors\") oraz ścieżki do plików związanych z modelem, takich jak:\n", + " - *conda.yaml* - opis środowiska potrzebnego do załadowania modelu\n", + " - *model.pkl* - plik z zserializowanym modelem\n", + "\n", + "Tylko plik *MLmodel* jest specjalnym plikiem MLflow - reszta zależy od konkretnego \"flavor\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "conda.yaml MLmodel model.pkl\r\n" + ] + } + ], + "source": [ + "ls IUM_08/examples/my_model" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "total 12\r\n", + "-rw-rw-r-- 1 tomek tomek 153 maj 17 10:38 conda.yaml\r\n", + "-rw-rw-r-- 1 tomek tomek 958 maj 17 10:38 MLmodel\r\n", + "-rw-rw-r-- 1 tomek tomek 641 maj 17 10:38 model.pkl\r\n" + ] + } + ], + "source": [ + "! 
ls -l IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# %load IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model/MLmodel\n", + "artifact_path: model\n", + "flavors:\n", + " python_function:\n", + " env: conda.yaml\n", + " loader_module: mlflow.sklearn\n", + " model_path: model.pkl\n", + " python_version: 3.9.1\n", + " sklearn:\n", + " pickled_model: model.pkl\n", + " serialization_format: cloudpickle\n", + " sklearn_version: 0.24.2\n", + "run_id: b395b55b47fc43de876b67f5a4a5dae9\n", + "signature:\n", + " inputs: '[{\"name\": \"fixed acidity\", \"type\": \"double\"}, {\"name\": \"volatile acidity\",\n", + " \"type\": \"double\"}, {\"name\": \"citric acid\", \"type\": \"double\"}, {\"name\": \"residual\n", + " sugar\", \"type\": \"double\"}, {\"name\": \"chlorides\", \"type\": \"double\"}, {\"name\": \"free\n", + " sulfur dioxide\", \"type\": \"double\"}, {\"name\": \"total sulfur dioxide\", \"type\": \"double\"},\n", + " {\"name\": \"density\", \"type\": \"double\"}, {\"name\": \"pH\", \"type\": \"double\"}, {\"name\":\n", + " \"sulphates\", \"type\": \"double\"}, {\"name\": \"alcohol\", \"type\": \"double\"}]'\n", + " outputs: '[{\"type\": \"tensor\", \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1]}}]'\n", + "utc_time_created: '2021-05-17 08:38:41.749670'\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "# %load IUM_08/examples/my_model/conda.yaml\n", + "channels:\n", + "- defaults\n", + "- conda-forge\n", + "dependencies:\n", + "- python=3.9.1\n", + "- pip\n", + "- pip:\n", + " - mlflow\n", + " - scikit-learn==0.24.2\n", + " - cloudpickle==1.6.0\n", + "name: mlflow-env" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Dodatkowe pola w MLmodel\n", + "\n", + "\n", + "- *utc_time_created* - timestamp z czasem stworzenia modelu\n", + "- *run_id* - ID uruchomienia (\"run\"), które stworzyło ten model, jeśli model był zapisany za pomocą MLflow Tracking.\n", + "- *signature* - opisa danych wejściowych i wyjściowych w formacie JSON\n", + "- *input_example* przykładowe wejście przyjmowane przez model. Można je podać poprzez parametr `input_example` funkcji [log_model](https://mlflow.org/docs/latest/python_api/mlflow.sklearn.html#mlflow.sklearn.log_model)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([5.57688397, 5.50664777, 5.52550482, 5.50431125, 5.57688397])" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import mlflow\n", + "import pandas as pd\n", + "model = mlflow.sklearn.load_model(\"IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model\")\n", + "csv_url = \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n", + "data = pd.read_csv(csv_url, sep=\";\")\n", + "model.predict(data.drop([\"quality\"], axis=1).head())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Serwowanie modeli" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Usage: mlflow models [OPTIONS] COMMAND [ARGS]...\r\n", + "\r\n", + " Deploy MLflow models locally.\r\n", + "\r\n", + " To deploy a model associated with a run on a tracking server, set the\r\n", + " MLFLOW_TRACKING_URI environment variable to the URL of the desired server.\r\n", + 
"\r\n", + "Options:\r\n", + " --help Show this message and exit.\r\n", + "\r\n", + "Commands:\r\n", + " build-docker **EXPERIMENTAL**: Builds a Docker image whose default...\r\n", + " predict Generate predictions in json format using a saved MLflow...\r\n", + " prepare-env **EXPERIMENTAL**: Performs any preparation necessary to...\r\n", + " serve Serve a model saved with MLflow by launching a webserver on...\r\n" + ] + } + ], + "source": [ + "!cd IUM_08/examples/; mlflow models --help" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Usage: mlflow models serve [OPTIONS]\r\n", + "\r\n", + " Serve a model saved with MLflow by launching a webserver on the specified\r\n", + " host and port. The command supports models with the ``python_function`` or\r\n", + " ``crate`` (R Function) flavor. For information about the input data\r\n", + " formats accepted by the webserver, see the following documentation:\r\n", + " https://www.mlflow.org/docs/latest/models.html#built-in-deployment-tools.\r\n", + "\r\n", + " You can make requests to ``POST /invocations`` in pandas split- or record-\r\n", + " oriented formats.\r\n", + "\r\n", + " Example:\r\n", + "\r\n", + " .. code-block:: bash\r\n", + "\r\n", + " $ mlflow models serve -m runs:/my-run-id/model-path &\r\n", + "\r\n", + " $ curl http://127.0.0.1:5000/invocations -H 'Content-Type:\r\n", + " application/json' -d '{ \"columns\": [\"a\", \"b\", \"c\"],\r\n", + " \"data\": [[1, 2, 3], [4, 5, 6]] }'\r\n", + "\r\n", + "Options:\r\n", + " -m, --model-uri URI URI to the model. A local path, a 'runs:/' URI, or a\r\n", + " remote storage URI (e.g., an 's3://' URI). 
For more\r\n", + " information about supported remote URIs for model\r\n", + " artifacts, see\r\n", + " https://mlflow.org/docs/latest/tracking.html#artifact-\r\n", + " stores [required]\r\n", + "\r\n", + " -p, --port INTEGER The port to listen on (default: 5000).\r\n", + " -h, --host HOST The network address to listen on (default: 127.0.0.1).\r\n", + " Use 0.0.0.0 to bind to all addresses if you want to\r\n", + " access the tracking server from other machines.\r\n", + "\r\n", + " -w, --workers TEXT Number of gunicorn worker processes to handle requests\r\n", + " (default: 4).\r\n", + "\r\n", + " --no-conda If specified, will assume that MLmodel/MLproject is\r\n", + " running within a Conda environment with the necessary\r\n", + " dependencies for the current project instead of\r\n", + " attempting to create a new conda environment.\r\n", + "\r\n", + " --install-mlflow If specified and there is a conda environment to be\r\n", + " activated mlflow will be installed into the environment\r\n", + " after it has been activated. 
The version of installed\r\n", + " mlflow will be the same asthe one used to invoke this\r\n", + " command.\r\n", + "\r\n", + " --help Show this message and exit.\r\n" + ] + } + ], + "source": [ + "!cd IUM_08/examples/; mlflow models serve --help" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"columns\":[\"fixed acidity\",\"volatile acidity\",\"citric acid\",\"residual sugar\",\"chlorides\",\"free sulfur dioxide\",\"total sulfur dioxide\",\"density\",\"pH\",\"sulphates\",\"alcohol\"],\"index\":[0],\"data\":[[7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4]]}\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "csv_url = \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n", + "data = pd.read_csv(csv_url, sep=\";\").drop([\"quality\"], axis=1).head(1).to_json(orient='split')\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5.576883967129615]" + ] + } + ], + "source": [ + "!curl http://127.0.0.1:5003/invocations -H 'Content-Type: application/json' -d '{\\\n", + " \"columns\":[\\\n", + " \"fixed acidity\",\"volatile acidity\",\"citric acid\",\"residual sugar\",\"chlorides\",\"free sulfur dioxide\",\"total sulfur dioxide\",\"density\",\"pH\",\"sulphates\",\"alcohol\"],\\\n", + " \"index\":[0],\\\n", + " \"data\":[[7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4]]}'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "```\n", + "$ cd IUM_08/examples/\n", + "$ mlflow models serve -m my_model\n", + "2021/05/17 08:52:07 INFO mlflow.models.cli: Selected backend for flavor 'python_function'\n", + "2021/05/17 
08:52:07 INFO mlflow.pyfunc.backend: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-503f0c7520a32f054a9d168bd099584a9439de9d 1>&2 && gunicorn --timeout=60 -b 127.0.0.1:5003 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'\n", + "[2021-05-17 08:52:07 +0200] [291217] [INFO] Starting gunicorn 20.1.0\n", + "[2021-05-17 08:52:07 +0200] [291217] [INFO] Listening at: http://127.0.0.1:5003 (291217)\n", + "[2021-05-17 08:52:07 +0200] [291217] [INFO] Using worker: sync\n", + "[2021-05-17 08:52:07 +0200] [291221] [INFO] Booting worker with pid: 291221\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## MLflow Registry\n", + " - umożliwia [zapisywanie](https://mlflow.org/docs/latest/model-registry.html#adding-an-mlflow-model-to-the-model-registry) i [ładowanie](https://mlflow.org/docs/latest/model-registry.html#fetching-an-mlflow-model-from-the-model-registry) modeli z centralnego rejestru\n", + " - Modele można też serwować bezpośrednio z rejestru:\n", + "\n", + "```bash\n", + "#!/usr/bin/env sh\n", + "\n", + "# Set environment variable for the tracking URL where the Model Registry resides\n", + "export MLFLOW_TRACKING_URI=http://localhost:5000\n", + "\n", + "# Serve the production model from the model registry\n", + "mlflow models serve -m \"models:/sk-learn-random-forest-reg-model/Production\"\n", + "```\n", + "\n", + "- Żeby było to możliwe, musimy mieć uruchomiony [serwer MLflow](https://mlflow.org/docs/latest/tracking.html#tracking-server)\n", + "- Umożliwia zarządzanie wersjami modeli i oznaczanie ich różnymi fazami, np. \"Staging\", \"Production\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Zadania\n", + "1. 
[2 pkt] Dodaj do joba treningowego wywołania MLflow, tak, żeby przy każdym uruchomieniu stworzyć i zarchiwizować katalog z modelem. Plik MLmodel powinien zawierać pola:\n", + " - signature\n", + " - input_example\n", + "\n", + " Folder powinien również zawierać środowisko - conda lub docker, umożliwiające uruchomienie projektu.\n", + "\n", + "2. [6 pkt] Wybierz jedną osobę z grupy. Załóżmy, że Twoje ID to s123456 a jej s654321. Stwórz na Jenkinsie projekt `s123456-predict-s654321`, w którym:\n", + " - pobierzesz artefakt z zapisanym modelem z joba osoby s654321\n", + " - dokonasz na nim predykcji danych wejściowych podanych w formacie json jako parametr zadania Jenkinsowego. Domyślną wartością tego parametru niech będą przykładowe dane wejściowe z `input_example`\n", + " \n", + "3. [1 pkt] Zarejestruj swój model w MLflow registry (dane do połączenia z rejestrem podam po jego pomyślnym skonfigurowaniu, nie później niż w środę 19.05.2021)\n", + "\n", + "4. [6 pkt] Stwórz na Jenkinsie projekt `s123456-predict-s654321-from-registry`, który zrealizuje to samo zadanie co `s123456-predict-s654321`, ale tym razem pobierze model z rejestru MLflow zamiast z artefaktów Jenkinsa" + ] + } ], "metadata": {