Zajęcia 08, część 2.

This commit is contained in:
Tomasz Ziętkiewicz 2021-05-17 12:52:53 +02:00
parent d9b9e63875
commit aa4f26b920

View File

@ -63,7 +63,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 1,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -78,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 30,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -117,6 +117,7 @@
"logging.basicConfig(level=logging.WARN)\n",
"logger = logging.getLogger(__name__)\n",
"\n",
"#mlflow.set_tracking_uri(\"http://localhost:5001\")\n",
"\n",
"def eval_metrics(actual, pred):\n",
" rmse = np.sqrt(mean_squared_error(actual, pred))\n",
@ -173,6 +174,9 @@
" mlflow.log_metric(\"rmse\", rmse)\n",
" mlflow.log_metric(\"r2\", r2)\n",
" mlflow.log_metric(\"mae\", mae)\n",
" \n",
" # Infer model signature to log it\n",
" signature = mlflow.models.signature.infer_signature(train_x, lr.predict(train_x))\n",
"\n",
" tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme\n",
"\n",
@ -183,14 +187,14 @@
" # There are other ways to use the Model Registry, which depends on the use case,\n",
" # please refer to the doc for more information:\n",
" # https://mlflow.org/docs/latest/model-registry.html#api-workflow\n",
" mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\")\n",
" mlflow.sklearn.log_model(lr, \"model\", registered_model_name=\"ElasticnetWineModel\", signature=signature)\n",
" else:\n",
" mlflow.sklearn.log_model(lr, \"model\")"
" mlflow.sklearn.log_model(lr, \"model\", signature=signature)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 31,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -215,7 +219,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 4,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -566,7 +570,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 32,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -578,21 +582,21 @@
"output_type": "stream",
"text": [
"total 16\r\n",
"drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 15918a3901854356933736dfc0935807\r\n",
"drwxrwxr-x 6 tomek tomek 4096 maj 2 16:36 23ae1069b29e4955ac9f3536c71e7ac2\r\n",
"drwxrwxr-x 6 tomek tomek 4096 maj 2 17:07 b7ddb17a37404d7898e105afa5c20287\r\n",
"-rw-rw-r-- 1 tomek tomek 151 maj 2 16:36 meta.yaml\r\n"
"drwxrwxr-x 6 tomek tomek 4096 maj 17 08:43 375cde31bdd44a45a91fd7cee92ebcda\r\n",
"drwxrwxr-x 6 tomek tomek 4096 maj 17 10:38 b395b55b47fc43de876b67f5a4a5dae9\r\n",
"drwxrwxr-x 6 tomek tomek 4096 maj 17 09:15 b3ead42eca964113b29e7e5f8bcb7bb7\r\n",
"-rw-rw-r-- 1 tomek tomek 151 maj 17 08:43 meta.yaml\r\n"
]
}
],
"source": [
"### Informacje o przebiegach eksperymentu zostały zapisane w katalogu mlruns\n",
"! ls -l IUM_08/examples/mlruns/0"
"! ls -l IUM_08/examples/mlruns/0 | head"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 33,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -603,13 +607,45 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[2021-05-10 12:21:16 +0200] [20029] [INFO] Starting gunicorn 20.1.0\n",
"[2021-05-10 12:21:16 +0200] [20029] [INFO] Listening at: http://127.0.0.1:5000 (20029)\n",
"[2021-05-10 12:21:16 +0200] [20029] [INFO] Using worker: sync\n",
"[2021-05-10 12:21:16 +0200] [20030] [INFO] Booting worker with pid: 20030\n",
"^C\n",
"[2021-05-10 12:22:32 +0200] [20029] [INFO] Handling signal: int\n",
"[2021-05-10 12:22:32 +0200] [20030] [INFO] Worker exiting (pid: 20030)\n"
"total 20\r\n",
"drwxrwxr-x 3 tomek tomek 4096 maj 17 08:43 artifacts\r\n",
"-rw-rw-r-- 1 tomek tomek 423 maj 17 08:43 meta.yaml\r\n",
"drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 metrics\r\n",
"drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 params\r\n",
"drwxrwxr-x 2 tomek tomek 4096 maj 17 08:43 tags\r\n"
]
}
],
"source": [
"! ls -l IUM_08/examples/mlruns/0/375cde31bdd44a45a91fd7cee92ebcda"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2021-05-16 17:58:43 +0200] [118029] [INFO] Starting gunicorn 20.1.0\n",
"[2021-05-16 17:58:43 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n",
"[2021-05-16 17:58:43 +0200] [118029] [ERROR] Retrying in 1 second.\n",
"[2021-05-16 17:58:44 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n",
"[2021-05-16 17:58:44 +0200] [118029] [ERROR] Retrying in 1 second.\n",
"[2021-05-16 17:58:45 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n",
"[2021-05-16 17:58:45 +0200] [118029] [ERROR] Retrying in 1 second.\n",
"[2021-05-16 17:58:46 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n",
"[2021-05-16 17:58:46 +0200] [118029] [ERROR] Retrying in 1 second.\n",
"[2021-05-16 17:58:47 +0200] [118029] [ERROR] Connection in use: ('127.0.0.1', 5000)\n",
"[2021-05-16 17:58:47 +0200] [118029] [ERROR] Retrying in 1 second.\n",
"[2021-05-16 17:58:48 +0200] [118029] [ERROR] Can't connect to ('127.0.0.1', 5000)\n",
"Running the mlflow server failed. Please see the logs above for details.\n"
]
}
],
@ -698,7 +734,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -751,7 +787,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 10,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -846,7 +882,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 11,
"metadata": {
"slideshow": {
"slide_type": "slide"
@ -857,183 +893,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2021/05/10 12:39:32 INFO mlflow.utils.conda: === Creating conda environment mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 ===\n",
"Collecting package metadata (repodata.json): done\n",
"Solving environment: done\n",
"Preparing transaction: done\n",
"Verifying transaction: done\n",
"Executing transaction: done\n",
"Installing pip dependencies: / Ran pip subprocess with arguments:\n",
"['/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/bin/python', '-m', 'pip', 'install', '-U', '-r', '/home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt']\n",
"Pip subprocess output:\n",
"Collecting scikit-learn==0.23.2\n",
" Using cached scikit_learn-0.23.2-cp36-cp36m-manylinux1_x86_64.whl (6.8 MB)\n",
"Collecting mlflow>=1.0\n",
" Downloading mlflow-1.17.0-py3-none-any.whl (14.2 MB)\n",
"Collecting joblib>=0.11\n",
" Using cached joblib-1.0.1-py3-none-any.whl (303 kB)\n",
"Collecting scipy>=0.19.1\n",
" Using cached scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)\n",
"Requirement already satisfied: numpy>=1.13.3 in /home/tomek/.local/lib/python3.6/site-packages (from scikit-learn==0.23.2->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 1)) (1.15.4)\n",
"Collecting threadpoolctl>=2.0.0\n",
" Using cached threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n",
"Collecting pandas\n",
" Using cached pandas-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (9.5 MB)\n",
"Collecting pyyaml\n",
" Using cached PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl (640 kB)\n",
"Collecting gunicorn\n",
" Using cached gunicorn-20.1.0-py3-none-any.whl (79 kB)\n",
"Collecting Flask\n",
" Using cached Flask-1.1.2-py2.py3-none-any.whl (94 kB)\n",
"Collecting alembic<=1.4.1\n",
" Using cached alembic-1.4.1-py2.py3-none-any.whl\n",
"Collecting prometheus-flask-exporter\n",
" Downloading prometheus_flask_exporter-0.18.2.tar.gz (22 kB)\n",
"Collecting entrypoints\n",
" Using cached entrypoints-0.3-py2.py3-none-any.whl (11 kB)\n",
"Collecting databricks-cli>=0.8.7\n",
" Using cached databricks_cli-0.14.3-py3-none-any.whl\n",
"Collecting requests>=2.17.3\n",
" Using cached requests-2.25.1-py2.py3-none-any.whl (61 kB)\n",
"Collecting docker>=4.0.0\n",
" Using cached docker-5.0.0-py2.py3-none-any.whl (146 kB)\n",
"Collecting sqlalchemy\n",
" Downloading SQLAlchemy-1.4.14-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n",
"Collecting cloudpickle\n",
" Using cached cloudpickle-1.6.0-py3-none-any.whl (23 kB)\n",
"Collecting pytz\n",
" Using cached pytz-2021.1-py2.py3-none-any.whl (510 kB)\n",
"Collecting protobuf>=3.6.0\n",
" Downloading protobuf-3.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)\n",
"Collecting click>=7.0\n",
" Using cached click-7.1.2-py2.py3-none-any.whl (82 kB)\n",
"Collecting sqlparse>=0.3.1\n",
" Using cached sqlparse-0.4.1-py3-none-any.whl (42 kB)\n",
"Collecting querystring-parser\n",
" Using cached querystring_parser-1.2.4-py2.py3-none-any.whl (7.9 kB)\n",
"Collecting gitpython>=2.1.0\n",
" Using cached GitPython-3.1.14-py3-none-any.whl (159 kB)\n",
"Collecting Mako\n",
" Using cached Mako-1.1.4-py2.py3-none-any.whl (75 kB)\n",
"Collecting python-editor>=0.3\n",
" Using cached python_editor-1.0.4-py3-none-any.whl (4.9 kB)\n",
"Collecting python-dateutil\n",
" Using cached python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)\n",
"Collecting tabulate>=0.7.7\n",
" Using cached tabulate-0.8.9-py3-none-any.whl (25 kB)\n",
"Requirement already satisfied: six>=1.10.0 in /home/tomek/.local/lib/python3.6/site-packages (from databricks-cli>=0.8.7->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (1.12.0)\n",
"Collecting websocket-client>=0.32.0\n",
" Downloading websocket_client-0.59.0-py2.py3-none-any.whl (67 kB)\n",
"Collecting gitdb<5,>=4.0.1\n",
" Using cached gitdb-4.0.7-py3-none-any.whl (63 kB)\n",
"Collecting smmap<5,>=3.0.1\n",
" Using cached smmap-4.0.0-py2.py3-none-any.whl (24 kB)\n",
"Collecting idna<3,>=2.5\n",
" Using cached idna-2.10-py2.py3-none-any.whl (58 kB)\n",
"Collecting chardet<5,>=3.0.2\n",
" Using cached chardet-4.0.0-py2.py3-none-any.whl (178 kB)\n",
"Collecting urllib3<1.27,>=1.21.1\n",
" Using cached urllib3-1.26.4-py2.py3-none-any.whl (153 kB)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from requests>=2.17.3->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (2020.12.5)\n",
"Collecting greenlet!=0.4.17\n",
" Downloading greenlet-1.1.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (155 kB)\n",
"Collecting importlib-metadata\n",
" Using cached importlib_metadata-4.0.1-py3-none-any.whl (16 kB)\n",
"Collecting itsdangerous>=0.24\n",
" Using cached itsdangerous-1.1.0-py2.py3-none-any.whl (16 kB)\n",
"Collecting Werkzeug>=0.15\n",
" Using cached Werkzeug-1.0.1-py2.py3-none-any.whl (298 kB)\n",
"Collecting Jinja2>=2.10.1\n",
" Using cached Jinja2-2.11.3-py2.py3-none-any.whl (125 kB)\n",
"Collecting MarkupSafe>=0.23\n",
" Using cached MarkupSafe-1.1.1-cp36-cp36m-manylinux2010_x86_64.whl (32 kB)\n",
"Requirement already satisfied: setuptools>=3.0 in /media/tomek/Linux_data/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages (from gunicorn->mlflow>=1.0->-r /home/tomek/AITech/repo/aitech-ium-private/IUM_08/examples/sklearn_elasticnet_wine/condaenv.xf9x7i2v.requirements.txt (line 2)) (52.0.0.post20210125)\n",
"Collecting typing-extensions>=3.6.4\n",
" Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)\n",
"Collecting zipp>=0.5\n",
" Using cached zipp-3.4.1-py3-none-any.whl (5.2 kB)\n",
"Collecting prometheus_client\n",
" Using cached prometheus_client-0.10.1-py2.py3-none-any.whl (55 kB)\n",
"Building wheels for collected packages: prometheus-flask-exporter\n",
" Building wheel for prometheus-flask-exporter (setup.py): started\n",
" Building wheel for prometheus-flask-exporter (setup.py): finished with status 'done'\n",
" Created wheel for prometheus-flask-exporter: filename=prometheus_flask_exporter-0.18.2-py3-none-any.whl size=17399 sha256=84da5903cdaabc8f667b7b2e3d5f63a3021cab3d4f4fc1981d9d2a3ab5264738\n",
" Stored in directory: /home/tomek/.cache/pip/wheels/15/77/e8/3ca90b66243b0b58d5a5323a3da02cc8c5daf1de7a65141701\n",
"Successfully built prometheus-flask-exporter\n",
"Installing collected packages: zipp, typing-extensions, MarkupSafe, Werkzeug, urllib3, smmap, Jinja2, itsdangerous, importlib-metadata, idna, greenlet, click, chardet, websocket-client, tabulate, sqlalchemy, requests, pytz, python-editor, python-dateutil, prometheus-client, Mako, gitdb, Flask, threadpoolctl, sqlparse, scipy, querystring-parser, pyyaml, protobuf, prometheus-flask-exporter, pandas, joblib, gunicorn, gitpython, entrypoints, docker, databricks-cli, cloudpickle, alembic, scikit-learn, mlflow\n",
"Successfully installed Flask-1.1.2 Jinja2-2.11.3 Mako-1.1.4 MarkupSafe-1.1.1 Werkzeug-1.0.1 alembic-1.4.1 chardet-4.0.0 click-7.1.2 cloudpickle-1.6.0 databricks-cli-0.14.3 docker-5.0.0 entrypoints-0.3 gitdb-4.0.7 gitpython-3.1.14 greenlet-1.1.0 gunicorn-20.1.0 idna-2.10 importlib-metadata-4.0.1 itsdangerous-1.1.0 joblib-1.0.1 mlflow-1.17.0 pandas-1.1.5 prometheus-client-0.10.1 prometheus-flask-exporter-0.18.2 protobuf-3.16.0 python-dateutil-2.8.1 python-editor-1.0.4 pytz-2021.1 pyyaml-5.4.1 querystring-parser-1.2.4 requests-2.25.1 scikit-learn-0.23.2 scipy-1.5.4 smmap-4.0.0 sqlalchemy-1.4.14 sqlparse-0.4.1 tabulate-0.8.9 threadpoolctl-2.1.0 typing-extensions-3.10.0.0 urllib3-1.26.4 websocket-client-0.59.0 zipp-3.4.1\n",
"\n",
"done\n",
"#\n",
"# To activate this environment, use\n",
"#\n",
"# $ conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29\n",
"#\n",
"# To deactivate an active environment, use\n",
"#\n",
"# $ conda deactivate\n",
"\n",
"2021/05/10 12:40:17 INFO mlflow.projects.utils: === Created directory /tmp/tmpgvcpfml8 for downloading remote URIs passed to arguments of type 'path' ===\n",
"2021/05/10 12:40:17 INFO mlflow.projects.backend.local: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 1>&2 && python train.py 0.42 0.1' in run with ID 'b9b3795a2898495d95c650bafc0dcc76' === \n",
"ERROR:__main__:Unable to download training & test CSV, check your internet connection. Error: <urlopen error [Errno 110] Connection timed out>\n",
"Traceback (most recent call last):\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1349, in do_open\n",
" encode_chunked=req.has_header('Transfer-encoding'))\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1287, in request\n",
" self._send_request(method, url, body, headers, encode_chunked)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1333, in _send_request\n",
" self.endheaders(body, encode_chunked=encode_chunked)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1282, in endheaders\n",
" self._send_output(message_body, encode_chunked=encode_chunked)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 1042, in _send_output\n",
" self.send(msg)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 980, in send\n",
" self.connect()\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/http/client.py\", line 952, in connect\n",
" (self.host,self.port), self.timeout, self.source_address)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 724, in create_connection\n",
" raise err\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/socket.py\", line 713, in create_connection\n",
" sock.connect(sa)\n",
"TimeoutError: [Errno 110] Connection timed out\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"train.py\", line 40, in <module>\n",
" data = pd.read_csv(csv_url, sep=\";\")\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 688, in read_csv\n",
" return _read(filepath_or_buffer, kwds)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/parsers.py\", line 437, in _read\n",
" filepath_or_buffer, encoding, compression\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 183, in get_filepath_or_buffer\n",
" req = urlopen(filepath_or_buffer)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/site-packages/pandas/io/common.py\", line 137, in urlopen\n",
" return urllib.request.urlopen(*args, **kwargs)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 223, in urlopen\n",
" return opener.open(url, data, timeout)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 526, in open\n",
" response = self._open(req, data)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 544, in _open\n",
" '_open', req)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 504, in _call_chain\n",
" result = func(*args)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1377, in http_open\n",
" return self.do_open(http.client.HTTPConnection, req)\n",
" File \"/home/tomek/miniconda3/envs/mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29/lib/python3.6/urllib/request.py\", line 1351, in do_open\n",
" raise URLError(err)\n",
"urllib.error.URLError: <urlopen error [Errno 110] Connection timed out>\n",
"Traceback (most recent call last):\n",
" File \"train.py\", line 47, in <module>\n",
" train, test = train_test_split(data)\n",
"NameError: name 'data' is not defined\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021/05/10 12:42:29 ERROR mlflow.cli: === Run (ID 'b9b3795a2898495d95c650bafc0dcc76') failed ===\r\n"
"2021/05/16 17:59:10 INFO mlflow.projects.utils: === Created directory /tmp/tmprq4mdosv for downloading remote URIs passed to arguments of type 'path' ===\n",
"2021/05/16 17:59:10 INFO mlflow.projects.backend.local: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-5987e03d4dbaa5faa1a697bb113be9b9bdc39b29 1>&2 && python train.py 0.42 0.1' in run with ID '1860d321ea1545ff8866e4ba199d1712' === \n",
"Elasticnet model (alpha=0.420000, l1_ratio=0.100000):\n",
" RMSE: 0.7420620899060748\n",
" MAE: 0.5722846717246247\n",
" R2: 0.21978513651550236\n",
"2021/05/16 17:59:19 INFO mlflow.projects: === Run (ID '1860d321ea1545ff8866e4ba199d1712') succeeded ===\n"
]
}
],
@ -1053,6 +919,461 @@
"1. Dodaj do swojego projektu logowanie parametrów i metryk za pomocą MLflow (polecenia `mlflow.log_param` i `mlflow.log_metric`)\n",
"2. Dodaj plik MLproject definiujący polecenia do trenowania i testowania, ich parametry wywołania oraz środowisko (użyj zdefiniowanego wcześniej obrazu Docker)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## MLflow Models\n",
"\n",
"MLflow Models to konwencja zapisu modeli, która ułatwia potem ich załadowanie i użycie"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Rodzaje modeli (\"flavors\") wspierane przez MLflow:\n",
"\n",
" - Python Function (python_function)\n",
" - PyTorch (pytorch)\n",
" - TensorFlow (tensorflow)\n",
" - Keras (keras)\n",
" - Scikit-learn (sklearn)\n",
" - spaCy (spacy)\n",
" - ONNX (onnx)\n",
" - R Function (crate)\n",
" - H2O (h2o)\n",
" - MLeap (mleap)\n",
" - Spark MLlib (spark)\n",
" - MXNet Gluon (gluon)\n",
" - XGBoost (xgboost)\n",
" - LightGBM (lightgbm)\n",
" - CatBoost (catboost)\n",
" - Fastai (fastai)\n",
" - Statsmodels (statsmodels)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Zapisywanie modelu\n",
"Model ML można zapisać w MLflow przy pomocy jednej z dwóch funkcji z pakietu odpowiadającego używanej przez nas bibliotece:\n",
" - `save_model()` - zapisuje model na dysku\n",
" - `log_model()` - zapisuje model razem z innymi informacjami (metrykami, parametrami). W zależności od ustawień [\"tracking_uri\"](https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.set_tracking_uri) może być to lokalny folder w `mlruns/` lub ścieżka na zdalnym serwerze MLflow\n",
"\n",
"```Python\n",
" mlflow.sklearn.save_model(lr, \"my_model\")\n",
"```\n",
"\n",
"```Python\n",
" mlflow.keras.save_model(lr, \"my_model\")\n",
"```\n",
"\n",
"Wywołanie tej funkcji spowoduje stworzenie katalogu \"my_model\" zawierającego:\n",
" - plik *MLmodel* zawierający informacje o sposobach, w jaki model można załadować (\"flavors\") oraz ścieżki do plików związanych z modelem, takich jak:\n",
" - *conda.yaml* - opis środowiska potrzebnego do załadowania modelu\n",
" - *model.pkl* - plik ze zserializowanym modelem\n",
"\n",
"Tylko plik *MLmodel* jest specjalnym plikiem MLflow - reszta zależy od konkretnego \"flavor\"\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"conda.yaml MLmodel model.pkl\r\n"
]
}
],
"source": [
"ls IUM_08/examples/my_model"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 12\r\n",
"-rw-rw-r-- 1 tomek tomek 153 maj 17 10:38 conda.yaml\r\n",
"-rw-rw-r-- 1 tomek tomek 958 maj 17 10:38 MLmodel\r\n",
"-rw-rw-r-- 1 tomek tomek 641 maj 17 10:38 model.pkl\r\n"
]
}
],
"source": [
"! ls -l IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# %load IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model/MLmodel\n",
"artifact_path: model\n",
"flavors:\n",
" python_function:\n",
" env: conda.yaml\n",
" loader_module: mlflow.sklearn\n",
" model_path: model.pkl\n",
" python_version: 3.9.1\n",
" sklearn:\n",
" pickled_model: model.pkl\n",
" serialization_format: cloudpickle\n",
" sklearn_version: 0.24.2\n",
"run_id: b395b55b47fc43de876b67f5a4a5dae9\n",
"signature:\n",
" inputs: '[{\"name\": \"fixed acidity\", \"type\": \"double\"}, {\"name\": \"volatile acidity\",\n",
" \"type\": \"double\"}, {\"name\": \"citric acid\", \"type\": \"double\"}, {\"name\": \"residual\n",
" sugar\", \"type\": \"double\"}, {\"name\": \"chlorides\", \"type\": \"double\"}, {\"name\": \"free\n",
" sulfur dioxide\", \"type\": \"double\"}, {\"name\": \"total sulfur dioxide\", \"type\": \"double\"},\n",
" {\"name\": \"density\", \"type\": \"double\"}, {\"name\": \"pH\", \"type\": \"double\"}, {\"name\":\n",
" \"sulphates\", \"type\": \"double\"}, {\"name\": \"alcohol\", \"type\": \"double\"}]'\n",
" outputs: '[{\"type\": \"tensor\", \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1]}}]'\n",
"utc_time_created: '2021-05-17 08:38:41.749670'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# %load IUM_08/examples/my_model/conda.yaml\n",
"channels:\n",
"- defaults\n",
"- conda-forge\n",
"dependencies:\n",
"- python=3.9.1\n",
"- pip\n",
"- pip:\n",
" - mlflow\n",
" - scikit-learn==0.24.2\n",
" - cloudpickle==1.6.0\n",
"name: mlflow-env"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Dodatkowe pola w MLmodel\n",
"\n",
"\n",
"- *utc_time_created* - timestamp z czasem stworzenia modelu\n",
"- *run_id* - ID uruchomienia (\"run\"), które stworzyło ten model, jeśli model był zapisany za pomocą MLflow Tracking.\n",
"- *signature* - opis danych wejściowych i wyjściowych w formacie JSON\n",
"- *input_example* - przykładowe wejście przyjmowane przez model. Można je podać poprzez parametr `input_example` funkcji [log_model](https://mlflow.org/docs/latest/python_api/mlflow.sklearn.html#mlflow.sklearn.log_model)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([5.57688397, 5.50664777, 5.52550482, 5.50431125, 5.57688397])"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import mlflow\n",
"import pandas as pd\n",
"model = mlflow.sklearn.load_model(\"IUM_08/examples/mlruns/0/b395b55b47fc43de876b67f5a4a5dae9/artifacts/model\")\n",
"csv_url = \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n",
"data = pd.read_csv(csv_url, sep=\";\")\n",
"model.predict(data.drop([\"quality\"], axis=1).head())"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Serwowanie modeli"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Usage: mlflow models [OPTIONS] COMMAND [ARGS]...\r\n",
"\r\n",
" Deploy MLflow models locally.\r\n",
"\r\n",
" To deploy a model associated with a run on a tracking server, set the\r\n",
" MLFLOW_TRACKING_URI environment variable to the URL of the desired server.\r\n",
"\r\n",
"Options:\r\n",
" --help Show this message and exit.\r\n",
"\r\n",
"Commands:\r\n",
" build-docker **EXPERIMENTAL**: Builds a Docker image whose default...\r\n",
" predict Generate predictions in json format using a saved MLflow...\r\n",
" prepare-env **EXPERIMENTAL**: Performs any preparation necessary to...\r\n",
" serve Serve a model saved with MLflow by launching a webserver on...\r\n"
]
}
],
"source": [
"!cd IUM_08/examples/; mlflow models --help"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Usage: mlflow models serve [OPTIONS]\r\n",
"\r\n",
" Serve a model saved with MLflow by launching a webserver on the specified\r\n",
" host and port. The command supports models with the ``python_function`` or\r\n",
" ``crate`` (R Function) flavor. For information about the input data\r\n",
" formats accepted by the webserver, see the following documentation:\r\n",
" https://www.mlflow.org/docs/latest/models.html#built-in-deployment-tools.\r\n",
"\r\n",
" You can make requests to ``POST /invocations`` in pandas split- or record-\r\n",
" oriented formats.\r\n",
"\r\n",
" Example:\r\n",
"\r\n",
" .. code-block:: bash\r\n",
"\r\n",
" $ mlflow models serve -m runs:/my-run-id/model-path &\r\n",
"\r\n",
" $ curl http://127.0.0.1:5000/invocations -H 'Content-Type:\r\n",
" application/json' -d '{ \"columns\": [\"a\", \"b\", \"c\"],\r\n",
" \"data\": [[1, 2, 3], [4, 5, 6]] }'\r\n",
"\r\n",
"Options:\r\n",
" -m, --model-uri URI URI to the model. A local path, a 'runs:/' URI, or a\r\n",
" remote storage URI (e.g., an 's3://' URI). For more\r\n",
" information about supported remote URIs for model\r\n",
" artifacts, see\r\n",
" https://mlflow.org/docs/latest/tracking.html#artifact-\r\n",
" stores [required]\r\n",
"\r\n",
" -p, --port INTEGER The port to listen on (default: 5000).\r\n",
" -h, --host HOST The network address to listen on (default: 127.0.0.1).\r\n",
" Use 0.0.0.0 to bind to all addresses if you want to\r\n",
" access the tracking server from other machines.\r\n",
"\r\n",
" -w, --workers TEXT Number of gunicorn worker processes to handle requests\r\n",
" (default: 4).\r\n",
"\r\n",
" --no-conda If specified, will assume that MLmodel/MLproject is\r\n",
" running within a Conda environment with the necessary\r\n",
" dependencies for the current project instead of\r\n",
" attempting to create a new conda environment.\r\n",
"\r\n",
" --install-mlflow If specified and there is a conda environment to be\r\n",
" activated mlflow will be installed into the environment\r\n",
" after it has been activated. The version of installed\r\n",
" mlflow will be the same asthe one used to invoke this\r\n",
" command.\r\n",
"\r\n",
" --help Show this message and exit.\r\n"
]
}
],
"source": [
"!cd IUM_08/examples/; mlflow models serve --help"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"columns\":[\"fixed acidity\",\"volatile acidity\",\"citric acid\",\"residual sugar\",\"chlorides\",\"free sulfur dioxide\",\"total sulfur dioxide\",\"density\",\"pH\",\"sulphates\",\"alcohol\"],\"index\":[0],\"data\":[[7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4]]}\n"
]
}
],
"source": [
"import pandas as pd\n",
"csv_url = \"http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv\"\n",
"data = pd.read_csv(csv_url, sep=\";\").drop([\"quality\"], axis=1).head(1).to_json(orient='split')\n",
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[5.576883967129615]"
]
}
],
"source": [
"!curl http://127.0.0.1:5003/invocations -H 'Content-Type: application/json' -d '{\\\n",
" \"columns\":[\\\n",
" \"fixed acidity\",\"volatile acidity\",\"citric acid\",\"residual sugar\",\"chlorides\",\"free sulfur dioxide\",\"total sulfur dioxide\",\"density\",\"pH\",\"sulphates\",\"alcohol\"],\\\n",
" \"index\":[0],\\\n",
" \"data\":[[7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4]]}'"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"```\n",
"$ cd IUM_08/examples/\n",
"$ mlflow models serve -m my_model -p 5003\n",
"2021/05/17 08:52:07 INFO mlflow.models.cli: Selected backend for flavor 'python_function'\n",
"2021/05/17 08:52:07 INFO mlflow.pyfunc.backend: === Running command 'source /home/tomek/miniconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-503f0c7520a32f054a9d168bd099584a9439de9d 1>&2 && gunicorn --timeout=60 -b 127.0.0.1:5003 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'\n",
"[2021-05-17 08:52:07 +0200] [291217] [INFO] Starting gunicorn 20.1.0\n",
"[2021-05-17 08:52:07 +0200] [291217] [INFO] Listening at: http://127.0.0.1:5003 (291217)\n",
"[2021-05-17 08:52:07 +0200] [291217] [INFO] Using worker: sync\n",
"[2021-05-17 08:52:07 +0200] [291221] [INFO] Booting worker with pid: 291221\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## MLflow Registry\n",
" - umożliwia [zapisywanie](https://mlflow.org/docs/latest/model-registry.html#adding-an-mlflow-model-to-the-model-registry) i [ładowanie](https://mlflow.org/docs/latest/model-registry.html#fetching-an-mlflow-model-from-the-model-registry) modeli z centralnego rejestru\n",
" - Modele można też serwować bezpośrednio z rejestru:\n",
"\n",
"```bash\n",
"#!/usr/bin/env sh\n",
"\n",
"# Set environment variable for the tracking URL where the Model Registry resides\n",
"export MLFLOW_TRACKING_URI=http://localhost:5000\n",
"\n",
"# Serve the production model from the model registry\n",
"mlflow models serve -m \"models:/sk-learn-random-forest-reg-model/Production\"\n",
"```\n",
"\n",
"- Żeby było to możliwe, musimy mieć uruchomiony [serwer MLflow](https://mlflow.org/docs/latest/tracking.html#tracking-server)\n",
"- Umożliwia zarządzanie wersjami modeli i oznaczanie ich różnymi fazami, np. \"Staging\", \"Production\""
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Zadania\n",
"1. [2 pkt] Dodaj do joba treningowego wywołania MLflow, tak, żeby przy każdym uruchomieniu stworzyć i zarchiwizować katalog z modelem. Plik MLmodel powinien zawierać pola:\n",
" - signature\n",
" - input_example\n",
"\n",
" Folder powinien również zawierać środowisko - conda lub docker, umożliwiające uruchomienie projektu.\n",
"\n",
"2. [6 pkt] Wybierz jedną osobę z grupy. Załóżmy, że Twoje ID to s123456 a jej s654321. Stwórz na Jenkinsie projekt `s123456-predict-s654321`, w którym:\n",
" - pobierzesz artefakt z zapisanym modelem z joba osoby s654321\n",
" - dokonasz na nim predykcji danych wejściowych podanych w formacie json jako parametr zadania Jenkinsowego. Domyślną wartością tego parametru niech będą przykładowe dane wejściowe z `input_example`\n",
" \n",
"3. [1 pkt] Zarejestruj swój model w MLflow registry (dane do połączenia z rejestrem podam po jego pomyślnym skonfigurowaniu, nie później niż w środę 19.05.2021)\n",
"\n",
"4. [6 pkt] Stwórz na Jenkinsie projekt `s123456-predict-s654321-from-registry`, który zrealizuje to samo zadanie co `s123456-predict-s654321`, ale tym razem pobierze model z rejestru MLflow zamiast z artefaktów Jenkinsa"
]
}
],
"metadata": {