diff --git a/IUM_02.Dane.ipynb b/IUM_02.Dane.ipynb index cabe1bf..fbb72d0 100644 --- a/IUM_02.Dane.ipynb +++ b/IUM_02.Dane.ipynb @@ -210,11 +210,26 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ "## Pobranie danych" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Pobieranie z Kaggle" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -230,29 +245,32 @@ "output_type": "stream", "text": [ "Collecting kaggle\n", - " Using cached kaggle-1.5.12.tar.gz (58 kB)\n", - "Requirement already satisfied: six>=1.10 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (1.15.0)\n", - "Requirement already satisfied: certifi in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2021.5.30)\n", - "Requirement already satisfied: python-dateutil in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2.8.1)\n", - "Requirement already satisfied: requests in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2.25.1)\n", - "Requirement already satisfied: tqdm in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (4.59.0)\n", - "Requirement already satisfied: python-slugify in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (5.0.2)\n", - "Requirement already satisfied: urllib3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (1.26.4)\n", - "Requirement already satisfied: text-unidecode>=1.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from python-slugify->kaggle) (1.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from requests->kaggle) (2.10)\n", - "Requirement already satisfied: chardet<5,>=3.0.2 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from requests->kaggle) (4.0.0)\n", + " Downloading kaggle-1.5.13.tar.gz (63 kB)\n", + "\u001b[K |████████████████████████████████| 63 kB 558 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: six>=1.10 in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (1.16.0)\n", + "Requirement already satisfied: certifi in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2022.12.7)\n", + "Requirement already satisfied: python-dateutil in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2.8.2)\n", + "Requirement already satisfied: requests in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (2.27.1)\n", + "Requirement already satisfied: tqdm in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (4.64.0)\n", + "Collecting python-slugify\n", + " Downloading python_slugify-8.0.1-py2.py3-none-any.whl (9.7 kB)\n", + "Requirement already satisfied: urllib3 in /home/tomek/miniconda3/lib/python3.9/site-packages (from kaggle) (1.26.9)\n", + "Collecting text-unidecode>=1.3\n", + " Using cached text_unidecode-1.3-py2.py3-none-any.whl (78 kB)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from requests->kaggle) (3.3)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from requests->kaggle) (2.0.4)\n", "Building wheels for collected packages: kaggle\n", " Building wheel for kaggle (setup.py) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73053 sha256=1e6240d540651324d97a9772ad1ced30da7d7b5dc5956dc974eeeddf7c48844b\n", - " Stored in directory: /home/tomek/.cache/pip/wheels/ac/b2/c3/fa4706d469b5879105991d1c8be9a3c2ef329ba9fe2ce5085e\n", + "\u001b[?25h Created wheel for kaggle: filename=kaggle-1.5.13-py3-none-any.whl size=77733 sha256=83eee49596c7c76816c3bb9e8ffc0763b25e336457881b9790b9620548ae7297\n", + " Stored in directory: /home/tomek/.cache/pip/wheels/9c/45/15/6d6d116cd2539fb8f450d64b0aee4a480e5366bb11b42ac763\n", "Successfully built kaggle\n", - "Installing collected packages: kaggle\n", - "Successfully installed kaggle-1.5.12\n", - "Requirement already satisfied: pandas in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (1.2.4)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2.8.1)\n", - "Requirement already satisfied: numpy>=1.16.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (1.20.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2021.1)\n", - "Requirement already satisfied: six>=1.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n" + "Installing collected packages: text-unidecode, python-slugify, kaggle\n", + "Successfully installed kaggle-1.5.13 python-slugify-8.0.1 text-unidecode-1.3\n", + "Requirement already satisfied: pandas in /home/tomek/miniconda3/lib/python3.9/site-packages (1.5.3)\n", + "Requirement already satisfied: numpy>=1.20.3 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (1.24.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2022.7.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: six>=1.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" ] } ], @@ -264,7 +282,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, "source": [ " - Pobierzemy zbiór Iris z Kaggle: https://www.kaggle.com/uciml/iris\n", " - Licencja to \"Public Domain\", więc możemy z niego korzystać bez ograniczeń." @@ -272,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": { "slideshow": { "slide_type": "slide" @@ -283,9 +305,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Downloading iris.zip to /home/tomek/AITech/repo/aitech-ium\n", - " 0%| | 0.00/3.60k [00:00=2017.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2021.1)\n", - "Requirement already satisfied: numpy>=1.16.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (1.20.2)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2.8.1)\n", - "Requirement already satisfied: six>=1.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n", - "Collecting seaborn\n", - " Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)\n", - "\u001b[K |████████████████████████████████| 292 kB 1.1 MB/s eta 0:00:01\n", - "\u001b[?25hCollecting matplotlib>=2.2\n", - " Downloading matplotlib-3.5.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)\n", - "\u001b[K |████████████████████████████████| 11.2 MB 10.8 MB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: pandas>=0.23 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from seaborn) (1.2.4)\n", - "Requirement already satisfied: numpy>=1.15 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from seaborn) (1.20.2)\n", - "Requirement already satisfied: scipy>=1.0 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from seaborn) (1.6.3)\n", - "Requirement already satisfied: packaging>=20.0 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (20.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (2.8.1)\n", - "Collecting cycler>=0.10\n", - " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", - "Requirement already satisfied: pyparsing>=2.2.1 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)\n", - "Collecting fonttools>=4.22.0\n", - " Downloading fonttools-4.30.0-py3-none-any.whl (898 kB)\n", - "\u001b[K |████████████████████████████████| 898 kB 4.9 MB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: pillow>=6.2.0 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib>=2.2->seaborn) (8.2.0)\n", - "Collecting kiwisolver>=1.0.1\n", - " Downloading kiwisolver-1.3.2-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.6 MB)\n", - "\u001b[K |████████████████████████████████| 1.6 MB 7.7 MB/s eta 0:00:01\n", - "\u001b[?25hRequirement already satisfied: pytz>=2017.3 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from pandas>=0.23->seaborn) (2021.1)\n", - "Requirement already satisfied: six>=1.5 in /media/tomek/Linux_data/home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.15.0)\n", - "Installing collected packages: kiwisolver, fonttools, cycler, matplotlib, seaborn\n", - "Successfully installed cycler-0.11.0 fonttools-4.30.0 kiwisolver-1.3.2 matplotlib-3.5.1 seaborn-0.11.2\n" + "151 Iris.csv\r\n" ] } ], "source": [ - "!pip install --user pandas\n", - "!pip install --user seaborn" + "!wc -l Iris.csv" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": { "slideshow": { "slide_type": "slide" @@ -422,9 +409,88 @@ "!head -n 5 Iris.csv" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```less Iris.csv```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "## Inspekcja\n", + "- Do inspekcji danych użyjemy popularnej biblioteki pythonowej Pandas: https://pandas.pydata.org/\n", + "- Do wizualizacji użyjemy biblioteki Seaborn: https://seaborn.pydata.org/index.html\n", + "- Służy ona do analizy i operowania na danych tabelarycznych jak i szeregach czasowych" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in /home/tomek/miniconda3/lib/python3.9/site-packages (1.5.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (2022.7.1)\n", + "Requirement already satisfied: numpy>=1.20.3 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas) (1.24.2)\n", + "Requirement already satisfied: six>=1.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n", + "Collecting seaborn\n", + " Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)\n", + "\u001b[K |████████████████████████████████| 293 kB 694 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting matplotlib!=3.6.1,>=3.1\n", + " Downloading matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n", + "\u001b[K |████████████████████████████████| 11.6 MB 253 kB/s eta 0:00:01 |██████▊ | 2.4 MB 396 kB/s eta 0:00:24\n", + "\u001b[?25hRequirement already satisfied: pandas>=0.25 in /home/tomek/miniconda3/lib/python3.9/site-packages (from seaborn) (1.5.3)\n", + "Requirement already satisfied: numpy!=1.24.0,>=1.17 in /home/tomek/miniconda3/lib/python3.9/site-packages (from seaborn) (1.24.2)\n", + "Requirement already satisfied: packaging>=20.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (5.12.0)\n", + "Collecting contourpy>=1.0.1\n", + " Downloading contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (299 kB)\n", + "\u001b[K |████████████████████████████████| 299 kB 613 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyparsing>=2.3.1\n", + " Using cached pyparsing-3.0.9-py3-none-any.whl (98 kB)\n", + "Collecting fonttools>=4.22.0\n", + " Downloading fonttools-4.39.0-py3-none-any.whl (1.0 MB)\n", + "\u001b[K |████████████████████████████████| 1.0 MB 556 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting cycler>=0.10\n", + " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", + "Collecting pillow>=6.2.0\n", + " Downloading Pillow-9.4.0-cp39-cp39-manylinux_2_28_x86_64.whl (3.4 MB)\n", + "\u001b[K |████████████████████████████████| 3.4 MB 664 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting kiwisolver>=1.0.1\n", + " Downloading kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.6 MB)\n", + "\u001b[K |████████████████████████████████| 1.6 MB 1.0 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: zipp>=3.1.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.1->seaborn) (3.15.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas>=0.25->seaborn) (2022.7.1)\n", + "Requirement already satisfied: six>=1.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n", + "Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib, seaborn\n", + "Successfully installed contourpy-1.0.7 cycler-0.11.0 fonttools-4.39.0 kiwisolver-1.4.4 matplotlib-3.7.1 pillow-9.4.0 pyparsing-3.0.9 seaborn-0.12.2\n" + ] + } + ], + "source": [ + "!pip install --user pandas\n", + "!pip install --user seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 14, "metadata": { "slideshow": { "slide_type": "slide" @@ -595,7 +661,7 @@ "[150 rows x 6 columns]" ] }, - "execution_count": 5, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1076,6 +1142,395 @@ "sns.pairplot(data=iris.drop(columns=[\"Id\"]), hue=\"Species\")" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Pobieranie z HuggingFace 🤗 Datasets\n", + " - Szukamy na https://huggingface.co/datasets/\n", + " - Klikamy w \" Use in datasets library\" i kopiujemy kod\n", + " - Instalujemy bibliotekę datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting datasets\n", + " Downloading datasets-2.10.1-py3-none-any.whl (469 kB)\n", + "\u001b[K |████████████████████████████████| 469 kB 683 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting responses<0.19\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Collecting xxhash\n", + " Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", + "\u001b[K |████████████████████████████████| 212 kB 866 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyarrow>=6.0.0\n", + " Downloading pyarrow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.9 MB)\n", + "\u001b[K |████████████████████████████████| 34.9 MB 956 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /home/tomek/miniconda3/lib/python3.9/site-packages (from datasets) (1.24.2)\n", + "Requirement already satisfied: requests>=2.19.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from datasets) (2.27.1)\n", + "Collecting aiohttp\n", + " Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", + "\u001b[K |████████████████████████████████| 1.0 MB 859 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting pyyaml>=5.1\n", + " Downloading PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (661 kB)\n", + "\u001b[K |████████████████████████████████| 661 kB 857 kB/s eta 0:00:01\n", + "\u001b[?25hCollecting huggingface-hub<1.0.0,>=0.2.0\n", + " Downloading huggingface_hub-0.13.2-py3-none-any.whl (199 kB)\n", + "\u001b[K |████████████████████████████████| 199 kB 866 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: packaging in /home/tomek/miniconda3/lib/python3.9/site-packages (from datasets) (23.0)\n", + "Collecting multiprocess\n", + " Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)\n", + "\u001b[K |████████████████████████████████| 132 kB 1.0 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: tqdm>=4.62.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from datasets) (4.64.0)\n", + "Requirement already satisfied: pandas in /home/tomek/miniconda3/lib/python3.9/site-packages (from datasets) (1.5.3)\n", + "Collecting fsspec[http]>=2021.11.1\n", + " Downloading fsspec-2023.3.0-py3-none-any.whl (145 kB)\n", + "\u001b[K |████████████████████████████████| 145 kB 1.0 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting dill<0.3.7,>=0.3.0\n", + " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", + "\u001b[K |████████████████████████████████| 110 kB 772 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from aiohttp->datasets) (22.2.0)\n", + "Collecting async-timeout<5.0,>=4.0.0a3\n", + " Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", + "Collecting aiosignal>=1.1.2\n", + " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", + "Collecting yarl<2.0,>=1.0\n", + " Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)\n", + "\u001b[K |████████████████████████████████| 264 kB 1.1 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting frozenlist>=1.1.1\n", + " Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n", + "\u001b[K |████████████████████████████████| 158 kB 1.2 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting multidict<7.0,>=4.5\n", + " Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", + "\u001b[K |████████████████████████████████| 114 kB 997 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: charset-normalizer<4.0,>=2.0 in /home/tomek/miniconda3/lib/python3.9/site-packages (from aiohttp->datasets) (2.0.4)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/tomek/miniconda3/lib/python3.9/site-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.5.0)\n", + "Collecting filelock\n", + " Downloading filelock-3.9.1-py3-none-any.whl (9.7 kB)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from requests>=2.19.0->datasets) (1.26.9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/tomek/miniconda3/lib/python3.9/site-packages (from requests>=2.19.0->datasets) (2022.12.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from requests>=2.19.0->datasets) (3.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/tomek/miniconda3/lib/python3.9/site-packages (from pandas->datasets) (2022.7.1)\n", + "Requirement already satisfied: six>=1.5 in /home/tomek/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", + "Installing collected packages: multidict, frozenlist, yarl, async-timeout, aiosignal, pyyaml, fsspec, filelock, dill, aiohttp, xxhash, responses, pyarrow, multiprocess, huggingface-hub, datasets\n", + "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.10.1 dill-0.3.6 filelock-3.9.1 frozenlist-1.3.3 fsspec-2023.3.0 huggingface-hub-0.13.2 multidict-6.0.4 multiprocess-0.70.14 pyarrow-11.0.0 pyyaml-6.0 responses-0.18.0 xxhash-3.2.0 yarl-1.8.2\n" + ] + } + ], + "source": [ + "#Instalujemy bibliotekę datasets\n", + "!python -m pip install datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Found cached dataset csv (/home/tomek/.cache/huggingface/datasets/scikit-learn___csv/scikit-learn--iris-4e13227f45447466/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)\n", + "100%|██████████| 1/1 [00:00<00:00, 268.64it/s]\n" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "iris_dataset = load_dataset(\"scikit-learn/iris\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'],\n", + " num_rows: 150\n", + " })\n", + "})" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species'],\n", + " num_rows: 150\n", + "})" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_dataset[\"train\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": false, + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Id': 1,\n", + " 'SepalLengthCm': 5.1,\n", + " 'SepalWidthCm': 3.5,\n", + " 'PetalLengthCm': 1.4,\n", + " 'PetalWidthCm': 0.2,\n", + " 'Species': 'Iris-setosa'}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "iris_dataset[\"train\"][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": true, + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdSepalLengthCmSepalWidthCmPetalLengthCmPetalWidthCmSpecies
015.13.51.40.2Iris-setosa
124.93.01.40.2Iris-setosa
234.73.21.30.2Iris-setosa
344.63.11.50.2Iris-setosa
455.03.61.40.2Iris-setosa
.....................
1451466.73.05.22.3Iris-virginica
1461476.32.55.01.9Iris-virginica
1471486.53.05.22.0Iris-virginica
1481496.23.45.42.3Iris-virginica
1491505.93.05.11.8Iris-virginica
\n", + "

150 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \\\n", + "0 1 5.1 3.5 1.4 0.2 \n", + "1 2 4.9 3.0 1.4 0.2 \n", + "2 3 4.7 3.2 1.3 0.2 \n", + "3 4 4.6 3.1 1.5 0.2 \n", + "4 5 5.0 3.6 1.4 0.2 \n", + ".. ... ... ... ... ... \n", + "145 146 6.7 3.0 5.2 2.3 \n", + "146 147 6.3 2.5 5.0 1.9 \n", + "147 148 6.5 3.0 5.2 2.0 \n", + "148 149 6.2 3.4 5.4 2.3 \n", + "149 150 5.9 3.0 5.1 1.8 \n", + "\n", + " Species \n", + "0 Iris-setosa \n", + "1 Iris-setosa \n", + "2 Iris-setosa \n", + "3 Iris-setosa \n", + "4 Iris-setosa \n", + ".. ... \n", + "145 Iris-virginica \n", + "146 Iris-virginica \n", + "147 Iris-virginica \n", + "148 Iris-virginica \n", + "149 Iris-virginica \n", + "\n", + "[150 rows x 6 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(iris_dataset[\"train\"])" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1575,7 +2030,7 @@ "celltoolbar": "Slideshow", "email": "tomasz.zietkiewicz@amu.edu.pl", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -1590,7 +2045,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.9.12" }, "slideshow": { "slide_type": "slide" diff --git a/IUM_03.CI-Jenkins.ipynb b/IUM_03.CI-Jenkins.ipynb index 7c10510..5f8286d 100644 --- a/IUM_03.CI-Jenkins.ipynb +++ b/IUM_03.CI-Jenkins.ipynb @@ -12,7 +12,7 @@ "
\n", "

Inżynieria uczenia maszynowego

\n", "

3. System ciągłej integracji na przykładzie Jenkins [laboratoria]

\n", - "

Tomasz Ziętkiewicz (2022)

\n", + "

Tomasz Ziętkiewicz (2023)

\n", "
\n", "\n", "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" @@ -104,7 +104,7 @@ " - **Job, aka. Pipleine (Projekt)** - podstawowa jednostka organizacji pracy wykonywanej przez Jenkinsa. \n", " - Posiada swoją konfigurację, która określa jakie polecenia będą wykonywane w jego ramach. \n", " - Jeden pipeline może być wykonany wiele razy, za każdym razem tworząc nowe *Zadanie* (*Build*). \n", - " Przykładowy pipeline: https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/\n", + " Przykładowy pipeline: https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/\n", "\n" ] }, @@ -121,10 +121,10 @@ " - Unstable \n", " - Aborted \n", " - Failed \n", - " Np: https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/2/\n", + " Np: https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/2/\n", " - Śledzenie wyników działania buildu jak i debugowanie ewentualnych problemów ułatwiają:\n", - " - Wyjście z konsoli [(Console Output)](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/10/console) - tutaj widać logi wypisywane zarówno przez polecenia/funkcje Jenkinsowe jak i standardowe wyjście / wyjście błędów wykonywanych poleceń systemowych\n", - " - Workspace - to katalog roboczy, w którym uruchamiane są polecenia. Tutaj zostaje sklonowane repozytorium (jeśli je klonujemy), tu wywoływane będę polecenia systemowe. Można je przeglądać z poziomu przeglądarki, np. [tutaj](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world-scripted/1/execution/node/3/ws/)\n", + " - Wyjście z konsoli [(Console Output)](https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/10/console) - tutaj widać logi wypisywane zarówno przez polecenia/funkcje Jenkinsowe jak i standardowe wyjście / wyjście błędów wykonywanych poleceń systemowych\n", + " - Workspace - to katalog roboczy, w którym uruchamiane są polecenia. Tutaj zostaje sklonowane repozytorium (jeśli je klonujemy), tu wywoływane będę polecenia systemowe. Można je przeglądać z poziomu przeglądarki, np. [tutaj](https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world-scripted/1/execution/node/3/ws/)\n", " - Każdy uruchomiony build można zatrzymać (abort) co powoduje zaprzestanie jego wykonywania\n", " - Build zakończony można usunąć (np. jeśli przez przypadek wypisaliśmy na konsolę nasze hasło)" ] @@ -160,7 +160,7 @@ "source": [ "## Dokumentacja\n", "- https://www.jenkins.io/doc/book/pipeline/\n", - "- \"Pipeline syntax\" na stronie każdego projektu, np: https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/pipeline-syntax/\n", + "- \"Pipeline syntax\" na stronie każdego projektu, np: https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/pipeline-syntax/\n", "- Znaki zapytania (W konfiguracji joba oraz w \"Pipeline Syntax\")" ] }, @@ -203,7 +203,7 @@ }, "source": [ "#### 1. Zaloguj się\n", - " - zaloguj się na https://tzietkiewicz.vm.wmi.amu.edu.pl:8080 za pomocą konta wydziałowego (jak w laboratoriach WMI)" + " - zaloguj się na https://tzietkiewicz.vm.wmi.amu.edu.pl:8081 za pomocą konta wydziałowego (jak w laboratoriach WMI)" ] }, { @@ -240,7 +240,7 @@ "\n", " - Pierwszy z nich daje większe możliwości, drugi jest łatwiejszy, lepiej udokumentowany, ale ma mniejszą siłę ekpresji.\n", "\n", - " - Fragmenty kodu można również generować przy pomocy kreatora, dostępnego pod linkiem [Pipeline syntax](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/pipeline-syntax/) na stronie każdego projektu. Jest to bardzo przydatna funkcjonalność, nie tylko dla początkujących użytkowników\n", + " - Fragmenty kodu można również generować przy pomocy kreatora, dostępnego pod linkiem [Pipeline syntax](https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/pipeline-syntax/) na stronie każdego projektu. Jest to bardzo przydatna funkcjonalność, nie tylko dla początkujących użytkowników\n", "\n", " - Jenkinsfile może być wprowadzony bezpośrednio z poziomu przeglądarki, albo pobrany z repozytorium.\n", "\n", @@ -258,7 +258,7 @@ } }, "source": [ - "Przykładowy declarative Pipeline (https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world/):\n", + "Przykładowy declarative Pipeline (https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world/):\n", "\n", "```groovy\n", "pipeline {\n", @@ -301,7 +301,7 @@ } }, "source": [ - "Przykładowy scripted Pipeline (https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world-scripted/):\n", + "Przykładowy scripted Pipeline (https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world-scripted/):\n", "\n", "```groovy\n", "node {\n", @@ -396,13 +396,13 @@ "export KAGGLE_USERNAME=datadinosaur\n", "export KAGGLE_KEY=xxxxxxxxxxxxxx\n", " ```\n", - " - Jenkins natomiast umożliwia utworzenie parametru typu password, którego wartość nie jest nigdzie zapisywana (wartości pozostałych parametrów są zapisywane w zakładce \"Parameters\" każdego build-a, np. [tutaj](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/hello-world-scripted/1/parameters/)\n", + " - Jenkins natomiast umożliwia utworzenie parametru typu password, którego wartość nie jest nigdzie zapisywana (wartości pozostałych parametrów są zapisywane w zakładce \"Parameters\" każdego build-a, np. [tutaj](https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/hello-world-scripted/1/parameters/)\n", " - konstukcja `withEnv` w Jenkinsfile, pozwala wywołać wszystkie otoczone nią polecenia z wyeksportowanymi wartościami zmiennych systemowych. Pozwala to np. przekazać wartości parametrów zadania Jenkinsowego do shella (poleceń wywoływanych z `sh`). \n", " - Zwróć jednak uwagę na to, w jaki sposób odwołujesz się do zmiennej z hasłem: https://www.jenkins.io/doc/book/pipeline/jenkinsfile/#string-interpolation !\n", " - ten sam rezultat co przy wykorzystaniu `withEnv` można by osiągnąć wywołując: `sh \"KAGGLE_USERNAME=${params.KAGGLE_USERNAME} KAGGLE_KEY=${params.KAGGLE_KEY} kaggle datasets list`, ale ten pierwszy wydahe się bardziej elegancki\n", " - Poniżej przykładowy projekt, który pokazuje jak wywołać Kaggle CLI używając hasła podanego w parametrach zadania:\n", " \n", - "https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/kaggle-CLI-example/\n", + "https://tzietkiewicz.vm.wmi.amu.edu.pl:8081/job/kaggle-CLI-example/\n", "```groovy\n", "node {\n", " stage('Preparation') { \n", @@ -509,7 +509,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.12" }, "slideshow": { "slide_type": "slide"