fixed IUM_zadanie.ipynb file

This commit is contained in:
koziej97 2023-03-28 23:23:49 +02:00
parent e8c3bd7b63
commit 2235db6b0c

View File

@ -2,17 +2,213 @@
"cells": [
{
"cell_type": "code",
"execution_count": 41,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zPOfPO5LAOqy",
"outputId": "a8846a75-ef0a-4048-8168-f71d79d7b7e8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: kaggle in /usr/local/lib/python3.9/dist-packages (1.5.13)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.27.1)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.9/dist-packages (from kaggle) (8.0.1)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.65.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.15)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.9/dist-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.0.12)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
],
"source": [
"!pip install --user kaggle\n",
"!pip install --user pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gc7VHACRAOq0",
"outputId": "20220fe9-e872-451b-f759-b4cfff91bc51"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Traceback (most recent call last):\n",
" File \"/usr/local/bin/kaggle\", line 5, in <module>\n",
" from kaggle.cli import main\n",
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/__init__.py\", line 23, in <module>\n",
" api.authenticate()\n",
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/api/kaggle_api_extended.py\", line 164, in authenticate\n",
" raise IOError('Could not find {}. Make sure it\\'s located in'\n",
"OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.\n"
]
}
],
"source": [
"!kaggle datasets download -d dylanjcastillo/7k-books-with-metadata"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "utslvpN1AOq0",
"outputId": "dda342a0-18dc-40a7-86bd-b233844c1231",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Archive: 7k-books-with-metadata.zip\n",
" inflating: books.csv \n"
]
}
],
"source": [
"!unzip -o 7k-books-with-metadata.zip"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "k9Q3DwbiAOq0",
"outputId": "ab0a4f14-188b-41d6-c3fe-0553d80aa648",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 676
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" isbn13 isbn10 title \\\n",
"0 9780002005883 0002005883 Gilead \n",
"1 9780002261982 0002261987 Spider's Web \n",
"2 9780006163831 0006163831 The One Tree \n",
"3 9780006178736 0006178731 Rage of angels \n",
"4 9780006280897 0006280897 The Four Loves \n",
"... ... ... ... \n",
"6805 9788185300535 8185300534 I Am that \n",
"6806 9788185944609 8185944601 Secrets Of The Heart \n",
"6807 9788445074879 8445074873 Fahrenheit 451 \n",
"6808 9789027712059 9027712050 The Berlin Phenomenology \n",
"6809 9789042003408 9042003405 'I'm Telling You Stories' \n",
"\n",
" subtitle \\\n",
"0 NaN \n",
"1 A Novel \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"6805 Talks with Sri Nisargadatta Maharaj \n",
"6806 NaN \n",
"6807 NaN \n",
"6808 NaN \n",
"6809 Jeanette Winterson and the Politics of Reading \n",
"\n",
" authors \\\n",
"0 Marilynne Robinson \n",
"1 Charles Osborne;Agatha Christie \n",
"2 Stephen R. Donaldson \n",
"3 Sidney Sheldon \n",
"4 Clive Staples Lewis \n",
"... ... \n",
"6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
"6806 Khalil Gibran \n",
"6807 Ray Bradbury \n",
"6808 Georg Wilhelm Friedrich Hegel \n",
"6809 Helena Grice;Tim Woods \n",
"\n",
" categories \\\n",
"0 Fiction \n",
"1 Detective and mystery stories \n",
"2 American fiction \n",
"3 Fiction \n",
"4 Christian life \n",
"... ... \n",
"6805 Philosophy \n",
"6806 Mysticism \n",
"6807 Book burning \n",
"6808 History \n",
"6809 Literary Criticism \n",
"\n",
" thumbnail \\\n",
"0 http://books.google.com/books/content?id=KQZCP... \n",
"1 http://books.google.com/books/content?id=gA5GP... \n",
"2 http://books.google.com/books/content?id=OmQaw... \n",
"3 http://books.google.com/books/content?id=FKo2T... \n",
"4 http://books.google.com/books/content?id=XhQ5X... \n",
"... ... \n",
"6805 http://books.google.com/books/content?id=Fv_JP... \n",
"6806 http://books.google.com/books/content?id=XcrVp... \n",
"6807 NaN \n",
"6808 http://books.google.com/books/content?id=Vy7Sk... \n",
"6809 http://books.google.com/books/content?id=2lVyR... \n",
"\n",
" description published_year \\\n",
"0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
"1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
"2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n",
"3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
"4 Lewis' work on the nature of love divides love... 2002.0 \n",
"... ... ... \n",
"6805 This collection of the timeless teachings of o... 1999.0 \n",
"6806 NaN 1993.0 \n",
"6807 NaN 2004.0 \n",
"6808 Since the three volume edition ofHegel's Philo... 1981.0 \n",
"6809 This is a jubilant and rewarding collection of... 1998.0 \n",
"\n",
" average_rating num_pages ratings_count \n",
"0 3.85 247.0 361.0 \n",
"1 3.83 241.0 5164.0 \n",
"2 3.97 479.0 172.0 \n",
"3 3.93 512.0 29532.0 \n",
"4 4.15 170.0 33684.0 \n",
"... ... ... ... \n",
"6805 4.51 531.0 104.0 \n",
"6806 4.08 74.0 324.0 \n",
"6807 3.98 186.0 5733.0 \n",
"6808 0.00 210.0 0.0 \n",
"6809 3.70 136.0 10.0 \n",
"\n",
"[6810 rows x 12 columns]"
],
"text/html": [
"<div>\n",
"\n",
" <div id=\"df-65d5a23c-fc61-4f09-a1fe-41189afea541\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
@ -213,106 +409,87 @@
" </tbody>\n",
"</table>\n",
"<p>6810 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" isbn13 isbn10 title \\\n",
"0 9780002005883 0002005883 Gilead \n",
"1 9780002261982 0002261987 Spider's Web \n",
"2 9780006163831 0006163831 The One Tree \n",
"3 9780006178736 0006178731 Rage of angels \n",
"4 9780006280897 0006280897 The Four Loves \n",
"... ... ... ... \n",
"6805 9788185300535 8185300534 I Am that \n",
"6806 9788185944609 8185944601 Secrets Of The Heart \n",
"6807 9788445074879 8445074873 Fahrenheit 451 \n",
"6808 9789027712059 9027712050 The Berlin Phenomenology \n",
"6809 9789042003408 9042003405 'I'm Telling You Stories' \n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-65d5a23c-fc61-4f09-a1fe-41189afea541')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" subtitle \\\n",
"0 NaN \n",
"1 A Novel \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"6805 Talks with Sri Nisargadatta Maharaj \n",
"6806 NaN \n",
"6807 NaN \n",
"6808 NaN \n",
"6809 Jeanette Winterson and the Politics of Reading \n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" authors \\\n",
"0 Marilynne Robinson \n",
"1 Charles Osborne;Agatha Christie \n",
"2 Stephen R. Donaldson \n",
"3 Sidney Sheldon \n",
"4 Clive Staples Lewis \n",
"... ... \n",
"6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
"6806 Khalil Gibran \n",
"6807 Ray Bradbury \n",
"6808 Georg Wilhelm Friedrich Hegel \n",
"6809 Helena Grice;Tim Woods \n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" categories \\\n",
"0 Fiction \n",
"1 Detective and mystery stories \n",
"2 American fiction \n",
"3 Fiction \n",
"4 Christian life \n",
"... ... \n",
"6805 Philosophy \n",
"6806 Mysticism \n",
"6807 Book burning \n",
"6808 History \n",
"6809 Literary Criticism \n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" thumbnail \\\n",
"0 http://books.google.com/books/content?id=KQZCP... \n",
"1 http://books.google.com/books/content?id=gA5GP... \n",
"2 http://books.google.com/books/content?id=OmQaw... \n",
"3 http://books.google.com/books/content?id=FKo2T... \n",
"4 http://books.google.com/books/content?id=XhQ5X... \n",
"... ... \n",
"6805 http://books.google.com/books/content?id=Fv_JP... \n",
"6806 http://books.google.com/books/content?id=XcrVp... \n",
"6807 NaN \n",
"6808 http://books.google.com/books/content?id=Vy7Sk... \n",
"6809 http://books.google.com/books/content?id=2lVyR... \n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" description published_year \\\n",
"0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
"1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
"2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n",
"3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
"4 Lewis' work on the nature of love divides love... 2002.0 \n",
"... ... ... \n",
"6805 This collection of the timeless teachings of o... 1999.0 \n",
"6806 NaN 1993.0 \n",
"6807 NaN 2004.0 \n",
"6808 Since the three volume edition ofHegel's Philo... 1981.0 \n",
"6809 This is a jubilant and rewarding collection of... 1998.0 \n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" average_rating num_pages ratings_count \n",
"0 3.85 247.0 361.0 \n",
"1 3.83 241.0 5164.0 \n",
"2 3.97 479.0 172.0 \n",
"3 3.93 512.0 29532.0 \n",
"4 4.15 170.0 33684.0 \n",
"... ... ... ... \n",
"6805 4.51 531.0 104.0 \n",
"6806 4.08 74.0 324.0 \n",
"6807 3.98 186.0 5733.0 \n",
"6808 0.00 210.0 0.0 \n",
"6809 3.70 136.0 10.0 \n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
"[6810 rows x 12 columns]"
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
"execution_count": 16
}
],
"source": [
@ -323,12 +500,11 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": null,
"metadata": {
"scrolled": true,
"slideshow": {
"slide_type": "slide"
}
"id": "WgVroQDTAOq1",
"outputId": "932fdfce-1d65-4290-cc5d-cc053f4fa459"
},
"outputs": [
{
@ -614,12 +790,10 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": null,
"metadata": {
"scrolled": false,
"slideshow": {
"slide_type": "slide"
}
"id": "1hwHH65hAOq1",
"outputId": "0b3e32ab-230b-4d9d-db8a-d2d25e57161b"
},
"outputs": [
{
@ -651,8 +825,10 @@
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "mZMFUt2pAOq1"
},
"outputs": [],
"source": [
"books.drop('thumbnail', inplace=True, axis=1)\n",
@ -664,8 +840,11 @@
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "y6I2PKuhAOq1",
"outputId": "2e03efc3-e8e3-4cc8-abb8-97e0594665be"
},
"outputs": [
{
"data": {
@ -691,8 +870,11 @@
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "21R7h40lAOq1",
"outputId": "4c9f746b-4347-4cd3-cdae-21e7bc818f2c"
},
"outputs": [
{
"data": {
@ -895,8 +1077,11 @@
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "lx9gqh7UAOq2",
"outputId": "651a374e-eb8c-426f-faa0-c1cd6c9762bb"
},
"outputs": [
{
"data": {
@ -1082,8 +1267,11 @@
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "J7DUOhOwAOq2",
"outputId": "3bce3396-8f22-41a4-ebfb-9895ad2bb73c"
},
"outputs": [
{
"data": {
@ -1113,8 +1301,11 @@
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "4R3GDLXgAOq2",
"outputId": "4d3a9d8a-f37d-4cba-ebbb-0615571396f4"
},
"outputs": [
{
"data": {
@ -1144,8 +1335,11 @@
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "YSLMCB4nAOq2",
"outputId": "ccdb49cc-9037-4995-9d0b-c0a749f6eae1"
},
"outputs": [
{
"data": {
@ -1175,8 +1369,11 @@
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "D8HrFKIGAOq3",
"outputId": "20a73c84-1b66-4dd8-fa99-caba6ca68b29"
},
"outputs": [
{
"data": {
@ -1206,11 +1403,9 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
"id": "utiDxb60AOq3"
},
"outputs": [],
"source": [
@ -1223,8 +1418,11 @@
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "rS0epPE6AOq3",
"outputId": "f704dda5-95e7-474b-a9b3-d8e107067710"
},
"outputs": [
{
"data": {
@ -1439,8 +1637,11 @@
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "oUWEVGaGAOq3",
"outputId": "6a053600-98a9-4990-ae44-cb8eeda97293"
},
"outputs": [
{
"data": {
@ -1626,8 +1827,11 @@
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "yXkOfB9bAOq3",
"outputId": "3fc9e96e-8fe0-490c-d6b5-71b21277aa0a"
},
"outputs": [
{
"data": {
@ -1813,8 +2017,11 @@
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"execution_count": null,
"metadata": {
"id": "CWG6q0ixAOq4",
"outputId": "367a1088-975b-4da2-e333-50152a4fcbc3"
},
"outputs": [
{
"data": {
@ -2004,7 +2211,7 @@
"celltoolbar": "Slideshow",
"email": "tomasz.zietkiewicz@amu.edu.pl",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -2019,7 +2226,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.2"
"version": "3.10.10"
},
"slideshow": {
"slide_type": "slide"
@ -2039,8 +2246,11 @@
"toc_section_display": false,
"toc_window_display": false
},
"year": "2021"
"year": "2021",
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 0
}