{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zPOfPO5LAOqy",
        "outputId": "a8846a75-ef0a-4048-8168-f71d79d7b7e8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: kaggle in /usr/local/lib/python3.9/dist-packages (1.5.13)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.27.1)\n",
            "Requirement already satisfied: python-slugify in /usr/local/lib/python3.9/dist-packages (from kaggle) (8.0.1)\n",
            "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.16.0)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.65.0)\n",
            "Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.12.7)\n",
            "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
            "Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.15)\n",
            "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.9/dist-packages (from python-slugify->kaggle) (1.3)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.0.12)\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n",
            "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n",
            "Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
          ]
        }
      ],
      "source": [
        "!pip install --user kaggle\n",
        "!pip install --user pandas"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gc7VHACRAOq0",
        "outputId": "20220fe9-e872-451b-f759-b4cfff91bc51"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Traceback (most recent call last):\n",
            "  File \"/usr/local/bin/kaggle\", line 5, in <module>\n",
            "    from kaggle.cli import main\n",
            "  File \"/usr/local/lib/python3.9/dist-packages/kaggle/__init__.py\", line 23, in <module>\n",
            "    api.authenticate()\n",
            "  File \"/usr/local/lib/python3.9/dist-packages/kaggle/api/kaggle_api_extended.py\", line 164, in authenticate\n",
            "    raise IOError('Could not find {}. Make sure it\\'s located in'\n",
            "OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.\n"
          ]
        }
      ],
      "source": [
        "# Download the dataset archive with the Kaggle CLI. The CLI authenticates as\n",
        "# soon as it is imported, so this needs kaggle.json in the Kaggle config\n",
        "# directory (/root/.kaggle in the recorded run) or credentials supplied\n",
        "# through the environment; otherwise it fails with the OSError shown below.\n",
        "!kaggle datasets download -d dylanjcastillo/7k-books-with-metadata"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "id": "utslvpN1AOq0",
        "outputId": "dda342a0-18dc-40a7-86bd-b233844c1231",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Archive:  7k-books-with-metadata.zip\n",
            "  inflating: books.csv               \n"
          ]
        }
      ],
      "source": [
        "# Extract the dataset archive into the working directory (yields books.csv).\n",
        "# -o overwrites existing files without prompting, so a re-run does not stall.\n",
        "!unzip -o 7k-books-with-metadata.zip"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "id": "k9Q3DwbiAOq0",
        "outputId": "ab0a4f14-188b-41d6-c3fe-0553d80aa648",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 676
        }
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "             isbn13      isbn10                      title  \\\n",
              "0     9780002005883  0002005883                     Gilead   \n",
              "1     9780002261982  0002261987               Spider's Web   \n",
              "2     9780006163831  0006163831               The One Tree   \n",
              "3     9780006178736  0006178731             Rage of angels   \n",
              "4     9780006280897  0006280897             The Four Loves   \n",
              "...             ...         ...                        ...   \n",
              "6805  9788185300535  8185300534                  I Am that   \n",
              "6806  9788185944609  8185944601       Secrets Of The Heart   \n",
              "6807  9788445074879  8445074873             Fahrenheit 451   \n",
              "6808  9789027712059  9027712050   The Berlin Phenomenology   \n",
              "6809  9789042003408  9042003405  'I'm Telling You Stories'   \n",
              "\n",
              "                                            subtitle  \\\n",
              "0                                                NaN   \n",
              "1                                            A Novel   \n",
              "2                                                NaN   \n",
              "3                                                NaN   \n",
              "4                                                NaN   \n",
              "...                                              ...   \n",
              "6805             Talks with Sri Nisargadatta Maharaj   \n",
              "6806                                             NaN   \n",
              "6807                                             NaN   \n",
              "6808                                             NaN   \n",
              "6809  Jeanette Winterson and the Politics of Reading   \n",
              "\n",
              "                                           authors  \\\n",
              "0                               Marilynne Robinson   \n",
              "1                  Charles Osborne;Agatha Christie   \n",
              "2                             Stephen R. Donaldson   \n",
              "3                                   Sidney Sheldon   \n",
              "4                              Clive Staples Lewis   \n",
              "...                                            ...   \n",
              "6805  Sri Nisargadatta Maharaj;Sudhakar S. Dikshit   \n",
              "6806                                 Khalil Gibran   \n",
              "6807                                  Ray Bradbury   \n",
              "6808                 Georg Wilhelm Friedrich Hegel   \n",
              "6809                        Helena Grice;Tim Woods   \n",
              "\n",
              "                         categories  \\\n",
              "0                           Fiction   \n",
              "1     Detective and mystery stories   \n",
              "2                  American fiction   \n",
              "3                           Fiction   \n",
              "4                    Christian life   \n",
              "...                             ...   \n",
              "6805                     Philosophy   \n",
              "6806                      Mysticism   \n",
              "6807                   Book burning   \n",
              "6808                        History   \n",
              "6809             Literary Criticism   \n",
              "\n",
              "                                              thumbnail  \\\n",
              "0     http://books.google.com/books/content?id=KQZCP...   \n",
              "1     http://books.google.com/books/content?id=gA5GP...   \n",
              "2     http://books.google.com/books/content?id=OmQaw...   \n",
              "3     http://books.google.com/books/content?id=FKo2T...   \n",
              "4     http://books.google.com/books/content?id=XhQ5X...   \n",
              "...                                                 ...   \n",
              "6805  http://books.google.com/books/content?id=Fv_JP...   \n",
              "6806  http://books.google.com/books/content?id=XcrVp...   \n",
              "6807                                                NaN   \n",
              "6808  http://books.google.com/books/content?id=Vy7Sk...   \n",
              "6809  http://books.google.com/books/content?id=2lVyR...   \n",
              "\n",
              "                                            description  published_year  \\\n",
              "0     A NOVEL THAT READERS and critics have been eag...          2004.0   \n",
              "1     A new 'Christie for Christmas' -- a full-lengt...          2000.0   \n",
              "2     Volume Two of Stephen Donaldson's acclaimed se...          1982.0   \n",
              "3     A memorable, mesmerizing heroine Jennifer -- b...          1993.0   \n",
              "4     Lewis' work on the nature of love divides love...          2002.0   \n",
              "...                                                 ...             ...   \n",
              "6805  This collection of the timeless teachings of o...          1999.0   \n",
              "6806                                                NaN          1993.0   \n",
              "6807                                                NaN          2004.0   \n",
              "6808  Since the three volume edition ofHegel's Philo...          1981.0   \n",
              "6809  This is a jubilant and rewarding collection of...          1998.0   \n",
              "\n",
              "      average_rating  num_pages  ratings_count  \n",
              "0               3.85      247.0          361.0  \n",
              "1               3.83      241.0         5164.0  \n",
              "2               3.97      479.0          172.0  \n",
              "3               3.93      512.0        29532.0  \n",
              "4               4.15      170.0        33684.0  \n",
              "...              ...        ...            ...  \n",
              "6805            4.51      531.0          104.0  \n",
              "6806            4.08       74.0          324.0  \n",
              "6807            3.98      186.0         5733.0  \n",
              "6808            0.00      210.0            0.0  \n",
              "6809            3.70      136.0           10.0  \n",
              "\n",
              "[6810 rows x 12 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-65d5a23c-fc61-4f09-a1fe-41189afea541\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>isbn13</th>\n",
              "      <th>isbn10</th>\n",
              "      <th>title</th>\n",
              "      <th>subtitle</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>thumbnail</th>\n",
              "      <th>description</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>9780002005883</td>\n",
              "      <td>0002005883</td>\n",
              "      <td>Gilead</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Marilynne Robinson</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>http://books.google.com/books/content?id=KQZCP...</td>\n",
              "      <td>A NOVEL THAT READERS and critics have been eag...</td>\n",
              "      <td>2004.0</td>\n",
              "      <td>3.85</td>\n",
              "      <td>247.0</td>\n",
              "      <td>361.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>9780002261982</td>\n",
              "      <td>0002261987</td>\n",
              "      <td>Spider's Web</td>\n",
              "      <td>A Novel</td>\n",
              "      <td>Charles Osborne;Agatha Christie</td>\n",
              "      <td>Detective and mystery stories</td>\n",
              "      <td>http://books.google.com/books/content?id=gA5GP...</td>\n",
              "      <td>A new 'Christie for Christmas' -- a full-lengt...</td>\n",
              "      <td>2000.0</td>\n",
              "      <td>3.83</td>\n",
              "      <td>241.0</td>\n",
              "      <td>5164.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>9780006163831</td>\n",
              "      <td>0006163831</td>\n",
              "      <td>The One Tree</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Stephen R. Donaldson</td>\n",
              "      <td>American fiction</td>\n",
              "      <td>http://books.google.com/books/content?id=OmQaw...</td>\n",
              "      <td>Volume Two of Stephen Donaldson's acclaimed se...</td>\n",
              "      <td>1982.0</td>\n",
              "      <td>3.97</td>\n",
              "      <td>479.0</td>\n",
              "      <td>172.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>9780006178736</td>\n",
              "      <td>0006178731</td>\n",
              "      <td>Rage of angels</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Sidney Sheldon</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>http://books.google.com/books/content?id=FKo2T...</td>\n",
              "      <td>A memorable, mesmerizing heroine Jennifer -- b...</td>\n",
              "      <td>1993.0</td>\n",
              "      <td>3.93</td>\n",
              "      <td>512.0</td>\n",
              "      <td>29532.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>9780006280897</td>\n",
              "      <td>0006280897</td>\n",
              "      <td>The Four Loves</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Clive Staples Lewis</td>\n",
              "      <td>Christian life</td>\n",
              "      <td>http://books.google.com/books/content?id=XhQ5X...</td>\n",
              "      <td>Lewis' work on the nature of love divides love...</td>\n",
              "      <td>2002.0</td>\n",
              "      <td>4.15</td>\n",
              "      <td>170.0</td>\n",
              "      <td>33684.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6805</th>\n",
              "      <td>9788185300535</td>\n",
              "      <td>8185300534</td>\n",
              "      <td>I Am that</td>\n",
              "      <td>Talks with Sri Nisargadatta Maharaj</td>\n",
              "      <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
              "      <td>Philosophy</td>\n",
              "      <td>http://books.google.com/books/content?id=Fv_JP...</td>\n",
              "      <td>This collection of the timeless teachings of o...</td>\n",
              "      <td>1999.0</td>\n",
              "      <td>4.51</td>\n",
              "      <td>531.0</td>\n",
              "      <td>104.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6806</th>\n",
              "      <td>9788185944609</td>\n",
              "      <td>8185944601</td>\n",
              "      <td>Secrets Of The Heart</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Khalil Gibran</td>\n",
              "      <td>Mysticism</td>\n",
              "      <td>http://books.google.com/books/content?id=XcrVp...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1993.0</td>\n",
              "      <td>4.08</td>\n",
              "      <td>74.0</td>\n",
              "      <td>324.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6807</th>\n",
              "      <td>9788445074879</td>\n",
              "      <td>8445074873</td>\n",
              "      <td>Fahrenheit 451</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Ray Bradbury</td>\n",
              "      <td>Book burning</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2004.0</td>\n",
              "      <td>3.98</td>\n",
              "      <td>186.0</td>\n",
              "      <td>5733.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6808</th>\n",
              "      <td>9789027712059</td>\n",
              "      <td>9027712050</td>\n",
              "      <td>The Berlin Phenomenology</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Georg Wilhelm Friedrich Hegel</td>\n",
              "      <td>History</td>\n",
              "      <td>http://books.google.com/books/content?id=Vy7Sk...</td>\n",
              "      <td>Since the three volume edition ofHegel's Philo...</td>\n",
              "      <td>1981.0</td>\n",
              "      <td>0.00</td>\n",
              "      <td>210.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6809</th>\n",
              "      <td>9789042003408</td>\n",
              "      <td>9042003405</td>\n",
              "      <td>'I'm Telling You Stories'</td>\n",
              "      <td>Jeanette Winterson and the Politics of Reading</td>\n",
              "      <td>Helena Grice;Tim Woods</td>\n",
              "      <td>Literary Criticism</td>\n",
              "      <td>http://books.google.com/books/content?id=2lVyR...</td>\n",
              "      <td>This is a jubilant and rewarding collection of...</td>\n",
              "      <td>1998.0</td>\n",
              "      <td>3.70</td>\n",
              "      <td>136.0</td>\n",
              "      <td>10.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>6810 rows × 12 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-65d5a23c-fc61-4f09-a1fe-41189afea541')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 16
        }
      ],
      "source": [
        "import pandas as pd\n",
        "books=pd.read_csv('books.csv')\n",
        "books"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "scrolled": true,
        "id": "WgVroQDTAOq1",
        "outputId": "932fdfce-1d65-4290-cc5d-cc053f4fa459"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>isbn13</th>\n",
              "      <th>isbn10</th>\n",
              "      <th>title</th>\n",
              "      <th>subtitle</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>thumbnail</th>\n",
              "      <th>description</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>6.810000e+03</td>\n",
              "      <td>6810</td>\n",
              "      <td>6810</td>\n",
              "      <td>2381</td>\n",
              "      <td>6738</td>\n",
              "      <td>6711</td>\n",
              "      <td>6481</td>\n",
              "      <td>6548</td>\n",
              "      <td>6804.000000</td>\n",
              "      <td>6767.000000</td>\n",
              "      <td>6767.000000</td>\n",
              "      <td>6.767000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>unique</th>\n",
              "      <td>NaN</td>\n",
              "      <td>6810</td>\n",
              "      <td>6398</td>\n",
              "      <td>2009</td>\n",
              "      <td>3780</td>\n",
              "      <td>567</td>\n",
              "      <td>6481</td>\n",
              "      <td>6474</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>top</th>\n",
              "      <td>NaN</td>\n",
              "      <td>0786282258</td>\n",
              "      <td>The Lord of the Rings</td>\n",
              "      <td>A Novel</td>\n",
              "      <td>Agatha Christie</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>http://books.google.com/books/content?id=6dVAW...</td>\n",
              "      <td>This is a reproduction of the original artefac...</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>freq</th>\n",
              "      <td>NaN</td>\n",
              "      <td>1</td>\n",
              "      <td>11</td>\n",
              "      <td>226</td>\n",
              "      <td>37</td>\n",
              "      <td>2588</td>\n",
              "      <td>1</td>\n",
              "      <td>6</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>9.780677e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1998.630364</td>\n",
              "      <td>3.933284</td>\n",
              "      <td>348.181026</td>\n",
              "      <td>2.106910e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>6.068911e+08</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>10.484257</td>\n",
              "      <td>0.331352</td>\n",
              "      <td>242.376783</td>\n",
              "      <td>1.376207e+05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>9.780002e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1853.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>9.780330e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1996.000000</td>\n",
              "      <td>3.770000</td>\n",
              "      <td>208.000000</td>\n",
              "      <td>1.590000e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>9.780553e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2002.000000</td>\n",
              "      <td>3.960000</td>\n",
              "      <td>304.000000</td>\n",
              "      <td>1.018000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>9.780810e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2005.000000</td>\n",
              "      <td>4.130000</td>\n",
              "      <td>420.000000</td>\n",
              "      <td>5.992500e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>9.789042e+12</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>5.000000</td>\n",
              "      <td>3342.000000</td>\n",
              "      <td>5.629932e+06</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "              isbn13      isbn10                  title subtitle  \\\n",
              "count   6.810000e+03        6810                   6810     2381   \n",
              "unique           NaN        6810                   6398     2009   \n",
              "top              NaN  0786282258  The Lord of the Rings  A Novel   \n",
              "freq             NaN           1                     11      226   \n",
              "mean    9.780677e+12         NaN                    NaN      NaN   \n",
              "std     6.068911e+08         NaN                    NaN      NaN   \n",
              "min     9.780002e+12         NaN                    NaN      NaN   \n",
              "25%     9.780330e+12         NaN                    NaN      NaN   \n",
              "50%     9.780553e+12         NaN                    NaN      NaN   \n",
              "75%     9.780810e+12         NaN                    NaN      NaN   \n",
              "max     9.789042e+12         NaN                    NaN      NaN   \n",
              "\n",
              "                authors categories  \\\n",
              "count              6738       6711   \n",
              "unique             3780        567   \n",
              "top     Agatha Christie    Fiction   \n",
              "freq                 37       2588   \n",
              "mean                NaN        NaN   \n",
              "std                 NaN        NaN   \n",
              "min                 NaN        NaN   \n",
              "25%                 NaN        NaN   \n",
              "50%                 NaN        NaN   \n",
              "75%                 NaN        NaN   \n",
              "max                 NaN        NaN   \n",
              "\n",
              "                                                thumbnail  \\\n",
              "count                                                6481   \n",
              "unique                                               6481   \n",
              "top     http://books.google.com/books/content?id=6dVAW...   \n",
              "freq                                                    1   \n",
              "mean                                                  NaN   \n",
              "std                                                   NaN   \n",
              "min                                                   NaN   \n",
              "25%                                                   NaN   \n",
              "50%                                                   NaN   \n",
              "75%                                                   NaN   \n",
              "max                                                   NaN   \n",
              "\n",
              "                                              description  published_year  \\\n",
              "count                                                6548     6804.000000   \n",
              "unique                                               6474             NaN   \n",
              "top     This is a reproduction of the original artefac...             NaN   \n",
              "freq                                                    6             NaN   \n",
              "mean                                                  NaN     1998.630364   \n",
              "std                                                   NaN       10.484257   \n",
              "min                                                   NaN     1853.000000   \n",
              "25%                                                   NaN     1996.000000   \n",
              "50%                                                   NaN     2002.000000   \n",
              "75%                                                   NaN     2005.000000   \n",
              "max                                                   NaN     2019.000000   \n",
              "\n",
              "        average_rating    num_pages  ratings_count  \n",
              "count      6767.000000  6767.000000   6.767000e+03  \n",
              "unique             NaN          NaN            NaN  \n",
              "top                NaN          NaN            NaN  \n",
              "freq               NaN          NaN            NaN  \n",
              "mean          3.933284   348.181026   2.106910e+04  \n",
              "std           0.331352   242.376783   1.376207e+05  \n",
              "min           0.000000     0.000000   0.000000e+00  \n",
              "25%           3.770000   208.000000   1.590000e+02  \n",
              "50%           3.960000   304.000000   1.018000e+03  \n",
              "75%           4.130000   420.000000   5.992500e+03  \n",
              "max           5.000000  3342.000000   5.629932e+06  "
            ]
          },
          "execution_count": 42,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Summary statistics for every column (numeric and non-numeric alike).\n",
        "books.describe(include=\"all\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1hwHH65hAOq1",
        "outputId": "0b3e32ab-230b-4d9d-db8a-d2d25e57161b"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "isbn13               0\n",
              "isbn10               0\n",
              "title                0\n",
              "subtitle          4429\n",
              "authors             72\n",
              "categories          99\n",
              "thumbnail          329\n",
              "description        262\n",
              "published_year       6\n",
              "average_rating      43\n",
              "num_pages           43\n",
              "ratings_count       43\n",
              "dtype: int64"
            ]
          },
          "execution_count": 43,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Number of missing values in each column.\n",
        "books.isna().sum()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "mZMFUt2pAOq1"
      },
      "outputs": [],
      "source": [
        "# Remove the five columns in one in-place call instead of five separate ones.\n",
        "# errors='ignore' makes the cell safe to re-run: once the columns are gone a\n",
        "# second execution is a no-op rather than a KeyError. The trade-off is that a\n",
        "# column missing from the input is skipped silently.\n",
        "books.drop(\n",
        "    columns=['thumbnail', 'subtitle', 'description', 'isbn13', 'isbn10'],\n",
        "    inplace=True,\n",
        "    errors='ignore',\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "y6I2PKuhAOq1",
        "outputId": "2e03efc3-e8e3-4cc8-abb8-97e0594665be"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "title              0\n",
              "authors           72\n",
              "categories        99\n",
              "published_year     6\n",
              "average_rating    43\n",
              "num_pages         43\n",
              "ratings_count     43\n",
              "dtype: int64"
            ]
          },
          "execution_count": 45,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Number of missing values in each remaining column.\n",
        "books.isna().sum()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "21R7h40lAOq1",
        "outputId": "4c9f746b-4347-4cd3-cdae-21e7bc818f2c"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Gilead</td>\n",
              "      <td>Marilynne Robinson</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>2004.0</td>\n",
              "      <td>3.85</td>\n",
              "      <td>247.0</td>\n",
              "      <td>361.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Spider's Web</td>\n",
              "      <td>Charles Osborne;Agatha Christie</td>\n",
              "      <td>Detective and mystery stories</td>\n",
              "      <td>2000.0</td>\n",
              "      <td>3.83</td>\n",
              "      <td>241.0</td>\n",
              "      <td>5164.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>The One Tree</td>\n",
              "      <td>Stephen R. Donaldson</td>\n",
              "      <td>American fiction</td>\n",
              "      <td>1982.0</td>\n",
              "      <td>3.97</td>\n",
              "      <td>479.0</td>\n",
              "      <td>172.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Rage of angels</td>\n",
              "      <td>Sidney Sheldon</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>1993.0</td>\n",
              "      <td>3.93</td>\n",
              "      <td>512.0</td>\n",
              "      <td>29532.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>The Four Loves</td>\n",
              "      <td>Clive Staples Lewis</td>\n",
              "      <td>Christian life</td>\n",
              "      <td>2002.0</td>\n",
              "      <td>4.15</td>\n",
              "      <td>170.0</td>\n",
              "      <td>33684.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6805</th>\n",
              "      <td>I Am that</td>\n",
              "      <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
              "      <td>Philosophy</td>\n",
              "      <td>1999.0</td>\n",
              "      <td>4.51</td>\n",
              "      <td>531.0</td>\n",
              "      <td>104.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6806</th>\n",
              "      <td>Secrets Of The Heart</td>\n",
              "      <td>Khalil Gibran</td>\n",
              "      <td>Mysticism</td>\n",
              "      <td>1993.0</td>\n",
              "      <td>4.08</td>\n",
              "      <td>74.0</td>\n",
              "      <td>324.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6807</th>\n",
              "      <td>Fahrenheit 451</td>\n",
              "      <td>Ray Bradbury</td>\n",
              "      <td>Book burning</td>\n",
              "      <td>2004.0</td>\n",
              "      <td>3.98</td>\n",
              "      <td>186.0</td>\n",
              "      <td>5733.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6808</th>\n",
              "      <td>The Berlin Phenomenology</td>\n",
              "      <td>Georg Wilhelm Friedrich Hegel</td>\n",
              "      <td>History</td>\n",
              "      <td>1981.0</td>\n",
              "      <td>0.00</td>\n",
              "      <td>210.0</td>\n",
              "      <td>0.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6809</th>\n",
              "      <td>'I'm Telling You Stories'</td>\n",
              "      <td>Helena Grice;Tim Woods</td>\n",
              "      <td>Literary Criticism</td>\n",
              "      <td>1998.0</td>\n",
              "      <td>3.70</td>\n",
              "      <td>136.0</td>\n",
              "      <td>10.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>6599 rows × 7 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                          title                                       authors  \\\n",
              "0                        Gilead                            Marilynne Robinson   \n",
              "1                  Spider's Web               Charles Osborne;Agatha Christie   \n",
              "2                  The One Tree                          Stephen R. Donaldson   \n",
              "3                Rage of angels                                Sidney Sheldon   \n",
              "4                The Four Loves                           Clive Staples Lewis   \n",
              "...                         ...                                           ...   \n",
              "6805                  I Am that  Sri Nisargadatta Maharaj;Sudhakar S. Dikshit   \n",
              "6806       Secrets Of The Heart                                 Khalil Gibran   \n",
              "6807             Fahrenheit 451                                  Ray Bradbury   \n",
              "6808   The Berlin Phenomenology                 Georg Wilhelm Friedrich Hegel   \n",
              "6809  'I'm Telling You Stories'                        Helena Grice;Tim Woods   \n",
              "\n",
              "                         categories  published_year  average_rating  \\\n",
              "0                           Fiction          2004.0            3.85   \n",
              "1     Detective and mystery stories          2000.0            3.83   \n",
              "2                  American fiction          1982.0            3.97   \n",
              "3                           Fiction          1993.0            3.93   \n",
              "4                    Christian life          2002.0            4.15   \n",
              "...                             ...             ...             ...   \n",
              "6805                     Philosophy          1999.0            4.51   \n",
              "6806                      Mysticism          1993.0            4.08   \n",
              "6807                   Book burning          2004.0            3.98   \n",
              "6808                        History          1981.0            0.00   \n",
              "6809             Literary Criticism          1998.0            3.70   \n",
              "\n",
              "      num_pages  ratings_count  \n",
              "0         247.0          361.0  \n",
              "1         241.0         5164.0  \n",
              "2         479.0          172.0  \n",
              "3         512.0        29532.0  \n",
              "4         170.0        33684.0  \n",
              "...         ...            ...  \n",
              "6805      531.0          104.0  \n",
              "6806       74.0          324.0  \n",
              "6807      186.0         5733.0  \n",
              "6808      210.0            0.0  \n",
              "6809      136.0           10.0  \n",
              "\n",
              "[6599 rows x 7 columns]"
            ]
          },
          "execution_count": 46,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Discard, in place, every row that has at least one missing value.\n",
        "# The original index labels are kept (the index is not reset).\n",
        "books.dropna(axis=0, how=\"any\", inplace=True)\n",
        "books"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "lx9gqh7UAOq2",
        "outputId": "651a374e-eb8c-426f-faa0-c1cd6c9762bb"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>6599</td>\n",
              "      <td>6599</td>\n",
              "      <td>6599</td>\n",
              "      <td>6599.000000</td>\n",
              "      <td>6599.000000</td>\n",
              "      <td>6599.000000</td>\n",
              "      <td>6.599000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>unique</th>\n",
              "      <td>6216</td>\n",
              "      <td>3728</td>\n",
              "      <td>563</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>top</th>\n",
              "      <td>The Lord of the Rings</td>\n",
              "      <td>Agatha Christie</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>freq</th>\n",
              "      <td>9</td>\n",
              "      <td>37</td>\n",
              "      <td>2561</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1998.750417</td>\n",
              "      <td>3.931367</td>\n",
              "      <td>348.296863</td>\n",
              "      <td>2.143083e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>10.168465</td>\n",
              "      <td>0.331173</td>\n",
              "      <td>239.199411</td>\n",
              "      <td>1.392929e+05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1876.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1997.000000</td>\n",
              "      <td>3.770000</td>\n",
              "      <td>208.000000</td>\n",
              "      <td>1.630000e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2002.000000</td>\n",
              "      <td>3.950000</td>\n",
              "      <td>304.000000</td>\n",
              "      <td>1.032000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2005.000000</td>\n",
              "      <td>4.130000</td>\n",
              "      <td>420.000000</td>\n",
              "      <td>6.105500e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>5.000000</td>\n",
              "      <td>3342.000000</td>\n",
              "      <td>5.629932e+06</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                        title          authors categories  published_year  \\\n",
              "count                    6599             6599       6599     6599.000000   \n",
              "unique                   6216             3728        563             NaN   \n",
              "top     The Lord of the Rings  Agatha Christie    Fiction             NaN   \n",
              "freq                        9               37       2561             NaN   \n",
              "mean                      NaN              NaN        NaN     1998.750417   \n",
              "std                       NaN              NaN        NaN       10.168465   \n",
              "min                       NaN              NaN        NaN     1876.000000   \n",
              "25%                       NaN              NaN        NaN     1997.000000   \n",
              "50%                       NaN              NaN        NaN     2002.000000   \n",
              "75%                       NaN              NaN        NaN     2005.000000   \n",
              "max                       NaN              NaN        NaN     2019.000000   \n",
              "\n",
              "        average_rating    num_pages  ratings_count  \n",
              "count      6599.000000  6599.000000   6.599000e+03  \n",
              "unique             NaN          NaN            NaN  \n",
              "top                NaN          NaN            NaN  \n",
              "freq               NaN          NaN            NaN  \n",
              "mean          3.931367   348.296863   2.143083e+04  \n",
              "std           0.331173   239.199411   1.392929e+05  \n",
              "min           0.000000     0.000000   0.000000e+00  \n",
              "25%           3.770000   208.000000   1.630000e+02  \n",
              "50%           3.950000   304.000000   1.032000e+03  \n",
              "75%           4.130000   420.000000   6.105500e+03  \n",
              "max           5.000000  3342.000000   5.629932e+06  "
            ]
          },
          "execution_count": 47,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Summary statistics for every column of the current frame.\n",
        "books.describe(include=\"all\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "J7DUOhOwAOq2",
        "outputId": "3bce3396-8f22-41a4-ebfb-9895ad2bb73c"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "Fiction                            2561\n",
              "Juvenile Fiction                    524\n",
              "Biography & Autobiography           398\n",
              "History                             261\n",
              "Literary Criticism                  165\n",
              "                                   ... \n",
              "Child analysis                        1\n",
              "Illinois                              1\n",
              "Erinyes (Greek mythology)             1\n",
              "Exorcism                              1\n",
              "People with social disabilities       1\n",
              "Name: categories, Length: 563, dtype: int64"
            ]
          },
          "execution_count": 48,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Frequency of each distinct value in the categories column, most common first.\n",
        "books.loc[:, \"categories\"].value_counts()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "4R3GDLXgAOq2",
        "outputId": "4d3a9d8a-f37d-4cba-ebbb-0615571396f4"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "2006.0    877\n",
              "2005.0    681\n",
              "2004.0    605\n",
              "2003.0    569\n",
              "2002.0    470\n",
              "         ... \n",
              "1928.0      1\n",
              "1904.0      1\n",
              "1938.0      1\n",
              "1936.0      1\n",
              "1947.0      1\n",
              "Name: published_year, Length: 91, dtype: int64"
            ]
          },
          "execution_count": 49,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Frequency of each distinct value in the published_year column, most common first.\n",
        "books.loc[:, \"published_year\"].value_counts()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YSLMCB4nAOq2",
        "outputId": "ccdb49cc-9037-4995-9d0b-c0a749f6eae1"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "Agatha Christie               37\n",
              "Stephen King                  36\n",
              "William Shakespeare           29\n",
              "John Ronald Reuel Tolkien     25\n",
              "Sandra Brown                  23\n",
              "                              ..\n",
              "Aeg                            1\n",
              "Pauline Reage                  1\n",
              "Tim Flannery                   1\n",
              "Saint Augustine (of Hippo)     1\n",
              "Michael S. Reynolds            1\n",
              "Name: authors, Length: 3728, dtype: int64"
            ]
          },
          "execution_count": 50,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Frequency of each distinct value in the authors column, most common first.\n",
        "books.loc[:, \"authors\"].value_counts()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "D8HrFKIGAOq3",
        "outputId": "20a73c84-1b66-4dd8-fa99-caba6ca68b29"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "4.00    125\n",
              "3.93    110\n",
              "3.95    109\n",
              "3.99    108\n",
              "3.96    104\n",
              "       ... \n",
              "4.64      1\n",
              "4.68      1\n",
              "4.72      1\n",
              "2.44      1\n",
              "4.78      1\n",
              "Name: average_rating, Length: 200, dtype: int64"
            ]
          },
          "execution_count": 52,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Frequency of each distinct value in the average_rating column, most common first.\n",
        "books.loc[:, \"average_rating\"].value_counts()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "utiDxb60AOq3"
      },
      "outputs": [],
      "source": [
        "import sklearn\n",
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Step 1: hold out 20% of the rows as the test set (seeded for reproducibility).\n",
        "books_train, books_test = train_test_split(books, random_state=1, test_size=0.2)\n",
        "# Step 2: split the remaining 80% in half, so train and validation each end up\n",
        "# with roughly 40% of the original rows.\n",
        "books_train, books_val = train_test_split(books_train, random_state=1, test_size=0.5)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "rS0epPE6AOq3",
        "outputId": "f704dda5-95e7-474b-a9b3-d8e107067710"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>915</th>\n",
              "      <td>The Autobiography of Alice B. Toklas</td>\n",
              "      <td>Gertrude Stein</td>\n",
              "      <td>Biography &amp; Autobiography</td>\n",
              "      <td>2001.0</td>\n",
              "      <td>3.59</td>\n",
              "      <td>272.0</td>\n",
              "      <td>233.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4493</th>\n",
              "      <td>Never Far from Nowhere</td>\n",
              "      <td>Andrea Levy</td>\n",
              "      <td>Blacks</td>\n",
              "      <td>1996.0</td>\n",
              "      <td>3.68</td>\n",
              "      <td>282.0</td>\n",
              "      <td>601.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1983</th>\n",
              "      <td>Year's Happy Ending</td>\n",
              "      <td>Betty Neels</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>2001.0</td>\n",
              "      <td>3.95</td>\n",
              "      <td>216.0</td>\n",
              "      <td>128.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2196</th>\n",
              "      <td>Wrinkles in Time</td>\n",
              "      <td>George Smoot;Keay Davidson</td>\n",
              "      <td>Science</td>\n",
              "      <td>1994.0</td>\n",
              "      <td>3.99</td>\n",
              "      <td>360.0</td>\n",
              "      <td>985.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4011</th>\n",
              "      <td>Dispatches</td>\n",
              "      <td>Michael Herr</td>\n",
              "      <td>History</td>\n",
              "      <td>1991.0</td>\n",
              "      <td>4.23</td>\n",
              "      <td>260.0</td>\n",
              "      <td>12590.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2841</th>\n",
              "      <td>Magic Bites</td>\n",
              "      <td>Ilona Andrews</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>2007.0</td>\n",
              "      <td>4.07</td>\n",
              "      <td>260.0</td>\n",
              "      <td>82231.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1713</th>\n",
              "      <td>High Five</td>\n",
              "      <td>Janet Evanovich</td>\n",
              "      <td>Bail bond agents</td>\n",
              "      <td>2000.0</td>\n",
              "      <td>4.18</td>\n",
              "      <td>336.0</td>\n",
              "      <td>99172.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3469</th>\n",
              "      <td>A Brief History of Time</td>\n",
              "      <td>Stephen Hawking</td>\n",
              "      <td>Science</td>\n",
              "      <td>1998.0</td>\n",
              "      <td>4.16</td>\n",
              "      <td>212.0</td>\n",
              "      <td>214520.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1657</th>\n",
              "      <td>The Magus</td>\n",
              "      <td>John Fowles</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>2001.0</td>\n",
              "      <td>4.05</td>\n",
              "      <td>656.0</td>\n",
              "      <td>36909.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3986</th>\n",
              "      <td>The Complete Monty Python's Flying Circus</td>\n",
              "      <td>Graham Chapman;Monty Python (Comedy troupe);Te...</td>\n",
              "      <td>Humor</td>\n",
              "      <td>1989.0</td>\n",
              "      <td>4.44</td>\n",
              "      <td>384.0</td>\n",
              "      <td>1191.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>2639 rows × 7 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                                          title  \\\n",
              "915        The Autobiography of Alice B. Toklas   \n",
              "4493                     Never Far from Nowhere   \n",
              "1983                        Year's Happy Ending   \n",
              "2196                           Wrinkles in Time   \n",
              "4011                                 Dispatches   \n",
              "...                                         ...   \n",
              "2841                                Magic Bites   \n",
              "1713                                  High Five   \n",
              "3469                    A Brief History of Time   \n",
              "1657                                  The Magus   \n",
              "3986  The Complete Monty Python's Flying Circus   \n",
              "\n",
              "                                                authors  \\\n",
              "915                                      Gertrude Stein   \n",
              "4493                                        Andrea Levy   \n",
              "1983                                        Betty Neels   \n",
              "2196                         George Smoot;Keay Davidson   \n",
              "4011                                       Michael Herr   \n",
              "...                                                 ...   \n",
              "2841                                      Ilona Andrews   \n",
              "1713                                    Janet Evanovich   \n",
              "3469                                    Stephen Hawking   \n",
              "1657                                        John Fowles   \n",
              "3986  Graham Chapman;Monty Python (Comedy troupe);Te...   \n",
              "\n",
              "                     categories  published_year  average_rating  num_pages  \\\n",
              "915   Biography & Autobiography          2001.0            3.59      272.0   \n",
              "4493                     Blacks          1996.0            3.68      282.0   \n",
              "1983                    Fiction          2001.0            3.95      216.0   \n",
              "2196                    Science          1994.0            3.99      360.0   \n",
              "4011                    History          1991.0            4.23      260.0   \n",
              "...                         ...             ...             ...        ...   \n",
              "2841                    Fiction          2007.0            4.07      260.0   \n",
              "1713           Bail bond agents          2000.0            4.18      336.0   \n",
              "3469                    Science          1998.0            4.16      212.0   \n",
              "1657                    Fiction          2001.0            4.05      656.0   \n",
              "3986                      Humor          1989.0            4.44      384.0   \n",
              "\n",
              "      ratings_count  \n",
              "915           233.0  \n",
              "4493          601.0  \n",
              "1983          128.0  \n",
              "2196          985.0  \n",
              "4011        12590.0  \n",
              "...             ...  \n",
              "2841        82231.0  \n",
              "1713        99172.0  \n",
              "3469       214520.0  \n",
              "1657        36909.0  \n",
              "3986         1191.0  \n",
              "\n",
              "[2639 rows x 7 columns]"
            ]
          },
          "execution_count": 55,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Display the training split; pandas abbreviates the view to the first and last rows.\n",
        "books_train"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oUWEVGaGAOq3",
        "outputId": "6a053600-98a9-4990-ae44-cb8eeda97293"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>2639</td>\n",
              "      <td>2639</td>\n",
              "      <td>2639</td>\n",
              "      <td>2639.000000</td>\n",
              "      <td>2639.000000</td>\n",
              "      <td>2639.000000</td>\n",
              "      <td>2.639000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>unique</th>\n",
              "      <td>2547</td>\n",
              "      <td>1827</td>\n",
              "      <td>286</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>top</th>\n",
              "      <td>One Hundred Years of Solitude</td>\n",
              "      <td>Stephen King</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>freq</th>\n",
              "      <td>4</td>\n",
              "      <td>18</td>\n",
              "      <td>1027</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1999.032967</td>\n",
              "      <td>3.929807</td>\n",
              "      <td>349.534672</td>\n",
              "      <td>2.363199e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>9.865320</td>\n",
              "      <td>0.358919</td>\n",
              "      <td>244.871090</td>\n",
              "      <td>1.452470e+05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1876.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1997.000000</td>\n",
              "      <td>3.770000</td>\n",
              "      <td>208.000000</td>\n",
              "      <td>1.745000e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2002.000000</td>\n",
              "      <td>3.950000</td>\n",
              "      <td>304.000000</td>\n",
              "      <td>1.066000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2005.000000</td>\n",
              "      <td>4.130000</td>\n",
              "      <td>429.000000</td>\n",
              "      <td>6.084500e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>5.000000</td>\n",
              "      <td>3020.000000</td>\n",
              "      <td>4.367341e+06</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                                title       authors categories  \\\n",
              "count                            2639          2639       2639   \n",
              "unique                           2547          1827        286   \n",
              "top     One Hundred Years of Solitude  Stephen King    Fiction   \n",
              "freq                                4            18       1027   \n",
              "mean                              NaN           NaN        NaN   \n",
              "std                               NaN           NaN        NaN   \n",
              "min                               NaN           NaN        NaN   \n",
              "25%                               NaN           NaN        NaN   \n",
              "50%                               NaN           NaN        NaN   \n",
              "75%                               NaN           NaN        NaN   \n",
              "max                               NaN           NaN        NaN   \n",
              "\n",
              "        published_year  average_rating    num_pages  ratings_count  \n",
              "count      2639.000000     2639.000000  2639.000000   2.639000e+03  \n",
              "unique             NaN             NaN          NaN            NaN  \n",
              "top                NaN             NaN          NaN            NaN  \n",
              "freq               NaN             NaN          NaN            NaN  \n",
              "mean       1999.032967        3.929807   349.534672   2.363199e+04  \n",
              "std           9.865320        0.358919   244.871090   1.452470e+05  \n",
              "min        1876.000000        0.000000     0.000000   0.000000e+00  \n",
              "25%        1997.000000        3.770000   208.000000   1.745000e+02  \n",
              "50%        2002.000000        3.950000   304.000000   1.066000e+03  \n",
              "75%        2005.000000        4.130000   429.000000   6.084500e+03  \n",
              "max        2019.000000        5.000000  3020.000000   4.367341e+06  "
            ]
          },
          "execution_count": 56,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Summarise every column of the training split, text columns included.\n",
        "books_train.describe(include=\"all\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "yXkOfB9bAOq3",
        "outputId": "3fc9e96e-8fe0-490c-d6b5-71b21277aa0a"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>1320</td>\n",
              "      <td>1320</td>\n",
              "      <td>1320</td>\n",
              "      <td>1320.000000</td>\n",
              "      <td>1320.000000</td>\n",
              "      <td>1320.000000</td>\n",
              "      <td>1.320000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>unique</th>\n",
              "      <td>1303</td>\n",
              "      <td>1064</td>\n",
              "      <td>185</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>top</th>\n",
              "      <td>20,000 Leagues Under the Sea</td>\n",
              "      <td>Stephen King</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>freq</th>\n",
              "      <td>3</td>\n",
              "      <td>7</td>\n",
              "      <td>540</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1998.590909</td>\n",
              "      <td>3.925470</td>\n",
              "      <td>339.346970</td>\n",
              "      <td>1.588767e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>10.119569</td>\n",
              "      <td>0.299805</td>\n",
              "      <td>219.560964</td>\n",
              "      <td>7.877064e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1942.000000</td>\n",
              "      <td>2.330000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1996.000000</td>\n",
              "      <td>3.750000</td>\n",
              "      <td>208.000000</td>\n",
              "      <td>1.510000e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2002.000000</td>\n",
              "      <td>3.950000</td>\n",
              "      <td>304.000000</td>\n",
              "      <td>1.068000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2005.000000</td>\n",
              "      <td>4.130000</td>\n",
              "      <td>401.000000</td>\n",
              "      <td>6.360000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2017.000000</td>\n",
              "      <td>5.000000</td>\n",
              "      <td>3342.000000</td>\n",
              "      <td>2.115562e+06</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                               title       authors categories  published_year  \\\n",
              "count                           1320          1320       1320     1320.000000   \n",
              "unique                          1303          1064        185             NaN   \n",
              "top     20,000 Leagues Under the Sea  Stephen King    Fiction             NaN   \n",
              "freq                               3             7        540             NaN   \n",
              "mean                             NaN           NaN        NaN     1998.590909   \n",
              "std                              NaN           NaN        NaN       10.119569   \n",
              "min                              NaN           NaN        NaN     1942.000000   \n",
              "25%                              NaN           NaN        NaN     1996.000000   \n",
              "50%                              NaN           NaN        NaN     2002.000000   \n",
              "75%                              NaN           NaN        NaN     2005.000000   \n",
              "max                              NaN           NaN        NaN     2017.000000   \n",
              "\n",
              "        average_rating    num_pages  ratings_count  \n",
              "count      1320.000000  1320.000000   1.320000e+03  \n",
              "unique             NaN          NaN            NaN  \n",
              "top                NaN          NaN            NaN  \n",
              "freq               NaN          NaN            NaN  \n",
              "mean          3.925470   339.346970   1.588767e+04  \n",
              "std           0.299805   219.560964   7.877064e+04  \n",
              "min           2.330000     0.000000   0.000000e+00  \n",
              "25%           3.750000   208.000000   1.510000e+02  \n",
              "50%           3.950000   304.000000   1.068000e+03  \n",
              "75%           4.130000   401.000000   6.360000e+03  \n",
              "max           5.000000  3342.000000   2.115562e+06  "
            ]
          },
          "execution_count": 57,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Summarise every column of the test split, text columns included.\n",
        "books_test.describe(include=\"all\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CWG6q0ixAOq4",
        "outputId": "367a1088-975b-4da2-e333-50152a4fcbc3"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>title</th>\n",
              "      <th>authors</th>\n",
              "      <th>categories</th>\n",
              "      <th>published_year</th>\n",
              "      <th>average_rating</th>\n",
              "      <th>num_pages</th>\n",
              "      <th>ratings_count</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>2640</td>\n",
              "      <td>2640</td>\n",
              "      <td>2640</td>\n",
              "      <td>2640.000000</td>\n",
              "      <td>2640.000000</td>\n",
              "      <td>2640.000000</td>\n",
              "      <td>2.640000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>unique</th>\n",
              "      <td>2562</td>\n",
              "      <td>1850</td>\n",
              "      <td>313</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>top</th>\n",
              "      <td>Three Complete Novels</td>\n",
              "      <td>Agatha Christie</td>\n",
              "      <td>Fiction</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>freq</th>\n",
              "      <td>6</td>\n",
              "      <td>14</td>\n",
              "      <td>994</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1998.547727</td>\n",
              "      <td>3.935875</td>\n",
              "      <td>351.534470</td>\n",
              "      <td>2.200209e+04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>10.483752</td>\n",
              "      <td>0.316971</td>\n",
              "      <td>242.829463</td>\n",
              "      <td>1.558830e+05</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1901.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>4.000000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1996.000000</td>\n",
              "      <td>3.770000</td>\n",
              "      <td>208.000000</td>\n",
              "      <td>1.557500e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2002.000000</td>\n",
              "      <td>3.950000</td>\n",
              "      <td>309.500000</td>\n",
              "      <td>9.555000e+02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2005.000000</td>\n",
              "      <td>4.130000</td>\n",
              "      <td>430.250000</td>\n",
              "      <td>5.980750e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>5.000000</td>\n",
              "      <td>2965.000000</td>\n",
              "      <td>5.629932e+06</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                        title          authors categories  published_year  \\\n",
              "count                    2640             2640       2640     2640.000000   \n",
              "unique                   2562             1850        313             NaN   \n",
              "top     Three Complete Novels  Agatha Christie    Fiction             NaN   \n",
              "freq                        6               14        994             NaN   \n",
              "mean                      NaN              NaN        NaN     1998.547727   \n",
              "std                       NaN              NaN        NaN       10.483752   \n",
              "min                       NaN              NaN        NaN     1901.000000   \n",
              "25%                       NaN              NaN        NaN     1996.000000   \n",
              "50%                       NaN              NaN        NaN     2002.000000   \n",
              "75%                       NaN              NaN        NaN     2005.000000   \n",
              "max                       NaN              NaN        NaN     2019.000000   \n",
              "\n",
              "        average_rating    num_pages  ratings_count  \n",
              "count      2640.000000  2640.000000   2.640000e+03  \n",
              "unique             NaN          NaN            NaN  \n",
              "top                NaN          NaN            NaN  \n",
              "freq               NaN          NaN            NaN  \n",
              "mean          3.935875   351.534470   2.200209e+04  \n",
              "std           0.316971   242.829463   1.558830e+05  \n",
              "min           0.000000     4.000000   0.000000e+00  \n",
              "25%           3.770000   208.000000   1.557500e+02  \n",
              "50%           3.950000   309.500000   9.555000e+02  \n",
              "75%           4.130000   430.250000   5.980750e+03  \n",
              "max           5.000000  2965.000000   5.629932e+06  "
            ]
          },
          "execution_count": 58,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Summarise every column of the validation split, text columns included.\n",
        "books_val.describe(include=\"all\")"
      ]
    }
  ],
  "metadata": {
    "author": "Tomasz Ziętkiewicz",
    "celltoolbar": "Slideshow",
    "email": "tomasz.zietkiewicz@amu.edu.pl",
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "lang": "pl",
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.10"
    },
    "slideshow": {
      "slide_type": "slide"
    },
    "subtitle": "2.Dane[laboratoria]",
    "title": "Inżynieria uczenia maszynowego",
    "toc": {
      "base_numbering": 1,
      "nav_menu": {},
      "number_sections": false,
      "sideBar": false,
      "skip_h1_title": false,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": false,
      "toc_window_display": false
    },
    "year": "2021",
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}