2256 lines
95 KiB
Plaintext
2256 lines
95 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "zPOfPO5LAOqy",
|
||
"outputId": "a8846a75-ef0a-4048-8168-f71d79d7b7e8"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
"Requirement already satisfied: kaggle in /usr/local/lib/python3.9/dist-packages (1.5.13)\n",
|
||
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.27.1)\n",
|
||
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.9/dist-packages (from kaggle) (8.0.1)\n",
|
||
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.16.0)\n",
|
||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.65.0)\n",
|
||
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.12.7)\n",
|
||
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
|
||
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.15)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.9/dist-packages (from python-slugify->kaggle) (1.3)\n",
|
||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
|
||
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.0.12)\n",
|
||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n",
|
||
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n",
|
||
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install --user kaggle\n",
|
||
"!pip install --user pandas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "gc7VHACRAOq0",
|
||
"outputId": "20220fe9-e872-451b-f759-b4cfff91bc51"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Traceback (most recent call last):\n",
|
||
" File \"/usr/local/bin/kaggle\", line 5, in <module>\n",
|
||
" from kaggle.cli import main\n",
|
||
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/__init__.py\", line 23, in <module>\n",
|
||
" api.authenticate()\n",
|
||
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/api/kaggle_api_extended.py\", line 164, in authenticate\n",
|
||
" raise IOError('Could not find {}. Make sure it\\'s located in'\n",
|
||
"OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!kaggle datasets download -d dylanjcastillo/7k-books-with-metadata"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"id": "utslvpN1AOq0",
|
||
"outputId": "dda342a0-18dc-40a7-86bd-b233844c1231",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"Archive: 7k-books-with-metadata.zip\n",
|
||
" inflating: books.csv \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!unzip -o 7k-books-with-metadata.zip"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"id": "k9Q3DwbiAOq0",
|
||
"outputId": "ab0a4f14-188b-41d6-c3fe-0553d80aa648",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/",
|
||
"height": 676
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "execute_result",
|
||
"data": {
|
||
"text/plain": [
|
||
" isbn13 isbn10 title \\\n",
|
||
"0 9780002005883 0002005883 Gilead \n",
|
||
"1 9780002261982 0002261987 Spider's Web \n",
|
||
"2 9780006163831 0006163831 The One Tree \n",
|
||
"3 9780006178736 0006178731 Rage of angels \n",
|
||
"4 9780006280897 0006280897 The Four Loves \n",
|
||
"... ... ... ... \n",
|
||
"6805 9788185300535 8185300534 I Am that \n",
|
||
"6806 9788185944609 8185944601 Secrets Of The Heart \n",
|
||
"6807 9788445074879 8445074873 Fahrenheit 451 \n",
|
||
"6808 9789027712059 9027712050 The Berlin Phenomenology \n",
|
||
"6809 9789042003408 9042003405 'I'm Telling You Stories' \n",
|
||
"\n",
|
||
" subtitle \\\n",
|
||
"0 NaN \n",
|
||
"1 A Novel \n",
|
||
"2 NaN \n",
|
||
"3 NaN \n",
|
||
"4 NaN \n",
|
||
"... ... \n",
|
||
"6805 Talks with Sri Nisargadatta Maharaj \n",
|
||
"6806 NaN \n",
|
||
"6807 NaN \n",
|
||
"6808 NaN \n",
|
||
"6809 Jeanette Winterson and the Politics of Reading \n",
|
||
"\n",
|
||
" authors \\\n",
|
||
"0 Marilynne Robinson \n",
|
||
"1 Charles Osborne;Agatha Christie \n",
|
||
"2 Stephen R. Donaldson \n",
|
||
"3 Sidney Sheldon \n",
|
||
"4 Clive Staples Lewis \n",
|
||
"... ... \n",
|
||
"6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
|
||
"6806 Khalil Gibran \n",
|
||
"6807 Ray Bradbury \n",
|
||
"6808 Georg Wilhelm Friedrich Hegel \n",
|
||
"6809 Helena Grice;Tim Woods \n",
|
||
"\n",
|
||
" categories \\\n",
|
||
"0 Fiction \n",
|
||
"1 Detective and mystery stories \n",
|
||
"2 American fiction \n",
|
||
"3 Fiction \n",
|
||
"4 Christian life \n",
|
||
"... ... \n",
|
||
"6805 Philosophy \n",
|
||
"6806 Mysticism \n",
|
||
"6807 Book burning \n",
|
||
"6808 History \n",
|
||
"6809 Literary Criticism \n",
|
||
"\n",
|
||
" thumbnail \\\n",
|
||
"0 http://books.google.com/books/content?id=KQZCP... \n",
|
||
"1 http://books.google.com/books/content?id=gA5GP... \n",
|
||
"2 http://books.google.com/books/content?id=OmQaw... \n",
|
||
"3 http://books.google.com/books/content?id=FKo2T... \n",
|
||
"4 http://books.google.com/books/content?id=XhQ5X... \n",
|
||
"... ... \n",
|
||
"6805 http://books.google.com/books/content?id=Fv_JP... \n",
|
||
"6806 http://books.google.com/books/content?id=XcrVp... \n",
|
||
"6807 NaN \n",
|
||
"6808 http://books.google.com/books/content?id=Vy7Sk... \n",
|
||
"6809 http://books.google.com/books/content?id=2lVyR... \n",
|
||
"\n",
|
||
" description published_year \\\n",
|
||
"0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
|
||
"1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
|
||
"2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n",
|
||
"3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
|
||
"4 Lewis' work on the nature of love divides love... 2002.0 \n",
|
||
"... ... ... \n",
|
||
"6805 This collection of the timeless teachings of o... 1999.0 \n",
|
||
"6806 NaN 1993.0 \n",
|
||
"6807 NaN 2004.0 \n",
|
||
"6808 Since the three volume edition ofHegel's Philo... 1981.0 \n",
|
||
"6809 This is a jubilant and rewarding collection of... 1998.0 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"0 3.85 247.0 361.0 \n",
|
||
"1 3.83 241.0 5164.0 \n",
|
||
"2 3.97 479.0 172.0 \n",
|
||
"3 3.93 512.0 29532.0 \n",
|
||
"4 4.15 170.0 33684.0 \n",
|
||
"... ... ... ... \n",
|
||
"6805 4.51 531.0 104.0 \n",
|
||
"6806 4.08 74.0 324.0 \n",
|
||
"6807 3.98 186.0 5733.0 \n",
|
||
"6808 0.00 210.0 0.0 \n",
|
||
"6809 3.70 136.0 10.0 \n",
|
||
"\n",
|
||
"[6810 rows x 12 columns]"
|
||
],
|
||
"text/html": [
|
||
"\n",
|
||
" <div id=\"df-65d5a23c-fc61-4f09-a1fe-41189afea541\">\n",
|
||
" <div class=\"colab-df-container\">\n",
|
||
" <div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>isbn13</th>\n",
|
||
" <th>isbn10</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>subtitle</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>thumbnail</th>\n",
|
||
" <th>description</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>9780002005883</td>\n",
|
||
" <td>0002005883</td>\n",
|
||
" <td>Gilead</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Marilynne Robinson</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=KQZCP...</td>\n",
|
||
" <td>A NOVEL THAT READERS and critics have been eag...</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.85</td>\n",
|
||
" <td>247.0</td>\n",
|
||
" <td>361.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>9780002261982</td>\n",
|
||
" <td>0002261987</td>\n",
|
||
" <td>Spider's Web</td>\n",
|
||
" <td>A Novel</td>\n",
|
||
" <td>Charles Osborne;Agatha Christie</td>\n",
|
||
" <td>Detective and mystery stories</td>\n",
|
||
" <td>http://books.google.com/books/content?id=gA5GP...</td>\n",
|
||
" <td>A new 'Christie for Christmas' -- a full-lengt...</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>3.83</td>\n",
|
||
" <td>241.0</td>\n",
|
||
" <td>5164.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>9780006163831</td>\n",
|
||
" <td>0006163831</td>\n",
|
||
" <td>The One Tree</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Stephen R. Donaldson</td>\n",
|
||
" <td>American fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=OmQaw...</td>\n",
|
||
" <td>Volume Two of Stephen Donaldson's acclaimed se...</td>\n",
|
||
" <td>1982.0</td>\n",
|
||
" <td>3.97</td>\n",
|
||
" <td>479.0</td>\n",
|
||
" <td>172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>9780006178736</td>\n",
|
||
" <td>0006178731</td>\n",
|
||
" <td>Rage of angels</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Sidney Sheldon</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=FKo2T...</td>\n",
|
||
" <td>A memorable, mesmerizing heroine Jennifer -- b...</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>3.93</td>\n",
|
||
" <td>512.0</td>\n",
|
||
" <td>29532.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>9780006280897</td>\n",
|
||
" <td>0006280897</td>\n",
|
||
" <td>The Four Loves</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Clive Staples Lewis</td>\n",
|
||
" <td>Christian life</td>\n",
|
||
" <td>http://books.google.com/books/content?id=XhQ5X...</td>\n",
|
||
" <td>Lewis' work on the nature of love divides love...</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>4.15</td>\n",
|
||
" <td>170.0</td>\n",
|
||
" <td>33684.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6805</th>\n",
|
||
" <td>9788185300535</td>\n",
|
||
" <td>8185300534</td>\n",
|
||
" <td>I Am that</td>\n",
|
||
" <td>Talks with Sri Nisargadatta Maharaj</td>\n",
|
||
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
|
||
" <td>Philosophy</td>\n",
|
||
" <td>http://books.google.com/books/content?id=Fv_JP...</td>\n",
|
||
" <td>This collection of the timeless teachings of o...</td>\n",
|
||
" <td>1999.0</td>\n",
|
||
" <td>4.51</td>\n",
|
||
" <td>531.0</td>\n",
|
||
" <td>104.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6806</th>\n",
|
||
" <td>9788185944609</td>\n",
|
||
" <td>8185944601</td>\n",
|
||
" <td>Secrets Of The Heart</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Khalil Gibran</td>\n",
|
||
" <td>Mysticism</td>\n",
|
||
" <td>http://books.google.com/books/content?id=XcrVp...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>4.08</td>\n",
|
||
" <td>74.0</td>\n",
|
||
" <td>324.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6807</th>\n",
|
||
" <td>9788445074879</td>\n",
|
||
" <td>8445074873</td>\n",
|
||
" <td>Fahrenheit 451</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Ray Bradbury</td>\n",
|
||
" <td>Book burning</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.98</td>\n",
|
||
" <td>186.0</td>\n",
|
||
" <td>5733.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6808</th>\n",
|
||
" <td>9789027712059</td>\n",
|
||
" <td>9027712050</td>\n",
|
||
" <td>The Berlin Phenomenology</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>http://books.google.com/books/content?id=Vy7Sk...</td>\n",
|
||
" <td>Since the three volume edition ofHegel's Philo...</td>\n",
|
||
" <td>1981.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>210.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6809</th>\n",
|
||
" <td>9789042003408</td>\n",
|
||
" <td>9042003405</td>\n",
|
||
" <td>'I'm Telling You Stories'</td>\n",
|
||
" <td>Jeanette Winterson and the Politics of Reading</td>\n",
|
||
" <td>Helena Grice;Tim Woods</td>\n",
|
||
" <td>Literary Criticism</td>\n",
|
||
" <td>http://books.google.com/books/content?id=2lVyR...</td>\n",
|
||
" <td>This is a jubilant and rewarding collection of...</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>3.70</td>\n",
|
||
" <td>136.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6810 rows × 12 columns</p>\n",
|
||
"</div>\n",
|
||
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-65d5a23c-fc61-4f09-a1fe-41189afea541')\"\n",
|
||
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
" style=\"display:none;\">\n",
|
||
" \n",
|
||
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
" width=\"24px\">\n",
|
||
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
||
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
||
" </svg>\n",
|
||
" </button>\n",
|
||
" \n",
|
||
" <style>\n",
|
||
" .colab-df-container {\n",
|
||
" display:flex;\n",
|
||
" flex-wrap:wrap;\n",
|
||
" gap: 12px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert {\n",
|
||
" background-color: #E8F0FE;\n",
|
||
" border: none;\n",
|
||
" border-radius: 50%;\n",
|
||
" cursor: pointer;\n",
|
||
" display: none;\n",
|
||
" fill: #1967D2;\n",
|
||
" height: 32px;\n",
|
||
" padding: 0 0 0 0;\n",
|
||
" width: 32px;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .colab-df-convert:hover {\n",
|
||
" background-color: #E2EBFA;\n",
|
||
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
" fill: #174EA6;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert {\n",
|
||
" background-color: #3B4455;\n",
|
||
" fill: #D2E3FC;\n",
|
||
" }\n",
|
||
"\n",
|
||
" [theme=dark] .colab-df-convert:hover {\n",
|
||
" background-color: #434B5C;\n",
|
||
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
" fill: #FFFFFF;\n",
|
||
" }\n",
|
||
" </style>\n",
|
||
"\n",
|
||
" <script>\n",
|
||
" const buttonEl =\n",
|
||
" document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541 button.colab-df-convert');\n",
|
||
" buttonEl.style.display =\n",
|
||
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
"\n",
|
||
" async function convertToInteractive(key) {\n",
|
||
" const element = document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541');\n",
|
||
" const dataTable =\n",
|
||
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
" [key], {});\n",
|
||
" if (!dataTable) return;\n",
|
||
"\n",
|
||
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
" + ' to learn more about interactive tables.';\n",
|
||
" element.innerHTML = '';\n",
|
||
" dataTable['output_type'] = 'display_data';\n",
|
||
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
" const docLink = document.createElement('div');\n",
|
||
" docLink.innerHTML = docLinkHtml;\n",
|
||
" element.appendChild(docLink);\n",
|
||
" }\n",
|
||
" </script>\n",
|
||
" </div>\n",
|
||
" </div>\n",
|
||
" "
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"execution_count": 16
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"books=pd.read_csv('books.csv')\n",
|
||
"books"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"scrolled": true,
|
||
"id": "WgVroQDTAOq1",
|
||
"outputId": "932fdfce-1d65-4290-cc5d-cc053f4fa459"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>isbn13</th>\n",
|
||
" <th>isbn10</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>subtitle</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>thumbnail</th>\n",
|
||
" <th>description</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6.810000e+03</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>2381</td>\n",
|
||
" <td>6738</td>\n",
|
||
" <td>6711</td>\n",
|
||
" <td>6481</td>\n",
|
||
" <td>6548</td>\n",
|
||
" <td>6804.000000</td>\n",
|
||
" <td>6767.000000</td>\n",
|
||
" <td>6767.000000</td>\n",
|
||
" <td>6.767000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>6398</td>\n",
|
||
" <td>2009</td>\n",
|
||
" <td>3780</td>\n",
|
||
" <td>567</td>\n",
|
||
" <td>6481</td>\n",
|
||
" <td>6474</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0786282258</td>\n",
|
||
" <td>The Lord of the Rings</td>\n",
|
||
" <td>A Novel</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=6dVAW...</td>\n",
|
||
" <td>This is a reproduction of the original artefac...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>226</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2588</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>9.780677e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.630364</td>\n",
|
||
" <td>3.933284</td>\n",
|
||
" <td>348.181026</td>\n",
|
||
" <td>2.106910e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>6.068911e+08</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.484257</td>\n",
|
||
" <td>0.331352</td>\n",
|
||
" <td>242.376783</td>\n",
|
||
" <td>1.376207e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>9.780002e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1853.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>9.780330e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.590000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>9.780553e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.960000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.018000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>9.780810e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>420.000000</td>\n",
|
||
" <td>5.992500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>9.789042e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" isbn13 isbn10 title subtitle \\\n",
|
||
"count 6.810000e+03 6810 6810 2381 \n",
|
||
"unique NaN 6810 6398 2009 \n",
|
||
"top NaN 0786282258 The Lord of the Rings A Novel \n",
|
||
"freq NaN 1 11 226 \n",
|
||
"mean 9.780677e+12 NaN NaN NaN \n",
|
||
"std 6.068911e+08 NaN NaN NaN \n",
|
||
"min 9.780002e+12 NaN NaN NaN \n",
|
||
"25% 9.780330e+12 NaN NaN NaN \n",
|
||
"50% 9.780553e+12 NaN NaN NaN \n",
|
||
"75% 9.780810e+12 NaN NaN NaN \n",
|
||
"max 9.789042e+12 NaN NaN NaN \n",
|
||
"\n",
|
||
" authors categories \\\n",
|
||
"count 6738 6711 \n",
|
||
"unique 3780 567 \n",
|
||
"top Agatha Christie Fiction \n",
|
||
"freq 37 2588 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN \n",
|
||
"\n",
|
||
" thumbnail \\\n",
|
||
"count 6481 \n",
|
||
"unique 6481 \n",
|
||
"top http://books.google.com/books/content?id=6dVAW... \n",
|
||
"freq 1 \n",
|
||
"mean NaN \n",
|
||
"std NaN \n",
|
||
"min NaN \n",
|
||
"25% NaN \n",
|
||
"50% NaN \n",
|
||
"75% NaN \n",
|
||
"max NaN \n",
|
||
"\n",
|
||
" description published_year \\\n",
|
||
"count 6548 6804.000000 \n",
|
||
"unique 6474 NaN \n",
|
||
"top This is a reproduction of the original artefac... NaN \n",
|
||
"freq 6 NaN \n",
|
||
"mean NaN 1998.630364 \n",
|
||
"std NaN 10.484257 \n",
|
||
"min NaN 1853.000000 \n",
|
||
"25% NaN 1996.000000 \n",
|
||
"50% NaN 2002.000000 \n",
|
||
"75% NaN 2005.000000 \n",
|
||
"max NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 6767.000000 6767.000000 6.767000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.933284 348.181026 2.106910e+04 \n",
|
||
"std 0.331352 242.376783 1.376207e+05 \n",
|
||
"min 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.590000e+02 \n",
|
||
"50% 3.960000 304.000000 1.018000e+03 \n",
|
||
"75% 4.130000 420.000000 5.992500e+03 \n",
|
||
"max 5.000000 3342.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "1hwHH65hAOq1",
|
||
"outputId": "0b3e32ab-230b-4d9d-db8a-d2d25e57161b"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"isbn13 0\n",
|
||
"isbn10 0\n",
|
||
"title 0\n",
|
||
"subtitle 4429\n",
|
||
"authors 72\n",
|
||
"categories 99\n",
|
||
"thumbnail 329\n",
|
||
"description 262\n",
|
||
"published_year 6\n",
|
||
"average_rating 43\n",
|
||
"num_pages 43\n",
|
||
"ratings_count 43\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.isnull().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "mZMFUt2pAOq1"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"books.drop('thumbnail', inplace=True, axis=1)\n",
|
||
"books.drop('subtitle', inplace=True, axis=1)\n",
|
||
"books.drop('description', inplace=True, axis=1)\n",
|
||
"books.drop('isbn13', inplace=True, axis=1)\n",
|
||
"books.drop('isbn10', inplace=True, axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "y6I2PKuhAOq1",
|
||
"outputId": "2e03efc3-e8e3-4cc8-abb8-97e0594665be"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"title 0\n",
|
||
"authors 72\n",
|
||
"categories 99\n",
|
||
"published_year 6\n",
|
||
"average_rating 43\n",
|
||
"num_pages 43\n",
|
||
"ratings_count 43\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.isnull().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "21R7h40lAOq1",
|
||
"outputId": "4c9f746b-4347-4cd3-cdae-21e7bc818f2c"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Gilead</td>\n",
|
||
" <td>Marilynne Robinson</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.85</td>\n",
|
||
" <td>247.0</td>\n",
|
||
" <td>361.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Spider's Web</td>\n",
|
||
" <td>Charles Osborne;Agatha Christie</td>\n",
|
||
" <td>Detective and mystery stories</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>3.83</td>\n",
|
||
" <td>241.0</td>\n",
|
||
" <td>5164.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>The One Tree</td>\n",
|
||
" <td>Stephen R. Donaldson</td>\n",
|
||
" <td>American fiction</td>\n",
|
||
" <td>1982.0</td>\n",
|
||
" <td>3.97</td>\n",
|
||
" <td>479.0</td>\n",
|
||
" <td>172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Rage of angels</td>\n",
|
||
" <td>Sidney Sheldon</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>3.93</td>\n",
|
||
" <td>512.0</td>\n",
|
||
" <td>29532.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>The Four Loves</td>\n",
|
||
" <td>Clive Staples Lewis</td>\n",
|
||
" <td>Christian life</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>4.15</td>\n",
|
||
" <td>170.0</td>\n",
|
||
" <td>33684.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6805</th>\n",
|
||
" <td>I Am that</td>\n",
|
||
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
|
||
" <td>Philosophy</td>\n",
|
||
" <td>1999.0</td>\n",
|
||
" <td>4.51</td>\n",
|
||
" <td>531.0</td>\n",
|
||
" <td>104.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6806</th>\n",
|
||
" <td>Secrets Of The Heart</td>\n",
|
||
" <td>Khalil Gibran</td>\n",
|
||
" <td>Mysticism</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>4.08</td>\n",
|
||
" <td>74.0</td>\n",
|
||
" <td>324.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6807</th>\n",
|
||
" <td>Fahrenheit 451</td>\n",
|
||
" <td>Ray Bradbury</td>\n",
|
||
" <td>Book burning</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.98</td>\n",
|
||
" <td>186.0</td>\n",
|
||
" <td>5733.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6808</th>\n",
|
||
" <td>The Berlin Phenomenology</td>\n",
|
||
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>1981.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>210.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6809</th>\n",
|
||
" <td>'I'm Telling You Stories'</td>\n",
|
||
" <td>Helena Grice;Tim Woods</td>\n",
|
||
" <td>Literary Criticism</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>3.70</td>\n",
|
||
" <td>136.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6599 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors \\\n",
|
||
"0 Gilead Marilynne Robinson \n",
|
||
"1 Spider's Web Charles Osborne;Agatha Christie \n",
|
||
"2 The One Tree Stephen R. Donaldson \n",
|
||
"3 Rage of angels Sidney Sheldon \n",
|
||
"4 The Four Loves Clive Staples Lewis \n",
|
||
"... ... ... \n",
|
||
"6805 I Am that Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
|
||
"6806 Secrets Of The Heart Khalil Gibran \n",
|
||
"6807 Fahrenheit 451 Ray Bradbury \n",
|
||
"6808 The Berlin Phenomenology Georg Wilhelm Friedrich Hegel \n",
|
||
"6809 'I'm Telling You Stories' Helena Grice;Tim Woods \n",
|
||
"\n",
|
||
" categories published_year average_rating \\\n",
|
||
"0 Fiction 2004.0 3.85 \n",
|
||
"1 Detective and mystery stories 2000.0 3.83 \n",
|
||
"2 American fiction 1982.0 3.97 \n",
|
||
"3 Fiction 1993.0 3.93 \n",
|
||
"4 Christian life 2002.0 4.15 \n",
|
||
"... ... ... ... \n",
|
||
"6805 Philosophy 1999.0 4.51 \n",
|
||
"6806 Mysticism 1993.0 4.08 \n",
|
||
"6807 Book burning 2004.0 3.98 \n",
|
||
"6808 History 1981.0 0.00 \n",
|
||
"6809 Literary Criticism 1998.0 3.70 \n",
|
||
"\n",
|
||
" num_pages ratings_count \n",
|
||
"0 247.0 361.0 \n",
|
||
"1 241.0 5164.0 \n",
|
||
"2 479.0 172.0 \n",
|
||
"3 512.0 29532.0 \n",
|
||
"4 170.0 33684.0 \n",
|
||
"... ... ... \n",
|
||
"6805 531.0 104.0 \n",
|
||
"6806 74.0 324.0 \n",
|
||
"6807 186.0 5733.0 \n",
|
||
"6808 210.0 0.0 \n",
|
||
"6809 136.0 10.0 \n",
|
||
"\n",
|
||
"[6599 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.dropna(inplace=True)\n",
|
||
"books"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "lx9gqh7UAOq2",
|
||
"outputId": "651a374e-eb8c-426f-faa0-c1cd6c9762bb"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6.599000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>6216</td>\n",
|
||
" <td>3728</td>\n",
|
||
" <td>563</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>The Lord of the Rings</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2561</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.750417</td>\n",
|
||
" <td>3.931367</td>\n",
|
||
" <td>348.296863</td>\n",
|
||
" <td>2.143083e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.168465</td>\n",
|
||
" <td>0.331173</td>\n",
|
||
" <td>239.199411</td>\n",
|
||
" <td>1.392929e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1876.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1997.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.630000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.032000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>420.000000</td>\n",
|
||
" <td>6.105500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 6599 6599 6599 6599.000000 \n",
|
||
"unique 6216 3728 563 NaN \n",
|
||
"top The Lord of the Rings Agatha Christie Fiction NaN \n",
|
||
"freq 9 37 2561 NaN \n",
|
||
"mean NaN NaN NaN 1998.750417 \n",
|
||
"std NaN NaN NaN 10.168465 \n",
|
||
"min NaN NaN NaN 1876.000000 \n",
|
||
"25% NaN NaN NaN 1997.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 6599.000000 6599.000000 6.599000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.931367 348.296863 2.143083e+04 \n",
|
||
"std 0.331173 239.199411 1.392929e+05 \n",
|
||
"min 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.630000e+02 \n",
|
||
"50% 3.950000 304.000000 1.032000e+03 \n",
|
||
"75% 4.130000 420.000000 6.105500e+03 \n",
|
||
"max 5.000000 3342.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "J7DUOhOwAOq2",
|
||
"outputId": "3bce3396-8f22-41a4-ebfb-9895ad2bb73c"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Fiction 2561\n",
|
||
"Juvenile Fiction 524\n",
|
||
"Biography & Autobiography 398\n",
|
||
"History 261\n",
|
||
"Literary Criticism 165\n",
|
||
" ... \n",
|
||
"Child analysis 1\n",
|
||
"Illinois 1\n",
|
||
"Erinyes (Greek mythology) 1\n",
|
||
"Exorcism 1\n",
|
||
"People with social disabilities 1\n",
|
||
"Name: categories, Length: 563, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"categories\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "4R3GDLXgAOq2",
|
||
"outputId": "4d3a9d8a-f37d-4cba-ebbb-0615571396f4"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2006.0 877\n",
|
||
"2005.0 681\n",
|
||
"2004.0 605\n",
|
||
"2003.0 569\n",
|
||
"2002.0 470\n",
|
||
" ... \n",
|
||
"1928.0 1\n",
|
||
"1904.0 1\n",
|
||
"1938.0 1\n",
|
||
"1936.0 1\n",
|
||
"1947.0 1\n",
|
||
"Name: published_year, Length: 91, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"published_year\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "YSLMCB4nAOq2",
|
||
"outputId": "ccdb49cc-9037-4995-9d0b-c0a749f6eae1"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Agatha Christie 37\n",
|
||
"Stephen King 36\n",
|
||
"William Shakespeare 29\n",
|
||
"John Ronald Reuel Tolkien 25\n",
|
||
"Sandra Brown 23\n",
|
||
" ..\n",
|
||
"Aeg 1\n",
|
||
"Pauline Reage 1\n",
|
||
"Tim Flannery 1\n",
|
||
"Saint Augustine (of Hippo) 1\n",
|
||
"Michael S. Reynolds 1\n",
|
||
"Name: authors, Length: 3728, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"authors\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "D8HrFKIGAOq3",
|
||
"outputId": "20a73c84-1b66-4dd8-fa99-caba6ca68b29"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4.00 125\n",
|
||
"3.93 110\n",
|
||
"3.95 109\n",
|
||
"3.99 108\n",
|
||
"3.96 104\n",
|
||
" ... \n",
|
||
"4.64 1\n",
|
||
"4.68 1\n",
|
||
"4.72 1\n",
|
||
"2.44 1\n",
|
||
"4.78 1\n",
|
||
"Name: average_rating, Length: 200, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"average_rating\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "utiDxb60AOq3"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import sklearn\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"books_train, books_test = sklearn.model_selection.train_test_split(books, test_size=0.2, random_state=1)\n",
|
||
"books_train, books_val = sklearn.model_selection.train_test_split(books_train, test_size=0.5, random_state=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "rS0epPE6AOq3",
|
||
"outputId": "f704dda5-95e7-474b-a9b3-d8e107067710"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>915</th>\n",
|
||
" <td>The Autobiography of Alice B. Toklas</td>\n",
|
||
" <td>Gertrude Stein</td>\n",
|
||
" <td>Biography & Autobiography</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>3.59</td>\n",
|
||
" <td>272.0</td>\n",
|
||
" <td>233.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4493</th>\n",
|
||
" <td>Never Far from Nowhere</td>\n",
|
||
" <td>Andrea Levy</td>\n",
|
||
" <td>Blacks</td>\n",
|
||
" <td>1996.0</td>\n",
|
||
" <td>3.68</td>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>601.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1983</th>\n",
|
||
" <td>Year's Happy Ending</td>\n",
|
||
" <td>Betty Neels</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>3.95</td>\n",
|
||
" <td>216.0</td>\n",
|
||
" <td>128.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2196</th>\n",
|
||
" <td>Wrinkles in Time</td>\n",
|
||
" <td>George Smoot;Keay Davidson</td>\n",
|
||
" <td>Science</td>\n",
|
||
" <td>1994.0</td>\n",
|
||
" <td>3.99</td>\n",
|
||
" <td>360.0</td>\n",
|
||
" <td>985.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4011</th>\n",
|
||
" <td>Dispatches</td>\n",
|
||
" <td>Michael Herr</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>1991.0</td>\n",
|
||
" <td>4.23</td>\n",
|
||
" <td>260.0</td>\n",
|
||
" <td>12590.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2841</th>\n",
|
||
" <td>Magic Bites</td>\n",
|
||
" <td>Ilona Andrews</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2007.0</td>\n",
|
||
" <td>4.07</td>\n",
|
||
" <td>260.0</td>\n",
|
||
" <td>82231.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1713</th>\n",
|
||
" <td>High Five</td>\n",
|
||
" <td>Janet Evanovich</td>\n",
|
||
" <td>Bail bond agents</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>4.18</td>\n",
|
||
" <td>336.0</td>\n",
|
||
" <td>99172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3469</th>\n",
|
||
" <td>A Brief History of Time</td>\n",
|
||
" <td>Stephen Hawking</td>\n",
|
||
" <td>Science</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>4.16</td>\n",
|
||
" <td>212.0</td>\n",
|
||
" <td>214520.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1657</th>\n",
|
||
" <td>The Magus</td>\n",
|
||
" <td>John Fowles</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>4.05</td>\n",
|
||
" <td>656.0</td>\n",
|
||
" <td>36909.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3986</th>\n",
|
||
" <td>The Complete Monty Python's Flying Circus</td>\n",
|
||
" <td>Graham Chapman;Monty Python (Comedy troupe);Te...</td>\n",
|
||
" <td>Humor</td>\n",
|
||
" <td>1989.0</td>\n",
|
||
" <td>4.44</td>\n",
|
||
" <td>384.0</td>\n",
|
||
" <td>1191.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>2639 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title \\\n",
|
||
"915 The Autobiography of Alice B. Toklas \n",
|
||
"4493 Never Far from Nowhere \n",
|
||
"1983 Year's Happy Ending \n",
|
||
"2196 Wrinkles in Time \n",
|
||
"4011 Dispatches \n",
|
||
"... ... \n",
|
||
"2841 Magic Bites \n",
|
||
"1713 High Five \n",
|
||
"3469 A Brief History of Time \n",
|
||
"1657 The Magus \n",
|
||
"3986 The Complete Monty Python's Flying Circus \n",
|
||
"\n",
|
||
" authors \\\n",
|
||
"915 Gertrude Stein \n",
|
||
"4493 Andrea Levy \n",
|
||
"1983 Betty Neels \n",
|
||
"2196 George Smoot;Keay Davidson \n",
|
||
"4011 Michael Herr \n",
|
||
"... ... \n",
|
||
"2841 Ilona Andrews \n",
|
||
"1713 Janet Evanovich \n",
|
||
"3469 Stephen Hawking \n",
|
||
"1657 John Fowles \n",
|
||
"3986 Graham Chapman;Monty Python (Comedy troupe);Te... \n",
|
||
"\n",
|
||
" categories published_year average_rating num_pages \\\n",
|
||
"915 Biography & Autobiography 2001.0 3.59 272.0 \n",
|
||
"4493 Blacks 1996.0 3.68 282.0 \n",
|
||
"1983 Fiction 2001.0 3.95 216.0 \n",
|
||
"2196 Science 1994.0 3.99 360.0 \n",
|
||
"4011 History 1991.0 4.23 260.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"2841 Fiction 2007.0 4.07 260.0 \n",
|
||
"1713 Bail bond agents 2000.0 4.18 336.0 \n",
|
||
"3469 Science 1998.0 4.16 212.0 \n",
|
||
"1657 Fiction 2001.0 4.05 656.0 \n",
|
||
"3986 Humor 1989.0 4.44 384.0 \n",
|
||
"\n",
|
||
" ratings_count \n",
|
||
"915 233.0 \n",
|
||
"4493 601.0 \n",
|
||
"1983 128.0 \n",
|
||
"2196 985.0 \n",
|
||
"4011 12590.0 \n",
|
||
"... ... \n",
|
||
"2841 82231.0 \n",
|
||
"1713 99172.0 \n",
|
||
"3469 214520.0 \n",
|
||
"1657 36909.0 \n",
|
||
"3986 1191.0 \n",
|
||
"\n",
|
||
"[2639 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "oUWEVGaGAOq3",
|
||
"outputId": "6a053600-98a9-4990-ae44-cb8eeda97293"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2.639000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2547</td>\n",
|
||
" <td>1827</td>\n",
|
||
" <td>286</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>One Hundred Years of Solitude</td>\n",
|
||
" <td>Stephen King</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>1027</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1999.032967</td>\n",
|
||
" <td>3.929807</td>\n",
|
||
" <td>349.534672</td>\n",
|
||
" <td>2.363199e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9.865320</td>\n",
|
||
" <td>0.358919</td>\n",
|
||
" <td>244.871090</td>\n",
|
||
" <td>1.452470e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1876.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1997.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.745000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.066000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>429.000000</td>\n",
|
||
" <td>6.084500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3020.000000</td>\n",
|
||
" <td>4.367341e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories \\\n",
|
||
"count 2639 2639 2639 \n",
|
||
"unique 2547 1827 286 \n",
|
||
"top One Hundred Years of Solitude Stephen King Fiction \n",
|
||
"freq 4 18 1027 \n",
|
||
"mean NaN NaN NaN \n",
|
||
"std NaN NaN NaN \n",
|
||
"min NaN NaN NaN \n",
|
||
"25% NaN NaN NaN \n",
|
||
"50% NaN NaN NaN \n",
|
||
"75% NaN NaN NaN \n",
|
||
"max NaN NaN NaN \n",
|
||
"\n",
|
||
" published_year average_rating num_pages ratings_count \n",
|
||
"count 2639.000000 2639.000000 2639.000000 2.639000e+03 \n",
|
||
"unique NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN \n",
|
||
"mean 1999.032967 3.929807 349.534672 2.363199e+04 \n",
|
||
"std 9.865320 0.358919 244.871090 1.452470e+05 \n",
|
||
"min 1876.000000 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 1997.000000 3.770000 208.000000 1.745000e+02 \n",
|
||
"50% 2002.000000 3.950000 304.000000 1.066000e+03 \n",
|
||
"75% 2005.000000 4.130000 429.000000 6.084500e+03 \n",
|
||
"max 2019.000000 5.000000 3020.000000 4.367341e+06 "
|
||
]
|
||
},
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_train.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "yXkOfB9bAOq3",
|
||
"outputId": "3fc9e96e-8fe0-490c-d6b5-71b21277aa0a"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1.320000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>1303</td>\n",
|
||
" <td>1064</td>\n",
|
||
" <td>185</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>20,000 Leagues Under the Sea</td>\n",
|
||
" <td>Stephen King</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>540</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.590909</td>\n",
|
||
" <td>3.925470</td>\n",
|
||
" <td>339.346970</td>\n",
|
||
" <td>1.588767e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.119569</td>\n",
|
||
" <td>0.299805</td>\n",
|
||
" <td>219.560964</td>\n",
|
||
" <td>7.877064e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1942.000000</td>\n",
|
||
" <td>2.330000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.750000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.510000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.068000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>401.000000</td>\n",
|
||
" <td>6.360000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2017.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>2.115562e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 1320 1320 1320 1320.000000 \n",
|
||
"unique 1303 1064 185 NaN \n",
|
||
"top 20,000 Leagues Under the Sea Stephen King Fiction NaN \n",
|
||
"freq 3 7 540 NaN \n",
|
||
"mean NaN NaN NaN 1998.590909 \n",
|
||
"std NaN NaN NaN 10.119569 \n",
|
||
"min NaN NaN NaN 1942.000000 \n",
|
||
"25% NaN NaN NaN 1996.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2017.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 1320.000000 1320.000000 1.320000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.925470 339.346970 1.588767e+04 \n",
|
||
"std 0.299805 219.560964 7.877064e+04 \n",
|
||
"min 2.330000 0.000000 0.000000e+00 \n",
|
||
"25% 3.750000 208.000000 1.510000e+02 \n",
|
||
"50% 3.950000 304.000000 1.068000e+03 \n",
|
||
"75% 4.130000 401.000000 6.360000e+03 \n",
|
||
"max 5.000000 3342.000000 2.115562e+06 "
|
||
]
|
||
},
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_test.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"id": "CWG6q0ixAOq4",
|
||
"outputId": "367a1088-975b-4da2-e333-50152a4fcbc3"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2.640000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>1850</td>\n",
|
||
" <td>313</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>Three Complete Novels</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>994</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.547727</td>\n",
|
||
" <td>3.935875</td>\n",
|
||
" <td>351.534470</td>\n",
|
||
" <td>2.200209e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.483752</td>\n",
|
||
" <td>0.316971</td>\n",
|
||
" <td>242.829463</td>\n",
|
||
" <td>1.558830e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>4.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.557500e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>309.500000</td>\n",
|
||
" <td>9.555000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>430.250000</td>\n",
|
||
" <td>5.980750e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>2965.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 2640 2640 2640 2640.000000 \n",
|
||
"unique 2562 1850 313 NaN \n",
|
||
"top Three Complete Novels Agatha Christie Fiction NaN \n",
|
||
"freq 6 14 994 NaN \n",
|
||
"mean NaN NaN NaN 1998.547727 \n",
|
||
"std NaN NaN NaN 10.483752 \n",
|
||
"min NaN NaN NaN 1901.000000 \n",
|
||
"25% NaN NaN NaN 1996.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 2640.000000 2640.000000 2.640000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.935875 351.534470 2.200209e+04 \n",
|
||
"std 0.316971 242.829463 1.558830e+05 \n",
|
||
"min 0.000000 4.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.557500e+02 \n",
|
||
"50% 3.950000 309.500000 9.555000e+02 \n",
|
||
"75% 4.130000 430.250000 5.980750e+03 \n",
|
||
"max 5.000000 2965.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_val.describe(include='all')"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"author": "Tomasz Ziętkiewicz",
|
||
"celltoolbar": "Slideshow",
|
||
"email": "tomasz.zietkiewicz@amu.edu.pl",
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"lang": "pl",
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.10"
|
||
},
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
},
|
||
"subtitle": "2.Dane[laboratoria]",
|
||
"title": "Inżynieria uczenia maszynowego",
|
||
"toc": {
|
||
"base_numbering": 1,
|
||
"nav_menu": {},
|
||
"number_sections": false,
|
||
"sideBar": false,
|
||
"skip_h1_title": false,
|
||
"title_cell": "Table of Contents",
|
||
"title_sidebar": "Contents",
|
||
"toc_cell": false,
|
||
"toc_position": {},
|
||
"toc_section_display": false,
|
||
"toc_window_display": false
|
||
},
|
||
"year": "2021",
|
||
"colab": {
|
||
"provenance": []
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
} |