ium_425850/IUM_zadanie.ipynb

2256 lines
95 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zPOfPO5LAOqy",
"outputId": "a8846a75-ef0a-4048-8168-f71d79d7b7e8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: kaggle in /usr/local/lib/python3.9/dist-packages (1.5.13)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.27.1)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.9/dist-packages (from kaggle) (8.0.1)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.65.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.15)\n",
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.9/dist-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.0.12)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
]
}
],
"source": [
"# %pip installs into the environment of the running kernel (a `!pip` subshell may target another one).\n",
"%pip install kaggle pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gc7VHACRAOq0",
"outputId": "20220fe9-e872-451b-f759-b4cfff91bc51"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Traceback (most recent call last):\n",
" File \"/usr/local/bin/kaggle\", line 5, in <module>\n",
" from kaggle.cli import main\n",
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/__init__.py\", line 23, in <module>\n",
" api.authenticate()\n",
" File \"/usr/local/lib/python3.9/dist-packages/kaggle/api/kaggle_api_extended.py\", line 164, in authenticate\n",
" raise IOError('Could not find {}. Make sure it\\'s located in'\n",
"OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.\n"
]
}
],
"source": [
"!kaggle datasets download -d dylanjcastillo/7k-books-with-metadata"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "utslvpN1AOq0",
"outputId": "dda342a0-18dc-40a7-86bd-b233844c1231",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Archive: 7k-books-with-metadata.zip\n",
" inflating: books.csv \n"
]
}
],
"source": [
"!unzip -o 7k-books-with-metadata.zip"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "k9Q3DwbiAOq0",
"outputId": "ab0a4f14-188b-41d6-c3fe-0553d80aa648",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 676
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" isbn13 isbn10 title \\\n",
"0 9780002005883 0002005883 Gilead \n",
"1 9780002261982 0002261987 Spider's Web \n",
"2 9780006163831 0006163831 The One Tree \n",
"3 9780006178736 0006178731 Rage of angels \n",
"4 9780006280897 0006280897 The Four Loves \n",
"... ... ... ... \n",
"6805 9788185300535 8185300534 I Am that \n",
"6806 9788185944609 8185944601 Secrets Of The Heart \n",
"6807 9788445074879 8445074873 Fahrenheit 451 \n",
"6808 9789027712059 9027712050 The Berlin Phenomenology \n",
"6809 9789042003408 9042003405 'I'm Telling You Stories' \n",
"\n",
" subtitle \\\n",
"0 NaN \n",
"1 A Novel \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"... ... \n",
"6805 Talks with Sri Nisargadatta Maharaj \n",
"6806 NaN \n",
"6807 NaN \n",
"6808 NaN \n",
"6809 Jeanette Winterson and the Politics of Reading \n",
"\n",
" authors \\\n",
"0 Marilynne Robinson \n",
"1 Charles Osborne;Agatha Christie \n",
"2 Stephen R. Donaldson \n",
"3 Sidney Sheldon \n",
"4 Clive Staples Lewis \n",
"... ... \n",
"6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
"6806 Khalil Gibran \n",
"6807 Ray Bradbury \n",
"6808 Georg Wilhelm Friedrich Hegel \n",
"6809 Helena Grice;Tim Woods \n",
"\n",
" categories \\\n",
"0 Fiction \n",
"1 Detective and mystery stories \n",
"2 American fiction \n",
"3 Fiction \n",
"4 Christian life \n",
"... ... \n",
"6805 Philosophy \n",
"6806 Mysticism \n",
"6807 Book burning \n",
"6808 History \n",
"6809 Literary Criticism \n",
"\n",
" thumbnail \\\n",
"0 http://books.google.com/books/content?id=KQZCP... \n",
"1 http://books.google.com/books/content?id=gA5GP... \n",
"2 http://books.google.com/books/content?id=OmQaw... \n",
"3 http://books.google.com/books/content?id=FKo2T... \n",
"4 http://books.google.com/books/content?id=XhQ5X... \n",
"... ... \n",
"6805 http://books.google.com/books/content?id=Fv_JP... \n",
"6806 http://books.google.com/books/content?id=XcrVp... \n",
"6807 NaN \n",
"6808 http://books.google.com/books/content?id=Vy7Sk... \n",
"6809 http://books.google.com/books/content?id=2lVyR... \n",
"\n",
" description published_year \\\n",
"0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
"1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
"2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n",
"3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
"4 Lewis' work on the nature of love divides love... 2002.0 \n",
"... ... ... \n",
"6805 This collection of the timeless teachings of o... 1999.0 \n",
"6806 NaN 1993.0 \n",
"6807 NaN 2004.0 \n",
"6808 Since the three volume edition ofHegel's Philo... 1981.0 \n",
"6809 This is a jubilant and rewarding collection of... 1998.0 \n",
"\n",
" average_rating num_pages ratings_count \n",
"0 3.85 247.0 361.0 \n",
"1 3.83 241.0 5164.0 \n",
"2 3.97 479.0 172.0 \n",
"3 3.93 512.0 29532.0 \n",
"4 4.15 170.0 33684.0 \n",
"... ... ... ... \n",
"6805 4.51 531.0 104.0 \n",
"6806 4.08 74.0 324.0 \n",
"6807 3.98 186.0 5733.0 \n",
"6808 0.00 210.0 0.0 \n",
"6809 3.70 136.0 10.0 \n",
"\n",
"[6810 rows x 12 columns]"
],
"text/html": [
"\n",
" <div id=\"df-65d5a23c-fc61-4f09-a1fe-41189afea541\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>isbn13</th>\n",
" <th>isbn10</th>\n",
" <th>title</th>\n",
" <th>subtitle</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>thumbnail</th>\n",
" <th>description</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9780002005883</td>\n",
" <td>0002005883</td>\n",
" <td>Gilead</td>\n",
" <td>NaN</td>\n",
" <td>Marilynne Robinson</td>\n",
" <td>Fiction</td>\n",
" <td>http://books.google.com/books/content?id=KQZCP...</td>\n",
" <td>A NOVEL THAT READERS and critics have been eag...</td>\n",
" <td>2004.0</td>\n",
" <td>3.85</td>\n",
" <td>247.0</td>\n",
" <td>361.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>9780002261982</td>\n",
" <td>0002261987</td>\n",
" <td>Spider's Web</td>\n",
" <td>A Novel</td>\n",
" <td>Charles Osborne;Agatha Christie</td>\n",
" <td>Detective and mystery stories</td>\n",
" <td>http://books.google.com/books/content?id=gA5GP...</td>\n",
" <td>A new 'Christie for Christmas' -- a full-lengt...</td>\n",
" <td>2000.0</td>\n",
" <td>3.83</td>\n",
" <td>241.0</td>\n",
" <td>5164.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>9780006163831</td>\n",
" <td>0006163831</td>\n",
" <td>The One Tree</td>\n",
" <td>NaN</td>\n",
" <td>Stephen R. Donaldson</td>\n",
" <td>American fiction</td>\n",
" <td>http://books.google.com/books/content?id=OmQaw...</td>\n",
" <td>Volume Two of Stephen Donaldson's acclaimed se...</td>\n",
" <td>1982.0</td>\n",
" <td>3.97</td>\n",
" <td>479.0</td>\n",
" <td>172.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9780006178736</td>\n",
" <td>0006178731</td>\n",
" <td>Rage of angels</td>\n",
" <td>NaN</td>\n",
" <td>Sidney Sheldon</td>\n",
" <td>Fiction</td>\n",
" <td>http://books.google.com/books/content?id=FKo2T...</td>\n",
" <td>A memorable, mesmerizing heroine Jennifer -- b...</td>\n",
" <td>1993.0</td>\n",
" <td>3.93</td>\n",
" <td>512.0</td>\n",
" <td>29532.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9780006280897</td>\n",
" <td>0006280897</td>\n",
" <td>The Four Loves</td>\n",
" <td>NaN</td>\n",
" <td>Clive Staples Lewis</td>\n",
" <td>Christian life</td>\n",
" <td>http://books.google.com/books/content?id=XhQ5X...</td>\n",
" <td>Lewis' work on the nature of love divides love...</td>\n",
" <td>2002.0</td>\n",
" <td>4.15</td>\n",
" <td>170.0</td>\n",
" <td>33684.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6805</th>\n",
" <td>9788185300535</td>\n",
" <td>8185300534</td>\n",
" <td>I Am that</td>\n",
" <td>Talks with Sri Nisargadatta Maharaj</td>\n",
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
" <td>Philosophy</td>\n",
" <td>http://books.google.com/books/content?id=Fv_JP...</td>\n",
" <td>This collection of the timeless teachings of o...</td>\n",
" <td>1999.0</td>\n",
" <td>4.51</td>\n",
" <td>531.0</td>\n",
" <td>104.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6806</th>\n",
" <td>9788185944609</td>\n",
" <td>8185944601</td>\n",
" <td>Secrets Of The Heart</td>\n",
" <td>NaN</td>\n",
" <td>Khalil Gibran</td>\n",
" <td>Mysticism</td>\n",
" <td>http://books.google.com/books/content?id=XcrVp...</td>\n",
" <td>NaN</td>\n",
" <td>1993.0</td>\n",
" <td>4.08</td>\n",
" <td>74.0</td>\n",
" <td>324.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6807</th>\n",
" <td>9788445074879</td>\n",
" <td>8445074873</td>\n",
" <td>Fahrenheit 451</td>\n",
" <td>NaN</td>\n",
" <td>Ray Bradbury</td>\n",
" <td>Book burning</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2004.0</td>\n",
" <td>3.98</td>\n",
" <td>186.0</td>\n",
" <td>5733.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6808</th>\n",
" <td>9789027712059</td>\n",
" <td>9027712050</td>\n",
" <td>The Berlin Phenomenology</td>\n",
" <td>NaN</td>\n",
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
" <td>History</td>\n",
" <td>http://books.google.com/books/content?id=Vy7Sk...</td>\n",
" <td>Since the three volume edition ofHegel's Philo...</td>\n",
" <td>1981.0</td>\n",
" <td>0.00</td>\n",
" <td>210.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6809</th>\n",
" <td>9789042003408</td>\n",
" <td>9042003405</td>\n",
" <td>'I'm Telling You Stories'</td>\n",
" <td>Jeanette Winterson and the Politics of Reading</td>\n",
" <td>Helena Grice;Tim Woods</td>\n",
" <td>Literary Criticism</td>\n",
" <td>http://books.google.com/books/content?id=2lVyR...</td>\n",
" <td>This is a jubilant and rewarding collection of...</td>\n",
" <td>1998.0</td>\n",
" <td>3.70</td>\n",
" <td>136.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6810 rows × 12 columns</p>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-65d5a23c-fc61-4f09-a1fe-41189afea541')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-65d5a23c-fc61-4f09-a1fe-41189afea541');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
]
},
"metadata": {},
"execution_count": 16
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the CSV extracted from the Kaggle archive and show a truncated preview.\n",
"books = pd.read_csv('books.csv')\n",
"books"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true,
"id": "WgVroQDTAOq1",
"outputId": "932fdfce-1d65-4290-cc5d-cc053f4fa459"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>isbn13</th>\n",
" <th>isbn10</th>\n",
" <th>title</th>\n",
" <th>subtitle</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>thumbnail</th>\n",
" <th>description</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6.810000e+03</td>\n",
" <td>6810</td>\n",
" <td>6810</td>\n",
" <td>2381</td>\n",
" <td>6738</td>\n",
" <td>6711</td>\n",
" <td>6481</td>\n",
" <td>6548</td>\n",
" <td>6804.000000</td>\n",
" <td>6767.000000</td>\n",
" <td>6767.000000</td>\n",
" <td>6.767000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>NaN</td>\n",
" <td>6810</td>\n",
" <td>6398</td>\n",
" <td>2009</td>\n",
" <td>3780</td>\n",
" <td>567</td>\n",
" <td>6481</td>\n",
" <td>6474</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>NaN</td>\n",
" <td>0786282258</td>\n",
" <td>The Lord of the Rings</td>\n",
" <td>A Novel</td>\n",
" <td>Agatha Christie</td>\n",
" <td>Fiction</td>\n",
" <td>http://books.google.com/books/content?id=6dVAW...</td>\n",
" <td>This is a reproduction of the original artefac...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>226</td>\n",
" <td>37</td>\n",
" <td>2588</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>9.780677e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1998.630364</td>\n",
" <td>3.933284</td>\n",
" <td>348.181026</td>\n",
" <td>2.106910e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>6.068911e+08</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.484257</td>\n",
" <td>0.331352</td>\n",
" <td>242.376783</td>\n",
" <td>1.376207e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>9.780002e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1853.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>9.780330e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1996.000000</td>\n",
" <td>3.770000</td>\n",
" <td>208.000000</td>\n",
" <td>1.590000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>9.780553e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2002.000000</td>\n",
" <td>3.960000</td>\n",
" <td>304.000000</td>\n",
" <td>1.018000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.780810e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2005.000000</td>\n",
" <td>4.130000</td>\n",
" <td>420.000000</td>\n",
" <td>5.992500e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>9.789042e+12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2019.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3342.000000</td>\n",
" <td>5.629932e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" isbn13 isbn10 title subtitle \\\n",
"count 6.810000e+03 6810 6810 2381 \n",
"unique NaN 6810 6398 2009 \n",
"top NaN 0786282258 The Lord of the Rings A Novel \n",
"freq NaN 1 11 226 \n",
"mean 9.780677e+12 NaN NaN NaN \n",
"std 6.068911e+08 NaN NaN NaN \n",
"min 9.780002e+12 NaN NaN NaN \n",
"25% 9.780330e+12 NaN NaN NaN \n",
"50% 9.780553e+12 NaN NaN NaN \n",
"75% 9.780810e+12 NaN NaN NaN \n",
"max 9.789042e+12 NaN NaN NaN \n",
"\n",
" authors categories \\\n",
"count 6738 6711 \n",
"unique 3780 567 \n",
"top Agatha Christie Fiction \n",
"freq 37 2588 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" thumbnail \\\n",
"count 6481 \n",
"unique 6481 \n",
"top http://books.google.com/books/content?id=6dVAW... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" description published_year \\\n",
"count 6548 6804.000000 \n",
"unique 6474 NaN \n",
"top This is a reproduction of the original artefac... NaN \n",
"freq 6 NaN \n",
"mean NaN 1998.630364 \n",
"std NaN 10.484257 \n",
"min NaN 1853.000000 \n",
"25% NaN 1996.000000 \n",
"50% NaN 2002.000000 \n",
"75% NaN 2005.000000 \n",
"max NaN 2019.000000 \n",
"\n",
" average_rating num_pages ratings_count \n",
"count 6767.000000 6767.000000 6.767000e+03 \n",
"unique NaN NaN NaN \n",
"top NaN NaN NaN \n",
"freq NaN NaN NaN \n",
"mean 3.933284 348.181026 2.106910e+04 \n",
"std 0.331352 242.376783 1.376207e+05 \n",
"min 0.000000 0.000000 0.000000e+00 \n",
"25% 3.770000 208.000000 1.590000e+02 \n",
"50% 3.960000 304.000000 1.018000e+03 \n",
"75% 4.130000 420.000000 5.992500e+03 \n",
"max 5.000000 3342.000000 5.629932e+06 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1hwHH65hAOq1",
"outputId": "0b3e32ab-230b-4d9d-db8a-d2d25e57161b"
},
"outputs": [
{
"data": {
"text/plain": [
"isbn13 0\n",
"isbn10 0\n",
"title 0\n",
"subtitle 4429\n",
"authors 72\n",
"categories 99\n",
"thumbnail 329\n",
"description 262\n",
"published_year 6\n",
"average_rating 43\n",
"num_pages 43\n",
"ratings_count 43\n",
"dtype: int64"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "mZMFUt2pAOq1"
},
"outputs": [],
"source": [
"# Remove the thumbnail, subtitle, description and ISBN columns in a single call.\n",
"# errors='ignore' keeps this cell re-runnable after the columns are already gone.\n",
"books = books.drop(\n",
"    columns=['thumbnail', 'subtitle', 'description', 'isbn13', 'isbn10'],\n",
"    errors='ignore',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "y6I2PKuhAOq1",
"outputId": "2e03efc3-e8e3-4cc8-abb8-97e0594665be"
},
"outputs": [
{
"data": {
"text/plain": [
"title 0\n",
"authors 72\n",
"categories 99\n",
"published_year 6\n",
"average_rating 43\n",
"num_pages 43\n",
"ratings_count 43\n",
"dtype: int64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "21R7h40lAOq1",
"outputId": "4c9f746b-4347-4cd3-cdae-21e7bc818f2c"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gilead</td>\n",
" <td>Marilynne Robinson</td>\n",
" <td>Fiction</td>\n",
" <td>2004.0</td>\n",
" <td>3.85</td>\n",
" <td>247.0</td>\n",
" <td>361.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Spider's Web</td>\n",
" <td>Charles Osborne;Agatha Christie</td>\n",
" <td>Detective and mystery stories</td>\n",
" <td>2000.0</td>\n",
" <td>3.83</td>\n",
" <td>241.0</td>\n",
" <td>5164.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>The One Tree</td>\n",
" <td>Stephen R. Donaldson</td>\n",
" <td>American fiction</td>\n",
" <td>1982.0</td>\n",
" <td>3.97</td>\n",
" <td>479.0</td>\n",
" <td>172.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Rage of angels</td>\n",
" <td>Sidney Sheldon</td>\n",
" <td>Fiction</td>\n",
" <td>1993.0</td>\n",
" <td>3.93</td>\n",
" <td>512.0</td>\n",
" <td>29532.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>The Four Loves</td>\n",
" <td>Clive Staples Lewis</td>\n",
" <td>Christian life</td>\n",
" <td>2002.0</td>\n",
" <td>4.15</td>\n",
" <td>170.0</td>\n",
" <td>33684.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6805</th>\n",
" <td>I Am that</td>\n",
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
" <td>Philosophy</td>\n",
" <td>1999.0</td>\n",
" <td>4.51</td>\n",
" <td>531.0</td>\n",
" <td>104.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6806</th>\n",
" <td>Secrets Of The Heart</td>\n",
" <td>Khalil Gibran</td>\n",
" <td>Mysticism</td>\n",
" <td>1993.0</td>\n",
" <td>4.08</td>\n",
" <td>74.0</td>\n",
" <td>324.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6807</th>\n",
" <td>Fahrenheit 451</td>\n",
" <td>Ray Bradbury</td>\n",
" <td>Book burning</td>\n",
" <td>2004.0</td>\n",
" <td>3.98</td>\n",
" <td>186.0</td>\n",
" <td>5733.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6808</th>\n",
" <td>The Berlin Phenomenology</td>\n",
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
" <td>History</td>\n",
" <td>1981.0</td>\n",
" <td>0.00</td>\n",
" <td>210.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6809</th>\n",
" <td>'I'm Telling You Stories'</td>\n",
" <td>Helena Grice;Tim Woods</td>\n",
" <td>Literary Criticism</td>\n",
" <td>1998.0</td>\n",
" <td>3.70</td>\n",
" <td>136.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6599 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" title authors \\\n",
"0 Gilead Marilynne Robinson \n",
"1 Spider's Web Charles Osborne;Agatha Christie \n",
"2 The One Tree Stephen R. Donaldson \n",
"3 Rage of angels Sidney Sheldon \n",
"4 The Four Loves Clive Staples Lewis \n",
"... ... ... \n",
"6805 I Am that Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
"6806 Secrets Of The Heart Khalil Gibran \n",
"6807 Fahrenheit 451 Ray Bradbury \n",
"6808 The Berlin Phenomenology Georg Wilhelm Friedrich Hegel \n",
"6809 'I'm Telling You Stories' Helena Grice;Tim Woods \n",
"\n",
" categories published_year average_rating \\\n",
"0 Fiction 2004.0 3.85 \n",
"1 Detective and mystery stories 2000.0 3.83 \n",
"2 American fiction 1982.0 3.97 \n",
"3 Fiction 1993.0 3.93 \n",
"4 Christian life 2002.0 4.15 \n",
"... ... ... ... \n",
"6805 Philosophy 1999.0 4.51 \n",
"6806 Mysticism 1993.0 4.08 \n",
"6807 Book burning 2004.0 3.98 \n",
"6808 History 1981.0 0.00 \n",
"6809 Literary Criticism 1998.0 3.70 \n",
"\n",
" num_pages ratings_count \n",
"0 247.0 361.0 \n",
"1 241.0 5164.0 \n",
"2 479.0 172.0 \n",
"3 512.0 29532.0 \n",
"4 170.0 33684.0 \n",
"... ... ... \n",
"6805 531.0 104.0 \n",
"6806 74.0 324.0 \n",
"6807 186.0 5733.0 \n",
"6808 210.0 0.0 \n",
"6809 136.0 10.0 \n",
"\n",
"[6599 rows x 7 columns]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only rows without any missing value; the original row labels are preserved.\n",
"books = books.dropna()\n",
"books"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lx9gqh7UAOq2",
"outputId": "651a374e-eb8c-426f-faa0-c1cd6c9762bb"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6599</td>\n",
" <td>6599</td>\n",
" <td>6599</td>\n",
" <td>6599.000000</td>\n",
" <td>6599.000000</td>\n",
" <td>6599.000000</td>\n",
" <td>6.599000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>6216</td>\n",
" <td>3728</td>\n",
" <td>563</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>The Lord of the Rings</td>\n",
" <td>Agatha Christie</td>\n",
" <td>Fiction</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>9</td>\n",
" <td>37</td>\n",
" <td>2561</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1998.750417</td>\n",
" <td>3.931367</td>\n",
" <td>348.296863</td>\n",
" <td>2.143083e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.168465</td>\n",
" <td>0.331173</td>\n",
" <td>239.199411</td>\n",
" <td>1.392929e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1876.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1997.000000</td>\n",
" <td>3.770000</td>\n",
" <td>208.000000</td>\n",
" <td>1.630000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2002.000000</td>\n",
" <td>3.950000</td>\n",
" <td>304.000000</td>\n",
" <td>1.032000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2005.000000</td>\n",
" <td>4.130000</td>\n",
" <td>420.000000</td>\n",
" <td>6.105500e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2019.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3342.000000</td>\n",
" <td>5.629932e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title authors categories published_year \\\n",
"count 6599 6599 6599 6599.000000 \n",
"unique 6216 3728 563 NaN \n",
"top The Lord of the Rings Agatha Christie Fiction NaN \n",
"freq 9 37 2561 NaN \n",
"mean NaN NaN NaN 1998.750417 \n",
"std NaN NaN NaN 10.168465 \n",
"min NaN NaN NaN 1876.000000 \n",
"25% NaN NaN NaN 1997.000000 \n",
"50% NaN NaN NaN 2002.000000 \n",
"75% NaN NaN NaN 2005.000000 \n",
"max NaN NaN NaN 2019.000000 \n",
"\n",
" average_rating num_pages ratings_count \n",
"count 6599.000000 6599.000000 6.599000e+03 \n",
"unique NaN NaN NaN \n",
"top NaN NaN NaN \n",
"freq NaN NaN NaN \n",
"mean 3.931367 348.296863 2.143083e+04 \n",
"std 0.331173 239.199411 1.392929e+05 \n",
"min 0.000000 0.000000 0.000000e+00 \n",
"25% 3.770000 208.000000 1.630000e+02 \n",
"50% 3.950000 304.000000 1.032000e+03 \n",
"75% 4.130000 420.000000 6.105500e+03 \n",
"max 5.000000 3342.000000 5.629932e+06 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "J7DUOhOwAOq2",
"outputId": "3bce3396-8f22-41a4-ebfb-9895ad2bb73c"
},
"outputs": [
{
"data": {
"text/plain": [
"Fiction 2561\n",
"Juvenile Fiction 524\n",
"Biography & Autobiography 398\n",
"History 261\n",
"Literary Criticism 165\n",
" ... \n",
"Child analysis 1\n",
"Illinois 1\n",
"Erinyes (Greek mythology) 1\n",
"Exorcism 1\n",
"People with social disabilities 1\n",
"Name: categories, Length: 563, dtype: int64"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books[\"categories\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4R3GDLXgAOq2",
"outputId": "4d3a9d8a-f37d-4cba-ebbb-0615571396f4"
},
"outputs": [
{
"data": {
"text/plain": [
"2006.0 877\n",
"2005.0 681\n",
"2004.0 605\n",
"2003.0 569\n",
"2002.0 470\n",
" ... \n",
"1928.0 1\n",
"1904.0 1\n",
"1938.0 1\n",
"1936.0 1\n",
"1947.0 1\n",
"Name: published_year, Length: 91, dtype: int64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books[\"published_year\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YSLMCB4nAOq2",
"outputId": "ccdb49cc-9037-4995-9d0b-c0a749f6eae1"
},
"outputs": [
{
"data": {
"text/plain": [
"Agatha Christie 37\n",
"Stephen King 36\n",
"William Shakespeare 29\n",
"John Ronald Reuel Tolkien 25\n",
"Sandra Brown 23\n",
" ..\n",
"Aeg 1\n",
"Pauline Reage 1\n",
"Tim Flannery 1\n",
"Saint Augustine (of Hippo) 1\n",
"Michael S. Reynolds 1\n",
"Name: authors, Length: 3728, dtype: int64"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books[\"authors\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D8HrFKIGAOq3",
"outputId": "20a73c84-1b66-4dd8-fa99-caba6ca68b29"
},
"outputs": [
{
"data": {
"text/plain": [
"4.00 125\n",
"3.93 110\n",
"3.95 109\n",
"3.99 108\n",
"3.96 104\n",
" ... \n",
"4.64 1\n",
"4.68 1\n",
"4.72 1\n",
"2.44 1\n",
"4.78 1\n",
"Name: average_rating, Length: 200, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books[\"average_rating\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "utiDxb60AOq3"
},
"outputs": [],
"source": [
"import sklearn\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 20% of the rows as the test set, then divide the remaining rows\n",
"# evenly between training and validation (roughly 40% / 40% / 20% overall).\n",
"# The fixed random_state makes both splits reproducible across runs.\n",
"books_train, books_test = train_test_split(books, test_size=0.2, random_state=1)\n",
"books_train, books_val = train_test_split(books_train, test_size=0.5, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rS0epPE6AOq3",
"outputId": "f704dda5-95e7-474b-a9b3-d8e107067710"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>915</th>\n",
" <td>The Autobiography of Alice B. Toklas</td>\n",
" <td>Gertrude Stein</td>\n",
" <td>Biography &amp; Autobiography</td>\n",
" <td>2001.0</td>\n",
" <td>3.59</td>\n",
" <td>272.0</td>\n",
" <td>233.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4493</th>\n",
" <td>Never Far from Nowhere</td>\n",
" <td>Andrea Levy</td>\n",
" <td>Blacks</td>\n",
" <td>1996.0</td>\n",
" <td>3.68</td>\n",
" <td>282.0</td>\n",
" <td>601.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1983</th>\n",
" <td>Year's Happy Ending</td>\n",
" <td>Betty Neels</td>\n",
" <td>Fiction</td>\n",
" <td>2001.0</td>\n",
" <td>3.95</td>\n",
" <td>216.0</td>\n",
" <td>128.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2196</th>\n",
" <td>Wrinkles in Time</td>\n",
" <td>George Smoot;Keay Davidson</td>\n",
" <td>Science</td>\n",
" <td>1994.0</td>\n",
" <td>3.99</td>\n",
" <td>360.0</td>\n",
" <td>985.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4011</th>\n",
" <td>Dispatches</td>\n",
" <td>Michael Herr</td>\n",
" <td>History</td>\n",
" <td>1991.0</td>\n",
" <td>4.23</td>\n",
" <td>260.0</td>\n",
" <td>12590.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2841</th>\n",
" <td>Magic Bites</td>\n",
" <td>Ilona Andrews</td>\n",
" <td>Fiction</td>\n",
" <td>2007.0</td>\n",
" <td>4.07</td>\n",
" <td>260.0</td>\n",
" <td>82231.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1713</th>\n",
" <td>High Five</td>\n",
" <td>Janet Evanovich</td>\n",
" <td>Bail bond agents</td>\n",
" <td>2000.0</td>\n",
" <td>4.18</td>\n",
" <td>336.0</td>\n",
" <td>99172.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3469</th>\n",
" <td>A Brief History of Time</td>\n",
" <td>Stephen Hawking</td>\n",
" <td>Science</td>\n",
" <td>1998.0</td>\n",
" <td>4.16</td>\n",
" <td>212.0</td>\n",
" <td>214520.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1657</th>\n",
" <td>The Magus</td>\n",
" <td>John Fowles</td>\n",
" <td>Fiction</td>\n",
" <td>2001.0</td>\n",
" <td>4.05</td>\n",
" <td>656.0</td>\n",
" <td>36909.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3986</th>\n",
" <td>The Complete Monty Python's Flying Circus</td>\n",
" <td>Graham Chapman;Monty Python (Comedy troupe);Te...</td>\n",
" <td>Humor</td>\n",
" <td>1989.0</td>\n",
" <td>4.44</td>\n",
" <td>384.0</td>\n",
" <td>1191.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2639 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"915 The Autobiography of Alice B. Toklas \n",
"4493 Never Far from Nowhere \n",
"1983 Year's Happy Ending \n",
"2196 Wrinkles in Time \n",
"4011 Dispatches \n",
"... ... \n",
"2841 Magic Bites \n",
"1713 High Five \n",
"3469 A Brief History of Time \n",
"1657 The Magus \n",
"3986 The Complete Monty Python's Flying Circus \n",
"\n",
" authors \\\n",
"915 Gertrude Stein \n",
"4493 Andrea Levy \n",
"1983 Betty Neels \n",
"2196 George Smoot;Keay Davidson \n",
"4011 Michael Herr \n",
"... ... \n",
"2841 Ilona Andrews \n",
"1713 Janet Evanovich \n",
"3469 Stephen Hawking \n",
"1657 John Fowles \n",
"3986 Graham Chapman;Monty Python (Comedy troupe);Te... \n",
"\n",
" categories published_year average_rating num_pages \\\n",
"915 Biography & Autobiography 2001.0 3.59 272.0 \n",
"4493 Blacks 1996.0 3.68 282.0 \n",
"1983 Fiction 2001.0 3.95 216.0 \n",
"2196 Science 1994.0 3.99 360.0 \n",
"4011 History 1991.0 4.23 260.0 \n",
"... ... ... ... ... \n",
"2841 Fiction 2007.0 4.07 260.0 \n",
"1713 Bail bond agents 2000.0 4.18 336.0 \n",
"3469 Science 1998.0 4.16 212.0 \n",
"1657 Fiction 2001.0 4.05 656.0 \n",
"3986 Humor 1989.0 4.44 384.0 \n",
"\n",
" ratings_count \n",
"915 233.0 \n",
"4493 601.0 \n",
"1983 128.0 \n",
"2196 985.0 \n",
"4011 12590.0 \n",
"... ... \n",
"2841 82231.0 \n",
"1713 99172.0 \n",
"3469 214520.0 \n",
"1657 36909.0 \n",
"3986 1191.0 \n",
"\n",
"[2639 rows x 7 columns]"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oUWEVGaGAOq3",
"outputId": "6a053600-98a9-4990-ae44-cb8eeda97293"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2639</td>\n",
" <td>2639</td>\n",
" <td>2639</td>\n",
" <td>2639.000000</td>\n",
" <td>2639.000000</td>\n",
" <td>2639.000000</td>\n",
" <td>2.639000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2547</td>\n",
" <td>1827</td>\n",
" <td>286</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>One Hundred Years of Solitude</td>\n",
" <td>Stephen King</td>\n",
" <td>Fiction</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>4</td>\n",
" <td>18</td>\n",
" <td>1027</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1999.032967</td>\n",
" <td>3.929807</td>\n",
" <td>349.534672</td>\n",
" <td>2.363199e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.865320</td>\n",
" <td>0.358919</td>\n",
" <td>244.871090</td>\n",
" <td>1.452470e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1876.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1997.000000</td>\n",
" <td>3.770000</td>\n",
" <td>208.000000</td>\n",
" <td>1.745000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2002.000000</td>\n",
" <td>3.950000</td>\n",
" <td>304.000000</td>\n",
" <td>1.066000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2005.000000</td>\n",
" <td>4.130000</td>\n",
" <td>429.000000</td>\n",
" <td>6.084500e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2019.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3020.000000</td>\n",
" <td>4.367341e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title authors categories \\\n",
"count 2639 2639 2639 \n",
"unique 2547 1827 286 \n",
"top One Hundred Years of Solitude Stephen King Fiction \n",
"freq 4 18 1027 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" published_year average_rating num_pages ratings_count \n",
"count 2639.000000 2639.000000 2639.000000 2.639000e+03 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 1999.032967 3.929807 349.534672 2.363199e+04 \n",
"std 9.865320 0.358919 244.871090 1.452470e+05 \n",
"min 1876.000000 0.000000 0.000000 0.000000e+00 \n",
"25% 1997.000000 3.770000 208.000000 1.745000e+02 \n",
"50% 2002.000000 3.950000 304.000000 1.066000e+03 \n",
"75% 2005.000000 4.130000 429.000000 6.084500e+03 \n",
"max 2019.000000 5.000000 3020.000000 4.367341e+06 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books_train.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yXkOfB9bAOq3",
"outputId": "3fc9e96e-8fe0-490c-d6b5-71b21277aa0a"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1320</td>\n",
" <td>1320</td>\n",
" <td>1320</td>\n",
" <td>1320.000000</td>\n",
" <td>1320.000000</td>\n",
" <td>1320.000000</td>\n",
" <td>1.320000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>1303</td>\n",
" <td>1064</td>\n",
" <td>185</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>20,000 Leagues Under the Sea</td>\n",
" <td>Stephen King</td>\n",
" <td>Fiction</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>540</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1998.590909</td>\n",
" <td>3.925470</td>\n",
" <td>339.346970</td>\n",
" <td>1.588767e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.119569</td>\n",
" <td>0.299805</td>\n",
" <td>219.560964</td>\n",
" <td>7.877064e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1942.000000</td>\n",
" <td>2.330000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1996.000000</td>\n",
" <td>3.750000</td>\n",
" <td>208.000000</td>\n",
" <td>1.510000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2002.000000</td>\n",
" <td>3.950000</td>\n",
" <td>304.000000</td>\n",
" <td>1.068000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2005.000000</td>\n",
" <td>4.130000</td>\n",
" <td>401.000000</td>\n",
" <td>6.360000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2017.000000</td>\n",
" <td>5.000000</td>\n",
" <td>3342.000000</td>\n",
" <td>2.115562e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title authors categories published_year \\\n",
"count 1320 1320 1320 1320.000000 \n",
"unique 1303 1064 185 NaN \n",
"top 20,000 Leagues Under the Sea Stephen King Fiction NaN \n",
"freq 3 7 540 NaN \n",
"mean NaN NaN NaN 1998.590909 \n",
"std NaN NaN NaN 10.119569 \n",
"min NaN NaN NaN 1942.000000 \n",
"25% NaN NaN NaN 1996.000000 \n",
"50% NaN NaN NaN 2002.000000 \n",
"75% NaN NaN NaN 2005.000000 \n",
"max NaN NaN NaN 2017.000000 \n",
"\n",
" average_rating num_pages ratings_count \n",
"count 1320.000000 1320.000000 1.320000e+03 \n",
"unique NaN NaN NaN \n",
"top NaN NaN NaN \n",
"freq NaN NaN NaN \n",
"mean 3.925470 339.346970 1.588767e+04 \n",
"std 0.299805 219.560964 7.877064e+04 \n",
"min 2.330000 0.000000 0.000000e+00 \n",
"25% 3.750000 208.000000 1.510000e+02 \n",
"50% 3.950000 304.000000 1.068000e+03 \n",
"75% 4.130000 401.000000 6.360000e+03 \n",
"max 5.000000 3342.000000 2.115562e+06 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books_test.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "CWG6q0ixAOq4",
"outputId": "367a1088-975b-4da2-e333-50152a4fcbc3"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>authors</th>\n",
" <th>categories</th>\n",
" <th>published_year</th>\n",
" <th>average_rating</th>\n",
" <th>num_pages</th>\n",
" <th>ratings_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2640</td>\n",
" <td>2640</td>\n",
" <td>2640</td>\n",
" <td>2640.000000</td>\n",
" <td>2640.000000</td>\n",
" <td>2640.000000</td>\n",
" <td>2.640000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2562</td>\n",
" <td>1850</td>\n",
" <td>313</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>Three Complete Novels</td>\n",
" <td>Agatha Christie</td>\n",
" <td>Fiction</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>6</td>\n",
" <td>14</td>\n",
" <td>994</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1998.547727</td>\n",
" <td>3.935875</td>\n",
" <td>351.534470</td>\n",
" <td>2.200209e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.483752</td>\n",
" <td>0.316971</td>\n",
" <td>242.829463</td>\n",
" <td>1.558830e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1901.000000</td>\n",
" <td>0.000000</td>\n",
" <td>4.000000</td>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1996.000000</td>\n",
" <td>3.770000</td>\n",
" <td>208.000000</td>\n",
" <td>1.557500e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2002.000000</td>\n",
" <td>3.950000</td>\n",
" <td>309.500000</td>\n",
" <td>9.555000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2005.000000</td>\n",
" <td>4.130000</td>\n",
" <td>430.250000</td>\n",
" <td>5.980750e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2019.000000</td>\n",
" <td>5.000000</td>\n",
" <td>2965.000000</td>\n",
" <td>5.629932e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title authors categories published_year \\\n",
"count 2640 2640 2640 2640.000000 \n",
"unique 2562 1850 313 NaN \n",
"top Three Complete Novels Agatha Christie Fiction NaN \n",
"freq 6 14 994 NaN \n",
"mean NaN NaN NaN 1998.547727 \n",
"std NaN NaN NaN 10.483752 \n",
"min NaN NaN NaN 1901.000000 \n",
"25% NaN NaN NaN 1996.000000 \n",
"50% NaN NaN NaN 2002.000000 \n",
"75% NaN NaN NaN 2005.000000 \n",
"max NaN NaN NaN 2019.000000 \n",
"\n",
" average_rating num_pages ratings_count \n",
"count 2640.000000 2640.000000 2.640000e+03 \n",
"unique NaN NaN NaN \n",
"top NaN NaN NaN \n",
"freq NaN NaN NaN \n",
"mean 3.935875 351.534470 2.200209e+04 \n",
"std 0.316971 242.829463 1.558830e+05 \n",
"min 0.000000 4.000000 0.000000e+00 \n",
"25% 3.770000 208.000000 1.557500e+02 \n",
"50% 3.950000 309.500000 9.555000e+02 \n",
"75% 4.130000 430.250000 5.980750e+03 \n",
"max 5.000000 2965.000000 5.629932e+06 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"books_val.describe(include='all')"
]
}
],
"metadata": {
"author": "Tomasz Ziętkiewicz",
"celltoolbar": "Slideshow",
"email": "tomasz.zietkiewicz@amu.edu.pl",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"lang": "pl",
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"slideshow": {
"slide_type": "slide"
},
"subtitle": "2.Dane[laboratoria]",
"title": "Inżynieria uczenia maszynowego",
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": false,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": false,
"toc_window_display": false
},
"year": "2021",
"colab": {
"provenance": []
}
},
"nbformat": 4,
"nbformat_minor": 0
}