diff --git a/IUM_zadanie.ipynb b/IUM_zadanie.ipynb index 0c37b8b..e4a502e 100644 --- a/IUM_zadanie.ipynb +++ b/IUM_zadanie.ipynb @@ -1,2046 +1,2256 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 41, - "metadata": { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zPOfPO5LAOqy", + "outputId": "a8846a75-ef0a-4048-8168-f71d79d7b7e8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: kaggle in /usr/local/lib/python3.9/dist-packages (1.5.13)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.27.1)\n", + "Requirement already satisfied: python-slugify in /usr/local/lib/python3.9/dist-packages (from kaggle) (8.0.1)\n", + "Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.16.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.65.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.12.7)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.15)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.9/dist-packages (from python-slugify->kaggle) (1.3)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.0.12)\n", + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n", + "Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" + ] + } + ], + "source": [ + "!pip install --user kaggle\n", + "!pip install --user pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gc7VHACRAOq0", + "outputId": "20220fe9-e872-451b-f759-b4cfff91bc51" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Traceback (most recent call last):\n", + " File \"/usr/local/bin/kaggle\", line 5, in \n", + " from kaggle.cli import main\n", + " File \"/usr/local/lib/python3.9/dist-packages/kaggle/__init__.py\", line 23, in \n", + " api.authenticate()\n", + " File \"/usr/local/lib/python3.9/dist-packages/kaggle/api/kaggle_api_extended.py\", line 164, in authenticate\n", + " raise IOError('Could not find {}. Make sure it\\'s located in'\n", + "OSError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.\n" + ] + } + ], + "source": [ + "!kaggle datasets download -d dylanjcastillo/7k-books-with-metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "utslvpN1AOq0", + "outputId": "dda342a0-18dc-40a7-86bd-b233844c1231", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Archive: 7k-books-with-metadata.zip\n", + " inflating: books.csv \n" + ] + } + ], + "source": [ + "!unzip -o 7k-books-with-metadata.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "k9Q3DwbiAOq0", + "outputId": "ab0a4f14-188b-41d6-c3fe-0553d80aa648", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 676 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " isbn13 isbn10 title \\\n", + "0 9780002005883 0002005883 Gilead \n", + "1 9780002261982 0002261987 Spider's Web \n", + "2 9780006163831 0006163831 The One Tree \n", + "3 9780006178736 0006178731 Rage of angels \n", + "4 9780006280897 0006280897 The Four Loves \n", + "... ... ... ... \n", + "6805 9788185300535 8185300534 I Am that \n", + "6806 9788185944609 8185944601 Secrets Of The Heart \n", + "6807 9788445074879 8445074873 Fahrenheit 451 \n", + "6808 9789027712059 9027712050 The Berlin Phenomenology \n", + "6809 9789042003408 9042003405 'I'm Telling You Stories' \n", + "\n", + " subtitle \\\n", + "0 NaN \n", + "1 A Novel \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "6805 Talks with Sri Nisargadatta Maharaj \n", + "6806 NaN \n", + "6807 NaN \n", + "6808 NaN \n", + "6809 Jeanette Winterson and the Politics of Reading \n", + "\n", + " authors \\\n", + "0 Marilynne Robinson \n", + "1 Charles Osborne;Agatha Christie \n", + "2 Stephen R. Donaldson \n", + "3 Sidney Sheldon \n", + "4 Clive Staples Lewis \n", + "... ... \n", + "6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", + "6806 Khalil Gibran \n", + "6807 Ray Bradbury \n", + "6808 Georg Wilhelm Friedrich Hegel \n", + "6809 Helena Grice;Tim Woods \n", + "\n", + " categories \\\n", + "0 Fiction \n", + "1 Detective and mystery stories \n", + "2 American fiction \n", + "3 Fiction \n", + "4 Christian life \n", + "... ... \n", + "6805 Philosophy \n", + "6806 Mysticism \n", + "6807 Book burning \n", + "6808 History \n", + "6809 Literary Criticism \n", + "\n", + " thumbnail \\\n", + "0 http://books.google.com/books/content?id=KQZCP... \n", + "1 http://books.google.com/books/content?id=gA5GP... \n", + "2 http://books.google.com/books/content?id=OmQaw... \n", + "3 http://books.google.com/books/content?id=FKo2T... \n", + "4 http://books.google.com/books/content?id=XhQ5X... \n", + "... ... \n", + "6805 http://books.google.com/books/content?id=Fv_JP... \n", + "6806 http://books.google.com/books/content?id=XcrVp... \n", + "6807 NaN \n", + "6808 http://books.google.com/books/content?id=Vy7Sk... \n", + "6809 http://books.google.com/books/content?id=2lVyR... \n", + "\n", + " description published_year \\\n", + "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", + "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n", + "2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n", + "3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", + "4 Lewis' work on the nature of love divides love... 2002.0 \n", + "... ... ... \n", + "6805 This collection of the timeless teachings of o... 1999.0 \n", + "6806 NaN 1993.0 \n", + "6807 NaN 2004.0 \n", + "6808 Since the three volume edition ofHegel's Philo... 1981.0 \n", + "6809 This is a jubilant and rewarding collection of... 1998.0 \n", + "\n", + " average_rating num_pages ratings_count \n", + "0 3.85 247.0 361.0 \n", + "1 3.83 241.0 5164.0 \n", + "2 3.97 479.0 172.0 \n", + "3 3.93 512.0 29532.0 \n", + "4 4.15 170.0 33684.0 \n", + "... ... ... ... \n", + "6805 4.51 531.0 104.0 \n", + "6806 4.08 74.0 324.0 \n", + "6807 3.98 186.0 5733.0 \n", + "6808 0.00 210.0 0.0 \n", + "6809 3.70 136.0 10.0 \n", + "\n", + "[6810 rows x 12 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isbn13isbn10titlesubtitleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_count
097800020058830002005883GileadNaNMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0361.0
197800022619820002261987Spider's WebA NovelCharles Osborne;Agatha ChristieDetective and mystery storieshttp://books.google.com/books/content?id=gA5GP...A new 'Christie for Christmas' -- a full-lengt...2000.03.83241.05164.0
297800061638310006163831The One TreeNaNStephen R. DonaldsonAmerican fictionhttp://books.google.com/books/content?id=OmQaw...Volume Two of Stephen Donaldson's acclaimed se...1982.03.97479.0172.0
397800061787360006178731Rage of angelsNaNSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.029532.0
497800062808970006280897The Four LovesNaNClive Staples LewisChristian lifehttp://books.google.com/books/content?id=XhQ5X...Lewis' work on the nature of love divides love...2002.04.15170.033684.0
.......................................
680597881853005358185300534I Am thatTalks with Sri Nisargadatta MaharajSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophyhttp://books.google.com/books/content?id=Fv_JP...This collection of the timeless teachings of o...1999.04.51531.0104.0
680697881859446098185944601Secrets Of The HeartNaNKhalil GibranMysticismhttp://books.google.com/books/content?id=XcrVp...NaN1993.04.0874.0324.0
680797884450748798445074873Fahrenheit 451NaNRay BradburyBook burningNaNNaN2004.03.98186.05733.0
680897890277120599027712050The Berlin PhenomenologyNaNGeorg Wilhelm Friedrich HegelHistoryhttp://books.google.com/books/content?id=Vy7Sk...Since the three volume edition ofHegel's Philo...1981.00.00210.00.0
680997890420034089042003405'I'm Telling You Stories'Jeanette Winterson and the Politics of ReadingHelena Grice;Tim WoodsLiterary Criticismhttp://books.google.com/books/content?id=2lVyR...This is a jubilant and rewarding collection of...1998.03.70136.010.0
\n", + "

6810 rows × 12 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "import pandas as pd\n", + "books=pd.read_csv('books.csv')\n", + "books" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "id": "WgVroQDTAOq1", + "outputId": "932fdfce-1d65-4290-cc5d-cc053f4fa459" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isbn13isbn10titlesubtitleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_count
count6.810000e+0368106810238167386711648165486804.0000006767.0000006767.0000006.767000e+03
uniqueNaN681063982009378056764816474NaNNaNNaNNaN
topNaN0786282258The Lord of the RingsA NovelAgatha ChristieFictionhttp://books.google.com/books/content?id=6dVAW...This is a reproduction of the original artefac...NaNNaNNaNNaN
freqNaN11122637258816NaNNaNNaNNaN
mean9.780677e+12NaNNaNNaNNaNNaNNaNNaN1998.6303643.933284348.1810262.106910e+04
std6.068911e+08NaNNaNNaNNaNNaNNaNNaN10.4842570.331352242.3767831.376207e+05
min9.780002e+12NaNNaNNaNNaNNaNNaNNaN1853.0000000.0000000.0000000.000000e+00
25%9.780330e+12NaNNaNNaNNaNNaNNaNNaN1996.0000003.770000208.0000001.590000e+02
50%9.780553e+12NaNNaNNaNNaNNaNNaNNaN2002.0000003.960000304.0000001.018000e+03
75%9.780810e+12NaNNaNNaNNaNNaNNaNNaN2005.0000004.130000420.0000005.992500e+03
max9.789042e+12NaNNaNNaNNaNNaNNaNNaN2019.0000005.0000003342.0000005.629932e+06
\n", + "
" + ], + "text/plain": [ + " isbn13 isbn10 title subtitle \\\n", + "count 6.810000e+03 6810 6810 2381 \n", + "unique NaN 6810 6398 2009 \n", + "top NaN 0786282258 The Lord of the Rings A Novel \n", + "freq NaN 1 11 226 \n", + "mean 9.780677e+12 NaN NaN NaN \n", + "std 6.068911e+08 NaN NaN NaN \n", + "min 9.780002e+12 NaN NaN NaN \n", + "25% 9.780330e+12 NaN NaN NaN \n", + "50% 9.780553e+12 NaN NaN NaN \n", + "75% 9.780810e+12 NaN NaN NaN \n", + "max 9.789042e+12 NaN NaN NaN \n", + "\n", + " authors categories \\\n", + "count 6738 6711 \n", + "unique 3780 567 \n", + "top Agatha Christie Fiction \n", + "freq 37 2588 \n", + "mean NaN NaN \n", + "std NaN NaN \n", + "min NaN NaN \n", + "25% NaN NaN \n", + "50% NaN NaN \n", + "75% NaN NaN \n", + "max NaN NaN \n", + "\n", + " thumbnail \\\n", + "count 6481 \n", + "unique 6481 \n", + "top http://books.google.com/books/content?id=6dVAW... \n", + "freq 1 \n", + "mean NaN \n", + "std NaN \n", + "min NaN \n", + "25% NaN \n", + "50% NaN \n", + "75% NaN \n", + "max NaN \n", + "\n", + " description published_year \\\n", + "count 6548 6804.000000 \n", + "unique 6474 NaN \n", + "top This is a reproduction of the original artefac... NaN \n", + "freq 6 NaN \n", + "mean NaN 1998.630364 \n", + "std NaN 10.484257 \n", + "min NaN 1853.000000 \n", + "25% NaN 1996.000000 \n", + "50% NaN 2002.000000 \n", + "75% NaN 2005.000000 \n", + "max NaN 2019.000000 \n", + "\n", + " average_rating num_pages ratings_count \n", + "count 6767.000000 6767.000000 6.767000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 3.933284 348.181026 2.106910e+04 \n", + "std 0.331352 242.376783 1.376207e+05 \n", + "min 0.000000 0.000000 0.000000e+00 \n", + "25% 3.770000 208.000000 1.590000e+02 \n", + "50% 3.960000 304.000000 1.018000e+03 \n", + "75% 4.130000 420.000000 5.992500e+03 \n", + "max 5.000000 3342.000000 5.629932e+06 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1hwHH65hAOq1", + "outputId": "0b3e32ab-230b-4d9d-db8a-d2d25e57161b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "isbn13 0\n", + "isbn10 0\n", + "title 0\n", + "subtitle 4429\n", + "authors 72\n", + "categories 99\n", + "thumbnail 329\n", + "description 262\n", + "published_year 6\n", + "average_rating 43\n", + "num_pages 43\n", + "ratings_count 43\n", + "dtype: int64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mZMFUt2pAOq1" + }, + "outputs": [], + "source": [ + "books.drop('thumbnail', inplace=True, axis=1)\n", + "books.drop('subtitle', inplace=True, axis=1)\n", + "books.drop('description', inplace=True, axis=1)\n", + "books.drop('isbn13', inplace=True, axis=1)\n", + "books.drop('isbn10', inplace=True, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y6I2PKuhAOq1", + "outputId": "2e03efc3-e8e3-4cc8-abb8-97e0594665be" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "title 0\n", + "authors 72\n", + "categories 99\n", + "published_year 6\n", + "average_rating 43\n", + "num_pages 43\n", + "ratings_count 43\n", + "dtype: int64" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "21R7h40lAOq1", + "outputId": "4c9f746b-4347-4cd3-cdae-21e7bc818f2c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
0GileadMarilynne RobinsonFiction2004.03.85247.0361.0
1Spider's WebCharles Osborne;Agatha ChristieDetective and mystery stories2000.03.83241.05164.0
2The One TreeStephen R. DonaldsonAmerican fiction1982.03.97479.0172.0
3Rage of angelsSidney SheldonFiction1993.03.93512.029532.0
4The Four LovesClive Staples LewisChristian life2002.04.15170.033684.0
........................
6805I Am thatSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophy1999.04.51531.0104.0
6806Secrets Of The HeartKhalil GibranMysticism1993.04.0874.0324.0
6807Fahrenheit 451Ray BradburyBook burning2004.03.98186.05733.0
6808The Berlin PhenomenologyGeorg Wilhelm Friedrich HegelHistory1981.00.00210.00.0
6809'I'm Telling You Stories'Helena Grice;Tim WoodsLiterary Criticism1998.03.70136.010.0
\n", + "

6599 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " title authors \\\n", + "0 Gilead Marilynne Robinson \n", + "1 Spider's Web Charles Osborne;Agatha Christie \n", + "2 The One Tree Stephen R. Donaldson \n", + "3 Rage of angels Sidney Sheldon \n", + "4 The Four Loves Clive Staples Lewis \n", + "... ... ... \n", + "6805 I Am that Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", + "6806 Secrets Of The Heart Khalil Gibran \n", + "6807 Fahrenheit 451 Ray Bradbury \n", + "6808 The Berlin Phenomenology Georg Wilhelm Friedrich Hegel \n", + "6809 'I'm Telling You Stories' Helena Grice;Tim Woods \n", + "\n", + " categories published_year average_rating \\\n", + "0 Fiction 2004.0 3.85 \n", + "1 Detective and mystery stories 2000.0 3.83 \n", + "2 American fiction 1982.0 3.97 \n", + "3 Fiction 1993.0 3.93 \n", + "4 Christian life 2002.0 4.15 \n", + "... ... ... ... \n", + "6805 Philosophy 1999.0 4.51 \n", + "6806 Mysticism 1993.0 4.08 \n", + "6807 Book burning 2004.0 3.98 \n", + "6808 History 1981.0 0.00 \n", + "6809 Literary Criticism 1998.0 3.70 \n", + "\n", + " num_pages ratings_count \n", + "0 247.0 361.0 \n", + "1 241.0 5164.0 \n", + "2 479.0 172.0 \n", + "3 512.0 29532.0 \n", + "4 170.0 33684.0 \n", + "... ... ... \n", + "6805 531.0 104.0 \n", + "6806 74.0 324.0 \n", + "6807 186.0 5733.0 \n", + "6808 210.0 0.0 \n", + "6809 136.0 10.0 \n", + "\n", + "[6599 rows x 7 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books.dropna(inplace=True)\n", + "books" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lx9gqh7UAOq2", + "outputId": "651a374e-eb8c-426f-faa0-c1cd6c9762bb" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count6599659965996599.0000006599.0000006599.0000006.599000e+03
unique62163728563NaNNaNNaNNaN
topThe Lord of the RingsAgatha ChristieFictionNaNNaNNaNNaN
freq9372561NaNNaNNaNNaN
meanNaNNaNNaN1998.7504173.931367348.2968632.143083e+04
stdNaNNaNNaN10.1684650.331173239.1994111.392929e+05
minNaNNaNNaN1876.0000000.0000000.0000000.000000e+00
25%NaNNaNNaN1997.0000003.770000208.0000001.630000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.032000e+03
75%NaNNaNNaN2005.0000004.130000420.0000006.105500e+03
maxNaNNaNNaN2019.0000005.0000003342.0000005.629932e+06
\n", + "
" + ], + "text/plain": [ + " title authors categories published_year \\\n", + "count 6599 6599 6599 6599.000000 \n", + "unique 6216 3728 563 NaN \n", + "top The Lord of the Rings Agatha Christie Fiction NaN \n", + "freq 9 37 2561 NaN \n", + "mean NaN NaN NaN 1998.750417 \n", + "std NaN NaN NaN 10.168465 \n", + "min NaN NaN NaN 1876.000000 \n", + "25% NaN NaN NaN 1997.000000 \n", + "50% NaN NaN NaN 2002.000000 \n", + "75% NaN NaN NaN 2005.000000 \n", + "max NaN NaN NaN 2019.000000 \n", + "\n", + " average_rating num_pages ratings_count \n", + "count 6599.000000 6599.000000 6.599000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 3.931367 348.296863 2.143083e+04 \n", + "std 0.331173 239.199411 1.392929e+05 \n", + "min 0.000000 0.000000 0.000000e+00 \n", + "25% 3.770000 208.000000 1.630000e+02 \n", + "50% 3.950000 304.000000 1.032000e+03 \n", + "75% 4.130000 420.000000 6.105500e+03 \n", + "max 5.000000 3342.000000 5.629932e+06 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J7DUOhOwAOq2", + "outputId": "3bce3396-8f22-41a4-ebfb-9895ad2bb73c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Fiction 2561\n", + "Juvenile Fiction 524\n", + "Biography & Autobiography 398\n", + "History 261\n", + "Literary Criticism 165\n", + " ... \n", + "Child analysis 1\n", + "Illinois 1\n", + "Erinyes (Greek mythology) 1\n", + "Exorcism 1\n", + "People with social disabilities 1\n", + "Name: categories, Length: 563, dtype: int64" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books[\"categories\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4R3GDLXgAOq2", + "outputId": "4d3a9d8a-f37d-4cba-ebbb-0615571396f4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2006.0 877\n", + "2005.0 681\n", + "2004.0 605\n", + "2003.0 569\n", + "2002.0 470\n", + " ... \n", + "1928.0 1\n", + "1904.0 1\n", + "1938.0 1\n", + "1936.0 1\n", + "1947.0 1\n", + "Name: published_year, Length: 91, dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books[\"published_year\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YSLMCB4nAOq2", + "outputId": "ccdb49cc-9037-4995-9d0b-c0a749f6eae1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Agatha Christie 37\n", + "Stephen King 36\n", + "William Shakespeare 29\n", + "John Ronald Reuel Tolkien 25\n", + "Sandra Brown 23\n", + " ..\n", + "Aeg 1\n", + "Pauline Reage 1\n", + "Tim Flannery 1\n", + "Saint Augustine (of Hippo) 1\n", + "Michael S. Reynolds 1\n", + "Name: authors, Length: 3728, dtype: int64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books[\"authors\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D8HrFKIGAOq3", + "outputId": "20a73c84-1b66-4dd8-fa99-caba6ca68b29" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "4.00 125\n", + "3.93 110\n", + "3.95 109\n", + "3.99 108\n", + "3.96 104\n", + " ... \n", + "4.64 1\n", + "4.68 1\n", + "4.72 1\n", + "2.44 1\n", + "4.78 1\n", + "Name: average_rating, Length: 200, dtype: int64" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books[\"average_rating\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "utiDxb60AOq3" + }, + "outputs": [], + "source": [ + "import sklearn\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "books_train, books_test = sklearn.model_selection.train_test_split(books, test_size=0.2, random_state=1)\n", + "books_train, books_val = sklearn.model_selection.train_test_split(books_train, test_size=0.5, random_state=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rS0epPE6AOq3", + "outputId": "f704dda5-95e7-474b-a9b3-d8e107067710" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
915The Autobiography of Alice B. ToklasGertrude SteinBiography & Autobiography2001.03.59272.0233.0
4493Never Far from NowhereAndrea LevyBlacks1996.03.68282.0601.0
1983Year's Happy EndingBetty NeelsFiction2001.03.95216.0128.0
2196Wrinkles in TimeGeorge Smoot;Keay DavidsonScience1994.03.99360.0985.0
4011DispatchesMichael HerrHistory1991.04.23260.012590.0
........................
2841Magic BitesIlona AndrewsFiction2007.04.07260.082231.0
1713High FiveJanet EvanovichBail bond agents2000.04.18336.099172.0
3469A Brief History of TimeStephen HawkingScience1998.04.16212.0214520.0
1657The MagusJohn FowlesFiction2001.04.05656.036909.0
3986The Complete Monty Python's Flying CircusGraham Chapman;Monty Python (Comedy troupe);Te...Humor1989.04.44384.01191.0
\n", + "

2639 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " title \\\n", + "915 The Autobiography of Alice B. Toklas \n", + "4493 Never Far from Nowhere \n", + "1983 Year's Happy Ending \n", + "2196 Wrinkles in Time \n", + "4011 Dispatches \n", + "... ... \n", + "2841 Magic Bites \n", + "1713 High Five \n", + "3469 A Brief History of Time \n", + "1657 The Magus \n", + "3986 The Complete Monty Python's Flying Circus \n", + "\n", + " authors \\\n", + "915 Gertrude Stein \n", + "4493 Andrea Levy \n", + "1983 Betty Neels \n", + "2196 George Smoot;Keay Davidson \n", + "4011 Michael Herr \n", + "... ... \n", + "2841 Ilona Andrews \n", + "1713 Janet Evanovich \n", + "3469 Stephen Hawking \n", + "1657 John Fowles \n", + "3986 Graham Chapman;Monty Python (Comedy troupe);Te... \n", + "\n", + " categories published_year average_rating num_pages \\\n", + "915 Biography & Autobiography 2001.0 3.59 272.0 \n", + "4493 Blacks 1996.0 3.68 282.0 \n", + "1983 Fiction 2001.0 3.95 216.0 \n", + "2196 Science 1994.0 3.99 360.0 \n", + "4011 History 1991.0 4.23 260.0 \n", + "... ... ... ... ... \n", + "2841 Fiction 2007.0 4.07 260.0 \n", + "1713 Bail bond agents 2000.0 4.18 336.0 \n", + "3469 Science 1998.0 4.16 212.0 \n", + "1657 Fiction 2001.0 4.05 656.0 \n", + "3986 Humor 1989.0 4.44 384.0 \n", + "\n", + " ratings_count \n", + "915 233.0 \n", + "4493 601.0 \n", + "1983 128.0 \n", + "2196 985.0 \n", + "4011 12590.0 \n", + "... ... \n", + "2841 82231.0 \n", + "1713 99172.0 \n", + "3469 214520.0 \n", + "1657 36909.0 \n", + "3986 1191.0 \n", + "\n", + "[2639 rows x 7 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oUWEVGaGAOq3", + "outputId": "6a053600-98a9-4990-ae44-cb8eeda97293" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count2639263926392639.0000002639.0000002639.0000002.639000e+03
unique25471827286NaNNaNNaNNaN
topOne Hundred Years of SolitudeStephen KingFictionNaNNaNNaNNaN
freq4181027NaNNaNNaNNaN
meanNaNNaNNaN1999.0329673.929807349.5346722.363199e+04
stdNaNNaNNaN9.8653200.358919244.8710901.452470e+05
minNaNNaNNaN1876.0000000.0000000.0000000.000000e+00
25%NaNNaNNaN1997.0000003.770000208.0000001.745000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.066000e+03
75%NaNNaNNaN2005.0000004.130000429.0000006.084500e+03
maxNaNNaNNaN2019.0000005.0000003020.0000004.367341e+06
\n", + "
" + ], + "text/plain": [ + " title authors categories \\\n", + "count 2639 2639 2639 \n", + "unique 2547 1827 286 \n", + "top One Hundred Years of Solitude Stephen King Fiction \n", + "freq 4 18 1027 \n", + "mean NaN NaN NaN \n", + "std NaN NaN NaN \n", + "min NaN NaN NaN \n", + "25% NaN NaN NaN \n", + "50% NaN NaN NaN \n", + "75% NaN NaN NaN \n", + "max NaN NaN NaN \n", + "\n", + " published_year average_rating num_pages ratings_count \n", + "count 2639.000000 2639.000000 2639.000000 2.639000e+03 \n", + "unique NaN NaN NaN NaN \n", + "top NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN \n", + "mean 1999.032967 3.929807 349.534672 2.363199e+04 \n", + "std 9.865320 0.358919 244.871090 1.452470e+05 \n", + "min 1876.000000 0.000000 0.000000 0.000000e+00 \n", + "25% 1997.000000 3.770000 208.000000 1.745000e+02 \n", + "50% 2002.000000 3.950000 304.000000 1.066000e+03 \n", + "75% 2005.000000 4.130000 429.000000 6.084500e+03 \n", + "max 2019.000000 5.000000 3020.000000 4.367341e+06 " + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books_train.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yXkOfB9bAOq3", + "outputId": "3fc9e96e-8fe0-490c-d6b5-71b21277aa0a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count1320132013201320.0000001320.0000001320.0000001.320000e+03
unique13031064185NaNNaNNaNNaN
top20,000 Leagues Under the SeaStephen KingFictionNaNNaNNaNNaN
freq37540NaNNaNNaNNaN
meanNaNNaNNaN1998.5909093.925470339.3469701.588767e+04
stdNaNNaNNaN10.1195690.299805219.5609647.877064e+04
minNaNNaNNaN1942.0000002.3300000.0000000.000000e+00
25%NaNNaNNaN1996.0000003.750000208.0000001.510000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.068000e+03
75%NaNNaNNaN2005.0000004.130000401.0000006.360000e+03
maxNaNNaNNaN2017.0000005.0000003342.0000002.115562e+06
\n", + "
" + ], + "text/plain": [ + " title authors categories published_year \\\n", + "count 1320 1320 1320 1320.000000 \n", + "unique 1303 1064 185 NaN \n", + "top 20,000 Leagues Under the Sea Stephen King Fiction NaN \n", + "freq 3 7 540 NaN \n", + "mean NaN NaN NaN 1998.590909 \n", + "std NaN NaN NaN 10.119569 \n", + "min NaN NaN NaN 1942.000000 \n", + "25% NaN NaN NaN 1996.000000 \n", + "50% NaN NaN NaN 2002.000000 \n", + "75% NaN NaN NaN 2005.000000 \n", + "max NaN NaN NaN 2017.000000 \n", + "\n", + " average_rating num_pages ratings_count \n", + "count 1320.000000 1320.000000 1.320000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 3.925470 339.346970 1.588767e+04 \n", + "std 0.299805 219.560964 7.877064e+04 \n", + "min 2.330000 0.000000 0.000000e+00 \n", + "25% 3.750000 208.000000 1.510000e+02 \n", + "50% 3.950000 304.000000 1.068000e+03 \n", + "75% 4.130000 401.000000 6.360000e+03 \n", + "max 5.000000 3342.000000 2.115562e+06 " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books_test.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CWG6q0ixAOq4", + "outputId": "367a1088-975b-4da2-e333-50152a4fcbc3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count2640264026402640.0000002640.0000002640.0000002.640000e+03
unique25621850313NaNNaNNaNNaN
topThree Complete NovelsAgatha ChristieFictionNaNNaNNaNNaN
freq614994NaNNaNNaNNaN
meanNaNNaNNaN1998.5477273.935875351.5344702.200209e+04
stdNaNNaNNaN10.4837520.316971242.8294631.558830e+05
minNaNNaNNaN1901.0000000.0000004.0000000.000000e+00
25%NaNNaNNaN1996.0000003.770000208.0000001.557500e+02
50%NaNNaNNaN2002.0000003.950000309.5000009.555000e+02
75%NaNNaNNaN2005.0000004.130000430.2500005.980750e+03
maxNaNNaNNaN2019.0000005.0000002965.0000005.629932e+06
\n", + "
" + ], + "text/plain": [ + " title authors categories published_year \\\n", + "count 2640 2640 2640 2640.000000 \n", + "unique 2562 1850 313 NaN \n", + "top Three Complete Novels Agatha Christie Fiction NaN \n", + "freq 6 14 994 NaN \n", + "mean NaN NaN NaN 1998.547727 \n", + "std NaN NaN NaN 10.483752 \n", + "min NaN NaN NaN 1901.000000 \n", + "25% NaN NaN NaN 1996.000000 \n", + "50% NaN NaN NaN 2002.000000 \n", + "75% NaN NaN NaN 2005.000000 \n", + "max NaN NaN NaN 2019.000000 \n", + "\n", + " average_rating num_pages ratings_count \n", + "count 2640.000000 2640.000000 2.640000e+03 \n", + "unique NaN NaN NaN \n", + "top NaN NaN NaN \n", + "freq NaN NaN NaN \n", + "mean 3.935875 351.534470 2.200209e+04 \n", + "std 0.316971 242.829463 1.558830e+05 \n", + "min 0.000000 4.000000 0.000000e+00 \n", + "25% 3.770000 208.000000 1.557500e+02 \n", + "50% 3.950000 309.500000 9.555000e+02 \n", + "75% 4.130000 430.250000 5.980750e+03 \n", + "max 5.000000 2965.000000 5.629932e+06 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "books_val.describe(include='all')" + ] + } + ], + "metadata": { + "author": "Tomasz Ziętkiewicz", + "celltoolbar": "Slideshow", + "email": "tomasz.zietkiewicz@amu.edu.pl", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "lang": "pl", + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, "slideshow": { - "slide_type": "slide" + "slide_type": "slide" + }, + "subtitle": "2.Dane[laboratoria]", + "title": "Inżynieria uczenia maszynowego", + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + }, + "year": "2021", + "colab": { + "provenance": [] } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
isbn13isbn10titlesubtitleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_count
097800020058830002005883GileadNaNMarilynne RobinsonFictionhttp://books.google.com/books/content?id=KQZCP...A NOVEL THAT READERS and critics have been eag...2004.03.85247.0361.0
197800022619820002261987Spider's WebA NovelCharles Osborne;Agatha ChristieDetective and mystery storieshttp://books.google.com/books/content?id=gA5GP...A new 'Christie for Christmas' -- a full-lengt...2000.03.83241.05164.0
297800061638310006163831The One TreeNaNStephen R. DonaldsonAmerican fictionhttp://books.google.com/books/content?id=OmQaw...Volume Two of Stephen Donaldson's acclaimed se...1982.03.97479.0172.0
397800061787360006178731Rage of angelsNaNSidney SheldonFictionhttp://books.google.com/books/content?id=FKo2T...A memorable, mesmerizing heroine Jennifer -- b...1993.03.93512.029532.0
497800062808970006280897The Four LovesNaNClive Staples LewisChristian lifehttp://books.google.com/books/content?id=XhQ5X...Lewis' work on the nature of love divides love...2002.04.15170.033684.0
.......................................
680597881853005358185300534I Am thatTalks with Sri Nisargadatta MaharajSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophyhttp://books.google.com/books/content?id=Fv_JP...This collection of the timeless teachings of o...1999.04.51531.0104.0
680697881859446098185944601Secrets Of The HeartNaNKhalil GibranMysticismhttp://books.google.com/books/content?id=XcrVp...NaN1993.04.0874.0324.0
680797884450748798445074873Fahrenheit 451NaNRay BradburyBook burningNaNNaN2004.03.98186.05733.0
680897890277120599027712050The Berlin PhenomenologyNaNGeorg Wilhelm Friedrich HegelHistoryhttp://books.google.com/books/content?id=Vy7Sk...Since the three volume edition ofHegel's Philo...1981.00.00210.00.0
680997890420034089042003405'I'm Telling You Stories'Jeanette Winterson and the Politics of ReadingHelena Grice;Tim WoodsLiterary Criticismhttp://books.google.com/books/content?id=2lVyR...This is a jubilant and rewarding collection of...1998.03.70136.010.0
\n", - "

6810 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " isbn13 isbn10 title \\\n", - "0 9780002005883 0002005883 Gilead \n", - "1 9780002261982 0002261987 Spider's Web \n", - "2 9780006163831 0006163831 The One Tree \n", - "3 9780006178736 0006178731 Rage of angels \n", - "4 9780006280897 0006280897 The Four Loves \n", - "... ... ... ... \n", - "6805 9788185300535 8185300534 I Am that \n", - "6806 9788185944609 8185944601 Secrets Of The Heart \n", - "6807 9788445074879 8445074873 Fahrenheit 451 \n", - "6808 9789027712059 9027712050 The Berlin Phenomenology \n", - "6809 9789042003408 9042003405 'I'm Telling You Stories' \n", - "\n", - " subtitle \\\n", - "0 NaN \n", - "1 A Novel \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "... ... \n", - "6805 Talks with Sri Nisargadatta Maharaj \n", - "6806 NaN \n", - "6807 NaN \n", - "6808 NaN \n", - "6809 Jeanette Winterson and the Politics of Reading \n", - "\n", - " authors \\\n", - "0 Marilynne Robinson \n", - "1 Charles Osborne;Agatha Christie \n", - "2 Stephen R. Donaldson \n", - "3 Sidney Sheldon \n", - "4 Clive Staples Lewis \n", - "... ... \n", - "6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", - "6806 Khalil Gibran \n", - "6807 Ray Bradbury \n", - "6808 Georg Wilhelm Friedrich Hegel \n", - "6809 Helena Grice;Tim Woods \n", - "\n", - " categories \\\n", - "0 Fiction \n", - "1 Detective and mystery stories \n", - "2 American fiction \n", - "3 Fiction \n", - "4 Christian life \n", - "... ... \n", - "6805 Philosophy \n", - "6806 Mysticism \n", - "6807 Book burning \n", - "6808 History \n", - "6809 Literary Criticism \n", - "\n", - " thumbnail \\\n", - "0 http://books.google.com/books/content?id=KQZCP... \n", - "1 http://books.google.com/books/content?id=gA5GP... \n", - "2 http://books.google.com/books/content?id=OmQaw... \n", - "3 http://books.google.com/books/content?id=FKo2T... \n", - "4 http://books.google.com/books/content?id=XhQ5X... \n", - "... ... \n", - "6805 http://books.google.com/books/content?id=Fv_JP... \n", - "6806 http://books.google.com/books/content?id=XcrVp... \n", - "6807 NaN \n", - "6808 http://books.google.com/books/content?id=Vy7Sk... \n", - "6809 http://books.google.com/books/content?id=2lVyR... \n", - "\n", - " description published_year \\\n", - "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n", - "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n", - "2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n", - "3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n", - "4 Lewis' work on the nature of love divides love... 2002.0 \n", - "... ... ... \n", - "6805 This collection of the timeless teachings of o... 1999.0 \n", - "6806 NaN 1993.0 \n", - "6807 NaN 2004.0 \n", - "6808 Since the three volume edition ofHegel's Philo... 1981.0 \n", - "6809 This is a jubilant and rewarding collection of... 1998.0 \n", - "\n", - " average_rating num_pages ratings_count \n", - "0 3.85 247.0 361.0 \n", - "1 3.83 241.0 5164.0 \n", - "2 3.97 479.0 172.0 \n", - "3 3.93 512.0 29532.0 \n", - "4 4.15 170.0 33684.0 \n", - "... ... ... ... \n", - "6805 4.51 531.0 104.0 \n", - "6806 4.08 74.0 324.0 \n", - "6807 3.98 186.0 5733.0 \n", - "6808 0.00 210.0 0.0 \n", - "6809 3.70 136.0 10.0 \n", - "\n", - "[6810 rows x 12 columns]" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "books=pd.read_csv('books.csv')\n", - "books" - ] }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": true, - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
isbn13isbn10titlesubtitleauthorscategoriesthumbnaildescriptionpublished_yearaverage_ratingnum_pagesratings_count
count6.810000e+0368106810238167386711648165486804.0000006767.0000006767.0000006.767000e+03
uniqueNaN681063982009378056764816474NaNNaNNaNNaN
topNaN0786282258The Lord of the RingsA NovelAgatha ChristieFictionhttp://books.google.com/books/content?id=6dVAW...This is a reproduction of the original artefac...NaNNaNNaNNaN
freqNaN11122637258816NaNNaNNaNNaN
mean9.780677e+12NaNNaNNaNNaNNaNNaNNaN1998.6303643.933284348.1810262.106910e+04
std6.068911e+08NaNNaNNaNNaNNaNNaNNaN10.4842570.331352242.3767831.376207e+05
min9.780002e+12NaNNaNNaNNaNNaNNaNNaN1853.0000000.0000000.0000000.000000e+00
25%9.780330e+12NaNNaNNaNNaNNaNNaNNaN1996.0000003.770000208.0000001.590000e+02
50%9.780553e+12NaNNaNNaNNaNNaNNaNNaN2002.0000003.960000304.0000001.018000e+03
75%9.780810e+12NaNNaNNaNNaNNaNNaNNaN2005.0000004.130000420.0000005.992500e+03
max9.789042e+12NaNNaNNaNNaNNaNNaNNaN2019.0000005.0000003342.0000005.629932e+06
\n", - "
" - ], - "text/plain": [ - " isbn13 isbn10 title subtitle \\\n", - "count 6.810000e+03 6810 6810 2381 \n", - "unique NaN 6810 6398 2009 \n", - "top NaN 0786282258 The Lord of the Rings A Novel \n", - "freq NaN 1 11 226 \n", - "mean 9.780677e+12 NaN NaN NaN \n", - "std 6.068911e+08 NaN NaN NaN \n", - "min 9.780002e+12 NaN NaN NaN \n", - "25% 9.780330e+12 NaN NaN NaN \n", - "50% 9.780553e+12 NaN NaN NaN \n", - "75% 9.780810e+12 NaN NaN NaN \n", - "max 9.789042e+12 NaN NaN NaN \n", - "\n", - " authors categories \\\n", - "count 6738 6711 \n", - "unique 3780 567 \n", - "top Agatha Christie Fiction \n", - "freq 37 2588 \n", - "mean NaN NaN \n", - "std NaN NaN \n", - "min NaN NaN \n", - "25% NaN NaN \n", - "50% NaN NaN \n", - "75% NaN NaN \n", - "max NaN NaN \n", - "\n", - " thumbnail \\\n", - "count 6481 \n", - "unique 6481 \n", - "top http://books.google.com/books/content?id=6dVAW... \n", - "freq 1 \n", - "mean NaN \n", - "std NaN \n", - "min NaN \n", - "25% NaN \n", - "50% NaN \n", - "75% NaN \n", - "max NaN \n", - "\n", - " description published_year \\\n", - "count 6548 6804.000000 \n", - "unique 6474 NaN \n", - "top This is a reproduction of the original artefac... NaN \n", - "freq 6 NaN \n", - "mean NaN 1998.630364 \n", - "std NaN 10.484257 \n", - "min NaN 1853.000000 \n", - "25% NaN 1996.000000 \n", - "50% NaN 2002.000000 \n", - "75% NaN 2005.000000 \n", - "max NaN 2019.000000 \n", - "\n", - " average_rating num_pages ratings_count \n", - "count 6767.000000 6767.000000 6.767000e+03 \n", - "unique NaN NaN NaN \n", - "top NaN NaN NaN \n", - "freq NaN NaN NaN \n", - "mean 3.933284 348.181026 2.106910e+04 \n", - "std 0.331352 242.376783 1.376207e+05 \n", - "min 0.000000 0.000000 0.000000e+00 \n", - "25% 3.770000 208.000000 1.590000e+02 \n", - "50% 3.960000 304.000000 1.018000e+03 \n", - "75% 4.130000 420.000000 5.992500e+03 \n", - "max 5.000000 3342.000000 5.629932e+06 " - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "scrolled": false, - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "isbn13 0\n", - "isbn10 0\n", - "title 0\n", - "subtitle 4429\n", - "authors 72\n", - "categories 99\n", - "thumbnail 329\n", - "description 262\n", - "published_year 6\n", - "average_rating 43\n", - "num_pages 43\n", - "ratings_count 43\n", - "dtype: int64" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books.isnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "books.drop('thumbnail', inplace=True, axis=1)\n", - "books.drop('subtitle', inplace=True, axis=1)\n", - "books.drop('description', inplace=True, axis=1)\n", - "books.drop('isbn13', inplace=True, axis=1)\n", - "books.drop('isbn10', inplace=True, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "title 0\n", - "authors 72\n", - "categories 99\n", - "published_year 6\n", - "average_rating 43\n", - "num_pages 43\n", - "ratings_count 43\n", - "dtype: int64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books.isnull().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
0GileadMarilynne RobinsonFiction2004.03.85247.0361.0
1Spider's WebCharles Osborne;Agatha ChristieDetective and mystery stories2000.03.83241.05164.0
2The One TreeStephen R. DonaldsonAmerican fiction1982.03.97479.0172.0
3Rage of angelsSidney SheldonFiction1993.03.93512.029532.0
4The Four LovesClive Staples LewisChristian life2002.04.15170.033684.0
........................
6805I Am thatSri Nisargadatta Maharaj;Sudhakar S. DikshitPhilosophy1999.04.51531.0104.0
6806Secrets Of The HeartKhalil GibranMysticism1993.04.0874.0324.0
6807Fahrenheit 451Ray BradburyBook burning2004.03.98186.05733.0
6808The Berlin PhenomenologyGeorg Wilhelm Friedrich HegelHistory1981.00.00210.00.0
6809'I'm Telling You Stories'Helena Grice;Tim WoodsLiterary Criticism1998.03.70136.010.0
\n", - "

6599 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " title authors \\\n", - "0 Gilead Marilynne Robinson \n", - "1 Spider's Web Charles Osborne;Agatha Christie \n", - "2 The One Tree Stephen R. Donaldson \n", - "3 Rage of angels Sidney Sheldon \n", - "4 The Four Loves Clive Staples Lewis \n", - "... ... ... \n", - "6805 I Am that Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n", - "6806 Secrets Of The Heart Khalil Gibran \n", - "6807 Fahrenheit 451 Ray Bradbury \n", - "6808 The Berlin Phenomenology Georg Wilhelm Friedrich Hegel \n", - "6809 'I'm Telling You Stories' Helena Grice;Tim Woods \n", - "\n", - " categories published_year average_rating \\\n", - "0 Fiction 2004.0 3.85 \n", - "1 Detective and mystery stories 2000.0 3.83 \n", - "2 American fiction 1982.0 3.97 \n", - "3 Fiction 1993.0 3.93 \n", - "4 Christian life 2002.0 4.15 \n", - "... ... ... ... \n", - "6805 Philosophy 1999.0 4.51 \n", - "6806 Mysticism 1993.0 4.08 \n", - "6807 Book burning 2004.0 3.98 \n", - "6808 History 1981.0 0.00 \n", - "6809 Literary Criticism 1998.0 3.70 \n", - "\n", - " num_pages ratings_count \n", - "0 247.0 361.0 \n", - "1 241.0 5164.0 \n", - "2 479.0 172.0 \n", - "3 512.0 29532.0 \n", - "4 170.0 33684.0 \n", - "... ... ... \n", - "6805 531.0 104.0 \n", - "6806 74.0 324.0 \n", - "6807 186.0 5733.0 \n", - "6808 210.0 0.0 \n", - "6809 136.0 10.0 \n", - "\n", - "[6599 rows x 7 columns]" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books.dropna(inplace=True)\n", - "books" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count6599659965996599.0000006599.0000006599.0000006.599000e+03
unique62163728563NaNNaNNaNNaN
topThe Lord of the RingsAgatha ChristieFictionNaNNaNNaNNaN
freq9372561NaNNaNNaNNaN
meanNaNNaNNaN1998.7504173.931367348.2968632.143083e+04
stdNaNNaNNaN10.1684650.331173239.1994111.392929e+05
minNaNNaNNaN1876.0000000.0000000.0000000.000000e+00
25%NaNNaNNaN1997.0000003.770000208.0000001.630000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.032000e+03
75%NaNNaNNaN2005.0000004.130000420.0000006.105500e+03
maxNaNNaNNaN2019.0000005.0000003342.0000005.629932e+06
\n", - "
" - ], - "text/plain": [ - " title authors categories published_year \\\n", - "count 6599 6599 6599 6599.000000 \n", - "unique 6216 3728 563 NaN \n", - "top The Lord of the Rings Agatha Christie Fiction NaN \n", - "freq 9 37 2561 NaN \n", - "mean NaN NaN NaN 1998.750417 \n", - "std NaN NaN NaN 10.168465 \n", - "min NaN NaN NaN 1876.000000 \n", - "25% NaN NaN NaN 1997.000000 \n", - "50% NaN NaN NaN 2002.000000 \n", - "75% NaN NaN NaN 2005.000000 \n", - "max NaN NaN NaN 2019.000000 \n", - "\n", - " average_rating num_pages ratings_count \n", - "count 6599.000000 6599.000000 6.599000e+03 \n", - "unique NaN NaN NaN \n", - "top NaN NaN NaN \n", - "freq NaN NaN NaN \n", - "mean 3.931367 348.296863 2.143083e+04 \n", - "std 0.331173 239.199411 1.392929e+05 \n", - "min 0.000000 0.000000 0.000000e+00 \n", - "25% 3.770000 208.000000 1.630000e+02 \n", - "50% 3.950000 304.000000 1.032000e+03 \n", - "75% 4.130000 420.000000 6.105500e+03 \n", - "max 5.000000 3342.000000 5.629932e+06 " - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Fiction 2561\n", - "Juvenile Fiction 524\n", - "Biography & Autobiography 398\n", - "History 261\n", - "Literary Criticism 165\n", - " ... \n", - "Child analysis 1\n", - "Illinois 1\n", - "Erinyes (Greek mythology) 1\n", - "Exorcism 1\n", - "People with social disabilities 1\n", - "Name: categories, Length: 563, dtype: int64" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books[\"categories\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2006.0 877\n", - "2005.0 681\n", - "2004.0 605\n", - "2003.0 569\n", - "2002.0 470\n", - " ... \n", - "1928.0 1\n", - "1904.0 1\n", - "1938.0 1\n", - "1936.0 1\n", - "1947.0 1\n", - "Name: published_year, Length: 91, dtype: int64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books[\"published_year\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Agatha Christie 37\n", - "Stephen King 36\n", - "William Shakespeare 29\n", - "John Ronald Reuel Tolkien 25\n", - "Sandra Brown 23\n", - " ..\n", - "Aeg 1\n", - "Pauline Reage 1\n", - "Tim Flannery 1\n", - "Saint Augustine (of Hippo) 1\n", - "Michael S. Reynolds 1\n", - "Name: authors, Length: 3728, dtype: int64" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books[\"authors\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4.00 125\n", - "3.93 110\n", - "3.95 109\n", - "3.99 108\n", - "3.96 104\n", - " ... \n", - "4.64 1\n", - "4.68 1\n", - "4.72 1\n", - "2.44 1\n", - "4.78 1\n", - "Name: average_rating, Length: 200, dtype: int64" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books[\"average_rating\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "outputs": [], - "source": [ - "import sklearn\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "books_train, books_test = sklearn.model_selection.train_test_split(books, test_size=0.2, random_state=1)\n", - "books_train, books_val = sklearn.model_selection.train_test_split(books_train, test_size=0.5, random_state=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
915The Autobiography of Alice B. ToklasGertrude SteinBiography & Autobiography2001.03.59272.0233.0
4493Never Far from NowhereAndrea LevyBlacks1996.03.68282.0601.0
1983Year's Happy EndingBetty NeelsFiction2001.03.95216.0128.0
2196Wrinkles in TimeGeorge Smoot;Keay DavidsonScience1994.03.99360.0985.0
4011DispatchesMichael HerrHistory1991.04.23260.012590.0
........................
2841Magic BitesIlona AndrewsFiction2007.04.07260.082231.0
1713High FiveJanet EvanovichBail bond agents2000.04.18336.099172.0
3469A Brief History of TimeStephen HawkingScience1998.04.16212.0214520.0
1657The MagusJohn FowlesFiction2001.04.05656.036909.0
3986The Complete Monty Python's Flying CircusGraham Chapman;Monty Python (Comedy troupe);Te...Humor1989.04.44384.01191.0
\n", - "

2639 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " title \\\n", - "915 The Autobiography of Alice B. Toklas \n", - "4493 Never Far from Nowhere \n", - "1983 Year's Happy Ending \n", - "2196 Wrinkles in Time \n", - "4011 Dispatches \n", - "... ... \n", - "2841 Magic Bites \n", - "1713 High Five \n", - "3469 A Brief History of Time \n", - "1657 The Magus \n", - "3986 The Complete Monty Python's Flying Circus \n", - "\n", - " authors \\\n", - "915 Gertrude Stein \n", - "4493 Andrea Levy \n", - "1983 Betty Neels \n", - "2196 George Smoot;Keay Davidson \n", - "4011 Michael Herr \n", - "... ... \n", - "2841 Ilona Andrews \n", - "1713 Janet Evanovich \n", - "3469 Stephen Hawking \n", - "1657 John Fowles \n", - "3986 Graham Chapman;Monty Python (Comedy troupe);Te... \n", - "\n", - " categories published_year average_rating num_pages \\\n", - "915 Biography & Autobiography 2001.0 3.59 272.0 \n", - "4493 Blacks 1996.0 3.68 282.0 \n", - "1983 Fiction 2001.0 3.95 216.0 \n", - "2196 Science 1994.0 3.99 360.0 \n", - "4011 History 1991.0 4.23 260.0 \n", - "... ... ... ... ... \n", - "2841 Fiction 2007.0 4.07 260.0 \n", - "1713 Bail bond agents 2000.0 4.18 336.0 \n", - "3469 Science 1998.0 4.16 212.0 \n", - "1657 Fiction 2001.0 4.05 656.0 \n", - "3986 Humor 1989.0 4.44 384.0 \n", - "\n", - " ratings_count \n", - "915 233.0 \n", - "4493 601.0 \n", - "1983 128.0 \n", - "2196 985.0 \n", - "4011 12590.0 \n", - "... ... \n", - "2841 82231.0 \n", - "1713 99172.0 \n", - "3469 214520.0 \n", - "1657 36909.0 \n", - "3986 1191.0 \n", - "\n", - "[2639 rows x 7 columns]" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books_train" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count2639263926392639.0000002639.0000002639.0000002.639000e+03
unique25471827286NaNNaNNaNNaN
topOne Hundred Years of SolitudeStephen KingFictionNaNNaNNaNNaN
freq4181027NaNNaNNaNNaN
meanNaNNaNNaN1999.0329673.929807349.5346722.363199e+04
stdNaNNaNNaN9.8653200.358919244.8710901.452470e+05
minNaNNaNNaN1876.0000000.0000000.0000000.000000e+00
25%NaNNaNNaN1997.0000003.770000208.0000001.745000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.066000e+03
75%NaNNaNNaN2005.0000004.130000429.0000006.084500e+03
maxNaNNaNNaN2019.0000005.0000003020.0000004.367341e+06
\n", - "
" - ], - "text/plain": [ - " title authors categories \\\n", - "count 2639 2639 2639 \n", - "unique 2547 1827 286 \n", - "top One Hundred Years of Solitude Stephen King Fiction \n", - "freq 4 18 1027 \n", - "mean NaN NaN NaN \n", - "std NaN NaN NaN \n", - "min NaN NaN NaN \n", - "25% NaN NaN NaN \n", - "50% NaN NaN NaN \n", - "75% NaN NaN NaN \n", - "max NaN NaN NaN \n", - "\n", - " published_year average_rating num_pages ratings_count \n", - "count 2639.000000 2639.000000 2639.000000 2.639000e+03 \n", - "unique NaN NaN NaN NaN \n", - "top NaN NaN NaN NaN \n", - "freq NaN NaN NaN NaN \n", - "mean 1999.032967 3.929807 349.534672 2.363199e+04 \n", - "std 9.865320 0.358919 244.871090 1.452470e+05 \n", - "min 1876.000000 0.000000 0.000000 0.000000e+00 \n", - "25% 1997.000000 3.770000 208.000000 1.745000e+02 \n", - "50% 2002.000000 3.950000 304.000000 1.066000e+03 \n", - "75% 2005.000000 4.130000 429.000000 6.084500e+03 \n", - "max 2019.000000 5.000000 3020.000000 4.367341e+06 " - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books_train.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count1320132013201320.0000001320.0000001320.0000001.320000e+03
unique13031064185NaNNaNNaNNaN
top20,000 Leagues Under the SeaStephen KingFictionNaNNaNNaNNaN
freq37540NaNNaNNaNNaN
meanNaNNaNNaN1998.5909093.925470339.3469701.588767e+04
stdNaNNaNNaN10.1195690.299805219.5609647.877064e+04
minNaNNaNNaN1942.0000002.3300000.0000000.000000e+00
25%NaNNaNNaN1996.0000003.750000208.0000001.510000e+02
50%NaNNaNNaN2002.0000003.950000304.0000001.068000e+03
75%NaNNaNNaN2005.0000004.130000401.0000006.360000e+03
maxNaNNaNNaN2017.0000005.0000003342.0000002.115562e+06
\n", - "
" - ], - "text/plain": [ - " title authors categories published_year \\\n", - "count 1320 1320 1320 1320.000000 \n", - "unique 1303 1064 185 NaN \n", - "top 20,000 Leagues Under the Sea Stephen King Fiction NaN \n", - "freq 3 7 540 NaN \n", - "mean NaN NaN NaN 1998.590909 \n", - "std NaN NaN NaN 10.119569 \n", - "min NaN NaN NaN 1942.000000 \n", - "25% NaN NaN NaN 1996.000000 \n", - "50% NaN NaN NaN 2002.000000 \n", - "75% NaN NaN NaN 2005.000000 \n", - "max NaN NaN NaN 2017.000000 \n", - "\n", - " average_rating num_pages ratings_count \n", - "count 1320.000000 1320.000000 1.320000e+03 \n", - "unique NaN NaN NaN \n", - "top NaN NaN NaN \n", - "freq NaN NaN NaN \n", - "mean 3.925470 339.346970 1.588767e+04 \n", - "std 0.299805 219.560964 7.877064e+04 \n", - "min 2.330000 0.000000 0.000000e+00 \n", - "25% 3.750000 208.000000 1.510000e+02 \n", - "50% 3.950000 304.000000 1.068000e+03 \n", - "75% 4.130000 401.000000 6.360000e+03 \n", - "max 5.000000 3342.000000 2.115562e+06 " - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books_test.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleauthorscategoriespublished_yearaverage_ratingnum_pagesratings_count
count2640264026402640.0000002640.0000002640.0000002.640000e+03
unique25621850313NaNNaNNaNNaN
topThree Complete NovelsAgatha ChristieFictionNaNNaNNaNNaN
freq614994NaNNaNNaNNaN
meanNaNNaNNaN1998.5477273.935875351.5344702.200209e+04
stdNaNNaNNaN10.4837520.316971242.8294631.558830e+05
minNaNNaNNaN1901.0000000.0000004.0000000.000000e+00
25%NaNNaNNaN1996.0000003.770000208.0000001.557500e+02
50%NaNNaNNaN2002.0000003.950000309.5000009.555000e+02
75%NaNNaNNaN2005.0000004.130000430.2500005.980750e+03
maxNaNNaNNaN2019.0000005.0000002965.0000005.629932e+06
\n", - "
" - ], - "text/plain": [ - " title authors categories published_year \\\n", - "count 2640 2640 2640 2640.000000 \n", - "unique 2562 1850 313 NaN \n", - "top Three Complete Novels Agatha Christie Fiction NaN \n", - "freq 6 14 994 NaN \n", - "mean NaN NaN NaN 1998.547727 \n", - "std NaN NaN NaN 10.483752 \n", - "min NaN NaN NaN 1901.000000 \n", - "25% NaN NaN NaN 1996.000000 \n", - "50% NaN NaN NaN 2002.000000 \n", - "75% NaN NaN NaN 2005.000000 \n", - "max NaN NaN NaN 2019.000000 \n", - "\n", - " average_rating num_pages ratings_count \n", - "count 2640.000000 2640.000000 2.640000e+03 \n", - "unique NaN NaN NaN \n", - "top NaN NaN NaN \n", - "freq NaN NaN NaN \n", - "mean 3.935875 351.534470 2.200209e+04 \n", - "std 0.316971 242.829463 1.558830e+05 \n", - "min 0.000000 4.000000 0.000000e+00 \n", - "25% 3.770000 208.000000 1.557500e+02 \n", - "50% 3.950000 309.500000 9.555000e+02 \n", - "75% 4.130000 430.250000 5.980750e+03 \n", - "max 5.000000 2965.000000 5.629932e+06 " - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "books_val.describe(include='all')" - ] - } - ], - "metadata": { - "author": "Tomasz Ziętkiewicz", - "celltoolbar": "Slideshow", - "email": "tomasz.zietkiewicz@amu.edu.pl", - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "lang": "pl", - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.2" - }, - "slideshow": { - "slide_type": "slide" - }, - "subtitle": "2.Dane[laboratoria]", - "title": "Inżynieria uczenia maszynowego", - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": false, - "sideBar": false, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": false, - "toc_window_display": false - }, - "year": "2021" - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file