2047 lines
72 KiB
Plaintext
2047 lines
72 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>isbn13</th>\n",
|
||
" <th>isbn10</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>subtitle</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>thumbnail</th>\n",
|
||
" <th>description</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>9780002005883</td>\n",
|
||
" <td>0002005883</td>\n",
|
||
" <td>Gilead</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Marilynne Robinson</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=KQZCP...</td>\n",
|
||
" <td>A NOVEL THAT READERS and critics have been eag...</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.85</td>\n",
|
||
" <td>247.0</td>\n",
|
||
" <td>361.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>9780002261982</td>\n",
|
||
" <td>0002261987</td>\n",
|
||
" <td>Spider's Web</td>\n",
|
||
" <td>A Novel</td>\n",
|
||
" <td>Charles Osborne;Agatha Christie</td>\n",
|
||
" <td>Detective and mystery stories</td>\n",
|
||
" <td>http://books.google.com/books/content?id=gA5GP...</td>\n",
|
||
" <td>A new 'Christie for Christmas' -- a full-lengt...</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>3.83</td>\n",
|
||
" <td>241.0</td>\n",
|
||
" <td>5164.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>9780006163831</td>\n",
|
||
" <td>0006163831</td>\n",
|
||
" <td>The One Tree</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Stephen R. Donaldson</td>\n",
|
||
" <td>American fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=OmQaw...</td>\n",
|
||
" <td>Volume Two of Stephen Donaldson's acclaimed se...</td>\n",
|
||
" <td>1982.0</td>\n",
|
||
" <td>3.97</td>\n",
|
||
" <td>479.0</td>\n",
|
||
" <td>172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>9780006178736</td>\n",
|
||
" <td>0006178731</td>\n",
|
||
" <td>Rage of angels</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Sidney Sheldon</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=FKo2T...</td>\n",
|
||
" <td>A memorable, mesmerizing heroine Jennifer -- b...</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>3.93</td>\n",
|
||
" <td>512.0</td>\n",
|
||
" <td>29532.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>9780006280897</td>\n",
|
||
" <td>0006280897</td>\n",
|
||
" <td>The Four Loves</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Clive Staples Lewis</td>\n",
|
||
" <td>Christian life</td>\n",
|
||
" <td>http://books.google.com/books/content?id=XhQ5X...</td>\n",
|
||
" <td>Lewis' work on the nature of love divides love...</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>4.15</td>\n",
|
||
" <td>170.0</td>\n",
|
||
" <td>33684.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6805</th>\n",
|
||
" <td>9788185300535</td>\n",
|
||
" <td>8185300534</td>\n",
|
||
" <td>I Am that</td>\n",
|
||
" <td>Talks with Sri Nisargadatta Maharaj</td>\n",
|
||
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
|
||
" <td>Philosophy</td>\n",
|
||
" <td>http://books.google.com/books/content?id=Fv_JP...</td>\n",
|
||
" <td>This collection of the timeless teachings of o...</td>\n",
|
||
" <td>1999.0</td>\n",
|
||
" <td>4.51</td>\n",
|
||
" <td>531.0</td>\n",
|
||
" <td>104.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6806</th>\n",
|
||
" <td>9788185944609</td>\n",
|
||
" <td>8185944601</td>\n",
|
||
" <td>Secrets Of The Heart</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Khalil Gibran</td>\n",
|
||
" <td>Mysticism</td>\n",
|
||
" <td>http://books.google.com/books/content?id=XcrVp...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>4.08</td>\n",
|
||
" <td>74.0</td>\n",
|
||
" <td>324.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6807</th>\n",
|
||
" <td>9788445074879</td>\n",
|
||
" <td>8445074873</td>\n",
|
||
" <td>Fahrenheit 451</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Ray Bradbury</td>\n",
|
||
" <td>Book burning</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.98</td>\n",
|
||
" <td>186.0</td>\n",
|
||
" <td>5733.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6808</th>\n",
|
||
" <td>9789027712059</td>\n",
|
||
" <td>9027712050</td>\n",
|
||
" <td>The Berlin Phenomenology</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>http://books.google.com/books/content?id=Vy7Sk...</td>\n",
|
||
" <td>Since the three volume edition ofHegel's Philo...</td>\n",
|
||
" <td>1981.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>210.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6809</th>\n",
|
||
" <td>9789042003408</td>\n",
|
||
" <td>9042003405</td>\n",
|
||
" <td>'I'm Telling You Stories'</td>\n",
|
||
" <td>Jeanette Winterson and the Politics of Reading</td>\n",
|
||
" <td>Helena Grice;Tim Woods</td>\n",
|
||
" <td>Literary Criticism</td>\n",
|
||
" <td>http://books.google.com/books/content?id=2lVyR...</td>\n",
|
||
" <td>This is a jubilant and rewarding collection of...</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>3.70</td>\n",
|
||
" <td>136.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6810 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" isbn13 isbn10 title \\\n",
|
||
"0 9780002005883 0002005883 Gilead \n",
|
||
"1 9780002261982 0002261987 Spider's Web \n",
|
||
"2 9780006163831 0006163831 The One Tree \n",
|
||
"3 9780006178736 0006178731 Rage of angels \n",
|
||
"4 9780006280897 0006280897 The Four Loves \n",
|
||
"... ... ... ... \n",
|
||
"6805 9788185300535 8185300534 I Am that \n",
|
||
"6806 9788185944609 8185944601 Secrets Of The Heart \n",
|
||
"6807 9788445074879 8445074873 Fahrenheit 451 \n",
|
||
"6808 9789027712059 9027712050 The Berlin Phenomenology \n",
|
||
"6809 9789042003408 9042003405 'I'm Telling You Stories' \n",
|
||
"\n",
|
||
" subtitle \\\n",
|
||
"0 NaN \n",
|
||
"1 A Novel \n",
|
||
"2 NaN \n",
|
||
"3 NaN \n",
|
||
"4 NaN \n",
|
||
"... ... \n",
|
||
"6805 Talks with Sri Nisargadatta Maharaj \n",
|
||
"6806 NaN \n",
|
||
"6807 NaN \n",
|
||
"6808 NaN \n",
|
||
"6809 Jeanette Winterson and the Politics of Reading \n",
|
||
"\n",
|
||
" authors \\\n",
|
||
"0 Marilynne Robinson \n",
|
||
"1 Charles Osborne;Agatha Christie \n",
|
||
"2 Stephen R. Donaldson \n",
|
||
"3 Sidney Sheldon \n",
|
||
"4 Clive Staples Lewis \n",
|
||
"... ... \n",
|
||
"6805 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
|
||
"6806 Khalil Gibran \n",
|
||
"6807 Ray Bradbury \n",
|
||
"6808 Georg Wilhelm Friedrich Hegel \n",
|
||
"6809 Helena Grice;Tim Woods \n",
|
||
"\n",
|
||
" categories \\\n",
|
||
"0 Fiction \n",
|
||
"1 Detective and mystery stories \n",
|
||
"2 American fiction \n",
|
||
"3 Fiction \n",
|
||
"4 Christian life \n",
|
||
"... ... \n",
|
||
"6805 Philosophy \n",
|
||
"6806 Mysticism \n",
|
||
"6807 Book burning \n",
|
||
"6808 History \n",
|
||
"6809 Literary Criticism \n",
|
||
"\n",
|
||
" thumbnail \\\n",
|
||
"0 http://books.google.com/books/content?id=KQZCP... \n",
|
||
"1 http://books.google.com/books/content?id=gA5GP... \n",
|
||
"2 http://books.google.com/books/content?id=OmQaw... \n",
|
||
"3 http://books.google.com/books/content?id=FKo2T... \n",
|
||
"4 http://books.google.com/books/content?id=XhQ5X... \n",
|
||
"... ... \n",
|
||
"6805 http://books.google.com/books/content?id=Fv_JP... \n",
|
||
"6806 http://books.google.com/books/content?id=XcrVp... \n",
|
||
"6807 NaN \n",
|
||
"6808 http://books.google.com/books/content?id=Vy7Sk... \n",
|
||
"6809 http://books.google.com/books/content?id=2lVyR... \n",
|
||
"\n",
|
||
" description published_year \\\n",
|
||
"0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
|
||
"1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
|
||
"2 Volume Two of Stephen Donaldson's acclaimed se... 1982.0 \n",
|
||
"3 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
|
||
"4 Lewis' work on the nature of love divides love... 2002.0 \n",
|
||
"... ... ... \n",
|
||
"6805 This collection of the timeless teachings of o... 1999.0 \n",
|
||
"6806 NaN 1993.0 \n",
|
||
"6807 NaN 2004.0 \n",
|
||
"6808 Since the three volume edition ofHegel's Philo... 1981.0 \n",
|
||
"6809 This is a jubilant and rewarding collection of... 1998.0 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"0 3.85 247.0 361.0 \n",
|
||
"1 3.83 241.0 5164.0 \n",
|
||
"2 3.97 479.0 172.0 \n",
|
||
"3 3.93 512.0 29532.0 \n",
|
||
"4 4.15 170.0 33684.0 \n",
|
||
"... ... ... ... \n",
|
||
"6805 4.51 531.0 104.0 \n",
|
||
"6806 4.08 74.0 324.0 \n",
|
||
"6807 3.98 186.0 5733.0 \n",
|
||
"6808 0.00 210.0 0.0 \n",
|
||
"6809 3.70 136.0 10.0 \n",
|
||
"\n",
|
||
"[6810 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"books=pd.read_csv('books.csv')\n",
|
||
"books"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {
|
||
"scrolled": true,
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>isbn13</th>\n",
|
||
" <th>isbn10</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>subtitle</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>thumbnail</th>\n",
|
||
" <th>description</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6.810000e+03</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>2381</td>\n",
|
||
" <td>6738</td>\n",
|
||
" <td>6711</td>\n",
|
||
" <td>6481</td>\n",
|
||
" <td>6548</td>\n",
|
||
" <td>6804.000000</td>\n",
|
||
" <td>6767.000000</td>\n",
|
||
" <td>6767.000000</td>\n",
|
||
" <td>6.767000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6810</td>\n",
|
||
" <td>6398</td>\n",
|
||
" <td>2009</td>\n",
|
||
" <td>3780</td>\n",
|
||
" <td>567</td>\n",
|
||
" <td>6481</td>\n",
|
||
" <td>6474</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0786282258</td>\n",
|
||
" <td>The Lord of the Rings</td>\n",
|
||
" <td>A Novel</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>http://books.google.com/books/content?id=6dVAW...</td>\n",
|
||
" <td>This is a reproduction of the original artefac...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>226</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2588</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>9.780677e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.630364</td>\n",
|
||
" <td>3.933284</td>\n",
|
||
" <td>348.181026</td>\n",
|
||
" <td>2.106910e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>6.068911e+08</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.484257</td>\n",
|
||
" <td>0.331352</td>\n",
|
||
" <td>242.376783</td>\n",
|
||
" <td>1.376207e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>9.780002e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1853.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>9.780330e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.590000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>9.780553e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.960000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.018000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>9.780810e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>420.000000</td>\n",
|
||
" <td>5.992500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>9.789042e+12</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" isbn13 isbn10 title subtitle \\\n",
|
||
"count 6.810000e+03 6810 6810 2381 \n",
|
||
"unique NaN 6810 6398 2009 \n",
|
||
"top NaN 0786282258 The Lord of the Rings A Novel \n",
|
||
"freq NaN 1 11 226 \n",
|
||
"mean 9.780677e+12 NaN NaN NaN \n",
|
||
"std 6.068911e+08 NaN NaN NaN \n",
|
||
"min 9.780002e+12 NaN NaN NaN \n",
|
||
"25% 9.780330e+12 NaN NaN NaN \n",
|
||
"50% 9.780553e+12 NaN NaN NaN \n",
|
||
"75% 9.780810e+12 NaN NaN NaN \n",
|
||
"max 9.789042e+12 NaN NaN NaN \n",
|
||
"\n",
|
||
" authors categories \\\n",
|
||
"count 6738 6711 \n",
|
||
"unique 3780 567 \n",
|
||
"top Agatha Christie Fiction \n",
|
||
"freq 37 2588 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN \n",
|
||
"\n",
|
||
" thumbnail \\\n",
|
||
"count 6481 \n",
|
||
"unique 6481 \n",
|
||
"top http://books.google.com/books/content?id=6dVAW... \n",
|
||
"freq 1 \n",
|
||
"mean NaN \n",
|
||
"std NaN \n",
|
||
"min NaN \n",
|
||
"25% NaN \n",
|
||
"50% NaN \n",
|
||
"75% NaN \n",
|
||
"max NaN \n",
|
||
"\n",
|
||
" description published_year \\\n",
|
||
"count 6548 6804.000000 \n",
|
||
"unique 6474 NaN \n",
|
||
"top This is a reproduction of the original artefac... NaN \n",
|
||
"freq 6 NaN \n",
|
||
"mean NaN 1998.630364 \n",
|
||
"std NaN 10.484257 \n",
|
||
"min NaN 1853.000000 \n",
|
||
"25% NaN 1996.000000 \n",
|
||
"50% NaN 2002.000000 \n",
|
||
"75% NaN 2005.000000 \n",
|
||
"max NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 6767.000000 6767.000000 6.767000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.933284 348.181026 2.106910e+04 \n",
|
||
"std 0.331352 242.376783 1.376207e+05 \n",
|
||
"min 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.590000e+02 \n",
|
||
"50% 3.960000 304.000000 1.018000e+03 \n",
|
||
"75% 4.130000 420.000000 5.992500e+03 \n",
|
||
"max 5.000000 3342.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {
|
||
"scrolled": false,
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"isbn13 0\n",
|
||
"isbn10 0\n",
|
||
"title 0\n",
|
||
"subtitle 4429\n",
|
||
"authors 72\n",
|
||
"categories 99\n",
|
||
"thumbnail 329\n",
|
||
"description 262\n",
|
||
"published_year 6\n",
|
||
"average_rating 43\n",
|
||
"num_pages 43\n",
|
||
"ratings_count 43\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.isnull().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"books.drop('thumbnail', inplace=True, axis=1)\n",
|
||
"books.drop('subtitle', inplace=True, axis=1)\n",
|
||
"books.drop('description', inplace=True, axis=1)\n",
|
||
"books.drop('isbn13', inplace=True, axis=1)\n",
|
||
"books.drop('isbn10', inplace=True, axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"title 0\n",
|
||
"authors 72\n",
|
||
"categories 99\n",
|
||
"published_year 6\n",
|
||
"average_rating 43\n",
|
||
"num_pages 43\n",
|
||
"ratings_count 43\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.isnull().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Gilead</td>\n",
|
||
" <td>Marilynne Robinson</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.85</td>\n",
|
||
" <td>247.0</td>\n",
|
||
" <td>361.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Spider's Web</td>\n",
|
||
" <td>Charles Osborne;Agatha Christie</td>\n",
|
||
" <td>Detective and mystery stories</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>3.83</td>\n",
|
||
" <td>241.0</td>\n",
|
||
" <td>5164.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>The One Tree</td>\n",
|
||
" <td>Stephen R. Donaldson</td>\n",
|
||
" <td>American fiction</td>\n",
|
||
" <td>1982.0</td>\n",
|
||
" <td>3.97</td>\n",
|
||
" <td>479.0</td>\n",
|
||
" <td>172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Rage of angels</td>\n",
|
||
" <td>Sidney Sheldon</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>3.93</td>\n",
|
||
" <td>512.0</td>\n",
|
||
" <td>29532.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>The Four Loves</td>\n",
|
||
" <td>Clive Staples Lewis</td>\n",
|
||
" <td>Christian life</td>\n",
|
||
" <td>2002.0</td>\n",
|
||
" <td>4.15</td>\n",
|
||
" <td>170.0</td>\n",
|
||
" <td>33684.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6805</th>\n",
|
||
" <td>I Am that</td>\n",
|
||
" <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
|
||
" <td>Philosophy</td>\n",
|
||
" <td>1999.0</td>\n",
|
||
" <td>4.51</td>\n",
|
||
" <td>531.0</td>\n",
|
||
" <td>104.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6806</th>\n",
|
||
" <td>Secrets Of The Heart</td>\n",
|
||
" <td>Khalil Gibran</td>\n",
|
||
" <td>Mysticism</td>\n",
|
||
" <td>1993.0</td>\n",
|
||
" <td>4.08</td>\n",
|
||
" <td>74.0</td>\n",
|
||
" <td>324.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6807</th>\n",
|
||
" <td>Fahrenheit 451</td>\n",
|
||
" <td>Ray Bradbury</td>\n",
|
||
" <td>Book burning</td>\n",
|
||
" <td>2004.0</td>\n",
|
||
" <td>3.98</td>\n",
|
||
" <td>186.0</td>\n",
|
||
" <td>5733.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6808</th>\n",
|
||
" <td>The Berlin Phenomenology</td>\n",
|
||
" <td>Georg Wilhelm Friedrich Hegel</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>1981.0</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>210.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6809</th>\n",
|
||
" <td>'I'm Telling You Stories'</td>\n",
|
||
" <td>Helena Grice;Tim Woods</td>\n",
|
||
" <td>Literary Criticism</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>3.70</td>\n",
|
||
" <td>136.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6599 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors \\\n",
|
||
"0 Gilead Marilynne Robinson \n",
|
||
"1 Spider's Web Charles Osborne;Agatha Christie \n",
|
||
"2 The One Tree Stephen R. Donaldson \n",
|
||
"3 Rage of angels Sidney Sheldon \n",
|
||
"4 The Four Loves Clive Staples Lewis \n",
|
||
"... ... ... \n",
|
||
"6805 I Am that Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
|
||
"6806 Secrets Of The Heart Khalil Gibran \n",
|
||
"6807 Fahrenheit 451 Ray Bradbury \n",
|
||
"6808 The Berlin Phenomenology Georg Wilhelm Friedrich Hegel \n",
|
||
"6809 'I'm Telling You Stories' Helena Grice;Tim Woods \n",
|
||
"\n",
|
||
" categories published_year average_rating \\\n",
|
||
"0 Fiction 2004.0 3.85 \n",
|
||
"1 Detective and mystery stories 2000.0 3.83 \n",
|
||
"2 American fiction 1982.0 3.97 \n",
|
||
"3 Fiction 1993.0 3.93 \n",
|
||
"4 Christian life 2002.0 4.15 \n",
|
||
"... ... ... ... \n",
|
||
"6805 Philosophy 1999.0 4.51 \n",
|
||
"6806 Mysticism 1993.0 4.08 \n",
|
||
"6807 Book burning 2004.0 3.98 \n",
|
||
"6808 History 1981.0 0.00 \n",
|
||
"6809 Literary Criticism 1998.0 3.70 \n",
|
||
"\n",
|
||
" num_pages ratings_count \n",
|
||
"0 247.0 361.0 \n",
|
||
"1 241.0 5164.0 \n",
|
||
"2 479.0 172.0 \n",
|
||
"3 512.0 29532.0 \n",
|
||
"4 170.0 33684.0 \n",
|
||
"... ... ... \n",
|
||
"6805 531.0 104.0 \n",
|
||
"6806 74.0 324.0 \n",
|
||
"6807 186.0 5733.0 \n",
|
||
"6808 210.0 0.0 \n",
|
||
"6809 136.0 10.0 \n",
|
||
"\n",
|
||
"[6599 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.dropna(inplace=True)\n",
|
||
"books"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6599.000000</td>\n",
|
||
" <td>6.599000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>6216</td>\n",
|
||
" <td>3728</td>\n",
|
||
" <td>563</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>The Lord of the Rings</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>2561</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.750417</td>\n",
|
||
" <td>3.931367</td>\n",
|
||
" <td>348.296863</td>\n",
|
||
" <td>2.143083e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.168465</td>\n",
|
||
" <td>0.331173</td>\n",
|
||
" <td>239.199411</td>\n",
|
||
" <td>1.392929e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1876.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1997.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.630000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.032000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>420.000000</td>\n",
|
||
" <td>6.105500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 6599 6599 6599 6599.000000 \n",
|
||
"unique 6216 3728 563 NaN \n",
|
||
"top The Lord of the Rings Agatha Christie Fiction NaN \n",
|
||
"freq 9 37 2561 NaN \n",
|
||
"mean NaN NaN NaN 1998.750417 \n",
|
||
"std NaN NaN NaN 10.168465 \n",
|
||
"min NaN NaN NaN 1876.000000 \n",
|
||
"25% NaN NaN NaN 1997.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 6599.000000 6599.000000 6.599000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.931367 348.296863 2.143083e+04 \n",
|
||
"std 0.331173 239.199411 1.392929e+05 \n",
|
||
"min 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.630000e+02 \n",
|
||
"50% 3.950000 304.000000 1.032000e+03 \n",
|
||
"75% 4.130000 420.000000 6.105500e+03 \n",
|
||
"max 5.000000 3342.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Fiction 2561\n",
|
||
"Juvenile Fiction 524\n",
|
||
"Biography & Autobiography 398\n",
|
||
"History 261\n",
|
||
"Literary Criticism 165\n",
|
||
" ... \n",
|
||
"Child analysis 1\n",
|
||
"Illinois 1\n",
|
||
"Erinyes (Greek mythology) 1\n",
|
||
"Exorcism 1\n",
|
||
"People with social disabilities 1\n",
|
||
"Name: categories, Length: 563, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"categories\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2006.0 877\n",
|
||
"2005.0 681\n",
|
||
"2004.0 605\n",
|
||
"2003.0 569\n",
|
||
"2002.0 470\n",
|
||
" ... \n",
|
||
"1928.0 1\n",
|
||
"1904.0 1\n",
|
||
"1938.0 1\n",
|
||
"1936.0 1\n",
|
||
"1947.0 1\n",
|
||
"Name: published_year, Length: 91, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"published_year\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Agatha Christie 37\n",
|
||
"Stephen King 36\n",
|
||
"William Shakespeare 29\n",
|
||
"John Ronald Reuel Tolkien 25\n",
|
||
"Sandra Brown 23\n",
|
||
" ..\n",
|
||
"Aeg 1\n",
|
||
"Pauline Reage 1\n",
|
||
"Tim Flannery 1\n",
|
||
"Saint Augustine (of Hippo) 1\n",
|
||
"Michael S. Reynolds 1\n",
|
||
"Name: authors, Length: 3728, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 50,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"authors\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4.00 125\n",
|
||
"3.93 110\n",
|
||
"3.95 109\n",
|
||
"3.99 108\n",
|
||
"3.96 104\n",
|
||
" ... \n",
|
||
"4.64 1\n",
|
||
"4.68 1\n",
|
||
"4.72 1\n",
|
||
"2.44 1\n",
|
||
"4.78 1\n",
|
||
"Name: average_rating, Length: 200, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books[\"average_rating\"].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import sklearn\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"books_train, books_test = sklearn.model_selection.train_test_split(books, test_size=0.2, random_state=1)\n",
|
||
"books_train, books_val = sklearn.model_selection.train_test_split(books_train, test_size=0.5, random_state=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>915</th>\n",
|
||
" <td>The Autobiography of Alice B. Toklas</td>\n",
|
||
" <td>Gertrude Stein</td>\n",
|
||
" <td>Biography & Autobiography</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>3.59</td>\n",
|
||
" <td>272.0</td>\n",
|
||
" <td>233.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4493</th>\n",
|
||
" <td>Never Far from Nowhere</td>\n",
|
||
" <td>Andrea Levy</td>\n",
|
||
" <td>Blacks</td>\n",
|
||
" <td>1996.0</td>\n",
|
||
" <td>3.68</td>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>601.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1983</th>\n",
|
||
" <td>Year's Happy Ending</td>\n",
|
||
" <td>Betty Neels</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>3.95</td>\n",
|
||
" <td>216.0</td>\n",
|
||
" <td>128.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2196</th>\n",
|
||
" <td>Wrinkles in Time</td>\n",
|
||
" <td>George Smoot;Keay Davidson</td>\n",
|
||
" <td>Science</td>\n",
|
||
" <td>1994.0</td>\n",
|
||
" <td>3.99</td>\n",
|
||
" <td>360.0</td>\n",
|
||
" <td>985.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4011</th>\n",
|
||
" <td>Dispatches</td>\n",
|
||
" <td>Michael Herr</td>\n",
|
||
" <td>History</td>\n",
|
||
" <td>1991.0</td>\n",
|
||
" <td>4.23</td>\n",
|
||
" <td>260.0</td>\n",
|
||
" <td>12590.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2841</th>\n",
|
||
" <td>Magic Bites</td>\n",
|
||
" <td>Ilona Andrews</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2007.0</td>\n",
|
||
" <td>4.07</td>\n",
|
||
" <td>260.0</td>\n",
|
||
" <td>82231.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1713</th>\n",
|
||
" <td>High Five</td>\n",
|
||
" <td>Janet Evanovich</td>\n",
|
||
" <td>Bail bond agents</td>\n",
|
||
" <td>2000.0</td>\n",
|
||
" <td>4.18</td>\n",
|
||
" <td>336.0</td>\n",
|
||
" <td>99172.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3469</th>\n",
|
||
" <td>A Brief History of Time</td>\n",
|
||
" <td>Stephen Hawking</td>\n",
|
||
" <td>Science</td>\n",
|
||
" <td>1998.0</td>\n",
|
||
" <td>4.16</td>\n",
|
||
" <td>212.0</td>\n",
|
||
" <td>214520.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1657</th>\n",
|
||
" <td>The Magus</td>\n",
|
||
" <td>John Fowles</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>2001.0</td>\n",
|
||
" <td>4.05</td>\n",
|
||
" <td>656.0</td>\n",
|
||
" <td>36909.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3986</th>\n",
|
||
" <td>The Complete Monty Python's Flying Circus</td>\n",
|
||
" <td>Graham Chapman;Monty Python (Comedy troupe);Te...</td>\n",
|
||
" <td>Humor</td>\n",
|
||
" <td>1989.0</td>\n",
|
||
" <td>4.44</td>\n",
|
||
" <td>384.0</td>\n",
|
||
" <td>1191.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>2639 rows × 7 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title \\\n",
|
||
"915 The Autobiography of Alice B. Toklas \n",
|
||
"4493 Never Far from Nowhere \n",
|
||
"1983 Year's Happy Ending \n",
|
||
"2196 Wrinkles in Time \n",
|
||
"4011 Dispatches \n",
|
||
"... ... \n",
|
||
"2841 Magic Bites \n",
|
||
"1713 High Five \n",
|
||
"3469 A Brief History of Time \n",
|
||
"1657 The Magus \n",
|
||
"3986 The Complete Monty Python's Flying Circus \n",
|
||
"\n",
|
||
" authors \\\n",
|
||
"915 Gertrude Stein \n",
|
||
"4493 Andrea Levy \n",
|
||
"1983 Betty Neels \n",
|
||
"2196 George Smoot;Keay Davidson \n",
|
||
"4011 Michael Herr \n",
|
||
"... ... \n",
|
||
"2841 Ilona Andrews \n",
|
||
"1713 Janet Evanovich \n",
|
||
"3469 Stephen Hawking \n",
|
||
"1657 John Fowles \n",
|
||
"3986 Graham Chapman;Monty Python (Comedy troupe);Te... \n",
|
||
"\n",
|
||
" categories published_year average_rating num_pages \\\n",
|
||
"915 Biography & Autobiography 2001.0 3.59 272.0 \n",
|
||
"4493 Blacks 1996.0 3.68 282.0 \n",
|
||
"1983 Fiction 2001.0 3.95 216.0 \n",
|
||
"2196 Science 1994.0 3.99 360.0 \n",
|
||
"4011 History 1991.0 4.23 260.0 \n",
|
||
"... ... ... ... ... \n",
|
||
"2841 Fiction 2007.0 4.07 260.0 \n",
|
||
"1713 Bail bond agents 2000.0 4.18 336.0 \n",
|
||
"3469 Science 1998.0 4.16 212.0 \n",
|
||
"1657 Fiction 2001.0 4.05 656.0 \n",
|
||
"3986 Humor 1989.0 4.44 384.0 \n",
|
||
"\n",
|
||
" ratings_count \n",
|
||
"915 233.0 \n",
|
||
"4493 601.0 \n",
|
||
"1983 128.0 \n",
|
||
"2196 985.0 \n",
|
||
"4011 12590.0 \n",
|
||
"... ... \n",
|
||
"2841 82231.0 \n",
|
||
"1713 99172.0 \n",
|
||
"3469 214520.0 \n",
|
||
"1657 36909.0 \n",
|
||
"3986 1191.0 \n",
|
||
"\n",
|
||
"[2639 rows x 7 columns]"
|
||
]
|
||
},
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_train"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2639.000000</td>\n",
|
||
" <td>2.639000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2547</td>\n",
|
||
" <td>1827</td>\n",
|
||
" <td>286</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>One Hundred Years of Solitude</td>\n",
|
||
" <td>Stephen King</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>1027</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1999.032967</td>\n",
|
||
" <td>3.929807</td>\n",
|
||
" <td>349.534672</td>\n",
|
||
" <td>2.363199e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9.865320</td>\n",
|
||
" <td>0.358919</td>\n",
|
||
" <td>244.871090</td>\n",
|
||
" <td>1.452470e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1876.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1997.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.745000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.066000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>429.000000</td>\n",
|
||
" <td>6.084500e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3020.000000</td>\n",
|
||
" <td>4.367341e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories \\\n",
|
||
"count 2639 2639 2639 \n",
|
||
"unique 2547 1827 286 \n",
|
||
"top One Hundred Years of Solitude Stephen King Fiction \n",
|
||
"freq 4 18 1027 \n",
|
||
"mean NaN NaN NaN \n",
|
||
"std NaN NaN NaN \n",
|
||
"min NaN NaN NaN \n",
|
||
"25% NaN NaN NaN \n",
|
||
"50% NaN NaN NaN \n",
|
||
"75% NaN NaN NaN \n",
|
||
"max NaN NaN NaN \n",
|
||
"\n",
|
||
" published_year average_rating num_pages ratings_count \n",
|
||
"count 2639.000000 2639.000000 2639.000000 2.639000e+03 \n",
|
||
"unique NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN \n",
|
||
"mean 1999.032967 3.929807 349.534672 2.363199e+04 \n",
|
||
"std 9.865320 0.358919 244.871090 1.452470e+05 \n",
|
||
"min 1876.000000 0.000000 0.000000 0.000000e+00 \n",
|
||
"25% 1997.000000 3.770000 208.000000 1.745000e+02 \n",
|
||
"50% 2002.000000 3.950000 304.000000 1.066000e+03 \n",
|
||
"75% 2005.000000 4.130000 429.000000 6.084500e+03 \n",
|
||
"max 2019.000000 5.000000 3020.000000 4.367341e+06 "
|
||
]
|
||
},
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_train.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1320.000000</td>\n",
|
||
" <td>1.320000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>1303</td>\n",
|
||
" <td>1064</td>\n",
|
||
" <td>185</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>20,000 Leagues Under the Sea</td>\n",
|
||
" <td>Stephen King</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>540</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.590909</td>\n",
|
||
" <td>3.925470</td>\n",
|
||
" <td>339.346970</td>\n",
|
||
" <td>1.588767e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.119569</td>\n",
|
||
" <td>0.299805</td>\n",
|
||
" <td>219.560964</td>\n",
|
||
" <td>7.877064e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1942.000000</td>\n",
|
||
" <td>2.330000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.750000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.510000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>304.000000</td>\n",
|
||
" <td>1.068000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>401.000000</td>\n",
|
||
" <td>6.360000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2017.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>3342.000000</td>\n",
|
||
" <td>2.115562e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 1320 1320 1320 1320.000000 \n",
|
||
"unique 1303 1064 185 NaN \n",
|
||
"top 20,000 Leagues Under the Sea Stephen King Fiction NaN \n",
|
||
"freq 3 7 540 NaN \n",
|
||
"mean NaN NaN NaN 1998.590909 \n",
|
||
"std NaN NaN NaN 10.119569 \n",
|
||
"min NaN NaN NaN 1942.000000 \n",
|
||
"25% NaN NaN NaN 1996.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2017.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 1320.000000 1320.000000 1.320000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.925470 339.346970 1.588767e+04 \n",
|
||
"std 0.299805 219.560964 7.877064e+04 \n",
|
||
"min 2.330000 0.000000 0.000000e+00 \n",
|
||
"25% 3.750000 208.000000 1.510000e+02 \n",
|
||
"50% 3.950000 304.000000 1.068000e+03 \n",
|
||
"75% 4.130000 401.000000 6.360000e+03 \n",
|
||
"max 5.000000 3342.000000 2.115562e+06 "
|
||
]
|
||
},
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_test.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>authors</th>\n",
|
||
" <th>categories</th>\n",
|
||
" <th>published_year</th>\n",
|
||
" <th>average_rating</th>\n",
|
||
" <th>num_pages</th>\n",
|
||
" <th>ratings_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2640.000000</td>\n",
|
||
" <td>2.640000e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>2562</td>\n",
|
||
" <td>1850</td>\n",
|
||
" <td>313</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>Three Complete Novels</td>\n",
|
||
" <td>Agatha Christie</td>\n",
|
||
" <td>Fiction</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>994</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1998.547727</td>\n",
|
||
" <td>3.935875</td>\n",
|
||
" <td>351.534470</td>\n",
|
||
" <td>2.200209e+04</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10.483752</td>\n",
|
||
" <td>0.316971</td>\n",
|
||
" <td>242.829463</td>\n",
|
||
" <td>1.558830e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1901.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>4.000000</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1996.000000</td>\n",
|
||
" <td>3.770000</td>\n",
|
||
" <td>208.000000</td>\n",
|
||
" <td>1.557500e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2002.000000</td>\n",
|
||
" <td>3.950000</td>\n",
|
||
" <td>309.500000</td>\n",
|
||
" <td>9.555000e+02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2005.000000</td>\n",
|
||
" <td>4.130000</td>\n",
|
||
" <td>430.250000</td>\n",
|
||
" <td>5.980750e+03</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>2965.000000</td>\n",
|
||
" <td>5.629932e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" title authors categories published_year \\\n",
|
||
"count 2640 2640 2640 2640.000000 \n",
|
||
"unique 2562 1850 313 NaN \n",
|
||
"top Three Complete Novels Agatha Christie Fiction NaN \n",
|
||
"freq 6 14 994 NaN \n",
|
||
"mean NaN NaN NaN 1998.547727 \n",
|
||
"std NaN NaN NaN 10.483752 \n",
|
||
"min NaN NaN NaN 1901.000000 \n",
|
||
"25% NaN NaN NaN 1996.000000 \n",
|
||
"50% NaN NaN NaN 2002.000000 \n",
|
||
"75% NaN NaN NaN 2005.000000 \n",
|
||
"max NaN NaN NaN 2019.000000 \n",
|
||
"\n",
|
||
" average_rating num_pages ratings_count \n",
|
||
"count 2640.000000 2640.000000 2.640000e+03 \n",
|
||
"unique NaN NaN NaN \n",
|
||
"top NaN NaN NaN \n",
|
||
"freq NaN NaN NaN \n",
|
||
"mean 3.935875 351.534470 2.200209e+04 \n",
|
||
"std 0.316971 242.829463 1.558830e+05 \n",
|
||
"min 0.000000 4.000000 0.000000e+00 \n",
|
||
"25% 3.770000 208.000000 1.557500e+02 \n",
|
||
"50% 3.950000 309.500000 9.555000e+02 \n",
|
||
"75% 4.130000 430.250000 5.980750e+03 \n",
|
||
"max 5.000000 2965.000000 5.629932e+06 "
|
||
]
|
||
},
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"books_val.describe(include='all')"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"author": "Tomasz Ziętkiewicz",
|
||
"celltoolbar": "Slideshow",
|
||
"email": "tomasz.zietkiewicz@amu.edu.pl",
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"lang": "pl",
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.2"
|
||
},
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
},
|
||
"subtitle": "2.Dane[laboratoria]",
|
||
"title": "Inżynieria uczenia maszynowego",
|
||
"toc": {
|
||
"base_numbering": 1,
|
||
"nav_menu": {},
|
||
"number_sections": false,
|
||
"sideBar": false,
|
||
"skip_h1_title": false,
|
||
"title_cell": "Table of Contents",
|
||
"title_sidebar": "Contents",
|
||
"toc_cell": false,
|
||
"toc_position": {},
|
||
"toc_section_display": false,
|
||
"toc_window_display": false
|
||
},
|
||
"year": "2021"
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|