update data formatting

This commit is contained in:
s444018 2022-03-21 09:03:16 +01:00
parent 0ac74675bc
commit fc89e512d9

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "circular-credit", "id": "municipal-plumbing",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Skrypt do ściągnięcia zbioru danych" "### Skrypt do ściągnięcia zbioru danych"
@ -11,7 +11,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "occupational-sister", "id": "colored-lesbian",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -25,7 +25,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "facial-faith", "id": "previous-oracle",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -47,7 +47,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "supreme-denial", "id": "desperate-amazon",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -67,65 +67,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 92, "execution_count": 57,
"id": "destroyed-display", "id": "fixed-accessory",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import pandas as pd\n", "import pandas as pd\n",
"import numpy as np\n", "import numpy as np\n",
"data=pd.read_csv('imdb_top_1000.csv')\n", "data=pd.read_csv('imdb_top_1000.csv')\n",
"data"
]
},
{
"cell_type": "markdown",
"id": "aggressive-greece",
"metadata": {},
"source": [
"## Usuwanie kolumn\n",
"- Poster_Link: kolumna zawierająca linki do plakatów promujących film\n",
"- Overview: kolumna zawierająca krótkie opisy fabuły poszczególnych filmów"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "commercial-sustainability",
"metadata": {},
"outputs": [],
"source": [
"data.drop(columns=[\"Poster_Link\"], inplace=True)\n",
"data.drop(columns=[\"Overview\"], inplace=True)\n",
"# data" "# data"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 59,
"id": "electronic-disco", "id": "otherwise-atlas",
"metadata": {},
"outputs": [],
"source": [
"# Lowercase na polach tekstowych\n",
"data[\"Series_Title\"] = data[\"Series_Title\"].str.lower()\n",
"data[\"Genre\"] = data[\"Genre\"].str.lower()\n",
"data[\"Director\"] = data[\"Director\"].str.lower()\n",
"data[\"Star1\"] = data[\"Star1\"].str.lower()\n",
"data[\"Star2\"] = data[\"Star2\"].str.lower()\n",
"data[\"Star3\"] = data[\"Star3\"].str.lower()\n",
"data[\"Star4\"] = data[\"Star4\"].str.lower()\n",
"\n",
"# Usunięcie Nan i string to int \n",
"data = data.replace(np.nan, '', regex=True)\n",
"data[\"Gross\"] = data[\"Gross\"].str.replace(',', '')\n",
"data[\"Gross\"] = pd.to_numeric(data[\"Gross\"])"
]
},
{
"cell_type": "code",
"execution_count": 97,
"id": "chemical-middle",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -141,307 +97,91 @@
"!wc -l imdb_top_1000.csv" "!wc -l imdb_top_1000.csv"
] ]
}, },
{
"cell_type": "markdown",
"id": "strategic-brooks",
"metadata": {},
"source": [
"## Usuwanie kolumn\n",
"- Poster_Link: kolumna zawierająca linki do plakatów promujących film\n",
"- Overview: kolumna zawierająca krótkie opisy fabuły poszczególnych filmów"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 96, "execution_count": 48,
"id": "respective-cathedral", "id": "alternative-genealogy",
"metadata": {},
"outputs": [],
"source": [
"data.drop(columns=[\"Poster_Link\"], inplace=True)\n",
"data.drop(columns=[\"Overview\"], inplace=True)\n",
"\n",
"# data"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "antique-nowhere",
"metadata": {},
"outputs": [],
"source": [
"# Lowercase na polach tekstowych\n",
"data[\"Series_Title\"] = data[\"Series_Title\"].str.lower()\n",
"data[\"Genre\"] = data[\"Genre\"].str.lower()\n",
"data[\"Director\"] = data[\"Director\"].str.lower()\n",
"data[\"Star1\"] = data[\"Star1\"].str.lower()\n",
"data[\"Star2\"] = data[\"Star2\"].str.lower()\n",
"data[\"Star3\"] = data[\"Star3\"].str.lower()\n",
"data[\"Star4\"] = data[\"Star4\"].str.lower()\n",
"\n",
"# Usunięcie Nan i string to int \n",
"data = data.replace(np.nan, '', regex=True)\n",
"data[\"Gross\"] = data[\"Gross\"].str.replace(',', '')\n",
"data[\"Gross\"] = pd.to_numeric(data[\"Gross\"], errors='coerce')\n",
"\n",
"data = data.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "furnished-dating",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Series_Title</th>\n",
" <th>Released_Year</th>\n",
" <th>Certificate</th>\n",
" <th>Runtime</th>\n",
" <th>Genre</th>\n",
" <th>IMDB_Rating</th>\n",
" <th>Meta_score</th>\n",
" <th>Director</th>\n",
" <th>Star1</th>\n",
" <th>Star2</th>\n",
" <th>Star3</th>\n",
" <th>Star4</th>\n",
" <th>No_of_Votes</th>\n",
" <th>Gross</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000.000000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1000</td>\n",
" <td>1.000000e+03</td>\n",
" <td>8.310000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>999</td>\n",
" <td>100</td>\n",
" <td>17</td>\n",
" <td>140</td>\n",
" <td>202</td>\n",
" <td>NaN</td>\n",
" <td>64</td>\n",
" <td>548</td>\n",
" <td>660</td>\n",
" <td>841</td>\n",
" <td>891</td>\n",
" <td>939</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>drishyam</td>\n",
" <td>2014</td>\n",
" <td>U</td>\n",
" <td>130 min</td>\n",
" <td>drama</td>\n",
" <td>NaN</td>\n",
" <td></td>\n",
" <td>alfred hitchcock</td>\n",
" <td>tom hanks</td>\n",
" <td>emma watson</td>\n",
" <td>rupert grint</td>\n",
" <td>michael caine</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>2</td>\n",
" <td>32</td>\n",
" <td>234</td>\n",
" <td>23</td>\n",
" <td>85</td>\n",
" <td>NaN</td>\n",
" <td>157</td>\n",
" <td>14</td>\n",
" <td>12</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.949300</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.736929e+05</td>\n",
" <td>6.803475e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.275491</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.273727e+05</td>\n",
" <td>1.097500e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.600000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.508800e+04</td>\n",
" <td>1.305000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.700000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.552625e+04</td>\n",
" <td>3.253559e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.900000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.385485e+05</td>\n",
" <td>2.353089e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8.100000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.741612e+05</td>\n",
" <td>8.075089e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.300000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.343110e+06</td>\n",
" <td>9.366622e+08</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [ "text/plain": [
" Series_Title Released_Year Certificate Runtime Genre IMDB_Rating \\\n", "(831, 16)"
"count 1000 1000 1000 1000 1000 1000.000000 \n",
"unique 999 100 17 140 202 NaN \n",
"top drishyam 2014 U 130 min drama NaN \n",
"freq 2 32 234 23 85 NaN \n",
"mean NaN NaN NaN NaN NaN 7.949300 \n",
"std NaN NaN NaN NaN NaN 0.275491 \n",
"min NaN NaN NaN NaN NaN 7.600000 \n",
"25% NaN NaN NaN NaN NaN 7.700000 \n",
"50% NaN NaN NaN NaN NaN 7.900000 \n",
"75% NaN NaN NaN NaN NaN 8.100000 \n",
"max NaN NaN NaN NaN NaN 9.300000 \n",
"\n",
" Meta_score Director Star1 Star2 Star3 \\\n",
"count 1000 1000 1000 1000 1000 \n",
"unique 64 548 660 841 891 \n",
"top alfred hitchcock tom hanks emma watson rupert grint \n",
"freq 157 14 12 7 5 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" Star4 No_of_Votes Gross \n",
"count 1000 1.000000e+03 8.310000e+02 \n",
"unique 939 NaN NaN \n",
"top michael caine NaN NaN \n",
"freq 4 NaN NaN \n",
"mean NaN 2.736929e+05 6.803475e+07 \n",
"std NaN 3.273727e+05 1.097500e+08 \n",
"min NaN 2.508800e+04 1.305000e+03 \n",
"25% NaN 5.552625e+04 3.253559e+06 \n",
"50% NaN 1.385485e+05 2.353089e+07 \n",
"75% NaN 3.741612e+05 8.075089e+07 \n",
"max NaN 2.343110e+06 9.366622e+08 "
] ]
}, },
"execution_count": 96, "execution_count": 60,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"data.describe(include='all')" "data.describe(include='all')\n",
"data.shape"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 98, "execution_count": 61,
"id": "comfortable-palmer", "id": "political-pension",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"IMDB_Rating 7.9\n", "IMDB_Rating 7.9\n",
"No_of_Votes 138548.5\n", "No_of_Votes 186734.0\n",
"Gross 23530892.0\n", "Gross 23530892.0\n",
"dtype: float64" "dtype: float64"
] ]
}, },
"execution_count": 98, "execution_count": 61,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -452,17 +192,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 99, "execution_count": 64,
"id": "hairy-thanksgiving", "id": "fitting-houston",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"(600, 14)\n", "(601, 16)\n",
"(200, 14)\n", "(115, 16)\n",
"(200, 14)\n" "(115, 16)\n"
] ]
} }
], ],
@ -470,8 +210,8 @@
"from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import train_test_split\n",
"import sklearn\n", "import sklearn\n",
"\n", "\n",
"data_train, data_test = train_test_split(data, test_size=400, random_state=1)\n", "data_train, data_test = train_test_split(data, test_size=230, random_state=1)\n",
"data_test, data_dev = train_test_split(data_test, test_size=200, random_state=1)\n", "data_test, data_dev = train_test_split(data_test, test_size=115, random_state=1)\n",
"print(data_train.shape)\n", "print(data_train.shape)\n",
"print(data_test.shape)\n", "print(data_test.shape)\n",
"print(data_dev.shape)" "print(data_dev.shape)"
@ -479,17 +219,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 100, "execution_count": 65,
"id": "adjacent-probe", "id": "white-livestock",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"0.3333333333333333" "0.1913477537437604"
] ]
}, },
"execution_count": 100, "execution_count": 65,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -500,8 +240,8 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 66,
"id": "driven-withdrawal", "id": "sharp-criterion",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -525,12 +265,14 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>Poster_Link</th>\n",
" <th>Series_Title</th>\n", " <th>Series_Title</th>\n",
" <th>Released_Year</th>\n", " <th>Released_Year</th>\n",
" <th>Certificate</th>\n", " <th>Certificate</th>\n",
" <th>Runtime</th>\n", " <th>Runtime</th>\n",
" <th>Genre</th>\n", " <th>Genre</th>\n",
" <th>IMDB_Rating</th>\n", " <th>IMDB_Rating</th>\n",
" <th>Overview</th>\n",
" <th>Meta_score</th>\n", " <th>Meta_score</th>\n",
" <th>Director</th>\n", " <th>Director</th>\n",
" <th>Star1</th>\n", " <th>Star1</th>\n",
@ -544,69 +286,77 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>count</th>\n", " <th>count</th>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600.000000</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601.000000</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>600</td>\n", " <td>601</td>\n",
" <td>6.000000e+02</td>\n", " <td>601</td>\n",
" <td>5.050000e+02</td>\n", " <td>601</td>\n",
" <td>6.010000e+02</td>\n",
" <td>6.010000e+02</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>unique</th>\n", " <th>unique</th>\n",
" <td>599</td>\n", " <td>601</td>\n",
" <td>95</td>\n", " <td>601</td>\n",
" <td>17</td>\n", " <td>90</td>\n",
" <td>122</td>\n", " <td>13</td>\n",
" <td>163</td>\n", " <td>121</td>\n",
" <td>162</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>61</td>\n", " <td>601</td>\n",
" <td>399</td>\n", " <td>59</td>\n",
" <td>448</td>\n", " <td>378</td>\n",
" <td>535</td>\n", " <td>438</td>\n",
" <td>545</td>\n", " <td>530</td>\n",
" <td>580</td>\n", " <td>556</td>\n",
" <td>577</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>top</th>\n", " <th>top</th>\n",
" <td>drishyam</td>\n", " <td>https://m.media-amazon.com/images/M/MV5BNGYyZG...</td>\n",
" <td>2004</td>\n", " <td>what ever happened to baby jane?</td>\n",
" <td>2014</td>\n",
" <td>U</td>\n", " <td>U</td>\n",
" <td>130 min</td>\n", " <td>101 min</td>\n",
" <td>drama</td>\n", " <td>drama</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>A few friends have a weekly fools' dinner, whe...</td>\n",
" <td></td>\n", " <td></td>\n",
" <td>david fincher</td>\n", " <td>martin scorsese</td>\n",
" <td>robert de niro</td>\n", " <td>clint eastwood</td>\n",
" <td>emma watson</td>\n", " <td>emma watson</td>\n",
" <td>rupert grint</td>\n", " <td>joe pesci</td>\n",
" <td>michael caine</td>\n", " <td>michael caine</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>freq</th>\n", " <th>freq</th>\n",
" <td>2</td>\n", " <td>1</td>\n",
" <td>1</td>\n",
" <td>22</td>\n", " <td>22</td>\n",
" <td>133</td>\n", " <td>143</td>\n",
" <td>17</td>\n", " <td>17</td>\n",
" <td>45</td>\n", " <td>53</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>88</td>\n", " <td>1</td>\n",
" <td>8</td>\n", " <td>53</td>\n",
" <td>8</td>\n", " <td>10</td>\n",
" <td>6</td>\n", " <td>10</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>4</td>\n", " <td>4</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
@ -617,15 +367,17 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>7.947167</td>\n", " <td>NaN</td>\n",
" <td>7.947920</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>2.765041e+05</td>\n", " <td>NaN</td>\n",
" <td>6.726714e+07</td>\n", " <td>3.174649e+05</td>\n",
" <td>6.775699e+07</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>std</th>\n", " <th>std</th>\n",
@ -634,15 +386,17 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>0.269282</td>\n", " <td>NaN</td>\n",
" <td>0.280238</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>3.219888e+05</td>\n", " <td>NaN</td>\n",
" <td>1.076309e+08</td>\n", " <td>3.407094e+05</td>\n",
" <td>1.095511e+08</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>min</th>\n", " <th>min</th>\n",
@ -651,6 +405,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.600000</td>\n", " <td>7.600000</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -658,6 +413,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.508800e+04</td>\n", " <td>2.508800e+04</td>\n",
" <td>1.305000e+03</td>\n", " <td>1.305000e+03</td>\n",
" </tr>\n", " </tr>\n",
@ -668,6 +424,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.700000</td>\n", " <td>7.700000</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -675,8 +432,9 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>5.705600e+04</td>\n", " <td>NaN</td>\n",
" <td>3.108485e+06</td>\n", " <td>6.846300e+04</td>\n",
" <td>3.151130e+06</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>50%</th>\n", " <th>50%</th>\n",
@ -685,6 +443,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.900000</td>\n", " <td>7.900000</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -692,8 +451,9 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>1.414865e+05</td>\n", " <td>NaN</td>\n",
" <td>2.447542e+07</td>\n", " <td>1.897160e+05</td>\n",
" <td>2.365000e+07</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>75%</th>\n", " <th>75%</th>\n",
@ -702,6 +462,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8.100000</td>\n", " <td>8.100000</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -709,8 +470,9 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>3.907975e+05</td>\n", " <td>NaN</td>\n",
" <td>8.340000e+07</td>\n", " <td>4.622520e+05</td>\n",
" <td>7.891296e+07</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>max</th>\n", " <th>max</th>\n",
@ -719,6 +481,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.200000</td>\n", " <td>9.200000</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -726,55 +489,82 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>1.854740e+06</td>\n", " <td>NaN</td>\n",
" <td>9.366622e+08</td>\n", " <td>2.303232e+06</td>\n",
" <td>8.583730e+08</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" Series_Title Released_Year Certificate Runtime Genre IMDB_Rating \\\n", " Poster_Link \\\n",
"count 600 600 600 600 600 600.000000 \n", "count 601 \n",
"unique 599 95 17 122 163 NaN \n", "unique 601 \n",
"top drishyam 2004 U 130 min drama NaN \n", "top https://m.media-amazon.com/images/M/MV5BNGYyZG... \n",
"freq 2 22 133 17 45 NaN \n", "freq 1 \n",
"mean NaN NaN NaN NaN NaN 7.947167 \n", "mean NaN \n",
"std NaN NaN NaN NaN NaN 0.269282 \n", "std NaN \n",
"min NaN NaN NaN NaN NaN 7.600000 \n", "min NaN \n",
"25% NaN NaN NaN NaN NaN 7.700000 \n", "25% NaN \n",
"50% NaN NaN NaN NaN NaN 7.900000 \n", "50% NaN \n",
"75% NaN NaN NaN NaN NaN 8.100000 \n", "75% NaN \n",
"max NaN NaN NaN NaN NaN 9.200000 \n", "max NaN \n",
"\n", "\n",
" Meta_score Director Star1 Star2 Star3 \\\n", " Series_Title Released_Year Certificate Runtime \\\n",
"count 600 600 600 600 600 \n", "count 601 601 601 601 \n",
"unique 61 399 448 535 545 \n", "unique 601 90 13 121 \n",
"top david fincher robert de niro emma watson rupert grint \n", "top what ever happened to baby jane? 2014 U 101 min \n",
"freq 88 8 8 6 4 \n", "freq 1 22 143 17 \n",
"mean NaN NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n",
"\n",
" Genre IMDB_Rating Overview \\\n",
"count 601 601.000000 601 \n",
"unique 162 NaN 601 \n",
"top drama NaN A few friends have a weekly fools' dinner, whe... \n",
"freq 53 NaN 1 \n",
"mean NaN 7.947920 NaN \n",
"std NaN 0.280238 NaN \n",
"min NaN 7.600000 NaN \n",
"25% NaN 7.700000 NaN \n",
"50% NaN 7.900000 NaN \n",
"75% NaN 8.100000 NaN \n",
"max NaN 9.200000 NaN \n",
"\n",
" Meta_score Director Star1 Star2 Star3 \\\n",
"count 601 601 601 601 601 \n",
"unique 59 378 438 530 556 \n",
"top martin scorsese clint eastwood emma watson joe pesci \n",
"freq 53 10 10 5 4 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n", "\n",
" Star4 No_of_Votes Gross \n", " Star4 No_of_Votes Gross \n",
"count 600 6.000000e+02 5.050000e+02 \n", "count 601 6.010000e+02 6.010000e+02 \n",
"unique 580 NaN NaN \n", "unique 577 NaN NaN \n",
"top michael caine NaN NaN \n", "top michael caine NaN NaN \n",
"freq 3 NaN NaN \n", "freq 4 NaN NaN \n",
"mean NaN 2.765041e+05 6.726714e+07 \n", "mean NaN 3.174649e+05 6.775699e+07 \n",
"std NaN 3.219888e+05 1.076309e+08 \n", "std NaN 3.407094e+05 1.095511e+08 \n",
"min NaN 2.508800e+04 1.305000e+03 \n", "min NaN 2.508800e+04 1.305000e+03 \n",
"25% NaN 5.705600e+04 3.108485e+06 \n", "25% NaN 6.846300e+04 3.151130e+06 \n",
"50% NaN 1.414865e+05 2.447542e+07 \n", "50% NaN 1.897160e+05 2.365000e+07 \n",
"75% NaN 3.907975e+05 8.340000e+07 \n", "75% NaN 4.622520e+05 7.891296e+07 \n",
"max NaN 1.854740e+06 9.366622e+08 " "max NaN 2.303232e+06 8.583730e+08 "
] ]
}, },
"execution_count": 101, "execution_count": 66,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -786,7 +576,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 102, "execution_count": 102,
"id": "bigger-brazil", "id": "excessive-congress",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1071,7 +861,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 103, "execution_count": 103,
"id": "egyptian-cooper", "id": "exact-prince",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1356,7 +1146,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 105, "execution_count": 105,
"id": "affecting-berkeley", "id": "modified-potential",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1368,7 +1158,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "interesting-pathology", "id": "finnish-burning",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []