ium_151636/zadanie1.ipynb

1596 lines
68 KiB
Plaintext
Raw Normal View History

2023-05-11 20:49:42 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie title</th>\n",
" <th>Run Time</th>\n",
" <th>Rating</th>\n",
" <th>User Rating</th>\n",
" <th>Generes</th>\n",
" <th>Overview</th>\n",
" <th>Plot Kyeword</th>\n",
" <th>Director</th>\n",
" <th>Top 5 Casts</th>\n",
" <th>Writer</th>\n",
" <th>year</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Top Gun: Maverick</td>\n",
" <td>$170,000,000 (estimated)</td>\n",
" <td>8.6</td>\n",
" <td>187K</td>\n",
" <td>['Action', 'Drama']</td>\n",
" <td>After more than thirty years of service as one...</td>\n",
" <td>['fighter jet', 'sequel', 'u.s. navy', 'fighte...</td>\n",
" <td>Joseph Kosinski</td>\n",
" <td>['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',...</td>\n",
" <td>Jim Cash</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt1745960/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Jurassic World Dominion</td>\n",
" <td>2 hours 27 minutes</td>\n",
" <td>6</td>\n",
" <td>56K</td>\n",
" <td>['Action', 'Adventure', 'Sci-Fi']</td>\n",
" <td>Four years after the destruction of Isla Nubla...</td>\n",
" <td>['dinosaur', 'jurassic park', 'tyrannosaurus r...</td>\n",
" <td>Colin Trevorrow</td>\n",
" <td>['Colin Trevorrow', 'Derek Connolly', 'Chris P...</td>\n",
" <td>Emily Carmichael</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt8041270/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Top Gun</td>\n",
" <td>$15,000,000 (estimated)</td>\n",
" <td>6.9</td>\n",
" <td>380K</td>\n",
" <td>['Action', 'Drama']</td>\n",
" <td>As students at the United States Navy's elite ...</td>\n",
" <td>['pilot', 'male camaraderie', 'u.s. navy', 'gr...</td>\n",
" <td>Tony Scott</td>\n",
" <td>['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ...</td>\n",
" <td>Jim Cash</td>\n",
" <td>-1986</td>\n",
" <td>/title/tt0092099/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Lightyear</td>\n",
" <td>$71,101,257</td>\n",
" <td>5.2</td>\n",
" <td>32K</td>\n",
" <td>['Animation', 'Action', 'Adventure']</td>\n",
" <td>While spending years attempting to return home...</td>\n",
" <td>['galaxy', 'spaceship', 'robot', 'rocket', 'sp...</td>\n",
" <td>Angus MacLane</td>\n",
" <td>['Jason Headley', 'Matthew Aldrich', 'Chris Ev...</td>\n",
" <td>Angus MacLane</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt10298810/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Spiderhead</td>\n",
" <td>not-released</td>\n",
" <td>5.4</td>\n",
" <td>23K</td>\n",
" <td>['Action', 'Crime', 'Drama']</td>\n",
" <td>In the near future, convicts are offered the c...</td>\n",
" <td>['discover', 'medical', 'test', 'reality', 'fi...</td>\n",
" <td>Joseph Kosinski</td>\n",
" <td>['Rhett Reese', 'Paul Wernick', 'Chris Hemswor...</td>\n",
" <td>George Saunders</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt9783600/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24397</th>\n",
" <td>Delicatessen</td>\n",
" <td>FRF 24,000,000 (estimated)</td>\n",
" <td>7.6</td>\n",
" <td>85K</td>\n",
" <td>['Comedy', 'Crime']</td>\n",
" <td>Post-apocalyptic surrealist black comedy about...</td>\n",
" <td>['surrealist', 'black comedy', 'human meat', '...</td>\n",
" <td>Marc Caro</td>\n",
" <td>['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad...</td>\n",
" <td>Jean-Pierre Jeunet</td>\n",
" <td>-1991</td>\n",
" <td>/title/tt0101700/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24398</th>\n",
" <td>Bitch Ass</td>\n",
" <td>not-released</td>\n",
" <td>5.5</td>\n",
" <td>52</td>\n",
" <td>['Crime', 'Horror']</td>\n",
" <td>A gang initiation goes wrong when a group of f...</td>\n",
" <td>[]</td>\n",
" <td>Bill Posley</td>\n",
" <td>['Bill Posley', 'Teon Kelley', 'Tunde Laleye',...</td>\n",
" <td>Jonathan Colomb</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt13991504/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24399</th>\n",
" <td>Bullwhip</td>\n",
" <td>not-released</td>\n",
" <td>5.1</td>\n",
" <td>398</td>\n",
" <td>['Crime', 'Romance', 'Western']</td>\n",
" <td>In order to avoid the hangman's noose, a cowbo...</td>\n",
" <td>['taming of the shrew', 'fur trader', 'busines...</td>\n",
" <td>Harmon Jones</td>\n",
" <td>['Guy Madison', 'Rhonda Fleming', 'James Griff...</td>\n",
" <td>Adele Buffington</td>\n",
" <td>-1958</td>\n",
" <td>/title/tt0051438/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24400</th>\n",
" <td>The Freshman</td>\n",
" <td>1 hour 42 minutes</td>\n",
" <td>6.4</td>\n",
" <td>20K</td>\n",
" <td>['Comedy', 'Crime']</td>\n",
" <td>An N.Y.C. film school student accepts a job wi...</td>\n",
" <td>['endangered species', 'fish out of water', 'g...</td>\n",
" <td>Andrew Bergman</td>\n",
" <td>['Marlon Brando', 'Matthew Broderick', 'Bruno ...</td>\n",
" <td>Andrew Bergman</td>\n",
" <td>-1990</td>\n",
" <td>/title/tt0099615/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24401</th>\n",
" <td>Guys and Dolls</td>\n",
" <td>$5,500,000 (estimated)</td>\n",
" <td>7.1</td>\n",
" <td>18K</td>\n",
" <td>['Comedy', 'Crime', 'Musical']</td>\n",
" <td>In New York, a gambler is challenged to take a...</td>\n",
" <td>['mission', 'gambler', 'new york city', 'based...</td>\n",
" <td>Joseph L. Mankiewicz</td>\n",
" <td>['Abe Burrows', 'Damon Runyon', 'Marlon Brando...</td>\n",
" <td>Jo Swerling</td>\n",
" <td>-1955</td>\n",
" <td>/title/tt0048140/</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>24402 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" movie title Run Time Rating User Rating \\\n",
"0 Top Gun: Maverick $170,000,000 (estimated) 8.6 187K \n",
"1 Jurassic World Dominion 2 hours 27 minutes 6 56K \n",
"2 Top Gun $15,000,000 (estimated) 6.9 380K \n",
"3 Lightyear $71,101,257 5.2 32K \n",
"4 Spiderhead not-released 5.4 23K \n",
"... ... ... ... ... \n",
"24397 Delicatessen FRF 24,000,000 (estimated) 7.6 85K \n",
"24398 Bitch Ass not-released 5.5 52 \n",
"24399 Bullwhip not-released 5.1 398 \n",
"24400 The Freshman 1 hour 42 minutes 6.4 20K \n",
"24401 Guys and Dolls $5,500,000 (estimated) 7.1 18K \n",
"\n",
" Generes \\\n",
"0 ['Action', 'Drama'] \n",
"1 ['Action', 'Adventure', 'Sci-Fi'] \n",
"2 ['Action', 'Drama'] \n",
"3 ['Animation', 'Action', 'Adventure'] \n",
"4 ['Action', 'Crime', 'Drama'] \n",
"... ... \n",
"24397 ['Comedy', 'Crime'] \n",
"24398 ['Crime', 'Horror'] \n",
"24399 ['Crime', 'Romance', 'Western'] \n",
"24400 ['Comedy', 'Crime'] \n",
"24401 ['Comedy', 'Crime', 'Musical'] \n",
"\n",
" Overview \\\n",
"0 After more than thirty years of service as one... \n",
"1 Four years after the destruction of Isla Nubla... \n",
"2 As students at the United States Navy's elite ... \n",
"3 While spending years attempting to return home... \n",
"4 In the near future, convicts are offered the c... \n",
"... ... \n",
"24397 Post-apocalyptic surrealist black comedy about... \n",
"24398 A gang initiation goes wrong when a group of f... \n",
"24399 In order to avoid the hangman's noose, a cowbo... \n",
"24400 An N.Y.C. film school student accepts a job wi... \n",
"24401 In New York, a gambler is challenged to take a... \n",
"\n",
" Plot Kyeword \\\n",
"0 ['fighter jet', 'sequel', 'u.s. navy', 'fighte... \n",
"1 ['dinosaur', 'jurassic park', 'tyrannosaurus r... \n",
"2 ['pilot', 'male camaraderie', 'u.s. navy', 'gr... \n",
"3 ['galaxy', 'spaceship', 'robot', 'rocket', 'sp... \n",
"4 ['discover', 'medical', 'test', 'reality', 'fi... \n",
"... ... \n",
"24397 ['surrealist', 'black comedy', 'human meat', '... \n",
"24398 [] \n",
"24399 ['taming of the shrew', 'fur trader', 'busines... \n",
"24400 ['endangered species', 'fish out of water', 'g... \n",
"24401 ['mission', 'gambler', 'new york city', 'based... \n",
"\n",
" Director \\\n",
"0 Joseph Kosinski \n",
"1 Colin Trevorrow \n",
"2 Tony Scott \n",
"3 Angus MacLane \n",
"4 Joseph Kosinski \n",
"... ... \n",
"24397 Marc Caro \n",
"24398 Bill Posley \n",
"24399 Harmon Jones \n",
"24400 Andrew Bergman \n",
"24401 Joseph L. Mankiewicz \n",
"\n",
" Top 5 Casts Writer \\\n",
"0 ['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',... Jim Cash \n",
"1 ['Colin Trevorrow', 'Derek Connolly', 'Chris P... Emily Carmichael \n",
"2 ['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ... Jim Cash \n",
"3 ['Jason Headley', 'Matthew Aldrich', 'Chris Ev... Angus MacLane \n",
"4 ['Rhett Reese', 'Paul Wernick', 'Chris Hemswor... George Saunders \n",
"... ... ... \n",
"24397 ['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad... Jean-Pierre Jeunet \n",
"24398 ['Bill Posley', 'Teon Kelley', 'Tunde Laleye',... Jonathan Colomb \n",
"24399 ['Guy Madison', 'Rhonda Fleming', 'James Griff... Adele Buffington \n",
"24400 ['Marlon Brando', 'Matthew Broderick', 'Bruno ... Andrew Bergman \n",
"24401 ['Abe Burrows', 'Damon Runyon', 'Marlon Brando... Jo Swerling \n",
"\n",
" year path \n",
"0 -2022 /title/tt1745960/ \n",
"1 -2022 /title/tt8041270/ \n",
"2 -1986 /title/tt0092099/ \n",
"3 -2022 /title/tt10298810/ \n",
"4 -2022 /title/tt9783600/ \n",
"... ... ... \n",
"24397 -1991 /title/tt0101700/ \n",
"24398 -2022 /title/tt13991504/ \n",
"24399 -1958 /title/tt0051438/ \n",
"24400 -1990 /title/tt0099615/ \n",
"24401 -1955 /title/tt0048140/ \n",
"\n",
"[24402 rows x 12 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Raw IMDb export, read with pandas defaults (no dtype coercion, no cleaning).\n",
"# Column names such as \"Generes\" and \"Plot Kyeword\" are spelled this way in the data.\n",
"# NOTE(review): the preview below shows mixed content that needs cleaning before modelling:\n",
"#   - \"Run Time\" holds durations, budget strings and \"not-released\"\n",
"#   - \"year\" appears as e.g. \"-2022\" or \"(I) (2009)\"\n",
"#   - \"User Rating\" uses suffixes such as \"187K\"\n",
"movies = pd.read_csv(\"25k_IMDb_movie_Dataset.csv\")\n",
"movies"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Shuffle all rows once with a fixed seed, then cut positionally into 60% / 20% / 20%.\n",
"# Plain .iloc slicing is used instead of np.split(DataFrame, ...): np.split goes through\n",
"# DataFrame.swapaxes, which is deprecated since pandas 2.1 and emits a FutureWarning.\n",
"# The cut points are unchanged, so the three partitions are the same as before.\n",
"shuffled = movies.sample(frac=1, random_state=69)\n",
"train_end = int(.6 * len(shuffled))\n",
"dev_end = int(.8 * len(shuffled))\n",
"\n",
"train = shuffled.iloc[:train_end]\n",
"dev = shuffled.iloc[train_end:dev_end]\n",
"test = shuffled.iloc[dev_end:]\n",
"\n",
"# Every row must land in exactly one partition.\n",
"assert len(train) + len(dev) + len(test) == len(movies)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie title</th>\n",
" <th>Run Time</th>\n",
" <th>Rating</th>\n",
" <th>User Rating</th>\n",
" <th>Generes</th>\n",
" <th>Overview</th>\n",
" <th>Plot Kyeword</th>\n",
" <th>Director</th>\n",
" <th>Top 5 Casts</th>\n",
" <th>Writer</th>\n",
" <th>year</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4782</th>\n",
" <td>Golmaal: Fun Unlimited</td>\n",
" <td>2 hours 30 minutes</td>\n",
" <td>7.4</td>\n",
" <td>17K</td>\n",
" <td>['Action', 'Comedy', 'Drama']</td>\n",
" <td>Four runaway crooks take shelter in a bungalow...</td>\n",
" <td>['blind couple', 'friend', 'mute', 'slacker', ...</td>\n",
" <td>Rohit Shetty</td>\n",
" <td>['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'...</td>\n",
" <td>Neeraj Vora</td>\n",
" <td>-2006</td>\n",
" <td>/title/tt0495034/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9291</th>\n",
" <td>The Remains of the Day</td>\n",
" <td>2 hours 14 minutes</td>\n",
" <td>7.8</td>\n",
" <td>74K</td>\n",
" <td>['Drama', 'Romance']</td>\n",
" <td>A butler who sacrificed body and soul to servi...</td>\n",
" <td>['class differences', 'butler', 'housekeeper',...</td>\n",
" <td>James Ivory</td>\n",
" <td>['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E...</td>\n",
" <td>Kazuo Ishiguro</td>\n",
" <td>-1993</td>\n",
" <td>/title/tt0107943/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3137</th>\n",
" <td>Blue Iguana</td>\n",
" <td>1 hour 40 minutes</td>\n",
" <td>5.6</td>\n",
" <td>5.2K</td>\n",
" <td>['Action', 'Comedy', 'Crime']</td>\n",
" <td>Ex-jailbirds Eddie and Paul are on parole and ...</td>\n",
" <td>['singing in a car', 'reference to the world c...</td>\n",
" <td>Hadi Hajaig</td>\n",
" <td>['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',...</td>\n",
" <td>Hadi Hajaig</td>\n",
" <td>-2018</td>\n",
" <td>/title/tt2316479/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17052</th>\n",
" <td>Nocturna</td>\n",
" <td>1 hour 20 minutes</td>\n",
" <td>7.2</td>\n",
" <td>2.4K</td>\n",
" <td>['Animation', 'Adventure', 'Family']</td>\n",
" <td>An orphan boy named Tim is afraid of the dark....</td>\n",
" <td>['orphan', 'night', 'cat', 'one word title', '...</td>\n",
" <td>Adrià García</td>\n",
" <td>['Adrià García', 'Víctor Maldonado', 'Teresa V...</td>\n",
" <td>Víctor Maldonado</td>\n",
" <td>-2007</td>\n",
" <td>/title/tt0836682/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>675</th>\n",
" <td>Alexander</td>\n",
" <td>$155,000,000 (estimated)</td>\n",
" <td>5.6</td>\n",
" <td>169K</td>\n",
" <td>['Action', 'Biography', 'Drama']</td>\n",
" <td>Alexander, the King of Macedonia and one of th...</td>\n",
" <td>['ancient greece', 'greek', 'macedonia', 'sex ...</td>\n",
" <td>Oliver Stone</td>\n",
" <td>['Christopher Kyle', 'Laeta Kalogridis', 'Coli...</td>\n",
" <td>Oliver Stone</td>\n",
" <td>-2004</td>\n",
" <td>/title/tt0346491/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6365</th>\n",
" <td>Yalghaar</td>\n",
" <td>2 hours 38 minutes</td>\n",
" <td>6.4</td>\n",
" <td>1K</td>\n",
" <td>['Action', 'Romance', 'War']</td>\n",
" <td>The film \"explores what happens in the lives o...</td>\n",
" <td>['live']</td>\n",
" <td>Hassan Rana</td>\n",
" <td>['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi...</td>\n",
" <td>Hassan Rana</td>\n",
" <td>-2017</td>\n",
" <td>/title/tt3945864/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7172</th>\n",
" <td>Brown of Harvard</td>\n",
" <td>not-released</td>\n",
" <td>6.2</td>\n",
" <td>1.5K</td>\n",
" <td>['Action', 'Drama', 'Romance']</td>\n",
" <td>Tom Brown shows up at Harvard, confident and a...</td>\n",
" <td>['harvard', 'no homoeroticism', 'pre code film...</td>\n",
" <td>Jack Conway</td>\n",
" <td>['Donald Ogden Stewart', 'Andrew Percival Youn...</td>\n",
" <td>Rida Johnson Young</td>\n",
" <td>-1926</td>\n",
" <td>/title/tt0016690/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12644</th>\n",
" <td>Deseo</td>\n",
" <td>1 hour 37 minutes</td>\n",
" <td>4.6</td>\n",
" <td>417</td>\n",
" <td>['Comedy', 'Drama', 'Romance']</td>\n",
" <td>A succession of erotic encounters weaved into ...</td>\n",
" <td>['smoking marijuana', 'female full frontal nud...</td>\n",
" <td>Antonio Zavala Kugler</td>\n",
" <td>['Antonio Zavala Kugler', 'Christian Bach', 'A...</td>\n",
" <td>Arthur Schnitzler</td>\n",
" <td>-2013</td>\n",
" <td>/title/tt1236434/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23970</th>\n",
" <td>Saezuru Tori Wa Habatakanai: The Clouds Gather</td>\n",
" <td>1 hour 25 minutes</td>\n",
" <td>6.9</td>\n",
" <td>681</td>\n",
" <td>['Animation', 'Crime', 'Drama']</td>\n",
" <td>Yashiro is the president of the Shinseikai Ent...</td>\n",
" <td>['yaoi', 'boys love', 'gay', 'anime', 'yakuza'...</td>\n",
" <td>Kaori Makita</td>\n",
" <td>['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha...</td>\n",
" <td>Hiroshi Seko</td>\n",
" <td>-2020</td>\n",
" <td>/title/tt10675392/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13423</th>\n",
" <td>99</td>\n",
" <td>2 hours 15 minutes</td>\n",
" <td>7.3</td>\n",
" <td>2.9K</td>\n",
" <td>['Comedy', 'Crime', 'Drama']</td>\n",
" <td>A gangster deputes two of his men to recover m...</td>\n",
" <td>['cricket the sport', 'briefcase', 'caper', 'e...</td>\n",
" <td>Krishna D.K.</td>\n",
" <td>['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',...</td>\n",
" <td>Raj Nidimoru</td>\n",
" <td>(I) (2009)</td>\n",
" <td>/title/tt1370429/</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>14641 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" movie title \\\n",
"4782 Golmaal: Fun Unlimited \n",
"9291 The Remains of the Day \n",
"3137 Blue Iguana \n",
"17052 Nocturna \n",
"675 Alexander \n",
"... ... \n",
"6365 Yalghaar \n",
"7172 Brown of Harvard \n",
"12644 Deseo \n",
"23970 Saezuru Tori Wa Habatakanai: The Clouds Gather \n",
"13423 99 \n",
"\n",
" Run Time Rating User Rating \\\n",
"4782 2 hours 30 minutes 7.4 17K \n",
"9291 2 hours 14 minutes 7.8 74K \n",
"3137 1 hour 40 minutes 5.6 5.2K \n",
"17052 1 hour 20 minutes 7.2 2.4K \n",
"675 $155,000,000 (estimated) 5.6 169K \n",
"... ... ... ... \n",
"6365 2 hours 38 minutes 6.4 1K \n",
"7172 not-released 6.2 1.5K \n",
"12644 1 hour 37 minutes 4.6 417 \n",
"23970 1 hour 25 minutes 6.9 681 \n",
"13423 2 hours 15 minutes 7.3 2.9K \n",
"\n",
" Generes \\\n",
"4782 ['Action', 'Comedy', 'Drama'] \n",
"9291 ['Drama', 'Romance'] \n",
"3137 ['Action', 'Comedy', 'Crime'] \n",
"17052 ['Animation', 'Adventure', 'Family'] \n",
"675 ['Action', 'Biography', 'Drama'] \n",
"... ... \n",
"6365 ['Action', 'Romance', 'War'] \n",
"7172 ['Action', 'Drama', 'Romance'] \n",
"12644 ['Comedy', 'Drama', 'Romance'] \n",
"23970 ['Animation', 'Crime', 'Drama'] \n",
"13423 ['Comedy', 'Crime', 'Drama'] \n",
"\n",
" Overview \\\n",
"4782 Four runaway crooks take shelter in a bungalow... \n",
"9291 A butler who sacrificed body and soul to servi... \n",
"3137 Ex-jailbirds Eddie and Paul are on parole and ... \n",
"17052 An orphan boy named Tim is afraid of the dark.... \n",
"675 Alexander, the King of Macedonia and one of th... \n",
"... ... \n",
"6365 The film \"explores what happens in the lives o... \n",
"7172 Tom Brown shows up at Harvard, confident and a... \n",
"12644 A succession of erotic encounters weaved into ... \n",
"23970 Yashiro is the president of the Shinseikai Ent... \n",
"13423 A gangster deputes two of his men to recover m... \n",
"\n",
" Plot Kyeword \\\n",
"4782 ['blind couple', 'friend', 'mute', 'slacker', ... \n",
"9291 ['class differences', 'butler', 'housekeeper',... \n",
"3137 ['singing in a car', 'reference to the world c... \n",
"17052 ['orphan', 'night', 'cat', 'one word title', '... \n",
"675 ['ancient greece', 'greek', 'macedonia', 'sex ... \n",
"... ... \n",
"6365 ['live'] \n",
"7172 ['harvard', 'no homoeroticism', 'pre code film... \n",
"12644 ['smoking marijuana', 'female full frontal nud... \n",
"23970 ['yaoi', 'boys love', 'gay', 'anime', 'yakuza'... \n",
"13423 ['cricket the sport', 'briefcase', 'caper', 'e... \n",
"\n",
" Director \\\n",
"4782 Rohit Shetty \n",
"9291 James Ivory \n",
"3137 Hadi Hajaig \n",
"17052 Adrià García \n",
"675 Oliver Stone \n",
"... ... \n",
"6365 Hassan Rana \n",
"7172 Jack Conway \n",
"12644 Antonio Zavala Kugler \n",
"23970 Kaori Makita \n",
"13423 Krishna D.K. \n",
"\n",
" Top 5 Casts Writer \\\n",
"4782 ['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'... Neeraj Vora \n",
"9291 ['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E... Kazuo Ishiguro \n",
"3137 ['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',... Hadi Hajaig \n",
"17052 ['Adrià García', 'Víctor Maldonado', 'Teresa V... Víctor Maldonado \n",
"675 ['Christopher Kyle', 'Laeta Kalogridis', 'Coli... Oliver Stone \n",
"... ... ... \n",
"6365 ['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi... Hassan Rana \n",
"7172 ['Donald Ogden Stewart', 'Andrew Percival Youn... Rida Johnson Young \n",
"12644 ['Antonio Zavala Kugler', 'Christian Bach', 'A... Arthur Schnitzler \n",
"23970 ['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha... Hiroshi Seko \n",
"13423 ['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',... Raj Nidimoru \n",
"\n",
" year path \n",
"4782 -2006 /title/tt0495034/ \n",
"9291 -1993 /title/tt0107943/ \n",
"3137 -2018 /title/tt2316479/ \n",
"17052 -2007 /title/tt0836682/ \n",
"675 -2004 /title/tt0346491/ \n",
"... ... ... \n",
"6365 -2017 /title/tt3945864/ \n",
"7172 -1926 /title/tt0016690/ \n",
"12644 -2013 /title/tt1236434/ \n",
"23970 -2020 /title/tt10675392/ \n",
"13423 (I) (2009) /title/tt1370429/ \n",
"\n",
"[14641 rows x 12 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie title</th>\n",
" <th>Run Time</th>\n",
" <th>Rating</th>\n",
" <th>User Rating</th>\n",
" <th>Generes</th>\n",
" <th>Overview</th>\n",
" <th>Plot Kyeword</th>\n",
" <th>Director</th>\n",
" <th>Top 5 Casts</th>\n",
" <th>Writer</th>\n",
" <th>year</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>20683</th>\n",
" <td>The Animals</td>\n",
" <td>not-released</td>\n",
" <td>4.2</td>\n",
" <td>398</td>\n",
" <td>['Western']</td>\n",
" <td>A woman tracks down the five men who raped her...</td>\n",
" <td>['rape and revenge', 'arizona territory', 'ari...</td>\n",
" <td>Ron Joy</td>\n",
" <td>['Michele Carey', 'Henry Silva', 'Keenan Wynn'...</td>\n",
" <td>Richard Bakalyan</td>\n",
" <td>-1970</td>\n",
" <td>/title/tt0065407/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8040</th>\n",
" <td>The Lincoln Lawyer</td>\n",
" <td>$40,000,000 (estimated)</td>\n",
" <td>7.3</td>\n",
" <td>236K</td>\n",
" <td>['Crime', 'Drama', 'Mystery']</td>\n",
" <td>A lawyer defending a wealthy man begins to bel...</td>\n",
" <td>['defense lawyer', 'plot twist', 'drug rehabil...</td>\n",
" <td>Brad Furman</td>\n",
" <td>['Michael Connelly', 'Matthew McConaughey', 'M...</td>\n",
" <td>John Romano</td>\n",
" <td>-2011</td>\n",
" <td>/title/tt1189340/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23664</th>\n",
" <td>What's the Worst That Could Happen?</td>\n",
" <td>1 hour 34 minutes</td>\n",
" <td>5.4</td>\n",
" <td>16K</td>\n",
" <td>['Comedy', 'Crime']</td>\n",
" <td>A rich man catches a thief burglarizing his ho...</td>\n",
" <td>['breaking and entering', 'bankruptcy', 'quest...</td>\n",
" <td>Sam Weisman</td>\n",
" <td>['Matthew Chapman', 'Martin Lawrence', 'Danny ...</td>\n",
" <td>Donald E. Westlake</td>\n",
" <td>-2001</td>\n",
" <td>/title/tt0161083/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9078</th>\n",
" <td>Cleopatra</td>\n",
" <td>not-released</td>\n",
" <td>6.2</td>\n",
" <td>652</td>\n",
" <td>['Animation', 'History', 'Romance']</td>\n",
" <td>In order to foil the enemy aliens' \"Cleopatra ...</td>\n",
" <td>['adult anime', 'adult animation', 'anime', 'd...</td>\n",
" <td>Osamu Tezuka</td>\n",
" <td>['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats...</td>\n",
" <td>Eiichi Yamamoto</td>\n",
" <td>-1963</td>\n",
" <td>/title/tt0056937/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17118</th>\n",
" <td>Seven Cities of Gold</td>\n",
" <td>1 hour 43 minutes</td>\n",
" <td>5.9</td>\n",
" <td>479</td>\n",
" <td>['Adventure', 'Biography', 'History']</td>\n",
" <td>In 1769, a Spanish expedition to California se...</td>\n",
" <td>['limping man', 'prologue', 'voice over narrat...</td>\n",
" <td>Robert D. Webb</td>\n",
" <td>['John C. Higgins', 'Joseph Petracca', 'Richar...</td>\n",
" <td>Richard L. Breen</td>\n",
" <td>-1955</td>\n",
" <td>/title/tt0048603/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1248</th>\n",
" <td>The Wall</td>\n",
" <td>1 hour 28 minutes</td>\n",
" <td>6.2</td>\n",
" <td>27K</td>\n",
" <td>['Action', 'Drama', 'Thriller']</td>\n",
" <td>Two American Soldiers are trapped by a lethal ...</td>\n",
" <td>['sniper', 'soldier', 'deception', 'wound', 'd...</td>\n",
" <td>Doug Liman</td>\n",
" <td>['Aaron Taylor-Johnson', 'John Cena', 'Laith N...</td>\n",
" <td>Dwain Worrell</td>\n",
" <td>(II) (2017)</td>\n",
" <td>/title/tt4218696/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7091</th>\n",
" <td>Commando 3</td>\n",
" <td>2 hours 13 minutes</td>\n",
" <td>5.7</td>\n",
" <td>3K</td>\n",
" <td>['Action', 'Adventure', 'Thriller']</td>\n",
" <td>Karan goes to London to stop a terrorist attac...</td>\n",
" <td>['chase']</td>\n",
" <td>Aditya Datt</td>\n",
" <td>['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau...</td>\n",
" <td>Darius Yarmil</td>\n",
" <td>-2019</td>\n",
" <td>/title/tt8983168/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15635</th>\n",
" <td>Adam Resurrected</td>\n",
" <td>1 hour 46 minutes</td>\n",
" <td>6.2</td>\n",
" <td>4.2K</td>\n",
" <td>['Drama', 'War']</td>\n",
" <td>In the aftermath of World War II, a former cir...</td>\n",
" <td>['man wears a white suit', 'desert hotel', 'ti...</td>\n",
" <td>Paul Schrader</td>\n",
" <td>['Noah Stollman', 'Jeff Goldblum', 'Willem Daf...</td>\n",
" <td>Yoram Kaniuk</td>\n",
" <td>-2008</td>\n",
" <td>/title/tt0479341/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12914</th>\n",
" <td>Main Street</td>\n",
" <td>1 hour 32 minutes</td>\n",
" <td>4.7</td>\n",
" <td>2.9K</td>\n",
" <td>['Drama']</td>\n",
" <td>Durham is slowly dying like the tobacco busine...</td>\n",
" <td>['economic depression', 'reference to lucky st...</td>\n",
" <td>John Doyle</td>\n",
" <td>['Colin Firth', 'Ellen Burstyn', 'Patricia Cla...</td>\n",
" <td>Horton Foote</td>\n",
" <td>-2010</td>\n",
" <td>/title/tt1365483/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2825</th>\n",
" <td>The Outrage</td>\n",
" <td>1 hour 36 minutes</td>\n",
" <td>6.2</td>\n",
" <td>2.2K</td>\n",
" <td>['Crime', 'Drama', 'Western']</td>\n",
" <td>Travelers in the 1870s Southwest discuss a rec...</td>\n",
" <td>['highwayman', 'man bound and gagged', 'gun du...</td>\n",
" <td>Martin Ritt</td>\n",
" <td>['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau...</td>\n",
" <td>Michael Kanin</td>\n",
" <td>-1964</td>\n",
" <td>/title/tt0058437/</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4880 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" movie title Run Time Rating \\\n",
"20683 The Animals not-released 4.2 \n",
"8040 The Lincoln Lawyer $40,000,000 (estimated) 7.3 \n",
"23664 What's the Worst That Could Happen? 1 hour 34 minutes 5.4 \n",
"9078 Cleopatra not-released 6.2 \n",
"17118 Seven Cities of Gold 1 hour 43 minutes 5.9 \n",
"... ... ... ... \n",
"1248 The Wall 1 hour 28 minutes 6.2 \n",
"7091 Commando 3 2 hours 13 minutes 5.7 \n",
"15635 Adam Resurrected 1 hour 46 minutes 6.2 \n",
"12914 Main Street 1 hour 32 minutes 4.7 \n",
"2825 The Outrage 1 hour 36 minutes 6.2 \n",
"\n",
" User Rating Generes \\\n",
"20683 398 ['Western'] \n",
"8040 236K ['Crime', 'Drama', 'Mystery'] \n",
"23664 16K ['Comedy', 'Crime'] \n",
"9078 652 ['Animation', 'History', 'Romance'] \n",
"17118 479 ['Adventure', 'Biography', 'History'] \n",
"... ... ... \n",
"1248 27K ['Action', 'Drama', 'Thriller'] \n",
"7091 3K ['Action', 'Adventure', 'Thriller'] \n",
"15635 4.2K ['Drama', 'War'] \n",
"12914 2.9K ['Drama'] \n",
"2825 2.2K ['Crime', 'Drama', 'Western'] \n",
"\n",
" Overview \\\n",
"20683 A woman tracks down the five men who raped her... \n",
"8040 A lawyer defending a wealthy man begins to bel... \n",
"23664 A rich man catches a thief burglarizing his ho... \n",
"9078 In order to foil the enemy aliens' \"Cleopatra ... \n",
"17118 In 1769, a Spanish expedition to California se... \n",
"... ... \n",
"1248 Two American Soldiers are trapped by a lethal ... \n",
"7091 Karan goes to London to stop a terrorist attac... \n",
"15635 In the aftermath of World War II, a former cir... \n",
"12914 Durham is slowly dying like the tobacco busine... \n",
"2825 Travelers in the 1870s Southwest discuss a rec... \n",
"\n",
" Plot Kyeword Director \\\n",
"20683 ['rape and revenge', 'arizona territory', 'ari... Ron Joy \n",
"8040 ['defense lawyer', 'plot twist', 'drug rehabil... Brad Furman \n",
"23664 ['breaking and entering', 'bankruptcy', 'quest... Sam Weisman \n",
"9078 ['adult anime', 'adult animation', 'anime', 'd... Osamu Tezuka \n",
"17118 ['limping man', 'prologue', 'voice over narrat... Robert D. Webb \n",
"... ... ... \n",
"1248 ['sniper', 'soldier', 'deception', 'wound', 'd... Doug Liman \n",
"7091 ['chase'] Aditya Datt \n",
"15635 ['man wears a white suit', 'desert hotel', 'ti... Paul Schrader \n",
"12914 ['economic depression', 'reference to lucky st... John Doyle \n",
"2825 ['highwayman', 'man bound and gagged', 'gun du... Martin Ritt \n",
"\n",
" Top 5 Casts Writer \\\n",
"20683 ['Michele Carey', 'Henry Silva', 'Keenan Wynn'... Richard Bakalyan \n",
"8040 ['Michael Connelly', 'Matthew McConaughey', 'M... John Romano \n",
"23664 ['Matthew Chapman', 'Martin Lawrence', 'Danny ... Donald E. Westlake \n",
"9078 ['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats... Eiichi Yamamoto \n",
"17118 ['John C. Higgins', 'Joseph Petracca', 'Richar... Richard L. Breen \n",
"... ... ... \n",
"1248 ['Aaron Taylor-Johnson', 'John Cena', 'Laith N... Dwain Worrell \n",
"7091 ['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau... Darius Yarmil \n",
"15635 ['Noah Stollman', 'Jeff Goldblum', 'Willem Daf... Yoram Kaniuk \n",
"12914 ['Colin Firth', 'Ellen Burstyn', 'Patricia Cla... Horton Foote \n",
"2825 ['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau... Michael Kanin \n",
"\n",
" year path \n",
"20683 -1970 /title/tt0065407/ \n",
"8040 -2011 /title/tt1189340/ \n",
"23664 -2001 /title/tt0161083/ \n",
"9078 -1963 /title/tt0056937/ \n",
"17118 -1955 /title/tt0048603/ \n",
"... ... ... \n",
"1248 (II) (2017) /title/tt4218696/ \n",
"7091 -2019 /title/tt8983168/ \n",
"15635 -2008 /title/tt0479341/ \n",
"12914 -2010 /title/tt1365483/ \n",
"2825 -1964 /title/tt0058437/ \n",
"\n",
"[4880 rows x 12 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dev"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie title</th>\n",
" <th>Run Time</th>\n",
" <th>Rating</th>\n",
" <th>User Rating</th>\n",
" <th>Generes</th>\n",
" <th>Overview</th>\n",
" <th>Plot Kyeword</th>\n",
" <th>Director</th>\n",
" <th>Top 5 Casts</th>\n",
" <th>Writer</th>\n",
" <th>year</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16788</th>\n",
" <td>High in the Clouds</td>\n",
" <td>not-released</td>\n",
" <td>no-rating</td>\n",
" <td>0</td>\n",
" <td>['Animation', 'Adventure', 'Comedy']</td>\n",
" <td>A squirrel embarks on a journey to find an ani...</td>\n",
" <td>['journey', 'cloud', 'animal', 'friend', 'trag...</td>\n",
" <td>Timothy Reckart</td>\n",
" <td>['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar...</td>\n",
" <td>Philip Ardagh</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt1458167/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17155</th>\n",
" <td>Curious George 3: Back to the Jungle</td>\n",
" <td>1 hour 21 minutes</td>\n",
" <td>5.4</td>\n",
" <td>598</td>\n",
" <td>['Animation', 'Adventure', 'Comedy']</td>\n",
" <td>Curious George goes on an epic adventure to sp...</td>\n",
" <td>['sequel', 'third part', 'jungle', 'curious ge...</td>\n",
" <td>Phil Weinstein</td>\n",
" <td>['H.A. Rey', 'Chuck Tately', 'Frank Welker', '...</td>\n",
" <td>Margret Rey</td>\n",
" <td>-2015</td>\n",
" <td>/title/tt4622340/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5985</th>\n",
" <td>Tarkan: Altin Madalyon</td>\n",
" <td>not-released</td>\n",
" <td>6.2</td>\n",
" <td>1.6K</td>\n",
" <td>['Action', 'Adventure', 'History']</td>\n",
" <td>The story of Tarkan and his friends efforts to...</td>\n",
" <td>['black magic', 'blood', 'axe', 'snake', 'pros...</td>\n",
" <td>Mehmet Aslan</td>\n",
" <td>['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',...</td>\n",
" <td>Sezgin Burak</td>\n",
" <td>-1973</td>\n",
" <td>/title/tt0274933/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1640</th>\n",
" <td>Harley Davidson and the Marlboro Man</td>\n",
" <td>$23,000,000 (estimated)</td>\n",
" <td>6.1</td>\n",
" <td>20K</td>\n",
" <td>['Action', 'Crime', 'Drama']</td>\n",
" <td>Forced by the imminent foreclosure of their fr...</td>\n",
" <td>['swimming pool', 'night club', 'voyeurism', '...</td>\n",
" <td>Simon Wincer</td>\n",
" <td>['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel...</td>\n",
" <td>Don Michael Paul</td>\n",
" <td>-1991</td>\n",
" <td>/title/tt0102005/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9826</th>\n",
" <td>I Am All Girls</td>\n",
" <td>not-released</td>\n",
" <td>5.9</td>\n",
" <td>5.8K</td>\n",
" <td>['Crime', 'Drama', 'Mystery']</td>\n",
" <td>A special crimes investigator forms an unlikel...</td>\n",
" <td>['child', 'bond', 'investigator', 'murder', 'c...</td>\n",
" <td>Donovan Marsh</td>\n",
" <td>['Marcell Greeff', 'Emile Leuvennink', 'Erica ...</td>\n",
" <td>Wayne Fitzjohn</td>\n",
" <td>-2021</td>\n",
" <td>/title/tt9013182/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11825</th>\n",
" <td>The Apostle</td>\n",
" <td>2 hours 14 minutes</td>\n",
" <td>7.2</td>\n",
" <td>14K</td>\n",
" <td>['Drama']</td>\n",
" <td>After his happy life spins out of control, a p...</td>\n",
" <td>['southern gothic', 'timeframe 1930s', 'preach...</td>\n",
" <td>Robert Duvall</td>\n",
" <td>['Robert Duvall', 'Todd Allen', 'Paul Bagget',...</td>\n",
" <td>Robert Duvall</td>\n",
" <td>-1997</td>\n",
" <td>/title/tt0118632/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14740</th>\n",
" <td>My Normal</td>\n",
" <td>not-released</td>\n",
" <td>4.1</td>\n",
" <td>756</td>\n",
" <td>['Drama', 'Romance']</td>\n",
" <td>A lesbian dominatrix finds a way to use her un...</td>\n",
" <td>['dominatrix', 'lesbian', 'lesbian kiss', 'blo...</td>\n",
" <td>Irving Schwartz</td>\n",
" <td>['Renee Garzon', 'Keith Planit', 'Nicole LaLib...</td>\n",
" <td>Abdul Malik Abbott</td>\n",
" <td>-2009</td>\n",
" <td>/title/tt1117983/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9818</th>\n",
" <td>Lust och fägring stor</td>\n",
" <td>2 hours 10 minutes</td>\n",
" <td>6.9</td>\n",
" <td>6.5K</td>\n",
" <td>['Drama', 'Romance', 'War']</td>\n",
" <td>Malmö, Sweden during the Second World War. Sti...</td>\n",
" <td>['extramarital affair', 'teacher student sex',...</td>\n",
" <td>Bo Widerberg</td>\n",
" <td>['Johan Widerberg', 'Marika Lagercrantz', 'Tom...</td>\n",
" <td>Bo Widerberg</td>\n",
" <td>-1995</td>\n",
" <td>/title/tt0113720/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4041</th>\n",
" <td>RoboCop Returns</td>\n",
" <td>not-released</td>\n",
" <td>no-rating</td>\n",
" <td>0</td>\n",
" <td>['Action', 'Adventure', 'Crime']</td>\n",
" <td>RoboCop returns to fight crime in Detroit.</td>\n",
" <td>['sequel', 'reboot', 'non comic book superhero...</td>\n",
" <td>Abe Forsythe</td>\n",
" <td>['Edward Neumeier', 'Justin Rhodes', 'Abe Fors...</td>\n",
" <td>Michael Miner</td>\n",
" <td>NaN</td>\n",
" <td>/title/tt8688550/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23755</th>\n",
" <td>The Maiden Heist</td>\n",
" <td>1 hour 30 minutes</td>\n",
" <td>6</td>\n",
" <td>17K</td>\n",
" <td>['Comedy', 'Crime']</td>\n",
" <td>A comedy centered on three museum security gua...</td>\n",
" <td>['heist crime', 'caper crime', 'forgery', 'hei...</td>\n",
" <td>Peter Hewitt</td>\n",
" <td>['Christopher Walken', 'Joseph McKenna', 'Wynn...</td>\n",
" <td>Michael LeSieur</td>\n",
" <td>-2009</td>\n",
" <td>/title/tt1107860/</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4881 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" movie title Run Time \\\n",
"16788 High in the Clouds not-released \n",
"17155 Curious George 3: Back to the Jungle 1 hour 21 minutes \n",
"5985 Tarkan: Altin Madalyon not-released \n",
"1640 Harley Davidson and the Marlboro Man $23,000,000 (estimated) \n",
"9826 I Am All Girls not-released \n",
"... ... ... \n",
"11825 The Apostle 2 hours 14 minutes \n",
"14740 My Normal not-released \n",
"9818 Lust och fägring stor 2 hours 10 minutes \n",
"4041 RoboCop Returns not-released \n",
"23755 The Maiden Heist 1 hour 30 minutes \n",
"\n",
" Rating User Rating Generes \\\n",
"16788 no-rating 0 ['Animation', 'Adventure', 'Comedy'] \n",
"17155 5.4 598 ['Animation', 'Adventure', 'Comedy'] \n",
"5985 6.2 1.6K ['Action', 'Adventure', 'History'] \n",
"1640 6.1 20K ['Action', 'Crime', 'Drama'] \n",
"9826 5.9 5.8K ['Crime', 'Drama', 'Mystery'] \n",
"... ... ... ... \n",
"11825 7.2 14K ['Drama'] \n",
"14740 4.1 756 ['Drama', 'Romance'] \n",
"9818 6.9 6.5K ['Drama', 'Romance', 'War'] \n",
"4041 no-rating 0 ['Action', 'Adventure', 'Crime'] \n",
"23755 6 17K ['Comedy', 'Crime'] \n",
"\n",
" Overview \\\n",
"16788 A squirrel embarks on a journey to find an ani... \n",
"17155 Curious George goes on an epic adventure to sp... \n",
"5985 The story of Tarkan and his friends efforts to... \n",
"1640 Forced by the imminent foreclosure of their fr... \n",
"9826 A special crimes investigator forms an unlikel... \n",
"... ... \n",
"11825 After his happy life spins out of control, a p... \n",
"14740 A lesbian dominatrix finds a way to use her un... \n",
"9818 Malmö, Sweden during the Second World War. Sti... \n",
"4041 RoboCop returns to fight crime in Detroit. \n",
"23755 A comedy centered on three museum security gua... \n",
"\n",
" Plot Kyeword Director \\\n",
"16788 ['journey', 'cloud', 'animal', 'friend', 'trag... Timothy Reckart \n",
"17155 ['sequel', 'third part', 'jungle', 'curious ge... Phil Weinstein \n",
"5985 ['black magic', 'blood', 'axe', 'snake', 'pros... Mehmet Aslan \n",
"1640 ['swimming pool', 'night club', 'voyeurism', '... Simon Wincer \n",
"9826 ['child', 'bond', 'investigator', 'murder', 'c... Donovan Marsh \n",
"... ... ... \n",
"11825 ['southern gothic', 'timeframe 1930s', 'preach... Robert Duvall \n",
"14740 ['dominatrix', 'lesbian', 'lesbian kiss', 'blo... Irving Schwartz \n",
"9818 ['extramarital affair', 'teacher student sex',... Bo Widerberg \n",
"4041 ['sequel', 'reboot', 'non comic book superhero... Abe Forsythe \n",
"23755 ['heist crime', 'caper crime', 'forgery', 'hei... Peter Hewitt \n",
"\n",
" Top 5 Casts Writer \\\n",
"16788 ['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar... Philip Ardagh \n",
"17155 ['H.A. Rey', 'Chuck Tately', 'Frank Welker', '... Margret Rey \n",
"5985 ['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',... Sezgin Burak \n",
"1640 ['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel... Don Michael Paul \n",
"9826 ['Marcell Greeff', 'Emile Leuvennink', 'Erica ... Wayne Fitzjohn \n",
"... ... ... \n",
"11825 ['Robert Duvall', 'Todd Allen', 'Paul Bagget',... Robert Duvall \n",
"14740 ['Renee Garzon', 'Keith Planit', 'Nicole LaLib... Abdul Malik Abbott \n",
"9818 ['Johan Widerberg', 'Marika Lagercrantz', 'Tom... Bo Widerberg \n",
"4041 ['Edward Neumeier', 'Justin Rhodes', 'Abe Fors... Michael Miner \n",
"23755 ['Christopher Walken', 'Joseph McKenna', 'Wynn... Michael LeSieur \n",
"\n",
" year path \n",
"16788 -2022 /title/tt1458167/ \n",
"17155 -2015 /title/tt4622340/ \n",
"5985 -1973 /title/tt0274933/ \n",
"1640 -1991 /title/tt0102005/ \n",
"9826 -2021 /title/tt9013182/ \n",
"... ... ... \n",
"11825 -1997 /title/tt0118632/ \n",
"14740 -2009 /title/tt1117983/ \n",
"9818 -1995 /title/tt0113720/ \n",
"4041 NaN /title/tt8688550/ \n",
"23755 -2009 /title/tt1107860/ \n",
"\n",
"[4881 rows x 12 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>movie title</th>\n",
" <th>Run Time</th>\n",
" <th>Rating</th>\n",
" <th>User Rating</th>\n",
" <th>Generes</th>\n",
" <th>Overview</th>\n",
" <th>Plot Kyeword</th>\n",
" <th>Director</th>\n",
" <th>Top 5 Casts</th>\n",
" <th>Writer</th>\n",
" <th>year</th>\n",
" <th>path</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24158</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>24402</td>\n",
" <td>23624</td>\n",
" <td>24402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>23922</td>\n",
" <td>1556</td>\n",
" <td>91</td>\n",
" <td>1684</td>\n",
" <td>746</td>\n",
" <td>23957</td>\n",
" <td>21546</td>\n",
" <td>11604</td>\n",
" <td>24211</td>\n",
" <td>15562</td>\n",
" <td>250</td>\n",
" <td>23922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>Rage</td>\n",
" <td>not-released</td>\n",
" <td>no-rating</td>\n",
" <td>0</td>\n",
" <td>['Drama']</td>\n",
" <td>none</td>\n",
" <td>[]</td>\n",
" <td>See company contact information</td>\n",
" <td>['See producer', 'See preliminary cast']</td>\n",
" <td>See writer</td>\n",
" <td>-2022</td>\n",
" <td>/title/tt0114224/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>4</td>\n",
" <td>8475</td>\n",
" <td>1740</td>\n",
" <td>1740</td>\n",
" <td>943</td>\n",
" <td>142</td>\n",
" <td>1696</td>\n",
" <td>142</td>\n",
" <td>142</td>\n",
" <td>142</td>\n",
" <td>1201</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" movie title Run Time Rating User Rating Generes Overview \\\n",
"count 24402 24402 24402 24402 24402 24158 \n",
"unique 23922 1556 91 1684 746 23957 \n",
"top Rage not-released no-rating 0 ['Drama'] none \n",
"freq 4 8475 1740 1740 943 142 \n",
"\n",
" Plot Kyeword Director \\\n",
"count 24402 24402 \n",
"unique 21546 11604 \n",
"top [] See company contact information \n",
"freq 1696 142 \n",
"\n",
" Top 5 Casts Writer year \\\n",
"count 24402 24402 23624 \n",
"unique 24211 15562 250 \n",
"top ['See producer', 'See preliminary cast'] See writer -2022 \n",
"freq 142 142 1201 \n",
"\n",
" path \n",
"count 24402 \n",
"unique 23922 \n",
"top /title/tt0114224/ \n",
"freq 4 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Rage 4\n",
"The Killer 4\n",
"The Beast 4\n",
"Spiral 4\n",
"The Silence 3\n",
" ..\n",
"The Mule 1\n",
"Donnie Brasco 1\n",
"Little Miss Sunshine 1\n",
"Three Billboards Outside Ebbing, Missouri 1\n",
"Guys and Dolls 1\n",
"Name: movie title, Length: 23922, dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies[\"movie title\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"not-released 8475\n",
"1 hour 30 minutes 503\n",
"1 hour 35 minutes 376\n",
"1 hour 38 minutes 350\n",
"1 hour 31 minutes 338\n",
" ... \n",
"$14,492 1\n",
"$181,415 1\n",
"$11,060,485 1\n",
"$1,043,910 1\n",
"FRF 24,000,000 (estimated) 1\n",
"Name: Run Time, Length: 1556, dtype: int64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies[\"Run Time\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"no-rating 1740\n",
"6.4 852\n",
"6.2 847\n",
"6.1 819\n",
"6.3 809\n",
" ... \n",
"9.9 2\n",
"9.8 2\n",
"9.4 2\n",
"1 2\n",
"9.5 2\n",
"Name: Rating, Length: 91, dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies[\"Rating\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1740\n",
"11K 325\n",
"1.2K 323\n",
"1.1K 315\n",
"1.3K 295\n",
" ... \n",
"501K 1\n",
"769K 1\n",
"321K 1\n",
"991K 1\n",
"347K 1\n",
"Name: User Rating, Length: 1684, dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies[\"User Rating\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Drama'] 943\n",
"['Action', 'Crime', 'Drama'] 867\n",
"['Crime', 'Drama', 'Thriller'] 609\n",
"['Comedy', 'Drama', 'Romance'] 608\n",
"['Crime', 'Drama'] 550\n",
" ... \n",
"['Drama', 'Romance', 'Crime'] 1\n",
"['Drama', 'Crime', 'Mystery'] 1\n",
"['Family', 'Adventure', 'Comedy'] 1\n",
"['Crime', 'Mystery', 'Horror'] 1\n",
"['Crime', 'Romance', 'Western'] 1\n",
"Name: Generes, Length: 746, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"movies[\"Generes\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}