diff --git a/.gitignore b/.gitignore index 8c2b884..ac1c488 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,14 @@ -# ---> VisualStudioCode -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets - -# Local History for Visual Studio Code -.history/ - -# Built Visual Studio Code Extensions -*.vsix - +# ---> VisualStudioCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + diff --git a/Dockerfile b/Dockerfile index 4e4b398..eef0016 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,14 @@ -FROM ubuntu:latest - -RUN apt-get update && \ - apt-get install -y python3.10 python3-pip && \ - pip3 install --upgrade pip - -RUN pip3 install --user kaggle pandas - - -COPY . /app -WORKDIR /app - -CMD ./script1 - +FROM ubuntu:latest + +RUN apt-get update && \ + apt-get install -y python3.10 python3-pip && \ + pip3 install --upgrade pip + +RUN pip3 install --user kaggle pandas + + +COPY . /app +WORKDIR /app + +CMD ./script1 + diff --git a/Jenkinsfile b/Jenkinsfile index cc95661..7299a06 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,20 +1,20 @@ -pipeline { - agent any - - stages { - stage('Stage1') { - steps { - echo ("checkout: check out from version control") - git "https://git.wmi.amu.edu.pl/s151636/ium_151636.git" - - echo ("sh: Shell Script") - sh "./script1" - - echo ("copyArtifacts") - - - echo("archiveArtifacts") - } - } - } -} +pipeline { + agent any + + stages { + stage('Stage1') { + steps { + echo ("checkout: check out from version control") + git "https://git.wmi.amu.edu.pl/s151636/ium_151636.git" + + echo ("sh: Shell Script") + sh "./script1" + + echo ("copyArtifacts") + + + echo("archiveArtifacts") + } + } + } +} diff --git a/LICENSE b/LICENSE index c7ffc1a..cb76ea6 100644 --- a/LICENSE +++ b/LICENSE @@ -1 +1 @@ -"THE BEER-WARE LICENSE" (Revision 42): wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp +"THE BEER-WARE LICENSE" (Revision 42): wrote this file. As long as you retain this notice you can do whatever you want with this stuff. If we meet some day, and you think this stuff is worth it, you can buy me a beer in return Poul-Henning Kamp diff --git a/README.md b/README.md index ffc409d..df040fa 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# ium151636 - +# ium151636 + diff --git a/script1 b/script1 index 1667f4f..976308f 100755 --- a/script1 +++ b/script1 @@ -1,11 +1,11 @@ -#!/bin/bash - -wc -l 25k_IMDb_movie_Dataset.csv - -head -n 5 25k_IMDb_movie_Dataset.csv - -cut -f 5 -d "," 25k_IMDb_movie_Dataset.csv | sort | uniq -c - -grep -P "^$" -n 25k_IMDb_movie_Dataset.csv - -head -n -1 25k_IMDb_movie_Dataset.csv | shuf > 25k_movies.csv.shuf +#!/bin/bash + +wc -l 25k_IMDb_movie_Dataset.csv + +head -n 5 25k_IMDb_movie_Dataset.csv + +cut -f 5 -d "," 25k_IMDb_movie_Dataset.csv | sort | uniq -c + +grep -P "^$" -n 25k_IMDb_movie_Dataset.csv + +head -n -1 25k_IMDb_movie_Dataset.csv | shuf > 25k_movies.csv.shuf diff --git a/zadanie1.ipynb b/zadanie1.ipynb index 06f1d19..21c1936 100644 --- a/zadanie1.ipynb +++ b/zadanie1.ipynb @@ -1,1595 +1,1595 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
0Top Gun: Maverick$170,000,000 (estimated)8.6187K['Action', 'Drama']After more than thirty years of service as one...['fighter jet', 'sequel', 'u.s. navy', 'fighte...Joseph Kosinski['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',...Jim Cash-2022/title/tt1745960/
1Jurassic World Dominion2 hours 27 minutes656K['Action', 'Adventure', 'Sci-Fi']Four years after the destruction of Isla Nubla...['dinosaur', 'jurassic park', 'tyrannosaurus r...Colin Trevorrow['Colin Trevorrow', 'Derek Connolly', 'Chris P...Emily Carmichael-2022/title/tt8041270/
2Top Gun$15,000,000 (estimated)6.9380K['Action', 'Drama']As students at the United States Navy's elite ...['pilot', 'male camaraderie', 'u.s. navy', 'gr...Tony Scott['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ...Jim Cash-1986/title/tt0092099/
3Lightyear$71,101,2575.232K['Animation', 'Action', 'Adventure']While spending years attempting to return home...['galaxy', 'spaceship', 'robot', 'rocket', 'sp...Angus MacLane['Jason Headley', 'Matthew Aldrich', 'Chris Ev...Angus MacLane-2022/title/tt10298810/
4Spiderheadnot-released5.423K['Action', 'Crime', 'Drama']In the near future, convicts are offered the c...['discover', 'medical', 'test', 'reality', 'fi...Joseph Kosinski['Rhett Reese', 'Paul Wernick', 'Chris Hemswor...George Saunders-2022/title/tt9783600/
.......................................
24397DelicatessenFRF 24,000,000 (estimated)7.685K['Comedy', 'Crime']Post-apocalyptic surrealist black comedy about...['surrealist', 'black comedy', 'human meat', '...Marc Caro['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad...Jean-Pierre Jeunet-1991/title/tt0101700/
24398Bitch Assnot-released5.552['Crime', 'Horror']A gang initiation goes wrong when a group of f...[]Bill Posley['Bill Posley', 'Teon Kelley', 'Tunde Laleye',...Jonathan Colomb-2022/title/tt13991504/
24399Bullwhipnot-released5.1398['Crime', 'Romance', 'Western']In order to avoid the hangman's noose, a cowbo...['taming of the shrew', 'fur trader', 'busines...Harmon Jones['Guy Madison', 'Rhonda Fleming', 'James Griff...Adele Buffington-1958/title/tt0051438/
24400The Freshman1 hour 42 minutes6.420K['Comedy', 'Crime']An N.Y.C. film school student accepts a job wi...['endangered species', 'fish out of water', 'g...Andrew Bergman['Marlon Brando', 'Matthew Broderick', 'Bruno ...Andrew Bergman-1990/title/tt0099615/
24401Guys and Dolls$5,500,000 (estimated)7.118K['Comedy', 'Crime', 'Musical']In New York, a gambler is challenged to take a...['mission', 'gambler', 'new york city', 'based...Joseph L. Mankiewicz['Abe Burrows', 'Damon Runyon', 'Marlon Brando...Jo Swerling-1955/title/tt0048140/
\n", - "

24402 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " movie title Run Time Rating User Rating \\\n", - "0 Top Gun: Maverick $170,000,000 (estimated) 8.6 187K \n", - "1 Jurassic World Dominion 2 hours 27 minutes 6 56K \n", - "2 Top Gun $15,000,000 (estimated) 6.9 380K \n", - "3 Lightyear $71,101,257 5.2 32K \n", - "4 Spiderhead not-released 5.4 23K \n", - "... ... ... ... ... \n", - "24397 Delicatessen FRF 24,000,000 (estimated) 7.6 85K \n", - "24398 Bitch Ass not-released 5.5 52 \n", - "24399 Bullwhip not-released 5.1 398 \n", - "24400 The Freshman 1 hour 42 minutes 6.4 20K \n", - "24401 Guys and Dolls $5,500,000 (estimated) 7.1 18K \n", - "\n", - " Generes \\\n", - "0 ['Action', 'Drama'] \n", - "1 ['Action', 'Adventure', 'Sci-Fi'] \n", - "2 ['Action', 'Drama'] \n", - "3 ['Animation', 'Action', 'Adventure'] \n", - "4 ['Action', 'Crime', 'Drama'] \n", - "... ... \n", - "24397 ['Comedy', 'Crime'] \n", - "24398 ['Crime', 'Horror'] \n", - "24399 ['Crime', 'Romance', 'Western'] \n", - "24400 ['Comedy', 'Crime'] \n", - "24401 ['Comedy', 'Crime', 'Musical'] \n", - "\n", - " Overview \\\n", - "0 After more than thirty years of service as one... \n", - "1 Four years after the destruction of Isla Nubla... \n", - "2 As students at the United States Navy's elite ... \n", - "3 While spending years attempting to return home... \n", - "4 In the near future, convicts are offered the c... \n", - "... ... \n", - "24397 Post-apocalyptic surrealist black comedy about... \n", - "24398 A gang initiation goes wrong when a group of f... \n", - "24399 In order to avoid the hangman's noose, a cowbo... \n", - "24400 An N.Y.C. film school student accepts a job wi... \n", - "24401 In New York, a gambler is challenged to take a... \n", - "\n", - " Plot Kyeword \\\n", - "0 ['fighter jet', 'sequel', 'u.s. navy', 'fighte... \n", - "1 ['dinosaur', 'jurassic park', 'tyrannosaurus r... \n", - "2 ['pilot', 'male camaraderie', 'u.s. navy', 'gr... \n", - "3 ['galaxy', 'spaceship', 'robot', 'rocket', 'sp... \n", - "4 ['discover', 'medical', 'test', 'reality', 'fi... \n", - "... ... \n", - "24397 ['surrealist', 'black comedy', 'human meat', '... \n", - "24398 [] \n", - "24399 ['taming of the shrew', 'fur trader', 'busines... \n", - "24400 ['endangered species', 'fish out of water', 'g... \n", - "24401 ['mission', 'gambler', 'new york city', 'based... \n", - "\n", - " Director \\\n", - "0 Joseph Kosinski \n", - "1 Colin Trevorrow \n", - "2 Tony Scott \n", - "3 Angus MacLane \n", - "4 Joseph Kosinski \n", - "... ... \n", - "24397 Marc Caro \n", - "24398 Bill Posley \n", - "24399 Harmon Jones \n", - "24400 Andrew Bergman \n", - "24401 Joseph L. Mankiewicz \n", - "\n", - " Top 5 Casts Writer \\\n", - "0 ['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',... Jim Cash \n", - "1 ['Colin Trevorrow', 'Derek Connolly', 'Chris P... Emily Carmichael \n", - "2 ['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ... Jim Cash \n", - "3 ['Jason Headley', 'Matthew Aldrich', 'Chris Ev... Angus MacLane \n", - "4 ['Rhett Reese', 'Paul Wernick', 'Chris Hemswor... George Saunders \n", - "... ... ... \n", - "24397 ['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad... Jean-Pierre Jeunet \n", - "24398 ['Bill Posley', 'Teon Kelley', 'Tunde Laleye',... Jonathan Colomb \n", - "24399 ['Guy Madison', 'Rhonda Fleming', 'James Griff... Adele Buffington \n", - "24400 ['Marlon Brando', 'Matthew Broderick', 'Bruno ... Andrew Bergman \n", - "24401 ['Abe Burrows', 'Damon Runyon', 'Marlon Brando... Jo Swerling \n", - "\n", - " year path \n", - "0 -2022 /title/tt1745960/ \n", - "1 -2022 /title/tt8041270/ \n", - "2 -1986 /title/tt0092099/ \n", - "3 -2022 /title/tt10298810/ \n", - "4 -2022 /title/tt9783600/ \n", - "... ... ... \n", - "24397 -1991 /title/tt0101700/ \n", - "24398 -2022 /title/tt13991504/ \n", - "24399 -1958 /title/tt0051438/ \n", - "24400 -1990 /title/tt0099615/ \n", - "24401 -1955 /title/tt0048140/ \n", - "\n", - "[24402 rows x 12 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "movies = pd.read_csv(\"25k_IMDb_movie_Dataset.csv\")\n", - "movies" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "train, dev, test = np.split(movies.sample(frac=1, random_state=69), [int(.6*len(movies)), int(.8*len(movies))])\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
4782Golmaal: Fun Unlimited2 hours 30 minutes7.417K['Action', 'Comedy', 'Drama']Four runaway crooks take shelter in a bungalow...['blind couple', 'friend', 'mute', 'slacker', ...Rohit Shetty['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'...Neeraj Vora-2006/title/tt0495034/
9291The Remains of the Day2 hours 14 minutes7.874K['Drama', 'Romance']A butler who sacrificed body and soul to servi...['class differences', 'butler', 'housekeeper',...James Ivory['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E...Kazuo Ishiguro-1993/title/tt0107943/
3137Blue Iguana1 hour 40 minutes5.65.2K['Action', 'Comedy', 'Crime']Ex-jailbirds Eddie and Paul are on parole and ...['singing in a car', 'reference to the world c...Hadi Hajaig['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',...Hadi Hajaig-2018/title/tt2316479/
17052Nocturna1 hour 20 minutes7.22.4K['Animation', 'Adventure', 'Family']An orphan boy named Tim is afraid of the dark....['orphan', 'night', 'cat', 'one word title', '...Adrià García['Adrià García', 'Víctor Maldonado', 'Teresa V...Víctor Maldonado-2007/title/tt0836682/
675Alexander$155,000,000 (estimated)5.6169K['Action', 'Biography', 'Drama']Alexander, the King of Macedonia and one of th...['ancient greece', 'greek', 'macedonia', 'sex ...Oliver Stone['Christopher Kyle', 'Laeta Kalogridis', 'Coli...Oliver Stone-2004/title/tt0346491/
.......................................
6365Yalghaar2 hours 38 minutes6.41K['Action', 'Romance', 'War']The film \"explores what happens in the lives o...['live']Hassan Rana['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi...Hassan Rana-2017/title/tt3945864/
7172Brown of Harvardnot-released6.21.5K['Action', 'Drama', 'Romance']Tom Brown shows up at Harvard, confident and a...['harvard', 'no homoeroticism', 'pre code film...Jack Conway['Donald Ogden Stewart', 'Andrew Percival Youn...Rida Johnson Young-1926/title/tt0016690/
12644Deseo1 hour 37 minutes4.6417['Comedy', 'Drama', 'Romance']A succession of erotic encounters weaved into ...['smoking marijuana', 'female full frontal nud...Antonio Zavala Kugler['Antonio Zavala Kugler', 'Christian Bach', 'A...Arthur Schnitzler-2013/title/tt1236434/
23970Saezuru Tori Wa Habatakanai: The Clouds Gather1 hour 25 minutes6.9681['Animation', 'Crime', 'Drama']Yashiro is the president of the Shinseikai Ent...['yaoi', 'boys love', 'gay', 'anime', 'yakuza'...Kaori Makita['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha...Hiroshi Seko-2020/title/tt10675392/
13423992 hours 15 minutes7.32.9K['Comedy', 'Crime', 'Drama']A gangster deputes two of his men to recover m...['cricket the sport', 'briefcase', 'caper', 'e...Krishna D.K.['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',...Raj Nidimoru(I) (2009)/title/tt1370429/
\n", - "

14641 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " movie title \\\n", - "4782 Golmaal: Fun Unlimited \n", - "9291 The Remains of the Day \n", - "3137 Blue Iguana \n", - "17052 Nocturna \n", - "675 Alexander \n", - "... ... \n", - "6365 Yalghaar \n", - "7172 Brown of Harvard \n", - "12644 Deseo \n", - "23970 Saezuru Tori Wa Habatakanai: The Clouds Gather \n", - "13423 99 \n", - "\n", - " Run Time Rating User Rating \\\n", - "4782 2 hours 30 minutes 7.4 17K \n", - "9291 2 hours 14 minutes 7.8 74K \n", - "3137 1 hour 40 minutes 5.6 5.2K \n", - "17052 1 hour 20 minutes 7.2 2.4K \n", - "675 $155,000,000 (estimated) 5.6 169K \n", - "... ... ... ... \n", - "6365 2 hours 38 minutes 6.4 1K \n", - "7172 not-released 6.2 1.5K \n", - "12644 1 hour 37 minutes 4.6 417 \n", - "23970 1 hour 25 minutes 6.9 681 \n", - "13423 2 hours 15 minutes 7.3 2.9K \n", - "\n", - " Generes \\\n", - "4782 ['Action', 'Comedy', 'Drama'] \n", - "9291 ['Drama', 'Romance'] \n", - "3137 ['Action', 'Comedy', 'Crime'] \n", - "17052 ['Animation', 'Adventure', 'Family'] \n", - "675 ['Action', 'Biography', 'Drama'] \n", - "... ... \n", - "6365 ['Action', 'Romance', 'War'] \n", - "7172 ['Action', 'Drama', 'Romance'] \n", - "12644 ['Comedy', 'Drama', 'Romance'] \n", - "23970 ['Animation', 'Crime', 'Drama'] \n", - "13423 ['Comedy', 'Crime', 'Drama'] \n", - "\n", - " Overview \\\n", - "4782 Four runaway crooks take shelter in a bungalow... \n", - "9291 A butler who sacrificed body and soul to servi... \n", - "3137 Ex-jailbirds Eddie and Paul are on parole and ... \n", - "17052 An orphan boy named Tim is afraid of the dark.... \n", - "675 Alexander, the King of Macedonia and one of th... \n", - "... ... \n", - "6365 The film \"explores what happens in the lives o... \n", - "7172 Tom Brown shows up at Harvard, confident and a... \n", - "12644 A succession of erotic encounters weaved into ... \n", - "23970 Yashiro is the president of the Shinseikai Ent... \n", - "13423 A gangster deputes two of his men to recover m... \n", - "\n", - " Plot Kyeword \\\n", - "4782 ['blind couple', 'friend', 'mute', 'slacker', ... \n", - "9291 ['class differences', 'butler', 'housekeeper',... \n", - "3137 ['singing in a car', 'reference to the world c... \n", - "17052 ['orphan', 'night', 'cat', 'one word title', '... \n", - "675 ['ancient greece', 'greek', 'macedonia', 'sex ... \n", - "... ... \n", - "6365 ['live'] \n", - "7172 ['harvard', 'no homoeroticism', 'pre code film... \n", - "12644 ['smoking marijuana', 'female full frontal nud... \n", - "23970 ['yaoi', 'boys love', 'gay', 'anime', 'yakuza'... \n", - "13423 ['cricket the sport', 'briefcase', 'caper', 'e... \n", - "\n", - " Director \\\n", - "4782 Rohit Shetty \n", - "9291 James Ivory \n", - "3137 Hadi Hajaig \n", - "17052 Adrià García \n", - "675 Oliver Stone \n", - "... ... \n", - "6365 Hassan Rana \n", - "7172 Jack Conway \n", - "12644 Antonio Zavala Kugler \n", - "23970 Kaori Makita \n", - "13423 Krishna D.K. \n", - "\n", - " Top 5 Casts Writer \\\n", - "4782 ['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'... Neeraj Vora \n", - "9291 ['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E... Kazuo Ishiguro \n", - "3137 ['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',... Hadi Hajaig \n", - "17052 ['Adrià García', 'Víctor Maldonado', 'Teresa V... Víctor Maldonado \n", - "675 ['Christopher Kyle', 'Laeta Kalogridis', 'Coli... Oliver Stone \n", - "... ... ... \n", - "6365 ['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi... Hassan Rana \n", - "7172 ['Donald Ogden Stewart', 'Andrew Percival Youn... Rida Johnson Young \n", - "12644 ['Antonio Zavala Kugler', 'Christian Bach', 'A... Arthur Schnitzler \n", - "23970 ['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha... Hiroshi Seko \n", - "13423 ['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',... Raj Nidimoru \n", - "\n", - " year path \n", - "4782 -2006 /title/tt0495034/ \n", - "9291 -1993 /title/tt0107943/ \n", - "3137 -2018 /title/tt2316479/ \n", - "17052 -2007 /title/tt0836682/ \n", - "675 -2004 /title/tt0346491/ \n", - "... ... ... \n", - "6365 -2017 /title/tt3945864/ \n", - "7172 -1926 /title/tt0016690/ \n", - "12644 -2013 /title/tt1236434/ \n", - "23970 -2020 /title/tt10675392/ \n", - "13423 (I) (2009) /title/tt1370429/ \n", - "\n", - "[14641 rows x 12 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
20683The Animalsnot-released4.2398['Western']A woman tracks down the five men who raped her...['rape and revenge', 'arizona territory', 'ari...Ron Joy['Michele Carey', 'Henry Silva', 'Keenan Wynn'...Richard Bakalyan-1970/title/tt0065407/
8040The Lincoln Lawyer$40,000,000 (estimated)7.3236K['Crime', 'Drama', 'Mystery']A lawyer defending a wealthy man begins to bel...['defense lawyer', 'plot twist', 'drug rehabil...Brad Furman['Michael Connelly', 'Matthew McConaughey', 'M...John Romano-2011/title/tt1189340/
23664What's the Worst That Could Happen?1 hour 34 minutes5.416K['Comedy', 'Crime']A rich man catches a thief burglarizing his ho...['breaking and entering', 'bankruptcy', 'quest...Sam Weisman['Matthew Chapman', 'Martin Lawrence', 'Danny ...Donald E. Westlake-2001/title/tt0161083/
9078Cleopatranot-released6.2652['Animation', 'History', 'Romance']In order to foil the enemy aliens' \"Cleopatra ...['adult anime', 'adult animation', 'anime', 'd...Osamu Tezuka['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats...Eiichi Yamamoto-1963/title/tt0056937/
17118Seven Cities of Gold1 hour 43 minutes5.9479['Adventure', 'Biography', 'History']In 1769, a Spanish expedition to California se...['limping man', 'prologue', 'voice over narrat...Robert D. Webb['John C. Higgins', 'Joseph Petracca', 'Richar...Richard L. Breen-1955/title/tt0048603/
.......................................
1248The Wall1 hour 28 minutes6.227K['Action', 'Drama', 'Thriller']Two American Soldiers are trapped by a lethal ...['sniper', 'soldier', 'deception', 'wound', 'd...Doug Liman['Aaron Taylor-Johnson', 'John Cena', 'Laith N...Dwain Worrell(II) (2017)/title/tt4218696/
7091Commando 32 hours 13 minutes5.73K['Action', 'Adventure', 'Thriller']Karan goes to London to stop a terrorist attac...['chase']Aditya Datt['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau...Darius Yarmil-2019/title/tt8983168/
15635Adam Resurrected1 hour 46 minutes6.24.2K['Drama', 'War']In the aftermath of World War II, a former cir...['man wears a white suit', 'desert hotel', 'ti...Paul Schrader['Noah Stollman', 'Jeff Goldblum', 'Willem Daf...Yoram Kaniuk-2008/title/tt0479341/
12914Main Street1 hour 32 minutes4.72.9K['Drama']Durham is slowly dying like the tobacco busine...['economic depression', 'reference to lucky st...John Doyle['Colin Firth', 'Ellen Burstyn', 'Patricia Cla...Horton Foote-2010/title/tt1365483/
2825The Outrage1 hour 36 minutes6.22.2K['Crime', 'Drama', 'Western']Travelers in the 1870s Southwest discuss a rec...['highwayman', 'man bound and gagged', 'gun du...Martin Ritt['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau...Michael Kanin-1964/title/tt0058437/
\n", - "

4880 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " movie title Run Time Rating \\\n", - "20683 The Animals not-released 4.2 \n", - "8040 The Lincoln Lawyer $40,000,000 (estimated) 7.3 \n", - "23664 What's the Worst That Could Happen? 1 hour 34 minutes 5.4 \n", - "9078 Cleopatra not-released 6.2 \n", - "17118 Seven Cities of Gold 1 hour 43 minutes 5.9 \n", - "... ... ... ... \n", - "1248 The Wall 1 hour 28 minutes 6.2 \n", - "7091 Commando 3 2 hours 13 minutes 5.7 \n", - "15635 Adam Resurrected 1 hour 46 minutes 6.2 \n", - "12914 Main Street 1 hour 32 minutes 4.7 \n", - "2825 The Outrage 1 hour 36 minutes 6.2 \n", - "\n", - " User Rating Generes \\\n", - "20683 398 ['Western'] \n", - "8040 236K ['Crime', 'Drama', 'Mystery'] \n", - "23664 16K ['Comedy', 'Crime'] \n", - "9078 652 ['Animation', 'History', 'Romance'] \n", - "17118 479 ['Adventure', 'Biography', 'History'] \n", - "... ... ... \n", - "1248 27K ['Action', 'Drama', 'Thriller'] \n", - "7091 3K ['Action', 'Adventure', 'Thriller'] \n", - "15635 4.2K ['Drama', 'War'] \n", - "12914 2.9K ['Drama'] \n", - "2825 2.2K ['Crime', 'Drama', 'Western'] \n", - "\n", - " Overview \\\n", - "20683 A woman tracks down the five men who raped her... \n", - "8040 A lawyer defending a wealthy man begins to bel... \n", - "23664 A rich man catches a thief burglarizing his ho... \n", - "9078 In order to foil the enemy aliens' \"Cleopatra ... \n", - "17118 In 1769, a Spanish expedition to California se... \n", - "... ... \n", - "1248 Two American Soldiers are trapped by a lethal ... \n", - "7091 Karan goes to London to stop a terrorist attac... \n", - "15635 In the aftermath of World War II, a former cir... \n", - "12914 Durham is slowly dying like the tobacco busine... \n", - "2825 Travelers in the 1870s Southwest discuss a rec... \n", - "\n", - " Plot Kyeword Director \\\n", - "20683 ['rape and revenge', 'arizona territory', 'ari... Ron Joy \n", - "8040 ['defense lawyer', 'plot twist', 'drug rehabil... Brad Furman \n", - "23664 ['breaking and entering', 'bankruptcy', 'quest... Sam Weisman \n", - "9078 ['adult anime', 'adult animation', 'anime', 'd... Osamu Tezuka \n", - "17118 ['limping man', 'prologue', 'voice over narrat... Robert D. Webb \n", - "... ... ... \n", - "1248 ['sniper', 'soldier', 'deception', 'wound', 'd... Doug Liman \n", - "7091 ['chase'] Aditya Datt \n", - "15635 ['man wears a white suit', 'desert hotel', 'ti... Paul Schrader \n", - "12914 ['economic depression', 'reference to lucky st... John Doyle \n", - "2825 ['highwayman', 'man bound and gagged', 'gun du... Martin Ritt \n", - "\n", - " Top 5 Casts Writer \\\n", - "20683 ['Michele Carey', 'Henry Silva', 'Keenan Wynn'... Richard Bakalyan \n", - "8040 ['Michael Connelly', 'Matthew McConaughey', 'M... John Romano \n", - "23664 ['Matthew Chapman', 'Martin Lawrence', 'Danny ... Donald E. Westlake \n", - "9078 ['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats... Eiichi Yamamoto \n", - "17118 ['John C. Higgins', 'Joseph Petracca', 'Richar... Richard L. Breen \n", - "... ... ... \n", - "1248 ['Aaron Taylor-Johnson', 'John Cena', 'Laith N... Dwain Worrell \n", - "7091 ['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau... Darius Yarmil \n", - "15635 ['Noah Stollman', 'Jeff Goldblum', 'Willem Daf... Yoram Kaniuk \n", - "12914 ['Colin Firth', 'Ellen Burstyn', 'Patricia Cla... Horton Foote \n", - "2825 ['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau... Michael Kanin \n", - "\n", - " year path \n", - "20683 -1970 /title/tt0065407/ \n", - "8040 -2011 /title/tt1189340/ \n", - "23664 -2001 /title/tt0161083/ \n", - "9078 -1963 /title/tt0056937/ \n", - "17118 -1955 /title/tt0048603/ \n", - "... ... ... \n", - "1248 (II) (2017) /title/tt4218696/ \n", - "7091 -2019 /title/tt8983168/ \n", - "15635 -2008 /title/tt0479341/ \n", - "12914 -2010 /title/tt1365483/ \n", - "2825 -1964 /title/tt0058437/ \n", - "\n", - "[4880 rows x 12 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dev" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
16788High in the Cloudsnot-releasedno-rating0['Animation', 'Adventure', 'Comedy']A squirrel embarks on a journey to find an ani...['journey', 'cloud', 'animal', 'friend', 'trag...Timothy Reckart['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar...Philip Ardagh-2022/title/tt1458167/
17155Curious George 3: Back to the Jungle1 hour 21 minutes5.4598['Animation', 'Adventure', 'Comedy']Curious George goes on an epic adventure to sp...['sequel', 'third part', 'jungle', 'curious ge...Phil Weinstein['H.A. Rey', 'Chuck Tately', 'Frank Welker', '...Margret Rey-2015/title/tt4622340/
5985Tarkan: Altin Madalyonnot-released6.21.6K['Action', 'Adventure', 'History']The story of Tarkan and his friends efforts to...['black magic', 'blood', 'axe', 'snake', 'pros...Mehmet Aslan['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',...Sezgin Burak-1973/title/tt0274933/
1640Harley Davidson and the Marlboro Man$23,000,000 (estimated)6.120K['Action', 'Crime', 'Drama']Forced by the imminent foreclosure of their fr...['swimming pool', 'night club', 'voyeurism', '...Simon Wincer['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel...Don Michael Paul-1991/title/tt0102005/
9826I Am All Girlsnot-released5.95.8K['Crime', 'Drama', 'Mystery']A special crimes investigator forms an unlikel...['child', 'bond', 'investigator', 'murder', 'c...Donovan Marsh['Marcell Greeff', 'Emile Leuvennink', 'Erica ...Wayne Fitzjohn-2021/title/tt9013182/
.......................................
11825The Apostle2 hours 14 minutes7.214K['Drama']After his happy life spins out of control, a p...['southern gothic', 'timeframe 1930s', 'preach...Robert Duvall['Robert Duvall', 'Todd Allen', 'Paul Bagget',...Robert Duvall-1997/title/tt0118632/
14740My Normalnot-released4.1756['Drama', 'Romance']A lesbian dominatrix finds a way to use her un...['dominatrix', 'lesbian', 'lesbian kiss', 'blo...Irving Schwartz['Renee Garzon', 'Keith Planit', 'Nicole LaLib...Abdul Malik Abbott-2009/title/tt1117983/
9818Lust och fägring stor2 hours 10 minutes6.96.5K['Drama', 'Romance', 'War']Malmö, Sweden during the Second World War. Sti...['extramarital affair', 'teacher student sex',...Bo Widerberg['Johan Widerberg', 'Marika Lagercrantz', 'Tom...Bo Widerberg-1995/title/tt0113720/
4041RoboCop Returnsnot-releasedno-rating0['Action', 'Adventure', 'Crime']RoboCop returns to fight crime in Detroit.['sequel', 'reboot', 'non comic book superhero...Abe Forsythe['Edward Neumeier', 'Justin Rhodes', 'Abe Fors...Michael MinerNaN/title/tt8688550/
23755The Maiden Heist1 hour 30 minutes617K['Comedy', 'Crime']A comedy centered on three museum security gua...['heist crime', 'caper crime', 'forgery', 'hei...Peter Hewitt['Christopher Walken', 'Joseph McKenna', 'Wynn...Michael LeSieur-2009/title/tt1107860/
\n", - "

4881 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " movie title Run Time \\\n", - "16788 High in the Clouds not-released \n", - "17155 Curious George 3: Back to the Jungle 1 hour 21 minutes \n", - "5985 Tarkan: Altin Madalyon not-released \n", - "1640 Harley Davidson and the Marlboro Man $23,000,000 (estimated) \n", - "9826 I Am All Girls not-released \n", - "... ... ... \n", - "11825 The Apostle 2 hours 14 minutes \n", - "14740 My Normal not-released \n", - "9818 Lust och fägring stor 2 hours 10 minutes \n", - "4041 RoboCop Returns not-released \n", - "23755 The Maiden Heist 1 hour 30 minutes \n", - "\n", - " Rating User Rating Generes \\\n", - "16788 no-rating 0 ['Animation', 'Adventure', 'Comedy'] \n", - "17155 5.4 598 ['Animation', 'Adventure', 'Comedy'] \n", - "5985 6.2 1.6K ['Action', 'Adventure', 'History'] \n", - "1640 6.1 20K ['Action', 'Crime', 'Drama'] \n", - "9826 5.9 5.8K ['Crime', 'Drama', 'Mystery'] \n", - "... ... ... ... \n", - "11825 7.2 14K ['Drama'] \n", - "14740 4.1 756 ['Drama', 'Romance'] \n", - "9818 6.9 6.5K ['Drama', 'Romance', 'War'] \n", - "4041 no-rating 0 ['Action', 'Adventure', 'Crime'] \n", - "23755 6 17K ['Comedy', 'Crime'] \n", - "\n", - " Overview \\\n", - "16788 A squirrel embarks on a journey to find an ani... \n", - "17155 Curious George goes on an epic adventure to sp... \n", - "5985 The story of Tarkan and his friends efforts to... \n", - "1640 Forced by the imminent foreclosure of their fr... \n", - "9826 A special crimes investigator forms an unlikel... \n", - "... ... \n", - "11825 After his happy life spins out of control, a p... \n", - "14740 A lesbian dominatrix finds a way to use her un... \n", - "9818 Malmö, Sweden during the Second World War. Sti... \n", - "4041 RoboCop returns to fight crime in Detroit. \n", - "23755 A comedy centered on three museum security gua... \n", - "\n", - " Plot Kyeword Director \\\n", - "16788 ['journey', 'cloud', 'animal', 'friend', 'trag... Timothy Reckart \n", - "17155 ['sequel', 'third part', 'jungle', 'curious ge... Phil Weinstein \n", - "5985 ['black magic', 'blood', 'axe', 'snake', 'pros... Mehmet Aslan \n", - "1640 ['swimming pool', 'night club', 'voyeurism', '... Simon Wincer \n", - "9826 ['child', 'bond', 'investigator', 'murder', 'c... Donovan Marsh \n", - "... ... ... \n", - "11825 ['southern gothic', 'timeframe 1930s', 'preach... Robert Duvall \n", - "14740 ['dominatrix', 'lesbian', 'lesbian kiss', 'blo... Irving Schwartz \n", - "9818 ['extramarital affair', 'teacher student sex',... Bo Widerberg \n", - "4041 ['sequel', 'reboot', 'non comic book superhero... Abe Forsythe \n", - "23755 ['heist crime', 'caper crime', 'forgery', 'hei... Peter Hewitt \n", - "\n", - " Top 5 Casts Writer \\\n", - "16788 ['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar... Philip Ardagh \n", - "17155 ['H.A. Rey', 'Chuck Tately', 'Frank Welker', '... Margret Rey \n", - "5985 ['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',... Sezgin Burak \n", - "1640 ['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel... Don Michael Paul \n", - "9826 ['Marcell Greeff', 'Emile Leuvennink', 'Erica ... Wayne Fitzjohn \n", - "... ... ... \n", - "11825 ['Robert Duvall', 'Todd Allen', 'Paul Bagget',... Robert Duvall \n", - "14740 ['Renee Garzon', 'Keith Planit', 'Nicole LaLib... Abdul Malik Abbott \n", - "9818 ['Johan Widerberg', 'Marika Lagercrantz', 'Tom... Bo Widerberg \n", - "4041 ['Edward Neumeier', 'Justin Rhodes', 'Abe Fors... Michael Miner \n", - "23755 ['Christopher Walken', 'Joseph McKenna', 'Wynn... Michael LeSieur \n", - "\n", - " year path \n", - "16788 -2022 /title/tt1458167/ \n", - "17155 -2015 /title/tt4622340/ \n", - "5985 -1973 /title/tt0274933/ \n", - "1640 -1991 /title/tt0102005/ \n", - "9826 -2021 /title/tt9013182/ \n", - "... ... ... \n", - "11825 -1997 /title/tt0118632/ \n", - "14740 -2009 /title/tt1117983/ \n", - "9818 -1995 /title/tt0113720/ \n", - "4041 NaN /title/tt8688550/ \n", - "23755 -2009 /title/tt1107860/ \n", - "\n", - "[4881 rows x 12 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
count244022440224402244022440224158244022440224402244022362424402
unique239221556911684746239572154611604242111556225023922
topRagenot-releasedno-rating0['Drama']none[]See company contact information['See producer', 'See preliminary cast']See writer-2022/title/tt0114224/
freq4847517401740943142169614214214212014
\n", - "
" - ], - "text/plain": [ - " movie title Run Time Rating User Rating Generes Overview \\\n", - "count 24402 24402 24402 24402 24402 24158 \n", - "unique 23922 1556 91 1684 746 23957 \n", - "top Rage not-released no-rating 0 ['Drama'] none \n", - "freq 4 8475 1740 1740 943 142 \n", - "\n", - " Plot Kyeword Director \\\n", - "count 24402 24402 \n", - "unique 21546 11604 \n", - "top [] See company contact information \n", - "freq 1696 142 \n", - "\n", - " Top 5 Casts Writer year \\\n", - "count 24402 24402 23624 \n", - "unique 24211 15562 250 \n", - "top ['See producer', 'See preliminary cast'] See writer -2022 \n", - "freq 142 142 1201 \n", - "\n", - " path \n", - "count 24402 \n", - "unique 23922 \n", - "top /title/tt0114224/ \n", - "freq 4 " - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies.describe(include='all')" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Rage 4\n", - "The Killer 4\n", - "The Beast 4\n", - "Spiral 4\n", - "The Silence 3\n", - " ..\n", - "The Mule 1\n", - "Donnie Brasco 1\n", - "Little Miss Sunshine 1\n", - "Three Billboards Outside Ebbing, Missouri 1\n", - "Guys and Dolls 1\n", - "Name: movie title, Length: 23922, dtype: int64" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies[\"movie title\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "not-released 8475\n", - "1 hour 30 minutes 503\n", - "1 hour 35 minutes 376\n", - "1 hour 38 minutes 350\n", - "1 hour 31 minutes 338\n", - " ... \n", - "$14,492 1\n", - "$181,415 1\n", - "$11,060,485 1\n", - "$1,043,910 1\n", - "FRF 24,000,000 (estimated) 1\n", - "Name: Run Time, Length: 1556, dtype: int64" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies[\"Run Time\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "no-rating 1740\n", - "6.4 852\n", - "6.2 847\n", - "6.1 819\n", - "6.3 809\n", - " ... \n", - "9.9 2\n", - "9.8 2\n", - "9.4 2\n", - "1 2\n", - "9.5 2\n", - "Name: Rating, Length: 91, dtype: int64" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies[\"Rating\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1740\n", - "11K 325\n", - "1.2K 323\n", - "1.1K 315\n", - "1.3K 295\n", - " ... \n", - "501K 1\n", - "769K 1\n", - "321K 1\n", - "991K 1\n", - "347K 1\n", - "Name: User Rating, Length: 1684, dtype: int64" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies[\"User Rating\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Drama'] 943\n", - "['Action', 'Crime', 'Drama'] 867\n", - "['Crime', 'Drama', 'Thriller'] 609\n", - "['Comedy', 'Drama', 'Romance'] 608\n", - "['Crime', 'Drama'] 550\n", - " ... \n", - "['Drama', 'Romance', 'Crime'] 1\n", - "['Drama', 'Crime', 'Mystery'] 1\n", - "['Family', 'Adventure', 'Comedy'] 1\n", - "['Crime', 'Mystery', 'Horror'] 1\n", - "['Crime', 'Romance', 'Western'] 1\n", - "Name: Generes, Length: 746, dtype: int64" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "movies[\"Generes\"].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
0Top Gun: Maverick$170,000,000 (estimated)8.6187K['Action', 'Drama']After more than thirty years of service as one...['fighter jet', 'sequel', 'u.s. navy', 'fighte...Joseph Kosinski['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',...Jim Cash-2022/title/tt1745960/
1Jurassic World Dominion2 hours 27 minutes656K['Action', 'Adventure', 'Sci-Fi']Four years after the destruction of Isla Nubla...['dinosaur', 'jurassic park', 'tyrannosaurus r...Colin Trevorrow['Colin Trevorrow', 'Derek Connolly', 'Chris P...Emily Carmichael-2022/title/tt8041270/
2Top Gun$15,000,000 (estimated)6.9380K['Action', 'Drama']As students at the United States Navy's elite ...['pilot', 'male camaraderie', 'u.s. navy', 'gr...Tony Scott['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ...Jim Cash-1986/title/tt0092099/
3Lightyear$71,101,2575.232K['Animation', 'Action', 'Adventure']While spending years attempting to return home...['galaxy', 'spaceship', 'robot', 'rocket', 'sp...Angus MacLane['Jason Headley', 'Matthew Aldrich', 'Chris Ev...Angus MacLane-2022/title/tt10298810/
4Spiderheadnot-released5.423K['Action', 'Crime', 'Drama']In the near future, convicts are offered the c...['discover', 'medical', 'test', 'reality', 'fi...Joseph Kosinski['Rhett Reese', 'Paul Wernick', 'Chris Hemswor...George Saunders-2022/title/tt9783600/
.......................................
24397DelicatessenFRF 24,000,000 (estimated)7.685K['Comedy', 'Crime']Post-apocalyptic surrealist black comedy about...['surrealist', 'black comedy', 'human meat', '...Marc Caro['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad...Jean-Pierre Jeunet-1991/title/tt0101700/
24398Bitch Assnot-released5.552['Crime', 'Horror']A gang initiation goes wrong when a group of f...[]Bill Posley['Bill Posley', 'Teon Kelley', 'Tunde Laleye',...Jonathan Colomb-2022/title/tt13991504/
24399Bullwhipnot-released5.1398['Crime', 'Romance', 'Western']In order to avoid the hangman's noose, a cowbo...['taming of the shrew', 'fur trader', 'busines...Harmon Jones['Guy Madison', 'Rhonda Fleming', 'James Griff...Adele Buffington-1958/title/tt0051438/
24400The Freshman1 hour 42 minutes6.420K['Comedy', 'Crime']An N.Y.C. film school student accepts a job wi...['endangered species', 'fish out of water', 'g...Andrew Bergman['Marlon Brando', 'Matthew Broderick', 'Bruno ...Andrew Bergman-1990/title/tt0099615/
24401Guys and Dolls$5,500,000 (estimated)7.118K['Comedy', 'Crime', 'Musical']In New York, a gambler is challenged to take a...['mission', 'gambler', 'new york city', 'based...Joseph L. Mankiewicz['Abe Burrows', 'Damon Runyon', 'Marlon Brando...Jo Swerling-1955/title/tt0048140/
\n", + "

24402 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " movie title Run Time Rating User Rating \\\n", + "0 Top Gun: Maverick $170,000,000 (estimated) 8.6 187K \n", + "1 Jurassic World Dominion 2 hours 27 minutes 6 56K \n", + "2 Top Gun $15,000,000 (estimated) 6.9 380K \n", + "3 Lightyear $71,101,257 5.2 32K \n", + "4 Spiderhead not-released 5.4 23K \n", + "... ... ... ... ... \n", + "24397 Delicatessen FRF 24,000,000 (estimated) 7.6 85K \n", + "24398 Bitch Ass not-released 5.5 52 \n", + "24399 Bullwhip not-released 5.1 398 \n", + "24400 The Freshman 1 hour 42 minutes 6.4 20K \n", + "24401 Guys and Dolls $5,500,000 (estimated) 7.1 18K \n", + "\n", + " Generes \\\n", + "0 ['Action', 'Drama'] \n", + "1 ['Action', 'Adventure', 'Sci-Fi'] \n", + "2 ['Action', 'Drama'] \n", + "3 ['Animation', 'Action', 'Adventure'] \n", + "4 ['Action', 'Crime', 'Drama'] \n", + "... ... \n", + "24397 ['Comedy', 'Crime'] \n", + "24398 ['Crime', 'Horror'] \n", + "24399 ['Crime', 'Romance', 'Western'] \n", + "24400 ['Comedy', 'Crime'] \n", + "24401 ['Comedy', 'Crime', 'Musical'] \n", + "\n", + " Overview \\\n", + "0 After more than thirty years of service as one... \n", + "1 Four years after the destruction of Isla Nubla... \n", + "2 As students at the United States Navy's elite ... \n", + "3 While spending years attempting to return home... \n", + "4 In the near future, convicts are offered the c... \n", + "... ... \n", + "24397 Post-apocalyptic surrealist black comedy about... \n", + "24398 A gang initiation goes wrong when a group of f... \n", + "24399 In order to avoid the hangman's noose, a cowbo... \n", + "24400 An N.Y.C. film school student accepts a job wi... \n", + "24401 In New York, a gambler is challenged to take a... \n", + "\n", + " Plot Kyeword \\\n", + "0 ['fighter jet', 'sequel', 'u.s. navy', 'fighte... \n", + "1 ['dinosaur', 'jurassic park', 'tyrannosaurus r... \n", + "2 ['pilot', 'male camaraderie', 'u.s. navy', 'gr... \n", + "3 ['galaxy', 'spaceship', 'robot', 'rocket', 'sp... \n", + "4 ['discover', 'medical', 'test', 'reality', 'fi... \n", + "... ... \n", + "24397 ['surrealist', 'black comedy', 'human meat', '... \n", + "24398 [] \n", + "24399 ['taming of the shrew', 'fur trader', 'busines... \n", + "24400 ['endangered species', 'fish out of water', 'g... \n", + "24401 ['mission', 'gambler', 'new york city', 'based... \n", + "\n", + " Director \\\n", + "0 Joseph Kosinski \n", + "1 Colin Trevorrow \n", + "2 Tony Scott \n", + "3 Angus MacLane \n", + "4 Joseph Kosinski \n", + "... ... \n", + "24397 Marc Caro \n", + "24398 Bill Posley \n", + "24399 Harmon Jones \n", + "24400 Andrew Bergman \n", + "24401 Joseph L. Mankiewicz \n", + "\n", + " Top 5 Casts Writer \\\n", + "0 ['Jack Epps Jr.', 'Peter Craig', 'Tom Cruise',... Jim Cash \n", + "1 ['Colin Trevorrow', 'Derek Connolly', 'Chris P... Emily Carmichael \n", + "2 ['Jack Epps Jr.', 'Ehud Yonay', 'Tom Cruise', ... Jim Cash \n", + "3 ['Jason Headley', 'Matthew Aldrich', 'Chris Ev... Angus MacLane \n", + "4 ['Rhett Reese', 'Paul Wernick', 'Chris Hemswor... George Saunders \n", + "... ... ... \n", + "24397 ['Jean-Pierre Jeunet', 'Marc Caro', 'Gilles Ad... Jean-Pierre Jeunet \n", + "24398 ['Bill Posley', 'Teon Kelley', 'Tunde Laleye',... Jonathan Colomb \n", + "24399 ['Guy Madison', 'Rhonda Fleming', 'James Griff... Adele Buffington \n", + "24400 ['Marlon Brando', 'Matthew Broderick', 'Bruno ... Andrew Bergman \n", + "24401 ['Abe Burrows', 'Damon Runyon', 'Marlon Brando... Jo Swerling \n", + "\n", + " year path \n", + "0 -2022 /title/tt1745960/ \n", + "1 -2022 /title/tt8041270/ \n", + "2 -1986 /title/tt0092099/ \n", + "3 -2022 /title/tt10298810/ \n", + "4 -2022 /title/tt9783600/ \n", + "... ... ... \n", + "24397 -1991 /title/tt0101700/ \n", + "24398 -2022 /title/tt13991504/ \n", + "24399 -1958 /title/tt0051438/ \n", + "24400 -1990 /title/tt0099615/ \n", + "24401 -1955 /title/tt0048140/ \n", + "\n", + "[24402 rows x 12 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "movies = pd.read_csv(\"25k_IMDb_movie_Dataset.csv\")\n", + "movies" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "train, dev, test = np.split(movies.sample(frac=1, random_state=69), [int(.6*len(movies)), int(.8*len(movies))])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
4782Golmaal: Fun Unlimited2 hours 30 minutes7.417K['Action', 'Comedy', 'Drama']Four runaway crooks take shelter in a bungalow...['blind couple', 'friend', 'mute', 'slacker', ...Rohit Shetty['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'...Neeraj Vora-2006/title/tt0495034/
9291The Remains of the Day2 hours 14 minutes7.874K['Drama', 'Romance']A butler who sacrificed body and soul to servi...['class differences', 'butler', 'housekeeper',...James Ivory['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E...Kazuo Ishiguro-1993/title/tt0107943/
3137Blue Iguana1 hour 40 minutes5.65.2K['Action', 'Comedy', 'Crime']Ex-jailbirds Eddie and Paul are on parole and ...['singing in a car', 'reference to the world c...Hadi Hajaig['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',...Hadi Hajaig-2018/title/tt2316479/
17052Nocturna1 hour 20 minutes7.22.4K['Animation', 'Adventure', 'Family']An orphan boy named Tim is afraid of the dark....['orphan', 'night', 'cat', 'one word title', '...Adrià García['Adrià García', 'Víctor Maldonado', 'Teresa V...Víctor Maldonado-2007/title/tt0836682/
675Alexander$155,000,000 (estimated)5.6169K['Action', 'Biography', 'Drama']Alexander, the King of Macedonia and one of th...['ancient greece', 'greek', 'macedonia', 'sex ...Oliver Stone['Christopher Kyle', 'Laeta Kalogridis', 'Coli...Oliver Stone-2004/title/tt0346491/
.......................................
6365Yalghaar2 hours 38 minutes6.41K['Action', 'Romance', 'War']The film \"explores what happens in the lives o...['live']Hassan Rana['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi...Hassan Rana-2017/title/tt3945864/
7172Brown of Harvardnot-released6.21.5K['Action', 'Drama', 'Romance']Tom Brown shows up at Harvard, confident and a...['harvard', 'no homoeroticism', 'pre code film...Jack Conway['Donald Ogden Stewart', 'Andrew Percival Youn...Rida Johnson Young-1926/title/tt0016690/
12644Deseo1 hour 37 minutes4.6417['Comedy', 'Drama', 'Romance']A succession of erotic encounters weaved into ...['smoking marijuana', 'female full frontal nud...Antonio Zavala Kugler['Antonio Zavala Kugler', 'Christian Bach', 'A...Arthur Schnitzler-2013/title/tt1236434/
23970Saezuru Tori Wa Habatakanai: The Clouds Gather1 hour 25 minutes6.9681['Animation', 'Crime', 'Drama']Yashiro is the president of the Shinseikai Ent...['yaoi', 'boys love', 'gay', 'anime', 'yakuza'...Kaori Makita['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha...Hiroshi Seko-2020/title/tt10675392/
13423992 hours 15 minutes7.32.9K['Comedy', 'Crime', 'Drama']A gangster deputes two of his men to recover m...['cricket the sport', 'briefcase', 'caper', 'e...Krishna D.K.['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',...Raj Nidimoru(I) (2009)/title/tt1370429/
\n", + "

14641 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " movie title \\\n", + "4782 Golmaal: Fun Unlimited \n", + "9291 The Remains of the Day \n", + "3137 Blue Iguana \n", + "17052 Nocturna \n", + "675 Alexander \n", + "... ... \n", + "6365 Yalghaar \n", + "7172 Brown of Harvard \n", + "12644 Deseo \n", + "23970 Saezuru Tori Wa Habatakanai: The Clouds Gather \n", + "13423 99 \n", + "\n", + " Run Time Rating User Rating \\\n", + "4782 2 hours 30 minutes 7.4 17K \n", + "9291 2 hours 14 minutes 7.8 74K \n", + "3137 1 hour 40 minutes 5.6 5.2K \n", + "17052 1 hour 20 minutes 7.2 2.4K \n", + "675 $155,000,000 (estimated) 5.6 169K \n", + "... ... ... ... \n", + "6365 2 hours 38 minutes 6.4 1K \n", + "7172 not-released 6.2 1.5K \n", + "12644 1 hour 37 minutes 4.6 417 \n", + "23970 1 hour 25 minutes 6.9 681 \n", + "13423 2 hours 15 minutes 7.3 2.9K \n", + "\n", + " Generes \\\n", + "4782 ['Action', 'Comedy', 'Drama'] \n", + "9291 ['Drama', 'Romance'] \n", + "3137 ['Action', 'Comedy', 'Crime'] \n", + "17052 ['Animation', 'Adventure', 'Family'] \n", + "675 ['Action', 'Biography', 'Drama'] \n", + "... ... \n", + "6365 ['Action', 'Romance', 'War'] \n", + "7172 ['Action', 'Drama', 'Romance'] \n", + "12644 ['Comedy', 'Drama', 'Romance'] \n", + "23970 ['Animation', 'Crime', 'Drama'] \n", + "13423 ['Comedy', 'Crime', 'Drama'] \n", + "\n", + " Overview \\\n", + "4782 Four runaway crooks take shelter in a bungalow... \n", + "9291 A butler who sacrificed body and soul to servi... \n", + "3137 Ex-jailbirds Eddie and Paul are on parole and ... \n", + "17052 An orphan boy named Tim is afraid of the dark.... \n", + "675 Alexander, the King of Macedonia and one of th... \n", + "... ... \n", + "6365 The film \"explores what happens in the lives o... \n", + "7172 Tom Brown shows up at Harvard, confident and a... \n", + "12644 A succession of erotic encounters weaved into ... \n", + "23970 Yashiro is the president of the Shinseikai Ent... \n", + "13423 A gangster deputes two of his men to recover m... \n", + "\n", + " Plot Kyeword \\\n", + "4782 ['blind couple', 'friend', 'mute', 'slacker', ... \n", + "9291 ['class differences', 'butler', 'housekeeper',... \n", + "3137 ['singing in a car', 'reference to the world c... \n", + "17052 ['orphan', 'night', 'cat', 'one word title', '... \n", + "675 ['ancient greece', 'greek', 'macedonia', 'sex ... \n", + "... ... \n", + "6365 ['live'] \n", + "7172 ['harvard', 'no homoeroticism', 'pre code film... \n", + "12644 ['smoking marijuana', 'female full frontal nud... \n", + "23970 ['yaoi', 'boys love', 'gay', 'anime', 'yakuza'... \n", + "13423 ['cricket the sport', 'briefcase', 'caper', 'e... \n", + "\n", + " Director \\\n", + "4782 Rohit Shetty \n", + "9291 James Ivory \n", + "3137 Hadi Hajaig \n", + "17052 Adrià García \n", + "675 Oliver Stone \n", + "... ... \n", + "6365 Hassan Rana \n", + "7172 Jack Conway \n", + "12644 Antonio Zavala Kugler \n", + "23970 Kaori Makita \n", + "13423 Krishna D.K. \n", + "\n", + " Top 5 Casts Writer \\\n", + "4782 ['Ajay Devgn', 'Arshad Warsi', 'Sharman Joshi'... Neeraj Vora \n", + "9291 ['Ruth Prawer Jhabvala', 'Anthony Hopkins', 'E... Kazuo Ishiguro \n", + "3137 ['Sam Rockwell', 'Phoebe Fox', 'Ben Schwartz',... Hadi Hajaig \n", + "17052 ['Adrià García', 'Víctor Maldonado', 'Teresa V... Víctor Maldonado \n", + "675 ['Christopher Kyle', 'Laeta Kalogridis', 'Coli... Oliver Stone \n", + "... ... ... \n", + "6365 ['Shaan Shahid', 'Humayun Saeed', 'Adnan Siddi... Hassan Rana \n", + "7172 ['Donald Ogden Stewart', 'Andrew Percival Youn... Rida Johnson Young \n", + "12644 ['Antonio Zavala Kugler', 'Christian Bach', 'A... Arthur Schnitzler \n", + "23970 ['Kou Yoneda', 'Tarusuke Shingaki', 'Wataru Ha... Hiroshi Seko \n", + "13423 ['Raj Nidimoru', 'Krishna D.K.', 'Sita Menon',... Raj Nidimoru \n", + "\n", + " year path \n", + "4782 -2006 /title/tt0495034/ \n", + "9291 -1993 /title/tt0107943/ \n", + "3137 -2018 /title/tt2316479/ \n", + "17052 -2007 /title/tt0836682/ \n", + "675 -2004 /title/tt0346491/ \n", + "... ... ... \n", + "6365 -2017 /title/tt3945864/ \n", + "7172 -1926 /title/tt0016690/ \n", + "12644 -2013 /title/tt1236434/ \n", + "23970 -2020 /title/tt10675392/ \n", + "13423 (I) (2009) /title/tt1370429/ \n", + "\n", + "[14641 rows x 12 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
20683The Animalsnot-released4.2398['Western']A woman tracks down the five men who raped her...['rape and revenge', 'arizona territory', 'ari...Ron Joy['Michele Carey', 'Henry Silva', 'Keenan Wynn'...Richard Bakalyan-1970/title/tt0065407/
8040The Lincoln Lawyer$40,000,000 (estimated)7.3236K['Crime', 'Drama', 'Mystery']A lawyer defending a wealthy man begins to bel...['defense lawyer', 'plot twist', 'drug rehabil...Brad Furman['Michael Connelly', 'Matthew McConaughey', 'M...John Romano-2011/title/tt1189340/
23664What's the Worst That Could Happen?1 hour 34 minutes5.416K['Comedy', 'Crime']A rich man catches a thief burglarizing his ho...['breaking and entering', 'bankruptcy', 'quest...Sam Weisman['Matthew Chapman', 'Martin Lawrence', 'Danny ...Donald E. Westlake-2001/title/tt0161083/
9078Cleopatranot-released6.2652['Animation', 'History', 'Romance']In order to foil the enemy aliens' \"Cleopatra ...['adult anime', 'adult animation', 'anime', 'd...Osamu Tezuka['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats...Eiichi Yamamoto-1963/title/tt0056937/
17118Seven Cities of Gold1 hour 43 minutes5.9479['Adventure', 'Biography', 'History']In 1769, a Spanish expedition to California se...['limping man', 'prologue', 'voice over narrat...Robert D. Webb['John C. Higgins', 'Joseph Petracca', 'Richar...Richard L. Breen-1955/title/tt0048603/
.......................................
1248The Wall1 hour 28 minutes6.227K['Action', 'Drama', 'Thriller']Two American Soldiers are trapped by a lethal ...['sniper', 'soldier', 'deception', 'wound', 'd...Doug Liman['Aaron Taylor-Johnson', 'John Cena', 'Laith N...Dwain Worrell(II) (2017)/title/tt4218696/
7091Commando 32 hours 13 minutes5.73K['Action', 'Adventure', 'Thriller']Karan goes to London to stop a terrorist attac...['chase']Aditya Datt['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau...Darius Yarmil-2019/title/tt8983168/
15635Adam Resurrected1 hour 46 minutes6.24.2K['Drama', 'War']In the aftermath of World War II, a former cir...['man wears a white suit', 'desert hotel', 'ti...Paul Schrader['Noah Stollman', 'Jeff Goldblum', 'Willem Daf...Yoram Kaniuk-2008/title/tt0479341/
12914Main Street1 hour 32 minutes4.72.9K['Drama']Durham is slowly dying like the tobacco busine...['economic depression', 'reference to lucky st...John Doyle['Colin Firth', 'Ellen Burstyn', 'Patricia Cla...Horton Foote-2010/title/tt1365483/
2825The Outrage1 hour 36 minutes6.22.2K['Crime', 'Drama', 'Western']Travelers in the 1870s Southwest discuss a rec...['highwayman', 'man bound and gagged', 'gun du...Martin Ritt['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau...Michael Kanin-1964/title/tt0058437/
\n", + "

4880 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " movie title Run Time Rating \\\n", + "20683 The Animals not-released 4.2 \n", + "8040 The Lincoln Lawyer $40,000,000 (estimated) 7.3 \n", + "23664 What's the Worst That Could Happen? 1 hour 34 minutes 5.4 \n", + "9078 Cleopatra not-released 6.2 \n", + "17118 Seven Cities of Gold 1 hour 43 minutes 5.9 \n", + "... ... ... ... \n", + "1248 The Wall 1 hour 28 minutes 6.2 \n", + "7091 Commando 3 2 hours 13 minutes 5.7 \n", + "15635 Adam Resurrected 1 hour 46 minutes 6.2 \n", + "12914 Main Street 1 hour 32 minutes 4.7 \n", + "2825 The Outrage 1 hour 36 minutes 6.2 \n", + "\n", + " User Rating Generes \\\n", + "20683 398 ['Western'] \n", + "8040 236K ['Crime', 'Drama', 'Mystery'] \n", + "23664 16K ['Comedy', 'Crime'] \n", + "9078 652 ['Animation', 'History', 'Romance'] \n", + "17118 479 ['Adventure', 'Biography', 'History'] \n", + "... ... ... \n", + "1248 27K ['Action', 'Drama', 'Thriller'] \n", + "7091 3K ['Action', 'Adventure', 'Thriller'] \n", + "15635 4.2K ['Drama', 'War'] \n", + "12914 2.9K ['Drama'] \n", + "2825 2.2K ['Crime', 'Drama', 'Western'] \n", + "\n", + " Overview \\\n", + "20683 A woman tracks down the five men who raped her... \n", + "8040 A lawyer defending a wealthy man begins to bel... \n", + "23664 A rich man catches a thief burglarizing his ho... \n", + "9078 In order to foil the enemy aliens' \"Cleopatra ... \n", + "17118 In 1769, a Spanish expedition to California se... \n", + "... ... \n", + "1248 Two American Soldiers are trapped by a lethal ... \n", + "7091 Karan goes to London to stop a terrorist attac... \n", + "15635 In the aftermath of World War II, a former cir... \n", + "12914 Durham is slowly dying like the tobacco busine... \n", + "2825 Travelers in the 1870s Southwest discuss a rec... \n", + "\n", + " Plot Kyeword Director \\\n", + "20683 ['rape and revenge', 'arizona territory', 'ari... Ron Joy \n", + "8040 ['defense lawyer', 'plot twist', 'drug rehabil... Brad Furman \n", + "23664 ['breaking and entering', 'bankruptcy', 'quest... Sam Weisman \n", + "9078 ['adult anime', 'adult animation', 'anime', 'd... Osamu Tezuka \n", + "17118 ['limping man', 'prologue', 'voice over narrat... Robert D. Webb \n", + "... ... ... \n", + "1248 ['sniper', 'soldier', 'deception', 'wound', 'd... Doug Liman \n", + "7091 ['chase'] Aditya Datt \n", + "15635 ['man wears a white suit', 'desert hotel', 'ti... Paul Schrader \n", + "12914 ['economic depression', 'reference to lucky st... John Doyle \n", + "2825 ['highwayman', 'man bound and gagged', 'gun du... Martin Ritt \n", + "\n", + " Top 5 Casts Writer \\\n", + "20683 ['Michele Carey', 'Henry Silva', 'Keenan Wynn'... Richard Bakalyan \n", + "8040 ['Michael Connelly', 'Matthew McConaughey', 'M... John Romano \n", + "23664 ['Matthew Chapman', 'Martin Lawrence', 'Danny ... Donald E. Westlake \n", + "9078 ['Osamu Tezuka', 'Shigemi Satoyoshi', 'Chinats... Eiichi Yamamoto \n", + "17118 ['John C. Higgins', 'Joseph Petracca', 'Richar... Richard L. Breen \n", + "... ... ... \n", + "1248 ['Aaron Taylor-Johnson', 'John Cena', 'Laith N... Dwain Worrell \n", + "7091 ['Junaid Wasi', 'Gulshan Devaiah', 'Robin Chau... Darius Yarmil \n", + "15635 ['Noah Stollman', 'Jeff Goldblum', 'Willem Daf... Yoram Kaniuk \n", + "12914 ['Colin Firth', 'Ellen Burstyn', 'Patricia Cla... Horton Foote \n", + "2825 ['Akira Kurosawa', 'Ryûnosuke Akutagawa', 'Pau... Michael Kanin \n", + "\n", + " year path \n", + "20683 -1970 /title/tt0065407/ \n", + "8040 -2011 /title/tt1189340/ \n", + "23664 -2001 /title/tt0161083/ \n", + "9078 -1963 /title/tt0056937/ \n", + "17118 -1955 /title/tt0048603/ \n", + "... ... ... \n", + "1248 (II) (2017) /title/tt4218696/ \n", + "7091 -2019 /title/tt8983168/ \n", + "15635 -2008 /title/tt0479341/ \n", + "12914 -2010 /title/tt1365483/ \n", + "2825 -1964 /title/tt0058437/ \n", + "\n", + "[4880 rows x 12 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
16788High in the Cloudsnot-releasedno-rating0['Animation', 'Adventure', 'Comedy']A squirrel embarks on a journey to find an ani...['journey', 'cloud', 'animal', 'friend', 'trag...Timothy Reckart['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar...Philip Ardagh-2022/title/tt1458167/
17155Curious George 3: Back to the Jungle1 hour 21 minutes5.4598['Animation', 'Adventure', 'Comedy']Curious George goes on an epic adventure to sp...['sequel', 'third part', 'jungle', 'curious ge...Phil Weinstein['H.A. Rey', 'Chuck Tately', 'Frank Welker', '...Margret Rey-2015/title/tt4622340/
5985Tarkan: Altin Madalyonnot-released6.21.6K['Action', 'Adventure', 'History']The story of Tarkan and his friends efforts to...['black magic', 'blood', 'axe', 'snake', 'pros...Mehmet Aslan['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',...Sezgin Burak-1973/title/tt0274933/
1640Harley Davidson and the Marlboro Man$23,000,000 (estimated)6.120K['Action', 'Crime', 'Drama']Forced by the imminent foreclosure of their fr...['swimming pool', 'night club', 'voyeurism', '...Simon Wincer['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel...Don Michael Paul-1991/title/tt0102005/
9826I Am All Girlsnot-released5.95.8K['Crime', 'Drama', 'Mystery']A special crimes investigator forms an unlikel...['child', 'bond', 'investigator', 'murder', 'c...Donovan Marsh['Marcell Greeff', 'Emile Leuvennink', 'Erica ...Wayne Fitzjohn-2021/title/tt9013182/
.......................................
11825The Apostle2 hours 14 minutes7.214K['Drama']After his happy life spins out of control, a p...['southern gothic', 'timeframe 1930s', 'preach...Robert Duvall['Robert Duvall', 'Todd Allen', 'Paul Bagget',...Robert Duvall-1997/title/tt0118632/
14740My Normalnot-released4.1756['Drama', 'Romance']A lesbian dominatrix finds a way to use her un...['dominatrix', 'lesbian', 'lesbian kiss', 'blo...Irving Schwartz['Renee Garzon', 'Keith Planit', 'Nicole LaLib...Abdul Malik Abbott-2009/title/tt1117983/
9818Lust och fägring stor2 hours 10 minutes6.96.5K['Drama', 'Romance', 'War']Malmö, Sweden during the Second World War. Sti...['extramarital affair', 'teacher student sex',...Bo Widerberg['Johan Widerberg', 'Marika Lagercrantz', 'Tom...Bo Widerberg-1995/title/tt0113720/
4041RoboCop Returnsnot-releasedno-rating0['Action', 'Adventure', 'Crime']RoboCop returns to fight crime in Detroit.['sequel', 'reboot', 'non comic book superhero...Abe Forsythe['Edward Neumeier', 'Justin Rhodes', 'Abe Fors...Michael MinerNaN/title/tt8688550/
23755The Maiden Heist1 hour 30 minutes617K['Comedy', 'Crime']A comedy centered on three museum security gua...['heist crime', 'caper crime', 'forgery', 'hei...Peter Hewitt['Christopher Walken', 'Joseph McKenna', 'Wynn...Michael LeSieur-2009/title/tt1107860/
\n", + "

4881 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " movie title Run Time \\\n", + "16788 High in the Clouds not-released \n", + "17155 Curious George 3: Back to the Jungle 1 hour 21 minutes \n", + "5985 Tarkan: Altin Madalyon not-released \n", + "1640 Harley Davidson and the Marlboro Man $23,000,000 (estimated) \n", + "9826 I Am All Girls not-released \n", + "... ... ... \n", + "11825 The Apostle 2 hours 14 minutes \n", + "14740 My Normal not-released \n", + "9818 Lust och fägring stor 2 hours 10 minutes \n", + "4041 RoboCop Returns not-released \n", + "23755 The Maiden Heist 1 hour 30 minutes \n", + "\n", + " Rating User Rating Generes \\\n", + "16788 no-rating 0 ['Animation', 'Adventure', 'Comedy'] \n", + "17155 5.4 598 ['Animation', 'Adventure', 'Comedy'] \n", + "5985 6.2 1.6K ['Action', 'Adventure', 'History'] \n", + "1640 6.1 20K ['Action', 'Crime', 'Drama'] \n", + "9826 5.9 5.8K ['Crime', 'Drama', 'Mystery'] \n", + "... ... ... ... \n", + "11825 7.2 14K ['Drama'] \n", + "14740 4.1 756 ['Drama', 'Romance'] \n", + "9818 6.9 6.5K ['Drama', 'Romance', 'War'] \n", + "4041 no-rating 0 ['Action', 'Adventure', 'Crime'] \n", + "23755 6 17K ['Comedy', 'Crime'] \n", + "\n", + " Overview \\\n", + "16788 A squirrel embarks on a journey to find an ani... \n", + "17155 Curious George goes on an epic adventure to sp... \n", + "5985 The story of Tarkan and his friends efforts to... \n", + "1640 Forced by the imminent foreclosure of their fr... \n", + "9826 A special crimes investigator forms an unlikel... \n", + "... ... \n", + "11825 After his happy life spins out of control, a p... \n", + "14740 A lesbian dominatrix finds a way to use her un... \n", + "9818 Malmö, Sweden during the Second World War. Sti... \n", + "4041 RoboCop returns to fight crime in Detroit. \n", + "23755 A comedy centered on three museum security gua... \n", + "\n", + " Plot Kyeword Director \\\n", + "16788 ['journey', 'cloud', 'animal', 'friend', 'trag... Timothy Reckart \n", + "17155 ['sequel', 'third part', 'jungle', 'curious ge... Phil Weinstein \n", + "5985 ['black magic', 'blood', 'axe', 'snake', 'pros... Mehmet Aslan \n", + "1640 ['swimming pool', 'night club', 'voyeurism', '... Simon Wincer \n", + "9826 ['child', 'bond', 'investigator', 'murder', 'c... Donovan Marsh \n", + "... ... ... \n", + "11825 ['southern gothic', 'timeframe 1930s', 'preach... Robert Duvall \n", + "14740 ['dominatrix', 'lesbian', 'lesbian kiss', 'blo... Irving Schwartz \n", + "9818 ['extramarital affair', 'teacher student sex',... Bo Widerberg \n", + "4041 ['sequel', 'reboot', 'non comic book superhero... Abe Forsythe \n", + "23755 ['heist crime', 'caper crime', 'forgery', 'hei... Peter Hewitt \n", + "\n", + " Top 5 Casts Writer \\\n", + "16788 ['Jon Croker', 'Geoff Dunbar', 'Timothy Reckar... Philip Ardagh \n", + "17155 ['H.A. Rey', 'Chuck Tately', 'Frank Welker', '... Margret Rey \n", + "5985 ['Sadik Sendil', 'Kartal Tibet', 'Eva Bender',... Sezgin Burak \n", + "1640 ['Mickey Rourke', 'Don Johnson', 'Chelsea Fiel... Don Michael Paul \n", + "9826 ['Marcell Greeff', 'Emile Leuvennink', 'Erica ... Wayne Fitzjohn \n", + "... ... ... \n", + "11825 ['Robert Duvall', 'Todd Allen', 'Paul Bagget',... Robert Duvall \n", + "14740 ['Renee Garzon', 'Keith Planit', 'Nicole LaLib... Abdul Malik Abbott \n", + "9818 ['Johan Widerberg', 'Marika Lagercrantz', 'Tom... Bo Widerberg \n", + "4041 ['Edward Neumeier', 'Justin Rhodes', 'Abe Fors... Michael Miner \n", + "23755 ['Christopher Walken', 'Joseph McKenna', 'Wynn... Michael LeSieur \n", + "\n", + " year path \n", + "16788 -2022 /title/tt1458167/ \n", + "17155 -2015 /title/tt4622340/ \n", + "5985 -1973 /title/tt0274933/ \n", + "1640 -1991 /title/tt0102005/ \n", + "9826 -2021 /title/tt9013182/ \n", + "... ... ... \n", + "11825 -1997 /title/tt0118632/ \n", + "14740 -2009 /title/tt1117983/ \n", + "9818 -1995 /title/tt0113720/ \n", + "4041 NaN /title/tt8688550/ \n", + "23755 -2009 /title/tt1107860/ \n", + "\n", + "[4881 rows x 12 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie titleRun TimeRatingUser RatingGeneresOverviewPlot KyewordDirectorTop 5 CastsWriteryearpath
count244022440224402244022440224158244022440224402244022362424402
unique239221556911684746239572154611604242111556225023922
topRagenot-releasedno-rating0['Drama']none[]See company contact information['See producer', 'See preliminary cast']See writer-2022/title/tt0114224/
freq4847517401740943142169614214214212014
\n", + "
" + ], + "text/plain": [ + " movie title Run Time Rating User Rating Generes Overview \\\n", + "count 24402 24402 24402 24402 24402 24158 \n", + "unique 23922 1556 91 1684 746 23957 \n", + "top Rage not-released no-rating 0 ['Drama'] none \n", + "freq 4 8475 1740 1740 943 142 \n", + "\n", + " Plot Kyeword Director \\\n", + "count 24402 24402 \n", + "unique 21546 11604 \n", + "top [] See company contact information \n", + "freq 1696 142 \n", + "\n", + " Top 5 Casts Writer year \\\n", + "count 24402 24402 23624 \n", + "unique 24211 15562 250 \n", + "top ['See producer', 'See preliminary cast'] See writer -2022 \n", + "freq 142 142 1201 \n", + "\n", + " path \n", + "count 24402 \n", + "unique 23922 \n", + "top /title/tt0114224/ \n", + "freq 4 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.describe(include='all')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Rage 4\n", + "The Killer 4\n", + "The Beast 4\n", + "Spiral 4\n", + "The Silence 3\n", + " ..\n", + "The Mule 1\n", + "Donnie Brasco 1\n", + "Little Miss Sunshine 1\n", + "Three Billboards Outside Ebbing, Missouri 1\n", + "Guys and Dolls 1\n", + "Name: movie title, Length: 23922, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[\"movie title\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "not-released 8475\n", + "1 hour 30 minutes 503\n", + "1 hour 35 minutes 376\n", + "1 hour 38 minutes 350\n", + "1 hour 31 minutes 338\n", + " ... \n", + "$14,492 1\n", + "$181,415 1\n", + "$11,060,485 1\n", + "$1,043,910 1\n", + "FRF 24,000,000 (estimated) 1\n", + "Name: Run Time, Length: 1556, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[\"Run Time\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "no-rating 1740\n", + "6.4 852\n", + "6.2 847\n", + "6.1 819\n", + "6.3 809\n", + " ... \n", + "9.9 2\n", + "9.8 2\n", + "9.4 2\n", + "1 2\n", + "9.5 2\n", + "Name: Rating, Length: 91, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[\"Rating\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1740\n", + "11K 325\n", + "1.2K 323\n", + "1.1K 315\n", + "1.3K 295\n", + " ... \n", + "501K 1\n", + "769K 1\n", + "321K 1\n", + "991K 1\n", + "347K 1\n", + "Name: User Rating, Length: 1684, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[\"User Rating\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Drama'] 943\n", + "['Action', 'Crime', 'Drama'] 867\n", + "['Crime', 'Drama', 'Thriller'] 609\n", + "['Comedy', 'Drama', 'Romance'] 608\n", + "['Crime', 'Drama'] 550\n", + " ... \n", + "['Drama', 'Romance', 'Crime'] 1\n", + "['Drama', 'Crime', 'Mystery'] 1\n", + "['Family', 'Adventure', 'Comedy'] 1\n", + "['Crime', 'Mystery', 'Horror'] 1\n", + "['Crime', 'Romance', 'Western'] 1\n", + "Name: Generes, Length: 746, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies[\"Generes\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}