{
"cells": [
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MovieID | \n",
" MovieName | \n",
" Category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Jumanji (1995) | \n",
" Adventure|Children's|Fantasy | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Grumpier Old Men (1995) | \n",
" Comedy|Romance | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Waiting to Exhale (1995) | \n",
" Comedy|Drama | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Father of the Bride Part II (1995) | \n",
" Comedy | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MovieID MovieName Category\n",
"0 1 Toy Story (1995) Animation|Children's|Comedy\n",
"1 2 Jumanji (1995) Adventure|Children's|Fantasy\n",
"2 3 Grumpier Old Men (1995) Comedy|Romance\n",
"3 4 Waiting to Exhale (1995) Comedy|Drama\n",
"4 5 Father of the Bride Part II (1995) Comedy"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read movies.dat ('::'-separated, no header row) and drop rows with missing values.\n",
"df_movie = pd.read_csv(\n",
"    'movies.dat',\n",
"    sep='::',\n",
"    encoding='latin1',\n",
"    engine='python',\n",
"    names=['MovieID', 'MovieName', 'Category'],\n",
").dropna()\n",
"df_movie.head()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" MovieID | \n",
" Ratings | \n",
" TimeStamp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1193 | \n",
" 5 | \n",
" 978300760 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 661 | \n",
" 3 | \n",
" 978302109 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" 914 | \n",
" 3 | \n",
" 978301968 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 3408 | \n",
" 4 | \n",
" 978300275 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" 2355 | \n",
" 5 | \n",
" 978824291 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID MovieID Ratings TimeStamp\n",
"0 1 1193 5 978300760\n",
"1 1 661 3 978302109\n",
"2 1 914 3 978301968\n",
"3 1 3408 4 978300275\n",
"4 1 2355 5 978824291"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read ratings.dat ('::'-separated, no header row) and drop rows with missing values.\n",
"df_rating = pd.read_csv(\n",
"    'ratings.dat',\n",
"    sep='::',\n",
"    encoding='latin1',\n",
"    engine='python',\n",
"    names=['ID', 'MovieID', 'Ratings', 'TimeStamp'],\n",
").dropna()\n",
"df_rating.head()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" MovieID | \n",
" MovieName | \n",
" Category | \n",
" ID | \n",
" Ratings | \n",
" TimeStamp | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 1 | \n",
" 5 | \n",
" 978824268 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 6 | \n",
" 4 | \n",
" 978237008 | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 8 | \n",
" 4 | \n",
" 978233496 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 9 | \n",
" 5 | \n",
" 978225952 | \n",
"
\n",
" \n",
" 4 | \n",
" 1 | \n",
" Toy Story (1995) | \n",
" Animation|Children's|Comedy | \n",
" 10 | \n",
" 5 | \n",
" 978226474 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" MovieID MovieName Category ID Ratings \\\n",
"0 1 Toy Story (1995) Animation|Children's|Comedy 1 5 \n",
"1 1 Toy Story (1995) Animation|Children's|Comedy 6 4 \n",
"2 1 Toy Story (1995) Animation|Children's|Comedy 8 4 \n",
"3 1 Toy Story (1995) Animation|Children's|Comedy 9 5 \n",
"4 1 Toy Story (1995) Animation|Children's|Comedy 10 5 \n",
"\n",
" TimeStamp \n",
"0 978824268 \n",
"1 978237008 \n",
"2 978233496 \n",
"3 978225952 \n",
"4 978226474 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Join each rating to its movie record on the shared MovieID key (default inner join).\n",
"df = df_movie.merge(df_rating, on='MovieID')\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MovieID\n",
"1 8.293693\n",
"2 6.402282\n",
"3 6.033473\n",
"4 5.458824\n",
"5 6.013514\n",
" ... \n",
"3948 7.271462\n",
"3949 8.230263\n",
"3950 7.333333\n",
"3951 7.800000\n",
"3952 7.561856\n",
"Name: Ratings, Length: 3706, dtype: float64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean rating per movie, doubled (presumably to map the 1-5 scale onto 10 points - confirm).\n",
"groupByMovie = df.groupby('MovieID')\n",
"# The aggregate is a Series indexed by MovieID, so it has no `.columns`; assigning to that\n",
"# attribute renamed nothing and the values stayed labelled 'Ratings'. Use rename() instead.\n",
"# Append .reset_index() if a two-column MovieID/Mean DataFrame is needed.\n",
"movieRatingsMean = (groupByMovie['Ratings'].mean() * 2).rename('Mean')\n",
"movieRatingsMean"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}