376 lines
10 KiB
Plaintext
376 lines
10 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 47,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"%matplotlib inline"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 48,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>MovieID</th>\n",
|
||
|
" <th>MovieName</th>\n",
|
||
|
" <th>Category</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>Jumanji (1995)</td>\n",
|
||
|
" <td>Adventure|Children's|Fantasy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>Grumpier Old Men (1995)</td>\n",
|
||
|
" <td>Comedy|Romance</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>Waiting to Exhale (1995)</td>\n",
|
||
|
" <td>Comedy|Drama</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>Father of the Bride Part II (1995)</td>\n",
|
||
|
" <td>Comedy</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" MovieID MovieName Category\n",
|
||
|
"0 1 Toy Story (1995) Animation|Children's|Comedy\n",
|
||
|
"1 2 Jumanji (1995) Adventure|Children's|Fantasy\n",
|
||
|
"2 3 Grumpier Old Men (1995) Comedy|Romance\n",
|
||
|
"3 4 Waiting to Exhale (1995) Comedy|Drama\n",
|
||
|
"4 5 Father of the Bride Part II (1995) Comedy"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 48,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df_movie = pd.read_csv('movies.dat',sep='::',encoding='latin1',engine='python',names=['MovieID','MovieName','Category'])\n",
|
||
|
"df_movie.dropna(inplace=True)\n",
|
||
|
"df_movie.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 49,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>ID</th>\n",
|
||
|
" <th>MovieID</th>\n",
|
||
|
" <th>Ratings</th>\n",
|
||
|
" <th>TimeStamp</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>1193</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>978300760</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>661</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>978302109</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>914</td>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>978301968</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>3408</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>978300275</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>2355</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>978824291</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" ID MovieID Ratings TimeStamp\n",
|
||
|
"0 1 1193 5 978300760\n",
|
||
|
"1 1 661 3 978302109\n",
|
||
|
"2 1 914 3 978301968\n",
|
||
|
"3 1 3408 4 978300275\n",
|
||
|
"4 1 2355 5 978824291"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 49,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df_rating = pd.read_csv('ratings.dat',sep='::',encoding='latin1',engine='python',names=['ID','MovieID','Ratings','TimeStamp'])\n",
|
||
|
"df_rating.dropna(inplace=True)\n",
|
||
|
"df_rating.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 50,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>MovieID</th>\n",
|
||
|
" <th>MovieName</th>\n",
|
||
|
" <th>Category</th>\n",
|
||
|
" <th>ID</th>\n",
|
||
|
" <th>Ratings</th>\n",
|
||
|
" <th>TimeStamp</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>978824268</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>978237008</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" <td>8</td>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>978233496</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" <td>9</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>978225952</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>Toy Story (1995)</td>\n",
|
||
|
" <td>Animation|Children's|Comedy</td>\n",
|
||
|
" <td>10</td>\n",
|
||
|
" <td>5</td>\n",
|
||
|
" <td>978226474</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" MovieID MovieName Category ID Ratings \\\n",
|
||
|
"0 1 Toy Story (1995) Animation|Children's|Comedy 1 5 \n",
|
||
|
"1 1 Toy Story (1995) Animation|Children's|Comedy 6 4 \n",
|
||
|
"2 1 Toy Story (1995) Animation|Children's|Comedy 8 4 \n",
|
||
|
"3 1 Toy Story (1995) Animation|Children's|Comedy 9 5 \n",
|
||
|
"4 1 Toy Story (1995) Animation|Children's|Comedy 10 5 \n",
|
||
|
"\n",
|
||
|
" TimeStamp \n",
|
||
|
"0 978824268 \n",
|
||
|
"1 978237008 \n",
|
||
|
"2 978233496 \n",
|
||
|
"3 978225952 \n",
|
||
|
"4 978226474 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 50,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"df = pd.merge(df_movie,df_rating,left_on='MovieID',right_on='MovieID')\n",
|
||
|
"df.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 60,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"MovieID\n",
|
||
|
"1 8.293693\n",
|
||
|
"2 6.402282\n",
|
||
|
"3 6.033473\n",
|
||
|
"4 5.458824\n",
|
||
|
"5 6.013514\n",
|
||
|
" ... \n",
|
||
|
"3948 7.271462\n",
|
||
|
"3949 8.230263\n",
|
||
|
"3950 7.333333\n",
|
||
|
"3951 7.800000\n",
|
||
|
"3952 7.561856\n",
|
||
|
"Name: Ratings, Length: 3706, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 60,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"groupByMovie = df.groupby('MovieID')\n",
|
||
|
"movieRatingsMean = groupByMovie['Ratings'].mean()*2\n",
|
||
|
"movieRatingsMean.columns = ['MovieID','Mean']\n",
|
||
|
"movieRatingsMean"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]"
|
||
|
},
|
||
|
"orig_nbformat": 4,
|
||
|
"vscode": {
|
||
|
"interpreter": {
|
||
|
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||
|
}
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|