{ "cells": [ { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MovieIDMovieNameCategory
01Toy Story (1995)Animation|Children's|Comedy
12Jumanji (1995)Adventure|Children's|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama
45Father of the Bride Part II (1995)Comedy
\n", "
" ], "text/plain": [ " MovieID MovieName Category\n", "0 1 Toy Story (1995) Animation|Children's|Comedy\n", "1 2 Jumanji (1995) Adventure|Children's|Fantasy\n", "2 3 Grumpier Old Men (1995) Comedy|Romance\n", "3 4 Waiting to Exhale (1995) Comedy|Drama\n", "4 5 Father of the Bride Part II (1995) Comedy" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_movie = pd.read_csv('movies.dat',sep='::',encoding='latin1',engine='python',names=['MovieID','MovieName','Category'])\n", "df_movie.dropna(inplace=True)\n", "df_movie.head()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDMovieIDRatingsTimeStamp
0111935978300760
116613978302109
219143978301968
3134084978300275
4123555978824291
\n", "
" ], "text/plain": [ " ID MovieID Ratings TimeStamp\n", "0 1 1193 5 978300760\n", "1 1 661 3 978302109\n", "2 1 914 3 978301968\n", "3 1 3408 4 978300275\n", "4 1 2355 5 978824291" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_rating = pd.read_csv('ratings.dat',sep='::',encoding='latin1',engine='python',names=['ID','MovieID','Ratings','TimeStamp'])\n", "df_rating.dropna(inplace=True)\n", "df_rating.head()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MovieIDMovieNameCategoryIDRatingsTimeStamp
01Toy Story (1995)Animation|Children's|Comedy15978824268
11Toy Story (1995)Animation|Children's|Comedy64978237008
21Toy Story (1995)Animation|Children's|Comedy84978233496
31Toy Story (1995)Animation|Children's|Comedy95978225952
41Toy Story (1995)Animation|Children's|Comedy105978226474
\n", "
" ], "text/plain": [ " MovieID MovieName Category ID Ratings \\\n", "0 1 Toy Story (1995) Animation|Children's|Comedy 1 5 \n", "1 1 Toy Story (1995) Animation|Children's|Comedy 6 4 \n", "2 1 Toy Story (1995) Animation|Children's|Comedy 8 4 \n", "3 1 Toy Story (1995) Animation|Children's|Comedy 9 5 \n", "4 1 Toy Story (1995) Animation|Children's|Comedy 10 5 \n", "\n", " TimeStamp \n", "0 978824268 \n", "1 978237008 \n", "2 978233496 \n", "3 978225952 \n", "4 978226474 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.merge(df_movie,df_rating,left_on='MovieID',right_on='MovieID')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MovieID\n", "1 8.293693\n", "2 6.402282\n", "3 6.033473\n", "4 5.458824\n", "5 6.013514\n", " ... \n", "3948 7.271462\n", "3949 8.230263\n", "3950 7.333333\n", "3951 7.800000\n", "3952 7.561856\n", "Name: Ratings, Length: 3706, dtype: float64" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groupByMovie = df.groupby('MovieID')\n", "movieRatingsMean = groupByMovie['Ratings'].mean()*2\n", "movieRatingsMean.columns = ['MovieID','Mean']\n", "movieRatingsMean" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" } } }, "nbformat": 4, "nbformat_minor": 2 }