10 KiB
10 KiB
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the movie catalogue from the '::'-delimited file 'movies.dat'.
# The separator is longer than one character, so pandas treats it as a
# regular expression and needs the Python parsing engine.
# Column names are supplied explicitly (presumably the file has no header
# row -- confirm against the data file).
# Rows containing any missing value are discarded right after loading.
df_movie = pd.read_csv(
    'movies.dat',
    sep='::',
    engine='python',
    encoding='latin1',
    names=['MovieID', 'MovieName', 'Category'],
).dropna()

# Preview the first rows.
df_movie.head()
 | MovieID | MovieName | Category |
---|---|---|---|
0 | 1 | Toy Story (1995) | Animation|Children's|Comedy |
1 | 2 | Jumanji (1995) | Adventure|Children's|Fantasy |
2 | 3 | Grumpier Old Men (1995) | Comedy|Romance |
3 | 4 | Waiting to Exhale (1995) | Comedy|Drama |
4 | 5 | Father of the Bride Part II (1995) | Comedy |
# Load the user ratings from the '::'-delimited file 'ratings.dat'.
# The multi-character separator requires the Python parsing engine.
# 'ID' looks like the rating user's identifier and 'TimeStamp' like a Unix
# epoch value -- both are assumptions based on the column names and the
# sample output; confirm against the dataset's README.
# Rows containing any missing value are discarded right after loading.
df_rating = pd.read_csv(
    'ratings.dat',
    sep='::',
    engine='python',
    encoding='latin1',
    names=['ID', 'MovieID', 'Ratings', 'TimeStamp'],
).dropna()

# Preview the first rows.
df_rating.head()
 | ID | MovieID | Ratings | TimeStamp |
---|---|---|---|---|
0 | 1 | 1193 | 5 | 978300760 |
1 | 1 | 661 | 3 | 978302109 |
2 | 1 | 914 | 3 | 978301968 |
3 | 1 | 3408 | 4 | 978300275 |
4 | 1 | 2355 | 5 | 978824291 |
# Attach each rating to its movie's metadata with an inner join on the
# shared 'MovieID' key; movies without ratings (and ratings without a
# matching movie) are dropped by the default inner join.
df = df_movie.merge(df_rating, on='MovieID')

# Preview the first rows of the combined table.
df.head()
 | MovieID | MovieName | Category | ID | Ratings | TimeStamp |
---|---|---|---|---|---|---|
0 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 1 | 5 | 978824268 |
1 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 6 | 4 | 978237008 |
2 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 8 | 4 | 978233496 |
3 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 9 | 5 | 978225952 |
4 | 1 | Toy Story (1995) | Animation|Children's|Comedy | 10 | 5 | 978226474 |
# Group the merged ratings by movie so per-movie statistics can be computed.
groupByMovie = df.groupby('MovieID')

# Mean rating per movie, doubled (presumably to rescale a 5-point rating
# onto a 10-point scale -- confirm the intended scale).
# The result is a Series indexed by MovieID and named 'Ratings'.
#
# NOTE: the previous `movieRatingsMean.columns = ['MovieID', 'Mean']` line
# was removed. A Series has no `columns` axis, so that assignment only
# attached a stray Python attribute and renamed nothing (the displayed
# result still showed `Name: Ratings`). If a two-column DataFrame is
# wanted, use `movieRatingsMean.reset_index(name='Mean')` instead.
movieRatingsMean = groupByMovie['Ratings'].mean() * 2

# Display the per-movie scaled means.
movieRatingsMean
MovieID 1 8.293693 2 6.402282 3 6.033473 4 5.458824 5 6.013514 ... 3948 7.271462 3949 8.230263 3950 7.333333 3951 7.800000 3952 7.561856 Name: Ratings, Length: 3706, dtype: float64