better preprocessing
This commit is contained in:
parent
d4a8245390
commit
87834a7bd9
24
better_preprocess.py
Normal file
24
better_preprocess.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Preprocessing script: loads the raw movie CSV, normalises the 'Rating',
# 'User Rating' and 'year' columns, drops the 'Run Time' and 'path' columns,
# and writes the result to "data.csv".


def _expand_abbreviated_counts(counts):
    """Convert abbreviated count strings to whole numbers.

    A trailing "K" scales the value by 1,000 and a trailing "M" by
    1,000,000, so "1.2K" becomes 1200 and "3M" becomes 3000000. Plain
    numeric strings are passed through unchanged and missing values stay
    missing. Naive text substitution ("K" -> "000") is deliberately avoided
    because it turns "1.2K" into "1.2000", i.e. 1.2.

    Args:
        counts: pandas Series of strings (missing values allowed).

    Returns:
        pandas Series with nullable ``Int64`` dtype.

    Raises:
        ValueError: if a value is not numeric once the suffix is removed
            (raised by ``pd.to_numeric``).
    """
    # Pick the multiplier from the last character; anything that is not
    # K/M (including missing values) maps to NaN and falls back to 1.
    scale = counts.str[-1].map({"K": 10 ** 3, "M": 10 ** 6}).fillna(1)
    magnitude = pd.to_numeric(counts.str.rstrip("KM"))
    # round() absorbs floating-point noise from products such as 4.1 * 1000
    # before the cast to a nullable integer column.
    return (magnitude * scale).round().astype("Int64")


df = pd.read_csv("25k_IMDb_movie_Dataset.csv")

# Columns that are not kept in the cleaned output.
df = df.drop(columns=["Run Time", "path"])

# "no-rating" is stored as 0 so the column can be numeric.
# regex=False keeps the match literal on every pandas version.
df["Rating"] = pd.to_numeric(
    df["Rating"].str.replace("no-rating", "0", regex=False)
)

# NOTE(review): presumably vote counts abbreviated like "12K" / "1.2M" --
# confirm against the dataset.
df["User Rating"] = _expand_abbreviated_counts(df["User Rating"])

# Strip dashes, parentheses, spaces and the letters I/V/X from the year text
# in a single pass.
# NOTE(review): the letters look like Roman-numeral disambiguators such as
# "(II)" -- confirm against the dataset.
year_noise = str.maketrans("", "", "-()IVX ")
year_text = df["year"].str.translate(year_noise)

# Rows with no year left (empty after stripping, or missing) get the
# placeholder year 1900.
year_text = year_text.mask(year_text == "", "1900").fillna("1900")

# Anchor every year to 1 January so it parses as a full date.
df["year"] = pd.to_datetime(year_text + "-01-01")

df.to_csv("data.csv")
|
Loading…
Reference in New Issue
Block a user