przetwarzanie_jezyka_natura.../P1/tmdb/main.ipynb
2023-01-19 21:22:01 +01:00

3.2 KiB

import numpy as np
import pandas as pd
# Read the two TMDB CSV files from the current working directory.
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Join the two tables on both the id and the title; rows are kept only when
# the (id, title) pair on the movies side equals (movie_id, title) on the
# credits side (inner join, the pandas default).
df = movies.merge(
    credits,
    how='inner',
    left_on=['id', 'title'],
    right_on=['movie_id', 'title'],
)
import json
def load_json_columns(col: pd.Series) -> pd.Series:
    """Decode a Series of JSON text into Python objects.

    Args:
        col: Series whose elements are JSON-encoded strings.

    Returns:
        A new Series with the same index in which every element has been
        passed through ``json.loads``. ``col`` itself is not modified, so
        the caller has to store the returned Series to keep the result.

    Raises:
        json.JSONDecodeError: If an element is not valid JSON.
        TypeError: If an element is not a str, bytes or bytearray.
    """
    # Series.apply builds a new Series. Rebinding the local name ``col`` would
    # only change this function's own variable, so the result is returned.
    return col.apply(json.loads)
# Decode the JSON text stored in 'genres' and write the result back to the
# frame. Series.apply returns a new Series instead of changing the column in
# place, so without the assignment the parsed values would be thrown away.
df['genres'] = df['genres'].apply(json.loads)
# The same decoding for the other columns is currently disabled:
# df['keywords'] = df['keywords'].apply(json.loads)
# df['production_companies'] = df['production_companies'].apply(json.loads)
# df['production_countries'] = df['production_countries'].apply(json.loads)
# df['cast'] = df['cast'].apply(json.loads)
# df['crew'] = df['crew'].apply(json.loads)
pandas.core.series.Series
# Look up the element with index label 0 in the 'keywords' column. As a bare
# expression this only displays a value when run as the last line of a
# notebook cell.
df['keywords'][0]
[{'id': 1463, 'name': 'culture clash'},
 {'id': 2964, 'name': 'future'},
 {'id': 3386, 'name': 'space war'},
 {'id': 3388, 'name': 'space colony'},
 {'id': 3679, 'name': 'society'},
 {'id': 3801, 'name': 'space travel'},
 {'id': 9685, 'name': 'futuristic'},
 {'id': 9840, 'name': 'romance'},
 {'id': 9882, 'name': 'space'},
 {'id': 9951, 'name': 'alien'},
 {'id': 10148, 'name': 'tribe'},
 {'id': 10158, 'name': 'alien planet'},
 {'id': 10987, 'name': 'cgi'},
 {'id': 11399, 'name': 'marine'},
 {'id': 13065, 'name': 'soldier'},
 {'id': 14643, 'name': 'battle'},
 {'id': 14720, 'name': 'love affair'},
 {'id': 165431, 'name': 'anti war'},
 {'id': 193554, 'name': 'power relations'},
 {'id': 206690, 'name': 'mind and soul'},
 {'id': 209714, 'name': '3d'}]