przetwarzanie_jezyka_natura.../P1/tmdb/main.ipynb
2023-01-19 21:22:01 +01:00

129 lines
3.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"# Read the two CSV files from the current working directory.\n",
"movies = pd.read_csv('tmdb_5000_movies.csv')\n",
"credits = pd.read_csv('tmdb_5000_credits.csv')\n",
"\n",
"# Join the two frames: movies.id is matched against credits.movie_id and the\n",
"# title columns must agree as well (pandas' default join type, inner).\n",
"df = movies.merge(\n",
"    credits,\n",
"    left_on=['id', 'title'],\n",
"    right_on=['movie_id', 'title'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"\n",
"# Columns to decode: the ones the earlier commented-out code ran json.loads on.\n",
"JSON_COLUMNS = [\n",
"    'genres',\n",
"    'keywords',\n",
"    'production_companies',\n",
"    'production_countries',\n",
"    'cast',\n",
"    'crew',\n",
"]\n",
"\n",
"\n",
"def load_json_columns(col):\n",
"    \"\"\"Return a new Series with every JSON string in `col` decoded.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    col : pandas.Series\n",
"        Column whose cells hold JSON-encoded strings.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.Series\n",
"        Series with each string cell replaced by json.loads(cell). Cells that\n",
"        are not strings (already decoded values, NaN) are passed through\n",
"        unchanged, so running this cell twice does not raise.\n",
"\n",
"    Raises\n",
"    ------\n",
"    json.JSONDecodeError\n",
"        If a string cell is not valid JSON.\n",
"    \"\"\"\n",
"    # Series.apply builds a new Series; it must be returned because rebinding\n",
"    # the local name `col` never changes the caller's DataFrame.\n",
"    return col.apply(\n",
"        lambda cell: json.loads(cell) if isinstance(cell, str) else cell\n",
"    )\n",
"\n",
"\n",
"# Store the decoded Series back into df; discarding the return value would\n",
"# leave the columns as raw strings.\n",
"for column_name in JSON_COLUMNS:\n",
"    df[column_name] = load_json_columns(df[column_name])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'id': 1463, 'name': 'culture clash'},\n",
" {'id': 2964, 'name': 'future'},\n",
" {'id': 3386, 'name': 'space war'},\n",
" {'id': 3388, 'name': 'space colony'},\n",
" {'id': 3679, 'name': 'society'},\n",
" {'id': 3801, 'name': 'space travel'},\n",
" {'id': 9685, 'name': 'futuristic'},\n",
" {'id': 9840, 'name': 'romance'},\n",
" {'id': 9882, 'name': 'space'},\n",
" {'id': 9951, 'name': 'alien'},\n",
" {'id': 10148, 'name': 'tribe'},\n",
" {'id': 10158, 'name': 'alien planet'},\n",
" {'id': 10987, 'name': 'cgi'},\n",
" {'id': 11399, 'name': 'marine'},\n",
" {'id': 13065, 'name': 'soldier'},\n",
" {'id': 14643, 'name': 'battle'},\n",
" {'id': 14720, 'name': 'love affair'},\n",
" {'id': 165431, 'name': 'anti war'},\n",
" {'id': 193554, 'name': 'power relations'},\n",
" {'id': 206690, 'name': 'mind and soul'},\n",
" {'id': 209714, 'name': '3d'}]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the keywords cell of the row labelled 0 (the saved output of this cell\n",
"# is a list of {'id', 'name'} dicts).\n",
"df.loc[0, 'keywords']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}