2023-01-07 15:21:05 +01:00
|
|
|
"""
|
|
|
|
!pip install scikit-learn
|
|
|
|
!pip install pandas
|
|
|
|
!pip install fastapi
|
|
|
|
!pip install "uvicorn[standard]"
|
|
|
|
!uvicorn main:app --reload
|
|
|
|
"""
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
from fastapi import FastAPI
|
|
|
|
from scipy.spatial.distance import cosine
|
|
|
|
from sklearn.preprocessing import MultiLabelBinarizer
|
|
|
|
|
|
|
|
from engine import FS
|
|
|
|
|
|
|
|
# FastAPI application object; the route and startup decorators in this file
# register their handlers on it.
app = FastAPI()
|
|
|
|
# Module-level catalogue frame. Starts empty and is rebound by the startup
# handler once ``processed_data.csv`` has been read.
data = pd.DataFrame()
|
|
|
|
|
|
|
|
|
2023-01-07 22:35:00 +01:00
|
|
|
def inference(first_id: str, second_id: str):
    """Feed the feature differences of two productions to ``FS`` and infer.

    Both rows are looked up in the module-level ``data`` frame. Their
    differences are pushed into ``FS`` one variable at a time, after which
    ``FS`` is asked for the ``RECOMMENDATION`` output.

    Args:
        first_id: Index label of the reference production in ``data``.
        second_id: Index label of the production compared against it.

    Returns:
        The result of ``FS.inference(['RECOMMENDATION'])``.

    Raises:
        KeyError: If either id is not an index label of ``data``.
    """
    lhs = data.loc[first_id]
    rhs = data.loc[second_id]

    # Signed gaps (first minus second), truncated to int.
    for variable, column in (('RELEASE_YEAR', 'release_year'), ('RUNTIME', 'runtime')):
        FS.set_variable(variable, int(lhs[column] - rhs[column]))

    # A missing season count on either side is treated as "no difference".
    both_known = not np.isnan(lhs['seasons']) and not np.isnan(rhs['seasons'])
    seasons_gap = int(lhs['seasons'] - rhs['seasons']) if both_known else 0
    FS.set_variable('SEASONS', seasons_gap)

    # scipy's ``cosine`` is a distance, so ``1 - distance`` is the similarity.
    for variable, column in (('GENRES', 'genres'), ('EMOTIONS', 'emotions')):
        FS.set_variable(variable, 1 - cosine(lhs[column], rhs[column]))

    return FS.inference(['RECOMMENDATION'])
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-01-07 15:21:05 +01:00
|
|
|
@app.on_event('startup')
async def startup_event():
    """Load the processed catalogue into the module-level ``data`` frame.

    Reads ``processed_data.csv`` indexed by ``id``, replaces every row's
    genre labels with a binary indicator vector over all genres found in the
    file, and adds an ``emotions`` column holding the values of the
    ``Happy``, ``Angry``, ``Surprise``, ``Sad`` and ``Fear`` columns as a list.
    """
    global data

    # NOTE(review): ``pd.eval`` evaluates the raw cell text as an expression.
    # That is only acceptable while the CSV is a trusted, locally produced
    # file -- confirm, or switch to a literal-only parser.
    data = pd.read_csv(
        'processed_data.csv',
        index_col='id',
        converters={'genres': pd.eval},
    )

    # Fit on a single sample that contains every distinct label so that the
    # encoder's class list covers the whole vocabulary.
    genre_vocabulary = data['genres'].explode().unique()
    binarizer = MultiLabelBinarizer()
    binarizer.fit([genre_vocabulary])

    def to_indicator(labels):
        # ``transform`` works on batches; wrap the row and unwrap the result.
        return binarizer.transform([labels])[0]

    data['genres'] = data['genres'].apply(to_indicator)

    emotion_columns = ['Happy', 'Angry', 'Surprise', 'Sad', 'Fear']
    data['emotions'] = data[emotion_columns].values.tolist()
|
|
|
|
|
|
|
|
|
|
|
|
@app.get('/score/{first_id}/{second_id}')
def rec_score(first_id: str, second_id: str):
    """Return the inference result for a pair of productions.

    The ids are checked against ``data`` in path order. The first one that
    is not an index label short-circuits with an ``{'error': ...}`` payload;
    otherwise the pair is handed to ``inference``.

    Args:
        first_id: Index label of the reference production.
        second_id: Index label of the production compared against it.

    Returns:
        The value returned by ``inference``, or a dict with an ``error`` key.
    """
    for candidate in (first_id, second_id):
        try:
            # The lookup is done only for its KeyError; the row is not needed.
            _ = data.loc[candidate]
        except KeyError:
            return {'error': f'{candidate} is not a valid id'}

    return inference(first_id, second_id)
|
2023-01-07 15:21:05 +01:00
|
|
|
|
|
|
|
|
2023-01-07 22:35:00 +01:00
|
|
|
@app.get('/recs/{production_id}')
async def recs(production_id: str, count: int | None = None):
    """Rank every other production against ``production_id``.

    Args:
        production_id: Index label in ``data`` of the reference production.
        count: Upper bound on the number of ids returned. Optional query
            parameter; when omitted the complete ranking is returned.

    Returns:
        Index labels ordered from highest to lowest ``RECOMMENDATION`` value,
        or an ``{'error': ...}`` payload when ``production_id`` is unknown.
    """
    # FastAPI treats a parameter without a default as required even when its
    # annotation allows ``None``; the explicit ``= None`` above is what makes
    # ``count`` genuinely optional.
    try:
        # The lookup is done only for its KeyError; the row is not needed.
        _ = data.loc[production_id]
    except KeyError:
        return {'error': f'{production_id} is not a valid id'}

    # Only the labels are needed, so walk the index instead of ``iterrows``,
    # which would build an unused Series for every row.
    scores = [
        (index, inference(production_id, str(index))['RECOMMENDATION'])
        for index in data.index
        if str(index) != production_id
    ]

    # ``sorted`` is stable, so ties keep their catalogue order.
    ranked = sorted(scores, key=lambda pair: pair[1], reverse=True)
    return [index for index, _ in ranked[:count]]
|