""" !pip install scikit-learn !pip install pandas !pip install fastapi !pip install "uvicorn[standard]" !uvicorn main:app --reload """ import numpy as np import pandas as pd from fastapi import FastAPI from scipy.spatial.distance import cosine from sklearn.preprocessing import MultiLabelBinarizer from engine import FS app = FastAPI() data = pd.DataFrame() def inference(first_id: str, second_id: str): first = data.loc[first_id] second = data.loc[second_id] year_diff = int(first['release_year'] - second['release_year']) FS.set_variable('RELEASE_YEAR', year_diff) runtime_diff = int(first['runtime'] - second['runtime']) FS.set_variable('RUNTIME', runtime_diff) if not (np.isnan(first['seasons']) or np.isnan(second['seasons'])): season_diff = int(first['seasons'] - second['seasons']) FS.set_variable('SEASONS', season_diff) else: FS.set_variable('SEASONS', 0) genre_diff = 1 - cosine(first['genres'], second['genres']) FS.set_variable('GENRES', genre_diff) emotion_diff = 1 - cosine(first['emotions'], second['emotions']) FS.set_variable('EMOTIONS', emotion_diff) return FS.inference(['RECOMMENDATION']) @app.on_event('startup') async def startup_event(): global data data = pd.read_csv('processed_data.csv', index_col='id', converters={'genres': pd.eval}) all_genres = data.genres.explode().unique() mlb = MultiLabelBinarizer() mlb.fit([all_genres]) data['genres'] = data['genres'].apply(lambda x: mlb.transform([x])[0]) data['emotions'] = data[['Happy', 'Angry', 'Surprise', 'Sad', 'Fear']].values.tolist() @app.get('/score/{first_id}/{second_id}') def rec_score(first_id: str, second_id: str): try: first = data.loc[first_id] except KeyError: return {'error': f'{first_id} is not a valid id'} try: second = data.loc[second_id] except KeyError: return {'error': f'{second_id} is not a valid id'} return inference(first_id, second_id) @app.get('/recs/{production_id}') async def recs(production_id: str, count: int | None): try: first = data.loc[production_id] except KeyError: return {'error': f'{production_id} is not a valid id'} scores = [] for index, row in data.iterrows(): if str(index) == production_id: continue scores.append((index, inference(production_id, str(index))['RECOMMENDATION'])) scores = [idx[0] for idx in sorted(scores, key=lambda x: x[1], reverse=True)[:count]] return list(scores)