# fuzzy-logic-movies/main.py

"""
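Setup and launch commands (notebook-style `!` shell escapes):

!pip install scikit-learn
!pip install pandas
!pip install fastapi
!pip install "uvicorn[standard]"
!uvicorn main:app --reload

numpy and scipy, which this module also imports, are installed as
dependencies of scikit-learn.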
"""

import numpy as np
import pandas as pd
from fastapi import FastAPI
from scipy.spatial.distance import cosine
from sklearn.preprocessing import MultiLabelBinarizer

from engine import FS

# Application object; the startup hook and the route handlers below register on it.
app = FastAPI()
# Empty placeholder; startup_event() rebinds this to the frame read from the CSV.
data = pd.DataFrame()


def _cosine_similarity(first_vector, second_vector):
    """Return the cosine similarity of two vectors, or 0.0 if either is all zeros.

    ``scipy.spatial.distance.cosine`` divides by the product of the two
    norms, so an all-zero vector produces NaN. A NaN score cannot be sorted
    reliably and is not valid JSON, so a zero vector is treated as having no
    similarity to anything.
    """
    if not (np.any(first_vector) and np.any(second_vector)):
        return 0.0
    return 1 - cosine(first_vector, second_vector)


def inference(first_id: str, second_id: str):
    """Run the shared ``FS`` system on the feature comparison of two titles.

    Both rows are looked up in the module-level ``data`` frame. Their
    differences and similarities are written into ``FS`` and the
    ``RECOMMENDATION`` output is inferred.

    Args:
        first_id: Index label of the reference title in ``data``.
        second_id: Index label of the title being compared against it.

    Returns:
        The result of ``FS.inference(['RECOMMENDATION'])``; callers in this
        file read its ``'RECOMMENDATION'`` entry.

    Raises:
        KeyError: If either id is not a label in ``data``'s index.
    """
    # NOTE(review): FS is module-level state mutated on every call; confirm
    # that callers never run this concurrently.
    first = data.loc[first_id]
    second = data.loc[second_id]

    # Signed differences (first minus second), truncated to int.
    FS.set_variable('RELEASE_YEAR', int(first['release_year'] - second['release_year']))
    FS.set_variable('RUNTIME', int(first['runtime'] - second['runtime']))

    # When either title has no season count (NaN), use 0 for the difference.
    if np.isnan(first['seasons']) or np.isnan(second['seasons']):
        FS.set_variable('SEASONS', 0)
    else:
        FS.set_variable('SEASONS', int(first['seasons'] - second['seasons']))

    # These two inputs are similarities (1 - cosine distance), not differences.
    genre_similarity = _cosine_similarity(first['genres'], second['genres'])
    FS.set_variable('GENRES', genre_similarity)

    emotion_similarity = _cosine_similarity(first['emotions'], second['emotions'])
    FS.set_variable('EMOTIONS', emotion_similarity)

    return FS.inference(['RECOMMENDATION'])


@app.on_event('startup')
async def startup_event():
    """Load ``processed_data.csv`` and build the per-title feature vectors.

    Rebinds the module-level ``data`` frame, indexed by the ``id`` column.
    The ``genres`` column is replaced by one multi-hot vector per row, and a
    new ``emotions`` column holds the five emotion scores of each row as a
    list.
    """
    global data
    # NOTE(review): pd.eval evaluates every ``genres`` cell as an expression.
    # That is only acceptable while the CSV is a trusted, locally produced
    # file; consider ast.literal_eval if that ever changes.
    data = pd.read_csv('processed_data.csv', index_col='id', converters={'genres': pd.eval})

    # Fit on the per-row genre lists themselves. Going through
    # ``explode().unique()`` turns an empty list into NaN, and the binarizer
    # cannot sort a mix of NaN and strings. One fit_transform call also
    # encodes every row at once instead of calling transform per row.
    mlb = MultiLabelBinarizer()
    data['genres'] = list(mlb.fit_transform(data['genres']))

    data['emotions'] = data[['Happy', 'Angry', 'Surprise', 'Sad', 'Fear']].values.tolist()


@app.get('/score/{first_id}/{second_id}')
def rec_score(first_id: str, second_id: str):
    """Return the inference result for a pair of titles.

    Each id is checked against the module-level ``data`` frame, first id
    first. An unknown id yields an ``{'error': ...}`` payload naming it;
    otherwise the result of ``inference(first_id, second_id)`` is returned.
    """
    for requested_id in (first_id, second_id):
        try:
            # The lookup is only an existence check; its result is not used.
            data.loc[requested_id]
        except KeyError:
            return {'error': f'{requested_id} is not a valid id'}

    return inference(first_id, second_id)


@app.get('/recs/{production_id}')
async def recs(production_id: str, count: int | None = None):
    """Return the ids of the other titles, best recommendation first.

    Every other row of the module-level ``data`` frame is scored against
    ``production_id`` with ``inference`` and ranked by its
    ``'RECOMMENDATION'`` value, highest first.

    Args:
        production_id: Index label of the reference title in ``data``.
        count: Maximum number of ids to return. It defaults to ``None``
            (return all) so the query parameter is optional; without a
            default FastAPI would require it on every request. The value is
            used directly as a slice bound.

    Returns:
        A list of index labels, or an ``{'error': ...}`` payload when
        ``production_id`` is not in ``data``.
    """
    try:
        # The lookup is only an existence check; its result is not used.
        data.loc[production_id]
    except KeyError:
        return {'error': f'{production_id} is not a valid id'}

    # Only the index labels are needed, so iterate the index and skip
    # building a Series per row with iterrows().
    scores = [
        (other_id, inference(production_id, str(other_id))['RECOMMENDATION'])
        for other_id in data.index
        if str(other_id) != production_id
    ]
    scores.sort(key=lambda pair: pair[1], reverse=True)

    return [other_id for other_id, _ in scores[:count]]