fuzzy-logic-movies/main.py

"""
!pip install scikit-learn
!pip install pandas
!pip install fastapi
!pip install "uvicorn[standard]"
!uvicorn main:app --reload
"""
import multiprocessing
import time
from multiprocessing import Pool

import numpy as np
import pandas as pd
import pandas.core.series
from fastapi import FastAPI
from scipy.spatial.distance import cosine
from sklearn.preprocessing import MultiLabelBinarizer

from engine import fuzzy_system

# FastAPI application object; the route handlers below register themselves on it.
app = FastAPI()
# Placeholder frame; startup_event() rebinds this to the contents of processed_data.csv.
data = pd.DataFrame()
# Genre binarizer; fitted in startup_event() and used by details() to decode genre vectors.
mlb = MultiLabelBinarizer()


def inference(first: pandas.core.series.Series,
              second_id: str,
              release_year_param='similar',
              runtime_param='similar',
              seasons_param='similar',
              genres_param='same',
              emotions_param='same',
              df=None):
    """Run the fuzzy system on the pair (``first``, production ``second_id``).

    Args:
        first: Row of the reference production. It must provide the
            ``release_year``, ``runtime``, ``seasons``, ``genres`` and
            ``emotions`` fields.
        second_id: Index label of the production to compare against.
        release_year_param, runtime_param, seasons_param, genres_param,
        emotions_param: Forwarded unchanged to ``fuzzy_system``; their
            accepted values are defined by the ``engine`` module.
        df: Frame in which ``second_id`` is looked up. When ``None`` the
            module-level ``data`` frame is used.

    Returns:
        A ``(second_id, value)`` tuple, where ``value`` is the
        ``'RECOMMENDATION'`` output of the fuzzy system.

    Raises:
        KeyError: If ``second_id`` is not present in the lookup frame.
    """
    lookup_frame = data if df is None else df
    second = lookup_frame.loc[second_id]

    system = fuzzy_system(release_year_param=release_year_param,
                          runtime_param=runtime_param,
                          seasons_param=seasons_param,
                          genres_param=genres_param,
                          emotions_param=emotions_param)

    # Signed integer differences (first minus second) feed the numeric inputs.
    system.set_variable('RELEASE_YEAR',
                        int(first['release_year'] - second['release_year']))
    system.set_variable('RUNTIME',
                        int(first['runtime'] - second['runtime']))

    # A missing season count on either side is fed in as "no difference"
    # (presumably productions without seasons, e.g. movies - TODO confirm).
    seasons_known = (not np.isnan(first['seasons'])
                     and not np.isnan(second['seasons']))
    season_gap = int(first['seasons'] - second['seasons']) if seasons_known else 0
    system.set_variable('SEASONS', season_gap)

    # scipy's cosine() is a distance, so 1 - distance is the cosine similarity.
    system.set_variable('GENRES',
                        1 - cosine(first['genres'], second['genres']))
    system.set_variable('EMOTIONS',
                        1 - cosine(first['emotions'], second['emotions']))

    outcome = system.inference(['RECOMMENDATION'])
    return second_id, outcome['RECOMMENDATION']


def process_dataframe(df,
                      production,
                      release_year_param,
                      runtime_param,
                      seasons_param,
                      genres_param,
                      emotions_param
                      ):
    """Score every production in ``df`` against ``production``.

    Args:
        df: Frame whose index labels identify the candidate productions.
            It is also passed to ``inference`` as the lookup frame, so the
            function does not depend on the module-level ``data``.
        production: Row of the reference production, passed as ``first``
            to ``inference``.
        release_year_param, runtime_param, seasons_param, genres_param,
        emotions_param: Forwarded unchanged to ``inference``.

    Returns:
        A list with one ``inference`` result per row of ``df``, in index
        order. An empty frame yields an empty list.
    """
    # Only the index labels are needed; iterating the index avoids building
    # a throw-away row Series for every entry as iterrows() would.
    return [
        inference(production,
                  str(label),
                  release_year_param,
                  runtime_param,
                  seasons_param,
                  genres_param,
                  emotions_param,
                  df)
        for label in df.index
    ]


@app.on_event('startup')
async def startup_event():
    """Load ``processed_data.csv`` into the module-level ``data`` frame.

    After loading, the ``genres`` column is replaced by the binarized
    vector produced by ``mlb`` for each row, and an ``emotions`` column is
    added holding the row's Happy/Angry/Surprise/Sad/Fear values as a list.
    """
    global data

    # NOTE(review): pd.eval evaluates every ``genres`` cell as an expression.
    # That is only acceptable while the CSV is a trusted, locally produced
    # file - confirm, or switch to a literal-only parser.
    data = pd.read_csv('processed_data.csv', index_col='id', converters={'genres': pd.eval})

    # Fit the binarizer on a single sample that contains every distinct genre.
    genre_vocabulary = data['genres'].explode().unique()
    mlb.fit([genre_vocabulary])

    def encode(genre_list):
        # transform() expects a batch; wrap the row and unwrap the result.
        return mlb.transform([genre_list])[0]

    data['genres'] = data['genres'].apply(encode)

    emotion_columns = ['Happy', 'Angry', 'Surprise', 'Sad', 'Fear']
    data['emotions'] = data[emotion_columns].values.tolist()


@app.get('/find/{title}')
def titles(title: str):
    """Return every production whose title contains ``title``.

    The match is a case-insensitive substring test. Each hit is reported
    as a dict with the keys ``id`` (index label), ``title`` and ``year``.
    """
    needle = title.lower()
    return [
        {'id': label, 'title': entry['title'], 'year': entry['release_year']}
        for label, entry in data.iterrows()
        if needle in entry['title'].lower()
    ]


@app.get('/details/{production_id}')
def details(production_id: str):
    """Return the descriptive fields of one production.

    Args:
        production_id: Index label of the production in ``data``.

    Returns:
        A dict with ``title``, ``type``, ``description``, ``year``,
        ``runtime`` and ``genres`` (the genre labels decoded from the
        binarized vector), or ``{'error': ...}`` when the id is unknown.
    """
    try:
        production = data.loc[production_id]
    except KeyError:
        # Only an unknown id is reported as an error payload; this matches
        # the handling in the other endpoints and no longer hides
        # unrelated failures behind a bare except.
        return {'error': f'{production_id} is not a valid id'}
    genres = production['genres']
    # inverse_transform works on a 2-D batch; reshape the single vector into
    # one row and take that row's labels back out.
    genres = mlb.inverse_transform(genres.reshape(1, -1))[0]
    return {
        'title': production['title'],
        'type': production['type'],
        'description': production['description'],
        'year': int(production['release_year']),
        'runtime': int(production['runtime']),
        'genres': genres,
    }


@app.get('/score/{first_id}/{second_id}')
def rec_score(first_id: str, second_id: str):
    """Score a single pair of productions with the default parameters.

    Returns the result of ``inference`` for the pair, or an
    ``{'error': ...}`` dict naming the first id that is not in ``data``.
    """
    # Validate both ids in order so the error names the first invalid one.
    for candidate in (first_id, second_id):
        try:
            data.loc[candidate]
        except KeyError:
            return {'error': f'{candidate} is not a valid id'}

    return inference(data.loc[first_id], second_id)


def _rank_recommendations(scores, production_id, count):
    """Return the ids of the ``count`` best-scoring entries, excluding ``production_id``."""
    # Drop the queried production before truncating, so the result never
    # holds more than ``count`` ids regardless of where it would have ranked.
    candidates = [entry for entry in scores if entry[0] != production_id]
    candidates.sort(key=lambda entry: entry[1], reverse=True)
    return [entry[0] for entry in candidates[:max(count, 0)]]


@app.get('/recs/{production_id}')
async def recs(production_id: str,
               release_year_param: str | None = 'similar',
               runtime_param: str | None = 'similar',
               seasons_param: str | None = 'similar',
               genres_param: str | None = 'same',
               emotions_param: str | None = 'same',
               count: int | None = 5):
    """Recommend the productions that score highest against ``production_id``.

    Every row of ``data`` is scored with ``process_dataframe``; the work is
    split into one chunk per CPU and run in a process pool.

    Args:
        production_id: Index label of the reference production.
        release_year_param, runtime_param, seasons_param, genres_param,
        emotions_param: Forwarded unchanged to ``process_dataframe``.
        count: Maximum number of ids to return; ``None`` falls back to 5.

    Returns:
        ``{'id': [...]}`` with at most ``count`` ids ordered from best to
        worst score and never containing ``production_id`` itself, or
        ``{'error': ...}`` when ``production_id`` is unknown.
    """
    try:
        first = data.loc[production_id]
    except KeyError:
        return {'error': f'{production_id} is not a valid id'}

    if count is None:
        count = 5

    time_start = time.time()
    cpus = multiprocessing.cpu_count()

    # Split by row position rather than handing the frame to np.array_split,
    # which goes through the deprecated DataFrame.swapaxes.
    chunk_positions = np.array_split(np.arange(len(data)), cpus)
    jobs = [(data.iloc[positions],
             first,
             release_year_param,
             runtime_param,
             seasons_param,
             genres_param,
             emotions_param) for positions in chunk_positions]

    # NOTE(review): this coroutine blocks the event loop while the pool runs.
    # The context manager ensures the worker processes are torn down after
    # every request instead of accumulating.
    with Pool(cpus) as pool:
        partial_scores = pool.starmap(process_dataframe, jobs)

    print(f'time elapsed = {time.time() - time_start}')

    scores = [entry for part in partial_scores for entry in part]
    return {
        'id': _rank_recommendations(scores, production_id, count)
    }
api 2023-01-07 15:21:05 +01:00			`"""`
			`!pip install scikit-learn`
			`!pip install pandas`
			`!pip install fastapi`
			`!pip install "uvicorn[standard]"`
			`!uvicorn main:app --reload`
			`"""`
multiprocessing 2023-01-09 19:50:09 +01:00			`import multiprocessing`
			`import time`
			`from multiprocessing import Pool`
api 2023-01-07 15:21:05 +01:00
			`import numpy as np`
			`import pandas as pd`
multiprocessing 2023-01-09 19:50:09 +01:00			`import pandas.core.series`
api 2023-01-07 15:21:05 +01:00			`from fastapi import FastAPI`
			`from scipy.spatial.distance import cosine`
			`from sklearn.preprocessing import MultiLabelBinarizer`

engine update 2023-01-25 14:39:25 +01:00			`from engine import fuzzy_system`
api 2023-01-07 15:21:05 +01:00
			`app = FastAPI()`
			`data = pd.DataFrame()`
new endpoints 2023-01-13 15:20:42 +01:00			`mlb = MultiLabelBinarizer()`
api 2023-01-07 15:21:05 +01:00
api adjusted for changing params 2023-01-27 18:43:12 +01:00
			`def inference(first: pandas.core.series.Series,`
			`second_id: str,`
			`release_year_param='similar',`
			`runtime_param='similar',`
			`seasons_param='similar',`
			`genres_param='same',`
			`emotions_param='same',`
			`df=None):`
multiprocessing 2023-01-09 19:50:09 +01:00			`if df is not None:`
			`second = df.loc[second_id]`
			`else:`
			`second = data.loc[second_id]`
recs 2023-01-07 22:35:00 +01:00
api adjusted for changing params 2023-01-27 18:43:12 +01:00			`FS = fuzzy_system(release_year_param=release_year_param,`
			`runtime_param=runtime_param,`
			`seasons_param=seasons_param,`
			`genres_param=genres_param,`
			`emotions_param=emotions_param)`
engine update 2023-01-25 14:39:25 +01:00
recs 2023-01-07 22:35:00 +01:00			`year_diff = int(first['release_year'] - second['release_year'])`
			`FS.set_variable('RELEASE_YEAR', year_diff)`

			`runtime_diff = int(first['runtime'] - second['runtime'])`
			`FS.set_variable('RUNTIME', runtime_diff)`

			`if not (np.isnan(first['seasons']) or np.isnan(second['seasons'])):`
			`season_diff = int(first['seasons'] - second['seasons'])`
			`FS.set_variable('SEASONS', season_diff)`
			`else:`
			`FS.set_variable('SEASONS', 0)`

			`genre_diff = 1 - cosine(first['genres'], second['genres'])`
			`FS.set_variable('GENRES', genre_diff)`

			`emotion_diff = 1 - cosine(first['emotions'], second['emotions'])`
			`FS.set_variable('EMOTIONS', emotion_diff)`

multiprocessing 2023-01-09 19:50:09 +01:00			`return second_id, FS.inference(['RECOMMENDATION'])['RECOMMENDATION']`
recs 2023-01-07 22:35:00 +01:00

api adjusted for changing params 2023-01-27 18:43:12 +01:00			`def process_dataframe(df,`
			`production,`
			`release_year_param,`
			`runtime_param,`
			`seasons_param,`
			`genres_param,`
			`emotions_param`
			`):`
multiprocessing 2023-01-09 19:50:09 +01:00			`scores = []`
			`for index, row in df.iterrows():`
api adjusted for changing params 2023-01-27 18:43:12 +01:00			`scores.append(inference(production,`
			`str(index),`
			`release_year_param,`
			`runtime_param,`
			`seasons_param,`
			`genres_param,`
			`emotions_param,`
			`df))`
multiprocessing 2023-01-09 19:50:09 +01:00			`return scores`

recs 2023-01-07 22:35:00 +01:00
api 2023-01-07 15:21:05 +01:00			`@app.on_event('startup')`
			`async def startup_event():`
			`global data`
new endpoints 2023-01-13 15:20:42 +01:00			`global mlb`
api 2023-01-07 15:21:05 +01:00			`data = pd.read_csv('processed_data.csv', index_col='id', converters={'genres': pd.eval})`
			`all_genres = data.genres.explode().unique()`
			`mlb.fit([all_genres])`
			`data['genres'] = data['genres'].apply(lambda x: mlb.transform([x])[0])`
			`data['emotions'] = data[['Happy', 'Angry', 'Surprise', 'Sad', 'Fear']].values.tolist()`


new endpoints 2023-01-13 15:20:42 +01:00			`@app.get('/find/{title}')`
			`def titles(title: str):`
fox 2023-01-13 17:55:18 +01:00			`response = []`
new endpoints 2023-01-13 15:20:42 +01:00			`for index, row in data.iterrows():`
			`if title.lower() in row['title'].lower():`
fox 2023-01-13 17:55:18 +01:00			`response.append({'id': index, 'title': row['title'], 'year': row['release_year']})`
new endpoints 2023-01-13 15:20:42 +01:00			`return response`


			`@app.get('/details/{production_id}')`
			`def details(production_id: str):`
			`try:`
			`production = data.loc[production_id]`
			`except:`
			`return {'error': f'{production_id} is not a valid id'}`
			`genres = production['genres']`
			`genres = mlb.inverse_transform(genres.reshape(1, -1))[0]`
			`return {`
			`'title': production['title'],`
			`'type': production['type'],`
			`'description': production['description'],`
			`'year': int(production['release_year']),`
			`'runtime': int(production['runtime']),`
			`'genres': genres,`
			`}`


api 2023-01-07 15:21:05 +01:00			`@app.get('/score/{first_id}/{second_id}')`
			`def rec_score(first_id: str, second_id: str):`
			`try:`
			`first = data.loc[first_id]`
			`except KeyError:`
			`return {'error': f'{first_id} is not a valid id'}`
			`try:`
			`second = data.loc[second_id]`
			`except KeyError:`
			`return {'error': f'{second_id} is not a valid id'}`

multiprocessing 2023-01-09 19:50:09 +01:00			`return inference(first, second_id)`
api 2023-01-07 15:21:05 +01:00

recs 2023-01-07 22:35:00 +01:00			`@app.get('/recs/{production_id}')`
api adjusted for changing params 2023-01-27 18:43:12 +01:00			`async def recs(production_id: str,`
			`release_year_param: str \| None = 'similar',`
			`runtime_param: str \| None = 'similar',`
			`seasons_param: str \| None = 'similar',`
			`genres_param: str \| None = 'same',`
			`emotions_param: str \| None = 'same',`
			`count: int \| None = 5):`
recs 2023-01-07 22:35:00 +01:00			`try:`
			`first = data.loc[production_id]`
			`except KeyError:`
			`return {'error': f'{production_id} is not a valid id'}`
api 2023-01-07 15:21:05 +01:00
recs 2023-01-07 22:35:00 +01:00			`scores = []`
multiprocessing 2023-01-09 19:50:09 +01:00			`time_start = time.time()`
			`cpus = multiprocessing.cpu_count()`
			`df_list = np.array_split(data, cpus)`
			`pool = Pool(cpus)`
api adjusted for changing params 2023-01-27 18:43:12 +01:00			`results = [pool.apply_async(process_dataframe,`
			`[df,`
			`first,`
			`release_year_param,`
			`runtime_param,`
			`seasons_param,`
			`genres_param,`
			`emotions_param]) for df in df_list]`
multiprocessing 2023-01-09 19:50:09 +01:00
			`for r in results:`
			`r.wait()`
			`for r in results:`
			`scores += r.get()`
			`print(f'time elapsed = {time.time() - time_start}')`
api adjusted for changing params 2023-01-27 18:43:12 +01:00			`scores = [idx[0] for idx in sorted(scores, key=lambda x: x[1], reverse=True)[:count + 1]]`
update API 2023-01-27 21:51:54 +01:00			`if production_id in scores:`
			`scores.remove(production_id)`
change return in recs function 2023-01-14 00:00:35 +01:00			`return {`
			`'id': scores`
update API 2023-01-27 21:51:54 +01:00			`}`