# Provenance
# ----------
# Reconstructed from commit a7b78f1eaa8ec6404c96adbfae1528e61aa31af0
#   Author: Ryszard Staruch
#   Date:   Thu Dec 14 16:39:46 2023 +0100
#   Message: Add app
# The commit added three new files: .gitignore, environment.yml and main.py.
# The two non-Python files are reproduced below; main.py follows as code.
#
# .gitignore
# ----------
#   *.pyc
#
# environment.yml
# ---------------
#   name: Poleval
#   channels:
#     - defaults
#   dependencies:
#     - bzip2=1.0.8=he774522_0
#     - ca-certificates=2023.08.22=haa95532_0
#     - libffi=3.4.4=hd77b12b_0
#     - openssl=3.0.12=h2bbff1b_0
#     - pip=23.3.1=py311haa95532_0
#     - python=3.11.5=he1021f5_0
#     - setuptools=68.2.2=py311haa95532_0
#     - sqlite=3.41.2=h2bbff1b_0
#     - tk=8.6.12=h2bbff1b_0
#     - vc=14.2=h21ff451_1
#     - vs2015_runtime=14.27.29016=h5e58377_2
#     - wheel=0.41.2=py311haa95532_0
#     - xz=5.4.5=h8cc25b3_0
#     - zlib=1.2.13=h8cc25b3_0
#     - pip:
#       - annotated-types==0.6.0
#       - anyio==3.7.1
#       - click==8.1.7
#       - colorama==0.4.6
#       - distance==0.1.3
#       - fastapi==0.105.0
#       - h11==0.14.0
#       - httptools==0.6.1
#       - idna==3.6
#       - joblib==1.3.2
#       - numpy==1.26.2
#       - pandas==2.1.4
#       - pydantic==2.5.2
#       - pydantic-core==2.14.5
#       - python-dateutil==2.8.2
#       - python-dotenv==1.0.0
#       - pytz==2023.3.post1
#       - pyyaml==6.0.1
#       - scikit-learn==1.3.2
#       - scipy==1.11.4
#       - six==1.16.0
#       - sniffio==1.3.0
#       - starlette==0.27.0
#       - threadpoolctl==3.2.0
#       - typing-extensions==4.9.0
#       - tzdata==2023.3
#       - uvicorn==0.24.0.post1
#       - watchfiles==0.21.0
#       - websockets==12.0

"""Scoring API for challenge submissions.

Exposes a single FastAPI endpoint that receives the raw text of the input,
expected and predicted files for up to three dataset splits and returns the
evaluation metrics for the ``"QuestionAnswering"`` or ``"EmotionRecognition"``
challenge.
"""

import io

import distance
import pandas as pd
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import f1_score


# Body returned for every request that cannot be scored.
ERROR_RESPONSE = {
    "status": 400
}
app = FastAPI()

# (key used in the response, attribute prefix on ``Data``) for each split.
_SPLITS = (("dev-0", "dev"), ("test-A", "testA"), ("test-B", "testB"))


class Data(BaseModel):
    """Request body: the challenge name plus raw file contents per split.

    Every file field carries the full text of one file and defaults to the
    empty string.  A split is scored only when its ``*_out`` field is
    non-empty.  The ``*_in`` fields are read only for the
    ``"EmotionRecognition"`` challenge.
    """

    challenge: str
    dev_expected: str = ""
    dev_out: str = ""
    testA_expected: str = ""
    testA_out: str = ""
    testB_expected: str = ""
    testB_out: str = ""
    dev_in: str = ""
    testA_in: str = ""
    testB_in: str = ""


def preprocess_data(out: str, expected: str) -> tuple[list[str], list[str]]:
    """Split raw prediction and gold text into lists of lines.

    Args:
        out: Raw contents of the prediction file, one answer per line.
        expected: Raw contents of the gold file, one answer per line.

    Returns:
        ``(out_lines, expected_lines)``.  ``out_lines`` is truncated to the
        length of ``expected_lines``; it may still be shorter, which callers
        must treat as a malformed submission.
    """
    expected = expected.split("\n")
    # A newline-terminated file produces one spurious trailing "" after the
    # split.  Drop only that element so a file without the final newline
    # keeps its last real line (the previous unconditional [:-1] lost it).
    if expected[-1] == "":
        expected.pop()

    out = out.split("\n")[:len(expected)]

    return out, expected


def get_levenshtein_score(trues: list[str], preds: list[str]) -> float:
    """Return the mean case-insensitive Levenshtein similarity, in percent.

    Only pairs whose expected answer is non-empty are scored.  An empty
    prediction for such a pair scores 0; otherwise the score is
    ``1 - distance.nlevenshtein(true, pred)`` on lower-cased strings.

    Args:
        trues: Expected answers.
        preds: Predicted answers, aligned with ``trues``.

    Returns:
        The average similarity multiplied by 100, or ``0.0`` when no
        expected answer is non-empty (previously a ``ZeroDivisionError``).
    """
    similarities = []
    for true, pred in zip(trues, preds):
        if true == "":
            continue
        if pred == "":
            similarities.append(0)
        else:
            similarities.append(
                1 - distance.nlevenshtein(true.lower(), pred.lower())
            )

    if not similarities:
        return 0.0
    return sum(similarities) / len(similarities) * 100


def get_answerability_f1(trues: list[str], preds: list[str]) -> float:
    """Return the F1 score, in percent, of detecting empty answers.

    Each answer is mapped to 1 when it is the empty string and 0 otherwise,
    so the positive class is "answer is empty".

    Args:
        trues: Expected answers.
        preds: Predicted answers, aligned with ``trues``.

    Returns:
        ``f1_score`` of the two label lists (``zero_division=0.0``) times 100.
    """
    true_labels = [1 if answer == "" else 0 for answer in trues]
    predicted_labels = [1 if answer == "" else 0 for answer in preds]
    return f1_score(true_labels, predicted_labels, zero_division=0.0) * 100


def get_scores(trues: list[str], preds: list[str]) -> dict[str, float]:
    """Compute all question-answering metrics for one split.

    Args:
        trues: Expected answers.
        preds: Predicted answers, aligned with ``trues``.

    Returns:
        A dict with ``"Levenshtein"``, ``"AnswerabilityF1"`` and ``"Final"``,
        each rounded to two decimals.  ``"Final"`` is the mean of the two
        *unrounded* component scores.
    """
    # Each metric is computed once; the original evaluated both twice.
    levenshtein = get_levenshtein_score(trues, preds)
    answerability_f1 = get_answerability_f1(trues, preds)

    scores = {}
    scores["Levenshtein"] = round(levenshtein, 2)
    scores["AnswerabilityF1"] = round(answerability_f1, 2)
    scores["Final"] = round((levenshtein + answerability_f1) / 2, 2)
    return scores


def _macro_f1(df_expected: pd.DataFrame, df_prediction: pd.DataFrame) -> float:
    """Return the macro F1, in percent, over all cells of two frames."""
    return 100 * f1_score(
        df_expected.values.flatten(),
        df_prediction.values.flatten(),
        average='macro',
        zero_division=0.0,
        labels=[True, False]
    )


def get_emotion_recognition_scores(
    df_in: pd.DataFrame,
    df_expected: pd.DataFrame,
    df_predition: pd.DataFrame,
) -> dict[str, float]:
    """Compute emotion-recognition metrics for one split.

    Rows whose ``text`` value in ``df_in`` consists solely of ``#``
    characters are scored as text-level annotations; every other row is
    scored as a sentence-level annotation.  The three frames are expected to
    be row-aligned.

    Args:
        df_in: Input frame; must contain a ``text`` column.
        df_expected: Gold labels.
        df_predition: Predicted labels (parameter name kept as-is for
            backward compatibility).

    Returns:
        A dict with ``"SentenceF1"``, ``"TextF1"`` and ``"FinalF1"`` (the
        mean of the two unrounded F1 values), each rounded to two decimals.
    """
    is_text_row = df_in['text'].apply(lambda x: x == '#' * len(x))

    # Both calls now pass (expected, prediction); the text-level call used
    # to pass them the other way round.
    f1_text_score = _macro_f1(
        df_expected[is_text_row], df_predition[is_text_row]
    )
    f1_sentence_score = _macro_f1(
        df_expected[~is_text_row], df_predition[~is_text_row]
    )

    final_score = (f1_text_score + f1_sentence_score) / 2

    return {
        "SentenceF1": round(f1_sentence_score, 2),
        "TextF1": round(f1_text_score, 2),
        "FinalF1": round(final_score, 2)
    }


def _score_question_answering(raw_in: str, raw_expected: str, raw_out: str):
    """Score one question-answering split; return None if it is malformed."""
    out, expected = preprocess_data(raw_out, raw_expected)
    # No gold lines, or fewer predictions than gold lines: cannot be scored.
    if not expected or len(out) != len(expected):
        return None
    return get_scores(expected, out)


def _score_emotion_recognition(raw_in: str, raw_expected: str, raw_out: str):
    """Score one emotion-recognition split; return None if it is malformed."""
    try:
        df_in = pd.read_table(io.StringIO(raw_in))
        df_expected = pd.read_table(io.StringIO(raw_expected))
        df_prediction = pd.read_table(io.StringIO(raw_out))
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        return None

    if "text" not in df_in.columns:
        return None
    # The boolean row mask built from df_in is applied to both label frames,
    # and their cells are compared one-to-one, so the shapes must agree.
    if len(df_in) != len(df_expected):
        return None
    if df_expected.shape != df_prediction.shape:
        return None

    return get_emotion_recognition_scores(df_in, df_expected, df_prediction)


# NOTE(review): the route is registered for GET yet reads a JSON body
# (``data: Data``).  Bodies on GET requests are not reliably supported by
# every HTTP client or proxy; consider POST.  Left as GET so existing
# callers keep working.
@app.get("/")
async def root(data: Data):
    """Score every submitted split for the requested challenge.

    Args:
        data: Challenge name and raw file contents.

    Returns:
        ``{"status": 200, "results": {...}}`` with one entry per scored
        split (``"dev-0"``, ``"test-A"``, ``"test-B"``), or
        ``ERROR_RESPONSE`` when the challenge is unknown, no split was
        submitted, or a submitted split is malformed.
    """
    if data.challenge == "QuestionAnswering":
        score_split = _score_question_answering
    elif data.challenge == "EmotionRecognition":
        score_split = _score_emotion_recognition
    else:
        return ERROR_RESPONSE

    results = {}
    for result_key, prefix in _SPLITS:
        raw_out = getattr(data, f"{prefix}_out")
        if not raw_out:
            # Split not submitted.
            continue

        scores = score_split(
            getattr(data, f"{prefix}_in"),
            getattr(data, f"{prefix}_expected"),
            raw_out,
        )
        if scores is None:
            return ERROR_RESPONSE
        results[result_key] = scores

    if not results:
        return ERROR_RESPONSE
    return {
        "status": 200,
        "results": results
    }