dl_projekt/transformer.ipynb
2024-06-04 15:30:43 +02:00

16 KiB

import pandas as pd
import numpy as np

# Load the three dataset splits from CSV files in the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
valid = pd.read_csv("valid.csv")

# Recode the label -1 as 0 in every split; all other label values are kept.
train["review_score"] = train["review_score"].mask(train["review_score"] == -1, 0)
test["review_score"] = test["review_score"].mask(test["review_score"] == -1, 0)
valid["review_score"] = valid["review_score"].mask(valid["review_score"] == -1, 0)
import torch
torch.cuda.is_available()
True
from transformers import pipeline
# Build a sentiment-analysis pipeline on device 0. No model name is given, so
# the library picks its default checkpoint (see the notice logged below).
sentiment_pipeline = pipeline("sentiment-analysis", device=0)
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
  warnings.warn(
# Run the pipeline over every review in the test split; truncation=True is
# passed so that over-long reviews are truncated by the pipeline.
test["predicted_score"] = sentiment_pipeline(test["review_text"].tolist(), truncation=True)
# Inspect the raw prediction for the first row (a dict with 'label' and 'score').
test.iloc[0]["predicted_score"]
{'label': 'POSITIVE', 'score': 0.9997923970222473}
# Map the pipeline's string labels onto the 0/1 encoding used by review_score.
str_to_int_score = {"POSITIVE" : 1, "NEGATIVE" : 0}

# Convert each prediction dict to its integer class via the 'label' field.
test["model_predictions"] = test["predicted_score"].apply(lambda x: str_to_int_score[x["label"]])
test.head()
Unnamed: 0 review_text review_score predicted_score model_predictions
0 1265039 I love the Fact you can do what EVER you want ... 1 {'label': 'POSITIVE', 'score': 0.9997923970222... 1
1 3132003 Tony Hawk's without the Pro Skater. Finding ou... 1 {'label': 'POSITIVE', 'score': 0.9989967942237... 1
2 880195 It's pretty good. 1 {'label': 'POSITIVE', 'score': 0.9998482465744... 1
3 717128 This the best dungeon game I have played since... 1 {'label': 'POSITIVE', 'score': 0.9998807907104... 1
4 5221356 Totally awesome game alone or with a friend. I... 1 {'label': 'POSITIVE', 'score': 0.9998763799667... 1
def get_metrics(df=None):
    """Print accuracy, precision, recall and F1 for binary predictions.

    Args:
        df: DataFrame with a ``review_score`` column (true labels, 0 or 1)
            and a ``model_predictions`` column (predicted labels). When
            omitted, the module-level ``test`` DataFrame is evaluated, which
            is what a bare ``get_metrics()`` call has always done.

    Predictions are rounded to the nearest integer before being compared, so
    accuracy and the confusion-matrix counts are based on the same values.
    A metric whose denominator is zero (for example precision when nothing
    was predicted positive) is reported as 0.00 instead of raising
    ``ZeroDivisionError``.
    """
    if df is None:
        df = test

    predicted = np.rint(df["model_predictions"].to_numpy())
    actual = df["review_score"].to_numpy()

    def _ratio(numerator, denominator):
        # Guard against empty input and degenerate prediction sets.
        return numerator / denominator if denominator else 0.0

    true_pos = int(np.sum((actual == 1) & (predicted == 1)))
    false_pos = int(np.sum((actual == 0) & (predicted == 1)))
    false_neg = int(np.sum((actual == 1) & (predicted == 0)))

    accuracy = _ratio(int(np.sum(predicted == actual)), len(actual))
    precision = _ratio(true_pos, true_pos + false_pos)
    recall = _ratio(true_pos, true_pos + false_neg)
    F1_score = _ratio(2 * precision * recall, precision + recall)

    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {F1_score:.2f}")
get_metrics()
Accuracy: 0.77
Precision: 0.97
Recall: 0.75
F1 Score: 0.84

Użyty domyślnie model (distilbert/distilbert-base-uncased-finetuned-sst-2-english) jest (wg karty modelu) modelem do klasyfikacji tematów. Spróbujmy modelu przeznaczonego do analizy sentymentu (sentiment analysis) recenzji.

sentiment_pipeline = pipeline(model="nlptown/bert-base-multilingual-uncased-sentiment", device=0)
C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
  warnings.warn(
config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]
C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:157: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\Adrian\.cache\huggingface\hub\models--nlptown--bert-base-multilingual-uncased-sentiment. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  warnings.warn(message)
pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]
tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]
vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]
special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]
sentiment_pipeline(test.iloc[0]["review_text"])
[{'label': '5 stars', 'score': 0.8000338673591614}]
# Re-run inference on the test split with the newly loaded pipeline,
# overwriting the predictions stored from the previous model.
test["predicted_score"] = sentiment_pipeline(test["review_text"].tolist(), truncation=True)
# Keep only the label string from each prediction dict.
test["predicted_score"] = test["predicted_score"].apply(lambda x : x["label"])
# Show how many reviews received each label.
test["predicted_score"].value_counts()
predicted_score
5 stars    6183
4 stars    3952
1 star     2399
3 stars    1883
2 stars    1299
Name: count, dtype: int64
# Collapse the star labels to binary classes: 3 stars and above count as positive (1).
str_to_int_score = {"5 stars" : 1, "4 stars" : 1, "3 stars": 1, "2 stars": 0, "1 star": 0} # Arbitrarily chosen thresholds

# Translate each star label into its binary class, then re-evaluate.
test["model_predictions"] = test["predicted_score"].apply(lambda x: str_to_int_score[x])
get_metrics()
Accuracy: 0.86
Precision: 0.95
Recall: 0.88
F1 Score: 0.91

Wyniki są teraz lepsze. W porównaniu z LSTM model ten ma odrobinę niższą precyzję i wyższą czułość (recall), czyli częściej, również błędnie, uznaje recenzje za pozytywne.

def test_review_text(sentence):
    """Classify a single review and print the result.

    Prints the integer class (0 or 1) followed by a human-readable verdict.

    Args:
        sentence: Review text to classify.

    Relies on the module-level ``sentiment_pipeline`` and ``str_to_int_score``
    as they are defined at call time, so the mapping must cover the labels
    produced by the currently loaded pipeline.
    """
    # truncation=True mirrors the batch inference calls in this notebook, so
    # arbitrarily long reviews are handled the same way here as there.
    model_output = sentiment_pipeline([sentence], truncation=True)
    score = str_to_int_score[model_output[0]["label"]]
    print(score)
    if score == 0:
        print("Negative review")
    else:
        print("Positive review")
test_review_text("A buggy, uninspired mess")
0
Negative review
test_review_text("This game is bad")
0
Negative review
test_review_text("This game destroyed my life")
0
Negative review
test_review_text("Best game I've ever played")
1
Positive review
test_review_text("Fun cooperative play with scalable difficulty. Rapid path to get into a game with friends or open public games. ")
1
Positive review
test_review_text("Deliriously buggy. Fun if/when it works properly. Wait and see if they actually QA the next few patches before you play.")
0
Negative review