16 KiB
16 KiB
import pandas as pd
import numpy as np
# Read the three dataset splits, in the order train / test / valid.
train, test, valid = map(pd.read_csv, ("train.csv", "test.csv", "valid.csv"))

# Recode review_score -1 to 0 in every split; the metric code in this
# notebook treats 0 as the negative class and 1 as the positive class.
train.loc[train["review_score"].eq(-1), "review_score"] = 0
test.loc[test["review_score"].eq(-1), "review_score"] = 0
valid.loc[valid["review_score"].eq(-1), "review_score"] = 0
import torch
# Check that PyTorch can see a CUDA device; the pipelines in this notebook
# are created with device=0.
torch.cuda.is_available()
True
from transformers import pipeline
# Name the checkpoint and revision explicitly instead of relying on the
# task's implicit default (transformers warns that an unpinned pipeline is
# not recommended). These are the model and revision the implicit default
# resolved to when this notebook was run, so the results are unchanged.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    revision="af0f99b",
    device=0,
)
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english). Using a pipeline without specifying a model name and revision in production is not recommended. C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn(
# Run the pipeline over every test review in a single call and keep the raw
# result (one dict with 'label' and 'score' per review) in a new column.
# NOTE(review): truncation=True is presumably forwarded to the tokenizer so
# that over-long reviews are cut to the model's input limit - confirm.
test["predicted_score"] = sentiment_pipeline(test["review_text"].tolist(), truncation=True)
# Inspect the raw prediction stored for the first row.
test.iloc[0]["predicted_score"]
{'label': 'POSITIVE', 'score': 0.9997923970222473}
# Translate the pipeline's label strings into the 0/1 encoding of review_score.
str_to_int_score = {
    "POSITIVE": 1,
    "NEGATIVE": 0,
}
test["model_predictions"] = [
    str_to_int_score[prediction["label"]]
    for prediction in test["predicted_score"]
]
test.head()
Unnamed: 0 | review_text | review_score | predicted_score | model_predictions | |
---|---|---|---|---|---|
0 | 1265039 | I love the Fact you can do what EVER you want ... | 1 | {'label': 'POSITIVE', 'score': 0.9997923970222... | 1 |
1 | 3132003 | Tony Hawk's without the Pro Skater. Finding ou... | 1 | {'label': 'POSITIVE', 'score': 0.9989967942237... | 1 |
2 | 880195 | It's pretty good. | 1 | {'label': 'POSITIVE', 'score': 0.9998482465744... | 1 |
3 | 717128 | This the best dungeon game I have played since... | 1 | {'label': 'POSITIVE', 'score': 0.9998807907104... | 1 |
4 | 5221356 | Totally awesome game alone or with a friend. I... | 1 | {'label': 'POSITIVE', 'score': 0.9998763799667... | 1 |
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def get_metrics(df=None):
    """Print accuracy, precision, recall and F1 score for binary predictions.

    Args:
        df: DataFrame holding the ground truth in ``review_score`` and the
            predictions in ``model_predictions`` (1 = positive,
            0 = negative). When omitted, the module-level ``test`` frame is
            used, so the original zero-argument call keeps working.

    Predictions are rounded to the nearest integer once, and the rounded
    values feed both the accuracy and the confusion counts. A metric whose
    denominator is zero (for example precision when nothing was predicted
    positive) is reported as 0.00 instead of raising ZeroDivisionError.
    """
    if df is None:
        df = test

    predicted = np.rint(df["model_predictions"].to_numpy())
    actual = df["review_score"].to_numpy()

    accuracy = _safe_ratio(np.sum(predicted == actual), len(actual))

    # Confusion counts for the positive class (label 1).
    tp_count = int(np.sum((actual == 1) & (predicted == 1)))
    fp_count = int(np.sum((actual == 0) & (predicted == 1)))
    fn_count = int(np.sum((actual == 1) & (predicted == 0)))

    precision = _safe_ratio(tp_count, tp_count + fp_count)
    recall = _safe_ratio(tp_count, tp_count + fn_count)
    f1_score = _safe_ratio(2 * precision * recall, precision + recall)

    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1_score:.2f}")
# Report test-set metrics for the predictions of the default pipeline model.
get_metrics()
Accuracy: 0.77 Precision: 0.97 Recall: 0.75 F1 Score: 0.84
Użyty domyślnie model (distilbert/distilbert-base-uncased-finetuned-sst-2-english) jest – wg karty modelu – modelem do klasyfikacji tematów. Spróbujmy modelu przeznaczonego do analizy sentymentu recenzji.
# Rebind sentiment_pipeline to an explicitly named review-sentiment checkpoint.
# No task string is passed here.
# NOTE(review): the task is presumably inferred from the model's metadata - confirm.
sentiment_pipeline = pipeline(model="nlptown/bert-base-multilingual-uncased-sentiment", device=0)
C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn(
config.json: 0%| | 0.00/953 [00:00<?, ?B/s]
C:\Users\Adrian\miniconda3\lib\site-packages\huggingface_hub\file_download.py:157: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\Adrian\.cache\huggingface\hub\models--nlptown--bert-base-multilingual-uncased-sentiment. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations. To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development warnings.warn(message)
pytorch_model.bin: 0%| | 0.00/669M [00:00<?, ?B/s]
tokenizer_config.json: 0%| | 0.00/39.0 [00:00<?, ?B/s]
vocab.txt: 0%| | 0.00/872k [00:00<?, ?B/s]
special_tokens_map.json: 0%| | 0.00/112 [00:00<?, ?B/s]
# Probe the new model on the first test review to see its output format.
sentiment_pipeline(test.iloc[0]["review_text"])
[{'label': '5 stars', 'score': 0.8000338673591614}]
# Classify every test review and keep only the predicted label string
# (the confidence score is discarded).
test["predicted_score"] = [
    prediction["label"]
    for prediction in sentiment_pipeline(test["review_text"].tolist(), truncation=True)
]
# Distribution of the predicted labels.
test["predicted_score"].value_counts()
predicted_score 5 stars 6183 4 stars 3952 1 star 2399 3 stars 1883 2 stars 1299 Name: count, dtype: int64
# Arbitrarily chosen thresholds: 3 stars and above count as a positive review.
str_to_int_score = {
    "5 stars": 1,
    "4 stars": 1,
    "3 stars": 1,
    "2 stars": 0,
    "1 star": 0,
}
# Look up every label; an unknown label raises KeyError.
test["model_predictions"] = test["predicted_score"].map(str_to_int_score.__getitem__)
get_metrics()
Accuracy: 0.86 Precision: 0.95 Recall: 0.88 F1 Score: 0.91
Wyniki są teraz lepsze. W porównaniu z LSTM model ten ma odrobinę niższą precyzję i wyższy recall, czyli więcej recenzji (również błędnie) uznaje za pozytywne.
def test_review_text(sentence):
    """Classify a single review and print its binary score and verdict.

    Args:
        sentence: Review text to classify.

    Uses the module-level ``sentiment_pipeline`` and ``str_to_int_score``
    as they are bound at call time. Prints the integer score, then
    "Negative review" for 0 or "Positive review" otherwise.
    """
    # Pass truncation=True like the batch calls on the test set do, so a
    # review longer than the model's input limit is shortened, not rejected.
    model_output = sentiment_pipeline([sentence], truncation=True)
    score = str_to_int_score[model_output[0]["label"]]
    print(score)
    if score == 0:
        print("Negative review")
    else:
        print("Positive review")
# Spot check: short, plainly negative wording.
test_review_text("A buggy, uninspired mess")
0 Negative review
# Spot check: minimal negative statement.
test_review_text("This game is bad")
0 Negative review
# Spot check: figurative phrasing whose intended sentiment is ambiguous.
test_review_text("This game destroyed my life")
0 Negative review
# Spot check: plainly positive superlative.
test_review_text("Best game I've ever played")
1 Positive review
# Spot check: longer, feature-focused positive review.
test_review_text("Fun cooperative play with scalable difficulty. Rapid path to get into a game with friends or open public games. ")
1 Positive review
# Spot check: mixed review that praises the game conditionally but advises waiting.
test_review_text("Deliriously buggy. Fun if/when it works properly. Wait and see if they actually QA the next few patches before you play.")
0 Negative review