from collections import Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from termcolor import colored

from train_naive_bayes import naive_bayes
from train_lstm import lstm
from train_bert import bert


def run():
    """Load the emotion dataset, visualize its label distribution, then
    interactively train three models (naive Bayes, LSTM, BERT) and plot a
    comparison of their accuracy / macro-F1 / weighted-F1 scores.

    Side effects: reads 'train.txt' and 'test.txt' from the working
    directory, blocks on stdin between model runs, and opens two
    matplotlib windows that must be closed to proceed.
    """
    # Load the dataset (semicolon-separated "text;label" lines, no header)
    df_train = pd.read_csv('train.txt', header=None, sep=';',
                           names=['Input', 'Sentiment'], encoding='utf-8')
    df_test = pd.read_csv('test.txt', header=None, sep=';',
                          names=['Input', 'Sentiment'], encoding='utf-8')

    # Show a few examples from each set
    print('TRAIN SET:')
    print(df_train[3:6])
    print()
    print('TEST SET:')
    print(df_test[3:6])

    # Example visualization - label distribution of the training set.
    # Counter replaces the original hand-rolled dict counting loop, and the
    # sorted items are unpacked directly (the original computed an unused
    # zip(*lists) result and then rebuilt labels/sizes in a second loop).
    category_counts = Counter(df_train['Sentiment'])
    labels, sizes = zip(*sorted(category_counts.items()))

    plt.close('all')
    # autopct hands us a percentage; convert it back to an absolute count
    plt.pie(sizes, labels=labels,
            autopct=lambda pct: '{:.0f}'.format(pct * sum(sizes) / 100))
    plt.title(f'Sentiment analysis model training set with a total of {len(df_train)} examples')
    plt.axis('equal')
    print(colored('### Displaying training set data, close the display to continue ###', 'green'))
    plt.show()

    # Map the textual emotion labels to integer class ids
    category_mapping = {'anger': 0, 'fear': 1, 'joy': 2, 'love': 3, 'sadness': 4, 'surprise': 5}
    df_train['Sentiment'] = df_train['Sentiment'].map(category_mapping)
    df_test['Sentiment'] = df_test['Sentiment'].map(category_mapping)

    # Interactively run each model in turn and keep its result dict
    # (assumed to be an sklearn-style classification report — TODO confirm
    # against the train_* modules)
    print(colored('### Press any key to run the naive bayes model ###', 'green'))
    input()
    bayes_results = naive_bayes(df_train, df_test)
    print(colored('### Press any key to run the LSTM model ###', 'green'))
    input()
    lstm_results = lstm(df_train, df_test)
    print(colored('### Press any key to run the BERT model ###', 'green'))
    input()
    bert_results = bert(df_train, df_test)

    # Example visualization - F1-based comparison of all models
    plt.close()
    y_bayes = [bayes_results['accuracy'],
               bayes_results['macro avg']['f1-score'],
               bayes_results['weighted avg']['f1-score']]
    y_lstm = [lstm_results['accuracy'],
              lstm_results['macro avg']['f1-score'],
              lstm_results['weighted avg']['f1-score']]
    y_bert = [bert_results['accuracy'],
              bert_results['macro avg']['f1-score'],
              bert_results['weighted avg']['f1-score']]
    x = ['Accuracy', 'Macro avg', 'Weighted avg']
    x_axis = np.arange(len(x))
    plt.xticks(x_axis, x)
    plt.ylim(0, 1)
    # Offset the three bar groups so they sit side by side per metric
    plt.bar(x_axis - 0.2, y_bayes, 0.2, label='Naive bayes')
    plt.bar(x_axis, y_lstm, 0.2, label='LSTM')
    plt.bar(x_axis + 0.2, y_bert, 0.2, label='BERT')
    plt.xlabel('Metric')
    plt.ylabel('Score')
    plt.title('F1-scores per model', y=1.05)
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=3,
               fancybox=True, framealpha=1)
    print(colored('### Displaying F1-scores for all models, close the display to finish ###', 'green'))
    plt.show()


if __name__ == '__main__':
    run()