#!/usr/bin/python3 -W ignore::UserWarning

import pickle
import torch


def parse_dataset(documents, document_terms, mapping, voc_len):
    """Build the bag-of-words feature matrix for a list of documents.

    Args:
        documents: Sequence of documents; only its length is used, to decide
            how many rows are produced.
        document_terms: Sequence indexed in parallel with ``documents``;
            ``document_terms[i]`` is an iterable of the words in document i.
        mapping: Maps each word to its column index in the feature row.
        voc_len: Vocabulary size, i.e. the number of word-count columns.

    Returns:
        A float tensor of shape ``(len(documents), voc_len + 1)``. Column j
        (for j < voc_len) holds the number of occurrences of the word mapped
        to j; the last column is a constant 1.0 bias feature.

    Raises:
        KeyError: If a word is missing from ``mapping`` (for a dict mapping).
        IndexError: If ``document_terms`` is shorter than ``documents`` or a
            mapped index falls outside the feature row.
    """
    print('voc len = ', voc_len)
    width = voc_len + 1
    rows = []
    for doc_index in range(len(documents)):
        features = [0.0] * width
        # bias
        features[voc_len] = 1.0
        for word in document_terms[doc_index]:
            features[mapping[word]] += 1
        rows.append(features)
    # An explicit reshape keeps the result 2-D even when there are no
    # documents; torch.tensor([]) alone would come back with shape (0,).
    return torch.tensor(rows, dtype=torch.float).reshape(len(rows), width)


def main():
    """Train a logistic-regression classifier and save it to ``model.pkl``.

    Reads ``dictionary.pkl``, which must unpickle to a 5-tuple
    ``(vocabulary, documents, document_terms, word_to_index_mapping,
    train_y)``, builds the feature matrix with ``parse_dataset`` and runs
    2500 steps of full-batch gradient descent on the mean binary
    cross-entropy. The weight vector has ``len(vocabulary) + 1`` entries (the
    extra one pairs with the bias feature). It is printed and then pickled
    together with the mapping as ``(w, word_to_index_mapping)``.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file; only
    # run this against a dictionary.pkl from a trusted source.
    with open('dictionary.pkl', 'rb') as dictionary_file:
        model = pickle.load(dictionary_file)
    vocabulary, documents, document_terms, word_to_index_mapping, train_y = model
    x = parse_dataset(documents, document_terms, word_to_index_mapping, len(vocabulary))
    y = torch.tensor(train_y, dtype=torch.float)
    w = torch.randn(len(vocabulary) + 1, requires_grad=True)
    learning_rate = torch.tensor(0.001)
    for step in range(2500):
        # Progress indicator every 10th step.
        if step % 10 == 0:
            print(step)
        y_predicted = torch.sigmoid(x @ w)
        # The 1e-10 offsets keep log() finite when a prediction saturates at
        # exactly 0 or 1.
        cost = (-1 / y.size()[0]) * torch.sum(y * torch.log(y_predicted + 1e-10) + (1 - y) * torch.log(1 - y_predicted + 1e-10))
        cost.backward()
        with torch.no_grad():
            w -= learning_rate * w.grad
            # backward() adds into w.grad rather than overwriting it, so the
            # gradient must be cleared or every later step would apply the
            # sum of all previous gradients instead of the current one.
            w.grad.zero_()
    print(w)
    model = (w, word_to_index_mapping)
    with open('model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)


# Runs training at module load: there is no __main__ guard, so importing this
# file triggers main() just like executing it as a script does.
main()