paranormal-or-skeptic3/train.py

#!/usr/bin/python3 -W ignore::UserWarning

import pickle
import torch


def parse_dataset(documents, document_terms, mapping, voc_len):
    """Build a term-count feature matrix with a trailing bias column.

    One row is produced per entry of ``documents``; only the number of
    documents is used, the terms are read from ``document_terms`` at the same
    position. Every row holds ``voc_len`` count slots followed by a constant
    ``1.0``, and each term increments the slot at ``mapping[term]``.

    Returns the rows stacked into a ``torch.float`` tensor.
    """

    def build_row(terms):
        # Zeroed count slots, then the bias feature in the last position.
        row = [0.] * voc_len + [1.0]
        for term in terms:
            row[mapping[term]] += 1
        return row

    print('voc len = ', voc_len)
    rows = [build_row(document_terms[pos]) for pos in range(len(documents))]
    return torch.tensor(rows, dtype=torch.float)


def main():
    """Train a logistic-regression classifier on term counts and save it.

    Loads ``dictionary.pkl``, which must unpickle to a 5-tuple of
    (vocabulary, documents, per-document terms, word-to-index mapping,
    training labels). Fits one weight per vocabulary entry plus a bias using
    2500 steps of plain gradient descent on the binary cross-entropy loss,
    then pickles ``(weights, word_to_index_mapping)`` to ``model.pkl``.
    """
    # NOTE(review): unpickling can execute arbitrary code; dictionary.pkl is
    # presumably produced by a trusted preprocessing step -- confirm.
    with open('dictionary.pkl', 'rb') as dictionary_file:
        model = pickle.load(dictionary_file)
    vocabulary, documents, document_terms, word_to_index_mapping, train_y = model
    x = parse_dataset(documents, document_terms, word_to_index_mapping, len(vocabulary))
    y = torch.tensor(train_y, dtype=torch.float)
    # One weight per vocabulary entry plus one for the bias column of x.
    w = torch.randn(len(vocabulary) + 1, requires_grad=True)
    learning_rate = torch.tensor(0.001)
    for counter in range(2500):
        if counter % 10 == 0:
            print(counter)
        y_predicted = torch.sigmoid(x @ w)
        # The 1e-10 offset keeps log() finite when a prediction saturates
        # at exactly 0 or 1.
        cost = (-1 / y.size()[0]) * torch.sum(y * torch.log(y_predicted + 1e-10) + (1 - y) * torch.log(1 - y_predicted + 1e-10))
        cost.backward()
        with torch.no_grad():
            w -= learning_rate * w.grad
            # backward() adds into w.grad instead of overwriting it, so the
            # gradient must be cleared or every later step would apply the
            # running sum of all previous gradients.
            w.grad.zero_()
    print(w)
    model = (w, word_to_index_mapping)
    # The context manager guarantees the model file is flushed and closed.
    with open('model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)


# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
solution 2020-12-15 16:40:10 +01:00			`#!/usr/bin/python3 -W ignore::UserWarning`

			`import pickle`
			`import torch`


			`def parse_dataset(documents, document_terms, mapping, voc_len):`
			`print('voc len = ', voc_len)`
			`x = []`
			`for i in range(len(documents)):`
			`features = voc_len * [0.]`
			`# bias`
			`features.append(1.0)`
			`for word in document_terms[i]:`
			`features[mapping[word]] += 1`
			`x.append(features)`
			`return torch.tensor(x, dtype=torch.float)`


			`def main():`
			`model = pickle.load(open('dictionary.pkl', 'rb'))`
			`vocabulary, documents, document_terms, word_to_index_mapping, train_y = model`
			`x = parse_dataset(documents, document_terms, word_to_index_mapping, len(vocabulary))`
			`y = torch.tensor(train_y, dtype=torch.float)`
			`w = torch.randn(len(vocabulary) + 1, requires_grad=True)`
			`learning_rate = torch.tensor(0.001)`
			`counter = 0`
			`for _ in range(2500):`
			`if counter % 10 == 0:`
			`print(counter)`
			`y_predicted = torch.sigmoid(x @ w)`
			`cost = (-1 / y.size()[0]) * torch.sum(y * torch.log(y_predicted + 1e-10) + (1 - y) * torch.log(1 - y_predicted + 1e-10))`
			`cost.backward()`
			`with torch.no_grad():`
			`w -= learning_rate * w.grad`
			`w.requires_grad = True`
			`counter += 1`
			`print(w)`
			`model = (w, word_to_index_mapping)`
			`pickle.dump(model, open('model.pkl', 'wb'))`


			`main()`