paranormal-or-skeptic3/train.py

44 lines
1.3 KiB
Python
Raw Permalink Normal View History

2020-12-15 16:40:10 +01:00
#!/usr/bin/python3 -W ignore::UserWarning
import pickle
import torch
def parse_dataset(documents, document_terms, mapping, voc_len):
    """Build the bag-of-words feature matrix for a set of documents.

    Args:
        documents: Sized collection of documents; only its length is used,
            to decide how many rows to produce.
        document_terms: Indexable collection where ``document_terms[i]`` is
            an iterable of the words of document ``i``.
        mapping: Lookup from word to feature column index (presumably in
            ``[0, voc_len)`` -- confirm against the code that builds it).
        voc_len: Number of vocabulary columns.

    Returns:
        A ``torch.float`` tensor with one row per document and
        ``voc_len + 1`` columns: per-word occurrence counts followed by a
        constant ``1.0`` bias column.
    """
    print('voc len = ', voc_len)

    def count_terms(terms):
        # Zeroed vocabulary slots plus a trailing 1.0 acting as the bias input.
        row = [0.] * voc_len + [1.0]
        for term in terms:
            row[mapping[term]] += 1
        return row

    rows = [count_terms(document_terms[idx]) for idx in range(len(documents))]
    return torch.tensor(rows, dtype=torch.float)
def main():
    """Train a logistic-regression classifier with full-batch gradient descent.

    Reads ``dictionary.pkl``, which must unpickle to the 5-tuple
    ``(vocabulary, documents, document_terms, word_to_index_mapping, train_y)``,
    builds the feature matrix with ``parse_dataset``, runs 2500 gradient
    steps minimising binary cross-entropy, prints the learned weights and
    writes ``(weights, word_to_index_mapping)`` to ``model.pkl``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('dictionary.pkl', 'rb') as dictionary_file:
        model = pickle.load(dictionary_file)
    vocabulary, documents, document_terms, word_to_index_mapping, train_y = model

    x = parse_dataset(documents, document_terms, word_to_index_mapping, len(vocabulary))
    y = torch.tensor(train_y, dtype=torch.float)
    # One weight per vocabulary entry plus one for the bias column.
    w = torch.randn(len(vocabulary) + 1, requires_grad=True)
    learning_rate = torch.tensor(0.001)

    for counter in range(2500):
        if counter % 10 == 0:
            print(counter)
        y_predicted = torch.sigmoid(x @ w)
        # Mean binary cross-entropy; the 1e-10 keeps log() away from zero.
        cost = (-1 / y.size()[0]) * torch.sum(
            y * torch.log(y_predicted + 1e-10)
            + (1 - y) * torch.log(1 - y_predicted + 1e-10)
        )
        cost.backward()
        with torch.no_grad():
            w -= learning_rate * w.grad
            # backward() accumulates into w.grad, and the in-place update
            # keeps the same leaf tensor, so the gradient must be cleared
            # or every step would apply the sum of all previous gradients.
            w.grad.zero_()

    print(w)
    model = (w, word_to_index_mapping)
    with open('model.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)
# Run training only when executed as a script, so importing this module
# does not trigger file reads, training, or writing model.pkl.
if __name__ == '__main__':
    main()