Task 1
Train a simple bigram language model based on logistic regression (as presented in the lecture).
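The idea, in short: the previous word is mapped to a dense embedding vector, a linear layer projects that vector back onto the whole vocabulary, and a softmax turns the scores into a probability distribution over the next word. As a rough sketch of the formula (notation chosen here only for illustration, with E the embedding matrix and W, b the parameters of the linear layer): P(w_t | w_{t-1}) = softmax(E[w_{t-1}] W + b).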
from itertools import islice
import regex as re
import sys
from torchtext.vocab import build_vocab_from_iterator

def get_words_from_line(line):
    """Tokenize a single line: sentence markers plus lowercased words and punctuation."""
    line = line.rstrip()
    yield '<s>'
    for m in re.finditer(r'[\p{L}0-9\*]+|\p{P}+', line):
        yield m.group(0).lower()
    yield '</s>'

def get_word_lines_from_file(file_name):
    """Yield a token generator for every line of the file."""
    with open(file_name, 'r') as fh:
        for line in fh:
            yield get_words_from_line(line)
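A quick sanity check of the tokenizer on a made-up sentence (not from the corpus): each line is wrapped in <s> … </s> markers, and words and punctuation become separate lowercase tokens.
list(get_words_from_line('Welcome to the hotel!'))
# ['<s>', 'welcome', 'to', 'the', 'hotel', '!', '</s>']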
vocab_size = 20000

vocab = build_vocab_from_iterator(
    get_word_lines_from_file('test-A/in.tsv'),
    max_tokens=vocab_size,
    specials=['<unk>'])
vocab['welcome']
3798
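The mapping also works in reverse; lookup_tokens turns ids back into surface forms (the exact id of a word depends on the file the vocabulary was built from):
vocab.lookup_tokens([3798])
# ['welcome']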
from torch import nn
import torch

embed_size = 100

class SimpleBigramNeuralLanguageModel(nn.Module):
    def __init__(self, vocabulary_size, embedding_size):
        super(SimpleBigramNeuralLanguageModel, self).__init__()
        self.model = nn.Sequential(
            nn.Embedding(vocabulary_size, embedding_size),
            nn.Linear(embedding_size, vocabulary_size),
            nn.Softmax(dim=-1)  # explicit dim avoids the implicit-dimension warning
        )

    def forward(self, x):
        return self.model(x)
model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size)
vocab.set_default_index(vocab['<unk>'])
ixs = torch.tensor(vocab.forward(['welcome']))
out = model(ixs)
out[0][vocab['to']]
tensor(5.5038e-05, grad_fn=<SelectBackward0>)
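This value is about what one would expect from an untrained model: the softmax output is close to uniform, so every probability is near 1/vocab_size.
1 / vocab_size
# 5e-05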
from torch.utils.data import IterableDataset
import itertools
def look_ahead_iterator(gen):
    # turn a flat stream of tokens into a stream of (previous, current) pairs
    prev = None
    for item in gen:
        if prev is not None:
            yield (prev, item)
        prev = item
class Bigrams(IterableDataset):
    """Streams (previous word id, current word id) pairs from a text file."""
    def __init__(self, text_file, vocabulary_size):
        self.vocab = build_vocab_from_iterator(
            get_word_lines_from_file(text_file),
            max_tokens=vocabulary_size,
            specials=['<unk>'])
        self.vocab.set_default_index(self.vocab['<unk>'])
        self.vocabulary_size = vocabulary_size
        self.text_file = text_file

    def __iter__(self):
        return look_ahead_iterator(
            (self.vocab[t] for t in itertools.chain.from_iterable(
                get_word_lines_from_file(self.text_file))))
train_dataset = Bigrams('test-A/in.tsv', vocab_size)
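A quick peek at what the dataset streams, just to check the plumbing; each element is a pair of token ids, and the concrete numbers depend on the vocabulary built from the file:
from itertools import islice
list(islice(iter(train_dataset), 5))  # first five (previous id, current id) pairs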
from torch.utils.data import DataLoader

device = 'cpu'  # switch to 'cuda' if a GPU is available
model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)
data = DataLoader(train_dataset, batch_size=5000)
optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.NLLLoss()

model.train()
step = 0
for x, y in data:
    x = x.to(device)
    y = y.to(device)
    optimizer.zero_grad()
    ypredicted = model(x)
    loss = criterion(torch.log(ypredicted), y)  # log of the softmax output + NLLLoss = cross-entropy
    if step % 100 == 0:
        print(step, loss)
    step += 1
    loss.backward()
    optimizer.step()

torch.save(model.state_dict(), 'model1.bin')
0 tensor(10.0928, grad_fn=<NllLossBackward0>)
100 tensor(8.4572, grad_fn=<NllLossBackward0>)
200 tensor(7.6165, grad_fn=<NllLossBackward0>)
300 tensor(6.9356, grad_fn=<NllLossBackward0>)
400 tensor(6.5687, grad_fn=<NllLossBackward0>)
500 tensor(6.2197, grad_fn=<NllLossBackward0>)
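The printed loss is the average negative log-likelihood per token, so exp(loss) gives the perplexity; a rough conversion for the last value above:
import math
math.exp(6.2197)  # ≈ 503, the perplexity after about 500 steps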
KeyboardInterrupt — training was stopped manually during a forward pass (model(x)), somewhere after step 500.
device = 'cpu'  # switch to 'cuda' if a GPU is available
model = SimpleBigramNeuralLanguageModel(vocab_size, embed_size).to(device)
#model.load_state_dict(torch.load('model1.bin'))
model.eval()
ixs = torch.tensor(vocab.forward(['welcome'])).to(device)
out = model(ixs)
top = torch.topk(out[0], 10)
top_indices = top.indices.tolist()
top_probs = top.values.tolist()
top_words = vocab.lookup_tokens(top_indices)
list(zip(top_words, top_indices, top_probs))
[('liquid', 6933, 0.0004737793351523578), ('bia', 5842, 0.00043268679291941226), ('sole', 6386, 0.0004295798426028341), ('nmeant', 17711, 0.00034942160709761083), ('savs', 16709, 0.00034736539237201214), ('striving', 12414, 0.0003441996523179114), ('nol', 2640, 0.00032789510441944003), ('imposing', 8457, 0.0003199590719304979), ('hound', 17348, 0.00031824613688513637), ('?"\\\\', 4294, 0.0003141215711366385)]
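These top-10 words are essentially random: the training loop above was interrupted before it reached torch.save, so model1.bin was never written, the load_state_dict call stays commented out, and the predictions come from a freshly initialised model. For completeness, a minimal sketch of how the same distribution could be used to sample a continuation (sampling is only an illustration here, not part of the task):
probs = out[0].detach()                           # probability distribution after 'welcome'
next_id = torch.multinomial(probs, num_samples=1).item()
vocab.lookup_tokens([next_id])                    # a randomly sampled next word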