forgot to add files

Mikołaj Pokrywka 2023-05-31 20:48:15 +02:00
parent 04decdd5ba
commit f8bec5bc13
2 changed files with 86 additions and 0 deletions

scripts.py (new file, 16 lines)

@@ -0,0 +1,16 @@
import regex as re
import string


def get_words_from_line(line):
    # Normalize the line and strip punctuation before tokenizing.
    line = line.rstrip()
    # line = line.lower()
    line = line.strip()
    line = line.translate(str.maketrans('', '', string.punctuation))
    # yield '<s>'
    # Punctuation is already stripped above, so the \p{P}+ branch rarely fires;
    # tokens are runs of letters, digits or '*', lowercased on output.
    for m in re.finditer(r'[\p{L}0-9\*]+|\p{P}+', line):
        yield m.group(0).lower()
    # yield '</s>'


vocab_size = 60000
learning_rate = 0.0001
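A minimal usage sketch (not part of the commit) for the tokenizer above; the example sentence and output are illustrative only:

# Hypothetical example, assuming scripts.py is on the import path.
from scripts import get_words_from_line

tokens = list(get_words_from_line("The quick brown fox ate 2 carrots!"))
print(tokens)  # ['the', 'quick', 'brown', 'fox', 'ate', '2', 'carrots']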

utils.py (new file, 70 lines)

@@ -0,0 +1,70 @@
import itertools
import lzma
import pickle
import string

import regex as re
import torch
from torch import nn
from torch.utils.data import DataLoader, IterableDataset

import scripts


def get_words_from_line(line):
    # Lowercase, strip punctuation, and wrap the tokens in sentence markers.
    line = line.rstrip()
    line = line.lower()
    line = line.strip()
    line = line.translate(str.maketrans('', '', string.punctuation))
    yield '<s>'
    for m in re.finditer(r'\p{L}+', line):
        yield m.group(0)
    yield '</s>'


vocab_size = 32000
learning_rate = 0.0001
embed_size = 100
device = 'cuda'
class LanguageModel(nn.Module):
    def __init__(self, vocabulary_size, embedding_size):
        super(LanguageModel, self).__init__()
        self.embedings = nn.Embedding(vocabulary_size, embedding_size)
        self.linear = nn.Linear(embedding_size * 3, vocabulary_size)
        self.linear_first_layer = nn.Linear(embedding_size * 5, embedding_size * 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        # self.model = nn.Sequential(
        #     nn.Embedding(vocabulary_size, embedding_size),
        #     nn.Linear(embedding_size, vocabulary_size),
        #     nn.Softmax()
        # )

    def forward(self, x_in):
        # x_in is a list of ten tensors of token ids, one per context position.
        embeddings = [self.embedings(x) for x in x_in]
        # Sum the first six embeddings into a single vector...
        first = embeddings[0]
        to_sum = embeddings[1:6]
        # ...and concatenate the remaining four positions with that sum.
        to_concat = embeddings[6:]
        for t in to_sum:
            first = torch.add(first, t)
        to_concat.insert(0, first)
        # 5 * embedding_size features -> 3 * embedding_size -> vocabulary_size.
        first_layer = self.linear_first_layer(torch.cat(to_concat, dim=1))
        after_relu = self.relu(first_layer)
        concated = self.linear(after_relu)
        y = self.softmax(concated)
        return y
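A minimal usage sketch (not part of the commit), assuming the model receives a list of ten context-token tensors, which is what the slicing in forward implies (six summed embeddings plus four concatenated positions, giving 5 * embed_size input features):

# Hypothetical example, assuming utils.py is on the import path.
import torch
from utils import LanguageModel, vocab_size, embed_size

model = LanguageModel(vocab_size, embed_size)
batch_size = 16
# Ten LongTensors of token ids, one per context position.
x_in = [torch.randint(0, vocab_size, (batch_size,)) for _ in range(10)]
probs = model(x_in)
print(probs.shape)  # torch.Size([16, 32000])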