forgot to add files
This commit is contained in:
parent 04decdd5ba
commit f8bec5bc13
scripts.py (new file, 16 lines)
@@ -0,0 +1,16 @@
+import regex as re
+import string
+
+
+def get_words_from_line(line):
+    line = line.rstrip()
+    # line = line.lower()
+    line = line.strip()
+    line = line.translate(str.maketrans('', '', string.punctuation))
+    # yield '<s>'
+    for m in re.finditer(r'[\p{L}0-9\*]+|\p{P}+', line):
+        yield m.group(0).lower()
+    # yield '</s>'
+
+vocab_size = 60000
+learning_rate = 0.0001
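For reference, a minimal sketch of what this tokenizer yields (hypothetical usage, not part of the commit): ASCII punctuation is stripped first, then runs of letters, digits, and '*' are matched and lowercased.

    from scripts import get_words_from_line

    print(list(get_words_from_line("Hello, World 42!")))
    # -> ['hello', 'world', '42']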
utils.py (new file, 70 lines)
@@ -0,0 +1,70 @@
+import regex as re
+import string
+from torch import nn
+import torch
+from torch.utils.data import DataLoader
+
+from torch.utils.data import IterableDataset
+import itertools
+import lzma
+import regex as re
+import pickle
+import scripts
+import string
+
+
+def get_words_from_line(line):
+    line = line.rstrip()
+    line = line.lower()
+    line = line.strip()
+    line = line.translate(str.maketrans('', '', string.punctuation))
+    yield '<s>'
+    for m in re.finditer(r'\p{L}+', line):
+        yield m.group(0)
+    yield '</s>'
+
+vocab_size = 32000
+learning_rate = 0.0001
+embed_size = 100
+device = 'cuda'
+
+class LanguageModel(nn.Module):
+    def __init__(self, vocabulary_size, embedding_size):
+        super(LanguageModel, self).__init__()
+        self.embeddings = nn.Embedding(vocabulary_size, embedding_size)
+        self.linear = nn.Linear(embedding_size*3, vocabulary_size)
+
+        self.linear_first_layer = nn.Linear(embedding_size*5, embedding_size*3)
+        self.relu = nn.ReLU()
+        self.softmax = nn.Softmax(dim=1)  # softmax over the vocabulary dimension
+
+        # self.model = nn.Sequential(
+        #     nn.Embedding(vocabulary_size, embedding_size),
+        #     nn.Linear(embedding_size, vocabulary_size),
+        #     nn.Softmax()
+        # )
+
+    def forward(self, x_in):
+        # emb_1 = self.embeddings(x[0])
+        # emb_2 = self.embeddings(x[1])
+
+
+        # look up an embedding for every input index tensor
+        embeddings = [self.embeddings(x) for x in x_in]
+
+        first = embeddings[0]
+        to_sum = embeddings[1:6]    # embeddings 1-5 are summed into the first one
+        to_concat = embeddings[6:]  # the remaining embeddings are concatenated
+
+        for t in to_sum:
+            first = torch.add(first, t)
+
+        to_concat.insert(0, first)
+
+        first_layer = self.linear_first_layer(torch.cat(to_concat, dim=1))
+        after_relu = self.relu(first_layer)
+        concated = self.linear(after_relu)
+
+        y = self.softmax(concated)
+
+        return y
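Assuming both new files sit on the import path, a minimal smoke test of the model might look like this (hypothetical, not part of the commit). forward() expects a list of 10 index tensors: embeddings 0-5 are summed into one vector, embeddings 6-9 are concatenated after it, giving the 5 * embed_size feature vector that linear_first_layer expects.

    import torch
    from utils import LanguageModel, vocab_size, embed_size

    model = LanguageModel(vocab_size, embed_size)
    batch = 4
    # ten (batch,) tensors of word indices; 0-5 summed, 6-9 concatenated
    x_in = [torch.randint(0, vocab_size, (batch,)) for _ in range(10)]
    probs = model(x_in)
    print(probs.shape)  # torch.Size([4, 32000])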