GPU training working; run on all data for 6 epochs

Mikolaj Pokrywka 2022-06-06 23:59:48 +02:00
parent 5eab9c5889
commit e3659c82af
3 changed files with 456 additions and 6 deletions

dev-0/out.tsv  (new file, 215 lines added)

@@ -0,0 +1,215 @@

run.py  (17 lines changed)
@@ -76,14 +76,14 @@ class NERModel(torch.nn.Module):
     def __init__(self, ):
         super(NERModel, self).__init__()
         self.emb = torch.nn.Embedding(23627, 200)
-        self.fc1 = torch.nn.Linear(2000, 9)
+        self.fc1 = torch.nn.Linear(2400, 9)
         # self.softmax = torch.nn.Softmax(dim=1)
         # not needed, because we use https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
         # as the criterion
     def forward(self, x):
         x = self.emb(x)
-        x = x.reshape(2000)
+        x = x.reshape(2400)
         x = self.fc1(x)
         # x = self.softmax(x)
         return x
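The only change to the model is the input width of the fully connected layer, from 2000 to 2400. That matches the feature step further down: the network is fed the 3 context token ids plus the hand-crafted binary flags appended by manual_process, and every one of those ids goes through the 200-dimensional embedding, so 10 ids gave 2000 inputs before and 12 ids give 2400 now. A minimal, self-contained sketch of how the model reads after this commit; the vocabulary size (23627), embedding width (200) and the 9 label classes come from the diff, the wiring around it is an assumption:

import torch

class NERModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(23627, 200)   # token ids -> 200-dim vectors
        self.fc1 = torch.nn.Linear(2400, 9)         # 12 ids * 200 dims = 2400 inputs

    def forward(self, x):
        x = self.emb(x)        # (12,) long tensor -> (12, 200)
        x = x.reshape(2400)    # flatten the window into one feature vector
        return self.fc1(x)     # raw logits; CrossEntropyLoss applies softmax itself

# Example input: 3 context token ids plus 9 binary flags appended by manual_process.
dummy_input = torch.cat((torch.tensor([17, 42, 7]), torch.zeros(9, dtype=torch.long)))
logits = NERModel()(dummy_input)   # shape (9,)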
@@ -150,7 +150,7 @@ def generate_out(folder_path):
     # return predictions
 def manual_process(tens, tokens):
-    array = [0, 0, 0, 0, 0, 0, 0]
+    array = [0, 0, 0, 0, 0, 0, 0, 0, 0]
     if len(tokens) >= 2:
         if len(tokens[1]) >= 1:
             word = tokens[1]
@@ -171,6 +171,10 @@ def manual_process(tens, tokens):
                 array[5] = 1
             if '/' in word:
                 array[6] = 1
+            if len(word) > 3:
+                array[7] = 1
+            if len(word) > 6:
+                array[8] = 1
     x = torch.tensor(array)
     new_tensor = torch.cat((tens, x), 0)
     return new_tensor
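manual_process grows the hand-crafted feature vector from 7 to 9 binary flags by adding two word-length checks. Since the flags are concatenated onto the token-id window and then looked up in the same embedding as ordinary ids 0 and 1, the two extra flags account for the 2 * 200 = 400 extra inputs to fc1 above. A sketch of the step under those assumptions, where tens is the (3,) tensor of context token ids and tokens is the raw token window; only the checks visible in this hunk are spelled out, the earlier flags 0-5 are left as a placeholder:

import torch

def manual_process(tens, tokens):
    array = [0] * 9                          # 9 hand-crafted binary flags
    if len(tokens) >= 2 and len(tokens[1]) >= 1:
        word = tokens[1]                     # the centre word of the window
        # ... flags 0-5, set by earlier checks on `word` (not shown in this hunk) ...
        if '/' in word:
            array[6] = 1
        if len(word) > 3:                    # new in this commit: coarse word-length buckets
            array[7] = 1
        if len(word) > 6:
            array[8] = 1
    return torch.cat((tens, torch.tensor(array)), 0)   # (3,) ids + (9,) flags -> (12,) ids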
@@ -178,7 +182,7 @@ def manual_process(tens, tokens):
 if __name__ == "__main__":
     print('is cuda available', torch.cuda.is_available())
-    device = torch.device("cuda:0" if not torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     start_time = time.time()
     X = []
     Y = []
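The device line previously had the condition inverted, so it picked "cuda:0" only when CUDA was not available and training stayed on the CPU. The fixed line is the usual PyTorch idiom. A small illustrative example with a hypothetical throwaway model, showing that the model and every input tensor have to be moved to the same device:

import torch

# Prefer the first CUDA GPU, fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = torch.nn.Linear(10, 2).to(device)   # hypothetical tiny model, just for illustration
x = torch.randn(10).to(device)              # inputs are moved the same way inside the loop
print(next(model.parameters()).is_cuda, model(x).shape)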
@@ -206,10 +210,11 @@ if __name__ == "__main__":
     ner_model = NERModel().to(device)
+    print(next(ner_model.parameters()).is_cuda)
     # ner_model(train_tokens_ids[10_000][1:4])
     criterion = torch.nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(ner_model.parameters())
-    for epoch in range(4):
+    for epoch in range(6):
         print('started epoch', epoch)
         start_time_epoch = time.time()
@@ -222,7 +227,7 @@ if __name__ == "__main__":
         items_total = 0
         ner_model.train()
         for i in range(len(train_labels)-1):
-        # for i in range(20):
+        # for i in range(10):
             for j in range(1, len(train_labels[i]) - 1):
                 X = train_tokens_ids[i][j - 1: j + 2]
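Putting the hunks together: the training loop walks sentence by sentence and slides a 3-token window over each one, builds the 12-id input per position, and takes one optimizer step per token; this commit bumps the number of epochs from 4 to 6. A condensed sketch under those assumptions (names not present in the diff, such as train_tokens, are guesses):

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ner_model.parameters())

for epoch in range(6):                                   # bumped from 4 to 6 in this commit
    ner_model.train()
    for i in range(len(train_labels) - 1):
        for j in range(1, len(train_labels[i]) - 1):
            X = train_tokens_ids[i][j - 1: j + 2]                 # 3-token window
            X = manual_process(X, train_tokens[i][j - 1: j + 2])  # append the 9 flags -> 12 ids
            X = X.to(device)
            Y = train_labels[i][j].unsqueeze(0).to(device)        # gold tag of the centre token

            optimizer.zero_grad()
            logits = ner_model(X).unsqueeze(0)                    # (1, 9) for CrossEntropyLoss
            loss = criterion(logits, Y)
            loss.backward()
            optimizer.step()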

test-A/out.tsv  (new file, 230 lines added)

@@ -0,0 +1,230 @@