From e3659c82af7278205b04f1a5b2e8e20bffa10830 Mon Sep 17 00:00:00 2001
From: Mikolaj Pokrywka
Date: Mon, 6 Jun 2022 23:59:48 +0200
Subject: [PATCH] gpu working out all data 6 epochs

---
 dev-0/out.tsv  | 215 +++++++++++++++++++++++++++++++++++++++++++++
 run.py         |  17 ++--
 test-A/out.tsv | 230 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 456 insertions(+), 6 deletions(-)
 create mode 100644 dev-0/out.tsv
 create mode 100644 test-A/out.tsv

diff --git a/dev-0/out.tsv b/dev-0/out.tsv
new file mode 100644
index 0000000..902eeef
--- /dev/null
+++ b/dev-0/out.tsv
@@ -0,0 +1,215 @@
[215 added lines of predicted tag sequences for dev-0; the file contents were not preserved in this extract]

diff --git a/run.py b/run.py
index ddafa29..c8a6c45 100644
--- a/run.py
+++ b/run.py
@@ -76,14 +76,14 @@ class NERModel(torch.nn.Module):
     def __init__(self, ):
         super(NERModel, self).__init__()
         self.emb = torch.nn.Embedding(23627, 200)
-        self.fc1 = torch.nn.Linear(2000, 9)
+        self.fc1 = torch.nn.Linear(2400, 9)
         # self.softmax = torch.nn.Softmax(dim=1)
         # not needed, because we use https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
         # as the criterion

     def forward(self, x):
         x = self.emb(x)
-        x = x.reshape(2000)
+        x = x.reshape(2400)
         x = self.fc1(x)
         # x = self.softmax(x)
         return x
@@ -150,7 +150,7 @@ def generate_out(folder_path):
 #     return predictions

 def manual_process(tens, tokens):
-    array = [0, 0, 0, 0, 0, 0, 0]
+    array = [0, 0, 0, 0, 0, 0, 0, 0, 0]
     if len(tokens) >= 2:
         if len(tokens[1]) >= 1:
             word = tokens[1]
@@ -171,6 +171,10 @@ def manual_process(tens, tokens):
                 array[5] = 1
             if '/' in word:
                 array[6] = 1
+            if len(word) > 3:
+                array[7] = 1
+            if len(word) > 6:
+                array[8] = 1
     x = torch.tensor(array)
     new_tensor = torch.cat((tens, x), 0)
     return new_tensor
@@ -178,7 +182,7 @@ def manual_process(tens, tokens):

 if __name__ == "__main__":
     print('is cuda available', torch.cuda.is_available())
-    device = torch.device("cuda:0" if not torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     start_time = time.time()
     X = []
     Y = []
@@ -206,10 +210,11 @@ if __name__ == "__main__":

     ner_model = NERModel().to(device)
+    print(next(ner_model.parameters()).is_cuda)

     # ner_model(train_tokens_ids[10_000][1:4])
     criterion = torch.nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(ner_model.parameters())
-    for epoch in range(4):
+    for epoch in range(6):
         print('started epoch', epoch)
         start_time_epoch = time.time()

@@ -222,7 +227,7 @@ if __name__ == "__main__":
         items_total = 0
         ner_model.train()
         for i in range(len(train_labels)-1):
-        # for i in range(20):
+        # for i in range(10):
             for j in range(1, len(train_labels[i]) - 1):

                 X = train_tokens_ids[i][j - 1: j + 2]

diff --git a/test-A/out.tsv b/test-A/out.tsv
new file mode 100644
index 0000000..aee8a91
--- /dev/null
+++ b/test-A/out.tsv
@@ -0,0 +1,230 @@
[230 added lines of predicted tag sequences for test-A; the file contents were not preserved in this extract]
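Note, not part of the original patch: the change from Linear(2000, 9) to Linear(2400, 9) appears to follow from how manual_process concatenates its 9 binary flags onto the 3-token window (the 0/1 flags are looked up in the same embedding table as token ids), giving (3 + 9) * 200 = 2400 input features after reshape. A minimal sketch of that shape calculation, using made-up token ids and flag values rather than anything from the repository's data:

    import torch

    # Hypothetical stand-ins for one training example (not from the real data).
    window = torch.tensor([101, 2054, 102])             # token ids for positions j-1, j, j+1
    flags = torch.tensor([0, 1, 0, 0, 0, 0, 0, 1, 0])   # the 9 binary flags built by manual_process

    x = torch.cat((window, flags), 0)                   # 12 ids, as manual_process returns
    emb = torch.nn.Embedding(23627, 200)
    print(emb(x).reshape(-1).shape)                     # torch.Size([2400]) -> matches Linear(2400, 9)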