From e3659c82af7278205b04f1a5b2e8e20bffa10830 Mon Sep 17 00:00:00 2001
From: Mikolaj Pokrywka
Date: Mon, 6 Jun 2022 23:59:48 +0200
Subject: [PATCH] gpu working out all data 6 epochs

---
 dev-0/out.tsv  | 215 +++++++++++++++++++++++++++++++++++++++++++++
 run.py         |  17 ++--
 test-A/out.tsv | 230 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 456 insertions(+), 6 deletions(-)
 create mode 100644 dev-0/out.tsv
 create mode 100644 test-A/out.tsv

diff --git a/dev-0/out.tsv b/dev-0/out.tsv
new file mode 100644
index 0000000..902eeef
--- /dev/null
+++ b/dev-0/out.tsv
@@ -0,0 +1,215 @@
[215 added lines of predicted tag sequences for dev-0; the file contents were not preserved in this extract]

diff --git a/run.py b/run.py
index ddafa29..c8a6c45 100644
--- a/run.py
+++ b/run.py
@@ -76,14 +76,14 @@ class NERModel(torch.nn.Module):
     def __init__(self, ):
         super(NERModel, self).__init__()
         self.emb = torch.nn.Embedding(23627, 200)
-        self.fc1 = torch.nn.Linear(2000, 9)
+        self.fc1 = torch.nn.Linear(2400, 9)
         # self.softmax = torch.nn.Softmax(dim=1)
         # not needed, because we use https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
         # as the criterion

     def forward(self, x):
         x = self.emb(x)
-        x = x.reshape(2000)
+        x = x.reshape(2400)
         x = self.fc1(x)
         # x = self.softmax(x)
         return x
@@ -150,7 +150,7 @@ def generate_out(folder_path):
 #     return predictions

 def manual_process(tens, tokens):
-    array = [0, 0, 0, 0, 0, 0, 0]
+    array = [0, 0, 0, 0, 0, 0, 0, 0, 0]
     if len(tokens) >= 2:
         if len(tokens[1]) >= 1:
             word = tokens[1]
@@ -171,6 +171,10 @@ def manual_process(tens, tokens):
                 array[5] = 1
             if '/' in word:
                 array[6] = 1
+            if len(word) > 3:
+                array[7] = 1
+            if len(word) > 6:
+                array[8] = 1
     x = torch.tensor(array)
     new_tensor = torch.cat((tens, x), 0)
     return new_tensor
@@ -178,7 +182,7 @@ def manual_process(tens, tokens):

 if __name__ == "__main__":
     print('is cuda available', torch.cuda.is_available())
-    device = torch.device("cuda:0" if not torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     start_time = time.time()
     X = []
     Y = []
@@ -206,10 +210,11 @@ if __name__ == "__main__":

     ner_model = NERModel().to(device)
+    print(next(ner_model.parameters()).is_cuda)

     # ner_model(train_tokens_ids[10_000][1:4])
     criterion = torch.nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(ner_model.parameters())
-    for epoch in range(4):
+    for epoch in range(6):
         print('started epoch', epoch)
         start_time_epoch = time.time()

@@ -222,7 +227,7 @@ if __name__ == "__main__":
         items_total = 0
         ner_model.train()
         for i in range(len(train_labels)-1):
-        # for i in range(20):
+        # for i in range(10):
             for j in range(1, len(train_labels[i]) - 1):

                 X = train_tokens_ids[i][j - 1: j + 2]

diff --git a/test-A/out.tsv b/test-A/out.tsv
new file mode 100644
index 0000000..aee8a91
--- /dev/null
+++ b/test-A/out.tsv
@@ -0,0 +1,230 @@
[230 added lines of predicted tag sequences for test-A; the file contents were not preserved in this extract]
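Note, not part of the original patch: the change from Linear(2000, 9) to Linear(2400, 9) appears to follow from how manual_process concatenates its 9 binary flags onto the 3-token window (the 0/1 flags are looked up in the same embedding table as token ids), giving (3 + 9) * 200 = 2400 input features after reshape. A minimal sketch of that shape calculation, using made-up token ids and flag values rather than anything from the repository's data:

    import torch

    # Hypothetical stand-ins for one training example (not from the real data).
    window = torch.tensor([101, 2054, 102])             # token ids for positions j-1, j, j+1
    flags = torch.tensor([0, 1, 0, 0, 0, 0, 0, 1, 0])   # the 9 binary flags built by manual_process

    x = torch.cat((window, flags), 0)                   # 12 ids, as manual_process returns
    emb = torch.nn.Embedding(23627, 200)
    print(emb(x).reshape(-1).shape)                     # torch.Size([2400]) -> matches Linear(2400, 9)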