ksanu 2019-12-03 23:08:49 +01:00
parent 270eab1358
commit b8222a2f25
6 changed files with 36749 additions and 36740 deletions


@@ -146,14 +146,20 @@
     <breakpoints>
       <line-breakpoint suspend="THREAD" type="python-line">
         <url>file://$PROJECT_DIR$/s.py</url>
-        <line>139</line>
+        <line>141</line>
         <option name="timeStamp" value="3" />
       </line-breakpoint>
+      <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+        <url>file://$PROJECT_DIR$/s.py</url>
+        <line>193</line>
+        <option name="timeStamp" value="8" />
+      </line-breakpoint>
     </breakpoints>
   </breakpoint-manager>
   <watches-manager>
     <configuration name="PythonConfigurationType">
-      <watch expression="dev_y" />
+      <watch expression="debug_yp" />
     </configuration>
   </watches-manager>
 </component>

File diff suppressed because it is too large

File diff suppressed because it is too large

s.py

@@ -11,16 +11,18 @@ from torch.utils.data import Dataset, DataLoader
 #10 features: 4 normal + 6 from domain_onehot + 38 char labels
 model = nn.Sequential(
-    nn.Linear(48, 16),
+    nn.Linear(48, 96, bias=True),
     nn.ReLU(),
-    nn.Linear(16,1),
+    nn.Linear(96,48,bias=True),
+    nn.ReLU(),
+    nn.Linear(48, 1, bias=True),
     nn.Sigmoid())
-criterion = nn.MSELoss()
-optimizer = optim.SGD(model.parameters(), lr=0.000001, momentum=0.9)
-#optimizer = optim.Adam(model.parameters())
+criterion = nn.BCELoss()
+#optimizer = optim.SGD(model.parameters(), lr=0.00001, momentum=0.9)
+optimizer = optim.Adam(model.parameters())
-minibatch_size = 1000
+minibatch_size = 200
 def count_polish_diacritics(x):
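
Read as plain code, the state after this hunk is the sketch below: a deeper 48-96-48-1 network whose Sigmoid output feeds a binary cross-entropy loss instead of MSE, trained with Adam instead of heavily damped SGD. This is a minimal sketch assuming the nn/optim imports from the top of s.py; the comments are explanatory and not part of the commit.

import torch.nn as nn
import torch.optim as optim

# 48 inputs: 4 numeric features + 6 domain one-hot + 38 char-label columns
model = nn.Sequential(
    nn.Linear(48, 96, bias=True),   # widened from 48 -> 16
    nn.ReLU(),
    nn.Linear(96, 48, bias=True),   # extra hidden layer added in this commit
    nn.ReLU(),
    nn.Linear(48, 1, bias=True),
    nn.Sigmoid())                   # probability in (0, 1), as BCELoss expects
criterion = nn.BCELoss()            # binary cross-entropy replaces MSELoss
optimizer = optim.Adam(model.parameters())   # default lr=0.001
minibatch_size = 200

Pairing Sigmoid with BCELoss is the standard probability formulation for binary classification; a numerically safer variant would drop the Sigmoid and use nn.BCEWithLogitsLoss on raw logits.
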
@@ -105,7 +107,7 @@ class TrainDataset(Dataset):
         self.y = y
     def __len__(self):
-        return len(self.X)
+        return self.X.shape[0]
     def __getitem__(self, idx):
         return self.X[idx], self.y[idx]
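
For context, the whole class around this change is roughly the following sketch; only `self.y = y`, `__len__`, and `__getitem__` are visible in the hunk, so the `__init__` signature is an assumption.

from torch.utils.data import Dataset

class TrainDataset(Dataset):
    def __init__(self, X, y):   # assumed: X, y are pre-built feature/label tensors
        self.X = X
        self.y = y
    def __len__(self):
        # number of samples = size of the first axis of the 2-D feature tensor
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

For a tensor, len(self.X) already returns the size of the first dimension, so the change is cosmetic: shape[0] just names the axis explicitly.
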
@@ -173,34 +175,35 @@ testA_x = torch.cat([testA_x_temp2, testA_x_words_onehot], 1)
 dataset_train = TrainDataset(x, y)
-trainloader=DataLoader(dataset=dataset_train,batch_size=5)
+trainloader=DataLoader(dataset=dataset_train, batch_size=minibatch_size, shuffle=True)
-def train_loop(i = 3):
+def train_loop(i = 100):
     for i in range(i):
-        for xb, yb_expected in trainloader: # for each iteration a bach of samples is taken from loader(currently batch_size=5)
+        for xb, yb_expected in trainloader:
-            optimizer.zero_grad()
             yp = model(xb)
             # debug
+            """
             debug_xb = pandas.DataFrame(xb.numpy())
             debug_yb_expected = pandas.DataFrame(yb_expected.numpy())
             debug_yp = pandas.DataFrame(yp.detach().numpy())
+            """
+            debug_yp = pandas.DataFrame(yp.detach().numpy())
-            loss = criterion(yp, yb_expected)
+            optimizer.zero_grad()
+            loss = criterion(torch.squeeze(yp), yb_expected)
             dev_y_pred_float_tensor = model(dev_x)
             dev_y_pred_float_df = pandas.DataFrame(dev_y_pred_float_tensor.detach().numpy())
             auc_score = roc_auc_score(dev_y_test, dev_y_pred_float_df)
-            print("auc: ", auc_score, "loss: ", loss.item())
-            if(auc_score > 0.9):
+            print("auc:\t", auc_score, "\tloss:\t", loss.item())
+            if ((auc_score > 0.80)):
                 break
             loss.backward()
             optimizer.step()
-        if (auc_score > 0.9):
+        if ((auc_score > 0.80)):
             break
     #print(loss)
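
The reworked loop now draws shuffled minibatches of 200, squeezes the model output from (batch, 1) to (batch,) before the loss (BCELoss requires input and target of the same shape, and newer PyTorch versions reject the mismatch that MSELoss only warned about), scores dev AUC after every step, and early-stops at 0.80 before applying the pending update. A standalone sketch of that logic follows; `epochs` and `stop_auc` are hypothetical names (the commit reuses `i` for both the epoch count and the loop variable), and model, criterion, optimizer, trainloader, dev_x, and dev_y_test are assumed from the surrounding script.

import torch
from sklearn.metrics import roc_auc_score

def train_loop(epochs=100, stop_auc=0.80):
    for epoch in range(epochs):
        for xb, yb_expected in trainloader:
            yp = model(xb)                     # shape (batch, 1)
            optimizer.zero_grad()              # reset gradients each step
            # squeeze (batch, 1) -> (batch,) so BCELoss shapes match the targets
            loss = criterion(torch.squeeze(yp), yb_expected)
            with torch.no_grad():              # sketch uses no_grad in place of .detach()
                dev_pred = model(dev_x).squeeze().numpy()
            auc_score = roc_auc_score(dev_y_test, dev_pred)
            print("auc:\t", auc_score, "\tloss:\t", loss.item())
            if auc_score > stop_auc:           # stop before backward/step, as in the commit
                break
            loss.backward()
            optimizer.step()
        if auc_score > stop_auc:
            break

Checking dev AUC on every minibatch is what makes the mid-epoch break possible, at the cost of a full dev-set forward pass per step.
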

File diff suppressed because it is too large

File diff suppressed because it is too large