ksanu 2019-12-03 23:08:49 +01:00
parent 270eab1358
commit b8222a2f25
6 changed files with 36749 additions and 36740 deletions


@@ -146,14 +146,20 @@
     <breakpoints>
       <line-breakpoint suspend="THREAD" type="python-line">
         <url>file://$PROJECT_DIR$/s.py</url>
-        <line>139</line>
+        <line>141</line>
         <option name="timeStamp" value="3" />
       </line-breakpoint>
+      <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+        <url>file://$PROJECT_DIR$/s.py</url>
+        <line>193</line>
+        <option name="timeStamp" value="8" />
+      </line-breakpoint>
     </breakpoints>
   </breakpoint-manager>
   <watches-manager>
     <configuration name="PythonConfigurationType">
-      <watch expression="dev_y" />
+      <watch expression="debug_yp" />
     </configuration>
   </watches-manager>
 </component>

File diff suppressed because it is too large

File diff suppressed because it is too large

s.py

@@ -11,16 +11,18 @@ from torch.utils.data import Dataset, DataLoader
 #10 features: 4 normal + 6 from domain_onehot + 38 char labels
 model = nn.Sequential(
-    nn.Linear(48, 16),
+    nn.Linear(48, 96, bias=True),
     nn.ReLU(),
-    nn.Linear(16,1),
+    nn.Linear(96,48,bias=True),
+    nn.ReLU(),
+    nn.Linear(48, 1, bias=True),
     nn.Sigmoid())
-criterion = nn.MSELoss()
-optimizer = optim.SGD(model.parameters(), lr=0.000001, momentum=0.9)
-#optimizer = optim.Adam(model.parameters())
+criterion = nn.BCELoss()
+#optimizer = optim.SGD(model.parameters(), lr=0.00001, momentum=0.9)
+optimizer = optim.Adam(model.parameters())
-minibatch_size = 1000
+minibatch_size = 200
 def count_polish_diacritics(x):
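
Read as plain code, the state after this hunk is the sketch below: a deeper 48-96-48-1 network whose Sigmoid output feeds a binary cross-entropy loss instead of MSE, trained with Adam instead of heavily damped SGD. This is a minimal sketch assuming the nn/optim imports from the top of s.py; the comments are explanatory and not part of the commit.

import torch.nn as nn
import torch.optim as optim

# 48 inputs: 4 numeric features + 6 domain one-hot + 38 char-label columns
model = nn.Sequential(
    nn.Linear(48, 96, bias=True),   # widened from 48 -> 16
    nn.ReLU(),
    nn.Linear(96, 48, bias=True),   # extra hidden layer added in this commit
    nn.ReLU(),
    nn.Linear(48, 1, bias=True),
    nn.Sigmoid())                   # probability in (0, 1), as BCELoss expects
criterion = nn.BCELoss()            # binary cross-entropy replaces MSELoss
optimizer = optim.Adam(model.parameters())   # default lr=0.001
minibatch_size = 200

Pairing Sigmoid with BCELoss is the standard probability formulation for binary classification; a numerically safer variant would drop the Sigmoid and use nn.BCEWithLogitsLoss on raw logits.
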
@@ -105,7 +107,7 @@ class TrainDataset(Dataset):
         self.y = y
     def __len__(self):
-        return len(self.X)
+        return self.X.shape[0]
     def __getitem__(self, idx):
         return self.X[idx], self.y[idx]
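
For context, the whole class around this change is roughly the following sketch; only `self.y = y`, `__len__`, and `__getitem__` are visible in the hunk, so the `__init__` signature is an assumption.

from torch.utils.data import Dataset

class TrainDataset(Dataset):
    def __init__(self, X, y):   # assumed: X, y are pre-built feature/label tensors
        self.X = X
        self.y = y
    def __len__(self):
        # number of samples = size of the first axis of the 2-D feature tensor
        return self.X.shape[0]
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

For a tensor, len(self.X) already returns the size of the first dimension, so the change is cosmetic: shape[0] just names the axis explicitly.
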
@@ -173,34 +175,35 @@ testA_x = torch.cat([testA_x_temp2, testA_x_words_onehot], 1)
 dataset_train = TrainDataset(x, y)
-trainloader=DataLoader(dataset=dataset_train,batch_size=5)
+trainloader=DataLoader(dataset=dataset_train, batch_size=minibatch_size, shuffle=True)
-def train_loop(i = 3):
+def train_loop(i = 100):
     for i in range(i):
-        for xb, yb_expected in trainloader: # for each iteration a bach of samples is taken from loader(currently batch_size=5)
+        for xb, yb_expected in trainloader:
-            optimizer.zero_grad()
             yp = model(xb)
             # debug
+            """
             debug_xb = pandas.DataFrame(xb.numpy())
             debug_yb_expected = pandas.DataFrame(yb_expected.numpy())
             debug_yp = pandas.DataFrame(yp.detach().numpy())
+            """
+            debug_yp = pandas.DataFrame(yp.detach().numpy())
-            loss = criterion(yp, yb_expected)
+            optimizer.zero_grad()
+            loss = criterion(torch.squeeze(yp), yb_expected)
             dev_y_pred_float_tensor = model(dev_x)
             dev_y_pred_float_df = pandas.DataFrame(dev_y_pred_float_tensor.detach().numpy())
             auc_score = roc_auc_score(dev_y_test, dev_y_pred_float_df)
-            print("auc: ", auc_score, "loss: ", loss.item())
-            if(auc_score > 0.9):
+            print("auc:\t", auc_score, "\tloss:\t", loss.item())
+            if ((auc_score > 0.80)):
                 break
             loss.backward()
             optimizer.step()
-        if (auc_score > 0.9):
+        if ((auc_score > 0.80)):
             break
     #print(loss)
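
The reworked loop now draws shuffled minibatches of 200, squeezes the model output from (batch, 1) to (batch,) before the loss (BCELoss requires input and target of the same shape, and newer PyTorch versions reject the mismatch that MSELoss only warned about), scores dev AUC after every step, and early-stops at 0.80 before applying the pending update. A standalone sketch of that logic follows; `epochs` and `stop_auc` are hypothetical names (the commit reuses `i` for both the epoch count and the loop variable), and model, criterion, optimizer, trainloader, dev_x, and dev_y_test are assumed from the surrounding script.

import torch
from sklearn.metrics import roc_auc_score

def train_loop(epochs=100, stop_auc=0.80):
    for epoch in range(epochs):
        for xb, yb_expected in trainloader:
            yp = model(xb)                     # shape (batch, 1)
            optimizer.zero_grad()              # reset gradients each step
            # squeeze (batch, 1) -> (batch,) so BCELoss shapes match the targets
            loss = criterion(torch.squeeze(yp), yb_expected)
            with torch.no_grad():              # sketch uses no_grad in place of .detach()
                dev_pred = model(dev_x).squeeze().numpy()
            auc_score = roc_auc_score(dev_y_test, dev_pred)
            print("auc:\t", auc_score, "\tloss:\t", loss.item())
            if auc_score > stop_auc:           # stop before backward/step, as in the commit
                break
            loss.backward()
            optimizer.step()
        if auc_score > stop_auc:
            break

Checking dev AUC on every minibatch is what makes the mid-epoch break possible, at the cost of a full dev-set forward pass per step.
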

File diff suppressed because it is too large

File diff suppressed because it is too large