Sane words, with score

This commit is contained in:
ksanu 2019-11-29 14:56:57 +01:00
parent f0970031a0
commit 7deda60d20
6 changed files with 34525 additions and 34511 deletions

View File

@ -3,11 +3,11 @@
<component name="ChangeListManager">
<list default="true" id="d25a65da-2ba0-4272-a0a5-c59cbecb6088" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/dev-0/out.tsv" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/dev-0/out_float.tsv" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/dev-0/out.tsv" beforeDir="false" afterPath="$PROJECT_DIR$/dev-0/out.tsv" afterDir="false" />
<change beforePath="$PROJECT_DIR$/dev-0/out_float.tsv" beforeDir="false" afterPath="$PROJECT_DIR$/dev-0/out_float.tsv" afterDir="false" />
<change beforePath="$PROJECT_DIR$/solution2.py" beforeDir="false" afterPath="$PROJECT_DIR$/solution2.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/test-A/out.tsv" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/test-A/out_float.tsv" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/test-A/out.tsv" beforeDir="false" afterPath="$PROJECT_DIR$/test-A/out.tsv" afterDir="false" />
<change beforePath="$PROJECT_DIR$/test-A/out_float.tsv" beforeDir="false" afterPath="$PROJECT_DIR$/test-A/out_float.tsv" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -65,6 +65,18 @@ print(y)
print("Training...")
criterion = torch.nn.MSELoss(reduction='sum')
dev_data = pandas.read_csv('dev-0/in.tsv', sep='\t', names=['Domain', 'Word', 'Frequency'], header=None)
dev_x1 = Normalize(torch.tensor(dev_data['Frequency'], dtype=torch.float), x1)
dev_x2 = Normalize(torch.tensor(count_vowels(dev_data['Word']), dtype=torch.float), x2)
dev_x3 = Normalize(torch.tensor(dev_data['Domain'].astype('category').cat.codes, dtype=torch.float), x3)
dev_x4 = Normalize(torch.tensor(count_polish_diacritics(dev_data['Word']), dtype=torch.float), x4)
dev_x5 = Normalize(torch.tensor(dev_data['Word'].str.len(), dtype=torch.float), x5)
dev_x = torch.stack((dev_x1, dev_x2, dev_x3, dev_x4, dev_x5), 0)
dev_y_pred = pandas.DataFrame(pandas.read_csv('dev-0/out.tsv', encoding="utf-8", delimiter='\t', header=None))
for i in range(80):
for j in range(1000):
y_predicted = model(x)
@ -81,32 +93,34 @@ for i in range(80):
W2 = W2 - learning_rate * W2.grad
b2 = b2 - learning_rate * b2.grad
dev_y_test = model(dev_x)
dev_y_test_f = dev_y_test.numpy()
dev_y_test = np.where(dev_y_test_f > 0.5, 1, 0)
print(dev_y_test)
score = f1_score(dev_y_test, dev_y_pred)
W1.requires_grad_(True)
b1.requires_grad_(True)
W2.requires_grad_(True)
b2.requires_grad_(True)
if (score < 0.35):
break
print(score)
print(str(i), " ; ", cost)
print(str(i), " ; ", cost)
if (cost.item() < 1700):
break
#print("Dev0 pred...")
# dev
print("Dev0 pred...")
#dev data:
dev_data = pandas.read_csv('dev-0/in.tsv', sep='\t', names=['Domain', 'Word', 'Frequency'], header=None)
dev_x1 = Normalize(torch.tensor(dev_data['Frequency'], dtype=torch.float), x1)
dev_x2 = Normalize(torch.tensor(count_vowels(dev_data['Word']), dtype=torch.float), x2)
dev_x3 = Normalize(torch.tensor(dev_data['Domain'].astype('category').cat.codes, dtype=torch.float), x3)
dev_x4 = Normalize(torch.tensor(count_polish_diacritics(dev_data['Word']), dtype=torch.float), x4)
dev_x5 = Normalize(torch.tensor(dev_data['Word'].str.len(), dtype=torch.float), x5)
dev_x = torch.stack((dev_x1, dev_x2, dev_x3, dev_x4, dev_x5), 0)
dev_y_test = pandas.DataFrame(pandas.read_csv('dev-0/expected.tsv', encoding="utf-8", delimiter='\t', header=None))
dev_y = model(dev_x)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff