This commit is contained in:
nlitkowski 2021-05-26 03:33:25 +02:00
parent 4689a528ad
commit f3e75f5e1f
4 changed files with 3428 additions and 3428 deletions

File diff suppressed because it is too large Load Diff

11
main.py
View File

@@ -14,10 +14,12 @@ TRAIN_PATH = "train"
WORD_2_VEC_MODEL_NAME = "word2vec-google-news-300"
EXP_FILE_NAME = "expected.tsv"
FILE_SEP = "\t"
BATCH_SIZE = 10
EPOCHS = 10
IN_HEADER_FILE_NAME = "in-header.tsv"
OUT_HEADER_FILE_NAME = "out-header.tsv"
# Model training config
BATCH_SIZE = 5
EPOCHS = 30
THRESHOLD = 0.5
# Model dimensions
@@ -41,6 +43,7 @@ def main(dirnames):
print("Reading input data...")
in_sets = []
for d in dirnames:
print(f"\tReading dir: {d}...")
in_sets.append(get_tsv_data(
os.path.join(d, IN_FILE_NAME), names=in_cols))
@@ -72,11 +75,13 @@ def main(dirnames):
model.eval()
for i in range(len(X_ins)):
print(
f"\tPredicting for: {os.path.join(dirnames[i], IN_FILE_NAME)}...")
predictions = predict(model, X_ins[i])
out_file_path = os.path.join(dirnames[i], OUT_FILE_NAME)
print(f"Saving predictions to file: {out_file_path}")
np.asarray(predictions, dtype=np.int32).tofile(out_file_path, sep="\n")
np.asarray(predictions, dtype=np.int32).tofile(out_file_path, sep="\n")
def vectorize(set, w2v_model):

View File

@@ -16,22 +16,17 @@ class Model(nn.Module):
self.output_dim = output_dim
self.fc1 = nn.Linear(self.input_dim, self.hidden_dim)
self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim)
self.fc3 = nn.Linear(self.hidden_dim, self.output_dim)
self.r1 = nn.ReLU()
self.r2 = nn.ReLU()
self.fc2 = nn.Linear(self.hidden_dim, self.output_dim)
self.criterion = nn.BCELoss()
self.optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
self.optimizer = torch.optim.SGD(self.parameters(), lr=0.02)
def forward(self, x):
"""Step forward learning fn"""
x = self.fc1(x)
x = self.r1(x)
x = self.r2(x)
x = self.fc3(x)
x = torch.relu(x)
x = self.fc2(x)
x = torch.sigmoid(x)
return x

File diff suppressed because it is too large Load Diff