update

This commit is contained in:
parent 53fd98388c
commit 4689a528ad

main.py (23 changed lines)
@@ -26,7 +26,7 @@ HIDDEN_D = 600
 OUTPUT_D = 1
 
 
-def main(dirname):
+def main(dirnames):
     check_path(IN_HEADER_FILE_NAME)
     in_cols = (pd.read_csv(IN_HEADER_FILE_NAME, sep=FILE_SEP)).columns
     check_path(OUT_HEADER_FILE_NAME)

@@ -39,7 +39,10 @@ def main(dirname):
         TRAIN_PATH, EXP_FILE_NAME), names=out_cols, compression=None)
 
     print("Reading input data...")
-    in_set = get_tsv_data(os.path.join(dirname, IN_FILE_NAME), names=in_cols)
+    in_sets = []
+    for d in dirnames:
+        in_sets.append(get_tsv_data(
+            os.path.join(d, IN_FILE_NAME), names=in_cols))
 
     print("Preparing training data...")
     X_train_raw = train_set_features[in_cols[0]].str.lower()

@@ -47,14 +50,19 @@ def main(dirname):
     Y_train = train_set_labels[out_cols[0]]
 
     print("Preparing input data...")
-    X_in_raw = in_set[in_cols[0]].str.lower()
+    X_ins_raw = []
+    for s in in_sets:
+        X_ins_raw.append(s[in_cols[0]].str.lower())
 
     print("Loading word 2 vector model...")
     w2v_model = downloader.load(WORD_2_VEC_MODEL_NAME)
 
     print("Vectorizing data...")
     X_train = vectorize(X_train, w2v_model)
-    X_in = vectorize(X_in_raw, w2v_model)
+
+    X_ins = []
+    for r in X_ins_raw:
+        X_ins.append(vectorize(r, w2v_model))
 
     model = Model(input_dim=INPUT_D, hidden_dim=HIDDEN_D, output_dim=OUTPUT_D)
 

@@ -63,9 +71,10 @@ def main(dirname):
 
     model.eval()
 
-    predictions = predict(model, X_in)
+    for i in range(len(X_ins)):
+        predictions = predict(model, X_ins[i])
 
-    out_file_path = os.path.join(dirname, OUT_FILE_NAME)
+        out_file_path = os.path.join(dirnames[i], OUT_FILE_NAME)
     print(f"Saving predictions to file: {out_file_path}")
     np.asarray(predictions, dtype=np.int32).tofile(out_file_path, sep="\n")
 

@@ -113,4 +122,4 @@ def check_path(filename: str):
 if __name__ == "__main__":
     if len(sys.argv) < 2:
         raise Exception("Name of working dir not specified!")
-    main(sys.argv[1])
+    main(sys.argv[1:])
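
With this change every positional argument is treated as a working directory, so a single run can write predictions for several test sets instead of one. A minimal, self-contained sketch of the new calling convention (the directory names, the "out.tsv" value of OUT_FILE_NAME, and the stubbed body are illustrative assumptions, not code from the repository):

import os
import sys

OUT_FILE_NAME = "out.tsv"  # assumed value; the constant name itself comes from main.py

def main(dirnames):
    # One pass per directory, mirroring the loop added in this commit:
    # each directory receives its own output file.
    for d in dirnames:
        out_file_path = os.path.join(d, OUT_FILE_NAME)
        print(f"Saving predictions to file: {out_file_path}")

if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise Exception("Name of working dir not specified!")
    main(sys.argv[1:])  # e.g. python main.py dev-0 test-A (hypothetical directories)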

model.py (14 changed lines)
@@ -19,7 +19,8 @@ class Model(nn.Module):
         self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim)
         self.fc3 = nn.Linear(self.hidden_dim, self.output_dim)
 
-        self.relu = nn.ReLU()
+        self.r1 = nn.ReLU()
+        self.r2 = nn.ReLU()
 
         self.criterion = nn.BCELoss()
         self.optimizer = torch.optim.SGD(self.parameters(), lr=0.01)

@@ -28,19 +29,16 @@ class Model(nn.Module):
         """Step forward learning fn"""
 
         x = self.fc1(x)
-        x = self.relu(x)
-        x = self.fc2(x)
-        x = self.relu(x)
+        x = self.r1(x)
+        x = self.r2(x)
         x = self.fc3(x)
         x = torch.sigmoid(x)
         return x
 
     def run_training(self, X_train, Y_train, batch_size, epochs_count):
-        for _ in range(epochs_count):
+        for i in range(epochs_count):
             self.train()
-            print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.input_dim}")
-            print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.hidden_dim}")
-            print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.output_dim}")
+            print(f"Epochs: {i + 1}/{epochs_count}")
             for i in range(0, Y_train.shape[0], batch_size):
                 X = X_train[i: i + batch_size]
                 X = torch.tensor(X)
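
Two things are worth noting in this hunk as reconstructed above: nn.ReLU is stateless, so swapping the shared self.relu for separate self.r1/self.r2 instances does not change the math by itself (and relu(relu(x)) equals relu(x)); the substantive change is that self.fc2 is no longer called in forward, leaving a single hidden layer. The inner batch loop also reuses the name i, shadowing the new epoch counter. A minimal, runnable sketch of the resulting training pattern, with illustrative dimensions and random data rather than anything from the repository:

import torch
import torch.nn as nn

class TinyModel(nn.Module):
    # Same shape as the diff: fc1 -> ReLU -> ReLU -> fc3 -> sigmoid.
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.r1 = nn.ReLU()
        self.r2 = nn.ReLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.r1(x)
        x = self.r2(x)  # ReLU is idempotent, so this second pass is a no-op
        x = self.fc3(x)
        return torch.sigmoid(x)

# Illustrative sizes and random data; only HIDDEN_D = 600 and OUTPUT_D = 1 appear in the diff.
model = TinyModel(input_dim=25, hidden_dim=600, output_dim=1)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

X_train = torch.randn(64, 25)
Y_train = torch.randint(0, 2, (64, 1)).float()

epochs_count, batch_size = 2, 16
for epoch in range(epochs_count):
    model.train()
    print(f"Epochs: {epoch + 1}/{epochs_count}")
    for i in range(0, Y_train.shape[0], batch_size):
        optimizer.zero_grad()
        loss = criterion(model(X_train[i: i + batch_size]), Y_train[i: i + batch_size])
        loss.backward()
        optimizer.step()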

test-A/out.tsv (new file, 5152 lines)
File diff suppressed because it is too large