update
This commit is contained in:
parent
53fd98388c
commit
4689a528ad
25
main.py
25
main.py
@ -26,7 +26,7 @@ HIDDEN_D = 600
|
|||||||
OUTPUT_D = 1
|
OUTPUT_D = 1
|
||||||
|
|
||||||
|
|
||||||
def main(dirname):
|
def main(dirnames):
|
||||||
check_path(IN_HEADER_FILE_NAME)
|
check_path(IN_HEADER_FILE_NAME)
|
||||||
in_cols = (pd.read_csv(IN_HEADER_FILE_NAME, sep=FILE_SEP)).columns
|
in_cols = (pd.read_csv(IN_HEADER_FILE_NAME, sep=FILE_SEP)).columns
|
||||||
check_path(OUT_HEADER_FILE_NAME)
|
check_path(OUT_HEADER_FILE_NAME)
|
||||||
@ -39,7 +39,10 @@ def main(dirname):
|
|||||||
TRAIN_PATH, EXP_FILE_NAME), names=out_cols, compression=None)
|
TRAIN_PATH, EXP_FILE_NAME), names=out_cols, compression=None)
|
||||||
|
|
||||||
print("Reading input data...")
|
print("Reading input data...")
|
||||||
in_set = get_tsv_data(os.path.join(dirname, IN_FILE_NAME), names=in_cols)
|
in_sets = []
|
||||||
|
for d in dirnames:
|
||||||
|
in_sets.append(get_tsv_data(
|
||||||
|
os.path.join(d, IN_FILE_NAME), names=in_cols))
|
||||||
|
|
||||||
print("Preparing training data...")
|
print("Preparing training data...")
|
||||||
X_train_raw = train_set_features[in_cols[0]].str.lower()
|
X_train_raw = train_set_features[in_cols[0]].str.lower()
|
||||||
@ -47,14 +50,19 @@ def main(dirname):
|
|||||||
Y_train = train_set_labels[out_cols[0]]
|
Y_train = train_set_labels[out_cols[0]]
|
||||||
|
|
||||||
print("Preparing input data...")
|
print("Preparing input data...")
|
||||||
X_in_raw = in_set[in_cols[0]].str.lower()
|
X_ins_raw = []
|
||||||
|
for s in in_sets:
|
||||||
|
X_ins_raw.append(s[in_cols[0]].str.lower())
|
||||||
|
|
||||||
print("Loading word 2 vector model...")
|
print("Loading word 2 vector model...")
|
||||||
w2v_model = downloader.load(WORD_2_VEC_MODEL_NAME)
|
w2v_model = downloader.load(WORD_2_VEC_MODEL_NAME)
|
||||||
|
|
||||||
print("Vectorizing data...")
|
print("Vectorizing data...")
|
||||||
X_train = vectorize(X_train, w2v_model)
|
X_train = vectorize(X_train, w2v_model)
|
||||||
X_in = vectorize(X_in_raw, w2v_model)
|
|
||||||
|
X_ins = []
|
||||||
|
for r in X_ins_raw:
|
||||||
|
X_ins.append(vectorize(r, w2v_model))
|
||||||
|
|
||||||
model = Model(input_dim=INPUT_D, hidden_dim=HIDDEN_D, output_dim=OUTPUT_D)
|
model = Model(input_dim=INPUT_D, hidden_dim=HIDDEN_D, output_dim=OUTPUT_D)
|
||||||
|
|
||||||
@ -63,10 +71,11 @@ def main(dirname):
|
|||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|
||||||
predictions = predict(model, X_in)
|
for i in range(len(X_ins)):
|
||||||
|
predictions = predict(model, X_ins[i])
|
||||||
|
|
||||||
out_file_path = os.path.join(dirname, OUT_FILE_NAME)
|
out_file_path = os.path.join(dirnames[i], OUT_FILE_NAME)
|
||||||
print(f"Saving predictions to file: {out_file_path}")
|
print(f"Saving predictions to file: {out_file_path}")
|
||||||
np.asarray(predictions, dtype=np.int32).tofile(out_file_path, sep="\n")
|
np.asarray(predictions, dtype=np.int32).tofile(out_file_path, sep="\n")
|
||||||
|
|
||||||
|
|
||||||
@ -113,4 +122,4 @@ def check_path(filename: str):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
raise Exception("Name of working dir not specified!")
|
raise Exception("Name of working dir not specified!")
|
||||||
main(sys.argv[1])
|
main(sys.argv[1:])
|
||||||
|
14
model.py
14
model.py
@ -19,7 +19,8 @@ class Model(nn.Module):
|
|||||||
self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim)
|
self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim)
|
||||||
self.fc3 = nn.Linear(self.hidden_dim, self.output_dim)
|
self.fc3 = nn.Linear(self.hidden_dim, self.output_dim)
|
||||||
|
|
||||||
self.relu = nn.ReLU()
|
self.r1 = nn.ReLU()
|
||||||
|
self.r2 = nn.ReLU()
|
||||||
|
|
||||||
self.criterion = nn.BCELoss()
|
self.criterion = nn.BCELoss()
|
||||||
self.optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
|
self.optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
|
||||||
@ -28,19 +29,16 @@ class Model(nn.Module):
|
|||||||
"""Step forward learning fn"""
|
"""Step forward learning fn"""
|
||||||
|
|
||||||
x = self.fc1(x)
|
x = self.fc1(x)
|
||||||
x = self.relu(x)
|
x = self.r1(x)
|
||||||
x = self.fc2(x)
|
x = self.r2(x)
|
||||||
x = self.relu(x)
|
|
||||||
x = self.fc3(x)
|
x = self.fc3(x)
|
||||||
x = torch.sigmoid(x)
|
x = torch.sigmoid(x)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def run_training(self, X_train, Y_train, batch_size, epochs_count):
|
def run_training(self, X_train, Y_train, batch_size, epochs_count):
|
||||||
for _ in range(epochs_count):
|
for i in range(epochs_count):
|
||||||
self.train()
|
self.train()
|
||||||
print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.input_dim}")
|
print(f"Epochs: {i + 1}/{epochs_count}")
|
||||||
print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.hidden_dim}")
|
|
||||||
print(f"{Y_train.shape[0]}, {Y_train.shape[0] == self.output_dim}")
|
|
||||||
for i in range(0, Y_train.shape[0], batch_size):
|
for i in range(0, Y_train.shape[0], batch_size):
|
||||||
X = X_train[i: i + batch_size]
|
X = X_train[i: i + batch_size]
|
||||||
X = torch.tensor(X)
|
X = torch.tensor(X)
|
||||||
|
5152
test-A/out.tsv
Normal file
5152
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user