Start translator

This commit is contained in:
SzamanFL 2021-01-17 01:36:24 +01:00
parent ba5d69080f
commit 9ec62bc7bb

37
src/train.py Normal file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env python3
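# Training script for the translator (work in progress): so far it only loads
# and normalizes line-aligned source/target sentence files. No model is defined
# in this file yet (an LSTM is presumably planned -- TODO confirm).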
import argparse
import re
from collections import Counter
# Matches every character that is NOT a lowercase ASCII letter or a space.
# Compiled once so both halves of a pair share the same pattern.
_NON_LETTER_RE = re.compile(r"[^a-z ]")


def clear_line(string, target):
    """Normalize one source/target line pair.

    Each line is lower-cased, then every character other than ``a``-``z``
    and the space is removed. This also drops digits, punctuation,
    non-ASCII letters and the trailing newline.

    Args:
        string: Raw source-side line.
        target: Raw target-side line.

    Returns:
        A ``(cleaned_string, cleaned_target)`` tuple of ``str``.
    """
    return (
        _NON_LETTER_RE.sub("", string.lower()),
        _NON_LETTER_RE.sub("", target.lower()),
    )
def read_clear_data(in_file_path, exptected_file_path):
    """Read two line-aligned text files and return their cleaned lines.

    Both files are iterated in lockstep and every pair of lines is passed
    through ``clear_line``. Because the pairing uses ``zip``, reading stops
    at the end of the shorter file; surplus lines in the longer file are
    ignored.

    Args:
        in_file_path: Path of the source-side file.
        exptected_file_path: Path of the target-side file. The parameter
            keeps its original spelling so keyword callers keep working.

    Returns:
        A ``(source_data, target_data)`` tuple of two equally long lists
        holding the cleaned source and target lines.
    """
    print("Reading data")
    source_data = []
    target_data = []
    with open(in_file_path) as in_file, open(exptected_file_path) as exp_file:
        for string, target in zip(in_file, exp_file):
            string, target = clear_line(string, target)
            # The original called the non-existent list method ``appen``.
            source_data.append(string)
            target_data.append(target)
    return source_data, target_data
def create_dict(data):
    """Count how often each whitespace-separated token occurs in *data*.

    NOTE(review): the original loop body was empty and the function
    returned nothing; counting the tokens of every line is the presumed
    intent -- confirm before relying on it.

    Args:
        data: Iterable of text lines (``str``).

    Returns:
        A ``collections.Counter`` mapping each token to its number of
        occurrences; empty when *data* is empty.
    """
    counter = Counter()
    for line in data:
        # str.split() with no argument also discards empty tokens produced
        # by repeated or leading/trailing spaces.
        counter.update(line.split())
    return counter
def main():
    """Parse the command-line options and load the cleaned training data.

    ``--in_f`` and ``--exp`` are required: without them ``None`` would be
    handed to ``open()`` and fail with an unhelpful ``TypeError``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--in_f', required=True,
                        help="path of the source-side text file")
    parser.add_argument('--exp', required=True,
                        help="path of the target-side text file")
    # Parsed but not consumed anywhere in this file yet.
    parser.add_argument("--vocab")
    args = parser.parse_args()
    # The loaded lists are not used further yet; the later steps are not
    # implemented in this file.
    source_data, target_data = read_clear_data(args.in_f, args.exp)


# Run only when executed as a script, so importing this module does not
# trigger argument parsing and file reads.
if __name__ == "__main__":
    main()