adjustments
This commit is contained in:
parent
e00f27d62f
commit
1e6d9dfa89
@ -1,6 +1,6 @@
|
||||
SRC_LANG=pl
|
||||
TRG_LANG=en
|
||||
CORPUS_NAME=opensubtitles
|
||||
CORPUS_NAME=opensubtitles_sample
|
||||
SEPARATOR=@\#@
|
||||
|
||||
DICTIONARY_WEIGHT=3
|
||||
@ -9,6 +9,7 @@ all: corpora/$(CORPUS_NAME)/alignments.txt corpora/$(CORPUS_NAME)/src_clean.txt
|
||||
|
||||
|
||||
clean:
|
||||
rm -f corpora/$(CORPUS_NAME)/*.norm
|
||||
rm -f corpora/$(CORPUS_NAME)/*.lem
|
||||
rm -f corpora/$(CORPUS_NAME)/*.dict
|
||||
rm -f corpora/$(CORPUS_NAME)/src_clean.txt
|
||||
|
@ -17,7 +17,6 @@ def lemmatize_sentences(language_code, sentences):
|
||||
response_json = json.loads(response.text)
|
||||
|
||||
result = {'normalized':[], 'lemmatized':[]}
|
||||
print(response_json)
|
||||
for processed_sentence in response_json['processed_sentences']:
|
||||
result['normalized'].append(processed_sentence['normalized'])
|
||||
result['lemmatized'].append(processed_sentence['tokens'])
|
||||
@ -33,7 +32,7 @@ def write_result(result, norm_file, lem_file):
|
||||
file_name = sys.argv[1]
|
||||
language_code = sys.argv[2]
|
||||
norm_output_name = sys.argv[3]
|
||||
lem_output_name = sys.argv[3]
|
||||
lem_output_name = sys.argv[4]
|
||||
|
||||
sentences_buffer = []
|
||||
with open(file_name) as in_file, open(norm_output_name, 'w') as out_norm, open(lem_output_name, 'w') as out_lem:
|
||||
|
Loading…
Reference in New Issue
Block a user