corrected add sources

This commit is contained in:
rjawor 2019-03-04 11:07:37 +01:00
parent c8da3418bf
commit 3c5e56905e
3 changed files with 10 additions and 2 deletions

6
build_opensubtitles.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/sh
# Build the sources in fast_aligner (make -j2), then run
# ./addFastAlignedTM.sh from the sibling tests directory.
#
# Must be started from the directory that contains fast_aligner/ and tests/,
# because every path below is relative.

# Stop at the first failing command: without this, a failed `cd` or `make`
# would let the remaining steps run from the wrong directory or against a
# build that did not complete.
set -e

cd fast_aligner
make -j2
cd ../tests
./addFastAlignedTM.sh

View File

@ -1,6 +1,6 @@
SRC_LANG=pl SRC_LANG=pl
TRG_LANG=en TRG_LANG=en
CORPUS_NAME=opensubtitles CORPUS_NAME=opensubtitles_sample
SEPARATOR=@\#@ SEPARATOR=@\#@
DICTIONARY_WEIGHT=3 DICTIONARY_WEIGHT=3

View File

@ -31,7 +31,9 @@ with open(sys.argv[1]) as sources_file:
sources_buffer = [] sources_buffer = []
for line in sources_file: for line in sources_file:
counter += 1 counter += 1
sources_buffer.append(line.rstrip().split('\t')) id_raw, link, name = line.rstrip().split('\t')
sources_buffer.append([int(id_raw),name, link])
if len(sources_buffer) == BUFFER_SIZE: if len(sources_buffer) == BUFFER_SIZE:
addSources(sources_buffer) addSources(sources_buffer)
sources_buffer = [] sources_buffer = []