corrected add sources
This commit is contained in:
parent
c8da3418bf
commit
3c5e56905e
6
build_opensubtitles.sh
Executable file
6
build_opensubtitles.sh
Executable file
@@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
cd fast_aligner
|
||||||
|
make -j2
|
||||||
|
cd ../tests
|
||||||
|
./addFastAlignedTM.sh
|
@@ -1,6 +1,6 @@
|
|||||||
SRC_LANG=pl
|
SRC_LANG=pl
|
||||||
TRG_LANG=en
|
TRG_LANG=en
|
||||||
CORPUS_NAME=opensubtitles
|
CORPUS_NAME=opensubtitles_sample
|
||||||
SEPARATOR=@\#@
|
SEPARATOR=@\#@
|
||||||
|
|
||||||
DICTIONARY_WEIGHT=3
|
DICTIONARY_WEIGHT=3
|
||||||
|
@@ -31,7 +31,9 @@ with open(sys.argv[1]) as sources_file:
|
|||||||
sources_buffer = []
|
sources_buffer = []
|
||||||
for line in sources_file:
|
for line in sources_file:
|
||||||
counter += 1
|
counter += 1
|
||||||
sources_buffer.append(line.rstrip().split('\t'))
|
id_raw, link, name = line.rstrip().split('\t')
|
||||||
|
|
||||||
|
sources_buffer.append([int(id_raw),name, link])
|
||||||
if len(sources_buffer) == BUFFER_SIZE:
|
if len(sources_buffer) == BUFFER_SIZE:
|
||||||
addSources(sources_buffer)
|
addSources(sources_buffer)
|
||||||
sources_buffer = []
|
sources_buffer = []
|
||||||
|
Loading…
Reference in New Issue
Block a user