minor changes
This commit is contained in:
parent
11753e77a1
commit
9f91ea15b0
8
cat/versions_available/tmrepository_enhr.cfg
Normal file
8
cat/versions_available/tmrepository_enhr.cfg
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
dir@#@tmrepository_enhr
|
||||||
|
concordia_host@#@concordia.vm.wmi.amu.edu.pl
|
||||||
|
concordia_port@#@8800
|
||||||
|
tmid@#@1
|
||||||
|
desc@#@Welcome to Concordia. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory contains over 1M sentences from the TMrepository system (http://concordia.vm.wmi.amu.edu.pl/tmrepository). Please enter an English sentence in the field below and press Enter (or use the search button). You can test the system on predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. After the search, click on the highlighted fragments to see their context.
|
||||||
|
enjoy@#@Enjoy your work with the system!
|
||||||
|
prompt@#@Enter search pattern (English sentence):
|
||||||
|
suggestion@#@BiHs Komsic resigns from his party
|
@ -1 +0,0 @@
|
|||||||
../versions_available/stocznia_enpl.cfg
|
|
@ -1 +0,0 @@
|
|||||||
../versions_available/stocznia_plen.cfg
|
|
1
cat/versions_enabled/tmrepository_enhr.cfg
Symbolic link
1
cat/versions_enabled/tmrepository_enhr.cfg
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../versions_available/tmrepository_enhr.cfg
|
@ -1,6 +1,6 @@
|
|||||||
SRC_LANG=pl
|
SRC_LANG=pl
|
||||||
TRG_LANG=en
|
TRG_LANG=en
|
||||||
CORPUS_NAME=europarl_sample
|
CORPUS_NAME=opus
|
||||||
DICTIONARY_NAME=classyf_popular_medicine
|
DICTIONARY_NAME=classyf_popular_medicine
|
||||||
SEPARATOR=@\#@
|
SEPARATOR=@\#@
|
||||||
CORPUS_CHUNK_SIZE=100000
|
CORPUS_CHUNK_SIZE=100000
|
||||||
|
12
mgiza-aligner/corpus-compilator/filter.sh
Executable file
12
mgiza-aligner/corpus-compilator/filter.sh
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/sh
# Runs the corpus filtering steps for one dictionary/corpus pair:
#   1. make clean-filtering
#   2. make dictionaries/<DICTIONARY_NAME>.lem
#   3. get_corpus_lines.py reads the .lem file and the corpus report.txt;
#      its standard output is saved as corpus_lines.txt
#   4. compile.py is given the clean source/target files, corpus_lines.txt
#      and the src_filtered.txt / trg_filtered.txt paths (presumably its
#      outputs -- confirm against compile.py)
# All paths are relative, so the script has to be started from the directory
# that holds the Makefile and the helper scripts.

# Abort on the first failing step; otherwise a failed make or a failed
# get_corpus_lines.py would let the later steps run on missing or partial
# input files.
set -e

DICTIONARY_NAME=classyf_popular_medicine
CORPUS_NAME=opus

# Paths shared by several steps below.
DICTIONARY_FILE="dictionaries/$DICTIONARY_NAME.lem"
CORPUS_DIR="corpora/$CORPUS_NAME"

make clean-filtering
make "$DICTIONARY_FILE"

./get_corpus_lines.py "$DICTIONARY_FILE" "$CORPUS_DIR/report.txt" > "$CORPUS_DIR/corpus_lines.txt"
./compile.py "$CORPUS_DIR/src_clean.txt" "$CORPUS_DIR/trg_clean.txt" "$CORPUS_DIR/corpus_lines.txt" "$CORPUS_DIR/src_filtered.txt" "$CORPUS_DIR/trg_filtered.txt"
|
||||||
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
SOLR_HOME=/home/rafalj/programs/solr-6.0.0
|
SOLR_HOME=/home/rjawor/programs/solr-5.5.4
|
||||||
|
|
||||||
$SOLR_HOME/bin/solr restart
|
$SOLR_HOME/bin/solr restart
|
||||||
$SOLR_HOME/bin/solr create -c corpus_compiler
|
$SOLR_HOME/bin/solr create -c corpus_compiler
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
CORPUS_NAME="europarl_sample"
|
CORPUS_NAME="tmrepository_enhr"
|
||||||
SRC_LANG_ID=1
|
SRC_LANG_ID=2
|
||||||
TRG_LANG_ID=2
|
TRG_LANG_ID=6
|
||||||
|
|
||||||
./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
|
./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
|
||||||
|
28
tests/generateIndex.py
Executable file
28
tests/generateIndex.py
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import json
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import host
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Base URL of the Concordia server, taken from the project-local `host`
# module. The port suffix is appended only when a non-empty port is set.
address = 'http://' + host.concordia_host
if host.concordia_port:
    address += ':' + host.concordia_port

# print is written with parentheses around a single string so the line parses
# both as a Python 2 print statement and as a print() call; output is the same.
print("Generating index...")
start = time.time()

# JSON request body: asks the server to run 'refreshIndex' for tmId 1.
data = {
    'operation': 'refreshIndex',
    'tmId': 1,
}
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')

# Supplying a request body makes urllib2 send a POST. The reply is read in
# full (so the timing below covers the whole operation) and then discarded;
# the response is closed explicitly instead of being left to the garbage
# collector.
response = urllib2.urlopen(req, json.dumps(data))
try:
    response.read()
finally:
    response.close()

end = time.time()
print("Index regeneration complete. The operation took %.4f s" % (end - start))
|
Loading…
Reference in New Issue
Block a user