minor changes

2017-07-26 13:29:22 +02:00 · 2017-07-26 13:29:22 +02:00 · 9f91ea15b0
commit 9f91ea15b0
parent 11753e77a1
9 changed files with 54 additions and 7 deletions
--- a/cat/versions_available/tmrepository_enhr.cfg
+++ b/cat/versions_available/tmrepository_enhr.cfg
@ -0,0 +1,8 @@
 dir@#@tmrepository_enhr
 concordia_host@#@concordia.vm.wmi.amu.edu.pl
 concordia_port@#@8800
 tmid@#@1
 desc@#@Welcome to Concordia. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is over 1M sentences from the TMrepository system (http://concordia.vm.wmi.amu.edu.pl/tmrepository). Please enter an English sentence in the field below and press Enter (or use the search button). You can test the system on predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. After the search, click on the highlighted fragments to see their context.
 enjoy@#@Enjoy your work with the system!
 prompt@#@Enter search pattern (English sentence):
 suggestion@#@BiHs Komsic resigns from his party
--- a/cat/versions_enabled/stocznia_enpl.cfg
+++ b/cat/versions_enabled/stocznia_enpl.cfg
@ -1 +0,0 @@
 ../versions_available/stocznia_enpl.cfg
--- a/cat/versions_enabled/stocznia_plen.cfg
+++ b/cat/versions_enabled/stocznia_plen.cfg
@ -1 +0,0 @@
 ../versions_available/stocznia_plen.cfg
--- a/cat/versions_enabled/tmrepository_enhr.cfg
+++ b/cat/versions_enabled/tmrepository_enhr.cfg
@ -0,0 +1 @@
 ../versions_available/tmrepository_enhr.cfg
--- a/mgiza-aligner/corpus-compilator/Makefile
+++ b/mgiza-aligner/corpus-compilator/Makefile
@ -1,6 +1,6 @@
 SRC_LANG=pl
 TRG_LANG=en
-CORPUS_NAME=europarl_sample
+CORPUS_NAME=opus
 DICTIONARY_NAME=classyf_popular_medicine
 SEPARATOR=@\#@
 CORPUS_CHUNK_SIZE=100000
--- a/mgiza-aligner/corpus-compilator/filter.sh
+++ b/mgiza-aligner/corpus-compilator/filter.sh
@ -0,0 +1,12 @@
 #!/bin/sh
 # Filters the cleaned parallel corpus using a lemmatized dictionary.
 # Names of the corpus and the dictionary this run operates on.
 CORPUS_NAME=opus
 DICTIONARY_NAME=classyf_popular_medicine
 # Paths that are reused by the steps below.
 CORPUS_DIR=corpora/${CORPUS_NAME}
 LEMMATIZED_DICTIONARY=dictionaries/${DICTIONARY_NAME}.lem
 CORPUS_LINES=${CORPUS_DIR}/corpus_lines.txt
 # Run the Makefile's clean-filtering target, then build the .lem dictionary.
 make clean-filtering
 make ${LEMMATIZED_DICTIONARY}
 # Store the output of get_corpus_lines.py (fed with the .lem dictionary and
 # the corpus report) in corpus_lines.txt.
 ./get_corpus_lines.py ${LEMMATIZED_DICTIONARY} ${CORPUS_DIR}/report.txt > ${CORPUS_LINES}
 # Hand the cleaned source/target files, the line list and the two
 # *_filtered.txt paths to compile.py.
 ./compile.py ${CORPUS_DIR}/src_clean.txt ${CORPUS_DIR}/trg_clean.txt ${CORPUS_LINES} ${CORPUS_DIR}/src_filtered.txt ${CORPUS_DIR}/trg_filtered.txt
--- a/mgiza-aligner/corpus-compilator/setup_solr.sh
+++ b/mgiza-aligner/corpus-compilator/setup_solr.sh
@ -1,6 +1,6 @@
 #!/bin/sh
-SOLR_HOME=/home/rafalj/programs/solr-6.0.0
+SOLR_HOME=/home/rjawor/programs/solr-5.5.4
 $SOLR_HOME/bin/solr restart
 $SOLR_HOME/bin/solr create -c corpus_compiler
--- a/tests/addLemmatizedTM.sh
+++ b/tests/addLemmatizedTM.sh
@ -1,7 +1,7 @@
 #!/bin/sh
-CORPUS_NAME="europarl_sample"
+CORPUS_NAME="tmrepository_enhr"
-SRC_LANG_ID=1
+SRC_LANG_ID=2
-TRG_LANG_ID=2
+TRG_LANG_ID=6
 ./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt
--- a/tests/generateIndex.py
+++ b/tests/generateIndex.py
@ -0,0 +1,28 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 import unittest
 import json
 import urllib2
 import sys
 import host
 import time
 address = 'http://'+host.concordia_host
 if len(host.concordia_port) > 0:
    address += ':'+host.concordia_port
 print "Generating index..."
 start = time.time()
 data = {
    'operation': 'refreshIndex',
    'tmId' : 1
 }
 req = urllib2.Request(address)
 req.add_header('Content-Type', 'application/json')
 urllib2.urlopen(req, json.dumps(data)).read()
 end = time.time()
 print "Index regeneration complete. The operation took %.4f s" % (end - start)