This commit is contained in:
Rafał Jaworski 2018-08-22 13:30:03 +02:00
parent c6a71ddd00
commit 8e1d2d2c42
6 changed files with 14 additions and 5 deletions

1
.gitignore vendored
View File

@ -36,6 +36,7 @@ mgiza-aligner/mgiza/mgizapp/src/mkcls/Makefile
mgiza-aligner/mgiza/mgizapp/src/mkcls/cmake_install.cmake
__pycache__
import-requests/request_handler.log
import-requests/request_handler.pid
mgiza-aligner/corpus-compilator/corpora
mgiza-aligner/corpus-compilator/dictionaries/*lem
mgiza-aligner/corpus-compilator/dictionary-compilator/*.tsv

View File

@ -1,5 +1,5 @@
dir@#@stocznia_plen
concordia_host@#@localhost
concordia_host@#@concordia.poleng
concordia_port@#@8800
tmid@#@2
desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst.

View File

@ -67,7 +67,7 @@ void Logger::logConcordiaSearchResult(const ConcordiaSearchResult & result) {
void Logger::_initialize(log4cpp::Category & root) {
log4cpp::Appender *appender = new log4cpp::FileAppender("default", LOG_FILE_PATH);
log4cpp::PatternLayout *layout = new log4cpp::PatternLayout();
layout->setConversionPattern("%d{%Y-%m-%d %H:%M:%S}%c %x: %m%n");
layout->setConversionPattern("%d{%Y-%m-%d %H:%M:%S.%l}%c %x: %m%n");
appender->setLayout(layout);
root.setPriority(log4cpp::Priority::INFO);

View File

@ -128,13 +128,18 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::Stri
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern,
const int tmId) {
Logger::log("concordia search");
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) {
TokenizedSentence originalPattern = it->second->tokenize(pattern, false, false);
std::string lemmatizedPattern =
_lemmatizerFacade->lemmatizeIfNeeded(originalPattern.getTokenizedSentence(), tmId);
boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult = it->second->concordiaSearch(lemmatizedPattern, true);
Logger::log("got raw concordia result");
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(rawConcordiaResult, originalPattern);
Logger::log("got db result");
jsonWriter.StartObject();
jsonWriter.String("status");

View File

@ -92,6 +92,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
const MatchedPatternFragment & fragment,
const TokenizedSentence & tokenizedPattern) {
Logger::log("_getResultFromFragment");
DBconnection connection;
connection.startTransaction();
@ -112,6 +113,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
params.push_back(new IntParam(2*(sOccurence.getOffset()+fragment.getMatchedLength())));
params.push_back(new IntParam(sOccurence.getId()));
PGresult * result = connection.execute(query, params);
Logger::log("got examples");
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
connection.getIntValue(result,0,3), // matched example start
connection.getIntValue(result,0,4), // matched example end
@ -129,6 +131,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
targetParams.push_back(new IntParam(sOccurence.getOffset()));
targetParams.push_back(new IntParam(sOccurence.getOffset() + fragment.getMatchedLength() - 1));
PGresult * targetResult = connection.execute(targetQuery, targetParams);
Logger::log("got target fragments");
int prevPos = -2;
int currStart = -1;

View File

@ -1,7 +1,7 @@
#!/bin/sh
CORPUS_NAME="logofag_enpl"
SRC_LANG_ID=2
TRG_LANG_ID=1
CORPUS_NAME="stocznia_plen"
SRC_LANG_ID=1
TRG_LANG_ID=2
./addAlignedLemmatizedTM.py $CORPUS_NAME ../mgiza-aligner/corpora/$CORPUS_NAME/src_final.txt $SRC_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/trg_final.txt $TRG_LANG_ID ../mgiza-aligner/corpora/$CORPUS_NAME/aligned_final.txt