diff --git a/cat/js/cat.js b/cat/js/cat.js index 24ba0e1..1970fad 100644 --- a/cat/js/cat.js +++ b/cat/js/cat.js @@ -66,7 +66,8 @@ function presentFullSearchResults(data) { for (j=0;j'+occurence['sourceName']+''; } $('#occurrences').html(result); diff --git a/cat/versions_available/opensubtitles_plen.cfg b/cat/versions_available/opensubtitles_plen.cfg new file mode 100644 index 0000000..afb75b6 --- /dev/null +++ b/cat/versions_available/opensubtitles_plen.cfg @@ -0,0 +1,8 @@ +dir@#@opensubtitles_plen +concordia_host@#@concordia.poleng +concordia_port@#@8800 +tmid@#@2 +desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst. +enjoy@#@Życzymy udanej pracy z systemem! +prompt@#@Wprowadź zdanie (po polsku): +suggestion@#@Nawet zepsute zegary pokazują dwa razy dziennie właściwą godzinę diff --git a/cat/versions_enabled/opensubtitles_plen.cfg b/cat/versions_enabled/opensubtitles_plen.cfg new file mode 120000 index 0000000..4f185d7 --- /dev/null +++ b/cat/versions_enabled/opensubtitles_plen.cfg @@ -0,0 +1 @@ +../versions_available/opensubtitles_plen.cfg \ No newline at end of file diff --git a/concordia-server/example_occurrence.cpp b/concordia-server/example_occurrence.cpp index 7247f7a..b82224f 100644 --- a/concordia-server/example_occurrence.cpp +++ b/concordia-server/example_occurrence.cpp @@ -5,12 +5,16 @@ ExampleOccurrence::ExampleOccurrence( const int matchedExampleStart, const int matchedExampleEnd, const std::string & sourceSegment, - const std::string & targetSegment): + const std::string & targetSegment, + const std::string & sourceName, + const std::string & sourceLink): _id(id), _matchedExampleStart(matchedExampleStart), _matchedExampleEnd(matchedExampleEnd), _sourceSegment(sourceSegment), - _targetSegment(targetSegment) { + _targetSegment(targetSegment), + _sourceName(sourceName), + _sourceLink(sourceLink) { } ExampleOccurrence::~ExampleOccurrence() { diff --git a/concordia-server/example_occurrence.hpp b/concordia-server/example_occurrence.hpp index 65e2dda..8503603 100644 --- a/concordia-server/example_occurrence.hpp +++ b/concordia-server/example_occurrence.hpp @@ -12,7 +12,9 @@ public: const int matchedExampleStart, const int matchedExampleEnd, const std::string & sourceSegment, - const std::string & targetSegment + const std::string & targetSegment, + const std::string & sourceName, + const std::string & sourceLink ); /*! Destructor. */ @@ -42,6 +44,14 @@ public: return _targetFragments; } + const std::string & getSourceName() const { + return _sourceName; + } + + const std::string & getSourceLink() const { + return _sourceLink; + } + void addMatchedTargetFragment(const std::pair & targetFragment); private: @@ -56,6 +66,10 @@ private: std::string _targetSegment; std::vector > _targetFragments; + + std::string _sourceName; + + std::string _sourceLink; }; #endif diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp index 2a9f5c9..7e3717d 100644 --- a/concordia-server/index_controller.cpp +++ b/concordia-server/index_controller.cpp @@ -72,10 +72,12 @@ void IndexController::addSentences(rapidjson::Writer & try { boost::ptr_map::iterator it = _concordiasMap->find(tmId); if (it != _concordiasMap->end()) { - std::vector tokenizedLemmatizedSentences = it->second->tokenizeAll(_lemmatizerFacade->lemmatizeSentencesIfNeeded(sourceSentences, tmId)); - std::vector tokenizedSentences = it->second->tokenizeAll(sourceSentences); - std::vector sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId); - it->second->addAllTokenizedExamples(tokenizedLemmatizedSentences, sentenceIds); + std::vector tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, false, false); + std::vector tokenizedLemmatizedSourceSentences = it->second->tokenizeAll(lemmatizedSourceSentences, true, true); + std::vector tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, false, false); + + std::vector sentenceIds = _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, alignments, sourceIds, tmId); + it->second->addAllTokenizedExamples(tokenizedLemmatizedSourceSentences, sentenceIds); jsonWriter.StartObject(); jsonWriter.String("status"); diff --git a/concordia-server/json_generator.cpp b/concordia-server/json_generator.cpp index c6ad56d..6e76b9e 100644 --- a/concordia-server/json_generator.cpp +++ b/concordia-server/json_generator.cpp @@ -82,6 +82,10 @@ void JsonGenerator::writeFullSearchResult(rapidjson::Writer >::const_iterator it = occurrence.getTargetFragments().begin(); diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp index 8e7cc0e..8a2896a 100644 --- a/concordia-server/unit_dao.cpp +++ b/concordia-server/unit_dao.cpp @@ -54,6 +54,7 @@ std::vector UnitDAO::addSentences( return newIds; } + std::vector UnitDAO::addAlignedSentences( const std::vector & sourceSentences, const std::vector & targetSentences, @@ -72,6 +73,26 @@ std::vector UnitDAO::addAlignedSentences( return newIds; } +std::vector UnitDAO::addAlignedSentences( + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector > > & allAlignments, + const std::vector & sourceIds, + const int tmId) throw (ConcordiaException) { + + DBconnection connection; + std::vector newIds; + connection.startTransaction(); + + for (int i=0; i< sourceSentences.size(); i++) { + newIds.push_back(_addAlignedUnit(connection, sourceSentences.at(i), targetSentences.at(i), allAlignments.at(i), sourceIds.at(i), tmId)); + } + + connection.endTransaction(); + return newIds; +} + + SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment & fragment) { SimpleSearchResult result(fragment.getStart(), fragment.getEnd()); TokenizedSentence ts(""); @@ -146,7 +167,8 @@ SimpleSearchResult UnitDAO::_getResultFromFragment( } ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, const SubstringOccurrence sOccurrence, const int matchedLength) { - std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer], target_tokens, alignments FROM unit WHERE id = $3::integer;"; + Logger::log("_getExampleOccurence"); + std::string query = "SELECT unit.id, unit.source_segment, unit.target_segment, unit.source_tokens[$1::integer], unit.source_tokens[$2::integer], unit.target_tokens, unit.alignments, source.name, source.link FROM unit left join source on unit.source_id = source.external_id where unit.id = $3::integer;"; std::vector params; params.push_back(new IntParam(2*sOccurrence.getOffset()+1)); params.push_back(new IntParam(2*(sOccurrence.getOffset()+matchedLength))); @@ -156,7 +178,9 @@ ExampleOccurrence UnitDAO::_getExampleOccurrence(DBconnection & connection, cons connection.getIntValue(result,0,3), // matched example start connection.getIntValue(result,0,4), // matched example end connection.getStringValue(result,0,1), // source segment - connection.getStringValue(result,0,2)); // target segment + connection.getStringValue(result,0,2), // target segment + connection.getStringValue(result,0,7), // source name + connection.getStringValue(result,0,8)); // source link std::string targetTokensRaw = connection.getStringValue(result,0,5); std::string alignmentsRaw = connection.getStringValue(result,0,6); @@ -273,6 +297,43 @@ int UnitDAO::_addAlignedUnit ( return newId; } +int UnitDAO::_addAlignedUnit ( + DBconnection & connection, + const TokenizedSentence & sourceSentence, + const TokenizedSentence & targetSentence, + const std::vector > & alignments, + const int sourceId, + const int tmId) throw(ConcordiaException) { + + if (sourceSentence.getTokens().size() != alignments.size()) { + // Here we check if the source sentence, taken from src.tok, + // is shorter than alignments array. + std::stringstream ss; + ss << "The size of source sentence is different than the size of alignments array. Source sentence: " << sourceSentence.getSentence() << ", alignments size:" << alignments.size(); + throw ConcordiaException(ss.str()); + } + + std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens, alignments, source_id) values($1::text,$2::text,$3::integer,$4,$5,$6,$7) RETURNING id"; + std::vector params; + params.push_back(new StringParam(sourceSentence.getOriginalSentence())); + params.push_back(new StringParam(targetSentence.getOriginalSentence())); + params.push_back(new IntParam(tmId)); + params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence))); + params.push_back(new IntArrayParam(_getTokenPositions(targetSentence))); + params.push_back(new Int2DArrayParam(alignments)); + params.push_back(new IntParam(sourceId)); + + PGresult * result = connection.execute(query, params); + int newId = connection.getIntValue(result, 0, 0); + connection.clearResult(result); + BOOST_FOREACH (QueryParam * param, params) { + delete param; + } + + return newId; +} + + std::vector UnitDAO::_getArray(std::string arrayString) { std::vector result; if (arrayString.length()>2) { diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp index 48e58c7..245282b 100644 --- a/concordia-server/unit_dao.hpp +++ b/concordia-server/unit_dao.hpp @@ -43,6 +43,13 @@ public: const std::vector > > & allAlignments, const int tmId) throw (ConcordiaException); + std::vector addAlignedSentences( + const std::vector & sourceSentences, + const std::vector & targetSentences, + const std::vector > > & allAlignments, + const std::vector & sourceIds, + const int tmId) throw (ConcordiaException); + SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment); FullSearchResult getFullSearchResult(const OccurrencesList & occurrencesList, const int patternLength); @@ -78,6 +85,14 @@ private: const std::vector > & alignments, const int tmId) throw(ConcordiaException); + int _addAlignedUnit( + DBconnection & connection, + const TokenizedSentence & sourceSentence, + const TokenizedSentence & targetSentence, + const std::vector > & alignments, + const int sourceId, + const int tmId) throw(ConcordiaException); + std::vector _getArray(std::string arrayString); std::vector > _get2DArray(std::string arrayString);