diff --git a/doc/install.txt b/INSTALL.txt
similarity index 70%
rename from doc/install.txt
rename to INSTALL.txt
index 08455b4..5599fb5 100644
--- a/doc/install.txt
+++ b/INSTALL.txt
@@ -1,47 +1,28 @@
-- sudo apt-get install postgresql libfcgi-dev libpq-dev
+- sudo apt-get install postgresql libfcgi-dev libpq-dev mono-complete
- clone github repo, mkdir build, cd build, ../cmake.sh, make
- sudo -u postgres psql postgres
- create user concordia with encrypted password 'concordia';
- CREATE DATABASE concordia_server ENCODING 'UTF8' OWNER concordia TEMPLATE template0;
- grant all privileges on database concordia_server to concordia;
- sudo vim /etc/postgresql/9.3/main/pg_hba.conf: change "local all all peer" to "local all all md5"
-- pgbouncer: wget https://pgbouncer.github.io/downloads/files/1.6/pgbouncer-1.6.tar.gz
- - sudo apt-get install libevent-dev
+- pgbouncer:
+ - sudo apt-get install autoconf automake m4 libtool pkg-config libevent-dev autogen
+ $ git clone https://github.com/pgbouncer/pgbouncer.git
+ $ cd pgbouncer
+ $ git submodule init
+ $ git submodule update
+ $ ./autogen.sh
+ $ ./configure ...
+ $ make
+ $ make install
+- ./db/startPGbouncer.sh
- ./db/recreateDb.sh
-- nginx:
- sudo -s
- nginx=stable # use nginx=development for latest development version
- add-apt-repository ppa:nginx/$nginx
- apt-get update
+- nginx:
apt-get install nginx
-
+
+
+
sites-available:
- cat_html:
- # Default server configuration
- #
- server {
- listen 80 default_server;
- listen [::]:80 default_server;
-
- # SSL configuration
- #
- # listen 443 ssl default_server;
- # listen [::]:443 ssl default_server;
- #
- # Note: You should disable gzip for SSL traffic.
- # See: https://bugs.debian.org/773332
- #
- # Read up on ssl_ciphers to ensure a secure configuration.
- # See: https://bugs.debian.org/765782
- #
- # Self signed certs generated by the ssl-cert package
- # Don't use them in a production server!
- #
- # include snippets/snakeoil.conf;
-
- root /var/www/html;
-
-
rename default to fcgi_concordia
fcgi_concordia:
server {
@@ -95,8 +76,15 @@
}
- add links in sites-enabled, sudo service nginx restart
+ - sudo apt-get install php apache2 libapache2-mod-php
- install cat html to /var/www/html (adjust ajax requests)
- sudo apt-get install spawn-fcgi
- mkdir index
-- ./db/startPGbouncer.sh
- ./scripts/restart.sh
+- install upstart scripts
+
+
+mgiza-aligner:
+- cd mgiza, mgizapp
+- sudo apt-get install libboost-thread-dev
+- follow instructions in INSTALL
diff --git a/cat/README b/cat/README
index 93bae65..6faad05 100644
--- a/cat/README
+++ b/cat/README
@@ -1,5 +1,4 @@
1. Prepare host.cfg file with the address and port number of Concordia. See host.cfg_example. WARNING there should not be any empty lines in the .cfg files.
-2. Prepare version file for each tm in Concordia in teh "versions: directory.
+2. Prepare version file for each tm in Concordia in the "versions" directory.
3. Clean a directory on your webserver (that supports PHP).
4. sudo ./publish.py PATH_ON_SERVER.
-
diff --git a/cat/css/concordia_cat.css b/cat/css/concordia_cat.css
index 644007f..2903305 100644
--- a/cat/css/concordia_cat.css
+++ b/cat/css/concordia_cat.css
@@ -122,7 +122,7 @@
cursor:pointer;
}
-.fragmentDetails {
+.example {
border-style: solid;
border-width: 5px;
border-color:#19424F;
diff --git a/cat/js/cat.js b/cat/js/cat.js
index d76c279..978dc93 100644
--- a/cat/js/cat.js
+++ b/cat/js/cat.js
@@ -47,6 +47,7 @@ function phraseSearchHandle(tmid, intervals) {
function renderResult(data) {
var res = '';
+ var disablePhraseSearch = true;
if (typeof(data['result']['bestOverlayScore']) === 'undefined') {
// ignore
@@ -89,29 +90,35 @@ function renderResult(data) {
}
function renderFragment(fragment, number) {
- var result = '
';
+ var result = ' ';
- // source segment
- var sourceSegment = fragment['sourceSegment'];
- result += sourceSegment.slice(0, fragment['matchedExampleStart']);
- result += '';
- result += sourceSegment.slice(fragment['matchedExampleStart'], fragment['matchedExampleEnd']);
- result += '';
- result += sourceSegment.slice(fragment['matchedExampleEnd']);
+ for (j=0;j';
- // target segment
- result += ' | ';
- var targetSegment = fragment['targetSegment'];
- var currStart = 0;
- for (i=0;i';
- result += targetSegment.slice(fragment['targetFragments'][i][0], fragment['targetFragments'][i][1]);
+ result += sourceSegment.slice(occurence['matchedExampleStart'], occurence['matchedExampleEnd']);
result += '';
- currStart = fragment['targetFragments'][i][1];
+ result += sourceSegment.slice(occurence['matchedExampleEnd']);
+
+ // target segment
+ result += ' | ';
+ var targetSegment = occurence['targetSegment'];
+ var currStart = 0;
+ for (i=0;i';
+ result += targetSegment.slice(occurence['targetFragments'][i][0], occurence['targetFragments'][i][1]);
+ result += '';
+ currStart = occurence['targetFragments'][i][1];
+ }
+ result += targetSegment.slice(currStart);
+ result += ' | |
';
}
- result += targetSegment.slice(currStart);
- result += '
';
+ result += '';
return result;
}
diff --git a/cat/versions_available/stocznia_enpl.cfg b/cat/versions_available/stocznia_enpl.cfg
new file mode 100644
index 0000000..56e2916
--- /dev/null
+++ b/cat/versions_available/stocznia_enpl.cfg
@@ -0,0 +1,8 @@
+dir@#@stocznia_enpl
+concordia_host@#@localhost
+concordia_port@#@8800
+tmid@#@5
+desc@#@Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. Please enter an English sentence in the field below and press Enter (or use the search button). You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences. After the search, click on the highlighted fragments to see their context.
+enjoy@#@Enjoy your work with the system!
+prompt@#@Enter search pattern (English sentence):
+suggestion@#@This is a large ballast tank
diff --git a/cat/versions_available/stocznia_plen.cfg b/cat/versions_available/stocznia_plen.cfg
new file mode 100644
index 0000000..83239a6
--- /dev/null
+++ b/cat/versions_available/stocznia_plen.cfg
@@ -0,0 +1,8 @@
+dir@#@stocznia_plen
+concordia_host@#@localhost
+concordia_port@#@8800
+tmid@#@2
+desc@#@Witamy w interaktywnym demo systemu Concordia. System znajduje najdłuższe fragmenty zdania wejściowego w pamięci tłumaczeń. Proszę wpisać polskie zdanie w poniższe pole i nacisnąć Enter (albo użyć przycisku "search"). Aby zapoznać się z systemem możesz użyć wcześniej przygotowanych przykładów - po prostu kliknij link "apply" przy wybranym przykładzie. Po wyszukaniu, kliknij na wybrany podświetlony fragment, aby zobaczyć jego kontekst.
+enjoy@#@Życzymy udanej pracy z systemem!
+prompt@#@Wprowadź zdanie (po polsku):
+suggestion@#@To jest bardzo duży zbiornik balastowy
diff --git a/cat/versions_enabled/jrc_enpl.cfg b/cat/versions_enabled/jrc_enpl.cfg
deleted file mode 120000
index f020ebd..0000000
--- a/cat/versions_enabled/jrc_enpl.cfg
+++ /dev/null
@@ -1 +0,0 @@
-../versions_available/jrc_enpl.cfg
\ No newline at end of file
diff --git a/cat/versions_enabled/jrc_plen.cfg b/cat/versions_enabled/jrc_plen.cfg
deleted file mode 120000
index 66a188d..0000000
--- a/cat/versions_enabled/jrc_plen.cfg
+++ /dev/null
@@ -1 +0,0 @@
-../versions_available/jrc_plen.cfg
\ No newline at end of file
diff --git a/cat/versions_enabled/setimes_enhr.cfg b/cat/versions_enabled/setimes_enhr.cfg
deleted file mode 120000
index 94774d4..0000000
--- a/cat/versions_enabled/setimes_enhr.cfg
+++ /dev/null
@@ -1 +0,0 @@
-../versions_available/setimes_enhr.cfg
\ No newline at end of file
diff --git a/cat/versions_enabled/stocznia_enpl.cfg b/cat/versions_enabled/stocznia_enpl.cfg
new file mode 120000
index 0000000..884dd56
--- /dev/null
+++ b/cat/versions_enabled/stocznia_enpl.cfg
@@ -0,0 +1 @@
+../versions_available/stocznia_enpl.cfg
\ No newline at end of file
diff --git a/cat/versions_enabled/stocznia_plen.cfg b/cat/versions_enabled/stocznia_plen.cfg
new file mode 120000
index 0000000..0ba3868
--- /dev/null
+++ b/cat/versions_enabled/stocznia_plen.cfg
@@ -0,0 +1 @@
+../versions_available/stocznia_plen.cfg
\ No newline at end of file
diff --git a/concordia-server/complete_concordia_search_result.cpp b/concordia-server/complete_concordia_search_result.cpp
index c144f84..6fcf5fd 100644
--- a/concordia-server/complete_concordia_search_result.cpp
+++ b/concordia-server/complete_concordia_search_result.cpp
@@ -10,9 +10,14 @@ CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {
}
+void CompleteConcordiaSearchResult::addToBestOverlay(const SimpleSearchResult & result) {
+ _bestOverlay.push_back(result);
+}
+
+
void CompleteConcordiaSearchResult::offsetPattern(int offset) {
BOOST_FOREACH(SimpleSearchResult & simpleResult, _bestOverlay) {
simpleResult.offsetPattern(offset);
- }
+ }
}
diff --git a/concordia-server/complete_concordia_search_result.hpp b/concordia-server/complete_concordia_search_result.hpp
index 2a0ce46..83cc903 100644
--- a/concordia-server/complete_concordia_search_result.hpp
+++ b/concordia-server/complete_concordia_search_result.hpp
@@ -14,20 +14,22 @@ public:
/*! Destructor.
*/
virtual ~CompleteConcordiaSearchResult();
-
+
const double getBestOverlayScore() {
return _bestOverlayScore;
}
- std::vector & getBestOverlay() {
+ std::vector getBestOverlay() const {
return _bestOverlay;
}
-
+
+ void addToBestOverlay(const SimpleSearchResult & result);
+
void offsetPattern(int offset);
-
+
private:
double _bestOverlayScore;
-
+
std::vector _bestOverlay;
};
diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp
index fa539ee..c78ece9 100644
--- a/concordia-server/concordia_server.cpp
+++ b/concordia-server/concordia_server.cpp
@@ -66,9 +66,11 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
int tmId = _getIntParameter(d, TM_ID_PARAM);
// loading data from json
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
+ /*
Logger::log("addSentences");
Logger::logInt("sentences to add", sentencesArray.Size());
Logger::logInt("tm id", tmId);
+ */
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 2) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
@@ -85,9 +87,11 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
int tmId = d[TM_ID_PARAM].GetInt();
// loading data from json
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
+ /*
Logger::log("addAlignedSentences");
Logger::logInt("sentences to add", sentencesArray.Size());
Logger::logInt("tm id", tmId);
+ */
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 2) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
@@ -105,9 +109,11 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
int tmId = d[TM_ID_PARAM].GetInt();
// loading data from json
const rapidjson::Value & sentencesArray = d[EXAMPLES_PARAM];
+ /*
Logger::log("addAlignedLemmatizedSentences");
Logger::logInt("lemmatized sentences to add", sentencesArray.Size());
Logger::logInt("tm id", tmId);
+ */
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
if (sentencesArray[i].Size() != 3) {
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
diff --git a/concordia-server/example_occurence.cpp b/concordia-server/example_occurence.cpp
new file mode 100644
index 0000000..9589815
--- /dev/null
+++ b/concordia-server/example_occurence.cpp
@@ -0,0 +1,21 @@
+#include "example_occurence.hpp"
+
+ExampleOccurence::ExampleOccurence(
+ const int id,
+ const int matchedExampleStart,
+ const int matchedExampleEnd,
+ const std::string & sourceSegment,
+ const std::string & targetSegment):
+ _id(id),
+ _matchedExampleStart(matchedExampleStart),
+ _matchedExampleEnd(matchedExampleEnd),
+ _sourceSegment(sourceSegment),
+ _targetSegment(targetSegment) {
+}
+
+ExampleOccurence::~ExampleOccurence() {
+}
+
+void ExampleOccurence::addMatchedTargetFragment(const std::pair & targetFragment) {
+ _targetFragments.push_back(targetFragment);
+}
diff --git a/concordia-server/example_occurence.hpp b/concordia-server/example_occurence.hpp
new file mode 100644
index 0000000..68d193f
--- /dev/null
+++ b/concordia-server/example_occurence.hpp
@@ -0,0 +1,61 @@
+#ifndef EXAMPLE_OCCURENCE_HDR
+#define EXAMPLE_OCCURENCE_HDR
+
+#include
+#include
+
+class ExampleOccurence {
+public:
+ /*! Constructor.
+ */
+ ExampleOccurence (const int id,
+ const int matchedExampleStart,
+ const int matchedExampleEnd,
+ const std::string & sourceSegment,
+ const std::string & targetSegment
+ );
+ /*! Destructor.
+ */
+ virtual ~ExampleOccurence();
+
+ int getId() const {
+ return _id;
+ }
+
+ int getMatchedExampleStart() const {
+ return _matchedExampleStart;
+ }
+
+ int getMatchedExampleEnd() const {
+ return _matchedExampleEnd;
+ }
+
+ const std::string & getSourceSegment() const {
+ return _sourceSegment;
+ }
+
+ const std::string & getTargetSegment() const {
+ return _targetSegment;
+ }
+
+ const std::vector > & getTargetFragments() const {
+ return _targetFragments;
+ }
+
+ void addMatchedTargetFragment(const std::pair & targetFragment);
+
+private:
+ int _id;
+
+ int _matchedExampleStart;
+
+ int _matchedExampleEnd;
+
+ std::string _sourceSegment;
+
+ std::string _targetSegment;
+
+ std::vector > _targetFragments;
+};
+
+#endif
diff --git a/concordia-server/index_controller.cpp b/concordia-server/index_controller.cpp
index 37de410..6c04dd6 100644
--- a/concordia-server/index_controller.cpp
+++ b/concordia-server/index_controller.cpp
@@ -135,13 +135,13 @@ void IndexController::addAlignedLemmatizedSentences(
std::vector > > allAlignments;
_getSourceSentencesAndAlignments(lemmatizedSourceSentences, allAlignments, alignmentStrings);
- std::vector tokenizedLemmatizedSourceSentences = it->second->tokenizeAll(lemmatizedSourceSentences, true, true);
+ std::vector tokenizedLemmatizedSourceSentences =
+ it->second->tokenizeAll(lemmatizedSourceSentences, true, true);
std::vector tokenizedSourceSentences = it->second->tokenizeAll(sourceSentences, true, false);
std::vector tokenizedTargetSentences = it->second->tokenizeAll(targetSentences, true, false);
std::vector sentenceIds =
-
- _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId);
+ _unitDAO.addAlignedSentences(tokenizedSourceSentences, tokenizedTargetSentences, allAlignments, tmId);
for(int index = 0; index < tokenizedLemmatizedSourceSentences.size(); index++) {
it->second->addTokenizedExample(tokenizedLemmatizedSourceSentences.at(index), sentenceIds.at(index));
}
diff --git a/concordia-server/json_generator.cpp b/concordia-server/json_generator.cpp
index b2ca255..475756c 100644
--- a/concordia-server/json_generator.cpp
+++ b/concordia-server/json_generator.cpp
@@ -1,6 +1,7 @@
#include "json_generator.hpp"
#include
+#include "example_occurence.hpp"
JsonGenerator::JsonGenerator() {
}
@@ -19,37 +20,42 @@ void JsonGenerator::signalError(rapidjson::Writer & jso
jsonWriter.EndObject();
}
-void JsonGenerator::writeSearchResult(rapidjson::Writer & jsonWriter,
+void JsonGenerator::writeSimpleSearchResult(rapidjson::Writer & jsonWriter,
const SimpleSearchResult & result) {
jsonWriter.StartObject();
- jsonWriter.String("id");
- jsonWriter.Int(result.getId());
jsonWriter.String("matchedPatternStart");
jsonWriter.Int(result.getMatchedPatternStart());
jsonWriter.String("matchedPatternEnd");
jsonWriter.Int(result.getMatchedPatternEnd());
- jsonWriter.String("matchedExampleStart");
- jsonWriter.Int(result.getMatchedExampleStart());
- jsonWriter.String("matchedExampleEnd");
- jsonWriter.Int(result.getMatchedExampleEnd());
- jsonWriter.String("sourceSegment");
- jsonWriter.String(result.getSourceSegment().c_str());
- jsonWriter.String("targetSegment");
- jsonWriter.String(result.getTargetSegment().c_str());
- jsonWriter.String("targetFragments");
+ jsonWriter.String("occurences");
jsonWriter.StartArray();
- for (std::vector >::const_iterator it = result.getTargetFragments().begin();
- it != result.getTargetFragments().end(); it++) {
- jsonWriter.StartArray();
- jsonWriter.Int(it->first);
- jsonWriter.Int(it->second);
- jsonWriter.EndArray();
- }
- jsonWriter.EndArray();
+ BOOST_FOREACH(ExampleOccurence occurence, result.getOccurences()) {
+ jsonWriter.StartObject();
+ jsonWriter.String("id");
+ jsonWriter.Int(occurence.getId());
+ jsonWriter.String("matchedExampleStart");
+ jsonWriter.Int(occurence.getMatchedExampleStart());
+ jsonWriter.String("matchedExampleEnd");
+ jsonWriter.Int(occurence.getMatchedExampleEnd());
+ jsonWriter.String("sourceSegment");
+ jsonWriter.String(occurence.getSourceSegment().c_str());
+ jsonWriter.String("targetSegment");
+ jsonWriter.String(occurence.getTargetSegment().c_str());
+ jsonWriter.String("targetFragments");
+ jsonWriter.StartArray(); // all target fragments
+ for (std::vector >::const_iterator it = occurence.getTargetFragments().begin();
+ it != occurence.getTargetFragments().end(); it++) {
+ jsonWriter.StartArray(); // single target fragment
+ jsonWriter.Int(it->first);
+ jsonWriter.Int(it->second);
+ jsonWriter.EndArray(); // single target fragment
+ }
+ jsonWriter.EndArray(); // all target fragments
+ jsonWriter.EndObject(); // occurence
+ }
- jsonWriter.EndObject();
+ jsonWriter.EndArray(); //occurences
+
+ jsonWriter.EndObject(); //simple search result
}
-
-
-
diff --git a/concordia-server/json_generator.hpp b/concordia-server/json_generator.hpp
index 0d2e481..9fd7319 100644
--- a/concordia-server/json_generator.hpp
+++ b/concordia-server/json_generator.hpp
@@ -19,8 +19,8 @@ public:
static void signalError(rapidjson::Writer & jsonWriter,
const std::string & message);
- static void writeSearchResult(rapidjson::Writer & jsonWriter,
- const SimpleSearchResult & result);
+ static void writeSimpleSearchResult(rapidjson::Writer & jsonWriter,
+ const SimpleSearchResult & result);
private:
diff --git a/concordia-server/logger.cpp b/concordia-server/logger.cpp
index fa3eb3e..0fd0949 100644
--- a/concordia-server/logger.cpp
+++ b/concordia-server/logger.cpp
@@ -44,6 +44,26 @@ void Logger::logString(std::string name, std::string value) {
root.info(ss.str());
}
+void Logger::logFragment(const MatchedPatternFragment & fragment) {
+ log4cpp::Category & root = log4cpp::Category::getRoot();
+ if (_initialized == 0) {
+ _initialize(root);
+ }
+ std::stringstream ss;
+ ss << fragment;
+ root.info(ss.str());
+}
+
+void Logger::logConcordiaSearchResult(const ConcordiaSearchResult & result) {
+ log4cpp::Category & root = log4cpp::Category::getRoot();
+ if (_initialized == 0) {
+ _initialize(root);
+ }
+ std::stringstream ss;
+ ss << result;
+ root.info(ss.str());
+}
+
void Logger::_initialize(log4cpp::Category & root) {
log4cpp::Appender *appender = new log4cpp::FileAppender("default", LOG_FILE_PATH);
log4cpp::PatternLayout *layout = new log4cpp::PatternLayout();
@@ -52,8 +72,6 @@ void Logger::_initialize(log4cpp::Category & root) {
root.setPriority(log4cpp::Priority::INFO);
root.addAppender(appender);
-
+
_initialized = 1;
}
-
-
diff --git a/concordia-server/logger.hpp b/concordia-server/logger.hpp
index ddbd089..66cd66e 100644
--- a/concordia-server/logger.hpp
+++ b/concordia-server/logger.hpp
@@ -3,6 +3,8 @@
#include
#include
+#include
+#include
#include "log4cpp/Category.hh"
@@ -15,12 +17,17 @@ public:
/*! Destructor.
*/
virtual ~Logger();
-
+
static void log(std::string message);
static void logInt(std::string name, int value);
static void logString(std::string name, std::string value);
+
+ static void logFragment(const MatchedPatternFragment & fragment);
+
+ static void logConcordiaSearchResult(const ConcordiaSearchResult & result);
+
private:
static void _initialize(log4cpp::Category & root);
diff --git a/concordia-server/searcher_controller.cpp b/concordia-server/searcher_controller.cpp
index 10343f2..4c59a4a 100644
--- a/concordia-server/searcher_controller.cpp
+++ b/concordia-server/searcher_controller.cpp
@@ -25,17 +25,12 @@ void SearcherController::simpleSearch(rapidjson::Writer
boost::ptr_map::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) {
pattern = _lemmatizerFacade->lemmatizeIfNeeded(pattern, tmId);
- std::vector results = _unitDAO.getSearchResults(it->second->simpleSearch(pattern));
-
+ SimpleSearchResult result = _unitDAO.getSimpleSearchResult(it->second->simpleSearch(pattern, true));
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("success");
- jsonWriter.String("results");
- jsonWriter.StartArray();
- BOOST_FOREACH(SimpleSearchResult & result, results) {
- JsonGenerator::writeSearchResult(jsonWriter, result);
- }
- jsonWriter.EndArray();
+ jsonWriter.String("result");
+ JsonGenerator::writeSimpleSearchResult(jsonWriter, result);
jsonWriter.EndObject();
} else {
JsonGenerator::signalError(jsonWriter, "no such tm!");
@@ -55,7 +50,7 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
+ SimpleSearchResult shortPatternResult = _unitDAO.getSimpleSearchResult(it->second->simpleSearch(shortPattern));
@@ -63,7 +58,7 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer 0) {
+ if (shortPatternResult.getOccurences().size() > 0) {
jsonWriter.Bool(true);
@@ -76,10 +71,9 @@ void SearcherController::concordiaPhraseSearch(rapidjson::Writer & jsonWriter,
std::string & pattern,
const int tmId) {
-
+ Logger::log("concordiaSearch");
boost::ptr_map::iterator it = _concordiasMap->find(tmId);
if (it != _concordiasMap->end()) {
std::string lemmatizedPattern = _lemmatizerFacade->lemmatizeIfNeeded(pattern, tmId);
+ Logger::logString("pattern lemmatized", lemmatizedPattern);
TokenizedSentence originalPattern = it->second->tokenize(pattern, true, false);
- CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(it->second->concordiaSearch(lemmatizedPattern), originalPattern);
+ Logger::logInt("original pattern tokenized, token count", originalPattern.getTokens().size());
+ boost::shared_ptr rawConcordiaResult = it->second->concordiaSearch(lemmatizedPattern, true);
+ Logger::log("concordia searched, result:");
+ Logger::logConcordiaSearchResult(*rawConcordiaResult);
+ CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(rawConcordiaResult, originalPattern);
+ Logger::log("result got");
jsonWriter.StartObject();
jsonWriter.String("status");
@@ -128,8 +128,8 @@ void SearcherController::concordiaSearch(rapidjson::Writer & targetFragment) {
- _targetFragments.push_back(targetFragment);
+void SimpleSearchResult::addOccurence(const ExampleOccurence & occurence) {
+ _occurences.push_back(occurence);
}
void SimpleSearchResult::offsetPattern(int offset) {
_matchedPatternStart += offset;
_matchedPatternEnd += offset;
}
-
diff --git a/concordia-server/simple_search_result.hpp b/concordia-server/simple_search_result.hpp
index 7e73da1..83d82c3 100644
--- a/concordia-server/simple_search_result.hpp
+++ b/concordia-server/simple_search_result.hpp
@@ -1,6 +1,7 @@
#ifndef SIMPLE_SEARCH_RESULT_HDR
#define SIMPLE_SEARCH_RESULT_HDR
+#include "example_occurence.hpp"
#include
#include
@@ -8,21 +9,11 @@ class SimpleSearchResult {
public:
/*! Constructor.
*/
- SimpleSearchResult(const int id,
- const int matchedPatternStart,
- const int matchedPatternEnd,
- const int matchedExampleStart,
- const int matchedExampleEnd,
- const std::string & sourceSegment,
- const std::string & targetSegment
- );
+ SimpleSearchResult(const int matchedPatternStart,
+ const int matchedPatternEnd);
/*! Destructor.
*/
virtual ~SimpleSearchResult();
-
- int getId() const {
- return _id;
- }
int getMatchedPatternStart() const {
return _matchedPatternStart;
@@ -40,46 +31,20 @@ public:
_matchedPatternEnd = newEnd;
}
- int getMatchedExampleStart() const {
- return _matchedExampleStart;
+ std::vector getOccurences() const {
+ return _occurences;
}
- int getMatchedExampleEnd() const {
- return _matchedExampleEnd;
- }
+ void addOccurence(const ExampleOccurence & occurence);
- const std::string & getSourceSegment() const {
- return _sourceSegment;
- }
-
- const std::string & getTargetSegment() const {
- return _targetSegment;
- }
-
- const std::vector > & getTargetFragments() const {
- return _targetFragments;
- }
-
- void addMatchedTargetFragment(const std::pair & targetFragment);
-
void offsetPattern(int offset);
-
+
private:
- int _id;
-
+ std::vector _occurences;
+
int _matchedPatternStart;
int _matchedPatternEnd;
-
- int _matchedExampleStart;
-
- int _matchedExampleEnd;
-
- std::string _sourceSegment;
-
- std::string _targetSegment;
-
- std::vector > _targetFragments;
};
#endif
diff --git a/concordia-server/unit_dao.cpp b/concordia-server/unit_dao.cpp
index 4ab849b..9cd6f3f 100644
--- a/concordia-server/unit_dao.cpp
+++ b/concordia-server/unit_dao.cpp
@@ -8,6 +8,7 @@
#include "int_param.hpp"
#include "int_array_param.hpp"
#include "logger.hpp"
+#include "example_occurence.hpp"
#include
#include
@@ -65,58 +66,61 @@ std::vector UnitDAO::addAlignedSentences(
return newIds;
}
-std::vector UnitDAO::getSearchResults(const std::vector & fragments) {
- std::vector results;
+SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment & fragment) {
+ SimpleSearchResult result(fragment.getStart(), fragment.getEnd());
TokenizedSentence ts("");
- _getResultsFromFragments(results, fragments, ts);
- return results;
+ return _getResultFromFragment(fragment, ts);
}
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr rawConcordiaResult) {
CompleteConcordiaSearchResult result(rawConcordiaResult->getBestOverlayScore());
- _getResultsFromFragments(result.getBestOverlay(),
- rawConcordiaResult->getBestOverlay(),
- rawConcordiaResult->getTokenizedPattern());
+ BOOST_FOREACH(MatchedPatternFragment fragment, rawConcordiaResult->getBestOverlay()) {
+ result.addToBestOverlay(_getResultFromFragment(fragment, rawConcordiaResult->getTokenizedPattern()));
+ }
return result;
}
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr rawConcordiaResult, TokenizedSentence originalPattern) {
+ Logger::log("getConcordiaResult with original pattern");
CompleteConcordiaSearchResult result(rawConcordiaResult->getBestOverlayScore());
- _getResultsFromFragments(result.getBestOverlay(),
- rawConcordiaResult->getBestOverlay(),
- originalPattern);
+ BOOST_FOREACH(MatchedPatternFragment fragment, rawConcordiaResult->getBestOverlay()) {
+ Logger::log("Working on fragment:");
+ Logger::logFragment(fragment);
+ result.addToBestOverlay(_getResultFromFragment(fragment, originalPattern));
+ }
return result;
}
+SimpleSearchResult UnitDAO::_getResultFromFragment(
+ const MatchedPatternFragment & fragment,
+ const TokenizedSentence & tokenizedPattern) {
-void UnitDAO::_getResultsFromFragments(
- std::vector & results,
- const std::vector & fragments,
- const TokenizedSentence & tokenizedPattern) {
-
+ Logger::log("getResultFromFragment");
DBconnection connection;
connection.startTransaction();
- BOOST_FOREACH(const MatchedPatternFragment & fragment, fragments) {
- int matchedPatternStart = 0;
- int matchedPatternEnd = 0;
- if (tokenizedPattern.getTokens().size() > 0) {
- // if it is concordia searching
- matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
- matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
- }
-
+ int matchedPatternStart = 0;
+ int matchedPatternEnd = 0;
+ if (tokenizedPattern.getTokens().size() > 0) {
+ // if it is concordia searching
+ Logger::logInt("tokenizedPattern size",tokenizedPattern.getTokens().size());
+ Logger::logInt("fragment start",fragment.getStart());
+ Logger::logInt("fragment matched length",fragment.getMatchedLength());
+ matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
+ matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
+ }
+ SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd);
+ Logger::log("simple search result created");
+ BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
std::vector params;
- params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
- params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
- params.push_back(new IntParam(fragment.getExampleId()));
+ params.push_back(new IntParam(2*sOccurence.getOffset()+1));
+ params.push_back(new IntParam(2*(sOccurence.getOffset()+fragment.getMatchedLength())));
+ params.push_back(new IntParam(sOccurence.getId()));
PGresult * result = connection.execute(query, params);
- SimpleSearchResult ssResult(connection.getIntValue(result,0,0), // example id
- matchedPatternStart,
- matchedPatternEnd,
+ ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
connection.getIntValue(result,0,3), // matched example start
connection.getIntValue(result,0,4), // matched example end
connection.getStringValue(result,0,1), // source segment
@@ -129,9 +133,9 @@ void UnitDAO::_getResultsFromFragments(
// now add all target fragments matched with this fragment
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
std::vector targetParams;
- targetParams.push_back(new IntParam(fragment.getExampleId()));
- targetParams.push_back(new IntParam(fragment.getExampleOffset()));
- targetParams.push_back(new IntParam(fragment.getExampleOffset() + fragment.getMatchedLength() - 1));
+ targetParams.push_back(new IntParam(sOccurence.getId()));
+ targetParams.push_back(new IntParam(sOccurence.getOffset()));
+ targetParams.push_back(new IntParam(sOccurence.getOffset() + fragment.getMatchedLength() - 1));
PGresult * targetResult = connection.execute(targetQuery, targetParams);
int prevPos = -2;
@@ -146,7 +150,7 @@ void UnitDAO::_getResultsFromFragments(
if (prevPos < targetPos - 1) { // beginning of detached fragment
// check if there is a fragment to end
if (currStart >= 0) {
- ssResult.addMatchedTargetFragment(std::pair(currStart,currEnd));
+ occurence.addMatchedTargetFragment(std::pair(currStart,currEnd));
}
currStart = targetStart;
}
@@ -157,7 +161,7 @@ void UnitDAO::_getResultsFromFragments(
// check if there are remaining fragments
if (currStart >= 0) {
- ssResult.addMatchedTargetFragment(std::pair(currStart,currEnd));
+ occurence.addMatchedTargetFragment(std::pair(currStart,currEnd));
}
connection.clearResult(targetResult);
@@ -165,9 +169,13 @@ void UnitDAO::_getResultsFromFragments(
delete param;
}
- results.push_back(ssResult);
+ ssResult.addOccurence(occurence);
+
}
+
connection.endTransaction();
+
+ return ssResult;
}
diff --git a/concordia-server/unit_dao.hpp b/concordia-server/unit_dao.hpp
index 814b1aa..edf87ee 100644
--- a/concordia-server/unit_dao.hpp
+++ b/concordia-server/unit_dao.hpp
@@ -41,16 +41,16 @@ public:
const std::vector > > & allAlignments,
const int tmId) throw (ConcordiaException);
- std::vector getSearchResults(const std::vector & fragments);
+ SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr rawConcordiaResult);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr rawConcordiaResult, TokenizedSentence originalPattern);
private:
- void _getResultsFromFragments(std::vector & results,
- const std::vector & fragments,
- const TokenizedSentence & tokenizedPattern);
+ SimpleSearchResult _getResultFromFragment(
+ const MatchedPatternFragment & fragment,
+ const TokenizedSentence & tokenizedPattern);
std::vector _getTokenPositions(const TokenizedSentence & ts);
diff --git a/mgiza-aligner/build.sh b/mgiza-aligner/build.sh
index a276b09..f7d0be8 100755
--- a/mgiza-aligner/build.sh
+++ b/mgiza-aligner/build.sh
@@ -1,10 +1,7 @@
#!/bin/sh
-make CORPUS_NAME=setimes_enhr SRC_LANG=en TRG_LANG=hr
-make CORPUS_NAME=setimes_enhr SRC_LANG=en TRG_LANG=hr clean-intermediate-files
+make CORPUS_NAME=stocznia_plen SRC_LANG=pl TRG_LANG=en
+make CORPUS_NAME=stocznia_plen SRC_LANG=pl TRG_LANG=en clean-intermediate-files
-make CORPUS_NAME=europarljrc_enpl SRC_LANG=en TRG_LANG=pl
-make CORPUS_NAME=europarljrc_enpl SRC_LANG=en TRG_LANG=pl clean-intermediate-files
-
-make CORPUS_NAME=jrc200k_plen SRC_LANG=pl TRG_LANG=en
-make CORPUS_NAME=jrc200k_plen SRC_LANG=pl TRG_LANG=en clean-intermediate-files
+make CORPUS_NAME=stocznia_enpl SRC_LANG=en TRG_LANG=pl
+make CORPUS_NAME=stocznia_enpl SRC_LANG=en TRG_LANG=pl clean-intermediate-files
diff --git a/tests/concordiaSearch.py b/tests/concordiaSearch.py
index ab78eba..9880d94 100755
--- a/tests/concordiaSearch.py
+++ b/tests/concordiaSearch.py
@@ -22,11 +22,9 @@ data = {
start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()
print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
-
-
diff --git a/tests/simpleSearch.py b/tests/simpleSearch.py
index 6bea521..e7bdaee 100755
--- a/tests/simpleSearch.py
+++ b/tests/simpleSearch.py
@@ -21,11 +21,9 @@ if len(host.concordia_port) > 0:
start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
-response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
+response = urllib2.urlopen(req, json.dumps(data)).read()
end = time.time()
print "Execution time: %.4f seconds." % (end-start)
print "Result: "
print response
-
-
diff --git a/upstart/README.txt b/upstart/README.txt
index 7be3284..91665d7 100644
--- a/upstart/README.txt
+++ b/upstart/README.txt
@@ -1,9 +1,16 @@
+
+
In order to configure Concordia as upstart job, copy the 3 .conf files into your /etc/init and run:
+sudo apt-get install upstart upstart-sysv
sudo initctl reload-configuration
Also, add to the file /etc/init.d/postgresql the line:
initctl emit -n started JOB=postgresql
-at the end of postgresql start sequence.
+at the end of postgresql start sequence, after the loop:
+
+for v in $versions; do
+ $1 $v || EXIT=$?
+ done