multiple translation memories
This commit is contained in:
parent
a43bd9be03
commit
5eff07d4b8
@ -18,10 +18,6 @@ set (STOP_WORDS_ENABLED "false")
|
||||
# ================================================
|
||||
|
||||
set (INDEX_DIRECTORY "${concordia-server_SOURCE_DIR}/index")
|
||||
set (WORD_MAP_FILE "word_map.bin")
|
||||
set (HASHED_INDEX_FILE "hashed_index.bin")
|
||||
set (MARKERS_FILE "markers.bin")
|
||||
|
||||
set (RESOURCES_DIRECTORY "${concordia-server_SOURCE_DIR}/resources")
|
||||
|
||||
configure_file (
|
||||
|
@ -1,41 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<script src="js/jquery-1.11.3.min.js"></script>
|
||||
<script src="js/cat.js"></script>
|
||||
<link rel="stylesheet" href="css/iatagger.css" />
|
||||
<meta charset="UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<div id="header">
|
||||
</div>
|
||||
<div id="content">
|
||||
<a href="http://tmconcordia.sourceforge.net/" target="_blank"><img src="images/banner.jpg" alt="Banner" /></a>
|
||||
<br/><br/><br/>
|
||||
<p>
|
||||
Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from English-Spanish corpus of European Law. Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences.
|
||||
</p>
|
||||
<p>
|
||||
Enjoy your work with the system!
|
||||
</p>
|
||||
<label for="searchInput">Enter search pattern (English sentence):</label>
|
||||
<span class="suggestion" onclick="showHideSuggestions()">show/hide samples</span>
|
||||
<br/><br/>
|
||||
<div class="suggestionsInvisible" id="suggestions">
|
||||
<ul>
|
||||
<li>Every ship in the European Union must have a crew of 50 or more workers. <span class="suggestion" onclick="searchText('Every ship in the European Union must have a crew of 50 or more workers.');">apply</span></li>
|
||||
<li>It is impossible to abolish the customs duties on fruit and vegetables. <span class="suggestion" onclick="searchText('It is impossible to abolish the customs duties on fruit and vegetables.');">apply</span></li>
|
||||
<li>The convention on human rights was held in Geneva. <span class="suggestion" onclick="searchText('The convention on human rights was held in Geneva.');">apply</span></li>
|
||||
|
||||
</ul>
|
||||
<br/><br/>
|
||||
</div>
|
||||
<input id="searchInput" type="text" value="" />
|
||||
<br/><br/>
|
||||
<input type="button" value="search" onclick="searchHandle()" />
|
||||
<br/><br/><br/><br/>
|
||||
<div id="result">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
34
cat/index.html_pattern
Normal file
34
cat/index.html_pattern
Normal file
@ -0,0 +1,34 @@
|
||||
<html>
|
||||
<head>
|
||||
<script src="../js/jquery-1.11.3.min.js"></script>
|
||||
<script src="../js/cat.js"></script>
|
||||
<link rel="stylesheet" href="../css/iatagger.css" />
|
||||
<meta charset="UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
<div id="header">
|
||||
</div>
|
||||
<div id="content">
|
||||
<a href="http://tmconcordia.sourceforge.net/" target="_blank"><img src="../images/banner.jpg" alt="Banner" /></a>
|
||||
<br/><br/><br/>
|
||||
<p>@desc@</p>
|
||||
<p>
|
||||
Enjoy your work with the system!
|
||||
</p>
|
||||
<label for="searchInput">@prompt@</label>
|
||||
<span class="suggestion" onclick="showHideSuggestions()">show/hide samples</span>
|
||||
<br/><br/>
|
||||
<div class="suggestionsInvisible" id="suggestions">
|
||||
<ul>@suggestions@</ul>
|
||||
<br/><br/>
|
||||
</div>
|
||||
<input id="searchInput" type="text" value="" />
|
||||
<br/><br/>
|
||||
<input type="button" value="search" onclick="searchHandle(@tmid@)" />
|
||||
<br/><br/><br/><br/>
|
||||
<div id="result">
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@ -11,9 +11,10 @@ $(document).ready(function() {
|
||||
});
|
||||
});
|
||||
|
||||
function searchHandle() {
|
||||
function searchHandle(tmid) {
|
||||
var concordiaRequest = {
|
||||
operation: 'concordiaSearch',
|
||||
tmId: tmid,
|
||||
pattern:$("#searchInput").val()
|
||||
}
|
||||
|
||||
|
55
cat/publish.py
Executable file
55
cat/publish.py
Executable file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sys, os, shutil, re
|
||||
|
||||
def load_version(path):
    """Read one version config file into a dict.

    Every non-blank line has the form ``field@#@value``. Lines whose field is
    ``suggestion`` are collected, in file order, under the ``'suggestions'``
    key; any other field is stored under its own name (a repeated field keeps
    the last value). A non-blank line without the separator raises
    ``ValueError``.
    """
    version = {'suggestions': []}
    with open(path) as config_file:
        for raw_line in config_file:
            entry = raw_line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Split on the first separator only, so a value may contain '@#@'.
            field, value = entry.split('@#@', 1)
            if field == 'suggestion':
                version['suggestions'].append(value)
            else:
                version[field] = value
    return version


def render_suggestions(suggestions):
    """Return the ``<li>`` markup that replaces ``@suggestions@``."""
    items = []
    for suggestion in suggestions:
        # The text ends up in a single-quoted JavaScript string that itself
        # sits inside a double-quoted HTML attribute: escape for both layers
        # so that sentences with apostrophes or quotes keep the link working.
        js_argument = suggestion.replace('\\', '\\\\')
        js_argument = js_argument.replace("'", "\\'")
        js_argument = js_argument.replace('"', '&quot;')
        items.append('<li>' + suggestion
                     + '<span class="suggestion" onclick="searchText(\''
                     + js_argument + '\');">apply</span></li>')
    return ''.join(items)


def render_line(line, version, suggestions_html):
    """Substitute every ``@field@`` placeholder of *version* in *line*.

    Plain string replacement is used on purpose: the values are literal text
    (they may contain backslashes), not regular-expression templates.
    """
    for field, value in version.items():
        if field == 'suggestions':
            line = line.replace('@suggestions@', suggestions_html)
        else:
            line = line.replace('@' + field + '@', value)
    return line


def publish(root_dir):
    """Generate the static site inside the existing, empty *root_dir*.

    The ``js``, ``css`` and ``images`` directories are copied as they are, and
    one sub-directory (named by the ``dir`` field) holding an ``index.html``
    is created for every config file found in ``versions``. All input paths
    are relative to the current working directory.
    """
    for asset_dir in ('js', 'css', 'images'):
        shutil.copytree(asset_dir, os.path.join(root_dir, asset_dir))

    versions_dir = 'versions'
    versions = [load_version(os.path.join(versions_dir, version_file))
                for version_file in os.listdir(versions_dir)]

    # The pattern is the same for every version, so read it only once.
    with open('index.html_pattern', 'r') as pattern_file:
        pattern_lines = pattern_file.readlines()

    for version in versions:
        version_dir = os.path.join(root_dir, version['dir'])
        os.mkdir(version_dir)
        # Built once per version instead of once per pattern line.
        suggestions_html = render_suggestions(version['suggestions'])
        with open(os.path.join(version_dir, 'index.html'), 'w') as index_file:
            for line in pattern_lines:
                index_file.write(render_line(line, version, suggestions_html))


def main(argv):
    """Validate the target directory given on the command line and publish."""
    if len(argv) < 2:
        print("usage: %s <target_directory>" % argv[0])
        sys.exit(1)

    root_dir = argv[1]

    if not os.path.exists(root_dir):
        print("%s does not exist!" % root_dir)
        sys.exit(1)

    if not os.path.isdir(root_dir):
        print("%s is not a directory!" % root_dir)
        sys.exit(1)

    if os.listdir(root_dir):
        print("%s is not empty!" % root_dir)
        sys.exit(1)

    publish(root_dir)


if __name__ == '__main__':
    main(sys.argv)
|
7
cat/versions/jrc_enes.cfg
Normal file
7
cat/versions/jrc_enes.cfg
Normal file
@ -0,0 +1,7 @@
|
||||
dir@#@jrc_enes
|
||||
tmid@#@1
|
||||
desc@#@Welcome to the interactive Concordia demo. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from English-Spanish corpus of European Law. Please enter an English sentence in the field below and press Enter (or use the search button). This instance of Concordia works best with law sentences, but is very likely to output some results for any English sentence. You can also use predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences.
|
||||
prompt@#@Enter search pattern (English sentence):
|
||||
suggestion@#@Every ship in the European Union must have a crew of 50 or more workers.
|
||||
suggestion@#@It is impossible to abolish the customs duties on fruit and vegetables.
|
||||
suggestion@#@The convention on human rights was held in Geneva.
|
6
cat/versions/setimes_hren.cfg
Normal file
6
cat/versions/setimes_hren.cfg
Normal file
@ -0,0 +1,6 @@
|
||||
dir@#@setimes_hren
|
||||
tmid@#@2
|
||||
desc@#@Welcome to Concordia. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from the SETIMES2 Croatian-English corpus (<a href="http://opus.lingfil.uu.se/SETIMES2.php" target="_blank">link</a>). Please enter a Croatian sentence in the field below and press Enter (or use the search button). You can test the system on predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences.
|
||||
prompt@#@Enter search pattern (Croatian sentence):
|
||||
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
|
||||
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
|
@ -13,6 +13,7 @@ add_executable(concordia_server_process
|
||||
int_array_param.cpp
|
||||
simple_search_result.cpp
|
||||
complete_concordia_search_result.cpp
|
||||
tm_dao.cpp
|
||||
)
|
||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
||||
|
||||
|
@ -7,12 +7,21 @@
|
||||
#include "config.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "rapidjson/rapidjson.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
|
||||
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
|
||||
throw(ConcordiaException) {
|
||||
boost::shared_ptr<Concordia> concordia(new Concordia(configFilePath));
|
||||
_indexController = boost::shared_ptr<IndexController> (new IndexController(concordia));
|
||||
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(concordia));
|
||||
throw(ConcordiaException) :
|
||||
_configFilePath(configFilePath) {
|
||||
std::vector<int> tmIds = _tmDAO.getTmIds();
|
||||
_concordiasMap = boost::shared_ptr<boost::ptr_map<int,Concordia> >(new boost::ptr_map<int,Concordia>());
|
||||
|
||||
BOOST_FOREACH(int & tmId, tmIds) {
|
||||
_addTm(tmId);
|
||||
}
|
||||
_indexController = boost::shared_ptr<IndexController> (new IndexController(_concordiasMap));
|
||||
_searcherController = boost::shared_ptr<SearcherController> (new SearcherController(_concordiasMap));
|
||||
}
|
||||
|
||||
ConcordiaServer::~ConcordiaServer() {
|
||||
@ -44,29 +53,48 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
} else if (operation == ADD_SENTENCES_OP) {
|
||||
std::vector<std::string> sourceSentences;
|
||||
std::vector<std::string> targetSentences;
|
||||
std::vector<int> tmIds;
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
// loading data from json
|
||||
const rapidjson::Value & sentencesArray = d[SENTENCES_PARAM];
|
||||
Logger::log("addSentences");
|
||||
Logger::logInt("sentences to add", sentencesArray.Size());
|
||||
Logger::logInt("tm id", tmId);
|
||||
for (rapidjson::SizeType i = 0; i < sentencesArray.Size(); i++) {
|
||||
if (sentencesArray[i].Size() != 3) {
|
||||
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 3 elements");
|
||||
if (sentencesArray[i].Size() != 2) {
|
||||
JsonGenerator::signalError(jsonWriter, "sentence should be an array of 2 elements");
|
||||
break;
|
||||
} else {
|
||||
tmIds.push_back(sentencesArray[i][0].GetInt());
|
||||
sourceSentences.push_back(sentencesArray[i][1].GetString());
|
||||
targetSentences.push_back(sentencesArray[i][2].GetString());
|
||||
sourceSentences.push_back(sentencesArray[i][0].GetString());
|
||||
targetSentences.push_back(sentencesArray[i][1].GetString());
|
||||
}
|
||||
}
|
||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmIds);
|
||||
_indexController->addSentences(jsonWriter, sourceSentences, targetSentences, tmId);
|
||||
} else if (operation == REFRESH_INDEX_OP) {
|
||||
_indexController->refreshIndexFromRAM(jsonWriter);
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
_indexController->refreshIndexFromRAM(jsonWriter, tmId);
|
||||
} else if (operation == SIMPLE_SEARCH_OP) {
|
||||
std::string pattern = d[PATTERN_PARAM].GetString();
|
||||
_searcherController->simpleSearch(jsonWriter, pattern);
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
_searcherController->simpleSearch(jsonWriter, pattern, tmId);
|
||||
} else if (operation == CONCORDIA_SEARCH_OP) {
|
||||
std::string pattern = d[PATTERN_PARAM].GetString();
|
||||
int tmId = d[TM_ID_PARAM].GetInt();
|
||||
Logger::logString("concordia search pattern", pattern);
|
||||
_searcherController->concordiaSearch(jsonWriter, pattern);
|
||||
_searcherController->concordiaSearch(jsonWriter, pattern, tmId);
|
||||
} else if (operation == ADD_TM_OP) {
|
||||
int sourceLangId = d[SOURCE_LANG_PARAM].GetInt();
|
||||
int targetLangId = d[TARGET_LANG_PARAM].GetInt();
|
||||
std::string name = d[NAME_PARAM].GetString();
|
||||
int newId = _tmDAO.addTm(sourceLangId, targetLangId, name);
|
||||
_addTm(newId);
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.String("newTmId");
|
||||
jsonWriter.Int(newId);
|
||||
jsonWriter.EndObject();
|
||||
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such operation");
|
||||
}
|
||||
@ -83,3 +111,13 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
return outputString.str();
|
||||
|
||||
}
|
||||
|
||||
void ConcordiaServer::_addTm(int tmId) {
|
||||
std::stringstream indexPath;
|
||||
indexPath << INDEX_DIRECTORY << "/tm_" << tmId;
|
||||
if (!boost::filesystem::exists(indexPath.str())) {
|
||||
boost::filesystem::create_directories(indexPath.str());
|
||||
}
|
||||
_concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath));
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "rapidjson/writer.h"
|
||||
#include "rapidjson/error/en.h"
|
||||
|
||||
#include "tm_dao.hpp"
|
||||
#include "index_controller.hpp"
|
||||
#include "searcher_controller.hpp"
|
||||
|
||||
@ -29,6 +30,14 @@ public:
|
||||
std::string handleRequest(std::string & requestString);
|
||||
|
||||
private:
|
||||
void _addTm(int tmId);
|
||||
|
||||
std::string _configFilePath;
|
||||
|
||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||
|
||||
TmDAO _tmDAO;
|
||||
|
||||
boost::shared_ptr<IndexController> _indexController;
|
||||
|
||||
boost::shared_ptr<SearcherController> _searcherController;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@"
|
||||
#define LOG_FILE_PATH "@LOG_FILE_PATH@"
|
||||
#define INDEX_DIRECTORY "@INDEX_DIRECTORY@"
|
||||
|
||||
// database connection information
|
||||
#define DB_NAME "@DB_NAME@"
|
||||
@ -15,10 +16,14 @@
|
||||
#define TARGET_SENTENCE_PARAM "targetSentence"
|
||||
#define TM_ID_PARAM "tmId"
|
||||
#define SENTENCES_PARAM "sentences"
|
||||
#define SOURCE_LANG_PARAM "sourceLangId"
|
||||
#define TARGET_LANG_PARAM "targetLangId"
|
||||
#define NAME_PARAM "name"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
#define REFRESH_INDEX_OP "refreshIndex"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
#define ADD_TM_OP "addTm"
|
||||
|
||||
|
@ -144,5 +144,13 @@ std::string DBconnection::getStringValue(PGresult * result, int row, int col) t
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! Returns the number of rows (tuples) held by a query result.
  \param result result of a previously executed query
  \returns number of rows in the result, as reported by PQntuples
  \throws ConcordiaException when a standard exception is raised while
          reading the result
*/
int DBconnection::getRowCount(PGresult * result) throw (ConcordiaException) {
    try {
        return PQntuples(result);
    } catch (std::exception & e) {
        // The message names this operation, so that a failure here is not
        // mistaken for one coming from getIntValue.
        std::stringstream ss;
        ss << "Error getting row count. Message: " << e.what();
        throw ConcordiaException(ss.str());
    }
}
|
||||
|
||||
|
@ -33,6 +33,8 @@ public:
|
||||
|
||||
std::string getStringValue(PGresult * result, int row, int col) throw (ConcordiaException);
|
||||
|
||||
int getRowCount(PGresult * result) throw (ConcordiaException);
|
||||
|
||||
private:
|
||||
void close();
|
||||
|
||||
|
@ -3,10 +3,11 @@
|
||||
#include <concordia/common/config.hpp>
|
||||
|
||||
#include "json_generator.hpp"
|
||||
#include "logger.hpp"
|
||||
|
||||
IndexController::IndexController(boost::shared_ptr<Concordia> concordia)
|
||||
IndexController::IndexController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
||||
throw(ConcordiaException):
|
||||
_concordia(concordia) {
|
||||
_concordiasMap(concordiasMap) {
|
||||
}
|
||||
|
||||
IndexController::~IndexController() {
|
||||
@ -20,19 +21,32 @@ void IndexController::addSentence(
|
||||
const int tmId) {
|
||||
|
||||
try {
|
||||
TokenizedSentence tokenizedSentence = _concordia->tokenize(sourceSentence);
|
||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||
if (it != _concordiasMap->end()) {
|
||||
TokenizedSentence tokenizedSentence = (*_concordiasMap)[tmId].tokenize(sourceSentence);
|
||||
int sentenceId = _unitDAO.addSentence(tokenizedSentence, targetSentence, tmId);
|
||||
_concordia->addTokenizedExample(tokenizedSentence, sentenceId);
|
||||
_concordia->refreshSAfromRAM();
|
||||
(*_concordiasMap)[tmId].addTokenizedExample(tokenizedSentence, sentenceId);
|
||||
(*_concordiasMap)[tmId].refreshSAfromRAM();
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||
}
|
||||
} catch (ConcordiaException & e) {
|
||||
std::stringstream errorstream;
|
||||
errorstream << "concordia error: " << e.what();
|
||||
JsonGenerator::signalError(jsonWriter, errorstream.str());
|
||||
} catch (std::exception & e) {
|
||||
std::stringstream errorstream;
|
||||
errorstream << "general error: " << e.what();
|
||||
JsonGenerator::signalError(jsonWriter, errorstream.str());
|
||||
} catch (...) {
|
||||
std::stringstream errorstream;
|
||||
errorstream << "unexpected error occurred";
|
||||
JsonGenerator::signalError(jsonWriter, errorstream.str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -40,16 +54,21 @@ void IndexController::addSentences(
|
||||
rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds) {
|
||||
const int tmId) {
|
||||
try {
|
||||
std::vector<TokenizedSentence> tokenizedSentences = _concordia->tokenizeAll(sourceSentences);
|
||||
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmIds);
|
||||
_concordia->addAllTokenizedExamples(tokenizedSentences, sentenceIds);
|
||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||
if (it != _concordiasMap->end()) {
|
||||
std::vector<TokenizedSentence> tokenizedSentences = (*_concordiasMap)[tmId].tokenizeAll(sourceSentences);
|
||||
std::vector<SUFFIX_MARKER_TYPE> sentenceIds = _unitDAO.addSentences(tokenizedSentences, targetSentences, tmId);
|
||||
(*_concordiasMap)[tmId].addAllTokenizedExamples(tokenizedSentences, sentenceIds);
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||
}
|
||||
} catch (ConcordiaException & e) {
|
||||
std::stringstream errorstream;
|
||||
errorstream << "concordia error: " << e.what();
|
||||
@ -57,14 +76,19 @@ void IndexController::addSentences(
|
||||
}
|
||||
}
|
||||
|
||||
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter) {
|
||||
void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const int tmId) {
|
||||
try {
|
||||
_concordia->refreshSAfromRAM();
|
||||
if (it != _concordiasMap->end()) {
|
||||
(*_concordiasMap)[tmId].refreshSAfromRAM();
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||
}
|
||||
} catch (ConcordiaException & e) {
|
||||
std::stringstream errorstream;
|
||||
errorstream << "concordia error: " << e.what();
|
||||
@ -74,3 +98,4 @@ void IndexController::refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <concordia/concordia.hpp>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
|
||||
|
||||
#include "unit_dao.hpp"
|
||||
|
||||
@ -14,7 +16,7 @@ class IndexController {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
explicit IndexController(boost::shared_ptr<Concordia> concordia)
|
||||
explicit IndexController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
||||
throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
@ -28,12 +30,13 @@ public:
|
||||
void addSentences(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::vector<std::string> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds);
|
||||
const int tmId);
|
||||
|
||||
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter);
|
||||
void refreshIndexFromRAM(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const int tmId);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||
|
||||
UnitDAO _unitDAO;
|
||||
};
|
||||
|
@ -5,17 +5,21 @@
|
||||
|
||||
#include "json_generator.hpp"
|
||||
|
||||
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||
SearcherController::SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
||||
throw(ConcordiaException):
|
||||
_concordia(concordia) {
|
||||
_concordiasMap(concordiasMap) {
|
||||
}
|
||||
|
||||
SearcherController::~SearcherController() {
|
||||
}
|
||||
|
||||
|
||||
void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
|
||||
std::vector<SimpleSearchResult> results = _unitDAO.getSearchResults(_concordia->simpleSearch(pattern));
|
||||
void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const int tmId) {
|
||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||
if (it != _concordiasMap->end()) {
|
||||
std::vector<SimpleSearchResult> results = _unitDAO.getSearchResults((*_concordiasMap)[tmId].simpleSearch(pattern));
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
@ -27,11 +31,18 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
|
||||
}
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||
}
|
||||
}
|
||||
|
||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
|
||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const int tmId) {
|
||||
|
||||
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(_concordia->concordiaSearch(pattern));
|
||||
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||
if (it != _concordiasMap->end()) {
|
||||
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult((*_concordiasMap)[tmId].concordiaSearch(pattern));
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
@ -50,5 +61,8 @@ void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuff
|
||||
|
||||
|
||||
jsonWriter.EndObject();
|
||||
} else {
|
||||
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include <string>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
#include <concordia/concordia.hpp>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
|
||||
@ -15,19 +16,23 @@ class SearcherController {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
explicit SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||
explicit SearcherController(boost::shared_ptr<boost::ptr_map<int,Concordia> >concordiasMap)
|
||||
throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~SearcherController();
|
||||
|
||||
void simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern);
|
||||
void simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const int tmId);
|
||||
|
||||
void concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern);
|
||||
void concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const int tmId);
|
||||
|
||||
private:
|
||||
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||
|
||||
UnitDAO _unitDAO;
|
||||
};
|
||||
|
55
concordia-server/tm_dao.cpp
Normal file
55
concordia-server/tm_dao.cpp
Normal file
@ -0,0 +1,55 @@
|
||||
#include "tm_dao.hpp"
|
||||
|
||||
#include "query_param.hpp"
|
||||
#include "string_param.hpp"
|
||||
#include "int_param.hpp"
|
||||
#include "int_array_param.hpp"
|
||||
#include "logger.hpp"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include <libpq-fe.h>
|
||||
|
||||
TmDAO::TmDAO() {
|
||||
}
|
||||
|
||||
TmDAO::~TmDAO() {
|
||||
}
|
||||
|
||||
/*! Fetches the ids of all translation memories stored in the tm table.
  \returns the ids, in the order in which the query returned them
*/
std::vector<int> TmDAO::getTmIds() {
    DBconnection connection;
    connection.startTransaction();

    std::string query = "SELECT id FROM tm;";
    PGresult * dbResult = connection.execute(query);

    std::vector<int> tmIds;
    for (int row = 0; row < connection.getRowCount(dbResult); ++row) {
        // Column 0 is the only selected column: the id.
        tmIds.push_back(connection.getIntValue(dbResult, row, 0));
    }

    connection.clearResult(dbResult);
    connection.endTransaction();

    return tmIds;
}
|
||||
|
||||
int TmDAO::addTm(const int sourceLangId, const int targetLangId, const std::string name) {
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
std::string query = "INSERT INTO tm(source_lang_id, target_lang_id, name) values($1::integer,$2::integer,$3::text) RETURNING id";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(sourceLangId));
|
||||
params.push_back(new IntParam(targetLangId));
|
||||
params.push_back(new StringParam(name));
|
||||
|
||||
PGresult * result = connection.execute(query, params);
|
||||
int newId = connection.getIntValue(result, 0, 0);
|
||||
connection.clearResult(result);
|
||||
connection.endTransaction();
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
return newId;
|
||||
|
||||
}
|
||||
|
27
concordia-server/tm_dao.hpp
Normal file
27
concordia-server/tm_dao.hpp
Normal file
@ -0,0 +1,27 @@
|
||||
#ifndef TM_DAO_HDR
|
||||
#define TM_DAO_HDR
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <concordia/common/config.hpp>
|
||||
#include "db_connection.hpp"
|
||||
|
||||
/*! Data access object for the translation memory (tm) records kept
    in the database.
*/
class TmDAO {
public:
    /*! Constructor.
    */
    TmDAO();

    /*! Destructor.
    */
    virtual ~TmDAO();

    /*! Lists the stored translation memories.
      \returns ids of all records of the tm table
    */
    std::vector<int> getTmIds();

    /*! Stores a new translation memory record.
      \param sourceLangId id of the source language
      \param targetLangId id of the target language
      \param name name of the translation memory
      \returns id assigned to the new record
    */
    int addTm(const int sourceLangId, const int targetLangId, const std::string name);

private:

};
|
||||
|
||||
#endif
|
@ -31,13 +31,13 @@ int UnitDAO::addSentence(
|
||||
std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds) {
|
||||
const int tmId) {
|
||||
DBconnection connection;
|
||||
std::vector<SUFFIX_MARKER_TYPE> newIds;
|
||||
connection.startTransaction();
|
||||
int index = 0;
|
||||
BOOST_FOREACH(const TokenizedSentence & sourceSentence, sourceSentences) {
|
||||
newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmIds.at(index)));
|
||||
newIds.push_back(_addSingleSentence(connection, sourceSentence, targetSentences.at(index), tmId));
|
||||
index++;
|
||||
}
|
||||
connection.endTransaction();
|
||||
|
@ -32,7 +32,7 @@ public:
|
||||
std::vector<SUFFIX_MARKER_TYPE> addSentences(
|
||||
const std::vector<TokenizedSentence> & sourceSentences,
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds);
|
||||
const int tmId);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
||||
|
||||
|
@ -3,17 +3,6 @@
|
||||
#---------------------------
|
||||
#
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The below set the paths for hashed index, markers array and word map files.
|
||||
# If all the files pointed by these paths exist, Concordia reads them to its
|
||||
# RAM index. When none of these files exist, a new empty index is created.
|
||||
# However, if any of these files exist and any other is missing, the index
|
||||
# is considered corrupt and Concordia does not start.
|
||||
|
||||
hashed_index_path = "@INDEX_DIRECTORY@/@HASHED_INDEX_FILE@"
|
||||
markers_path = "@INDEX_DIRECTORY@/@MARKERS_FILE@"
|
||||
word_map_path = "@INDEX_DIRECTORY@/@WORD_MAP_FILE@"
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# The following settings control the sentence anonymizer mechanism. It is used to
|
||||
# remove unnecessary symbols and possibly words from sentences added to index
|
||||
|
2
tests/.gitignore
vendored
Normal file
2
tests/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
host.py
|
||||
host.pyc
|
@ -5,10 +5,16 @@ import unittest
|
||||
import json
|
||||
import urllib2
|
||||
import sys
|
||||
import host
|
||||
import time
|
||||
|
||||
BUFFER_SIZE = 500
|
||||
|
||||
address = 'http://'+host.concordia_host
|
||||
if len(host.concordia_port) > 0:
|
||||
address += ':'+host.concordia_port
|
||||
|
||||
|
||||
def file_len(fname):
|
||||
with open(fname) as f:
|
||||
for i, l in enumerate(f):
|
||||
@ -16,13 +22,15 @@ def file_len(fname):
|
||||
return i + 1
|
||||
|
||||
def add_data(data):
|
||||
req = urllib2.Request('http://localhost')
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
urllib2.urlopen(req, json.dumps(data)).read()
|
||||
|
||||
sourceFile = sys.argv[1]
|
||||
targetFile = sys.argv[2]
|
||||
tmId = int(sys.argv[3])
|
||||
sourceLangId = int(sys.argv[2])
|
||||
targetFile = sys.argv[3]
|
||||
targetLangId = int(sys.argv[4])
|
||||
name = sys.argv[5]
|
||||
|
||||
totalLines = file_len(sourceFile)
|
||||
if file_len(targetFile) != totalLines:
|
||||
@ -30,18 +38,33 @@ if file_len(targetFile) != totalLines:
|
||||
sys.exit(1)
|
||||
|
||||
data = {
|
||||
'operation': 'addSentences'
|
||||
'operation': 'addTm',
|
||||
'sourceLangId':sourceLangId,
|
||||
'targetLangId':targetLangId,
|
||||
'name':name
|
||||
}
|
||||
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||
tmId = int(response['newTmId'])
|
||||
print "Added new tm: %d" % tmId
|
||||
|
||||
|
||||
data = {
|
||||
'operation': 'addSentences',
|
||||
'tmId':tmId
|
||||
}
|
||||
|
||||
sentences = []
|
||||
start = time.time()
|
||||
with open(sys.argv[1]) as sourceSentences:
|
||||
with open(sys.argv[2]) as targetSentences:
|
||||
with open(sourceFile) as sourceSentences:
|
||||
with open(targetFile) as targetSentences:
|
||||
lineNumber = 0
|
||||
for sourceSentence in sourceSentences:
|
||||
lineNumber += 1
|
||||
targetSentence = targetSentences.readline()
|
||||
sentences.append([tmId, sourceSentence, targetSentence])
|
||||
sentences.append([sourceSentence, targetSentence])
|
||||
if lineNumber % BUFFER_SIZE == 0:
|
||||
data['sentences'] = sentences
|
||||
sentences = []
|
||||
@ -60,9 +83,10 @@ print "Added all %d sentences. Time elapsed: %.4f s, overall speed: %.4f sentenc
|
||||
print "Generating index..."
|
||||
start = time.time()
|
||||
data = {
|
||||
'operation': 'refreshIndex'
|
||||
'operation': 'refreshIndex',
|
||||
'tmId' : tmId
|
||||
}
|
||||
req = urllib2.Request('http://localhost')
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
urllib2.urlopen(req, json.dumps(data)).read()
|
||||
|
||||
|
@ -6,16 +6,22 @@ import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
address = 'http://'+host.concordia_host
|
||||
if len(host.concordia_port) > 0:
|
||||
address += ':'+host.concordia_port
|
||||
|
||||
|
||||
data = {
|
||||
'operation': 'addSentence',
|
||||
'sourceSentence':sys.argv[1],
|
||||
'targetSentence':sys.argv[2],
|
||||
'tmId':sys.argv[3]
|
||||
'tmId':int(sys.argv[3])
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
req = urllib2.Request('http://localhost')
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||
end = time.time()
|
||||
|
26
tests/addTm.py
Executable file
26
tests/addTm.py
Executable file
@ -0,0 +1,26 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
# Build the server URL from the local host settings; the port part is only
# appended when a port is configured.
address = 'http://' + host.concordia_host
if host.concordia_port:
    address += ':' + host.concordia_port

# Request payload for the "addTm" operation:
# argv[1] = source language id, argv[2] = target language id, argv[3] = name.
source_lang_id = int(sys.argv[1])
target_lang_id = int(sys.argv[2])
data = {
    'operation': 'addTm',
    'sourceLangId': source_lang_id,
    'targetLangId': target_lang_id,
    'name': sys.argv[3]
}

# POST the payload as JSON and decode the JSON answer of the server.
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
raw_response = urllib2.urlopen(req, json.dumps(data)).read()
response = json.loads(raw_response)

print(response)
|
@ -6,6 +6,12 @@ import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
address = 'http://'+host.concordia_host
|
||||
if len(host.concordia_port) > 0:
|
||||
address += ':'+host.concordia_port
|
||||
|
||||
|
||||
data = {
|
||||
'operation': 'concordiaSearch',
|
||||
@ -13,7 +19,7 @@ data = {
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
req = urllib2.Request('http://localhost')
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||
end = time.time()
|
||||
|
2
tests/host.py_example
Normal file
2
tests/host.py_example
Normal file
@ -0,0 +1,2 @@
|
||||
concordia_host = 'localhost'
|
||||
concordia_port = ''
|
@ -6,14 +6,20 @@ import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
data = {
|
||||
'operation': 'simpleSearch',
|
||||
'pattern':sys.argv[1]
|
||||
'pattern':sys.argv[1],
|
||||
'tmId':int(sys.argv[2])
|
||||
}
|
||||
|
||||
address = 'http://'+host.concordia_host
|
||||
if len(host.concordia_port) > 0:
|
||||
address += ':'+host.concordia_port
|
||||
|
||||
start = time.time()
|
||||
req = urllib2.Request('http://localhost:8800')
|
||||
req = urllib2.Request(address)
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||
end = time.time()
|
||||
|
Loading…
Reference in New Issue
Block a user