import requests

This commit is contained in:
Rafał Jaworski 2017-06-25 23:16:43 +02:00
parent d9ef86b4d4
commit efa2bc2416
23 changed files with 879 additions and 29 deletions

1
.gitignore vendored
View File

@ -34,3 +34,4 @@ mgiza-aligner/mgiza/mgizapp/src/mkcls/-MT
mgiza-aligner/mgiza/mgizapp/src/mkcls/CMakeFiles/ mgiza-aligner/mgiza/mgizapp/src/mkcls/CMakeFiles/
mgiza-aligner/mgiza/mgizapp/src/mkcls/Makefile mgiza-aligner/mgiza/mgizapp/src/mkcls/Makefile
mgiza-aligner/mgiza/mgizapp/src/mkcls/cmake_install.cmake mgiza-aligner/mgiza/mgizapp/src/mkcls/cmake_install.cmake
__pycache__

View File

@ -1,4 +1,4 @@
- sudo apt-get install postgresql libfcgi-dev libpq-dev mono-complete - sudo apt-get install postgresql libfcgi-dev libpq-dev mono-complete python3-psycopg2
- clone github repo, mkdir build, cd build, ../cmake.sh, make - clone github repo, mkdir build, cd build, ../cmake.sh, make
- sudo -u postgres psql postgres - sudo -u postgres psql postgres
- create user concordia with encrypted password 'concordia'; - create user concordia with encrypted password 'concordia';

View File

@ -213,6 +213,7 @@ h2 {
color: #e32; color: #e32;
font-family:'Gill Sans','lucida grande', helvetica, arial, sans-serif; font-family:'Gill Sans','lucida grande', helvetica, arial, sans-serif;
font-size: 190%; font-size: 190%;
margin-top: 30px;
} }
h3 { h3 {
color: #2c6877; color: #2c6877;
@ -620,6 +621,7 @@ label {
display: block; display: block;
font-size: 110%; font-size: 110%;
margin-bottom:3px; margin-bottom:3px;
margin-top:15px;
} }
input, textarea { input, textarea {
clear: both; clear: both;
@ -685,14 +687,14 @@ form .submit input[type=submit]:hover {
background: #5BA150; background: #5BA150;
} }
/* Form errors */ /* Form errors */
form .error { .error {
background: #FFDACC; background: #FFDACC;
-moz-border-radius: 4px; -moz-border-radius: 4px;
-webkit-border-radius: 4px; -webkit-border-radius: 4px;
border-radius: 4px; border-radius: 4px;
font-weight: normal; font-weight: normal;
} }
form .error-message { .error-message {
-moz-border-radius: none; -moz-border-radius: none;
-webkit-border-radius: none; -webkit-border-radius: none;
border-radius: none; border-radius: none;
@ -702,8 +704,8 @@ form .error-message {
padding-left: 4px; padding-left: 4px;
padding-right: 0; padding-right: 0;
} }
form .error, .error,
form .error-message { .error-message {
color: #9E2424; color: #9E2424;
-webkit-box-shadow: none; -webkit-box-shadow: none;
-moz-box-shadow: none; -moz-box-shadow: none;

View File

@ -1,3 +1,28 @@
/**
 * Puts the import form into "create a new translation memory" mode:
 * the TM name and both language fields are shown, the existing-TM
 * selector is hidden.
 */
function showNewTmOptions() {
    showField('tm_name');
    hideField('tm_id');
    ['src_lang_id', 'trg_lang_id'].forEach(function (languageFieldId) {
        showField(languageFieldId);
    });
}
/**
 * Puts the import form into "extend an existing translation memory" mode:
 * only the existing-TM selector stays visible; the TM name and both
 * language fields are hidden.
 */
function showExtendTmOptions() {
    ['tm_name', 'src_lang_id', 'trg_lang_id'].forEach(function (unusedFieldId) {
        hideField(unusedFieldId);
    });
    showField('tm_id');
}
/**
 * Hides a form field together with its label by adding the `hidden`
 * CSS class to both elements.
 *
 * @param {string} fieldId  id attribute of the field to hide
 */
function hideField(fieldId) {
    var field = $('#' + fieldId);
    var label = $('label[for=' + fieldId + ']');
    field.add(label).addClass('hidden');
}
/**
 * Reveals a form field together with its label by removing the `hidden`
 * CSS class from both elements.
 *
 * @param {string} fieldId  id attribute of the field to show
 */
function showField(fieldId) {
    var field = $('#' + fieldId);
    var label = $('label[for=' + fieldId + ']');
    field.add(label).removeClass('hidden');
}
function toggleHelp() { function toggleHelp() {
$('#help').toggleClass('hidden'); $('#help').toggleClass('hidden');
} }

View File

@ -3,6 +3,13 @@
import sys, os, shutil, re import sys, os, shutil, re
def config_file(config, file_name, root_dir):
    """Render a ``<file_name>_pattern`` template into ``root_dir``.

    Every occurrence of ``@field@`` in the template is replaced by the
    matching value from ``config``; all other text is copied unchanged.

    Args:
        config: mapping of placeholder name -> replacement string.
        file_name: name of the output file; the template is read from
            ``file_name + '_pattern'`` in the current directory.
        root_dir: directory the rendered file is written to.
    """
    with open(file_name + '_pattern', 'r') as pattern_file, \
            open(root_dir + '/' + file_name, 'w') as out_file:
        for line in pattern_file:
            # items() works on both Python 2 and 3 (iteritems() is 2-only).
            for field, value in config.items():
                # Plain substring replacement: the value must be inserted
                # verbatim. re.sub would interpret backslashes in the value
                # as escapes and regex metacharacters in the field name.
                line = line.replace('@' + field + '@', value)
            out_file.write(line)
root_dir = sys.argv[1] root_dir = sys.argv[1]
if not os.path.exists(root_dir): if not os.path.exists(root_dir):
@ -23,31 +30,16 @@ shutil.copytree('images', root_dir+'/images')
shutil.copy('favicon.ico', root_dir+'/favicon.ico') shutil.copy('favicon.ico', root_dir+'/favicon.ico')
config = dict()
with open('host.cfg', 'r') as host_file: with open('host.cfg', 'r') as host_file:
for line in host_file: for line in host_file:
field, value = line.strip().split('@#@') field, value = line.strip().split('@#@')
if field == 'concordia_host': config[field] = value
concordia_host = value
elif field == 'concordia_port':
concordia_port = value
with open('concordia_gate.php_pattern', 'r') as gate_pattern_file, open(root_dir+'/concordia_gate.php', 'w') as gate_file: config_file(config, 'concordia_gate.php', root_dir)
for line in gate_pattern_file: config_file(config, 'concordia_search.php', root_dir)
line = re.sub('@concordia_host@', concordia_host, line) config_file(config, 'tm_info.php', root_dir)
line = re.sub('@concordia_port@', concordia_port, line) config_file(config, 'tm_manager.php', root_dir)
gate_file.write(line)
with open('concordia_search.php_pattern', 'r') as search_pattern_file, open(root_dir+'/concordia_search.php', 'w') as search_file:
for line in search_pattern_file:
line = re.sub('@concordia_host@', concordia_host, line)
line = re.sub('@concordia_port@', concordia_port, line)
search_file.write(line)
with open('tm_info.php_pattern', 'r') as tm_info_pattern_file, open(root_dir+'/tm_info.php', 'w') as tm_info_file:
for line in tm_info_pattern_file:
line = re.sub('@concordia_host@', concordia_host, line)
line = re.sub('@concordia_port@', concordia_port, line)
tm_info_file.write(line)
versions_dir = 'versions_enabled' versions_dir = 'versions_enabled'

257
cat/tm_manager.php_pattern Normal file
View File

@ -0,0 +1,257 @@
<?php
/**
 * Sends a request to the Concordia server as a JSON-encoded HTTP POST.
 *
 * @param string $url     address of the Concordia server
 * @param array  $request request data; encoded with json_encode()
 * @return mixed decoded JSON response (objects as stdClass), or null when
 *               the server could not be reached or the reply is not JSON
 */
function postJson($url, $request) {
    $options = array(
        'http' => array(
            // The body is JSON, so declare it as JSON rather than as
            // form-encoded data.
            'header' => "Content-type: application/json\r\n",
            'method' => 'POST',
            'content' => json_encode($request),
        ),
    );
    $context = stream_context_create($options);
    $response = file_get_contents($url, false, $context);
    if ($response === false) {
        // Transport failure: report "no data" explicitly instead of
        // feeding a boolean to json_decode().
        return null;
    }
    return json_decode($response);
}
/**
 * Counts the lines of a text file.
 *
 * A final line without a trailing newline is counted; a trailing newline
 * does not add an extra empty line, so "a\nb\n" and "a\nb" both yield 2.
 *
 * @param string $file_name path of the file to read
 * @return int number of lines (0 for an empty file)
 * @throws RuntimeException when the file cannot be opened
 */
function lineCount($file_name)
{
    $handle = fopen($file_name, 'r');
    if ($handle === false) {
        throw new RuntimeException("Cannot open file for line counting: $file_name");
    }
    $linecount = 0;
    // Loop on the result of fgets() rather than on feof(): feof() only
    // turns true after a read has hit the end of the file, so a
    // while(!feof()) loop runs one extra time for files ending in "\n".
    while (fgets($handle) !== false) {
        ++$linecount;
    }
    fclose($handle);
    return $linecount;
}
/**
 * Validates an uploaded source/target file pair and registers an import
 * request with the Concordia server.
 *
 * @param string $url        address of the Concordia server
 * @param array  $postArray  submitted form fields (src_lang_id, trg_lang_id,
 *                           tm_name, tm_type, tm_id)
 * @param array  $filesArray uploaded files, in $_FILES layout, under the
 *                           keys 'src_file' and 'trg_file'
 * @return string empty string on success, otherwise an error message
 */
function addRequest($url, $postArray, $filesArray) {
    // Reject the request when an upload failed OR no file was given. The
    // two conditions must be OR-ed: a failed upload has size 0, so an
    // AND-ed check can never fire.
    foreach (array('src_file' => 'source', 'trg_file' => 'target') as $field => $label) {
        if (!isset($filesArray[$field])
                || $filesArray[$field]['error'] != 0
                || $filesArray[$field]['size'] <= 0) {
            return "Error uploading $label file or no $label file given.";
        }
    }

    // Move the uploads to unique paths; these paths are sent to the server.
    $srcFilePath = "/tmp/".uniqid("srcFile", true);
    $trgFilePath = "/tmp/".uniqid("trgFile", true);
    if (!move_uploaded_file($filesArray['src_file']['tmp_name'], $srcFilePath)
            || !move_uploaded_file($filesArray['trg_file']['tmp_name'], $trgFilePath)) {
        return "Could not store the uploaded files on the server.";
    }

    // Source and target must be parallel: one sentence per line in each.
    $srcLineCount = lineCount($srcFilePath);
    $trgLineCount = lineCount($trgFilePath);
    if ($srcLineCount != $trgLineCount) {
        return "Files have different number of lines ($srcLineCount and $trgLineCount).";
    }

    $request = array (
        "operation" => "addRequest",
        "sourceFilePath" => $srcFilePath,
        "targetFilePath" => $trgFilePath,
        "sourceLangId" => intval($postArray['src_lang_id']),
        "targetLangId" => intval($postArray['trg_lang_id']),
        "name" => $postArray['tm_name'],
        "type" => intval($postArray['tm_type']),
        "tmId" => intval($postArray['tm_id'])
    );
    $response = postJson($url, $request);

    // Do not report success unless the server confirmed it.
    if (!is_object($response) || !isset($response->status) || $response->status != "success") {
        return "The Concordia server did not accept the import request.";
    }
    return "";
}
// Address of the Concordia server. The @...@ placeholders are substituted
// when this pattern file is rendered into the web root.
$url = 'http://@concordia_host@:@concordia_port@';

// A POST means the import form was submitted: process it and keep the
// resulting message (empty when nothing went wrong or nothing was posted).
$errorMessage = ($_SERVER['REQUEST_METHOD'] == 'POST')
    ? addRequest($url, $_POST, $_FILES)
    : "";

// Data rendered by the page below.
$tmsData = postJson($url, array("operation" => "getTmsInfo"));
$requestsData = postJson($url, array("operation" => "getRequestsInfo"));
$languagesData = postJson($url, array("operation" => "getLanguages"));
?>
<html>
<head>
<script src="js/jquery-1.11.3.min.js"></script>
<script src="js/cat.js"></script>
<link rel="stylesheet" href="css/concordia_cat.css" />
<meta charset="UTF-8">
</head>
<body>
<div id="header">
</div>
<div id="content">
<section id="banner">
<h1>Concordia</h1>
<img class="banner-bg" src="images/banner-thin.png" alt="Banner">
<img class="banner-icon" src="images/concordia-thin.png" alt="Banner">
</section><!-- // end #banner -->
<?php /* Show the error banner only when processing the form produced a message. */ ?>
<?php if ($errorMessage != ""): ?>
<div class="error-message"><?= $errorMessage ?></div>
<?php endif; ?>
<h2>Available translation memories:</h2>
<table>
<tr>
<th>Id</th>
<th>Name</th>
<th>Source language</th>
<th>Target language</th>
</tr>
<?php
// One table row per translation memory reported by the server.
// - The list falls back to empty when the server reply is missing, so the
//   page still renders.
// - TM names are entered by users, so every value is HTML-escaped.
foreach ((isset($tmsData->tms) ? $tmsData->tms : array()) as $tm) {
?>
<tr>
<td><?= htmlspecialchars($tm->id, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= htmlspecialchars($tm->name, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= htmlspecialchars($tm->sourceLanguageCode, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= htmlspecialchars($tm->targetLanguageCode, ENT_QUOTES, 'UTF-8') ?></td>
</tr>
<?php
}
?>
</table>
<h2>Import new translations</h2>
<form action="" method="POST" enctype="multipart/form-data">
<div class="radio">
<input id="new_tm" type="radio" name="tm_type" value="0" checked onclick="showNewTmOptions()">
<label for="new_tm">Create a new translation memory</label>
<input id="extend_tm" type="radio" name="tm_type" value="1" onclick="showExtendTmOptions()">
<label for="extend_tm">Extend existing translation memory</label>
</div>
<label for="tm_name">Name of the new translation memory:</label>
<input id="tm_name" type="text" name="tm_name">
<label class="hidden" for="tm_id">Choose translation memory to extend:</label>
<select class="hidden" id="tm_id" name="tm_id">
<?php
// One <option> per existing translation memory. The list falls back to
// empty when the server reply is missing; values are HTML-escaped because
// TM names are entered by users.
foreach ((isset($tmsData->tms) ? $tmsData->tms : array()) as $tm) {
?>
<option value="<?= htmlspecialchars($tm->id, ENT_QUOTES, 'UTF-8') ?>"><?= htmlspecialchars($tm->name, ENT_QUOTES, 'UTF-8') ?> (<?= htmlspecialchars($tm->sourceLanguageCode, ENT_QUOTES, 'UTF-8') ?>&rarr;<?= htmlspecialchars($tm->targetLanguageCode, ENT_QUOTES, 'UTF-8') ?>)</option>
<?php
}
?>
</select>
<table>
<tr>
<td width="70%">
<label for="src_file">Source file (TXT)</label>
<input id="src_file" name="src_file" type="file">
</td>
<td>
<label for="src_lang_id">Source language</label>
<select id="src_lang_id" name="src_lang_id">
<?php
// One <option> per language reported by the server. The list falls back
// to empty when the server reply is missing; values are HTML-escaped
// before being written into the page.
foreach ((isset($languagesData->languages) ? $languagesData->languages : array()) as $language) {
?>
<option value="<?= htmlspecialchars($language->id, ENT_QUOTES, 'UTF-8') ?>"><?= htmlspecialchars($language->name, ENT_QUOTES, 'UTF-8') ?> (<?= htmlspecialchars($language->code, ENT_QUOTES, 'UTF-8') ?>)</option>
<?php
}
?>
</select>
</td>
</tr>
<tr>
<td>
<label for="trg_file">Target file (TXT)</label>
<input id="trg_file" name="trg_file" type="file">
</td>
<td>
<label for="trg_lang_id">Target language</label>
<select id="trg_lang_id" name="trg_lang_id">
<?php
// One <option> per language reported by the server. The list falls back
// to empty when the server reply is missing; values are HTML-escaped
// before being written into the page.
foreach ((isset($languagesData->languages) ? $languagesData->languages : array()) as $language) {
?>
<option value="<?= htmlspecialchars($language->id, ENT_QUOTES, 'UTF-8') ?>"><?= htmlspecialchars($language->name, ENT_QUOTES, 'UTF-8') ?> (<?= htmlspecialchars($language->code, ENT_QUOTES, 'UTF-8') ?>)</option>
<?php
}
?>
</select>
</td>
</tr>
</table>
<input type="submit" value="Import">
</form>
<h2>Latest import requests:</h2>
<table>
<tr>
<th>Id</th>
<th>Name</th>
<th>Source language</th>
<th>Target language</th>
<th>Status</th>
<th>Type</th>
<th>TM id</th>
<th>Created</th>
</tr>
<?php
// One table row per import request reported by the server.
// - The list falls back to empty when the server reply is missing.
// - Request names are entered by users, so every value is HTML-escaped.
foreach ((isset($requestsData->requests) ? $requestsData->requests : array()) as $request) {
    // Type 0 creates a new TM; any other type extends an existing one.
    // Languages are shown only for new TMs, the TM id only for extensions.
    $isNewTm = ($request->type == 0);
?>
<tr>
<td><?= htmlspecialchars($request->id, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= htmlspecialchars($request->name, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= $isNewTm ? htmlspecialchars($request->sourceLanguageCode, ENT_QUOTES, 'UTF-8') : "N/A" ?></td>
<td><?= $isNewTm ? htmlspecialchars($request->targetLanguageCode, ENT_QUOTES, 'UTF-8') : "N/A" ?></td>
<td><?= htmlspecialchars($request->status, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= $isNewTm ? "new TM" : "extend TM" ?></td>
<td><?= $isNewTm ? "N/A" : htmlspecialchars($request->tmId, ENT_QUOTES, 'UTF-8') ?></td>
<td><?= htmlspecialchars($request->created, ENT_QUOTES, 'UTF-8') ?></td>
</tr>
<?php
}
?>
</table>
</div>
</body>
</html>

View File

@ -1 +0,0 @@
../versions_available/europarl_sample.cfg

View File

@ -0,0 +1 @@
../versions_available/stocznia_enpl.cfg

View File

@ -0,0 +1 @@
../versions_available/stocznia_plen.cfg

View File

@ -13,6 +13,8 @@
#include "config.hpp" #include "config.hpp"
#include "logger.hpp" #include "logger.hpp"
#include "tm.hpp" #include "tm.hpp"
#include "request.hpp"
#include "language.hpp"
#include "rapidjson/rapidjson.h" #include "rapidjson/rapidjson.h"
#include <boost/foreach.hpp> #include <boost/foreach.hpp>
#include <boost/ptr_container/ptr_map.hpp> #include <boost/ptr_container/ptr_map.hpp>
@ -125,7 +127,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
} }
} }
_indexController->addAlignedLemmatizedSentences(jsonWriter, sourceSentences, targetSentences, alignmentStrings, tmId); _indexController->addAlignedLemmatizedSentences(jsonWriter, sourceSentences, targetSentences, alignmentStrings, tmId);
} else if (operation == GET_TMS_INFO_PARAM) { } else if (operation == GET_TMS_INFO_OP) {
std::vector<Tm> tms = _tmDAO.getTms(); std::vector<Tm> tms = _tmDAO.getTms();
jsonWriter.StartObject(); jsonWriter.StartObject();
@ -148,6 +150,79 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
jsonWriter.EndArray(); jsonWriter.EndArray();
jsonWriter.EndObject(); jsonWriter.EndObject();
} else if (operation == GET_REQUESTS_INFO_OP) {
std::vector<Request> requests = _requestDAO.getRequests();
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("success");
jsonWriter.String("requests");
jsonWriter.StartArray();
BOOST_FOREACH(Request & request, requests) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(request.getId());
jsonWriter.String("sourceFilePath");
jsonWriter.String(request.getSourceFilePath().c_str());
jsonWriter.String("targetFilePath");
jsonWriter.String(request.getTargetFilePath().c_str());
jsonWriter.String("name");
jsonWriter.String(request.getName().c_str());
jsonWriter.String("sourceLanguageCode");
jsonWriter.String(request.getSourceLanguageCode().c_str());
jsonWriter.String("targetLanguageCode");
jsonWriter.String(request.getTargetLanguageCode().c_str());
jsonWriter.String("status");
jsonWriter.Int(request.getStatus());
jsonWriter.String("type");
jsonWriter.Int(request.getType());
jsonWriter.String("tmId");
jsonWriter.Int(request.getTmId());
jsonWriter.String("created");
jsonWriter.String(request.getCreated().c_str());
jsonWriter.EndObject();
}
jsonWriter.EndArray();
jsonWriter.EndObject();
} else if (operation == GET_LANGUAGES_OP) {
std::vector<Language> languages = _languageDAO.getLanguages();
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("success");
jsonWriter.String("languages");
jsonWriter.StartArray();
BOOST_FOREACH(Language & language, languages) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(language.getId());
jsonWriter.String("code");
jsonWriter.String(language.getCode().c_str());
jsonWriter.String("name");
jsonWriter.String(language.getName().c_str());
jsonWriter.EndObject();
}
jsonWriter.EndArray();
jsonWriter.EndObject();
} else if (operation == ADD_REQUEST_OP) {
std::string sourceFilePath = _getStringParameter(d, SOURCE_FILE_PARAM);
std::string targetFilePath = _getStringParameter(d, TARGET_FILE_PARAM);
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
std::string name = _getStringParameter(d, NAME_PARAM);
int type = _getIntParameter(d, TYPE_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
int newId = _requestDAO.addRequest(sourceFilePath, targetFilePath, sourceLangId, targetLangId, name, type, tmId);
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("success");
jsonWriter.String("newRequestId");
jsonWriter.Int(newId);
jsonWriter.EndObject();
} else if (operation == "lemmatize") { } else if (operation == "lemmatize") {
std::string sentence = _getStringParameter(d, "sentence"); std::string sentence = _getStringParameter(d, "sentence");

View File

@ -12,6 +12,8 @@
#include "rapidjson/error/en.h" #include "rapidjson/error/en.h"
#include "tm_dao.hpp" #include "tm_dao.hpp"
#include "request_dao.hpp"
#include "language_dao.hpp"
#include "index_controller.hpp" #include "index_controller.hpp"
#include "searcher_controller.hpp" #include "searcher_controller.hpp"
#include "lemmatizer_facade.hpp" #include "lemmatizer_facade.hpp"
@ -48,6 +50,10 @@ private:
TmDAO _tmDAO; TmDAO _tmDAO;
RequestDAO _requestDAO;
LanguageDAO _languageDAO;
boost::shared_ptr<IndexController> _indexController; boost::shared_ptr<IndexController> _indexController;
boost::shared_ptr<SearcherController> _searcherController; boost::shared_ptr<SearcherController> _searcherController;

View File

@ -21,14 +21,20 @@
#define EXAMPLES_PARAM "examples" #define EXAMPLES_PARAM "examples"
#define SOURCE_LANG_PARAM "sourceLangId" #define SOURCE_LANG_PARAM "sourceLangId"
#define TARGET_LANG_PARAM "targetLangId" #define TARGET_LANG_PARAM "targetLangId"
#define SOURCE_FILE_PARAM "sourceFilePath"
#define TARGET_FILE_PARAM "targetFilePath"
#define NAME_PARAM "name" #define NAME_PARAM "name"
#define TYPE_PARAM "type"
#define INTERVALS_PARAM "intervals" #define INTERVALS_PARAM "intervals"
#define GET_TMS_INFO_PARAM "getTmsInfo"
#define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences" #define ADD_SENTENCES_OP "addSentences"
#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences" #define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
#define ADD_ALIGNED_LEMMATIZED_SENTENCES_OP "addAlignedLemmatizedSentences" #define ADD_ALIGNED_LEMMATIZED_SENTENCES_OP "addAlignedLemmatizedSentences"
#define ADD_REQUEST_OP "addRequest"
#define GET_TMS_INFO_OP "getTmsInfo"
#define GET_REQUESTS_INFO_OP "getRequestsInfo"
#define GET_LANGUAGES_OP "getLanguages"
#define REFRESH_INDEX_OP "refreshIndex" #define REFRESH_INDEX_OP "refreshIndex"
#define SIMPLE_SEARCH_OP "simpleSearch" #define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch"

View File

@ -0,0 +1,13 @@
#include "language.hpp"
/*! Stores the given id, code and name; no validation is performed.
 */
Language::Language(const int id,
                   const std::string & code,
                   const std::string & name)
    : _id(id), _code(code), _name(name) {
}
/*! Nothing to release: all members clean up after themselves.
 */
Language::~Language() {}

View File

@ -0,0 +1,41 @@
#ifndef LANGUAGE_HDR
#define LANGUAGE_HDR
#include <string>
#include <vector>
/*! A language record: numeric id, code and display name.
    The fields are set at construction and exposed through read-only
    accessors.
 */
class Language {
public:
    /*! Constructor.
      \param id numeric identifier of the language
      \param code language code
      \param name language name
    */
    Language(const int id,
             const std::string & code,
             const std::string & name);

    /*! Destructor.
    */
    virtual ~Language();

    /*! \returns the numeric identifier */
    int getId() const { return _id; }

    /*! \returns the language code */
    const std::string & getCode() const { return _code; }

    /*! \returns the language name */
    const std::string & getName() const { return _name; }

private:
    int _id;

    std::string _code;

    std::string _name;
};
#endif

View File

@ -0,0 +1,37 @@
#include "language_dao.hpp"
#include "query_param.hpp"
#include "string_param.hpp"
#include "int_param.hpp"
#include "bool_param.hpp"
#include "int_array_param.hpp"
#include "logger.hpp"
#include <boost/foreach.hpp>
#include <libpq-fe.h>
/*! The DAO holds no state, so there is nothing to initialise.
 */
LanguageDAO::LanguageDAO() {}
/*! The DAO holds no state, so there is nothing to release.
 */
LanguageDAO::~LanguageDAO() {}
/*! Reads all rows of the language table, ordered by name.
  \returns one Language per row
*/
std::vector<Language> LanguageDAO::getLanguages() {
    std::vector<Language> result;
    DBconnection connection;
    connection.startTransaction();
    // List the columns explicitly: the values are read by position below,
    // so the result layout must not depend on the table's physical column
    // order (as it would with "select *").
    std::string query = "select id, code, name from language order by name;";
    PGresult * dbResult = connection.execute(query);
    // The row count is fixed for a given result; query it once.
    const int rowCount = connection.getRowCount(dbResult);
    for (int i = 0; i < rowCount; i++) {
        int id = connection.getIntValue(dbResult, i, 0);
        std::string code = connection.getStringValue(dbResult, i, 1);
        std::string name = connection.getStringValue(dbResult, i, 2);
        result.push_back(Language(id, code, name));
    }
    connection.clearResult(dbResult);
    connection.endTransaction();
    return result;
}

View File

@ -0,0 +1,27 @@
#ifndef LANGUAGE_DAO_HDR
#define LANGUAGE_DAO_HDR
#include <string>
#include <vector>
#include <utility>
#include <concordia/common/config.hpp>
#include "db_connection.hpp"
#include "language.hpp"
/*! Data access object for the language table.
 */
class LanguageDAO {
public:
    /*! Constructor.
    */
    LanguageDAO();

    /*! Destructor.
    */
    virtual ~LanguageDAO();

    /*! Reads the languages stored in the database.
      \returns the languages, one object per database row
    */
    std::vector<Language> getLanguages();

private:
};
#endif

View File

@ -0,0 +1,27 @@
#include "request.hpp"
/*! Stores every argument in the member of the same name; no validation
    is performed.
 */
Request::Request(const int id,
                 const std::string & sourceFilePath,
                 const std::string & targetFilePath,
                 const std::string & name,
                 const std::string & sourceLanguageCode,
                 const std::string & targetLanguageCode,
                 const int status,
                 const int type,
                 const int tmId,
                 const std::string & created)
    : _id(id),
      _sourceFilePath(sourceFilePath), _targetFilePath(targetFilePath),
      _name(name),
      _sourceLanguageCode(sourceLanguageCode), _targetLanguageCode(targetLanguageCode),
      _status(status), _type(type), _tmId(tmId),
      _created(created) {
}
/*! Nothing to release: all members clean up after themselves.
 */
Request::~Request() {}

View File

@ -0,0 +1,89 @@
#ifndef REQUEST_HDR
#define REQUEST_HDR
#include <string>
#include <vector>
/*! An import request: a pair of uploaded files waiting to be turned into
    a new translation memory or appended to an existing one.
    The fields are set at construction and exposed through read-only
    accessors.
 */
class Request {
public:
    /*! Constructor.
      \param id numeric identifier of the request
      \param sourceFilePath path of the source-language file
      \param targetFilePath path of the target-language file
      \param name name given to the request
      \param sourceLanguageCode code of the source language
      \param targetLanguageCode code of the target language
      \param status processing status of the request
      \param type kind of request
      \param tmId id of the translation memory the request refers to
      \param created creation time, already formatted as text
    */
    Request(
        const int id,
        const std::string & sourceFilePath,
        const std::string & targetFilePath,
        const std::string & name,
        const std::string & sourceLanguageCode,
        const std::string & targetLanguageCode,
        const int status,
        const int type,
        const int tmId,
        const std::string & created
    );

    /*! Destructor.
    */
    virtual ~Request();

    /*! \returns the numeric identifier of the request */
    int getId() const {
        return _id;
    }

    /*! \returns the path of the source-language file */
    const std::string & getSourceFilePath() const {
        return _sourceFilePath;
    }

    /*! \returns the path of the target-language file */
    const std::string & getTargetFilePath() const {
        return _targetFilePath;
    }

    /*! \returns the name given to the request */
    const std::string & getName() const {
        return _name;
    }

    /*! \returns the code of the source language */
    const std::string & getSourceLanguageCode() const {
        return _sourceLanguageCode;
    }

    /*! \returns the code of the target language */
    const std::string & getTargetLanguageCode() const {
        return _targetLanguageCode;
    }

    /*! \returns the processing status */
    int getStatus() const {
        return _status;
    }

    /*! \returns the kind of request */
    int getType() const {
        return _type;
    }

    /*! \returns the id of the translation memory the request refers to */
    int getTmId() const {
        return _tmId;
    }

    /*! \returns the creation time as text */
    const std::string & getCreated() const {
        return _created;
    }

private:
    int _id;

    std::string _sourceFilePath;

    std::string _targetFilePath;

    std::string _name;

    std::string _sourceLanguageCode;

    std::string _targetLanguageCode;

    int _status;

    int _type;

    int _tmId;

    std::string _created;
};
#endif

View File

@ -0,0 +1,80 @@
#include "request_dao.hpp"
#include "query_param.hpp"
#include "string_param.hpp"
#include "int_param.hpp"
#include "bool_param.hpp"
#include "int_array_param.hpp"
#include "logger.hpp"
#include <boost/foreach.hpp>
#include <libpq-fe.h>
/*! The DAO holds no state, so there is nothing to initialise.
 */
RequestDAO::RequestDAO() {}
/*! The DAO holds no state, so there is nothing to release.
 */
RequestDAO::~RequestDAO() {}
std::vector<Request> RequestDAO::getRequests() {
std::vector<Request> result;
DBconnection connection;
connection.startTransaction();
std::string query = "select request.id, request.source_file_path, request.target_file_path, request.name, src_lang.code as src_code, trg_lang.code as trg_code, request.status, request.type, request.tm_id, to_char(request.created,'YYYY-MM-DD HH24:MI:SS') from request inner join language as src_lang on src_lang.id = request.source_lang_id inner join language as trg_lang on trg_lang.id = request.target_lang_id order by request.created desc limit 20;";
PGresult * dbResult = connection.execute(query);
for (int i=0;i<connection.getRowCount(dbResult);i++) {
int id = connection.getIntValue(dbResult, i, 0);
std::string sourceFilePath = connection.getStringValue(dbResult, i, 1);
std::string targetFilePath = connection.getStringValue(dbResult, i, 2);
std::string name = connection.getStringValue(dbResult, i, 3);
std::string sourceLanguageCode = connection.getStringValue(dbResult, i, 4);
std::string targetLanguageCode = connection.getStringValue(dbResult, i, 5);
int status = connection.getIntValue(dbResult, i, 6);
int type = connection.getIntValue(dbResult, i, 7);
int tmId = connection.getIntValue(dbResult, i, 8);
std::string created = connection.getStringValue(dbResult, i, 9);
result.push_back(Request(id, sourceFilePath, targetFilePath, name, sourceLanguageCode, targetLanguageCode, status, type, tmId, created));
}
connection.clearResult(dbResult);
connection.endTransaction();
return result;
}
int RequestDAO::addRequest(const std::string sourceFilePath, const std::string targetFilePath, const int sourceLangId, const int targetLangId, const std::string name, int type, int tmId) {
Logger::log("addRequest");
Logger::logString("source file path", sourceFilePath);
Logger::logString("target file path", targetFilePath);
Logger::logInt("source lang id", sourceLangId);
Logger::logInt("target lang id", targetLangId);
Logger::logString("name", name);
Logger::logInt("type", type);
Logger::logInt("tm id", tmId);
DBconnection connection;
connection.startTransaction();
std::string query = "INSERT INTO request(source_file_path, target_file_path, source_lang_id, target_lang_id, name, status, type, tm_id) values($1::text,$2::text,$3::integer,$4::integer,$5::text, $6::integer, $7::integer, $8::integer) RETURNING id";
std::vector<QueryParam*> params;
params.push_back(new StringParam(sourceFilePath));
params.push_back(new StringParam(targetFilePath));
params.push_back(new IntParam(sourceLangId));
params.push_back(new IntParam(targetLangId));
params.push_back(new StringParam(name));
params.push_back(new IntParam(0));
params.push_back(new IntParam(type));
params.push_back(new IntParam(tmId));
PGresult * result = connection.execute(query, params);
int newId = connection.getIntValue(result, 0, 0);
connection.clearResult(result);
connection.endTransaction();
BOOST_FOREACH (QueryParam * param, params) {
delete param;
}
return newId;
}

View File

@ -0,0 +1,29 @@
#ifndef REQUEST_DAO_HDR
#define REQUEST_DAO_HDR
#include <string>
#include <vector>
#include <utility>
#include <concordia/common/config.hpp>
#include "db_connection.hpp"
#include "request.hpp"
/*! Data access object for the request table (import requests).
 */
class RequestDAO {
public:
    /*! Constructor.
    */
    RequestDAO();

    /*! Destructor.
    */
    virtual ~RequestDAO();

    /*! Stores a new import request.
      \param sourceFilePath path of the source-language file
      \param targetFilePath path of the target-language file
      \param sourceLangId id of the source language
      \param targetLangId id of the target language
      \param name name of the request
      \param type kind of request
      \param tmId id of the translation memory the request refers to
      \returns the id of the stored request
    */
    int addRequest(const std::string sourceFilePath,
                   const std::string targetFilePath,
                   const int sourceLangId,
                   const int targetLangId,
                   const std::string name,
                   int type,
                   int tmId);

    /*! Reads the stored import requests.
      \returns the requests, one object per database row
    */
    std::vector<Request> getRequests();

private:
};
#endif

View File

@ -7,6 +7,21 @@ CREATE TABLE tm (
lemmatized bool DEFAULT false lemmatized bool DEFAULT false
); );
-- Import requests submitted through the TM manager page: each row points at
-- a pair of uploaded files to be imported into a translation memory.
-- WARNING: re-running this script drops all stored requests.
DROP TABLE IF EXISTS request;
CREATE TABLE request (
id SERIAL PRIMARY KEY,
-- paths of the uploaded source/target files on the server
source_file_path varchar(100),
target_file_path varchar(100),
-- ids of rows in the language table (joined on by the request queries)
source_lang_id integer,
target_lang_id integer,
name varchar(40),
-- set to 0 when the server inserts a new request
status integer,
-- the web form submits 0 for "create a new TM" and 1 for "extend existing TM"
type integer,
-- id of the translation memory chosen in the web form
tm_id integer,
created timestamp default now()
);
DROP TABLE IF EXISTS language; DROP TABLE IF EXISTS language;
CREATE TABLE language ( CREATE TABLE language (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,

View File

@ -0,0 +1,115 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import psycopg2, os, shutil, subprocess, urllib3, time, json
from importlib.machinery import SourceFileLoader
BUFFER_SIZE = 500
def postJson(address, data):
    """POST ``data`` as UTF-8 encoded JSON to ``address``.

    Returns the response body parsed as JSON.
    """
    payload = json.dumps(data).encode('utf-8')
    pool = urllib3.PoolManager()
    reply = pool.request(
        'POST',
        address,
        headers={'Content-Type': 'application/json'},
        body=payload,
    )
    return json.loads(reply.data.decode('utf-8'))
def add_examples(address, examplesData):
    """Send one batch of examples to the server.

    Raises:
        Exception: carrying the server's message when the response status
            is ``'error'``.
    """
    reply = postJson(address, examplesData)
    if reply['status'] != 'error':
        return
    raise Exception(reply['message'])
def file_len(fname):
    """Return the number of lines in the text file ``fname``.

    An empty file has 0 lines. A final line without a trailing newline is
    still counted.
    """
    with open(fname) as f:
        # Counting with sum() avoids relying on a loop variable that is
        # never bound when the file is empty.
        return sum(1 for _ in f)
# Location of the mgiza-aligner directory, relative to this script.
mgiza_path = os.path.dirname(os.path.realpath(__file__))+'/../mgiza-aligner'

# --- 1. Fetch the request to process --------------------------------------
conn = psycopg2.connect("dbname='concordia_server' user='concordia' host='localhost' port='6543' password='concordia'")
cur = conn.cursor()
# NOTE(review): this selects the OLDEST request regardless of its status, and
# nothing in this script updates the status afterwards - confirm that
# re-processing the same request on every run is intended.
cur.execute("""select request.id, request.source_file_path, request.target_file_path, request.name, src_lang.id as src_lang_id, src_lang.code as src_code, trg_lang.id as trg_lang_id, trg_lang.code as trg_code, request.status, request.type, request.tm_id from request inner join language as src_lang on src_lang.id = request.source_lang_id inner join language as trg_lang on trg_lang.id = request.target_lang_id order by request.created limit 1""")
request = cur.fetchone()
# The database is not needed any more; do not keep the connection open
# while the alignment below runs.
cur.close()
conn.close()

if request is None:
    # fetchone() returns None when the table has no matching row; stop with
    # a clear message instead of failing on the tuple unpacking below.
    raise SystemExit("No import request found in the database - nothing to do.")

# NOTE(review): tm_type and tm_id are read but never used below, so a
# request of type "extend" is imported as a new TM as well - confirm.
request_id, src_file_path, trg_file_path, tm_name, src_lang_id, src_lang_code, trg_lang_id, trg_lang_code, status, tm_type, tm_id = request

# --- 2. Align the corpus ---------------------------------------------------
request_corpus_path = mgiza_path+'/corpora/request_'+str(request_id)
os.makedirs(request_corpus_path)
shutil.copy(src_file_path, request_corpus_path+'/src.txt')
shutil.copy(trg_file_path, request_corpus_path+'/trg.txt')

# check=True: stop here when the aligner fails, rather than importing
# missing or incomplete output files.
subprocess.run(["make","SRC_LANG="+src_lang_code, "TRG_LANG="+trg_lang_code, "CORPUS_NAME=request_"+str(request_id)], cwd=mgiza_path, check=True)

# --- 3. Locate the Concordia server ---------------------------------------
host = SourceFileLoader("host", os.path.dirname(os.path.realpath(__file__))+'/../tests/host.py').load_module()
address = 'http://'+host.concordia_host
if len(host.concordia_port) > 0:
    address += ':'+host.concordia_port

# --- 4. Validate the aligner output ---------------------------------------
sourceFile = request_corpus_path+'/src_final.txt'
targetFile = request_corpus_path+'/trg_final.txt'
alignmentsFile = request_corpus_path+'/aligned_final.txt'

sourceLength = file_len(sourceFile)
if (sourceLength != file_len(targetFile)):
    raise Exception("source and target files are not of the same length!")

if (file_len(alignmentsFile) != 3*sourceLength):
    raise Exception("alignments file is not exactly 3 times longer than source and target")

# --- 5. Create the translation memory -------------------------------------
data = {
    'operation': 'addTm',
    'sourceLangId':src_lang_id,
    'targetLangId':trg_lang_id,
    'name':tm_name,
    'tmLemmatized':True
}

response = postJson(address, data)

tmId = int(response['newTmId'])
print("Added new tm: %d" % tmId)

# --- 6. Upload the examples in batches of BUFFER_SIZE ----------------------
data = {
    'operation': 'addAlignedLemmatizedSentences',
    'tmId':tmId
}

examples = []
with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
    for sourceLine in sf:
        sourceSentence = sourceLine.strip()
        targetSentence = tf.readline().strip()

        # Skip two lines of the alignments file: these are lemmatized and we
        # need the raw sentences from the source and target files.
        af.readline()
        af.readline()

        alignmentString = af.readline().strip()

        examples.append([sourceSentence, targetSentence, alignmentString])

        if len(examples) >= BUFFER_SIZE:
            data['examples'] = examples
            add_examples(address, data)
            examples = []

# Send whatever is left over after the last full batch.
if len(examples) > 0:
    data['examples'] = examples
    add_examples(address, data)

# --- 7. Rebuild the index of the new translation memory --------------------
print("Generating index...")
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId' : tmId
}
response = postJson(address, data)

end = time.time()
print("Index regeneration complete. The operation took %.4f s" % (end - start))

12
tests/addStocznia.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/sh
# Loads the two "stocznia" corpora (plen and enpl) from the mgiza-aligner
# output directories.
# NOTE(review): the "placeholder" TMs created by addTm.py presumably exist
# only to advance the TM id sequence so the real TMs get specific ids -
# confirm before changing their number or order.
./addTm.py 1 2 placeholder 1
# NOTE(review): arguments look like: name, source file, source language id,
# target file, target language id, alignments file - verify against
# addAlignedLemmatizedTM.py.
./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
./addTm.py 1 2 placeholder 1
./addTm.py 1 2 placeholder 1
# Same corpus in the opposite direction (language ids swapped).
./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt