import requests
This commit is contained in:
parent
d9ef86b4d4
commit
efa2bc2416
1
.gitignore
vendored
1
.gitignore
vendored
@ -34,3 +34,4 @@ mgiza-aligner/mgiza/mgizapp/src/mkcls/-MT
|
||||
mgiza-aligner/mgiza/mgizapp/src/mkcls/CMakeFiles/
|
||||
mgiza-aligner/mgiza/mgizapp/src/mkcls/Makefile
|
||||
mgiza-aligner/mgiza/mgizapp/src/mkcls/cmake_install.cmake
|
||||
__pycache__
|
||||
|
@ -1,4 +1,4 @@
|
||||
- sudo apt-get install postgresql libfcgi-dev libpq-dev mono-complete
|
||||
- sudo apt-get install postgresql libfcgi-dev libpq-dev mono-complete python3-psycopg2
|
||||
- clone github repo, mkdir build, cd build, ../cmake.sh, make
|
||||
- sudo -u postgres psql postgres
|
||||
- create user concordia with encrypted password 'concordia';
|
||||
|
@ -213,6 +213,7 @@ h2 {
|
||||
color: #e32;
|
||||
font-family:'Gill Sans','lucida grande', helvetica, arial, sans-serif;
|
||||
font-size: 190%;
|
||||
margin-top: 30px;
|
||||
}
|
||||
h3 {
|
||||
color: #2c6877;
|
||||
@ -620,6 +621,7 @@ label {
|
||||
display: block;
|
||||
font-size: 110%;
|
||||
margin-bottom:3px;
|
||||
margin-top:15px;
|
||||
}
|
||||
input, textarea {
|
||||
clear: both;
|
||||
@ -685,14 +687,14 @@ form .submit input[type=submit]:hover {
|
||||
background: #5BA150;
|
||||
}
|
||||
/* Form errors */
|
||||
form .error {
|
||||
.error {
|
||||
background: #FFDACC;
|
||||
-moz-border-radius: 4px;
|
||||
-webkit-border-radius: 4px;
|
||||
border-radius: 4px;
|
||||
font-weight: normal;
|
||||
}
|
||||
form .error-message {
|
||||
.error-message {
|
||||
-moz-border-radius: none;
|
||||
-webkit-border-radius: none;
|
||||
border-radius: none;
|
||||
@ -702,8 +704,8 @@ form .error-message {
|
||||
padding-left: 4px;
|
||||
padding-right: 0;
|
||||
}
|
||||
form .error,
|
||||
form .error-message {
|
||||
.error,
|
||||
.error-message {
|
||||
color: #9E2424;
|
||||
-webkit-box-shadow: none;
|
||||
-moz-box-shadow: none;
|
||||
|
@ -1,3 +1,28 @@
|
||||
/**
 * Switch the import form to "create a new translation memory" mode:
 * the TM name and both language selectors are shown, the selector of an
 * existing TM is hidden.
 */
function showNewTmOptions() {
    var visibleFields = ['tm_name', 'src_lang_id', 'trg_lang_id'];
    for (var i = 0; i < visibleFields.length; i++) {
        showField(visibleFields[i]);
    }
    hideField('tm_id');
}
|
||||
|
||||
/**
 * Switch the import form to "extend an existing translation memory" mode:
 * only the selector of an existing TM is shown; the TM name and both
 * language selectors are hidden.
 */
function showExtendTmOptions() {
    var hiddenFields = ['tm_name', 'src_lang_id', 'trg_lang_id'];
    for (var i = 0; i < hiddenFields.length; i++) {
        hideField(hiddenFields[i]);
    }
    showField('tm_id');
}
|
||||
|
||||
/**
 * Add the 'hidden' class to a form control and to the label that points at it.
 *
 * @param {string} fieldId  id attribute of the form control
 */
function hideField(fieldId) {
    $('#' + fieldId).addClass('hidden');
    // The attribute value is quoted so that ids which are not plain CSS
    // identifiers still produce a valid selector.
    $('label[for="' + fieldId + '"]').addClass('hidden');
}
|
||||
|
||||
/**
 * Remove the 'hidden' class from a form control and from the label that
 * points at it.
 *
 * @param {string} fieldId  id attribute of the form control
 */
function showField(fieldId) {
    $('#' + fieldId).removeClass('hidden');
    // The attribute value is quoted so that ids which are not plain CSS
    // identifiers still produce a valid selector.
    $('label[for="' + fieldId + '"]').removeClass('hidden');
}
|
||||
|
||||
|
||||
/**
 * Toggle the 'hidden' class on the element with id "help".
 */
function toggleHelp() {
    var helpElement = $('#help');
    helpElement.toggleClass('hidden');
}
|
||||
|
@ -3,6 +3,13 @@
|
||||
|
||||
import sys, os, shutil, re
|
||||
|
||||
def config_file(config, file_name, root_dir):
    """Instantiate a ``*_pattern`` template with the values from *config*.

    Reads ``<file_name>_pattern`` (relative to the current directory),
    replaces every ``@field@`` placeholder with the matching value from
    *config* and writes the result to ``<root_dir>/<file_name>``.

    Args:
        config: mapping of placeholder name -> replacement string.
        file_name: name of the generated file; the template is expected
            under the same name with a ``_pattern`` suffix.
        root_dir: existing directory the generated file is written to.
    """
    with open(file_name+'_pattern', 'r') as pattern_file, open(root_dir+'/'+file_name, 'w') as out_file:
        for line in pattern_file:
            # items() works on both Python 2 and 3; iteritems() is Python 2 only.
            for field, value in config.items():
                # Literal replacement: a regex substitution would interpret
                # metacharacters in the field name and backslash escapes in
                # the value (e.g. "\1" or "\n").
                line = line.replace('@'+field+'@', value)
            out_file.write(line)
|
||||
|
||||
root_dir = sys.argv[1]
|
||||
|
||||
if not os.path.exists(root_dir):
|
||||
@ -23,31 +30,16 @@ shutil.copytree('images', root_dir+'/images')
|
||||
shutil.copy('favicon.ico', root_dir+'/favicon.ico')
|
||||
|
||||
|
||||
config = dict()
|
||||
with open('host.cfg', 'r') as host_file:
|
||||
for line in host_file:
|
||||
field, value = line.strip().split('@#@')
|
||||
if field == 'concordia_host':
|
||||
concordia_host = value
|
||||
elif field == 'concordia_port':
|
||||
concordia_port = value
|
||||
config[field] = value
|
||||
|
||||
with open('concordia_gate.php_pattern', 'r') as gate_pattern_file, open(root_dir+'/concordia_gate.php', 'w') as gate_file:
|
||||
for line in gate_pattern_file:
|
||||
line = re.sub('@concordia_host@', concordia_host, line)
|
||||
line = re.sub('@concordia_port@', concordia_port, line)
|
||||
gate_file.write(line)
|
||||
|
||||
with open('concordia_search.php_pattern', 'r') as search_pattern_file, open(root_dir+'/concordia_search.php', 'w') as search_file:
|
||||
for line in search_pattern_file:
|
||||
line = re.sub('@concordia_host@', concordia_host, line)
|
||||
line = re.sub('@concordia_port@', concordia_port, line)
|
||||
search_file.write(line)
|
||||
|
||||
with open('tm_info.php_pattern', 'r') as tm_info_pattern_file, open(root_dir+'/tm_info.php', 'w') as tm_info_file:
|
||||
for line in tm_info_pattern_file:
|
||||
line = re.sub('@concordia_host@', concordia_host, line)
|
||||
line = re.sub('@concordia_port@', concordia_port, line)
|
||||
tm_info_file.write(line)
|
||||
config_file(config, 'concordia_gate.php', root_dir)
|
||||
config_file(config, 'concordia_search.php', root_dir)
|
||||
config_file(config, 'tm_info.php', root_dir)
|
||||
config_file(config, 'tm_manager.php', root_dir)
|
||||
|
||||
|
||||
versions_dir = 'versions_enabled'
|
||||
|
257
cat/tm_manager.php_pattern
Normal file
257
cat/tm_manager.php_pattern
Normal file
@ -0,0 +1,257 @@
|
||||
<?php
|
||||
|
||||
/**
 * Send $request JSON-encoded in the body of an HTTP POST and decode the reply.
 *
 * @param string $url      address of the server
 * @param mixed  $request  value passed to json_encode() to build the body
 * @return mixed decoded JSON reply, or null when the request failed or the
 *               reply could not be decoded
 */
function postJson($url, $request) {
    $options = array(
        'http' => array(
            // The body is JSON, so announce it as such.
            'header'  => "Content-type: application/json\r\n",
            'method'  => 'POST',
            'content' => json_encode($request),
        ),
    );
    $context  = stream_context_create($options);
    $response = file_get_contents($url, false, $context);

    // file_get_contents() returns false on failure; do not feed that to
    // json_decode().
    if ($response === false) {
        return null;
    }

    return json_decode($response);
}
|
||||
|
||||
/**
 * Count the lines of a text file.
 *
 * A final line without a trailing newline is counted; a trailing newline
 * does not add an extra (empty) line.
 *
 * @param string $file_name  path of the file to read
 * @return int number of lines, 0 when the file is empty or cannot be opened
 */
function lineCount($file_name)
{
    $handle = fopen($file_name, 'r');
    if ($handle === false) {
        // Looping on feof() with an invalid handle never terminates.
        return 0;
    }

    $linecount = 0;
    // Count successful reads only: testing feof() before reading yields one
    // spurious iteration when the file ends with a newline.
    while (fgets($handle) !== false) {
        ++$linecount;
    }

    fclose($handle);

    return $linecount;
}
|
||||
|
||||
/**
 * Store the two uploaded corpus files and register an import request.
 *
 * @param string $url         address of the server
 * @param array  $postArray   submitted form fields (normally $_POST)
 * @param array  $filesArray  uploaded files (normally $_FILES); the keys
 *                            'src_file' and 'trg_file' are read
 * @return string empty string if no error occurred, otherwise an error message
 */
function addRequest($url, $postArray, $filesArray) {
    // Reject an upload that failed OR that is missing/empty. The message has
    // always promised both checks.
    foreach (array('src_file' => 'source', 'trg_file' => 'target') as $field => $label) {
        if (!isset($filesArray[$field])
                || $filesArray[$field]['error'] != 0
                || $filesArray[$field]['size'] <= 0) {
            return "Error uploading $label file or no $label file given.";
        }
    }

    $srcFilePath = "/tmp/".uniqid("srcFile", true);
    $trgFilePath = "/tmp/".uniqid("trgFile", true);
    if (!move_uploaded_file($filesArray['src_file']['tmp_name'], $srcFilePath)
            || !move_uploaded_file($filesArray['trg_file']['tmp_name'], $trgFilePath)) {
        return "Error storing the uploaded files.";
    }

    $srcLineCount = lineCount($srcFilePath);
    $trgLineCount = lineCount($trgFilePath);

    if ($srcLineCount != $trgLineCount) {
        // The files will never be imported, so do not leave them behind.
        @unlink($srcFilePath);
        @unlink($trgFilePath);
        return "Files have different number of lines ($srcLineCount and $trgLineCount).";
    }

    $request = array (
        "operation" => "addRequest",
        "sourceFilePath" => $srcFilePath,
        "targetFilePath" => $trgFilePath,
        "sourceLangId" => intval(isset($postArray['src_lang_id']) ? $postArray['src_lang_id'] : 0),
        "targetLangId" => intval(isset($postArray['trg_lang_id']) ? $postArray['trg_lang_id'] : 0),
        "name" => isset($postArray['tm_name']) ? $postArray['tm_name'] : "",
        "type" => intval(isset($postArray['tm_type']) ? $postArray['tm_type'] : 0),
        "tmId" => intval(isset($postArray['tm_id']) ? $postArray['tm_id'] : 0)
    );
    $response = postJson($url, $request);

    // Report a failed request instead of silently pretending success.
    if (!isset($response->status) || $response->status != 'success') {
        if (isset($response->message)) {
            return "Error adding import request: ".$response->message;
        }
        return "Error adding import request: no valid response from the server.";
    }

    return "";
}
|
||||
|
||||
// Address of the server; the @...@ placeholders are substituted when the
// page is generated from this pattern file.
$url = 'http://@concordia_host@:@concordia_port@';

// Escape a value for safe inclusion in HTML text or attribute values.
// TM and request names come from user input and must not be echoed raw.
function escapeHtml($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
}

$errorMessage = "";
if ($_SERVER['REQUEST_METHOD'] == 'POST' ) {
    $errorMessage = addRequest($url, $_POST, $_FILES);
}

$tmsData = postJson($url, array("operation" =>"getTmsInfo"));
$requestsData = postJson($url, array("operation" =>"getRequestsInfo"));
$languagesData = postJson($url, array("operation" =>"getLanguages"));

// Fall back to empty lists when the server did not answer, so the page still
// renders instead of iterating over a property of null.
$tms = isset($tmsData->tms) ? $tmsData->tms : array();
$requests = isset($requestsData->requests) ? $requestsData->requests : array();
$languages = isset($languagesData->languages) ? $languagesData->languages : array();

?>

<html>
    <head>
        <script src="js/jquery-1.11.3.min.js"></script>
        <script src="js/cat.js"></script>
        <link rel="stylesheet" href="css/concordia_cat.css" />
        <meta charset="UTF-8">
    </head>
    <body>
        <div id="header">
        </div>
        <div id="content">
            <section id="banner">
                <h1>Concordia</h1>
                <img class="banner-bg" src="images/banner-thin.png" alt="Banner">
                <img class="banner-icon" src="images/concordia-thin.png" alt="Banner">
            </section><!-- // end #banner -->
<?php
if ($errorMessage != "") {
?>
            <div class="error-message"><?= escapeHtml($errorMessage) ?></div>
<?php
}
?>

            <h2>Available translation memories:</h2>
            <table>
                <tr>
                    <th>Id</th>
                    <th>Name</th>
                    <th>Source language</th>
                    <th>Target language</th>
                </tr>
<?php
foreach ($tms as $tm) {
?>
                <tr>
                    <td><?= escapeHtml($tm->id) ?></td>
                    <td><?= escapeHtml($tm->name) ?></td>
                    <td><?= escapeHtml($tm->sourceLanguageCode) ?></td>
                    <td><?= escapeHtml($tm->targetLanguageCode) ?></td>
                </tr>
<?php
}
?>

            </table>
            <h2>Import new translations</h2>
            <form action="" method="POST" enctype="multipart/form-data">
                <div class="radio">
                    <input id="new_tm" type="radio" name="tm_type" value="0" checked onclick="showNewTmOptions()">
                    <label for="new_tm">Create a new translation memory</label>
                    <input id="extend_tm" type="radio" name="tm_type" value="1" onclick="showExtendTmOptions()">
                    <label for="extend_tm">Extend existing translation memory</label>
                </div>
                <label for="tm_name">Name of the new translation memory:</label>
                <input id="tm_name" type="text" name="tm_name">
                <label class="hidden" for="tm_id">Choose translation memory to extend:</label>
                <select class="hidden" id="tm_id" name="tm_id">
<?php
foreach ($tms as $tm) {
?>
                    <option value="<?= escapeHtml($tm->id) ?>"><?= escapeHtml($tm->name) ?> (<?= escapeHtml($tm->sourceLanguageCode) ?>→<?= escapeHtml($tm->targetLanguageCode) ?>)</option>
<?php
}
?>
                </select>

                <table>
                    <tr>
                        <td width="70%">
                            <label for="src_file">Source file (TXT)</label>
                            <input id="src_file" name="src_file" type="file">
                        </td>
                        <td>
                            <label for="src_lang_id">Source language</label>
                            <select id="src_lang_id" name="src_lang_id">
<?php
foreach ($languages as $language) {
?>
                                <option value="<?= escapeHtml($language->id) ?>"><?= escapeHtml($language->name) ?> (<?= escapeHtml($language->code) ?>)</option>
<?php
}
?>
                            </select>
                        </td>
                    </tr>
                    <tr>
                        <td>
                            <label for="trg_file">Target file (TXT)</label>
                            <input id="trg_file" name="trg_file" type="file">
                        </td>
                        <td>
                            <label for="trg_lang_id">Target language</label>
                            <select id="trg_lang_id" name="trg_lang_id">
<?php
foreach ($languages as $language) {
?>
                                <option value="<?= escapeHtml($language->id) ?>"><?= escapeHtml($language->name) ?> (<?= escapeHtml($language->code) ?>)</option>
<?php
}
?>
                            </select>
                        </td>
                    </tr>
                </table>

                <input type="submit" value="Import">
            </form>
            <h2>Latest import requests:</h2>
            <table>
                <tr>
                    <th>Id</th>
                    <th>Name</th>
                    <th>Source language</th>
                    <th>Target language</th>
                    <th>Status</th>
                    <th>Type</th>
                    <th>TM id</th>
                    <th>Created</th>
                </tr>
<?php
foreach ($requests as $request) {
    // Type 0 is the "create a new translation memory" radio value of the
    // form above; any other type is shown as an extension of an existing TM.
    $isNewTm = ($request->type == 0);
?>
                <tr>
                    <td><?= escapeHtml($request->id) ?></td>
                    <td><?= escapeHtml($request->name) ?></td>
                    <td>
                        <?= $isNewTm ? escapeHtml($request->sourceLanguageCode) : "N/A" ?>
                    </td>
                    <td>
                        <?= $isNewTm ? escapeHtml($request->targetLanguageCode) : "N/A" ?>
                    </td>
                    <td><?= escapeHtml($request->status) ?></td>
                    <td>
                        <?= $isNewTm ? "new TM" : "extend TM" ?>
                    </td>
                    <td>
                        <?= $isNewTm ? "N/A" : escapeHtml($request->tmId) ?>
                    </td>
                    <td><?= escapeHtml($request->created) ?></td>
                </tr>
<?php
}
?>

            </table>
        </div>
    </body>
</html>
|
@ -1 +0,0 @@
|
||||
../versions_available/europarl_sample.cfg
|
1
cat/versions_enabled/stocznia_enpl.cfg
Symbolic link
1
cat/versions_enabled/stocznia_enpl.cfg
Symbolic link
@ -0,0 +1 @@
|
||||
../versions_available/stocznia_enpl.cfg
|
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
@ -0,0 +1 @@
|
||||
../versions_available/stocznia_plen.cfg
|
@ -13,6 +13,8 @@
|
||||
#include "config.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "tm.hpp"
|
||||
#include "request.hpp"
|
||||
#include "language.hpp"
|
||||
#include "rapidjson/rapidjson.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
@ -125,7 +127,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
}
|
||||
}
|
||||
_indexController->addAlignedLemmatizedSentences(jsonWriter, sourceSentences, targetSentences, alignmentStrings, tmId);
|
||||
} else if (operation == GET_TMS_INFO_PARAM) {
|
||||
} else if (operation == GET_TMS_INFO_OP) {
|
||||
std::vector<Tm> tms = _tmDAO.getTms();
|
||||
|
||||
jsonWriter.StartObject();
|
||||
@ -148,6 +150,79 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
|
||||
} else if (operation == GET_REQUESTS_INFO_OP) {
|
||||
std::vector<Request> requests = _requestDAO.getRequests();
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.String("requests");
|
||||
jsonWriter.StartArray();
|
||||
BOOST_FOREACH(Request & request, requests) {
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("id");
|
||||
jsonWriter.Int(request.getId());
|
||||
jsonWriter.String("sourceFilePath");
|
||||
jsonWriter.String(request.getSourceFilePath().c_str());
|
||||
jsonWriter.String("targetFilePath");
|
||||
jsonWriter.String(request.getTargetFilePath().c_str());
|
||||
jsonWriter.String("name");
|
||||
jsonWriter.String(request.getName().c_str());
|
||||
jsonWriter.String("sourceLanguageCode");
|
||||
jsonWriter.String(request.getSourceLanguageCode().c_str());
|
||||
jsonWriter.String("targetLanguageCode");
|
||||
jsonWriter.String(request.getTargetLanguageCode().c_str());
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.Int(request.getStatus());
|
||||
jsonWriter.String("type");
|
||||
jsonWriter.Int(request.getType());
|
||||
jsonWriter.String("tmId");
|
||||
jsonWriter.Int(request.getTmId());
|
||||
jsonWriter.String("created");
|
||||
jsonWriter.String(request.getCreated().c_str());
|
||||
jsonWriter.EndObject();
|
||||
}
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
|
||||
|
||||
} else if (operation == GET_LANGUAGES_OP) {
|
||||
std::vector<Language> languages = _languageDAO.getLanguages();
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.String("languages");
|
||||
jsonWriter.StartArray();
|
||||
BOOST_FOREACH(Language & language, languages) {
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("id");
|
||||
jsonWriter.Int(language.getId());
|
||||
jsonWriter.String("code");
|
||||
jsonWriter.String(language.getCode().c_str());
|
||||
jsonWriter.String("name");
|
||||
jsonWriter.String(language.getName().c_str());
|
||||
jsonWriter.EndObject();
|
||||
}
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
|
||||
|
||||
} else if (operation == ADD_REQUEST_OP) {
|
||||
std::string sourceFilePath = _getStringParameter(d, SOURCE_FILE_PARAM);
|
||||
std::string targetFilePath = _getStringParameter(d, TARGET_FILE_PARAM);
|
||||
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
|
||||
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
|
||||
std::string name = _getStringParameter(d, NAME_PARAM);
|
||||
int type = _getIntParameter(d, TYPE_PARAM);
|
||||
int tmId = _getIntParameter(d, TM_ID_PARAM);
|
||||
int newId = _requestDAO.addRequest(sourceFilePath, targetFilePath, sourceLangId, targetLangId, name, type, tmId);
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.String("newRequestId");
|
||||
jsonWriter.Int(newId);
|
||||
jsonWriter.EndObject();
|
||||
|
||||
} else if (operation == "lemmatize") {
|
||||
std::string sentence = _getStringParameter(d, "sentence");
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include "rapidjson/error/en.h"
|
||||
|
||||
#include "tm_dao.hpp"
|
||||
#include "request_dao.hpp"
|
||||
#include "language_dao.hpp"
|
||||
#include "index_controller.hpp"
|
||||
#include "searcher_controller.hpp"
|
||||
#include "lemmatizer_facade.hpp"
|
||||
@ -48,6 +50,10 @@ private:
|
||||
|
||||
TmDAO _tmDAO;
|
||||
|
||||
RequestDAO _requestDAO;
|
||||
|
||||
LanguageDAO _languageDAO;
|
||||
|
||||
boost::shared_ptr<IndexController> _indexController;
|
||||
|
||||
boost::shared_ptr<SearcherController> _searcherController;
|
||||
|
@ -21,14 +21,20 @@
|
||||
#define EXAMPLES_PARAM "examples"
|
||||
#define SOURCE_LANG_PARAM "sourceLangId"
|
||||
#define TARGET_LANG_PARAM "targetLangId"
|
||||
#define SOURCE_FILE_PARAM "sourceFilePath"
|
||||
#define TARGET_FILE_PARAM "targetFilePath"
|
||||
#define NAME_PARAM "name"
|
||||
#define TYPE_PARAM "type"
|
||||
#define INTERVALS_PARAM "intervals"
|
||||
#define GET_TMS_INFO_PARAM "getTmsInfo"
|
||||
|
||||
#define ADD_SENTENCE_OP "addSentence"
|
||||
#define ADD_SENTENCES_OP "addSentences"
|
||||
#define ADD_ALIGNED_SENTENCES_OP "addAlignedSentences"
|
||||
#define ADD_ALIGNED_LEMMATIZED_SENTENCES_OP "addAlignedLemmatizedSentences"
|
||||
#define ADD_REQUEST_OP "addRequest"
|
||||
#define GET_TMS_INFO_OP "getTmsInfo"
|
||||
#define GET_REQUESTS_INFO_OP "getRequestsInfo"
|
||||
#define GET_LANGUAGES_OP "getLanguages"
|
||||
#define REFRESH_INDEX_OP "refreshIndex"
|
||||
#define SIMPLE_SEARCH_OP "simpleSearch"
|
||||
#define CONCORDIA_SEARCH_OP "concordiaSearch"
|
||||
|
13
concordia-server/language.cpp
Normal file
13
concordia-server/language.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
#include "language.hpp"
|
||||
|
||||
// Stores the given id, code and name in the new object.
Language::Language(const int id,
                   const std::string & code,
                   const std::string & name)
    : _id(id), _code(code), _name(name) {
}

// Nothing to release: all members clean up after themselves.
Language::~Language() {
}
|
41
concordia-server/language.hpp
Normal file
41
concordia-server/language.hpp
Normal file
@ -0,0 +1,41 @@
|
||||
#ifndef LANGUAGE_HDR
|
||||
#define LANGUAGE_HDR
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/*! Value object holding the id, code and name of a language.
*/
class Language {
public:
    /*! Constructor.
      \param id value returned by getId()
      \param code value returned by getCode()
      \param name value returned by getName()
    */
    Language(const int id, const std::string & code, const std::string & name);

    /*! Destructor.
    */
    virtual ~Language();

    /*! \returns the id passed to the constructor */
    int getId() const { return _id; }

    /*! \returns the code passed to the constructor */
    const std::string & getCode() const { return _code; }

    /*! \returns the name passed to the constructor */
    const std::string & getName() const { return _name; }

private:
    int _id;

    std::string _code;

    std::string _name;
};
|
||||
|
||||
#endif
|
37
concordia-server/language_dao.cpp
Normal file
37
concordia-server/language_dao.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
#include "language_dao.hpp"
|
||||
|
||||
#include "query_param.hpp"
|
||||
#include "string_param.hpp"
|
||||
#include "int_param.hpp"
|
||||
#include "bool_param.hpp"
|
||||
#include "int_array_param.hpp"
|
||||
#include "logger.hpp"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include <libpq-fe.h>
|
||||
|
||||
LanguageDAO::LanguageDAO() {
|
||||
}
|
||||
|
||||
LanguageDAO::~LanguageDAO() {
|
||||
}
|
||||
|
||||
std::vector<Language> LanguageDAO::getLanguages() {
|
||||
std::vector<Language> result;
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
std::string query = "select * from language order by name;";
|
||||
PGresult * dbResult = connection.execute(query);
|
||||
for (int i=0;i<connection.getRowCount(dbResult);i++) {
|
||||
int id = connection.getIntValue(dbResult, i, 0);
|
||||
std::string code = connection.getStringValue(dbResult, i, 1);
|
||||
std::string name = connection.getStringValue(dbResult, i, 2);
|
||||
|
||||
result.push_back(Language(id, code, name));
|
||||
}
|
||||
connection.clearResult(dbResult);
|
||||
connection.endTransaction();
|
||||
|
||||
return result;
|
||||
|
||||
}
|
27
concordia-server/language_dao.hpp
Normal file
27
concordia-server/language_dao.hpp
Normal file
@ -0,0 +1,27 @@
|
||||
#ifndef LANGUAGE_DAO_HDR
|
||||
#define LANGUAGE_DAO_HDR
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include <concordia/common/config.hpp>
|
||||
#include "db_connection.hpp"
|
||||
#include "language.hpp"
|
||||
|
||||
class LanguageDAO {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
LanguageDAO();
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~LanguageDAO();
|
||||
|
||||
std::vector<Language> getLanguages();
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
||||
#endif
|
27
concordia-server/request.cpp
Normal file
27
concordia-server/request.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
#include "request.hpp"
|
||||
|
||||
// Copies every argument into the member of the same name.
Request::Request(const int id,
                 const std::string & sourceFilePath,
                 const std::string & targetFilePath,
                 const std::string & name,
                 const std::string & sourceLanguageCode,
                 const std::string & targetLanguageCode,
                 const int status,
                 const int type,
                 const int tmId,
                 const std::string & created)
    : _id(id),
      _sourceFilePath(sourceFilePath),
      _targetFilePath(targetFilePath),
      _name(name),
      _sourceLanguageCode(sourceLanguageCode),
      _targetLanguageCode(targetLanguageCode),
      _status(status),
      _type(type),
      _tmId(tmId),
      _created(created) {
}

// Nothing to release: all members clean up after themselves.
Request::~Request() {
}
|
89
concordia-server/request.hpp
Normal file
89
concordia-server/request.hpp
Normal file
@ -0,0 +1,89 @@
|
||||
#ifndef REQUEST_HDR
|
||||
#define REQUEST_HDR
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/*! Value object describing one import request: the two file paths, the
    name, both language codes, status, type, TM id and creation time.
*/
class Request {
public:
    /*! Constructor. Every argument is stored and returned unchanged by the
        getter of the same name.
    */
    Request(
        const int id,
        const std::string & sourceFilePath,
        const std::string & targetFilePath,
        const std::string & name,
        const std::string & sourceLanguageCode,
        const std::string & targetLanguageCode,
        const int status,
        const int type,
        const int tmId,  // named tmId (not tm_id) to match the definition and getTmId()
        const std::string & created
    );

    /*! Destructor.
    */
    virtual ~Request();

    int getId() const {
        return _id;
    }

    const std::string & getSourceFilePath() const {
        return _sourceFilePath;
    }

    const std::string & getTargetFilePath() const {
        return _targetFilePath;
    }

    const std::string & getName() const {
        return _name;
    }

    const std::string & getSourceLanguageCode() const {
        return _sourceLanguageCode;
    }

    const std::string & getTargetLanguageCode() const {
        return _targetLanguageCode;
    }

    int getStatus() const {
        return _status;
    }

    int getType() const {
        return _type;
    }

    int getTmId() const {
        return _tmId;
    }

    const std::string & getCreated() const {
        return _created;
    }

private:
    int _id;

    std::string _sourceFilePath;

    std::string _targetFilePath;

    std::string _name;

    std::string _sourceLanguageCode;

    std::string _targetLanguageCode;

    int _status;

    int _type;

    int _tmId;

    std::string _created;
};
|
||||
|
||||
#endif
|
80
concordia-server/request_dao.cpp
Normal file
80
concordia-server/request_dao.cpp
Normal file
@ -0,0 +1,80 @@
|
||||
#include "request_dao.hpp"
|
||||
|
||||
#include "query_param.hpp"
|
||||
#include "string_param.hpp"
|
||||
#include "int_param.hpp"
|
||||
#include "bool_param.hpp"
|
||||
#include "int_array_param.hpp"
|
||||
#include "logger.hpp"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include <libpq-fe.h>
|
||||
|
||||
RequestDAO::RequestDAO() {
|
||||
}
|
||||
|
||||
RequestDAO::~RequestDAO() {
|
||||
}
|
||||
|
||||
std::vector<Request> RequestDAO::getRequests() {
|
||||
std::vector<Request> result;
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
std::string query = "select request.id, request.source_file_path, request.target_file_path, request.name, src_lang.code as src_code, trg_lang.code as trg_code, request.status, request.type, request.tm_id, to_char(request.created,'YYYY-MM-DD HH24:MI:SS') from request inner join language as src_lang on src_lang.id = request.source_lang_id inner join language as trg_lang on trg_lang.id = request.target_lang_id order by request.created desc limit 20;";
|
||||
PGresult * dbResult = connection.execute(query);
|
||||
for (int i=0;i<connection.getRowCount(dbResult);i++) {
|
||||
int id = connection.getIntValue(dbResult, i, 0);
|
||||
std::string sourceFilePath = connection.getStringValue(dbResult, i, 1);
|
||||
std::string targetFilePath = connection.getStringValue(dbResult, i, 2);
|
||||
std::string name = connection.getStringValue(dbResult, i, 3);
|
||||
std::string sourceLanguageCode = connection.getStringValue(dbResult, i, 4);
|
||||
std::string targetLanguageCode = connection.getStringValue(dbResult, i, 5);
|
||||
int status = connection.getIntValue(dbResult, i, 6);
|
||||
int type = connection.getIntValue(dbResult, i, 7);
|
||||
int tmId = connection.getIntValue(dbResult, i, 8);
|
||||
std::string created = connection.getStringValue(dbResult, i, 9);
|
||||
|
||||
result.push_back(Request(id, sourceFilePath, targetFilePath, name, sourceLanguageCode, targetLanguageCode, status, type, tmId, created));
|
||||
}
|
||||
connection.clearResult(dbResult);
|
||||
connection.endTransaction();
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
int RequestDAO::addRequest(const std::string sourceFilePath, const std::string targetFilePath, const int sourceLangId, const int targetLangId, const std::string name, int type, int tmId) {
|
||||
Logger::log("addRequest");
|
||||
Logger::logString("source file path", sourceFilePath);
|
||||
Logger::logString("target file path", targetFilePath);
|
||||
Logger::logInt("source lang id", sourceLangId);
|
||||
Logger::logInt("target lang id", targetLangId);
|
||||
Logger::logString("name", name);
|
||||
Logger::logInt("type", type);
|
||||
Logger::logInt("tm id", tmId);
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
std::string query = "INSERT INTO request(source_file_path, target_file_path, source_lang_id, target_lang_id, name, status, type, tm_id) values($1::text,$2::text,$3::integer,$4::integer,$5::text, $6::integer, $7::integer, $8::integer) RETURNING id";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new StringParam(sourceFilePath));
|
||||
params.push_back(new StringParam(targetFilePath));
|
||||
params.push_back(new IntParam(sourceLangId));
|
||||
params.push_back(new IntParam(targetLangId));
|
||||
params.push_back(new StringParam(name));
|
||||
params.push_back(new IntParam(0));
|
||||
params.push_back(new IntParam(type));
|
||||
params.push_back(new IntParam(tmId));
|
||||
|
||||
PGresult * result = connection.execute(query, params);
|
||||
int newId = connection.getIntValue(result, 0, 0);
|
||||
connection.clearResult(result);
|
||||
connection.endTransaction();
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
return newId;
|
||||
|
||||
}
|
29
concordia-server/request_dao.hpp
Normal file
29
concordia-server/request_dao.hpp
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef REQUEST_DAO_HDR
|
||||
#define REQUEST_DAO_HDR
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include <concordia/common/config.hpp>
|
||||
#include "db_connection.hpp"
|
||||
#include "request.hpp"
|
||||
|
||||
class RequestDAO {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
RequestDAO();
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~RequestDAO();
|
||||
|
||||
int addRequest(const std::string sourceFilePath, const std::string targetFilePath, const int sourceLangId, const int targetLangId, const std::string name, int type, int tmId);
|
||||
|
||||
std::vector<Request> getRequests();
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -7,6 +7,21 @@ CREATE TABLE tm (
|
||||
lemmatized bool DEFAULT false
|
||||
);
|
||||
|
||||
DROP TABLE IF EXISTS request;
|
||||
CREATE TABLE request (
|
||||
id SERIAL PRIMARY KEY,
|
||||
source_file_path varchar(100),
|
||||
target_file_path varchar(100),
|
||||
source_lang_id integer,
|
||||
target_lang_id integer,
|
||||
name varchar(40),
|
||||
status integer,
|
||||
type integer,
|
||||
tm_id integer,
|
||||
created timestamp default now()
|
||||
);
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS language;
|
||||
CREATE TABLE language (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
115
import-requests/handle_requests.py
Executable file
115
import-requests/handle_requests.py
Executable file
@ -0,0 +1,115 @@
|
||||
#!/usr/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import psycopg2, os, shutil, subprocess, urllib3, time, json
|
||||
|
||||
from importlib.machinery import SourceFileLoader
|
||||
|
||||
BUFFER_SIZE = 500
|
||||
|
||||
def postJson(address, data):
    """POST *data* as UTF-8 encoded JSON to *address*; return the decoded JSON reply."""
    payload = json.dumps(data).encode('utf-8')
    pool = urllib3.PoolManager()
    reply = pool.request(
        'POST',
        address,
        headers={'Content-Type': 'application/json'},
        body=payload,
    )
    reply_text = reply.data.decode('utf-8')
    return json.loads(reply_text)
|
||||
|
||||
|
||||
def add_examples(address, examplesData):
    """Post *examplesData* to *address*; raise if the reply has status 'error'.

    Raises:
        Exception: carrying the 'message' field of the error reply.
    """
    reply = postJson(address, examplesData)
    if reply['status'] != 'error':
        return
    raise Exception(reply['message'])
|
||||
|
||||
def file_len(fname):
    """Return the number of lines in the text file *fname* (0 if it is empty)."""
    with open(fname) as f:
        # Counting with sum() also covers the empty file, where an
        # enumerate() loop would leave its index variable unbound.
        return sum(1 for _ in f)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Handle the oldest import request: copy its files into a fresh mgiza corpus
# directory, run the aligner, then load the aligned sentences into a
# translation memory on the Concordia server and refresh its index.
# ---------------------------------------------------------------------------
script_dir = os.path.dirname(os.path.realpath(__file__))
mgiza_path = script_dir+'/../mgiza-aligner'

conn = psycopg2.connect("dbname='concordia_server' user='concordia' host='localhost' port='6543' password='concordia'")
try:
    cur = conn.cursor()
    try:
        cur.execute("""select request.id, request.source_file_path, request.target_file_path, request.name, src_lang.id as src_lang_id, src_lang.code as src_code, trg_lang.id as trg_lang_id, trg_lang.code as trg_code, request.status, request.type, request.tm_id from request inner join language as src_lang on src_lang.id = request.source_lang_id inner join language as trg_lang on trg_lang.id = request.target_lang_id order by request.created limit 1""")
        request = cur.fetchone()
    finally:
        cur.close()
finally:
    # The database is not needed past this point; release the connection
    # before the (long) alignment step, and also when the query fails.
    conn.close()

if request is None:
    # fetchone() returns None when the query produced no row.
    print("No import request to handle.")
    raise SystemExit(0)

request_id, src_file_path, trg_file_path, tm_name, src_lang_id, src_lang_code, trg_lang_id, trg_lang_code, status, tm_type, tm_id = request
request_corpus_path = mgiza_path+'/corpora/request_'+str(request_id)
os.makedirs(request_corpus_path)
shutil.copy(src_file_path, request_corpus_path+'/src.txt')
shutil.copy(trg_file_path, request_corpus_path+'/trg.txt')

# check=True: stop here if the aligner fails instead of continuing with
# missing or stale output files.
subprocess.run(["make","SRC_LANG="+src_lang_code, "TRG_LANG="+trg_lang_code, "CORPUS_NAME=request_"+str(request_id)], cwd=mgiza_path, check=True)

host = SourceFileLoader("host", script_dir+'/../tests/host.py').load_module()
address = 'http://'+host.concordia_host
if len(host.concordia_port) > 0:
    address += ':'+host.concordia_port

sourceFile = request_corpus_path+'/src_final.txt'
targetFile = request_corpus_path+'/trg_final.txt'
alignmentsFile = request_corpus_path+'/aligned_final.txt'

source_len = file_len(sourceFile)
if source_len != file_len(targetFile):
    raise Exception("source and target files are not of the same length!")

if file_len(alignmentsFile) != 3*source_len:
    raise Exception("alignments file is not exactly 3 times longer than source and target")

if tm_type == 0:
    # Type 0 is the "create a new translation memory" choice of the import form.
    data = {
        'operation': 'addTm',
        'sourceLangId':src_lang_id,
        'targetLangId':trg_lang_id,
        'name':tm_name,
        'tmLemmatized':True
    }

    response = postJson(address, data)

    tmId = int(response['newTmId'])
    print("Added new tm: %d" % tmId)
else:
    # Any other type asks to extend the translation memory stored with the
    # request, so no new TM must be created.
    tmId = int(tm_id)
    print("Extending existing tm: %d" % tmId)

data = {
    'operation': 'addAlignedLemmatizedSentences',
    'tmId':tmId
}

examples = []
with open(sourceFile) as sf, open(targetFile) as tf, open(alignmentsFile) as af:
    for sourceLine in sf:
        sourceSentence = sourceLine.strip()
        targetSentence = tf.readline().strip()

        # Skip two lines of the alignments file: these are lemmatized and we
        # need the raw sentences from the source and target files. Only the
        # third line of each triple is used.
        af.readline()
        af.readline()

        alignmentString = af.readline().strip()

        examples.append([sourceSentence, targetSentence, alignmentString])

        # Send the examples in batches of BUFFER_SIZE.
        if len(examples) >= BUFFER_SIZE:
            data['examples'] = examples
            add_examples(address, data)
            examples = []

# Send whatever is left over from the last, incomplete batch.
if len(examples) > 0:
    data['examples'] = examples
    add_examples(address, data)

print("Generating index...")
start = time.time()
data = {
    'operation': 'refreshIndex',
    'tmId' : tmId
}

response = postJson(address, data)

end = time.time()
print("Index regeneration complete. The operation took %.4f s" % (end - start))
|
12
tests/addStocznia.sh
Executable file
12
tests/addStocznia.sh
Executable file
@ -0,0 +1,12 @@
|
||||
#!/bin/sh
|
||||
|
||||
./addTm.py 1 2 placeholder 1
|
||||
|
||||
./addAlignedLemmatizedTM.py stocznia_plen ../mgiza-aligner/corpora/stocznia_plen/src_final.txt 1 ../mgiza-aligner/corpora/stocznia_plen/trg_final.txt 2 ../mgiza-aligner/corpora/stocznia_plen/aligned_final.txt
|
||||
|
||||
./addTm.py 1 2 placeholder 1
|
||||
|
||||
./addTm.py 1 2 placeholder 1
|
||||
|
||||
./addAlignedLemmatizedTM.py stocznia_enpl ../mgiza-aligner/corpora/stocznia_enpl/src_final.txt 2 ../mgiza-aligner/corpora/stocznia_enpl/trg_final.txt 1 ../mgiza-aligner/corpora/stocznia_enpl/aligned_final.txt
|
||||
|
Loading…
Reference in New Issue
Block a user