phrase searching stub

This commit is contained in:
rjawor 2016-01-21 16:51:23 +01:00
parent 3f222f53f6
commit c3df18c110
12 changed files with 95 additions and 48 deletions

1
.gitignore vendored
View File

@ -14,3 +14,4 @@ db/pgbouncer.pid
db/pgbouncer.ini db/pgbouncer.ini
upstart/concordia-server.conf upstart/concordia-server.conf
upstart/pgbouncer.conf upstart/pgbouncer.conf
cat/host.cfg

5
cat/README Normal file
View File

@ -0,0 +1,5 @@
1. Prepare the host.cfg file with the address and port number of Concordia (see host.cfg_example). WARNING: there should not be any empty lines in the .cfg files.
2. Prepare a version file for each tm in Concordia in the "versions" directory.
3. Clean a directory on your webserver (that supports PHP).
4. sudo ./publish.py PATH_ON_SERVER.

View File

@ -1,7 +1,11 @@
<?php <?php
$url = 'http://concordia.vm.wmi.amu.edu.pl:8800'; $url = 'http://@concordia_host@:@concordia_port@';
$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"pattern" => $_POST["pattern"]); $intervalsArray = array();
foreach ($_POST["intervals"] as $interval) {
array_push($intervalsArray, [intval($interval[0]), intval($interval[1])]);
}
$data = array("operation" => $_POST["operation"],"tmId" => intval($_POST["tmId"]),"pattern" => $_POST["pattern"],"intervals" => $intervalsArray);
// use key 'http' even if you send the request to https://... // use key 'http' even if you send the request to https://...
$options = array( $options = array(

View File

@ -133,6 +133,12 @@
cursor:text; cursor:text;
} }
#result-sentence.phrase-mode .matchedFragmentSelected {
background-color:#e5e5ff;
border-color:#e5e5ff;
cursor:text;
}
.matchedFragment { .matchedFragment {
background-color:#99CCFF; background-color:#99CCFF;
border-style: solid; border-style: solid;

3
cat/host.cfg_example Normal file
View File

@ -0,0 +1,3 @@
concordia_host@#@concordia.vm.wmi.amu.edu.pl
concordia_port@#@8800

View File

@ -8,10 +8,11 @@
<body> <body>
<script> <script>
var concordiaUrl = 'http://@concordia_host@:@concordia_port@'; var concordiaUrl = 'http://@concordia_host@:@concordia_port@';
var currentTmId = @tmid@;
$(document).ready(function() { $(document).ready(function() {
$('#search-input').bind("enterKey",function(e){ $('#search-input').bind("enterKey",function(e){
searchHandle(@tmid@); searchHandle(currentTmId);
}); });
$('#search-input').keyup(function(e){ $('#search-input').keyup(function(e){
if(e.keyCode == 13) { if(e.keyCode == 13) {
@ -39,7 +40,7 @@
<label for="search-input">@prompt@</label> <label for="search-input">@prompt@</label>
</div> </div>
<input id="search-input" type="text" value="" /> <input id="search-input" type="text" value="" />
<img id="search-icon" src="../images/search.png" alt="search" onclick="searchHandle(@tmid@)" title="search"/> <img id="search-icon" src="../images/search.png" alt="search" onclick="searchHandle(currentTmId)" title="search"/>
<img id="help-icon" src="../images/help.png" alt="help" onclick="toggleHelp()" title="show/hide help"/> <img id="help-icon" src="../images/help.png" alt="help" onclick="toggleHelp()" title="show/hide help"/>
<div id="result"> <div id="result">

View File

@ -1,40 +0,0 @@
<html>
<head>
<script src="js/jquery-1.11.3.min.js"></script>
<script src="js/cat.js"></script>
<link rel="stylesheet" href="css/iatagger.css" />
<meta charset="UTF-8">
</head>
<body>
<div id="header">
</div>
<div id="content">
<a href="http://tmconcordia.sourceforge.net/" target="_blank"><img src="images/banner.jpg" alt="Banner" /></a>
<br/><br/><br/>
<p>
Welcome to Concordia. The system finds the longest matches of the pattern sentence in its translation memory. This translation memory is 200 000 sentences taken from the SETIMES2 Croatian-English corpus (<a href="http://opus.lingfil.uu.se/SETIMES2.php" target="_blank">link</a>). Please enter a Croatian sentence in the field below and press Enter (or use the search button). You can test the system on predefined samples, simply use the link "show/hide samples" and apply one of the sample sentences.
</p>
<p>
Enjoy your work with the system!
</p>
<label for="searchInput">Enter search pattern (Croatian sentence):</label>
<span class="suggestion" onclick="showHideSuggestions()">show/hide samples</span>
<br/><br/>
<div class="suggestionsInvisible" id="suggestions">
<ul>
<li> Kazna medijskom mogulu obnovila raspravu u Makedoniji <span class="suggestion" onclick="searchText('Kazna medijskom mogulu obnovila raspravu u Makedoniji');">apply</span></li>
<li>Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci <span class="suggestion" onclick="searchText('Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci');">apply</span></li>
</ul>
<br/><br/>
</div>
<input id="searchInput" type="text" value="" />
<br/><br/>
<input type="button" value="search" onclick="searchHandle()" />
<br/><br/><br/><br/>
<div id="result">
</div>
</div>
</body>
</html>

View File

@ -20,6 +20,26 @@ function searchHandle(tmid) {
}); });
} }
/**
 * Issues a phrase search request and renders the response.
 *
 * Posts a 'concordiaPhraseSearch' request to /concordia_gate.php, using the
 * current value of #search-input as the pattern, and replaces the contents of
 * #result with renderResult() applied to the parsed JSON response.
 *
 * @param tmid      value sent as the request's tmId field
 * @param intervals value sent as the request's intervals field
 */
function phraseSearchHandle(tmid, intervals) {
    var payload = {
        operation: 'concordiaPhraseSearch',
        tmId: tmid,
        pattern: $("#search-input").val(),
        intervals: intervals
    };

    // Invoked by jQuery once the response has been received and parsed.
    function showResponse(response) {
        $('#result').html(renderResult(response));
    }

    $.ajax({
        type: 'post',
        url: '/concordia_gate.php',
        dataType: 'json',
        data: payload,
        success: showResponse
    });
}
function renderResult(data) { function renderResult(data) {
var res = ''; var res = '';
@ -110,9 +130,18 @@ function showHideSuggestions() {
function phraseSearch(caller) { function phraseSearch(caller) {
if ($('#result-sentence').hasClass('phrase-mode')) { if ($('#result-sentence').hasClass('phrase-mode')) {
var phrase = getSelectedTextWithin(caller); var phrase = getSelectedTextWithin(caller);
console.log('phrase search for: '+phrase); if (phrase.length > 0) {
console.log(getIndicesOf(phrase, $("#search-input").val(), true));
var phrases = $('phrase-prompt').data(); var intervalStarts = getIndicesOf(phrase, $("#search-input").val(), true);
var intervals = [];
for (var i=0;i<intervalStarts.length;i++) {
intervals.push([intervalStarts[i], intervalStarts[i]+phrase.length])
}
phraseSearchHandle(currentTmId, intervals);
}
} }
} }

View File

@ -21,7 +21,21 @@ shutil.copytree('js', root_dir+'/js')
shutil.copytree('css', root_dir+'/css') shutil.copytree('css', root_dir+'/css')
shutil.copytree('images', root_dir+'/images') shutil.copytree('images', root_dir+'/images')
shutil.copy('favicon.ico', root_dir+'/favicon.ico') shutil.copy('favicon.ico', root_dir+'/favicon.ico')
shutil.copy('concordia_gate.php', root_dir+'/concordia_gate.php')
# Read the Concordia host and port from host.cfg. Each meaningful line has the
# form "key@#@value"; only concordia_host and concordia_port are used here.
concordia_host = None
concordia_port = None
with open('host.cfg', 'r') as host_file:
    for line in host_file:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline) carries no setting; skip
            # it instead of failing on the unpack below.
            continue
        field, separator, value = line.partition('@#@')
        if not separator:
            raise ValueError('host.cfg: expected "key@#@value", got: ' + line)
        if field == 'concordia_host':
            concordia_host = value
        elif field == 'concordia_port':
            concordia_port = value

# Fail before the output file is opened (and truncated) if a setting is missing.
if concordia_host is None or concordia_port is None:
    raise ValueError('host.cfg must define both concordia_host and concordia_port')

# Produce concordia_gate.php from its pattern file by filling in the
# @concordia_host@ and @concordia_port@ placeholders.
with open('concordia_gate.php_pattern', 'r') as gate_pattern_file, open(root_dir+'/concordia_gate.php', 'w') as gate_file:
    for line in gate_pattern_file:
        # Plain string replacement: the configured values are literals and must
        # not be interpreted as regular-expression replacement templates.
        line = line.replace('@concordia_host@', concordia_host)
        line = line.replace('@concordia_port@', concordia_port)
        gate_file.write(line)
versions_dir = 'versions' versions_dir = 'versions'
@ -40,6 +54,11 @@ for version_file in os.listdir(versions_dir):
version[field] = value version[field] = value
versions.append(version) versions.append(version)
for version in versions: for version in versions:
version_dir = root_dir+'/'+version['dir'] version_dir = root_dir+'/'+version['dir']
os.mkdir(version_dir) os.mkdir(version_dir)

View File

@ -8,3 +8,4 @@ prompt@#@Enter search pattern (Croatian sentence):
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
suggestion@#@ozbiljno analizira proces suggestion@#@ozbiljno analizira proces
suggestion@#@Kazna medijskom podnio ostavku ozbiljno analizira proces

View File

@ -3,6 +3,8 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <concordia/interval.hpp>
#include "json_generator.hpp" #include "json_generator.hpp"
#include "config.hpp" #include "config.hpp"
#include "logger.hpp" #include "logger.hpp"
@ -35,6 +37,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
outputString << "Content-type: application/json\r\n\r\n"; outputString << "Content-type: application/json\r\n\r\n";
try { try {
rapidjson::Document d; rapidjson::Document d;
Logger::logString("concordia request string", requestString);
bool hasError = d.Parse(requestString.c_str()).HasParseError(); bool hasError = d.Parse(requestString.c_str()).HasParseError();
if (hasError) { if (hasError) {
@ -99,6 +102,19 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
int tmId = _getIntParameter(d, TM_ID_PARAM); int tmId = _getIntParameter(d, TM_ID_PARAM);
Logger::logString("concordia search pattern", pattern); Logger::logString("concordia search pattern", pattern);
_searcherController->concordiaSearch(jsonWriter, pattern, tmId); _searcherController->concordiaSearch(jsonWriter, pattern, tmId);
} else if (operation == CONCORDIA_PHRASE_SEARCH_OP) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM);
Logger::logString("concordia phrase search pattern", pattern);
std::vector<Interval> intervals;
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
Logger::logInt("interval size", intervalsArray[i].Size());
Logger::logInt("search interval start", intervalsArray[i][0].GetInt());
Logger::logInt("search interval end", intervalsArray[i][1].GetInt());
}
//_searcherController->concordiaPhraseSearch(jsonWriter, pattern, tmId);
} else if (operation == ADD_TM_OP) { } else if (operation == ADD_TM_OP) {
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM); int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM); int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);

View File

@ -19,6 +19,7 @@
#define SOURCE_LANG_PARAM "sourceLangId" #define SOURCE_LANG_PARAM "sourceLangId"
#define TARGET_LANG_PARAM "targetLangId" #define TARGET_LANG_PARAM "targetLangId"
#define NAME_PARAM "name" #define NAME_PARAM "name"
#define INTERVALS_PARAM "intervals"
#define ADD_SENTENCE_OP "addSentence" #define ADD_SENTENCE_OP "addSentence"
#define ADD_SENTENCES_OP "addSentences" #define ADD_SENTENCES_OP "addSentences"
@ -26,5 +27,6 @@
#define REFRESH_INDEX_OP "refreshIndex" #define REFRESH_INDEX_OP "refreshIndex"
#define SIMPLE_SEARCH_OP "simpleSearch" #define SIMPLE_SEARCH_OP "simpleSearch"
#define CONCORDIA_SEARCH_OP "concordiaSearch" #define CONCORDIA_SEARCH_OP "concordiaSearch"
#define CONCORDIA_PHRASE_SEARCH_OP "concordiaPhraseSearch"
#define ADD_TM_OP "addTm" #define ADD_TM_OP "addTm"