phrase search
This commit is contained in:
parent
c3df18c110
commit
b867f8dacc
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,6 +1,7 @@
|
|||||||
build/
|
build/
|
||||||
logs/concordia-server.log
|
logs/concordia-server.log
|
||||||
logs/pgbouncer.log
|
logs/pgbouncer.log
|
||||||
|
logs/phrase-searches.json
|
||||||
concordia.cfg
|
concordia.cfg
|
||||||
concordia-server/config.hpp
|
concordia-server/config.hpp
|
||||||
index/
|
index/
|
||||||
|
@ -62,6 +62,7 @@ configure_file (
|
|||||||
|
|
||||||
set(CONFIG_FILE_PATH "${concordia-server_SOURCE_DIR}/concordia.cfg")
|
set(CONFIG_FILE_PATH "${concordia-server_SOURCE_DIR}/concordia.cfg")
|
||||||
set(LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/concordia-server.log")
|
set(LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/concordia-server.log")
|
||||||
|
set(PHRASE_LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/phrase-searches.json")
|
||||||
|
|
||||||
# --------------
|
# --------------
|
||||||
# db settings
|
# db settings
|
||||||
|
@ -34,7 +34,13 @@
|
|||||||
display:none;
|
display:none;
|
||||||
}
|
}
|
||||||
|
|
||||||
#phrase-icon {
|
#phrase-on-icon {
|
||||||
|
cursor:pointer;
|
||||||
|
vertical-align:middle;
|
||||||
|
margin-right:20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#phrase-off-icon {
|
||||||
cursor:pointer;
|
cursor:pointer;
|
||||||
vertical-align:middle;
|
vertical-align:middle;
|
||||||
margin-right:20px;
|
margin-right:20px;
|
||||||
|
BIN
cat/images/switchOff.png
Normal file
BIN
cat/images/switchOff.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
BIN
cat/images/switchOn.png
Normal file
BIN
cat/images/switchOn.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.3 KiB |
@ -34,7 +34,12 @@ function phraseSearchHandle(tmid, intervals) {
|
|||||||
type: 'post',
|
type: 'post',
|
||||||
dataType: 'json',
|
dataType: 'json',
|
||||||
success: function (data) {
|
success: function (data) {
|
||||||
$('#result').html(renderResult(data));
|
if (data['found']) {
|
||||||
|
$('#result').html(renderResult(data));
|
||||||
|
} else {
|
||||||
|
$('#phrase-prompt').html('<b>Your phrase was not found. Try selecting another phrase: </b>').fadeOut(200).fadeIn(200);
|
||||||
|
clearTextSelections();
|
||||||
|
}
|
||||||
},
|
},
|
||||||
data: concordiaRequest
|
data: concordiaRequest
|
||||||
});
|
});
|
||||||
@ -43,10 +48,13 @@ function phraseSearchHandle(tmid, intervals) {
|
|||||||
function renderResult(data) {
|
function renderResult(data) {
|
||||||
var res = '';
|
var res = '';
|
||||||
|
|
||||||
var score = data['result']['bestOverlayScore']*100;
|
if (typeof(data['result']['bestOverlayScore']) === 'undefined') {
|
||||||
|
// ignore
|
||||||
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>';
|
} else {
|
||||||
res += '<div id="phrase-selection"><img id="phrase-icon" src="../images/phrase.png" alt="phrase search" onclick="togglePhraseSearchMode()" title="search for phrases"/><span id="phrase-prompt" class="hidden">Select continuous phrase: <img id="cancel-button" src="../images/cancel-button.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel searching for phrases"/></span></div>';
|
var score = data['result']['bestOverlayScore']*100;
|
||||||
|
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>';
|
||||||
|
}
|
||||||
|
res += '<div id="phrase-selection">Phrase search mode: <img id="phrase-off-icon" src="../images/switchOff.png" alt="enter phrase search mode" onclick="togglePhraseSearchMode()" title="search for phrases"/><img class="hidden" id="phrase-on-icon" src="../images/switchOn.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel phrase search"/><span id="phrase-prompt" class="hidden">Select continuous phrase: </span></div>';
|
||||||
|
|
||||||
var inputSentence = $('#search-input').val();
|
var inputSentence = $('#search-input').val();
|
||||||
var markedSentence = '';
|
var markedSentence = '';
|
||||||
@ -104,7 +112,8 @@ function renderFragment(fragment, number) {
|
|||||||
|
|
||||||
function togglePhraseSearchMode() {
|
function togglePhraseSearchMode() {
|
||||||
$('#result-sentence').toggleClass('phrase-mode');
|
$('#result-sentence').toggleClass('phrase-mode');
|
||||||
$('#phrase-icon').toggleClass('hidden');
|
$('#phrase-on-icon').toggleClass('hidden');
|
||||||
|
$('#phrase-off-icon').toggleClass('hidden');
|
||||||
$('#phrase-prompt').toggleClass('hidden');
|
$('#phrase-prompt').toggleClass('hidden');
|
||||||
clearTextSelections();
|
clearTextSelections();
|
||||||
}
|
}
|
||||||
|
@ -8,4 +8,4 @@ prompt@#@Enter search pattern (Croatian sentence):
|
|||||||
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
|
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
|
||||||
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
|
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
|
||||||
suggestion@#@ozbiljno analizira proces
|
suggestion@#@ozbiljno analizira proces
|
||||||
suggestion@#@Kazna medijskom podnio ostavku ozbiljno analizira proces
|
suggestion@#@Nagrada koja nosi ime po našem velikom snimatelju dodjeljuje se za izniman doprinos filmskoj umjetnosti.
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#include "complete_concordia_search_result.hpp"
|
#include "complete_concordia_search_result.hpp"
|
||||||
|
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
|
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
|
||||||
const double bestOverlayScore):
|
const double bestOverlayScore):
|
||||||
_bestOverlayScore(bestOverlayScore) {
|
_bestOverlayScore(bestOverlayScore) {
|
||||||
@ -8,3 +10,9 @@ CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
|
|||||||
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {
|
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CompleteConcordiaSearchResult::offsetPattern(int offset) {
|
||||||
|
BOOST_FOREACH(SimpleSearchResult & simpleResult, _bestOverlay) {
|
||||||
|
simpleResult.offsetPattern(offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -23,6 +23,8 @@ public:
|
|||||||
return _bestOverlay;
|
return _bestOverlay;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calls offsetPattern(offset) on every SimpleSearchResult in the best overlay.
void offsetPattern(int offset);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double _bestOverlayScore;
|
double _bestOverlayScore;
|
||||||
|
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
#include <concordia/interval.hpp>
|
#include <concordia/interval.hpp>
|
||||||
|
|
||||||
@ -37,7 +40,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
outputString << "Content-type: application/json\r\n\r\n";
|
outputString << "Content-type: application/json\r\n\r\n";
|
||||||
try {
|
try {
|
||||||
rapidjson::Document d;
|
rapidjson::Document d;
|
||||||
Logger::logString("concordia request string", requestString);
|
// Logger::logString("concordia request string", requestString);
|
||||||
bool hasError = d.Parse(requestString.c_str()).HasParseError();
|
bool hasError = d.Parse(requestString.c_str()).HasParseError();
|
||||||
|
|
||||||
if (hasError) {
|
if (hasError) {
|
||||||
@ -106,15 +109,13 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
|||||||
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
|
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
|
||||||
int tmId = _getIntParameter(d, TM_ID_PARAM);
|
int tmId = _getIntParameter(d, TM_ID_PARAM);
|
||||||
Logger::logString("concordia phrase search pattern", pattern);
|
Logger::logString("concordia phrase search pattern", pattern);
|
||||||
|
_logPhrase(requestString);
|
||||||
std::vector<Interval> intervals;
|
std::vector<Interval> intervals;
|
||||||
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
|
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
|
||||||
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
|
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
|
||||||
Logger::logInt("interval size", intervalsArray[i].Size());
|
intervals.push_back(Interval(intervalsArray[i][0].GetInt(), intervalsArray[i][1].GetInt()));
|
||||||
Logger::logInt("search interval start", intervalsArray[i][0].GetInt());
|
|
||||||
Logger::logInt("search interval end", intervalsArray[i][1].GetInt());
|
|
||||||
}
|
}
|
||||||
|
_searcherController->concordiaPhraseSearch(jsonWriter, pattern, intervals, tmId);
|
||||||
//_searcherController->concordiaPhraseSearch(jsonWriter, pattern, tmId);
|
|
||||||
} else if (operation == ADD_TM_OP) {
|
} else if (operation == ADD_TM_OP) {
|
||||||
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
|
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
|
||||||
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
|
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
|
||||||
@ -177,3 +178,9 @@ void ConcordiaServer::_addTm(int tmId) {
|
|||||||
_concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath));
|
_concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {

/**
 * Builds one phrase-log entry from the raw request text.
 *
 * The entry is the request object with a "timestamp" member spliced in just
 * before its final closing brace. Trailing whitespace after the brace is
 * dropped, and no separating comma is emitted for an empty object, so the
 * entry stays valid JSON whenever the request was a valid JSON object.
 *
 * @param requestJson raw request text, expected to be a JSON object
 * @param timestamp   value stored under the "timestamp" key
 * @return the log entry, without a trailing newline; when requestJson does
 *         not end with a closing brace the entry holds only the timestamp
 */
std::string buildPhraseLogEntry(const std::string & requestJson,
                                std::time_t timestamp) {
    static const char * const JSON_WHITESPACE = " \t\r\n";
    std::ostringstream entry;

    // Position of the final '}' once trailing whitespace is ignored.
    const std::string::size_type closingBrace =
        requestJson.find_last_not_of(JSON_WHITESPACE);
    const bool endsWithBrace = closingBrace != std::string::npos
                               && closingBrace > 0
                               && requestJson[closingBrace] == '}';

    // Last meaningful character in front of the closing brace.
    const std::string::size_type lastContent = endsWithBrace
        ? requestJson.find_last_not_of(JSON_WHITESPACE, closingBrace - 1)
        : std::string::npos;

    if (lastContent == std::string::npos) {
        // Not an object we can extend: keep the log line-delimited JSON.
        entry << "{\"timestamp\":" << timestamp << "}";
        return entry.str();
    }

    entry << requestJson.substr(0, lastContent + 1);
    if (requestJson[lastContent] != '{') {
        // The object already has members, so a separator is required.
        entry << ", ";
    }
    entry << "\"timestamp\":" << timestamp << "}";
    return entry.str();
}

}  // namespace

/**
 * Appends the phrase-search request, extended with the current time under
 * the "timestamp" key, as a single line to PHRASE_LOG_FILE_PATH.
 *
 * Logging is best effort: when the log file cannot be opened the problem is
 * reported on stderr and the request is not logged.
 *
 * @param phraseString raw JSON text of the phrase-search request
 */
void ConcordiaServer::_logPhrase(std::string phraseString) {
    // The stream is closed by its destructor on every exit path.
    std::ofstream logFile(PHRASE_LOG_FILE_PATH, std::ios::out | std::ios::app);
    if (!logFile) {
        std::cerr << "cannot open phrase log file: " << PHRASE_LOG_FILE_PATH << std::endl;
        return;
    }
    logFile << buildPhraseLogEntry(phraseString, std::time(0)) << "\n";
}
|
||||||
|
@ -30,6 +30,8 @@ public:
|
|||||||
std::string handleRequest(std::string & requestString);
|
std::string handleRequest(std::string & requestString);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Appends the request text, extended with a "timestamp" member holding the
// current time, as one line to the phrase-search log (PHRASE_LOG_FILE_PATH).
void _logPhrase(std::string phraseString);
|
||||||
|
|
||||||
std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
||||||
|
|
||||||
int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@"
|
#define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@"
|
||||||
#define LOG_FILE_PATH "@LOG_FILE_PATH@"
|
#define LOG_FILE_PATH "@LOG_FILE_PATH@"
|
||||||
|
#define PHRASE_LOG_FILE_PATH "@PHRASE_LOG_FILE_PATH@"
|
||||||
#define INDEX_DIRECTORY "@INDEX_DIRECTORY@"
|
#define INDEX_DIRECTORY "@INDEX_DIRECTORY@"
|
||||||
|
|
||||||
// database connection information
|
// database connection information
|
||||||
|
@ -36,6 +36,69 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
std::string & pattern,
|
||||||
|
const std::vector<Interval> & intervals,
|
||||||
|
const int tmId) {
|
||||||
|
boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
|
||||||
|
if (it != _concordiasMap->end()) {
|
||||||
|
if (intervals.size() > 0) {
|
||||||
|
std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
|
||||||
|
|
||||||
|
std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("status");
|
||||||
|
jsonWriter.String("success");
|
||||||
|
jsonWriter.String("found");
|
||||||
|
if (shortPatternResults.size() > 0) {
|
||||||
|
jsonWriter.Bool(true);
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<SimpleSearchResult> bestOverlay;
|
||||||
|
|
||||||
|
int currStart = 0;
|
||||||
|
BOOST_FOREACH(const Interval & interval, intervals) {
|
||||||
|
CompleteConcordiaSearchResult restResult = _unitDAO.getConcordiaResult(
|
||||||
|
it->second->concordiaSearch(pattern.substr(currStart, interval.getStart()-currStart)));
|
||||||
|
restResult.offsetPattern(currStart);
|
||||||
|
bestOverlay.insert(bestOverlay.end(), restResult.getBestOverlay().begin(), restResult.getBestOverlay().end());
|
||||||
|
|
||||||
|
SimpleSearchResult shortPatternresult = shortPatternResults[0];
|
||||||
|
shortPatternresult.setMatchedPatternStart(interval.getStart());
|
||||||
|
shortPatternresult.setMatchedPatternEnd(interval.getEnd());
|
||||||
|
bestOverlay.push_back(shortPatternresult);
|
||||||
|
currStart = interval.getEnd();
|
||||||
|
}
|
||||||
|
CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
|
||||||
|
it->second->concordiaSearch(pattern.substr(currStart)));
|
||||||
|
lastRestResult.offsetPattern(currStart);
|
||||||
|
bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());
|
||||||
|
|
||||||
|
jsonWriter.String("result");
|
||||||
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("bestOverlay");
|
||||||
|
jsonWriter.StartArray();
|
||||||
|
BOOST_FOREACH(SimpleSearchResult & simpleResult, bestOverlay) {
|
||||||
|
JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
|
||||||
|
}
|
||||||
|
jsonWriter.EndArray();
|
||||||
|
jsonWriter.EndObject();
|
||||||
|
} else {
|
||||||
|
jsonWriter.Bool(false);
|
||||||
|
}
|
||||||
|
jsonWriter.EndObject();
|
||||||
|
} else {
|
||||||
|
JsonGenerator::signalError(jsonWriter, "no intervals for phrase search");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
JsonGenerator::signalError(jsonWriter, "no such tm!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
std::string & pattern,
|
std::string & pattern,
|
||||||
const int tmId) {
|
const int tmId) {
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#include <boost/ptr_container/ptr_map.hpp>
|
#include <boost/ptr_container/ptr_map.hpp>
|
||||||
#include <concordia/concordia.hpp>
|
#include <concordia/concordia.hpp>
|
||||||
#include <concordia/concordia_exception.hpp>
|
#include <concordia/concordia_exception.hpp>
|
||||||
|
#include <concordia/interval.hpp>
|
||||||
|
|
||||||
#include "unit_dao.hpp"
|
#include "unit_dao.hpp"
|
||||||
#include "simple_search_result.hpp"
|
#include "simple_search_result.hpp"
|
||||||
@ -30,6 +31,10 @@ public:
|
|||||||
std::string & pattern,
|
std::string & pattern,
|
||||||
const int tmId);
|
const int tmId);
|
||||||
|
|
||||||
|
// Phrase search: looks up the text selected by `intervals` within `pattern`
// in translation memory `tmId` and writes the JSON response to jsonWriter;
// an error object is written e.g. when the TM is unknown or no intervals
// were given.
void concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
std::string & pattern,
|
||||||
|
const std::vector<Interval> & intervals,
|
||||||
|
const int tmId);
|
||||||
private:
|
private:
|
||||||
|
|
||||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||||
|
@ -24,4 +24,8 @@ void SimpleSearchResult::addMatchedTargetFragment(const std::pair<int,int> & tar
|
|||||||
_targetFragments.push_back(targetFragment);
|
_targetFragments.push_back(targetFragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Moves the matched pattern range by `offset`: both its start and its end
// position are increased by the same amount.
void SimpleSearchResult::offsetPattern(int offset) {
    _matchedPatternStart = _matchedPatternStart + offset;
    _matchedPatternEnd = _matchedPatternEnd + offset;
}
|
||||||
|
|
||||||
|
@ -28,10 +28,18 @@ public:
|
|||||||
return _matchedPatternStart;
|
return _matchedPatternStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setMatchedPatternStart(int newStart) {
|
||||||
|
_matchedPatternStart = newStart;
|
||||||
|
}
|
||||||
|
|
||||||
int getMatchedPatternEnd() const {
|
int getMatchedPatternEnd() const {
|
||||||
return _matchedPatternEnd;
|
return _matchedPatternEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void setMatchedPatternEnd(int newEnd) {
|
||||||
|
_matchedPatternEnd = newEnd;
|
||||||
|
}
|
||||||
|
|
||||||
int getMatchedExampleStart() const {
|
int getMatchedExampleStart() const {
|
||||||
return _matchedExampleStart;
|
return _matchedExampleStart;
|
||||||
}
|
}
|
||||||
@ -54,6 +62,8 @@ public:
|
|||||||
|
|
||||||
void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
|
void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
|
||||||
|
|
||||||
|
// Adds `offset` to both the matched pattern start and the matched pattern end.
void offsetPattern(int offset);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int _id;
|
int _id;
|
||||||
|
|
||||||
|
@ -116,7 +116,7 @@ void UnitDAO::_getResultsFromFragments(
|
|||||||
delete param;
|
delete param;
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO now add all target fragments matched with this fragment
|
// now add all target fragments matched with this fragment
|
||||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
||||||
std::vector<QueryParam*> targetParams;
|
std::vector<QueryParam*> targetParams;
|
||||||
targetParams.push_back(new IntParam(fragment.getExampleId()));
|
targetParams.push_back(new IntParam(fragment.getExampleId()));
|
||||||
|
Loading…
Reference in New Issue
Block a user