phrase search

This commit is contained in:
rjawor 2016-01-25 23:55:32 +01:00
parent c3df18c110
commit b867f8dacc
17 changed files with 134 additions and 15 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
build/ build/
logs/concordia-server.log logs/concordia-server.log
logs/pgbouncer.log logs/pgbouncer.log
logs/phrase-searches.json
concordia.cfg concordia.cfg
concordia-server/config.hpp concordia-server/config.hpp
index/ index/

View File

@ -62,6 +62,7 @@ configure_file (
set(CONFIG_FILE_PATH "${concordia-server_SOURCE_DIR}/concordia.cfg") set(CONFIG_FILE_PATH "${concordia-server_SOURCE_DIR}/concordia.cfg")
set(LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/concordia-server.log") set(LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/concordia-server.log")
set(PHRASE_LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/phrase-searches.json")
# -------------- # --------------
# db settings # db settings

View File

@ -34,7 +34,13 @@
display:none; display:none;
} }
#phrase-icon { #phrase-on-icon {
cursor:pointer;
vertical-align:middle;
margin-right:20px;
}
#phrase-off-icon {
cursor:pointer; cursor:pointer;
vertical-align:middle; vertical-align:middle;
margin-right:20px; margin-right:20px;

BIN
cat/images/switchOff.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

BIN
cat/images/switchOn.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

View File

@ -34,7 +34,12 @@ function phraseSearchHandle(tmid, intervals) {
type: 'post', type: 'post',
dataType: 'json', dataType: 'json',
success: function (data) { success: function (data) {
$('#result').html(renderResult(data)); if (data['found']) {
$('#result').html(renderResult(data));
} else {
$('#phrase-prompt').html('<b>Your phrase was not found. Try selecting another phrase: </b>').fadeOut(200).fadeIn(200);
clearTextSelections();
}
}, },
data: concordiaRequest data: concordiaRequest
}); });
@ -43,10 +48,13 @@ function phraseSearchHandle(tmid, intervals) {
function renderResult(data) { function renderResult(data) {
var res = ''; var res = '';
var score = data['result']['bestOverlayScore']*100; if (typeof(data['result']['bestOverlayScore']) === 'undefined') {
// ignore
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>'; } else {
res += '<div id="phrase-selection"><img id="phrase-icon" src="../images/phrase.png" alt="phrase search" onclick="togglePhraseSearchMode()" title="search for phrases"/><span id="phrase-prompt" class="hidden">Select continuous phrase: <img id="cancel-button" src="../images/cancel-button.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel searching for phrases"/></span></div>'; var score = data['result']['bestOverlayScore']*100;
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>';
}
res += '<div id="phrase-selection">Phrase search mode:&nbsp;<img id="phrase-off-icon" src="../images/switchOff.png" alt="enter phrase search mode" onclick="togglePhraseSearchMode()" title="search for phrases"/><img class="hidden" id="phrase-on-icon" src="../images/switchOn.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel phrase search"/><span id="phrase-prompt" class="hidden">Select continuous phrase: </span></div>';
var inputSentence = $('#search-input').val(); var inputSentence = $('#search-input').val();
var markedSentence = ''; var markedSentence = '';
@ -104,7 +112,8 @@ function renderFragment(fragment, number) {
function togglePhraseSearchMode() { function togglePhraseSearchMode() {
$('#result-sentence').toggleClass('phrase-mode'); $('#result-sentence').toggleClass('phrase-mode');
$('#phrase-icon').toggleClass('hidden'); $('#phrase-on-icon').toggleClass('hidden');
$('#phrase-off-icon').toggleClass('hidden');
$('#phrase-prompt').toggleClass('hidden'); $('#phrase-prompt').toggleClass('hidden');
clearTextSelections(); clearTextSelections();
} }

View File

@ -8,4 +8,4 @@ prompt@#@Enter search pattern (Croatian sentence):
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
suggestion@#@ozbiljno analizira proces suggestion@#@ozbiljno analizira proces
suggestion@#@Kazna medijskom podnio ostavku ozbiljno analizira proces suggestion@#@Nagrada koja nosi ime po našem velikom snimatelju dodjeljuje se za izniman doprinos filmskoj umjetnosti.

View File

@ -1,5 +1,7 @@
#include "complete_concordia_search_result.hpp" #include "complete_concordia_search_result.hpp"
#include <boost/foreach.hpp>
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult( CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
const double bestOverlayScore): const double bestOverlayScore):
_bestOverlayScore(bestOverlayScore) { _bestOverlayScore(bestOverlayScore) {
@ -8,3 +10,9 @@ CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() { CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {
} }
/**
 * Shifts the matched-pattern position of every result stored in
 * _bestOverlay by the given amount.
 *
 * \param offset value passed to SimpleSearchResult::offsetPattern for
 *               each element of the overlay
 */
void CompleteConcordiaSearchResult::offsetPattern(int offset) {
// iterate by non-const reference so that the stored elements are modified in place
BOOST_FOREACH(SimpleSearchResult & simpleResult, _bestOverlay) {
simpleResult.offsetPattern(offset);
}
}

View File

@ -23,6 +23,8 @@ public:
return _bestOverlay; return _bestOverlay;
} }
void offsetPattern(int offset);
private: private:
double _bestOverlayScore; double _bestOverlayScore;

View File

@ -2,6 +2,9 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <iostream>
#include <fstream>
#include <ctime>
#include <concordia/interval.hpp> #include <concordia/interval.hpp>
@ -37,7 +40,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
outputString << "Content-type: application/json\r\n\r\n"; outputString << "Content-type: application/json\r\n\r\n";
try { try {
rapidjson::Document d; rapidjson::Document d;
Logger::logString("concordia request string", requestString); // Logger::logString("concordia request string", requestString);
bool hasError = d.Parse(requestString.c_str()).HasParseError(); bool hasError = d.Parse(requestString.c_str()).HasParseError();
if (hasError) { if (hasError) {
@ -106,15 +109,13 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
std::string pattern = _getStringParameter(d, PATTERN_PARAM); std::string pattern = _getStringParameter(d, PATTERN_PARAM);
int tmId = _getIntParameter(d, TM_ID_PARAM); int tmId = _getIntParameter(d, TM_ID_PARAM);
Logger::logString("concordia phrase search pattern", pattern); Logger::logString("concordia phrase search pattern", pattern);
_logPhrase(requestString);
std::vector<Interval> intervals; std::vector<Interval> intervals;
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM]; const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) { for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
Logger::logInt("interval size", intervalsArray[i].Size()); intervals.push_back(Interval(intervalsArray[i][0].GetInt(), intervalsArray[i][1].GetInt()));
Logger::logInt("search interval start", intervalsArray[i][0].GetInt());
Logger::logInt("search interval end", intervalsArray[i][1].GetInt());
} }
_searcherController->concordiaPhraseSearch(jsonWriter, pattern, intervals, tmId);
//_searcherController->concordiaPhraseSearch(jsonWriter, pattern, tmId);
} else if (operation == ADD_TM_OP) { } else if (operation == ADD_TM_OP) {
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM); int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM); int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
@ -177,3 +178,9 @@ void ConcordiaServer::_addTm(int tmId) {
_concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath)); _concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath));
} }
/**
 * Appends one phrase search request to the phrase log file
 * (PHRASE_LOG_FILE_PATH), extended with a "timestamp" field holding the
 * current value of std::time(0). Each request is written on its own line.
 *
 * The timestamp is spliced in just before the last closing brace of the
 * request, so anything following that brace (e.g. a trailing newline sent
 * by the client) is dropped instead of corrupting the logged object.
 *
 * Logging is best-effort: a request without any closing brace or a log file
 * that cannot be opened results in nothing being written.
 *
 * \param phraseString raw JSON request string, expected to be a JSON object
 */
void ConcordiaServer::_logPhrase(std::string phraseString) {
    // Locate the brace that closes the request object. Blindly dropping the
    // last character is only correct when nothing follows that brace.
    const std::string::size_type closingBracePos = phraseString.rfind('}');
    if (closingBracePos == std::string::npos) {
        // not a JSON object - there is no place to splice the timestamp into
        return;
    }

    std::ofstream logFile;
    logFile.open(PHRASE_LOG_FILE_PATH, std::ios::out | std::ios::app);
    if (!logFile.is_open()) {
        // the phrase log must never break request handling
        return;
    }
    logFile << phraseString.substr(0, closingBracePos) << ", \"timestamp\":" << std::time(0) << "}\n";
    logFile.close();
}

View File

@ -30,6 +30,8 @@ public:
std::string handleRequest(std::string & requestString); std::string handleRequest(std::string & requestString);
private: private:
void _logPhrase(std::string phraseString);
std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException); int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);

View File

@ -1,5 +1,6 @@
#define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@" #define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@"
#define LOG_FILE_PATH "@LOG_FILE_PATH@" #define LOG_FILE_PATH "@LOG_FILE_PATH@"
#define PHRASE_LOG_FILE_PATH "@PHRASE_LOG_FILE_PATH@"
#define INDEX_DIRECTORY "@INDEX_DIRECTORY@" #define INDEX_DIRECTORY "@INDEX_DIRECTORY@"
// database connection information // database connection information

View File

@ -36,6 +36,69 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
} }
} }
/**
 * Performs a phrase search and writes the outcome as JSON.
 *
 * The phrase is the part of the pattern delimited by the first interval
 * (start inclusive, end exclusive). It is looked up with simpleSearch. When
 * it is found, the parts of the pattern lying between and after the
 * intervals are searched with concordiaSearch and all results are merged
 * into a single overlay, with positions expressed relative to the whole
 * pattern.
 *
 * Output on success:
 *   {"status":"success","found":true,"result":{"bestOverlay":[...]}}
 * or, when the phrase has no match:
 *   {"status":"success","found":false}
 * Otherwise an error is reported through JsonGenerator::signalError
 * (unknown tm, no intervals, invalid intervals).
 *
 * \param jsonWriter writer receiving the response
 * \param pattern the whole search pattern
 * \param intervals phrase intervals within the pattern; they must be given
 *                  in ascending order, must not overlap and must lie within
 *                  the pattern
 * \param tmId id of the translation memory to search in
 */
void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                               std::string & pattern,
                                               const std::vector<Interval> & intervals,
                                               const int tmId) {
    boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
    if (it == _concordiasMap->end()) {
        JsonGenerator::signalError(jsonWriter, "no such tm!");
        return;
    }
    if (intervals.size() == 0) {
        JsonGenerator::signalError(jsonWriter, "no intervals for phrase search");
        return;
    }

    // The interval bounds come from the client and are used as arguments of
    // std::string::substr below. A start position beyond the pattern makes
    // substr throw std::out_of_range, while a reversed or overlapping
    // interval yields a negative length which silently turns into "the rest
    // of the string". Reject such requests before anything is searched or
    // written. The casts make negative values fail the range check as well.
    std::string::size_type previousEnd = 0;
    BOOST_FOREACH(const Interval & interval, intervals) {
        const std::string::size_type intervalStart = static_cast<std::string::size_type>(interval.getStart());
        const std::string::size_type intervalEnd = static_cast<std::string::size_type>(interval.getEnd());
        if (intervalStart < previousEnd || intervalEnd < intervalStart || intervalEnd > pattern.size()) {
            JsonGenerator::signalError(jsonWriter, "invalid intervals for phrase search");
            return;
        }
        previousEnd = intervalEnd;
    }

    // only the phrase of the first interval is looked up
    std::string shortPattern = pattern.substr(intervals[0].getStart(), intervals[0].getEnd() - intervals[0].getStart());
    std::vector<SimpleSearchResult> shortPatternResults = _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));

    jsonWriter.StartObject();
    jsonWriter.String("status");
    jsonWriter.String("success");
    jsonWriter.String("found");
    if (shortPatternResults.size() > 0) {
        jsonWriter.Bool(true);

        std::vector<SimpleSearchResult> bestOverlay;

        int currStart = 0;
        BOOST_FOREACH(const Interval & interval, intervals) {
            // search the part of the pattern preceding this interval
            CompleteConcordiaSearchResult restResult = _unitDAO.getConcordiaResult(
                it->second->concordiaSearch(pattern.substr(currStart, interval.getStart()-currStart)));
            // positions returned for a substring are relative to that substring
            restResult.offsetPattern(currStart);
            bestOverlay.insert(bestOverlay.end(), restResult.getBestOverlay().begin(), restResult.getBestOverlay().end());

            // NOTE(review): every interval receives the first match of the
            // phrase taken from intervals[0] - confirm this is intended when
            // more than one interval is sent.
            SimpleSearchResult shortPatternresult = shortPatternResults[0];
            shortPatternresult.setMatchedPatternStart(interval.getStart());
            shortPatternresult.setMatchedPatternEnd(interval.getEnd());
            bestOverlay.push_back(shortPatternresult);
            currStart = interval.getEnd();
        }

        // search the part of the pattern following the last interval
        CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
            it->second->concordiaSearch(pattern.substr(currStart)));
        lastRestResult.offsetPattern(currStart);
        bestOverlay.insert(bestOverlay.end(), lastRestResult.getBestOverlay().begin(), lastRestResult.getBestOverlay().end());

        jsonWriter.String("result");
        jsonWriter.StartObject();
        jsonWriter.String("bestOverlay");
        jsonWriter.StartArray();
        BOOST_FOREACH(SimpleSearchResult & simpleResult, bestOverlay) {
            JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
        }
        jsonWriter.EndArray();
        jsonWriter.EndObject();
    } else {
        jsonWriter.Bool(false);
    }
    jsonWriter.EndObject();
}
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern, std::string & pattern,
const int tmId) { const int tmId) {

View File

@ -6,6 +6,7 @@
#include <boost/ptr_container/ptr_map.hpp> #include <boost/ptr_container/ptr_map.hpp>
#include <concordia/concordia.hpp> #include <concordia/concordia.hpp>
#include <concordia/concordia_exception.hpp> #include <concordia/concordia_exception.hpp>
#include <concordia/interval.hpp>
#include "unit_dao.hpp" #include "unit_dao.hpp"
#include "simple_search_result.hpp" #include "simple_search_result.hpp"
@ -30,6 +31,10 @@ public:
std::string & pattern, std::string & pattern,
const int tmId); const int tmId);
void concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
std::string & pattern,
const std::vector<Interval> & intervals,
const int tmId);
private: private:
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap; boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;

View File

@ -24,4 +24,8 @@ void SimpleSearchResult::addMatchedTargetFragment(const std::pair<int,int> & tar
_targetFragments.push_back(targetFragment); _targetFragments.push_back(targetFragment);
} }
/**
 * Moves the matched pattern fragment by the given amount: both its start
 * and its end are increased by offset.
 *
 * \param offset value added to the matched pattern start and end
 */
void SimpleSearchResult::offsetPattern(int offset) {
    _matchedPatternEnd = _matchedPatternEnd + offset;
    _matchedPatternStart = _matchedPatternStart + offset;
}

View File

@ -28,10 +28,18 @@ public:
return _matchedPatternStart; return _matchedPatternStart;
} }
/**
 * Overwrites the stored matched pattern start.
 *
 * \param newStart new value of the matched pattern start
 */
void setMatchedPatternStart(int newStart) {
_matchedPatternStart = newStart;
}
int getMatchedPatternEnd() const { int getMatchedPatternEnd() const {
return _matchedPatternEnd; return _matchedPatternEnd;
} }
/**
 * Overwrites the stored matched pattern end.
 *
 * \param newEnd new value of the matched pattern end
 */
void setMatchedPatternEnd(int newEnd) {
_matchedPatternEnd = newEnd;
}
int getMatchedExampleStart() const { int getMatchedExampleStart() const {
return _matchedExampleStart; return _matchedExampleStart;
} }
@ -54,6 +62,8 @@ public:
void addMatchedTargetFragment(const std::pair<int,int> & targetFragment); void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
void offsetPattern(int offset);
private: private:
int _id; int _id;

View File

@ -116,7 +116,7 @@ void UnitDAO::_getResultsFromFragments(
delete param; delete param;
} }
//TODO now add all target fragments matched with this fragment // now add all target fragments matched with this fragment
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos"; std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
std::vector<QueryParam*> targetParams; std::vector<QueryParam*> targetParams;
targetParams.push_back(new IntParam(fragment.getExampleId())); targetParams.push_back(new IntParam(fragment.getExampleId()));