phrase search
This commit is contained in:
parent
c3df18c110
commit
b867f8dacc
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,6 +1,7 @@
|
||||
build/
|
||||
logs/concordia-server.log
|
||||
logs/pgbouncer.log
|
||||
logs/phrase-searches.json
|
||||
concordia.cfg
|
||||
concordia-server/config.hpp
|
||||
index/
|
||||
|
@ -62,6 +62,7 @@ configure_file (
|
||||
|
||||
set(CONFIG_FILE_PATH "${concordia-server_SOURCE_DIR}/concordia.cfg")
|
||||
set(LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/concordia-server.log")
|
||||
set(PHRASE_LOG_FILE_PATH "${concordia-server_SOURCE_DIR}/logs/phrase-searches.json")
|
||||
|
||||
# --------------
|
||||
# db settings
|
||||
|
@ -34,7 +34,13 @@
|
||||
display:none;
|
||||
}
|
||||
|
||||
#phrase-icon {
|
||||
#phrase-on-icon {
|
||||
cursor:pointer;
|
||||
vertical-align:middle;
|
||||
margin-right:20px;
|
||||
}
|
||||
|
||||
#phrase-off-icon {
|
||||
cursor:pointer;
|
||||
vertical-align:middle;
|
||||
margin-right:20px;
|
||||
|
BIN
cat/images/switchOff.png
Normal file
BIN
cat/images/switchOff.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 KiB |
BIN
cat/images/switchOn.png
Normal file
BIN
cat/images/switchOn.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.3 KiB |
@ -34,7 +34,12 @@ function phraseSearchHandle(tmid, intervals) {
|
||||
type: 'post',
|
||||
dataType: 'json',
|
||||
success: function (data) {
|
||||
$('#result').html(renderResult(data));
|
||||
if (data['found']) {
|
||||
$('#result').html(renderResult(data));
|
||||
} else {
|
||||
$('#phrase-prompt').html('<b>Your phrase was not found. Try selecting another phrase: </b>').fadeOut(200).fadeIn(200);
|
||||
clearTextSelections();
|
||||
}
|
||||
},
|
||||
data: concordiaRequest
|
||||
});
|
||||
@ -43,10 +48,13 @@ function phraseSearchHandle(tmid, intervals) {
|
||||
function renderResult(data) {
|
||||
var res = '';
|
||||
|
||||
var score = data['result']['bestOverlayScore']*100;
|
||||
|
||||
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>';
|
||||
res += '<div id="phrase-selection"><img id="phrase-icon" src="../images/phrase.png" alt="phrase search" onclick="togglePhraseSearchMode()" title="search for phrases"/><span id="phrase-prompt" class="hidden">Select continuous phrase: <img id="cancel-button" src="../images/cancel-button.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel searching for phrases"/></span></div>';
|
||||
if (typeof(data['result']['bestOverlayScore']) === 'undefined') {
|
||||
// ignore
|
||||
} else {
|
||||
var score = data['result']['bestOverlayScore']*100;
|
||||
res += '<div id="result-score">Concordia score: <b>'+score.toFixed(0)+'%</b></div>';
|
||||
}
|
||||
res += '<div id="phrase-selection">Phrase search mode: <img id="phrase-off-icon" src="../images/switchOff.png" alt="enter phrase search mode" onclick="togglePhraseSearchMode()" title="search for phrases"/><img class="hidden" id="phrase-on-icon" src="../images/switchOn.png" alt="cancel phrase search" onclick="togglePhraseSearchMode()" title="cancel phrase search"/><span id="phrase-prompt" class="hidden">Select continuous phrase: </span></div>';
|
||||
|
||||
var inputSentence = $('#search-input').val();
|
||||
var markedSentence = '';
|
||||
@ -104,7 +112,8 @@ function renderFragment(fragment, number) {
|
||||
|
||||
/**
 * Flips the page between normal mode and phrase search mode.
 *
 * Toggles the 'phrase-mode' class on the result sentence, toggles the
 * 'hidden' class on each phrase-search control, and finally clears any
 * text selection via clearTextSelections().
 */
function togglePhraseSearchMode() {
    var hiddenToggles = [
        '#phrase-icon',
        '#phrase-on-icon',
        '#phrase-off-icon',
        '#phrase-prompt'
    ];

    $('#result-sentence').toggleClass('phrase-mode');
    for (var i = 0; i < hiddenToggles.length; i++) {
        $(hiddenToggles[i]).toggleClass('hidden');
    }
    clearTextSelections();
}
|
||||
|
@ -8,4 +8,4 @@ prompt@#@Enter search pattern (Croatian sentence):
|
||||
suggestion@#@Kazna medijskom mogulu obnovila raspravu u Makedoniji
|
||||
suggestion@#@Član Predsjedništva BiH Komšić podnio ostavku u svojoj stranci
|
||||
suggestion@#@ozbiljno analizira proces
|
||||
suggestion@#@Kazna medijskom podnio ostavku ozbiljno analizira proces
|
||||
suggestion@#@Nagrada koja nosi ime po našem velikom snimatelju dodjeljuje se za izniman doprinos filmskoj umjetnosti.
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include "complete_concordia_search_result.hpp"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
|
||||
const double bestOverlayScore):
|
||||
_bestOverlayScore(bestOverlayScore) {
|
||||
@ -8,3 +10,9 @@ CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(
|
||||
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {
|
||||
}
|
||||
|
||||
void CompleteConcordiaSearchResult::offsetPattern(int offset) {
|
||||
BOOST_FOREACH(SimpleSearchResult & simpleResult, _bestOverlay) {
|
||||
simpleResult.offsetPattern(offset);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ public:
|
||||
return _bestOverlay;
|
||||
}
|
||||
|
||||
void offsetPattern(int offset);
|
||||
|
||||
private:
|
||||
double _bestOverlayScore;
|
||||
|
||||
|
@ -2,6 +2,9 @@
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <ctime>
|
||||
|
||||
#include <concordia/interval.hpp>
|
||||
|
||||
@ -37,7 +40,7 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
outputString << "Content-type: application/json\r\n\r\n";
|
||||
try {
|
||||
rapidjson::Document d;
|
||||
Logger::logString("concordia request string", requestString);
|
||||
// Logger::logString("concordia request string", requestString);
|
||||
bool hasError = d.Parse(requestString.c_str()).HasParseError();
|
||||
|
||||
if (hasError) {
|
||||
@ -106,15 +109,13 @@ std::string ConcordiaServer::handleRequest(std::string & requestString) {
|
||||
std::string pattern = _getStringParameter(d, PATTERN_PARAM);
|
||||
int tmId = _getIntParameter(d, TM_ID_PARAM);
|
||||
Logger::logString("concordia phrase search pattern", pattern);
|
||||
_logPhrase(requestString);
|
||||
std::vector<Interval> intervals;
|
||||
const rapidjson::Value & intervalsArray = d[INTERVALS_PARAM];
|
||||
for (rapidjson::SizeType i = 0; i < intervalsArray.Size(); i++) {
|
||||
Logger::logInt("interval size", intervalsArray[i].Size());
|
||||
Logger::logInt("search interval start", intervalsArray[i][0].GetInt());
|
||||
Logger::logInt("search interval end", intervalsArray[i][1].GetInt());
|
||||
intervals.push_back(Interval(intervalsArray[i][0].GetInt(), intervalsArray[i][1].GetInt()));
|
||||
}
|
||||
|
||||
//_searcherController->concordiaPhraseSearch(jsonWriter, pattern, tmId);
|
||||
_searcherController->concordiaPhraseSearch(jsonWriter, pattern, intervals, tmId);
|
||||
} else if (operation == ADD_TM_OP) {
|
||||
int sourceLangId = _getIntParameter(d, SOURCE_LANG_PARAM);
|
||||
int targetLangId = _getIntParameter(d, TARGET_LANG_PARAM);
|
||||
@ -177,3 +178,9 @@ void ConcordiaServer::_addTm(int tmId) {
|
||||
_concordiasMap->insert(tmId, new Concordia(indexPath.str(), _configFilePath));
|
||||
}
|
||||
|
||||
/**
 * Appends one phrase-search request to the phrase log as a single JSON line.
 *
 * The request text is expected to be a JSON object. A "timestamp" member
 * holding the current std::time(0) value is spliced in just before the
 * object's closing brace, and the record is appended to PHRASE_LOG_FILE_PATH
 * followed by a newline.
 *
 * Logging is best effort: when the text contains no closing brace to splice
 * into, or the log file cannot be opened, nothing is written.
 *
 * @param phraseString raw JSON text of the request
 */
void ConcordiaServer::_logPhrase(std::string phraseString) {
    // Find the closing brace explicitly instead of assuming it is the very
    // last character: a trailing newline or whitespace after the object would
    // otherwise make us cut off the wrong character and corrupt the record.
    const std::string::size_type closingBrace = phraseString.rfind('}');
    if (closingBrace == std::string::npos) {
        return;
    }

    std::ofstream logFile(PHRASE_LOG_FILE_PATH, std::ios::out | std::ios::app);
    if (!logFile.is_open()) {
        return;
    }

    const std::string objectBody = phraseString.substr(0, closingBrace);

    // A member-less object ("{}") must not receive a leading comma.
    const std::string::size_type lastContent = objectBody.find_last_not_of(" \t\r\n");
    const bool hasMembers =
        lastContent != std::string::npos && objectBody[lastContent] != '{';

    logFile << objectBody << (hasMembers ? ", " : "")
            << "\"timestamp\":" << std::time(0) << "}\n";
    // The stream is flushed and closed by its destructor.
}
|
||||
|
@ -30,6 +30,8 @@ public:
|
||||
std::string handleRequest(std::string & requestString);
|
||||
|
||||
private:
|
||||
void _logPhrase(std::string phraseString);
|
||||
|
||||
std::string _getStringParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
||||
|
||||
int _getIntParameter(rapidjson::Document & d, const char * name) throw (ConcordiaException);
|
||||
|
@ -1,5 +1,6 @@
|
||||
#define CONFIG_FILE_PATH "@CONFIG_FILE_PATH@"
|
||||
#define LOG_FILE_PATH "@LOG_FILE_PATH@"
|
||||
#define PHRASE_LOG_FILE_PATH "@PHRASE_LOG_FILE_PATH@"
|
||||
#define INDEX_DIRECTORY "@INDEX_DIRECTORY@"
|
||||
|
||||
// database connection information
|
||||
|
@ -36,6 +36,69 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Performs a phrase search and writes the outcome to jsonWriter.
 *
 * The part of the pattern covered by the first interval is looked up with a
 * simple search. If it yields at least one result, the remaining parts of the
 * pattern (before, between and after the intervals) are searched with
 * concordia search, and everything is merged into one overlay expressed in
 * coordinates of the whole pattern.
 *
 * Output written to jsonWriter:
 *  - an error (via JsonGenerator::signalError) when the TM id is unknown,
 *    when no intervals are given, or when the intervals are not usable;
 *  - otherwise an object with "status":"success", a boolean "found" and,
 *    when found, "result" holding the "bestOverlay" array.
 *
 * All searching is done before the first JSON token is emitted, so a failure
 * inside a search cannot leave a half-written success response in the writer.
 *
 * @param jsonWriter writer receiving the response
 * @param pattern    the full search pattern
 * @param intervals  phrase positions inside the pattern; must be ordered,
 *                   non-overlapping and lie within the pattern
 * @param tmId       key of the translation memory in _concordiasMap
 */
void SearcherController::concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                               std::string & pattern,
                                               const std::vector<Interval> & intervals,
                                               const int tmId) {
    boost::ptr_map<int,Concordia>::iterator it = _concordiasMap->find(tmId);
    if (it == _concordiasMap->end()) {
        JsonGenerator::signalError(jsonWriter, "no such tm!");
        return;
    }
    if (intervals.empty()) {
        JsonGenerator::signalError(jsonWriter, "no intervals for phrase search");
        return;
    }

    // Reject intervals that would make the substr() calls below misbehave:
    // a start before the previous end or an end before its start produces a
    // negative length (which wraps around to "rest of the string"), and a
    // position past the end of the pattern makes substr() throw.
    const int patternLength = static_cast<int>(pattern.size());
    int previousEnd = 0;
    BOOST_FOREACH(const Interval & interval, intervals) {
        const int start = static_cast<int>(interval.getStart());
        const int end = static_cast<int>(interval.getEnd());
        if (start < previousEnd || end < start || end > patternLength) {
            JsonGenerator::signalError(jsonWriter, "invalid intervals for phrase search");
            return;
        }
        previousEnd = end;
    }

    // Only the first interval is actually looked up as a phrase.
    std::string shortPattern = pattern.substr(intervals[0].getStart(),
                                              intervals[0].getEnd() - intervals[0].getStart());
    std::vector<SimpleSearchResult> shortPatternResults =
        _unitDAO.getSearchResults(it->second->simpleSearch(shortPattern));
    const bool found = !shortPatternResults.empty();

    std::vector<SimpleSearchResult> bestOverlay;
    if (found) {
        int currStart = 0;
        BOOST_FOREACH(const Interval & interval, intervals) {
            // Concordia-search the text between the previous phrase and this one,
            // then shift its results into whole-pattern coordinates.
            CompleteConcordiaSearchResult restResult = _unitDAO.getConcordiaResult(
                it->second->concordiaSearch(pattern.substr(currStart, interval.getStart() - currStart)));
            restResult.offsetPattern(currStart);
            // NOTE(review): taking begin() and end() from two separate calls is only
            // valid if getBestOverlay() returns a reference - confirm in the header.
            bestOverlay.insert(bestOverlay.end(),
                               restResult.getBestOverlay().begin(),
                               restResult.getBestOverlay().end());

            // NOTE(review): the top result of the first interval's search is reused
            // for every interval; verify this is intended for multi-interval requests.
            SimpleSearchResult phraseResult = shortPatternResults[0];
            phraseResult.setMatchedPatternStart(interval.getStart());
            phraseResult.setMatchedPatternEnd(interval.getEnd());
            bestOverlay.push_back(phraseResult);

            currStart = interval.getEnd();
        }

        // Whatever follows the last phrase.
        CompleteConcordiaSearchResult lastRestResult = _unitDAO.getConcordiaResult(
            it->second->concordiaSearch(pattern.substr(currStart)));
        lastRestResult.offsetPattern(currStart);
        bestOverlay.insert(bestOverlay.end(),
                           lastRestResult.getBestOverlay().begin(),
                           lastRestResult.getBestOverlay().end());
    }

    jsonWriter.StartObject();
    jsonWriter.String("status");
    jsonWriter.String("success");
    jsonWriter.String("found");
    jsonWriter.Bool(found);
    if (found) {
        jsonWriter.String("result");
        jsonWriter.StartObject();
        jsonWriter.String("bestOverlay");
        jsonWriter.StartArray();
        BOOST_FOREACH(SimpleSearchResult & simpleResult, bestOverlay) {
            JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
        }
        jsonWriter.EndArray();
        jsonWriter.EndObject();
    }
    jsonWriter.EndObject();
}
|
||||
|
||||
|
||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const int tmId) {
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <boost/ptr_container/ptr_map.hpp>
|
||||
#include <concordia/concordia.hpp>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
#include <concordia/interval.hpp>
|
||||
|
||||
#include "unit_dao.hpp"
|
||||
#include "simple_search_result.hpp"
|
||||
@ -30,6 +31,10 @@ public:
|
||||
std::string & pattern,
|
||||
const int tmId);
|
||||
|
||||
void concordiaPhraseSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
std::string & pattern,
|
||||
const std::vector<Interval> & intervals,
|
||||
const int tmId);
|
||||
private:
|
||||
|
||||
boost::shared_ptr<boost::ptr_map<int,Concordia> > _concordiasMap;
|
||||
|
@ -24,4 +24,8 @@ void SimpleSearchResult::addMatchedTargetFragment(const std::pair<int,int> & tar
|
||||
_targetFragments.push_back(targetFragment);
|
||||
}
|
||||
|
||||
/**
 * Moves the matched pattern range by a fixed amount.
 *
 * @param offset value added to both _matchedPatternStart and _matchedPatternEnd
 */
void SimpleSearchResult::offsetPattern(int offset) {
    _matchedPatternEnd = _matchedPatternEnd + offset;
    _matchedPatternStart = _matchedPatternStart + offset;
}
|
||||
|
||||
|
@ -28,10 +28,18 @@ public:
|
||||
return _matchedPatternStart;
|
||||
}
|
||||
|
||||
void setMatchedPatternStart(int newStart) {
|
||||
_matchedPatternStart = newStart;
|
||||
}
|
||||
|
||||
int getMatchedPatternEnd() const {
|
||||
return _matchedPatternEnd;
|
||||
}
|
||||
|
||||
void setMatchedPatternEnd(int newEnd) {
|
||||
_matchedPatternEnd = newEnd;
|
||||
}
|
||||
|
||||
int getMatchedExampleStart() const {
|
||||
return _matchedExampleStart;
|
||||
}
|
||||
@ -54,6 +62,8 @@ public:
|
||||
|
||||
void addMatchedTargetFragment(const std::pair<int,int> & targetFragment);
|
||||
|
||||
void offsetPattern(int offset);
|
||||
|
||||
private:
|
||||
int _id;
|
||||
|
||||
|
@ -116,7 +116,7 @@ void UnitDAO::_getResultsFromFragments(
|
||||
delete param;
|
||||
}
|
||||
|
||||
//TODO now add all target fragments matched with this fragment
|
||||
// now add all target fragments matched with this fragment
|
||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
||||
std::vector<QueryParam*> targetParams;
|
||||
targetParams.push_back(new IntParam(fragment.getExampleId()));
|
||||
|
Loading…
Reference in New Issue
Block a user