working concordia searching

This commit is contained in:
rjawor 2015-08-24 19:28:41 +02:00
parent ac7bc4cdbe
commit 14dc4abd56
11 changed files with 191 additions and 43 deletions

View File

@ -12,6 +12,7 @@ add_executable(concordia_server_process
logger.cpp
int_array_param.cpp
simple_search_result.cpp
complete_concordia_search_result.cpp
)
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)

View File

@ -0,0 +1,10 @@
#include "complete_concordia_search_result.hpp"
/*! Builds a result that carries the given best overlay score.
    Only the score is initialised here; the overlay vector member is
    default-constructed and is filled in later by the caller.
  \param bestOverlayScore score to store, returned by getBestOverlayScore()
*/
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(const double bestOverlayScore)
    : _bestOverlayScore(bestOverlayScore) {
}
/*! Destructor. The body is empty; members clean up after themselves.
*/
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {}

View File

@ -0,0 +1,32 @@
#ifndef COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
#define COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
#include <vector>
#include "simple_search_result.hpp"
/*! Holds the outcome of a concordia search in a form ready for output:
    the score of the best overlay and the list of SimpleSearchResult
    entries that make up that overlay.
*/
class CompleteConcordiaSearchResult {
public:
    /*! Constructor.
      \param bestOverlayScore score of the best overlay; the overlay list
             itself starts empty and is filled through getBestOverlay()
    */
    CompleteConcordiaSearchResult(const double bestOverlayScore);

    /*! Destructor.
    */
    virtual ~CompleteConcordiaSearchResult();

    /*! Getter for the best overlay score.
      Const-qualified (and returning a plain value) so that it can be
      called on const objects, matching the getters of SimpleSearchResult.
      \returns the score passed to the constructor
    */
    double getBestOverlayScore() const {
        return _bestOverlayScore;
    }

    /*! Mutable access to the best overlay, used to fill it in.
      \returns reference to the internal vector of results
    */
    std::vector<SimpleSearchResult> & getBestOverlay() {
        return _bestOverlay;
    }

    /*! Read-only access to the best overlay for const objects.
      \returns const reference to the internal vector of results
    */
    const std::vector<SimpleSearchResult> & getBestOverlay() const {
        return _bestOverlay;
    }

private:
    double _bestOverlayScore;

    std::vector<SimpleSearchResult> _bestOverlay;
};
#endif

View File

@ -8,7 +8,8 @@ JsonGenerator::~JsonGenerator() {
}
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message) {
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::string & message) {
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("error");
@ -17,5 +18,21 @@ void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jso
jsonWriter.EndObject();
}
/*! Writes one search result to the JSON writer as a single object with
    the keys "id", "matchedExampleStart", "matchedExampleEnd",
    "sourceSegment" and "targetSegment", in that order.
  \param jsonWriter writer the object is appended to
  \param result search result to serialize (read-only)
*/
void JsonGenerator::writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                      const SimpleSearchResult & result) {
    // Bind the segment texts once; both getters return references.
    const std::string & sourceText = result.getSourceSegment();
    const std::string & targetText = result.getTargetSegment();

    jsonWriter.StartObject();

    jsonWriter.String("id");
    jsonWriter.Int(result.getId());

    // Position of the match inside the stored example.
    jsonWriter.String("matchedExampleStart");
    jsonWriter.Int(result.getMatchedExampleStart());
    jsonWriter.String("matchedExampleEnd");
    jsonWriter.Int(result.getMatchedExampleEnd());

    jsonWriter.String("sourceSegment");
    jsonWriter.String(sourceText.c_str());
    jsonWriter.String("targetSegment");
    jsonWriter.String(targetText.c_str());

    jsonWriter.EndObject();
}

View File

@ -5,6 +5,8 @@
#include "rapidjson/writer.h"
#include "simple_search_result.hpp"
class JsonGenerator {
public:
/*! Constructor.
@ -14,7 +16,11 @@ public:
*/
virtual ~JsonGenerator();
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message);
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const std::string & message);
static void writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
const SimpleSearchResult & result);
private:

View File

@ -3,6 +3,8 @@
#include <boost/foreach.hpp>
#include <vector>
#include "json_generator.hpp"
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
throw(ConcordiaException):
_concordia(concordia) {
@ -21,29 +23,32 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
jsonWriter.String("results");
jsonWriter.StartArray();
BOOST_FOREACH(SimpleSearchResult & result, results) {
jsonWriter.StartObject();
jsonWriter.String("id");
jsonWriter.Int(result.getId());
jsonWriter.String("matchedFragmentStart");
jsonWriter.Int(result.getMatchedFragmentStart());
jsonWriter.String("matchedFragmentEnd");
jsonWriter.Int(result.getMatchedFragmentEnd());
jsonWriter.String("sourceSegment");
jsonWriter.String(result.getSourceSegment().c_str());
jsonWriter.String("targetSegment");
jsonWriter.String(result.getTargetSegment().c_str());
jsonWriter.EndObject();
JsonGenerator::writeSearchResult(jsonWriter, result);
}
jsonWriter.EndArray();
jsonWriter.EndObject();
}
/*! Runs a concordia search for the given pattern and writes the outcome
    to the JSON writer. The raw result obtained from Concordia is turned
    into a CompleteConcordiaSearchResult by the unit DAO before output.
  \param jsonWriter writer receiving the response
  \param pattern pattern passed to Concordia::concordiaSearch
*/
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(_concordia->concordiaSearch(pattern));
jsonWriter.StartObject();
jsonWriter.String("status");
jsonWriter.String("error");
jsonWriter.String("data");
jsonWriter.String("concordia searching not yet implemented");
jsonWriter.String("success");
jsonWriter.String("result");
// nested object holding the score and the overlay
jsonWriter.StartObject();
jsonWriter.String("bestOverlayScore");
jsonWriter.Double(result.getBestOverlayScore());
jsonWriter.String("bestOverlay");
jsonWriter.StartArray();
// one JSON object per entry of the best overlay
BOOST_FOREACH(SimpleSearchResult & simpleResult, result.getBestOverlay()) {
JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
}
jsonWriter.EndArray();
jsonWriter.EndObject();
jsonWriter.EndObject();
}

View File

@ -2,13 +2,17 @@
/*! Constructor. Stores every argument in the member of the same name
    (prefixed with an underscore); performs no other work.
*/
SimpleSearchResult::SimpleSearchResult(
const int id,
const int matchedFragmentStart,
const int matchedFragmentEnd,
const int matchedPatternStart,
const int matchedPatternEnd,
const int matchedExampleStart,
const int matchedExampleEnd,
const std::string & sourceSegment,
const std::string & targetSegment):
_id(id),
_matchedFragmentStart(matchedFragmentStart),
_matchedFragmentEnd(matchedFragmentEnd),
_matchedPatternStart(matchedPatternStart),
_matchedPatternEnd(matchedPatternEnd),
_matchedExampleStart(matchedExampleStart),
_matchedExampleEnd(matchedExampleEnd),
_sourceSegment(sourceSegment),
_targetSegment(targetSegment) {
}

View File

@ -8,8 +8,10 @@ public:
/*! Constructor.
*/
SimpleSearchResult(const int id,
const int matchedFragmentStart,
const int matchedFragmentEnd,
const int matchedPatternStart,
const int matchedPatternEnd,
const int matchedExampleStart,
const int matchedExampleEnd,
const std::string & sourceSegment,
const std::string & targetSegment
);
@ -17,32 +19,44 @@ public:
*/
virtual ~SimpleSearchResult();
const int getId() {
int getId() const {
return _id;
}
const int getMatchedFragmentStart() {
return _matchedFragmentStart;
int getMatchedPatternStart() const {
return _matchedPatternStart;
}
const int getMatchedFragmentEnd() {
return _matchedFragmentEnd;
int getMatchedPatternEnd() const {
return _matchedPatternEnd;
}
const std::string & getSourceSegment() {
int getMatchedExampleStart() const {
return _matchedExampleStart;
}
int getMatchedExampleEnd() const {
return _matchedExampleEnd;
}
const std::string & getSourceSegment() const {
return _sourceSegment;
}
const std::string & getTargetSegment() {
const std::string & getTargetSegment() const {
return _targetSegment;
}
private:
int _id;
int _matchedFragmentStart;
int _matchedPatternStart;
int _matchedFragmentEnd;
int _matchedPatternEnd;
int _matchedExampleStart;
int _matchedExampleEnd;
std::string _sourceSegment;

View File

@ -45,31 +45,57 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
}
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
std::vector<SimpleSearchResult> results;
TokenizedSentence ts("");
_getResultsFromFragments(results, fragments, ts);
return results;
}
/*! Converts a raw concordia search result into a complete one.
    The best overlay score is copied over, and the fragments of the raw
    best overlay are resolved into SimpleSearchResult entries by
    _getResultsFromFragments, using the raw result's tokenized pattern.
  \param rawConcordiaResult raw result; it is dereferenced without a
         null check, so it must point to a valid object
  \returns the complete result with score and best overlay filled in
*/
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult) {
    CompleteConcordiaSearchResult completeResult(rawConcordiaResult->getBestOverlayScore());

    // The helper appends directly into the overlay owned by completeResult.
    std::vector<SimpleSearchResult> & overlayResults = completeResult.getBestOverlay();
    _getResultsFromFragments(overlayResults,
                             rawConcordiaResult->getBestOverlay(),
                             rawConcordiaResult->getTokenizedPattern());

    return completeResult;
}
void UnitDAO::_getResultsFromFragments(
std::vector<SimpleSearchResult> & results,
const std::vector<MatchedPatternFragment> & fragments,
const TokenizedSentence & tokenizedPattern) {
DBconnection connection;
connection.startTransaction();
BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
BOOST_FOREACH(const MatchedPatternFragment & fragment, fragments) {
int matchedPatternStart = 0;
int matchedPatternEnd = 0;
if (tokenizedPattern.getTokens().size() > 0) {
// if it is concordia searching
matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
}
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
std::vector<QueryParam*> params;
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
params.push_back(new IntParam(fragment.getExampleId()));
PGresult * result = connection.execute(query, params);
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
connection.getIntValue(result,0,3),
connection.getIntValue(result,0,4),
connection.getStringValue(result,0,1),
connection.getStringValue(result,0,2)));
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0), // example id
matchedPatternStart,
matchedPatternEnd,
connection.getIntValue(result,0,3), // matched example start
connection.getIntValue(result,0,4), // matched example end
connection.getStringValue(result,0,1), // source segment
connection.getStringValue(result,0,2))); // target segment
connection.clearResult(result);
BOOST_FOREACH (QueryParam * param, params) {
delete param;
}
}
connection.endTransaction();
return results;
}

View File

@ -8,9 +8,11 @@
#include <concordia/tokenized_sentence.hpp>
#include <concordia/substring_occurence.hpp>
#include <concordia/matched_pattern_fragment.hpp>
#include <concordia/concordia_search_result.hpp>
#include <boost/shared_ptr.hpp>
#include "simple_search_result.hpp"
#include "complete_concordia_search_result.hpp"
#include "db_connection.hpp"
class UnitDAO {
@ -32,9 +34,15 @@ public:
const std::vector<std::string> & targetSentences,
const std::vector<int> & tmIds);
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
private:
void _getResultsFromFragments(std::vector<SimpleSearchResult> & results,
const std::vector<MatchedPatternFragment> & fragments,
const TokenizedSentence & tokenizedPattern);
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
int _addSingleSentence(

25
tests/concordiaSearch.py Executable file
View File

@ -0,0 +1,25 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import unittest
import json
import urllib2
import sys
import time
# Manual test client: sends a concordiaSearch request for the pattern given
# on the command line to the server at http://localhost and prints the
# round-trip time and the decoded JSON response.
#
# Usage: concordiaSearch.py <pattern>

# Fail with a usage message instead of an IndexError traceback when the
# pattern argument is missing.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s <pattern>\n" % sys.argv[0])
    sys.exit(2)

data = {
    'operation': 'concordiaSearch',
    'pattern': sys.argv[1]
}

start = time.time()
req = urllib2.Request('http://localhost')
req.add_header('Content-Type', 'application/json')
# Passing a body makes urllib2 issue a POST request.
connection = urllib2.urlopen(req, json.dumps(data))
try:
    response = json.loads(connection.read())
finally:
    # Always release the HTTP connection, even if decoding fails.
    connection.close()
end = time.time()

# Single-argument parenthesised print gives the same output as the
# original print statements.
print("Execution time: %.4f seconds." % (end - start))
print("Result: ")
print(response)