working concordia searching
This commit is contained in:
parent
ac7bc4cdbe
commit
14dc4abd56
@ -12,6 +12,7 @@ add_executable(concordia_server_process
|
|||||||
logger.cpp
|
logger.cpp
|
||||||
int_array_param.cpp
|
int_array_param.cpp
|
||||||
simple_search_result.cpp
|
simple_search_result.cpp
|
||||||
|
complete_concordia_search_result.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
||||||
|
|
||||||
|
10
concordia-server/complete_concordia_search_result.cpp
Normal file
10
concordia-server/complete_concordia_search_result.cpp
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include "complete_concordia_search_result.hpp"
|
||||||
|
|
||||||
|
// Stores the given score; the overlay vector is not mentioned in the
// initializer list, so it is default-constructed (empty).
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(const double bestOverlayScore)
    : _bestOverlayScore(bestOverlayScore) {}
|
||||||
|
|
||||||
|
// Intentionally empty: the destructor body performs no explicit cleanup.
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult() {}
|
||||||
|
|
32
concordia-server/complete_concordia_search_result.hpp
Normal file
32
concordia-server/complete_concordia_search_result.hpp
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
|
||||||
|
#define COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "simple_search_result.hpp"
|
||||||
|
|
||||||
|
class CompleteConcordiaSearchResult {
|
||||||
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
*/
|
||||||
|
CompleteConcordiaSearchResult(const double bestOverlayScore);
|
||||||
|
|
||||||
|
/*! Destructor.
|
||||||
|
*/
|
||||||
|
virtual ~CompleteConcordiaSearchResult();
|
||||||
|
|
||||||
|
const double getBestOverlayScore() {
|
||||||
|
return _bestOverlayScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<SimpleSearchResult> & getBestOverlay() {
|
||||||
|
return _bestOverlay;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
double _bestOverlayScore;
|
||||||
|
|
||||||
|
std::vector<SimpleSearchResult> _bestOverlay;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -8,7 +8,8 @@ JsonGenerator::~JsonGenerator() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message) {
|
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
const std::string & message) {
|
||||||
jsonWriter.StartObject();
|
jsonWriter.StartObject();
|
||||||
jsonWriter.String("status");
|
jsonWriter.String("status");
|
||||||
jsonWriter.String("error");
|
jsonWriter.String("error");
|
||||||
@ -16,6 +17,22 @@ void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jso
|
|||||||
jsonWriter.String(message.c_str());
|
jsonWriter.String(message.c_str());
|
||||||
jsonWriter.EndObject();
|
jsonWriter.EndObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Writes a single search result as one JSON object with the keys
    "id", "matchedExampleStart", "matchedExampleEnd", "sourceSegment"
    and "targetSegment", emitted in that order.
  \param jsonWriter writer that receives the object
  \param result search result to be written
*/
void JsonGenerator::writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                      const SimpleSearchResult & result) {
    // Read all fields up front; the getters are plain const accessors.
    const int exampleId = result.getId();
    const int exampleStart = result.getMatchedExampleStart();
    const int exampleEnd = result.getMatchedExampleEnd();
    const std::string & source = result.getSourceSegment();
    const std::string & target = result.getTargetSegment();

    jsonWriter.StartObject();

    jsonWriter.String("id");
    jsonWriter.Int(exampleId);

    jsonWriter.String("matchedExampleStart");
    jsonWriter.Int(exampleStart);

    jsonWriter.String("matchedExampleEnd");
    jsonWriter.Int(exampleEnd);

    jsonWriter.String("sourceSegment");
    jsonWriter.String(source.c_str());

    jsonWriter.String("targetSegment");
    jsonWriter.String(target.c_str());

    jsonWriter.EndObject();
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
|
|
||||||
#include "rapidjson/writer.h"
|
#include "rapidjson/writer.h"
|
||||||
|
|
||||||
|
#include "simple_search_result.hpp"
|
||||||
|
|
||||||
class JsonGenerator {
|
class JsonGenerator {
|
||||||
public:
|
public:
|
||||||
/*! Constructor.
|
/*! Constructor.
|
||||||
@ -14,7 +16,11 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~JsonGenerator();
|
virtual ~JsonGenerator();
|
||||||
|
|
||||||
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message);
|
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
const std::string & message);
|
||||||
|
|
||||||
|
static void writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||||
|
const SimpleSearchResult & result);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "json_generator.hpp"
|
||||||
|
|
||||||
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||||
throw(ConcordiaException):
|
throw(ConcordiaException):
|
||||||
_concordia(concordia) {
|
_concordia(concordia) {
|
||||||
@ -21,29 +23,32 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
|
|||||||
jsonWriter.String("results");
|
jsonWriter.String("results");
|
||||||
jsonWriter.StartArray();
|
jsonWriter.StartArray();
|
||||||
BOOST_FOREACH(SimpleSearchResult & result, results) {
|
BOOST_FOREACH(SimpleSearchResult & result, results) {
|
||||||
jsonWriter.StartObject();
|
JsonGenerator::writeSearchResult(jsonWriter, result);
|
||||||
jsonWriter.String("id");
|
|
||||||
jsonWriter.Int(result.getId());
|
|
||||||
jsonWriter.String("matchedFragmentStart");
|
|
||||||
jsonWriter.Int(result.getMatchedFragmentStart());
|
|
||||||
jsonWriter.String("matchedFragmentEnd");
|
|
||||||
jsonWriter.Int(result.getMatchedFragmentEnd());
|
|
||||||
jsonWriter.String("sourceSegment");
|
|
||||||
jsonWriter.String(result.getSourceSegment().c_str());
|
|
||||||
jsonWriter.String("targetSegment");
|
|
||||||
jsonWriter.String(result.getTargetSegment().c_str());
|
|
||||||
jsonWriter.EndObject();
|
|
||||||
}
|
}
|
||||||
jsonWriter.EndArray();
|
jsonWriter.EndArray();
|
||||||
jsonWriter.EndObject();
|
jsonWriter.EndObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
|
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
|
||||||
|
|
||||||
|
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(_concordia->concordiaSearch(pattern));
|
||||||
|
|
||||||
jsonWriter.StartObject();
|
jsonWriter.StartObject();
|
||||||
jsonWriter.String("status");
|
jsonWriter.String("status");
|
||||||
jsonWriter.String("error");
|
jsonWriter.String("success");
|
||||||
jsonWriter.String("data");
|
jsonWriter.String("result");
|
||||||
jsonWriter.String("concordia searching not yet implemented");
|
jsonWriter.StartObject();
|
||||||
|
jsonWriter.String("bestOverlayScore");
|
||||||
|
jsonWriter.Double(result.getBestOverlayScore());
|
||||||
|
jsonWriter.String("bestOverlay");
|
||||||
|
jsonWriter.StartArray();
|
||||||
|
BOOST_FOREACH(SimpleSearchResult & simpleResult, result.getBestOverlay()) {
|
||||||
|
JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
|
||||||
|
}
|
||||||
|
jsonWriter.EndArray();
|
||||||
|
jsonWriter.EndObject();
|
||||||
|
|
||||||
|
|
||||||
jsonWriter.EndObject();
|
jsonWriter.EndObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,13 +2,17 @@
|
|||||||
|
|
||||||
SimpleSearchResult::SimpleSearchResult(
|
SimpleSearchResult::SimpleSearchResult(
|
||||||
const int id,
|
const int id,
|
||||||
const int matchedFragmentStart,
|
const int matchedPatternStart,
|
||||||
const int matchedFragmentEnd,
|
const int matchedPatternEnd,
|
||||||
|
const int matchedExampleStart,
|
||||||
|
const int matchedExampleEnd,
|
||||||
const std::string & sourceSegment,
|
const std::string & sourceSegment,
|
||||||
const std::string & targetSegment):
|
const std::string & targetSegment):
|
||||||
_id(id),
|
_id(id),
|
||||||
_matchedFragmentStart(matchedFragmentStart),
|
_matchedPatternStart(matchedPatternStart),
|
||||||
_matchedFragmentEnd(matchedFragmentEnd),
|
_matchedPatternEnd(matchedPatternEnd),
|
||||||
|
_matchedExampleStart(matchedExampleStart),
|
||||||
|
_matchedExampleEnd(matchedExampleEnd),
|
||||||
_sourceSegment(sourceSegment),
|
_sourceSegment(sourceSegment),
|
||||||
_targetSegment(targetSegment) {
|
_targetSegment(targetSegment) {
|
||||||
}
|
}
|
||||||
|
@ -8,8 +8,10 @@ public:
|
|||||||
/*! Constructor.
|
/*! Constructor.
|
||||||
*/
|
*/
|
||||||
SimpleSearchResult(const int id,
|
SimpleSearchResult(const int id,
|
||||||
const int matchedFragmentStart,
|
const int matchedPatternStart,
|
||||||
const int matchedFragmentEnd,
|
const int matchedPatternEnd,
|
||||||
|
const int matchedExampleStart,
|
||||||
|
const int matchedExampleEnd,
|
||||||
const std::string & sourceSegment,
|
const std::string & sourceSegment,
|
||||||
const std::string & targetSegment
|
const std::string & targetSegment
|
||||||
);
|
);
|
||||||
@ -17,32 +19,44 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~SimpleSearchResult();
|
virtual ~SimpleSearchResult();
|
||||||
|
|
||||||
const int getId() {
|
int getId() const {
|
||||||
return _id;
|
return _id;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int getMatchedFragmentStart() {
|
int getMatchedPatternStart() const {
|
||||||
return _matchedFragmentStart;
|
return _matchedPatternStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int getMatchedFragmentEnd() {
|
int getMatchedPatternEnd() const {
|
||||||
return _matchedFragmentEnd;
|
return _matchedPatternEnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string & getSourceSegment() {
|
int getMatchedExampleStart() const {
|
||||||
|
return _matchedExampleStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMatchedExampleEnd() const {
|
||||||
|
return _matchedExampleEnd;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string & getSourceSegment() const {
|
||||||
return _sourceSegment;
|
return _sourceSegment;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string & getTargetSegment() {
|
const std::string & getTargetSegment() const {
|
||||||
return _targetSegment;
|
return _targetSegment;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int _id;
|
int _id;
|
||||||
|
|
||||||
int _matchedFragmentStart;
|
int _matchedPatternStart;
|
||||||
|
|
||||||
int _matchedFragmentEnd;
|
int _matchedPatternEnd;
|
||||||
|
|
||||||
|
int _matchedExampleStart;
|
||||||
|
|
||||||
|
int _matchedExampleEnd;
|
||||||
|
|
||||||
std::string _sourceSegment;
|
std::string _sourceSegment;
|
||||||
|
|
||||||
|
@ -45,31 +45,57 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
|
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
|
||||||
std::vector<SimpleSearchResult> results;
|
std::vector<SimpleSearchResult> results;
|
||||||
|
TokenizedSentence ts("");
|
||||||
|
_getResultsFromFragments(results, fragments, ts);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Converts a raw concordia search result into a CompleteConcordiaSearchResult.
    The best overlay score is copied over, and the best overlay is filled by
    _getResultsFromFragments from the raw result's best overlay fragments
    and its tokenized pattern.
  \param rawConcordiaResult raw result to convert; dereferenced without a null check
  \returns the converted result
*/
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult) {
    CompleteConcordiaSearchResult completeResult(rawConcordiaResult->getBestOverlayScore());

    // The overlay is filled in place through the mutable reference.
    std::vector<SimpleSearchResult> & overlay = completeResult.getBestOverlay();
    _getResultsFromFragments(overlay,
                             rawConcordiaResult->getBestOverlay(),
                             rawConcordiaResult->getTokenizedPattern());

    return completeResult;
}
|
||||||
|
|
||||||
|
void UnitDAO::_getResultsFromFragments(
|
||||||
|
std::vector<SimpleSearchResult> & results,
|
||||||
|
const std::vector<MatchedPatternFragment> & fragments,
|
||||||
|
const TokenizedSentence & tokenizedPattern) {
|
||||||
|
|
||||||
DBconnection connection;
|
DBconnection connection;
|
||||||
connection.startTransaction();
|
connection.startTransaction();
|
||||||
|
|
||||||
BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
|
BOOST_FOREACH(const MatchedPatternFragment & fragment, fragments) {
|
||||||
|
int matchedPatternStart = 0;
|
||||||
|
int matchedPatternEnd = 0;
|
||||||
|
if (tokenizedPattern.getTokens().size() > 0) {
|
||||||
|
// if it is concordia searching
|
||||||
|
matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
|
||||||
|
matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
|
||||||
|
}
|
||||||
|
|
||||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
||||||
std::vector<QueryParam*> params;
|
std::vector<QueryParam*> params;
|
||||||
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
|
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
|
||||||
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
|
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
|
||||||
params.push_back(new IntParam(fragment.getExampleId()));
|
params.push_back(new IntParam(fragment.getExampleId()));
|
||||||
PGresult * result = connection.execute(query, params);
|
PGresult * result = connection.execute(query, params);
|
||||||
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
|
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0), // example id
|
||||||
connection.getIntValue(result,0,3),
|
matchedPatternStart,
|
||||||
connection.getIntValue(result,0,4),
|
matchedPatternEnd,
|
||||||
connection.getStringValue(result,0,1),
|
connection.getIntValue(result,0,3), // matched example start
|
||||||
connection.getStringValue(result,0,2)));
|
connection.getIntValue(result,0,4), // matched example end
|
||||||
|
connection.getStringValue(result,0,1), // source segment
|
||||||
|
connection.getStringValue(result,0,2))); // target segment
|
||||||
connection.clearResult(result);
|
connection.clearResult(result);
|
||||||
BOOST_FOREACH (QueryParam * param, params) {
|
BOOST_FOREACH (QueryParam * param, params) {
|
||||||
delete param;
|
delete param;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
connection.endTransaction();
|
connection.endTransaction();
|
||||||
return results;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,9 +8,11 @@
|
|||||||
#include <concordia/tokenized_sentence.hpp>
|
#include <concordia/tokenized_sentence.hpp>
|
||||||
#include <concordia/substring_occurence.hpp>
|
#include <concordia/substring_occurence.hpp>
|
||||||
#include <concordia/matched_pattern_fragment.hpp>
|
#include <concordia/matched_pattern_fragment.hpp>
|
||||||
|
#include <concordia/concordia_search_result.hpp>
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
#include "simple_search_result.hpp"
|
#include "simple_search_result.hpp"
|
||||||
|
#include "complete_concordia_search_result.hpp"
|
||||||
#include "db_connection.hpp"
|
#include "db_connection.hpp"
|
||||||
|
|
||||||
class UnitDAO {
|
class UnitDAO {
|
||||||
@ -32,9 +34,15 @@ public:
|
|||||||
const std::vector<std::string> & targetSentences,
|
const std::vector<std::string> & targetSentences,
|
||||||
const std::vector<int> & tmIds);
|
const std::vector<int> & tmIds);
|
||||||
|
|
||||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
|
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
||||||
|
|
||||||
|
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void _getResultsFromFragments(std::vector<SimpleSearchResult> & results,
|
||||||
|
const std::vector<MatchedPatternFragment> & fragments,
|
||||||
|
const TokenizedSentence & tokenizedPattern);
|
||||||
|
|
||||||
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
||||||
|
|
||||||
int _addSingleSentence(
|
int _addSingleSentence(
|
||||||
|
25
tests/concordiaSearch.py
Executable file
25
tests/concordiaSearch.py
Executable file
@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""Manual check of the 'concordiaSearch' operation.

Sends a JSON request containing the pattern given as the first
command-line argument to http://localhost, then prints the round-trip
time and the decoded JSON response.
"""

import unittest
import json
import sys
import time

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request offers the same Request/urlopen names.
    import urllib.request as urllib2

# Fail with a usage message instead of a bare IndexError when no pattern is given.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s <pattern>\n" % sys.argv[0])
    sys.exit(1)

data = {
    'operation': 'concordiaSearch',
    'pattern': sys.argv[1]
}

start = time.time()
req = urllib2.Request('http://localhost')
req.add_header('Content-Type', 'application/json')
# json.dumps emits ASCII by default, so encoding is a no-op on Python 2
# and yields the bytes that urlopen requires on Python 3.
body = json.dumps(data).encode('utf-8')
response = json.loads(urllib2.urlopen(req, body).read().decode('utf-8'))
end = time.time()

# Single-argument print(...) produces identical output on Python 2 and 3.
print("Execution time: %.4f seconds." % (end-start))
print("Result: ")
print(response)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user