working concordia searching
This commit is contained in:
parent
ac7bc4cdbe
commit
14dc4abd56
@ -12,6 +12,7 @@ add_executable(concordia_server_process
|
||||
logger.cpp
|
||||
int_array_param.cpp
|
||||
simple_search_result.cpp
|
||||
complete_concordia_search_result.cpp
|
||||
)
|
||||
target_link_libraries(concordia_server_process fcgi fcgi++ pq concordia config++ log4cpp ${Boost_LIBRARIES} divsufsort utf8case)
|
||||
|
||||
|
10
concordia-server/complete_concordia_search_result.cpp
Normal file
10
concordia-server/complete_concordia_search_result.cpp
Normal file
@ -0,0 +1,10 @@
|
||||
#include "complete_concordia_search_result.hpp"
|
||||
|
||||
/*! Constructor.
    Stores the given best-overlay score. The overlay vector itself is
    left empty here; it is exposed for filling via getBestOverlay().
  \param bestOverlayScore score to be reported by getBestOverlayScore()
*/
CompleteConcordiaSearchResult::CompleteConcordiaSearchResult(const double bestOverlayScore)
    : _bestOverlayScore(bestOverlayScore),
      _bestOverlay() {
}
|
||||
|
||||
/*! Destructor.
    Has no work of its own to do: the members release themselves.
*/
CompleteConcordiaSearchResult::~CompleteConcordiaSearchResult()
{
}
|
||||
|
32
concordia-server/complete_concordia_search_result.hpp
Normal file
32
concordia-server/complete_concordia_search_result.hpp
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
|
||||
#define COMPLETE_CONCORDIA_SEARCH_RESULT_HDR
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "simple_search_result.hpp"
|
||||
|
||||
class CompleteConcordiaSearchResult {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
CompleteConcordiaSearchResult(const double bestOverlayScore);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~CompleteConcordiaSearchResult();
|
||||
|
||||
const double getBestOverlayScore() {
|
||||
return _bestOverlayScore;
|
||||
}
|
||||
|
||||
std::vector<SimpleSearchResult> & getBestOverlay() {
|
||||
return _bestOverlay;
|
||||
}
|
||||
|
||||
private:
|
||||
double _bestOverlayScore;
|
||||
|
||||
std::vector<SimpleSearchResult> _bestOverlay;
|
||||
};
|
||||
|
||||
#endif
|
@ -8,7 +8,8 @@ JsonGenerator::~JsonGenerator() {
|
||||
}
|
||||
|
||||
|
||||
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message) {
|
||||
void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::string & message) {
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("error");
|
||||
@ -17,5 +18,21 @@ void JsonGenerator::signalError(rapidjson::Writer<rapidjson::StringBuffer> & jso
|
||||
jsonWriter.EndObject();
|
||||
}
|
||||
|
||||
/*! Writes one search result to the JSON writer as a single object with
    the keys "id", "matchedExampleStart", "matchedExampleEnd",
    "sourceSegment" and "targetSegment", in that order.
  \param jsonWriter writer receiving the object
  \param result search result to serialize
*/
void JsonGenerator::writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
                                      const SimpleSearchResult & result) {
    const std::string & sourceText = result.getSourceSegment();
    const std::string & targetText = result.getTargetSegment();

    jsonWriter.StartObject();

    // numeric fields
    jsonWriter.String("id");
    jsonWriter.Int(result.getId());
    jsonWriter.String("matchedExampleStart");
    jsonWriter.Int(result.getMatchedExampleStart());
    jsonWriter.String("matchedExampleEnd");
    jsonWriter.Int(result.getMatchedExampleEnd());

    // text fields
    jsonWriter.String("sourceSegment");
    jsonWriter.String(sourceText.c_str());
    jsonWriter.String("targetSegment");
    jsonWriter.String(targetText.c_str());

    jsonWriter.EndObject();
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
#include "simple_search_result.hpp"
|
||||
|
||||
class JsonGenerator {
|
||||
public:
|
||||
/*! Constructor.
|
||||
@ -14,7 +16,11 @@ public:
|
||||
*/
|
||||
virtual ~JsonGenerator();
|
||||
|
||||
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string message);
|
||||
static void signalError(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const std::string & message);
|
||||
|
||||
static void writeSearchResult(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter,
|
||||
const SimpleSearchResult & result);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include <boost/foreach.hpp>
|
||||
#include <vector>
|
||||
|
||||
#include "json_generator.hpp"
|
||||
|
||||
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||
throw(ConcordiaException):
|
||||
_concordia(concordia) {
|
||||
@ -21,29 +23,32 @@ void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer>
|
||||
jsonWriter.String("results");
|
||||
jsonWriter.StartArray();
|
||||
BOOST_FOREACH(SimpleSearchResult & result, results) {
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("id");
|
||||
jsonWriter.Int(result.getId());
|
||||
jsonWriter.String("matchedFragmentStart");
|
||||
jsonWriter.Int(result.getMatchedFragmentStart());
|
||||
jsonWriter.String("matchedFragmentEnd");
|
||||
jsonWriter.Int(result.getMatchedFragmentEnd());
|
||||
jsonWriter.String("sourceSegment");
|
||||
jsonWriter.String(result.getSourceSegment().c_str());
|
||||
jsonWriter.String("targetSegment");
|
||||
jsonWriter.String(result.getTargetSegment().c_str());
|
||||
jsonWriter.EndObject();
|
||||
JsonGenerator::writeSearchResult(jsonWriter, result);
|
||||
}
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
}
|
||||
|
||||
void SearcherController::concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
|
||||
|
||||
CompleteConcordiaSearchResult result = _unitDAO.getConcordiaResult(_concordia->concordiaSearch(pattern));
|
||||
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("error");
|
||||
jsonWriter.String("data");
|
||||
jsonWriter.String("concordia searching not yet implemented");
|
||||
jsonWriter.String("success");
|
||||
jsonWriter.String("result");
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("bestOverlayScore");
|
||||
jsonWriter.Double(result.getBestOverlayScore());
|
||||
jsonWriter.String("bestOverlay");
|
||||
jsonWriter.StartArray();
|
||||
BOOST_FOREACH(SimpleSearchResult & simpleResult, result.getBestOverlay()) {
|
||||
JsonGenerator::writeSearchResult(jsonWriter, simpleResult);
|
||||
}
|
||||
jsonWriter.EndArray();
|
||||
jsonWriter.EndObject();
|
||||
|
||||
|
||||
jsonWriter.EndObject();
|
||||
}
|
||||
|
||||
|
@ -2,13 +2,17 @@
|
||||
|
||||
SimpleSearchResult::SimpleSearchResult(
|
||||
const int id,
|
||||
const int matchedFragmentStart,
|
||||
const int matchedFragmentEnd,
|
||||
const int matchedPatternStart,
|
||||
const int matchedPatternEnd,
|
||||
const int matchedExampleStart,
|
||||
const int matchedExampleEnd,
|
||||
const std::string & sourceSegment,
|
||||
const std::string & targetSegment):
|
||||
_id(id),
|
||||
_matchedFragmentStart(matchedFragmentStart),
|
||||
_matchedFragmentEnd(matchedFragmentEnd),
|
||||
_matchedPatternStart(matchedPatternStart),
|
||||
_matchedPatternEnd(matchedPatternEnd),
|
||||
_matchedExampleStart(matchedExampleStart),
|
||||
_matchedExampleEnd(matchedExampleEnd),
|
||||
_sourceSegment(sourceSegment),
|
||||
_targetSegment(targetSegment) {
|
||||
}
|
||||
|
@ -8,8 +8,10 @@ public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
SimpleSearchResult(const int id,
|
||||
const int matchedFragmentStart,
|
||||
const int matchedFragmentEnd,
|
||||
const int matchedPatternStart,
|
||||
const int matchedPatternEnd,
|
||||
const int matchedExampleStart,
|
||||
const int matchedExampleEnd,
|
||||
const std::string & sourceSegment,
|
||||
const std::string & targetSegment
|
||||
);
|
||||
@ -17,32 +19,44 @@ public:
|
||||
*/
|
||||
virtual ~SimpleSearchResult();
|
||||
|
||||
const int getId() {
|
||||
int getId() const {
|
||||
return _id;
|
||||
}
|
||||
|
||||
const int getMatchedFragmentStart() {
|
||||
return _matchedFragmentStart;
|
||||
int getMatchedPatternStart() const {
|
||||
return _matchedPatternStart;
|
||||
}
|
||||
|
||||
const int getMatchedFragmentEnd() {
|
||||
return _matchedFragmentEnd;
|
||||
int getMatchedPatternEnd() const {
|
||||
return _matchedPatternEnd;
|
||||
}
|
||||
|
||||
const std::string & getSourceSegment() {
|
||||
int getMatchedExampleStart() const {
|
||||
return _matchedExampleStart;
|
||||
}
|
||||
|
||||
int getMatchedExampleEnd() const {
|
||||
return _matchedExampleEnd;
|
||||
}
|
||||
|
||||
const std::string & getSourceSegment() const {
|
||||
return _sourceSegment;
|
||||
}
|
||||
|
||||
const std::string & getTargetSegment() {
|
||||
const std::string & getTargetSegment() const {
|
||||
return _targetSegment;
|
||||
}
|
||||
|
||||
private:
|
||||
int _id;
|
||||
|
||||
int _matchedFragmentStart;
|
||||
int _matchedPatternStart;
|
||||
|
||||
int _matchedFragmentEnd;
|
||||
int _matchedPatternEnd;
|
||||
|
||||
int _matchedExampleStart;
|
||||
|
||||
int _matchedExampleEnd;
|
||||
|
||||
std::string _sourceSegment;
|
||||
|
||||
|
@ -45,31 +45,57 @@ std::vector<SUFFIX_MARKER_TYPE> UnitDAO::addSentences(
|
||||
}
|
||||
|
||||
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults) {
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(const std::vector<MatchedPatternFragment> & fragments) {
|
||||
std::vector<SimpleSearchResult> results;
|
||||
TokenizedSentence ts("");
|
||||
_getResultsFromFragments(results, fragments, ts);
|
||||
return results;
|
||||
}
|
||||
|
||||
/*! Converts a raw concordia search result into a complete one.
    The best overlay score is copied over, and the best overlay is filled
    by _getResultsFromFragments from the raw overlay fragments and the
    tokenized pattern.
  \param rawConcordiaResult raw result to convert
  \returns the assembled complete result
*/
CompleteConcordiaSearchResult UnitDAO::getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult) {
    ConcordiaSearchResult & rawResult = *rawConcordiaResult;

    CompleteConcordiaSearchResult completeResult(rawResult.getBestOverlayScore());
    _getResultsFromFragments(completeResult.getBestOverlay(),
                             rawResult.getBestOverlay(),
                             rawResult.getTokenizedPattern());
    return completeResult;
}
|
||||
|
||||
void UnitDAO::_getResultsFromFragments(
|
||||
std::vector<SimpleSearchResult> & results,
|
||||
const std::vector<MatchedPatternFragment> & fragments,
|
||||
const TokenizedSentence & tokenizedPattern) {
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
BOOST_FOREACH(const MatchedPatternFragment & fragment, concordiaResults) {
|
||||
BOOST_FOREACH(const MatchedPatternFragment & fragment, fragments) {
|
||||
int matchedPatternStart = 0;
|
||||
int matchedPatternEnd = 0;
|
||||
if (tokenizedPattern.getTokens().size() > 0) {
|
||||
// if it is concordia searching
|
||||
matchedPatternStart = tokenizedPattern.getTokens().at(fragment.getStart()).getStart();
|
||||
matchedPatternEnd = tokenizedPattern.getTokens().at(fragment.getStart()+fragment.getMatchedLength() - 1).getEnd();
|
||||
}
|
||||
|
||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(2*fragment.getExampleOffset()+1));
|
||||
params.push_back(new IntParam(2*(fragment.getExampleOffset()+fragment.getMatchedLength())));
|
||||
params.push_back(new IntParam(fragment.getExampleId()));
|
||||
PGresult * result = connection.execute(query, params);
|
||||
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
|
||||
connection.getIntValue(result,0,3),
|
||||
connection.getIntValue(result,0,4),
|
||||
connection.getStringValue(result,0,1),
|
||||
connection.getStringValue(result,0,2)));
|
||||
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0), // example id
|
||||
matchedPatternStart,
|
||||
matchedPatternEnd,
|
||||
connection.getIntValue(result,0,3), // matched example start
|
||||
connection.getIntValue(result,0,4), // matched example end
|
||||
connection.getStringValue(result,0,1), // source segment
|
||||
connection.getStringValue(result,0,2))); // target segment
|
||||
connection.clearResult(result);
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
}
|
||||
connection.endTransaction();
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,9 +8,11 @@
|
||||
#include <concordia/tokenized_sentence.hpp>
|
||||
#include <concordia/substring_occurence.hpp>
|
||||
#include <concordia/matched_pattern_fragment.hpp>
|
||||
#include <concordia/concordia_search_result.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "simple_search_result.hpp"
|
||||
#include "complete_concordia_search_result.hpp"
|
||||
#include "db_connection.hpp"
|
||||
|
||||
class UnitDAO {
|
||||
@ -32,9 +34,15 @@ public:
|
||||
const std::vector<std::string> & targetSentences,
|
||||
const std::vector<int> & tmIds);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & concordiaResults);
|
||||
std::vector<SimpleSearchResult> getSearchResults(const std::vector<MatchedPatternFragment> & fragments);
|
||||
|
||||
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||
|
||||
private:
|
||||
void _getResultsFromFragments(std::vector<SimpleSearchResult> & results,
|
||||
const std::vector<MatchedPatternFragment> & fragments,
|
||||
const TokenizedSentence & tokenizedPattern);
|
||||
|
||||
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
||||
|
||||
int _addSingleSentence(
|
||||
|
25
tests/concordiaSearch.py
Executable file
25
tests/concordiaSearch.py
Executable file
@ -0,0 +1,25 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import unittest
|
||||
import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
|
||||
data = {
|
||||
'operation': 'concordiaSearch',
|
||||
'pattern':sys.argv[1]
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
req = urllib2.Request('http://localhost')
|
||||
req.add_header('Content-Type', 'application/json')
|
||||
response = json.loads(urllib2.urlopen(req, json.dumps(data)).read())
|
||||
end = time.time()
|
||||
|
||||
print "Execution time: %.4f seconds." % (end-start)
|
||||
print "Result: "
|
||||
print response
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user