full search - work in progress
This commit is contained in:
parent
7622369f5c
commit
fb5e7bcc8a
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../versions_available/stocznia_plen.cfg
|
@ -50,7 +50,7 @@ void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> &
|
|||||||
if (it != _concordiasMap->end()) {
|
if (it != _concordiasMap->end()) {
|
||||||
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
|
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
|
||||||
pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId);
|
pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId);
|
||||||
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true));
|
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true), tokenizedPattern.getTokens().size());
|
||||||
jsonWriter.StartObject();
|
jsonWriter.StartObject();
|
||||||
jsonWriter.String("status");
|
jsonWriter.String("status");
|
||||||
jsonWriter.String("success");
|
jsonWriter.String("success");
|
||||||
|
@ -72,7 +72,7 @@ SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment &
|
|||||||
return _getResultFromFragment(fragment, ts, true);
|
return _getResultFromFragment(fragment, ts, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList) {
|
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList, const int patternLength) {
|
||||||
return FullSearchResult(5);
|
return FullSearchResult(5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,66 +117,11 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
|||||||
|
|
||||||
SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd);
|
SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (getOccurences) {
|
if (getOccurences) {
|
||||||
BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
|
BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
|
||||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
ssResult.addOccurence(_getExampleOccurence(connection, sOccurence, fragment.getMatchedLength()));
|
||||||
std::vector<QueryParam*> params;
|
|
||||||
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
|
|
||||||
params.push_back(new IntParam(2*(sOccurence.getOffset()+fragment.getMatchedLength())));
|
|
||||||
params.push_back(new IntParam(sOccurence.getId()));
|
|
||||||
PGresult * result = connection.execute(query, params);
|
|
||||||
Logger::log("got examples");
|
|
||||||
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
|
|
||||||
connection.getIntValue(result,0,3), // matched example start
|
|
||||||
connection.getIntValue(result,0,4), // matched example end
|
|
||||||
connection.getStringValue(result,0,1), // source segment
|
|
||||||
connection.getStringValue(result,0,2)); // target segment
|
|
||||||
connection.clearResult(result);
|
|
||||||
BOOST_FOREACH (QueryParam * param, params) {
|
|
||||||
delete param;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now add all target fragments matched with this fragment
|
|
||||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
|
||||||
std::vector<QueryParam*> targetParams;
|
|
||||||
targetParams.push_back(new IntParam(sOccurence.getId()));
|
|
||||||
targetParams.push_back(new IntParam(sOccurence.getOffset()));
|
|
||||||
targetParams.push_back(new IntParam(sOccurence.getOffset() + fragment.getMatchedLength() - 1));
|
|
||||||
PGresult * targetResult = connection.execute(targetQuery, targetParams);
|
|
||||||
Logger::log("got target fragments");
|
|
||||||
|
|
||||||
int prevPos = -2;
|
|
||||||
int currStart = -1;
|
|
||||||
int currEnd = -1;
|
|
||||||
|
|
||||||
for (int i=0;i<connection.getRowCount(targetResult);i++) {
|
|
||||||
int targetPos = connection.getIntValue(targetResult, i, 0);
|
|
||||||
int targetStart = connection.getIntValue(targetResult, i, 1);
|
|
||||||
int targetEnd = connection.getIntValue(targetResult, i, 2);
|
|
||||||
|
|
||||||
if (prevPos < targetPos - 1) { // beginning of detached fragment
|
|
||||||
// check if there is a fragment to end
|
|
||||||
if (currStart >= 0) {
|
|
||||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
|
||||||
}
|
|
||||||
currStart = targetStart;
|
|
||||||
}
|
|
||||||
|
|
||||||
currEnd = targetEnd;
|
|
||||||
prevPos = targetPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if there are remaining fragments
|
|
||||||
if (currStart >= 0) {
|
|
||||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
|
||||||
}
|
|
||||||
|
|
||||||
connection.clearResult(targetResult);
|
|
||||||
BOOST_FOREACH (QueryParam * param, targetParams) {
|
|
||||||
delete param;
|
|
||||||
}
|
|
||||||
|
|
||||||
ssResult.addOccurence(occurence);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,6 +130,66 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
|||||||
return ssResult;
|
return ssResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the ExampleOccurence describing a single occurrence of a matched
// fragment inside one translation unit.
//
// Two queries are run through `connection`:
//   1. the `unit` row whose id is sOccurence.getId(), yielding the example id,
//      the source and target segments, and two entries of `source_tokens`
//      that are used as the matched example start and end;
//   2. the `alignment` rows of that unit whose source_token_pos lies in
//      [offset, offset + matchedLength - 1], ordered by target_token_pos,
//      which are merged into contiguous target fragments.
//
// Parameters:
//   connection    - database connection used for both queries
//   sOccurence    - occurrence to describe (supplies the unit id and the token offset)
//   matchedLength - number of matched source tokens, counted from the offset
//
// Returns the populated ExampleOccurence, including its matched target fragments.
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
    // Query parameters have automatic storage: nothing has to be deleted by
    // hand and nothing leaks if a call below throws. The vectors only borrow
    // the objects; the previous implementation freed its parameters right
    // after each query, so the connection is not expected to keep the pointers.
    IntParam matchStartIndex(2*sOccurence.getOffset()+1);
    IntParam matchEndIndex(2*(sOccurence.getOffset()+matchedLength));
    IntParam unitId(sOccurence.getId());

    std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
    std::vector<QueryParam*> params;
    params.push_back(&matchStartIndex);
    params.push_back(&matchEndIndex);
    params.push_back(&unitId);

    PGresult * result = connection.execute(query, params);
    // NOTE(review): row 0 is read without checking the row count, i.e. the
    // unit is assumed to exist - confirm that callers guarantee this.
    ExampleOccurence occurence(connection.getIntValue(result,0,0),     // example id
                               connection.getIntValue(result,0,3),     // matched example start
                               connection.getIntValue(result,0,4),     // matched example end
                               connection.getStringValue(result,0,1),  // source segment
                               connection.getStringValue(result,0,2)); // target segment
    connection.clearResult(result);

    // Now collect all target fragments aligned with the matched source tokens.
    IntParam alignedUnitId(sOccurence.getId());
    IntParam firstSourceToken(sOccurence.getOffset());
    IntParam lastSourceToken(sOccurence.getOffset() + matchedLength - 1);

    std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
    std::vector<QueryParam*> targetParams;
    targetParams.push_back(&alignedUnitId);
    targetParams.push_back(&firstSourceToken);
    targetParams.push_back(&lastSourceToken);

    PGresult * targetResult = connection.execute(targetQuery, targetParams);

    // Rows arrive ordered by target token position. Adjacent (or repeated)
    // positions extend the current fragment; a gap of more than one position
    // closes it and opens a new one.
    int prevPos = -2;   // sentinel: guarantees that the first row opens a fragment
    int currStart = -1; // -1 means "no fragment open yet"
    int currEnd = -1;

    const int targetRowCount = connection.getRowCount(targetResult);
    for (int row = 0; row < targetRowCount; ++row) {
        const int targetPos = connection.getIntValue(targetResult, row, 0);
        const int targetStart = connection.getIntValue(targetResult, row, 1);
        const int targetEnd = connection.getIntValue(targetResult, row, 2);

        if (prevPos < targetPos - 1) { // beginning of a detached fragment
            // flush the fragment that was being built, if any
            if (currStart >= 0) {
                occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
            }
            currStart = targetStart;
        }

        currEnd = targetEnd;
        prevPos = targetPos;
    }

    // flush the last fragment, if any rows were returned
    if (currStart >= 0) {
        occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
    }

    connection.clearResult(targetResult);

    return occurence;
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
|
std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
|
||||||
std::vector<int> result;
|
std::vector<int> result;
|
||||||
|
@ -45,7 +45,7 @@ public:
|
|||||||
|
|
||||||
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
|
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
|
||||||
|
|
||||||
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList);
|
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList, const int patternLength);
|
||||||
|
|
||||||
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||||
|
|
||||||
@ -63,6 +63,8 @@ private:
|
|||||||
|
|
||||||
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
||||||
|
|
||||||
|
ExampleOccurence _getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength);
|
||||||
|
|
||||||
int _addSingleSentence(
|
int _addSingleSentence(
|
||||||
DBconnection & connection,
|
DBconnection & connection,
|
||||||
const TokenizedSentence & sourceSentence,
|
const TokenizedSentence & sourceSentence,
|
||||||
|
31
tests/fullSearch.py
Executable file
31
tests/fullSearch.py
Executable file
@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import json
|
||||||
|
import urllib2
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import host
|
||||||
|
|
||||||
|
# Command-line client for the 'fullSearch' operation (Python 2 script).
# Usage: fullSearch.py <pattern> <tmId> <limit> <offset>
# tmId, limit and offset are converted to integers before being sent.
request_fields = {
    'operation': 'fullSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2]),
    'limit': int(sys.argv[3]),
    'offset': int(sys.argv[4])
}

# Server URL taken from the local `host` module; the port is appended only
# when one is configured.
server_url = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    server_url = server_url + ':' + host.concordia_port

# Time the whole round trip: building the request, serialising the payload,
# sending it and reading the complete reply.
started_at = time.time()
request = urllib2.Request(server_url, headers={'Content-Type': 'application/json'})
payload = json.dumps(request_fields)
reply = urllib2.urlopen(request, payload).read()
finished_at = time.time()

print("Execution time: %.4f seconds." % (finished_at - started_at))
print("Result: ")
print(reply)
|
Loading…
Reference in New Issue
Block a user