full search - work in progress

This commit is contained in:
Rafał Jaworski 2019-01-10 16:04:15 +01:00
parent 7622369f5c
commit fb5e7bcc8a
5 changed files with 100 additions and 61 deletions

View File

@ -0,0 +1 @@
../versions_available/stocznia_plen.cfg

View File

@ -50,7 +50,7 @@ void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> &
if (it != _concordiasMap->end()) { if (it != _concordiasMap->end()) {
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false); TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId); pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId);
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true)); FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true), tokenizedPattern.getTokens().size());
jsonWriter.StartObject(); jsonWriter.StartObject();
jsonWriter.String("status"); jsonWriter.String("status");
jsonWriter.String("success"); jsonWriter.String("success");

View File

@ -72,7 +72,7 @@ SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment &
return _getResultFromFragment(fragment, ts, true); return _getResultFromFragment(fragment, ts, true);
} }
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList) { FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList, const int patternLength) {
return FullSearchResult(5); return FullSearchResult(5);
} }
@ -117,15 +117,26 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd); SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd);
if (getOccurences) { if (getOccurences) {
BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) { BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
ssResult.addOccurence(_getExampleOccurence(connection, sOccurence, fragment.getMatchedLength()));
}
}
connection.endTransaction();
return ssResult;
}
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;"; std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
std::vector<QueryParam*> params; std::vector<QueryParam*> params;
params.push_back(new IntParam(2*sOccurence.getOffset()+1)); params.push_back(new IntParam(2*sOccurence.getOffset()+1));
params.push_back(new IntParam(2*(sOccurence.getOffset()+fragment.getMatchedLength()))); params.push_back(new IntParam(2*(sOccurence.getOffset()+matchedLength)));
params.push_back(new IntParam(sOccurence.getId())); params.push_back(new IntParam(sOccurence.getId()));
PGresult * result = connection.execute(query, params); PGresult * result = connection.execute(query, params);
Logger::log("got examples");
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
connection.getIntValue(result,0,3), // matched example start connection.getIntValue(result,0,3), // matched example start
connection.getIntValue(result,0,4), // matched example end connection.getIntValue(result,0,4), // matched example end
@ -141,15 +152,15 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
std::vector<QueryParam*> targetParams; std::vector<QueryParam*> targetParams;
targetParams.push_back(new IntParam(sOccurence.getId())); targetParams.push_back(new IntParam(sOccurence.getId()));
targetParams.push_back(new IntParam(sOccurence.getOffset())); targetParams.push_back(new IntParam(sOccurence.getOffset()));
targetParams.push_back(new IntParam(sOccurence.getOffset() + fragment.getMatchedLength() - 1)); targetParams.push_back(new IntParam(sOccurence.getOffset() + matchedLength - 1));
PGresult * targetResult = connection.execute(targetQuery, targetParams); PGresult * targetResult = connection.execute(targetQuery, targetParams);
Logger::log("got target fragments");
int prevPos = -2; int prevPos = -2;
int currStart = -1; int currStart = -1;
int currEnd = -1; int currEnd = -1;
for (int i=0;i<connection.getRowCount(targetResult);i++) { for (int i=0;i<connection.getRowCount(targetResult);i++) {
int targetPos = connection.getIntValue(targetResult, i, 0); int targetPos = connection.getIntValue(targetResult, i, 0);
int targetStart = connection.getIntValue(targetResult, i, 1); int targetStart = connection.getIntValue(targetResult, i, 1);
int targetEnd = connection.getIntValue(targetResult, i, 2); int targetEnd = connection.getIntValue(targetResult, i, 2);
@ -176,13 +187,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
delete param; delete param;
} }
ssResult.addOccurence(occurence); return occurence;
}
}
connection.endTransaction();
return ssResult;
} }

View File

@ -45,7 +45,7 @@ public:
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment); SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList); FullSearchResult getFullSearchResult(const OccurencesList & occurencesList, const int patternLength);
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult); CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
@ -63,6 +63,8 @@ private:
std::vector<int> _getTokenPositions(const TokenizedSentence & ts); std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
ExampleOccurence _getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength);
int _addSingleSentence( int _addSingleSentence(
DBconnection & connection, DBconnection & connection,
const TokenizedSentence & sourceSentence, const TokenizedSentence & sourceSentence,

31
tests/fullSearch.py Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Manual smoke test for the 'fullSearch' operation (Python 2 script).
#
# Usage: fullSearch.py <pattern> <tmId> <limit> <offset>
#
# Builds a JSON request from the four command-line arguments, POSTs it to
# the server configured in the local `host` module and prints the
# round-trip time followed by the raw response body.

import unittest
import json
import urllib2
import sys
import time

import host

# Request payload; tmId, limit and offset must parse as integers.
data = {
    'operation': 'fullSearch',
    'pattern': sys.argv[1],
    'tmId': int(sys.argv[2]),
    'limit': int(sys.argv[3]),
    'offset': int(sys.argv[4])
}

# Target URL; the port part is appended only when one is configured.
address = 'http://' + host.concordia_host
if len(host.concordia_port) > 0:
    address = address + ':' + host.concordia_port

# The measured interval covers building the request, sending it and
# reading the complete response body.
start = time.time()
req = urllib2.Request(address, headers={'Content-Type': 'application/json'})
payload = json.dumps(data)
response = urllib2.urlopen(req, payload).read()
end = time.time()

elapsed = end - start
print("Execution time: %.4f seconds." % elapsed)
print("Result: ")
print(response)