full search - work in progress
This commit is contained in:
parent
7622369f5c
commit
fb5e7bcc8a
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
1
cat/versions_enabled/stocznia_plen.cfg
Symbolic link
@ -0,0 +1 @@
|
||||
../versions_available/stocznia_plen.cfg
|
@ -50,7 +50,7 @@ void SearcherController::fullSearch(rapidjson::Writer<rapidjson::StringBuffer> &
|
||||
if (it != _concordiasMap->end()) {
|
||||
TokenizedSentence tokenizedPattern = it->second->tokenize(pattern, false, false);
|
||||
pattern = _lemmatizerFacade->lemmatizeIfNeeded(tokenizedPattern.getTokenizedSentence(), tmId);
|
||||
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true));
|
||||
FullSearchResult result = _unitDAO.getFullSearchResult(it->second->fullSearch(pattern, limit, offset, true), tokenizedPattern.getTokens().size());
|
||||
jsonWriter.StartObject();
|
||||
jsonWriter.String("status");
|
||||
jsonWriter.String("success");
|
||||
|
@ -72,7 +72,7 @@ SimpleSearchResult UnitDAO::getSimpleSearchResult(const MatchedPatternFragment &
|
||||
return _getResultFromFragment(fragment, ts, true);
|
||||
}
|
||||
|
||||
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList) {
|
||||
// Work-in-progress stub: neither occurencesList nor patternLength is read yet.
// The function unconditionally returns a FullSearchResult constructed from the
// literal 5.
// NOTE(review): the meaning of the constructor argument is not visible here --
// confirm against FullSearchResult before relying on the returned value.
FullSearchResult UnitDAO::getFullSearchResult(const OccurencesList & occurencesList, const int patternLength) {
|
||||
return FullSearchResult(5);
|
||||
}
|
||||
|
||||
@ -117,66 +117,11 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
||||
|
||||
SimpleSearchResult ssResult(matchedPatternStart, matchedPatternEnd);
|
||||
|
||||
|
||||
|
||||
if (getOccurences) {
|
||||
BOOST_FOREACH(SubstringOccurence sOccurence, fragment.getOccurences()) {
|
||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
|
||||
params.push_back(new IntParam(2*(sOccurence.getOffset()+fragment.getMatchedLength())));
|
||||
params.push_back(new IntParam(sOccurence.getId()));
|
||||
PGresult * result = connection.execute(query, params);
|
||||
Logger::log("got examples");
|
||||
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
|
||||
connection.getIntValue(result,0,3), // matched example start
|
||||
connection.getIntValue(result,0,4), // matched example end
|
||||
connection.getStringValue(result,0,1), // source segment
|
||||
connection.getStringValue(result,0,2)); // target segment
|
||||
connection.clearResult(result);
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
// now add all target fragments matched with this fragment
|
||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
||||
std::vector<QueryParam*> targetParams;
|
||||
targetParams.push_back(new IntParam(sOccurence.getId()));
|
||||
targetParams.push_back(new IntParam(sOccurence.getOffset()));
|
||||
targetParams.push_back(new IntParam(sOccurence.getOffset() + fragment.getMatchedLength() - 1));
|
||||
PGresult * targetResult = connection.execute(targetQuery, targetParams);
|
||||
Logger::log("got target fragments");
|
||||
|
||||
int prevPos = -2;
|
||||
int currStart = -1;
|
||||
int currEnd = -1;
|
||||
|
||||
for (int i=0;i<connection.getRowCount(targetResult);i++) {
|
||||
int targetPos = connection.getIntValue(targetResult, i, 0);
|
||||
int targetStart = connection.getIntValue(targetResult, i, 1);
|
||||
int targetEnd = connection.getIntValue(targetResult, i, 2);
|
||||
|
||||
if (prevPos < targetPos - 1) { // beginning of detached fragment
|
||||
// check if there is a fragment to end
|
||||
if (currStart >= 0) {
|
||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
||||
}
|
||||
currStart = targetStart;
|
||||
}
|
||||
|
||||
currEnd = targetEnd;
|
||||
prevPos = targetPos;
|
||||
}
|
||||
|
||||
// check if there are remaining fragments
|
||||
if (currStart >= 0) {
|
||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
||||
}
|
||||
|
||||
connection.clearResult(targetResult);
|
||||
BOOST_FOREACH (QueryParam * param, targetParams) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
ssResult.addOccurence(occurence);
|
||||
ssResult.addOccurence(_getExampleOccurence(connection, sOccurence, fragment.getMatchedLength()));
|
||||
|
||||
}
|
||||
}
|
||||
@ -185,6 +130,66 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
||||
return ssResult;
|
||||
}
|
||||
|
||||
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
|
||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
|
||||
params.push_back(new IntParam(2*(sOccurence.getOffset()+matchedLength)));
|
||||
params.push_back(new IntParam(sOccurence.getId()));
|
||||
PGresult * result = connection.execute(query, params);
|
||||
ExampleOccurence occurence(connection.getIntValue(result,0,0), // example id
|
||||
connection.getIntValue(result,0,3), // matched example start
|
||||
connection.getIntValue(result,0,4), // matched example end
|
||||
connection.getStringValue(result,0,1), // source segment
|
||||
connection.getStringValue(result,0,2)); // target segment
|
||||
connection.clearResult(result);
|
||||
BOOST_FOREACH (QueryParam * param, params) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
// now add all target fragments matched with this fragment
|
||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
||||
std::vector<QueryParam*> targetParams;
|
||||
targetParams.push_back(new IntParam(sOccurence.getId()));
|
||||
targetParams.push_back(new IntParam(sOccurence.getOffset()));
|
||||
targetParams.push_back(new IntParam(sOccurence.getOffset() + matchedLength - 1));
|
||||
PGresult * targetResult = connection.execute(targetQuery, targetParams);
|
||||
|
||||
int prevPos = -2;
|
||||
int currStart = -1;
|
||||
int currEnd = -1;
|
||||
|
||||
for (int i=0;i<connection.getRowCount(targetResult);i++) {
|
||||
|
||||
int targetPos = connection.getIntValue(targetResult, i, 0);
|
||||
int targetStart = connection.getIntValue(targetResult, i, 1);
|
||||
int targetEnd = connection.getIntValue(targetResult, i, 2);
|
||||
|
||||
if (prevPos < targetPos - 1) { // beginning of detached fragment
|
||||
// check if there is a fragment to end
|
||||
if (currStart >= 0) {
|
||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
||||
}
|
||||
currStart = targetStart;
|
||||
}
|
||||
|
||||
currEnd = targetEnd;
|
||||
prevPos = targetPos;
|
||||
}
|
||||
|
||||
// check if there are remaining fragments
|
||||
if (currStart >= 0) {
|
||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
||||
}
|
||||
|
||||
connection.clearResult(targetResult);
|
||||
BOOST_FOREACH (QueryParam * param, targetParams) {
|
||||
delete param;
|
||||
}
|
||||
|
||||
return occurence;
|
||||
}
|
||||
|
||||
|
||||
std::vector<int> UnitDAO::_getTokenPositions(const TokenizedSentence & ts) {
|
||||
std::vector<int> result;
|
||||
|
@ -45,7 +45,7 @@ public:
|
||||
|
||||
SimpleSearchResult getSimpleSearchResult(const MatchedPatternFragment & fragment);
|
||||
|
||||
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList);
|
||||
FullSearchResult getFullSearchResult(const OccurencesList & occurencesList, const int patternLength);
|
||||
|
||||
CompleteConcordiaSearchResult getConcordiaResult(boost::shared_ptr<ConcordiaSearchResult> rawConcordiaResult);
|
||||
|
||||
@ -63,6 +63,8 @@ private:
|
||||
|
||||
std::vector<int> _getTokenPositions(const TokenizedSentence & ts);
|
||||
|
||||
ExampleOccurence _getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength);
|
||||
|
||||
int _addSingleSentence(
|
||||
DBconnection & connection,
|
||||
const TokenizedSentence & sourceSentence,
|
||||
|
31
tests/fullSearch.py
Executable file
31
tests/fullSearch.py
Executable file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import unittest
|
||||
import json
|
||||
import urllib2
|
||||
import sys
|
||||
import time
|
||||
import host
|
||||
|
||||
# Command-line client for the 'fullSearch' operation.
# Usage: fullSearch.py <pattern> <tmId> <limit> <offset>
# Sends the request as JSON to the server configured in the 'host' module and
# prints the round-trip time followed by the raw response body.

def _usage_and_exit():
    # Report wrong invocation on stderr instead of failing with a traceback.
    sys.stderr.write("Usage: %s <pattern> <tmId> <limit> <offset>\n" % sys.argv[0])
    sys.exit(1)

# Extra trailing arguments are ignored, as before.
if len(sys.argv) < 5:
    _usage_and_exit()

try:
    tm_id = int(sys.argv[2])
    limit = int(sys.argv[3])
    offset = int(sys.argv[4])
except ValueError:
    _usage_and_exit()

data = {
    'operation': 'fullSearch',
    'pattern': sys.argv[1],
    'tmId': tm_id,
    'limit': limit,
    'offset': offset
}

address = 'http://' + host.concordia_host
# The port is optional; an empty string means "use the default port".
if len(host.concordia_port) > 0:
    address += ':' + host.concordia_port

start = time.time()
req = urllib2.Request(address)
req.add_header('Content-Type', 'application/json')
connection = urllib2.urlopen(req, json.dumps(data))
try:
    response = connection.read()
    # Stop the clock once the whole body has been read, before cleanup.
    end = time.time()
finally:
    connection.close()

# Single-argument parenthesised prints produce the same output under Python 2.
print("Execution time: %.4f seconds." % (end - start))
print("Result: ")
print(response)
|
Loading…
Reference in New Issue
Block a user