optimized db querying
This commit is contained in:
parent
329421b0c1
commit
e352e0a8b2
@ -3,6 +3,7 @@
|
|||||||
<script src="../js/jquery-1.11.3.min.js"></script>
|
<script src="../js/jquery-1.11.3.min.js"></script>
|
||||||
<script src="../js/cat.js"></script>
|
<script src="../js/cat.js"></script>
|
||||||
<link rel="stylesheet" href="../css/concordia_cat.css" />
|
<link rel="stylesheet" href="../css/concordia_cat.css" />
|
||||||
|
<title>Concordia</title>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
44
concordia-server/int_2d_array_param.cpp
Normal file
44
concordia-server/int_2d_array_param.cpp
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#include "int_2d_array_param.hpp"
|
||||||
|
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
/*! Constructor. Serializes the given two-dimensional array of integers
    into the text stored in _arrayString. The whole array and every row
    are enclosed in square brackets, rows are separated with ';' and the
    numbers within a row with ',' (e.g. rows {1,2} and {3} yield
    "[[1,2];[3]]", an empty array yields "[]").
  \param array rows of integers to be serialized
*/
Int2DArrayParam::Int2DArrayParam(std::vector<std::vector<int> > array) {
    typedef std::vector<std::vector<int> >::const_iterator RowIterator;
    typedef std::vector<int>::const_iterator NumberIterator;

    const std::vector<std::vector<int> > & rows = array;

    std::stringstream buffer;
    buffer << "[";
    for (RowIterator row = rows.begin(); row != rows.end(); ++row) {
        // a separator precedes every row except the first one
        if (row != rows.begin()) {
            buffer << ";";
        }
        buffer << "[";
        for (NumberIterator number = row->begin(); number != row->end(); ++number) {
            // likewise, a separator precedes every number except the first one
            if (number != row->begin()) {
                buffer << ",";
            }
            buffer << *number;
        }
        buffer << "]";
    }
    buffer << "]";
    _arrayString = buffer.str();
}
|
||||||
|
|
||||||
|
/*! Destructor. Nothing to release explicitly: the only member is
    a std::string, which cleans up after itself.
*/
Int2DArrayParam::~Int2DArrayParam() {
}
|
||||||
|
|
||||||
|
const char * Int2DArrayParam::getValue() {
|
||||||
|
return _arrayString.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
const int Int2DArrayParam::getLength() {
|
||||||
|
return _arrayString.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*! Tells whether the value is passed in binary form.
  \returns always 0
*/
const int Int2DArrayParam::isBinary() {
    // NOTE(review): 0 presumably marks the parameter as text (not binary)
    // for the database layer - confirm against the QueryParam contract.
    return 0;
}
|
27
concordia-server/int_2d_array_param.hpp
Normal file
27
concordia-server/int_2d_array_param.hpp
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
#ifndef INT_2D_ARRAY_PARAM_HDR
|
||||||
|
#define INT_2D_ARRAY_PARAM_HDR
|
||||||
|
|
||||||
|
#include "query_param.hpp"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/*! Query parameter carrying a two-dimensional array of integers.
    The array is converted to text once, in the constructor, and only
    that text is kept in the object.
*/
class Int2DArrayParam : public QueryParam {
public:
    /*! Constructor.
      \param array rows of integers to be converted to text
    */
    Int2DArrayParam(std::vector<std::vector<int> > array);

    /*! Destructor.
    */
    virtual ~Int2DArrayParam();

    /*! Getter for the parameter value.
      \returns textual form of the array as a C string
    */
    const char * getValue();

    /*! Getter for the parameter length.
      \returns length of the textual form of the array
    */
    const int getLength();

    /*! Getter for the binary flag.
      \returns 0 for this parameter type
    */
    const int isBinary();

private:
    // textual form of the array, built in the constructor
    std::string _arrayString;
};
|
||||||
|
|
||||||
|
#endif
|
@ -2,11 +2,15 @@
|
|||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include "query_param.hpp"
|
#include "query_param.hpp"
|
||||||
#include "string_param.hpp"
|
#include "string_param.hpp"
|
||||||
#include "int_param.hpp"
|
#include "int_param.hpp"
|
||||||
#include "int_array_param.hpp"
|
#include "int_array_param.hpp"
|
||||||
|
#include "int_2d_array_param.hpp"
|
||||||
#include "logger.hpp"
|
#include "logger.hpp"
|
||||||
#include "example_occurence.hpp"
|
#include "example_occurence.hpp"
|
||||||
|
|
||||||
@ -14,6 +18,8 @@
|
|||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
#include <concordia/token_annotation.hpp>
|
#include <concordia/token_annotation.hpp>
|
||||||
|
|
||||||
|
#include <boost/algorithm/string.hpp>
|
||||||
|
|
||||||
UnitDAO::UnitDAO() {
|
UnitDAO::UnitDAO() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,7 +146,7 @@ SimpleSearchResult UnitDAO::_getResultFromFragment(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
|
ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const SubstringOccurence sOccurence, const int matchedLength) {
|
||||||
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer] FROM unit WHERE id = $3::integer;";
|
std::string query = "SELECT id, source_segment, target_segment, source_tokens[$1::integer], source_tokens[$2::integer], target_tokens, alignments FROM unit WHERE id = $3::integer;";
|
||||||
std::vector<QueryParam*> params;
|
std::vector<QueryParam*> params;
|
||||||
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
|
params.push_back(new IntParam(2*sOccurence.getOffset()+1));
|
||||||
params.push_back(new IntParam(2*(sOccurence.getOffset()+matchedLength)));
|
params.push_back(new IntParam(2*(sOccurence.getOffset()+matchedLength)));
|
||||||
@ -151,28 +157,34 @@ ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const
|
|||||||
connection.getIntValue(result,0,4), // matched example end
|
connection.getIntValue(result,0,4), // matched example end
|
||||||
connection.getStringValue(result,0,1), // source segment
|
connection.getStringValue(result,0,1), // source segment
|
||||||
connection.getStringValue(result,0,2)); // target segment
|
connection.getStringValue(result,0,2)); // target segment
|
||||||
|
std::string targetTokensRaw = connection.getStringValue(result,0,5);
|
||||||
|
std::string alignmentsRaw = connection.getStringValue(result,0,6);
|
||||||
|
|
||||||
connection.clearResult(result);
|
connection.clearResult(result);
|
||||||
BOOST_FOREACH (QueryParam * param, params) {
|
BOOST_FOREACH (QueryParam * param, params) {
|
||||||
delete param;
|
delete param;
|
||||||
}
|
}
|
||||||
|
|
||||||
// now add all target fragments matched with this fragment
|
std::vector<int> targetTokens = _getArray(targetTokensRaw);
|
||||||
std::string targetQuery = "SELECT target_token_pos, target_tokens[2*target_token_pos+1], target_tokens[2*target_token_pos+2] FROM unit INNER JOIN alignment ON alignment.unit_id = unit.id AND unit.id = $1::integer AND source_token_pos between $2::integer and $3::integer ORDER BY target_token_pos";
|
std::vector<std::vector<int> > alignments = _get2DArray(alignmentsRaw);
|
||||||
std::vector<QueryParam*> targetParams;
|
|
||||||
targetParams.push_back(new IntParam(sOccurence.getId()));
|
std::set<int> matchedTargetTokens;
|
||||||
targetParams.push_back(new IntParam(sOccurence.getOffset()));
|
for(int sourceTokenIndex = sOccurence.getOffset(); sourceTokenIndex < sOccurence.getOffset()+matchedLength; sourceTokenIndex++) {
|
||||||
targetParams.push_back(new IntParam(sOccurence.getOffset() + matchedLength - 1));
|
BOOST_FOREACH(int & targetTokenIndex, alignments.at(sourceTokenIndex)) {
|
||||||
PGresult * targetResult = connection.execute(targetQuery, targetParams);
|
matchedTargetTokens.insert(targetTokenIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int prevPos = -2;
|
int prevPos = -2;
|
||||||
int currStart = -1;
|
int currStart = -1;
|
||||||
int currEnd = -1;
|
int currEnd = -1;
|
||||||
|
|
||||||
for (int i=0;i<connection.getRowCount(targetResult);i++) {
|
std::set<int>::iterator iter;
|
||||||
|
for(iter=matchedTargetTokens.begin(); iter!=matchedTargetTokens.end();++iter) {
|
||||||
int targetPos = connection.getIntValue(targetResult, i, 0);
|
int targetPos = *iter;
|
||||||
int targetStart = connection.getIntValue(targetResult, i, 1);
|
int targetStart = targetTokens.at(2*targetPos);
|
||||||
int targetEnd = connection.getIntValue(targetResult, i, 2);
|
int targetEnd = targetTokens.at(2*targetPos+1);
|
||||||
|
|
||||||
if (prevPos < targetPos - 1) { // beginning of detached fragment
|
if (prevPos < targetPos - 1) { // beginning of detached fragment
|
||||||
// check if there is a fragment to end
|
// check if there is a fragment to end
|
||||||
@ -186,16 +198,12 @@ ExampleOccurence UnitDAO::_getExampleOccurence(DBconnection & connection, const
|
|||||||
prevPos = targetPos;
|
prevPos = targetPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// check if there are remaining fragments
|
// check if there are remaining fragments
|
||||||
if (currStart >= 0) {
|
if (currStart >= 0) {
|
||||||
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
occurence.addMatchedTargetFragment(std::pair<int,int>(currStart,currEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
connection.clearResult(targetResult);
|
|
||||||
BOOST_FOREACH (QueryParam * param, targetParams) {
|
|
||||||
delete param;
|
|
||||||
}
|
|
||||||
|
|
||||||
return occurence;
|
return occurence;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,13 +255,14 @@ int UnitDAO::_addAlignedUnit (
|
|||||||
throw ConcordiaException(ss.str());
|
throw ConcordiaException(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens) values($1::text,$2::text,$3::integer,$4,$5) RETURNING id";
|
std::string query = "INSERT INTO unit(source_segment, target_segment, tm_id, source_tokens, target_tokens, alignments) values($1::text,$2::text,$3::integer,$4,$5,$6) RETURNING id";
|
||||||
std::vector<QueryParam*> params;
|
std::vector<QueryParam*> params;
|
||||||
params.push_back(new StringParam(sourceSentence.getOriginalSentence()));
|
params.push_back(new StringParam(sourceSentence.getOriginalSentence()));
|
||||||
params.push_back(new StringParam(targetSentence.getOriginalSentence()));
|
params.push_back(new StringParam(targetSentence.getOriginalSentence()));
|
||||||
params.push_back(new IntParam(tmId));
|
params.push_back(new IntParam(tmId));
|
||||||
params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
|
params.push_back(new IntArrayParam(_getTokenPositions(sourceSentence)));
|
||||||
params.push_back(new IntArrayParam(_getTokenPositions(targetSentence)));
|
params.push_back(new IntArrayParam(_getTokenPositions(targetSentence)));
|
||||||
|
params.push_back(new Int2DArrayParam(alignments));
|
||||||
|
|
||||||
PGresult * result = connection.execute(query, params);
|
PGresult * result = connection.execute(query, params);
|
||||||
int newId = connection.getIntValue(result, 0, 0);
|
int newId = connection.getIntValue(result, 0, 0);
|
||||||
@ -262,23 +271,31 @@ int UnitDAO::_addAlignedUnit (
|
|||||||
delete param;
|
delete param;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add alignments
|
|
||||||
bool nonEmpty = false;
|
|
||||||
std::stringstream alignmentsQuery;
|
|
||||||
alignmentsQuery << "INSERT INTO alignment(unit_id, source_token_pos, target_token_pos) values ";
|
|
||||||
|
|
||||||
for(int i=0;i<alignments.size();i++) {
|
|
||||||
for (int j=0;j<alignments[i].size();j++) {
|
|
||||||
nonEmpty = true;
|
|
||||||
alignmentsQuery << "(" << newId << "," << i << "," << alignments[i][j] << "),";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (nonEmpty) {
|
|
||||||
query = alignmentsQuery.str();
|
|
||||||
query = query.substr(0, query.length()-1);
|
|
||||||
PGresult * result = connection.execute(query);
|
|
||||||
connection.clearResult(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
return newId;
|
return newId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<int> UnitDAO::_getArray(std::string arrayString) {
|
||||||
|
std::vector<int> result;
|
||||||
|
if (arrayString.length()>2) {
|
||||||
|
std::vector<std::string> numberStrings;
|
||||||
|
std::string strippedArrayString = arrayString.substr(1,arrayString.length()-2);
|
||||||
|
boost::split(numberStrings,strippedArrayString,boost::is_any_of(","));
|
||||||
|
BOOST_FOREACH (std::string & numberString, numberStrings) {
|
||||||
|
result.push_back(atoi(numberString.c_str()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > UnitDAO::_get2DArray(std::string arrayString) {
|
||||||
|
std::vector<std::vector<int> > result;
|
||||||
|
std::vector<std::string> arrayStrings;
|
||||||
|
std::string strippedArrayString = arrayString.substr(1,arrayString.length()-2);
|
||||||
|
boost::split(arrayStrings,strippedArrayString,boost::is_any_of(";"));
|
||||||
|
BOOST_FOREACH (std::string & arrayString, arrayStrings) {
|
||||||
|
result.push_back(_getArray(arrayString));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
@ -77,6 +77,10 @@ private:
|
|||||||
const TokenizedSentence & targetSentence,
|
const TokenizedSentence & targetSentence,
|
||||||
const std::vector<std::vector<int> > & alignments,
|
const std::vector<std::vector<int> > & alignments,
|
||||||
const int tmId) throw(ConcordiaException);
|
const int tmId) throw(ConcordiaException);
|
||||||
|
|
||||||
|
std::vector<int> _getArray(std::string arrayString);
|
||||||
|
|
||||||
|
std::vector<std::vector<int> > _get2DArray(std::string arrayString);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -36,17 +36,8 @@ CREATE TABLE unit (
|
|||||||
source_segment text,
|
source_segment text,
|
||||||
target_segment text,
|
target_segment text,
|
||||||
source_tokens integer[],
|
source_tokens integer[],
|
||||||
target_tokens integer[]
|
target_tokens integer[],
|
||||||
|
alignments text
|
||||||
);
|
);
|
||||||
|
|
||||||
DROP TABLE IF EXISTS alignment;
|
|
||||||
CREATE TABLE alignment (
|
|
||||||
id SERIAL PRIMARY KEY,
|
|
||||||
unit_id integer,
|
|
||||||
source_token_pos integer,
|
|
||||||
target_token_pos integer
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE INDEX ON alignment(unit_id, source_token_pos);
|
|
||||||
|
|
||||||
CREATE INDEX ON unit(tm_id);
|
CREATE INDEX ON unit(tm_id);
|
||||||
|
Loading…
Reference in New Issue
Block a user