working simple search
This commit is contained in:
parent
de5d1f4a63
commit
e8f1f21195
@ -124,4 +124,10 @@ int DBconnection::getIntValue(PGresult * result, int row, int col) {
|
|||||||
return strtol(valueStr, NULL, 10);
|
return strtol(valueStr, NULL, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Reads a single field of a query result as a string.
 *
 * \param result result set produced by a previously executed query
 * \param row index of the row to read
 * \param col index of the column to read
 * \returns the text PQgetvalue yields for the field, or an empty string
 *          when PQgetvalue yields a null pointer
 */
std::string DBconnection::getStringValue(PGresult * result, int row, int col) {
    const char * valueStr = PQgetvalue(result, row, col);
    // PQgetvalue may hand back a null pointer (e.g. for an out-of-range
    // row/column); building a std::string from it is undefined behaviour.
    if (valueStr == NULL) {
        return std::string();
    }
    return std::string(valueStr);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,6 +31,8 @@ public:
|
|||||||
|
|
||||||
int getIntValue(PGresult * result, int row, int col);
|
int getIntValue(PGresult * result, int row, int col);
|
||||||
|
|
||||||
|
std::string getStringValue(PGresult * result, int row, int col);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void close();
|
void close();
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "searcher_controller.hpp"
|
#include "searcher_controller.hpp"
|
||||||
|
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||||
@ -12,14 +13,25 @@ SearcherController::~SearcherController() {
|
|||||||
|
|
||||||
|
|
||||||
/*! Runs a simple search for the given pattern and writes the outcome as JSON.
 *
 * The written object carries "status": "success" and a "results" array with
 * one object per hit (id, matchedFragment, sourceSegment, targetSegment).
 *
 * \param jsonWriter writer that receives the response
 * \param pattern text to search for
 */
void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
    std::vector<SimpleSearchResult> hits = _unitDAO.getSearchResults(_concordia->simpleSearch(pattern));

    // response envelope
    jsonWriter.StartObject();
    jsonWriter.String("status");
    jsonWriter.String("success");
    jsonWriter.String("results");
    jsonWriter.StartArray();

    // one JSON object per search hit
    for (std::vector<SimpleSearchResult>::iterator hit = hits.begin(); hit != hits.end(); ++hit) {
        jsonWriter.StartObject();
        jsonWriter.String("id");
        jsonWriter.Int(hit->getId());
        jsonWriter.String("matchedFragment");
        jsonWriter.String(hit->getMatchedFragment().c_str());
        jsonWriter.String("sourceSegment");
        jsonWriter.String(hit->getSourceSegment().c_str());
        jsonWriter.String("targetSegment");
        jsonWriter.String(hit->getTargetSegment().c_str());
        jsonWriter.EndObject();
    }

    jsonWriter.EndArray();
    jsonWriter.EndObject();
}
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
#include <concordia/concordia.hpp>
|
#include <concordia/concordia.hpp>
|
||||||
#include <concordia/concordia_exception.hpp>
|
#include <concordia/concordia_exception.hpp>
|
||||||
|
|
||||||
|
#include "unit_dao.hpp"
|
||||||
|
#include "simple_search_result.hpp"
|
||||||
#include "rapidjson/writer.h"
|
#include "rapidjson/writer.h"
|
||||||
|
|
||||||
|
|
||||||
@ -24,8 +26,10 @@ public:
|
|||||||
void concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern);
|
void concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
boost::shared_ptr<Concordia> _concordia;
|
boost::shared_ptr<Concordia> _concordia;
|
||||||
|
|
||||||
|
UnitDAO _unitDAO;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,14 @@
|
|||||||
#include "simple_search_result.hpp"
|
#include "simple_search_result.hpp"
|
||||||
|
|
||||||
/*! Builds a search result from its four components; every argument is
 *  copied into the member of the same name.
 */
SimpleSearchResult::SimpleSearchResult(const int id,
                                       const std::string & matchedFragment,
                                       const std::string & sourceSegment,
                                       const std::string & targetSegment)
    : _id(id)
    , _matchedFragment(matchedFragment)
    , _sourceSegment(sourceSegment)
    , _targetSegment(targetSegment)
{
}
|
||||||
|
|
||||||
SimpleSearchResult::~SimpleSearchResult() {
|
SimpleSearchResult::~SimpleSearchResult() {
|
||||||
|
@ -7,13 +7,33 @@ class SimpleSearchResult {
|
|||||||
public:
|
public:
|
||||||
/*! Constructor.
|
/*! Constructor.
|
||||||
*/
|
*/
|
||||||
SimpleSearchResult();
|
SimpleSearchResult(const int id,
|
||||||
|
const std::string & matchedFragment,
|
||||||
|
const std::string & sourceSegment,
|
||||||
|
const std::string & targetSegment
|
||||||
|
);
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
virtual ~SimpleSearchResult();
|
virtual ~SimpleSearchResult();
|
||||||
|
|
||||||
|
int & getId() {
|
||||||
|
return _id;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string & getMatchedFragment() {
|
||||||
|
return _matchedFragment;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string & getSourceSegment() {
|
||||||
|
return _sourceSegment;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string & getTargetSegment() {
|
||||||
|
return _targetSegment;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int id;
|
int _id;
|
||||||
|
|
||||||
std::string _matchedFragment;
|
std::string _matchedFragment;
|
||||||
|
|
||||||
|
@ -44,6 +44,36 @@ int UnitDAO::addSentence(
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Turns raw concordia matches into search results read from the database.
 *
 * For every fragment one row of the unit table is fetched by the fragment's
 * example id. The matched text is cut out of source_segment by the SQL
 * substring() expression, which indexes source_tokens with the fragment's
 * example offset ($1) and example offset + matched length ($2).
 * All queries run between startTransaction() and endTransaction() on a
 * single connection.
 *
 * \param concordiaResults fragments to look up
 * \returns one SimpleSearchResult per fragment for which a row was returned
 */
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPatternFragment> concordiaResults) {
    std::vector<SimpleSearchResult> results;

    // The statement text is the same for every fragment, so build it once.
    std::string query = "SELECT id, source_segment, target_segment, substring(source_segment,source_tokens[$1::integer*2+1]+1,source_tokens[$2::integer*2]-source_tokens[$1::integer*2+1]) as matched_fragment FROM unit WHERE id = $3::integer;";

    DBconnection connection;
    connection.startTransaction();

    BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) {
        // NOTE(review): the params are heap-allocated and not deleted here
        // (unchanged from the original) - confirm whether
        // DBconnection::execute takes ownership; if not, they leak.
        std::vector<QueryParam*> params;
        params.push_back(new IntParam(fragment.getExampleOffset()));
        params.push_back(new IntParam(fragment.getExampleOffset()+fragment.getMatchedLength()));
        params.push_back(new IntParam(fragment.getExampleId()));

        std::stringstream ss;
        ss << "example offset: " << fragment.getExampleOffset()
           << ", matched length: " << fragment.getMatchedLength()
           << ", example id: " << fragment.getExampleId();
        Logger::log(ss.str());

        PGresult * result = connection.execute(query, params);

        // Only read row 0 when the query actually returned a row; otherwise
        // the value accessors would be handed an out-of-range row index.
        if (PQntuples(result) > 0) {
            // column order follows the SELECT list:
            // 0 = id, 1 = source_segment, 2 = target_segment, 3 = matched_fragment
            results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
                                                 connection.getStringValue(result,0,3),
                                                 connection.getStringValue(result,0,1),
                                                 connection.getStringValue(result,0,2)));
        } else {
            std::stringstream missing;
            missing << "no unit row returned for example id: " << fragment.getExampleId();
            Logger::log(missing.str());
        }
        connection.clearResult(result);
    }

    connection.endTransaction();
    return results;
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) {
|
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) {
|
||||||
std::vector<int> result;
|
std::vector<int> result;
|
||||||
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) {
|
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) {
|
||||||
@ -54,3 +84,4 @@ std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,8 +5,12 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <concordia/tokenized_sentence.hpp>
|
#include <concordia/tokenized_sentence.hpp>
|
||||||
|
#include <concordia/substring_occurence.hpp>
|
||||||
|
#include <concordia/matched_pattern_fragment.hpp>
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
|
|
||||||
|
#include "simple_search_result.hpp"
|
||||||
|
|
||||||
class UnitDAO {
|
class UnitDAO {
|
||||||
public:
|
public:
|
||||||
/*! Constructor.
|
/*! Constructor.
|
||||||
@ -20,6 +24,9 @@ public:
|
|||||||
boost::shared_ptr<TokenizedSentence> sourceSentence,
|
boost::shared_ptr<TokenizedSentence> sourceSentence,
|
||||||
std::string & targetSentence,
|
std::string & targetSentence,
|
||||||
int tmId);
|
int tmId);
|
||||||
|
|
||||||
|
std::vector<SimpleSearchResult> getSearchResults(std::vector<MatchedPatternFragment> concordiaResults);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts);
|
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts);
|
||||||
};
|
};
|
||||||
|
@ -1 +0,0 @@
|
|||||||
select substring(source_segment,source_tokens[start_token*2+1]+1,source_tokens[end_token*2+2]-source_tokens[start_token*2+1]) from unit where id = 3;
|
|
@ -1,3 +0,0 @@
|
|||||||
http://chriswu.me/blog/writing-hello-world-in-fcgi-with-c-plus-plus/
|
|
||||||
|
|
||||||
Use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as the character encoding in the <form>).
|
|
@ -1,175 +0,0 @@
|
|||||||
/*
|
|
||||||
* A simple FastCGI application example in C++.
|
|
||||||
*
|
|
||||||
* $Id: echo-cpp.cpp,v 1.10 2002/02/25 00:46:17 robs Exp $
|
|
||||||
*
|
|
||||||
* Copyright (c) 2001 Rob Saccoccio and Chelsea Networks
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
*
|
|
||||||
* 1. Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* 3. The name of the author may not be used to endorse or promote products
|
|
||||||
* derived from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
||||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
||||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
||||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
||||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef _WIN32
|
|
||||||
#include <process.h>
|
|
||||||
#else
|
|
||||||
#include <unistd.h>
|
|
||||||
extern char ** environ;
|
|
||||||
#endif
|
|
||||||
#include "fcgio.h"
|
|
||||||
#include "fcgi_config.h" // HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
// Maximum number of bytes allowed to be read from stdin
|
|
||||||
static const unsigned long STDIN_MAX = 1000000;
|
|
||||||
|
|
||||||
// Writes every entry of a null-terminated array of C strings to standard
// output, one per line, wrapped in an HTML <PRE> element.
static void penv(const char * const * envp)
{
    std::cout << "<PRE>\n";

    const char * const * entry = envp;
    while (*entry)
    {
        std::cout << *entry << "\n";
        ++entry;
    }

    std::cout << "</PRE>\n";
}
|
|
||||||
|
|
||||||
// Reads the request body from standard input into a newly allocated buffer.
//
// The amount to read is taken from the CONTENT_LENGTH request parameter and
// is never larger than STDIN_MAX. On return *content points to the buffer
// (allocated with new[]; the caller releases it) or is 0 when CONTENT_LENGTH
// is absent. Returns the number of bytes actually read.
static long gstdin(FCGX_Request * request, char ** content)
{
    char * lengthText = FCGX_GetParam("CONTENT_LENGTH", request->envp);
    unsigned long byteCount = 0;

    if (!lengthText)
    {
        // *never* read stdin when CONTENT_LENGTH is missing
        *content = 0;
    }
    else
    {
        char * parseEnd = lengthText;
        byteCount = strtol(lengthText, &parseEnd, 10);

        // trailing characters after the number: report and fall back to the cap
        if (*parseEnd)
        {
            std::cerr << "can't parse \"CONTENT_LENGTH="
                      << FCGX_GetParam("CONTENT_LENGTH", request->envp)
                      << "\"\n";
            byteCount = STDIN_MAX;
        }

        // *always* put a cap on the amount of data that will be read
        if (byteCount > STDIN_MAX)
        {
            byteCount = STDIN_MAX;
        }

        *content = new char[byteCount];

        std::cin.read(*content, byteCount);
        byteCount = std::cin.gcount();
    }

    // Chew up any remaining stdin - this shouldn't be necessary
    // but is because mod_fastcgi doesn't handle it correctly.

    // ignore() doesn't set the eof bit in some versions of glibc++
    // so use gcount() instead of eof()...
    do
    {
        std::cin.ignore(1024);
    }
    while (std::cin.gcount() == 1024);

    return byteCount;
}
|
|
||||||
|
|
||||||
int main (void)
|
|
||||||
{
|
|
||||||
int count = 0;
|
|
||||||
long pid = getpid();
|
|
||||||
|
|
||||||
streambuf * cin_streambuf = cin.rdbuf();
|
|
||||||
streambuf * cout_streambuf = cout.rdbuf();
|
|
||||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
|
||||||
|
|
||||||
FCGX_Request request;
|
|
||||||
|
|
||||||
FCGX_Init();
|
|
||||||
FCGX_InitRequest(&request, 0, 0);
|
|
||||||
|
|
||||||
while (FCGX_Accept_r(&request) == 0)
|
|
||||||
{
|
|
||||||
// Note that the default bufsize (0) will cause the use of iostream
|
|
||||||
// methods that require positioning (such as peek(), seek(),
|
|
||||||
// unget() and putback()) to fail (in favour of more efficient IO).
|
|
||||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
|
||||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
|
||||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
|
||||||
|
|
||||||
#if HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
|
||||||
cin = &cin_fcgi_streambuf;
|
|
||||||
cout = &cout_fcgi_streambuf;
|
|
||||||
cerr = &cerr_fcgi_streambuf;
|
|
||||||
#else
|
|
||||||
cin.rdbuf(&cin_fcgi_streambuf);
|
|
||||||
cout.rdbuf(&cout_fcgi_streambuf);
|
|
||||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Although FastCGI supports writing before reading,
|
|
||||||
// many http clients (browsers) don't support it (so
|
|
||||||
// the connection deadlocks until a timeout expires!).
|
|
||||||
char * content;
|
|
||||||
unsigned long clen = gstdin(&request, &content);
|
|
||||||
|
|
||||||
cout << "Content-type: text/html\r\n"
|
|
||||||
"\r\n"
|
|
||||||
"<TITLE>echo-cpp</TITLE>\n"
|
|
||||||
"<H1>echo-cpp</H1>\n"
|
|
||||||
"<H4>PID: " << pid << "</H4>\n"
|
|
||||||
"<H4>Request Number: " << ++count << "</H4>\n";
|
|
||||||
|
|
||||||
cout << "<H4>Request Environment</H4>\n";
|
|
||||||
penv(request.envp);
|
|
||||||
|
|
||||||
cout << "<H4>Process/Initial Environment</H4>\n";
|
|
||||||
penv(environ);
|
|
||||||
|
|
||||||
cout << "<H4>Standard Input - " << clen;
|
|
||||||
if (clen == STDIN_MAX) cout << " (STDIN_MAX)";
|
|
||||||
cout << " bytes</H4>\n";
|
|
||||||
if (clen) cout.write(content, clen);
|
|
||||||
|
|
||||||
if (content) delete []content;
|
|
||||||
|
|
||||||
// If the output streambufs had non-zero bufsizes and
|
|
||||||
// were constructed outside of the accept loop (i.e.
|
|
||||||
// their destructor won't be called here), they would
|
|
||||||
// have to be flushed here.
|
|
||||||
}
|
|
||||||
|
|
||||||
#if HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
|
||||||
cin = cin_streambuf;
|
|
||||||
cout = cout_streambuf;
|
|
||||||
cerr = cerr_streambuf;
|
|
||||||
#else
|
|
||||||
cin.rdbuf(cin_streambuf);
|
|
||||||
cout.rdbuf(cout_streambuf);
|
|
||||||
cerr.rdbuf(cerr_streambuf);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -1,46 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include "fcgio.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
int main(void) {
|
|
||||||
// Backup the stdio streambufs
|
|
||||||
streambuf * cin_streambuf = cin.rdbuf();
|
|
||||||
streambuf * cout_streambuf = cout.rdbuf();
|
|
||||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
|
||||||
|
|
||||||
FCGX_Request request;
|
|
||||||
|
|
||||||
FCGX_Init();
|
|
||||||
FCGX_InitRequest(&request, 0, 0);
|
|
||||||
|
|
||||||
while (FCGX_Accept_r(&request) == 0) {
|
|
||||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
|
||||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
|
||||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
|
||||||
|
|
||||||
cin.rdbuf(&cin_fcgi_streambuf);
|
|
||||||
cout.rdbuf(&cout_fcgi_streambuf);
|
|
||||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
|
||||||
|
|
||||||
cout << "Content-type: text/html\r\n"
|
|
||||||
<< "\r\n"
|
|
||||||
<< "<html>\n"
|
|
||||||
<< " <head>\n"
|
|
||||||
<< " <title>Hello, World!</title>\n"
|
|
||||||
<< " </head>\n"
|
|
||||||
<< " <body>\n"
|
|
||||||
<< " <h1>Hello, World!</h1>\n"
|
|
||||||
<< " </body>\n"
|
|
||||||
<< "</html>\n";
|
|
||||||
|
|
||||||
// Note: the fcgi_streambuf destructor will auto flush
|
|
||||||
}
|
|
||||||
|
|
||||||
// restore stdio streambufs
|
|
||||||
cin.rdbuf(cin_streambuf);
|
|
||||||
cout.rdbuf(cout_streambuf);
|
|
||||||
cerr.rdbuf(cerr_streambuf);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -1,7 +1,7 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"zu\"pełnie nowe zdanie", "targetSentence":"zażółć gęślą jaźńZAŻÓŁĆ GĘŚLĄ JAŹŃ", "tmId":1234782314}' http://localhost
|
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost
|
||||||
|
|
||||||
|
|
||||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "sentence":"zupełnie nowe"}' http://localhost
|
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user