working simple search
This commit is contained in:
parent
de5d1f4a63
commit
e8f1f21195
@ -124,4 +124,10 @@ int DBconnection::getIntValue(PGresult * result, int row, int col) {
|
||||
return strtol(valueStr, NULL, 10);
|
||||
}
|
||||
|
||||
/*! Reads a single field of a query result as a string.
  \param result the libpq result set to read from
  \param row number of the row to read
  \param col number of the column to read
  \returns the text of the requested field, or an empty string when
           PQgetvalue provides no value pointer
*/
std::string DBconnection::getStringValue(PGresult * result, int row, int col) {
    const char * valueStr = PQgetvalue(result, row, col);

    // PQgetvalue yields a null pointer when the row/column pair does not
    // exist in the result; building a std::string from a null pointer is
    // undefined behaviour, so fall back to an empty string instead.
    if (valueStr == NULL) {
        return std::string();
    }

    return std::string(valueStr);
}
|
||||
|
||||
|
||||
|
||||
|
@ -31,6 +31,8 @@ public:
|
||||
|
||||
int getIntValue(PGresult * result, int row, int col);
|
||||
|
||||
std::string getStringValue(PGresult * result, int row, int col);
|
||||
|
||||
private:
|
||||
void close();
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "searcher_controller.hpp"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
#include <vector>
|
||||
|
||||
SearcherController::SearcherController(boost::shared_ptr<Concordia> concordia)
|
||||
@ -12,14 +13,25 @@ SearcherController::~SearcherController() {
|
||||
|
||||
|
||||
/*! Performs a simple search and writes the outcome as a JSON object.
  The object has a "status" field set to "success" and a "results" array
  with one entry per search result (id, matchedFragment, sourceSegment,
  targetSegment).
  \param jsonWriter writer that receives the JSON response
  \param pattern the pattern handed to Concordia's simple search
*/
void SearcherController::simpleSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern) {
    // The raw index matches are resolved to full unit records by the DAO.
    // Only this declaration of 'results' is kept: the earlier
    // std::vector<SubstringOccurence> one was superseded by it and would
    // redefine the same name.
    std::vector<SimpleSearchResult> results = _unitDAO.getSearchResults(_concordia->simpleSearch(pattern));

    jsonWriter.StartObject();
    jsonWriter.String("status");
    jsonWriter.String("success");
    jsonWriter.String("results");
    jsonWriter.StartArray();

    BOOST_FOREACH(SimpleSearchResult & result, results) {
        jsonWriter.StartObject();
        jsonWriter.String("id");
        jsonWriter.Int(result.getId());
        jsonWriter.String("matchedFragment");
        jsonWriter.String(result.getMatchedFragment().c_str());
        jsonWriter.String("sourceSegment");
        jsonWriter.String(result.getSourceSegment().c_str());
        jsonWriter.String("targetSegment");
        jsonWriter.String(result.getTargetSegment().c_str());
        jsonWriter.EndObject();
    }

    jsonWriter.EndArray();
    jsonWriter.EndObject();
}
|
||||
|
@ -6,6 +6,8 @@
|
||||
#include <concordia/concordia.hpp>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
|
||||
#include "unit_dao.hpp"
|
||||
#include "simple_search_result.hpp"
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
|
||||
@ -24,8 +26,10 @@ public:
|
||||
void concordiaSearch(rapidjson::Writer<rapidjson::StringBuffer> & jsonWriter, std::string & pattern);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
|
||||
UnitDAO _unitDAO;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,6 +1,14 @@
|
||||
#include "simple_search_result.hpp"
|
||||
|
||||
/*! Constructor.
  Stores copies of all the values passed in. The leftover opening line of
  the former parameterless constructor is removed, since it left this
  definition with a duplicated, unbalanced head.
  \param id value stored as the result id
  \param matchedFragment value stored as the matched fragment
  \param sourceSegment value stored as the source segment
  \param targetSegment value stored as the target segment
*/
SimpleSearchResult::SimpleSearchResult(
    const int id,
    const std::string & matchedFragment,
    const std::string & sourceSegment,
    const std::string & targetSegment):
        _id(id),
        _matchedFragment(matchedFragment),
        _sourceSegment(sourceSegment),
        _targetSegment(targetSegment) {
}
|
||||
|
||||
SimpleSearchResult::~SimpleSearchResult() {
|
||||
|
@ -7,13 +7,33 @@ class SimpleSearchResult {
|
||||
public:
|
||||
/*! Constructor.
|
||||
*/
|
||||
SimpleSearchResult();
|
||||
SimpleSearchResult(const int id,
|
||||
const std::string & matchedFragment,
|
||||
const std::string & sourceSegment,
|
||||
const std::string & targetSegment
|
||||
);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~SimpleSearchResult();
|
||||
|
||||
/*! Getter for the result id.
  \returns a non-const reference to the _id member, so the caller is
           able to modify the stored value through it
*/
int & getId() {
|
||||
return _id;
|
||||
}
|
||||
|
||||
const std::string & getMatchedFragment() {
|
||||
return _matchedFragment;
|
||||
}
|
||||
|
||||
const std::string & getSourceSegment() {
|
||||
return _sourceSegment;
|
||||
}
|
||||
|
||||
const std::string & getTargetSegment() {
|
||||
return _targetSegment;
|
||||
}
|
||||
|
||||
private:
|
||||
int id;
|
||||
int _id;
|
||||
|
||||
std::string _matchedFragment;
|
||||
|
||||
|
@ -44,6 +44,36 @@ int UnitDAO::addSentence(
|
||||
|
||||
}
|
||||
|
||||
std::vector<SimpleSearchResult> UnitDAO::getSearchResults(std::vector<MatchedPatternFragment> concordiaResults) {
|
||||
std::vector<SimpleSearchResult> results;
|
||||
|
||||
DBconnection connection;
|
||||
connection.startTransaction();
|
||||
|
||||
BOOST_FOREACH(MatchedPatternFragment & fragment, concordiaResults) {
|
||||
std::string query = "SELECT id, source_segment, target_segment, substring(source_segment,source_tokens[$1::integer*2+1]+1,source_tokens[$2::integer*2]-source_tokens[$1::integer*2+1]) as matched_fragment FROM unit WHERE id = $3::integer;";
|
||||
std::vector<QueryParam*> params;
|
||||
params.push_back(new IntParam(fragment.getExampleOffset()));
|
||||
params.push_back(new IntParam(fragment.getExampleOffset()+fragment.getMatchedLength()));
|
||||
params.push_back(new IntParam(fragment.getExampleId()));
|
||||
std::stringstream ss;
|
||||
ss << "example offset: " << fragment.getExampleOffset()
|
||||
<< ", matched length: " << fragment.getMatchedLength()
|
||||
<< ", example id: " << fragment.getExampleId();
|
||||
Logger::log(ss.str());
|
||||
PGresult * result = connection.execute(query, params);
|
||||
|
||||
results.push_back(SimpleSearchResult(connection.getIntValue(result,0,0),
|
||||
connection.getStringValue(result,0,3),
|
||||
connection.getStringValue(result,0,1),
|
||||
connection.getStringValue(result,0,2)));
|
||||
connection.clearResult(result);
|
||||
}
|
||||
connection.endTransaction();
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence> ts) {
|
||||
std::vector<int> result;
|
||||
BOOST_FOREACH(const TokenAnnotation & token, ts->getTokens()) {
|
||||
@ -54,3 +84,4 @@ std::vector<int> UnitDAO::_getTokenPositions(boost::shared_ptr<TokenizedSentence
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -5,8 +5,12 @@
|
||||
#include <vector>
|
||||
|
||||
#include <concordia/tokenized_sentence.hpp>
|
||||
#include <concordia/substring_occurence.hpp>
|
||||
#include <concordia/matched_pattern_fragment.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "simple_search_result.hpp"
|
||||
|
||||
class UnitDAO {
|
||||
public:
|
||||
/*! Constructor.
|
||||
@ -20,6 +24,9 @@ public:
|
||||
boost::shared_ptr<TokenizedSentence> sourceSentence,
|
||||
std::string & targetSentence,
|
||||
int tmId);
|
||||
|
||||
std::vector<SimpleSearchResult> getSearchResults(std::vector<MatchedPatternFragment> concordiaResults);
|
||||
|
||||
private:
|
||||
std::vector<int> _getTokenPositions(boost::shared_ptr<TokenizedSentence> ts);
|
||||
};
|
||||
|
@ -1 +0,0 @@
|
||||
select substring(source_segment,source_tokens[start_token*2+1]+1,source_tokens[end_token*2+2]-source_tokens[start_token*2+1]) from unit where id = 3;
|
@ -1,3 +0,0 @@
|
||||
http://chriswu.me/blog/writing-hello-world-in-fcgi-with-c-plus-plus/
|
||||
|
||||
Use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as the character encoding in the <form>).
|
@ -1,175 +0,0 @@
|
||||
/*
|
||||
* A simple FastCGI application example in C++.
|
||||
*
|
||||
* $Id: echo-cpp.cpp,v 1.10 2002/02/25 00:46:17 robs Exp $
|
||||
*
|
||||
* Copyright (c) 2001 Rob Saccoccio and Chelsea Networks
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#ifdef _WIN32
|
||||
#include <process.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
extern char ** environ;
|
||||
#endif
|
||||
#include "fcgio.h"
|
||||
#include "fcgi_config.h" // HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Maximum number of bytes allowed to be read from stdin
|
||||
static const unsigned long STDIN_MAX = 1000000;
|
||||
|
||||
/*
 * Prints every string of a null-terminated array to std::cout, one per
 * line, enclosed in an HTML <PRE> element.
 */
static void penv(const char * const * envp)
{
    std::cout << "<PRE>\n";

    while (*envp != 0)
    {
        std::cout << *envp << "\n";
        ++envp;
    }

    std::cout << "</PRE>\n";
}
|
||||
|
||||
/*
 * Reads the request body from std::cin.
 *
 * When the request has a CONTENT_LENGTH parameter, a buffer is allocated
 * with new[] and stored in *content, and at most STDIN_MAX bytes are read
 * into it; a value that does not parse completely is reported on
 * std::cerr and treated as STDIN_MAX. Without the parameter *content is
 * set to 0 and nothing is read. Whatever is left on std::cin is discarded
 * afterwards.
 *
 * Returns the number of bytes actually read into the buffer.
 */
static long gstdin(FCGX_Request * request, char ** content)
{
    unsigned long bodyLength = STDIN_MAX;
    char * const lengthText = FCGX_GetParam("CONTENT_LENGTH", request->envp);

    if (lengthText == 0)
    {
        // *never* read stdin when CONTENT_LENGTH is missing
        *content = 0;
        bodyLength = 0;
    }
    else
    {
        char * parseEnd = 0;
        bodyLength = strtol(lengthText, &parseEnd, 10);

        // Anything left after the number means the value was malformed.
        if (*parseEnd != '\0')
        {
            std::cerr << "can't parse \"CONTENT_LENGTH="
                      << FCGX_GetParam("CONTENT_LENGTH", request->envp)
                      << "\"\n";
            bodyLength = STDIN_MAX;
        }

        // *always* put a cap on the amount of data that will be read
        if (bodyLength > STDIN_MAX)
        {
            bodyLength = STDIN_MAX;
        }

        *content = new char[bodyLength];

        std::cin.read(*content, bodyLength);
        bodyLength = std::cin.gcount();
    }

    // Chew up any remaining stdin - this shouldn't be necessary
    // but is because mod_fastcgi doesn't handle it correctly.
    // ignore() doesn't set the eof bit in some versions of glibc++
    // so gcount() is checked instead of eof().
    do
    {
        std::cin.ignore(1024);
    }
    while (std::cin.gcount() == 1024);

    return bodyLength;
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int count = 0;
|
||||
long pid = getpid();
|
||||
|
||||
streambuf * cin_streambuf = cin.rdbuf();
|
||||
streambuf * cout_streambuf = cout.rdbuf();
|
||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
||||
|
||||
FCGX_Request request;
|
||||
|
||||
FCGX_Init();
|
||||
FCGX_InitRequest(&request, 0, 0);
|
||||
|
||||
while (FCGX_Accept_r(&request) == 0)
|
||||
{
|
||||
// Note that the default bufsize (0) will cause the use of iostream
|
||||
// methods that require positioning (such as peek(), seek(),
|
||||
// unget() and putback()) to fail (in favour of more efficient IO).
|
||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
||||
|
||||
#if HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
||||
cin = &cin_fcgi_streambuf;
|
||||
cout = &cout_fcgi_streambuf;
|
||||
cerr = &cerr_fcgi_streambuf;
|
||||
#else
|
||||
cin.rdbuf(&cin_fcgi_streambuf);
|
||||
cout.rdbuf(&cout_fcgi_streambuf);
|
||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
||||
#endif
|
||||
|
||||
// Although FastCGI supports writing before reading,
|
||||
// many http clients (browsers) don't support it (so
|
||||
// the connection deadlocks until a timeout expires!).
|
||||
char * content;
|
||||
unsigned long clen = gstdin(&request, &content);
|
||||
|
||||
cout << "Content-type: text/html\r\n"
|
||||
"\r\n"
|
||||
"<TITLE>echo-cpp</TITLE>\n"
|
||||
"<H1>echo-cpp</H1>\n"
|
||||
"<H4>PID: " << pid << "</H4>\n"
|
||||
"<H4>Request Number: " << ++count << "</H4>\n";
|
||||
|
||||
cout << "<H4>Request Environment</H4>\n";
|
||||
penv(request.envp);
|
||||
|
||||
cout << "<H4>Process/Initial Environment</H4>\n";
|
||||
penv(environ);
|
||||
|
||||
cout << "<H4>Standard Input - " << clen;
|
||||
if (clen == STDIN_MAX) cout << " (STDIN_MAX)";
|
||||
cout << " bytes</H4>\n";
|
||||
if (clen) cout.write(content, clen);
|
||||
|
||||
if (content) delete []content;
|
||||
|
||||
// If the output streambufs had non-zero bufsizes and
|
||||
// were constructed outside of the accept loop (i.e.
|
||||
// their destructor won't be called here), they would
|
||||
// have to be flushed here.
|
||||
}
|
||||
|
||||
#if HAVE_IOSTREAM_WITHASSIGN_STREAMBUF
|
||||
cin = cin_streambuf;
|
||||
cout = cout_streambuf;
|
||||
cerr = cerr_streambuf;
|
||||
#else
|
||||
cin.rdbuf(cin_streambuf);
|
||||
cout.rdbuf(cout_streambuf);
|
||||
cerr.rdbuf(cerr_streambuf);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
#include <iostream>
|
||||
#include "fcgio.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(void) {
|
||||
// Backup the stdio streambufs
|
||||
streambuf * cin_streambuf = cin.rdbuf();
|
||||
streambuf * cout_streambuf = cout.rdbuf();
|
||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
||||
|
||||
FCGX_Request request;
|
||||
|
||||
FCGX_Init();
|
||||
FCGX_InitRequest(&request, 0, 0);
|
||||
|
||||
while (FCGX_Accept_r(&request) == 0) {
|
||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
||||
|
||||
cin.rdbuf(&cin_fcgi_streambuf);
|
||||
cout.rdbuf(&cout_fcgi_streambuf);
|
||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
||||
|
||||
cout << "Content-type: text/html\r\n"
|
||||
<< "\r\n"
|
||||
<< "<html>\n"
|
||||
<< " <head>\n"
|
||||
<< " <title>Hello, World!</title>\n"
|
||||
<< " </head>\n"
|
||||
<< " <body>\n"
|
||||
<< " <h1>Hello, World!</h1>\n"
|
||||
<< " </body>\n"
|
||||
<< "</html>\n";
|
||||
|
||||
// Note: the fcgi_streambuf destructor will auto flush
|
||||
}
|
||||
|
||||
// restore stdio streambufs
|
||||
cin.rdbuf(cin_streambuf);
|
||||
cout.rdbuf(cout_streambuf);
|
||||
cerr.rdbuf(cerr_streambuf);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"zu\"pełnie nowe zdanie", "targetSentence":"zażółć gęślą jaźńZAŻÓŁĆ GĘŚLĄ JAŹŃ", "tmId":1234782314}' http://localhost
|
||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"addSentence", "sourceSentence":"Marysia ma rysia", "targetSentence":"Mary has a bobcat", "tmId":1}' http://localhost
|
||||
|
||||
|
||||
#curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "sentence":"zupełnie nowe"}' http://localhost
|
||||
curl -H "Content-Type: application/json" -X POST -d '{"operation":"simpleSearch", "pattern":"ma rysia"}' http://localhost
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user