modified todo, removed concordia-server
This commit is contained in:
parent
07d5d4438b
commit
680eb54ae5
10
TODO.txt
10
TODO.txt
@ -1,17 +1,19 @@
|
|||||||
---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
|
---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
|
||||||
|
|
||||||
- document the code
|
- mess with gcc performance optimization options (https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html)
|
||||||
- prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
|
|
||||||
- copyright libdivsufsort i psi-toolkit (in documentation)
|
|
||||||
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
|
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
|
||||||
- testy zużycia pamięci
|
- testy zużycia pamięci
|
||||||
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.
|
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.
|
||||||
- Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzebaby zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe).
|
- Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzebaby zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe).
|
||||||
- concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---------------------------- Archive -----------------------------
|
---------------------------- Archive -----------------------------
|
||||||
|
DONE - concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
|
||||||
|
DONE - document the code
|
||||||
|
DONE - prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
|
||||||
|
DONE - copyright libdivsufsort i psi-toolkit (in documentation)
|
||||||
|
|
||||||
DONE - puścić 100% search test na jrc
|
DONE - puścić 100% search test na jrc
|
||||||
|
|
||||||
REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek).
|
REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek).
|
||||||
|
@ -1,21 +0,0 @@
|
|||||||
|
|
||||||
# Build the concordia-server-starter executable from its single source file.
add_executable(concordia-server-starter concordia_server_starter.cpp)

target_link_libraries(concordia-server-starter
    concordia-server
    concordia
    ${Boost_LIBRARIES}
    ${FCGIPP_LIB}
    ${FCGI_LIB}
    ${LIBCONFIG_LIB}
)

# Optional regular-expression libraries. Each one is linked whenever its own
# switch is enabled; when both are enabled re2 is linked before pcrecpp.
if (WITH_RE2)
    target_link_libraries(concordia-server-starter re2)
endif(WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server-starter pcrecpp)
endif(WITH_PCRE)

# =====================================

install(TARGETS concordia-server-starter DESTINATION bin/)
|
|
||||||
|
|
||||||
|
|
@ -1,99 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
|
||||||
#include <fcgio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "concordia-server/concordia_server.hpp"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
// Upper bound, in bytes, on how much of a request body gstdin() reads into memory.
static const unsigned long STDIN_MAX = 1000000;
|
|
||||||
|
|
||||||
/*!
 * Reads the body of the current request from std::cin.
 *
 * The number of bytes to read is taken from the CONTENT_LENGTH request
 * parameter and is always capped at STDIN_MAX. If CONTENT_LENGTH is present
 * but cannot be parsed as a non-negative number, a diagnostic is written to
 * std::cerr and up to STDIN_MAX bytes are read.
 *
 * \param request the FastCGI request whose environment is inspected
 * \param content receives a buffer allocated with new[] holding the bytes
 *                read, followed by a terminating '\0'; the caller owns it and
 *                must release it with delete[]. It is set to 0 when
 *                CONTENT_LENGTH is missing, in which case nothing is read.
 * \returns the number of body bytes stored in *content (the terminating
 *          '\0' is not counted)
 */
static long gstdin(FCGX_Request * request, char ** content)
{
    const char * const lengthText =
        FCGX_GetParam("CONTENT_LENGTH", request->envp);
    unsigned long clen = 0;

    if (lengthText)
    {
        char * parseEnd = 0;
        const long parsed = strtol(lengthText, &parseEnd, 10);

        if (*parseEnd)
        {
            std::cerr << "can't parse \"CONTENT_LENGTH="
                      << lengthText
                      << "\"\n";
            clen = STDIN_MAX;
        }
        else if (parsed < 0)
        {
            // A negative length is meaningless; fall back to the cap
            // explicitly instead of relying on signed-to-unsigned wrap-around.
            clen = STDIN_MAX;
        }
        else
        {
            clen = static_cast<unsigned long>(parsed);
        }

        // *always* put a cap on the amount of data that will be read
        if (clen > STDIN_MAX) clen = STDIN_MAX;

        // One extra byte so the buffer can safely be used as a C string.
        *content = new char[clen + 1];

        std::cin.read(*content, clen);
        clen = std::cin.gcount();
        (*content)[clen] = '\0';
    }
    else
    {
        // *never* read stdin when CONTENT_LENGTH is missing
        *content = 0;
        clen = 0;
    }

    // Chew up any remaining stdin - this shouldn't be necessary
    // but is because mod_fastcgi doesn't handle it correctly.

    // ignore() doesn't set the eof bit in some versions of glibc++
    // so use gcount() instead of eof()...
    do std::cin.ignore(1024); while (std::cin.gcount() == 1024);

    return clen;
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
|
|
||||||
// Backup the stdio streambufs
|
|
||||||
streambuf * cin_streambuf = cin.rdbuf();
|
|
||||||
streambuf * cout_streambuf = cout.rdbuf();
|
|
||||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
|
||||||
|
|
||||||
ConcordiaServer concordiaServer("concordia.cfg");
|
|
||||||
|
|
||||||
FCGX_Request request;
|
|
||||||
|
|
||||||
FCGX_Init();
|
|
||||||
FCGX_InitRequest(&request, 0, 0);
|
|
||||||
|
|
||||||
while (FCGX_Accept_r(&request) == 0) {
|
|
||||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
|
||||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
|
||||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
|
||||||
|
|
||||||
cin.rdbuf(&cin_fcgi_streambuf);
|
|
||||||
cout.rdbuf(&cout_fcgi_streambuf);
|
|
||||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
|
||||||
|
|
||||||
|
|
||||||
char * content;
|
|
||||||
unsigned long clen = gstdin(&request, &content);
|
|
||||||
|
|
||||||
string requestString(content);
|
|
||||||
|
|
||||||
//TODO passing unicode strings
|
|
||||||
//boost::replace_all(requestString, "+", " ");
|
|
||||||
|
|
||||||
cout << concordiaServer.handleRequest(requestString);
|
|
||||||
|
|
||||||
// Note: the fcgi_streambuf destructor will auto flush
|
|
||||||
}
|
|
||||||
|
|
||||||
// restore stdio streambufs
|
|
||||||
cin.rdbuf(cin_streambuf);
|
|
||||||
cout.rdbuf(cout_streambuf);
|
|
||||||
cerr.rdbuf(cerr_streambuf);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -1,24 +0,0 @@
|
|||||||
# Shared library built from concordia_server.cpp.
add_library(concordia-server SHARED concordia_server.cpp)

add_subdirectory(t)
# =====================================

install(TARGETS concordia-server DESTINATION lib/)
install(FILES concordia_server.hpp DESTINATION include/concordia-server/)

target_link_libraries(concordia-server log4cpp)
target_link_libraries(concordia-server ${Boost_LIBRARIES})

# Optional regular-expression libraries. Each one is linked whenever its own
# switch is enabled; when both are enabled re2 is linked before pcrecpp.
if (WITH_RE2)
    target_link_libraries(concordia-server re2)
endif(WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server pcrecpp)
endif(WITH_PCRE)
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
Use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as the character encoding in the <form>).
|
|
@ -1,62 +0,0 @@
|
|||||||
#include "concordia-server/concordia_server.hpp"
|
|
||||||
|
|
||||||
#include <sstream>
|
|
||||||
#include <concordia/example.hpp>
|
|
||||||
#include <concordia/substring_occurence.hpp>
|
|
||||||
#include <boost/ptr_container/ptr_vector.hpp>
|
|
||||||
#include <boost/foreach.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
/*! Creates the wrapped Concordia instance from the given configuration file.
    \param configFilePath path to the Concordia configuration file
    \throws ConcordiaException
*/
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
                                         throw(ConcordiaException)
    : _concordia(new Concordia(configFilePath)) {
}
|
|
||||||
|
|
||||||
/*! Destructor. Performs no explicit clean-up. */
ConcordiaServer::~ConcordiaServer()
{
}
|
|
||||||
|
|
||||||
string ConcordiaServer::handleRequest(string & requestString) {
|
|
||||||
stringstream ss;
|
|
||||||
try {
|
|
||||||
ss << "Content-type: text/html\r\n"
|
|
||||||
<< "\r\n"
|
|
||||||
<< "<html>\n"
|
|
||||||
<< " <head>\n"
|
|
||||||
<< " <title>Hello, World!</title>\n"
|
|
||||||
<< " </head>\n"
|
|
||||||
<< " <body>\n"
|
|
||||||
<< " <h1>Hello, World!</h1>\n"
|
|
||||||
<< " The concordia version is: "<< _concordia->getVersion() << "\n"
|
|
||||||
<< " <h1>Input data:</h1>\n"
|
|
||||||
<< requestString;
|
|
||||||
|
|
||||||
ss << " <h1>Adding content as example:</h1>\n";
|
|
||||||
|
|
||||||
Example example1(requestString, 0);
|
|
||||||
Example example2("Ala ma kota", 1);
|
|
||||||
Example example3("Marysia nie ma kota chyba", 2);
|
|
||||||
_concordia->addExample(example1);
|
|
||||||
_concordia->addExample(example2);
|
|
||||||
_concordia->addExample(example3);
|
|
||||||
|
|
||||||
_concordia->refreshSAfromRAM();
|
|
||||||
|
|
||||||
ss << " <h1>Searching ma kota:</h1>\n";
|
|
||||||
boost::ptr_vector<SubstringOccurence> result =
|
|
||||||
_concordia->simpleSearch("ma kota");
|
|
||||||
BOOST_FOREACH(SubstringOccurence occurence, result) {
|
|
||||||
ss << "\t\tfound match in sentence number: "
|
|
||||||
<< occurence.getId() << "<br/><br/>";
|
|
||||||
}
|
|
||||||
ss << " </body>\n"
|
|
||||||
<< "</html>\n";
|
|
||||||
|
|
||||||
} catch (ConcordiaException & e) {
|
|
||||||
ss << "<h1> Concordia error:" << e.what() << "</h1>";
|
|
||||||
|
|
||||||
}
|
|
||||||
return ss.str();
|
|
||||||
|
|
||||||
}
|
|
@ -1,30 +0,0 @@
|
|||||||
#ifndef CONCORDIA_SERVER_HDR
|
|
||||||
#define CONCORDIA_SERVER_HDR
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <concordia/concordia_exception.hpp>
|
|
||||||
#include <boost/shared_ptr.hpp>
|
|
||||||
#include <concordia/concordia.hpp>
|
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
class ConcordiaServer {
|
|
||||||
public:
|
|
||||||
/*! Constructor.
|
|
||||||
\param configFilePath path to the Concordia configuration file
|
|
||||||
\throws ConcordiaException
|
|
||||||
*/
|
|
||||||
explicit ConcordiaServer(const std::string & configFilePath)
|
|
||||||
throw(ConcordiaException);
|
|
||||||
/*! Destructor.
|
|
||||||
*/
|
|
||||||
virtual ~ConcordiaServer();
|
|
||||||
|
|
||||||
string handleRequest(string & requestString);
|
|
||||||
|
|
||||||
private:
|
|
||||||
boost::shared_ptr<Concordia> _concordia;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,6 +0,0 @@
|
|||||||
# Library holding the unit tests in this directory.
add_library(concordia-server-tests test_concordia.cpp test_concordia_config.cpp)

target_link_libraries(concordia-server-tests
    concordia
    ${LIBCONFIG_LIB}
    concordia-tests-common
)
|
|
@ -1,19 +0,0 @@
|
|||||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
|
||||||
#include "concordia/concordia.hpp"
|
|
||||||
#include "tests/common/test_resources_manager.hpp"
|
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE(concordia_main)
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaVersion )
|
|
||||||
{
|
|
||||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
|
||||||
string version = concordia.getVersion();
|
|
||||||
BOOST_CHECK_EQUAL( version , "0.1");
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
|
@ -1,52 +0,0 @@
|
|||||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
|
||||||
|
|
||||||
#include "concordia/concordia_config.hpp"
|
|
||||||
#include "tests/common/test_resources_manager.hpp"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <list>
|
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE(concordia_config)
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConfigParameters )
|
|
||||||
{
|
|
||||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-test.cfg"));
|
|
||||||
BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" );
|
|
||||||
}
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
|
|
||||||
{
|
|
||||||
bool exceptionThrown = false;
|
|
||||||
string message = "";
|
|
||||||
try {
|
|
||||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg"));
|
|
||||||
} catch (ConcordiaException & e) {
|
|
||||||
exceptionThrown = true;
|
|
||||||
message = e.what();
|
|
||||||
}
|
|
||||||
BOOST_CHECK_EQUAL(exceptionThrown, true);
|
|
||||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "I/O error reading config file"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( InvalidConfigTest )
|
|
||||||
{
|
|
||||||
bool exceptionThrown = false;
|
|
||||||
string message = "";
|
|
||||||
try {
|
|
||||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg"));
|
|
||||||
} catch (ConcordiaException & e) {
|
|
||||||
exceptionThrown = true;
|
|
||||||
message = e.what();
|
|
||||||
}
|
|
||||||
BOOST_CHECK_EQUAL(exceptionThrown, true);
|
|
||||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "Error parsing config file"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
|
@ -1,7 +0,0 @@
|
|||||||
<html>
  <body>
    <!-- Manual test form: POSTs a single text field, declared as UTF-8,
         to the server listening on localhost:8081. -->
    <form method="POST"
          action="http://localhost:8081"
          enctype="multipart/form-data"
          accept-charset="UTF-8">
      <input type="text" name="input_field" />
    </form>
  </body>
</html>
|
|
Loading…
Reference in New Issue
Block a user