modified todo, removed concordia-server
This commit is contained in:
parent
07d5d4438b
commit
680eb54ae5
10
TODO.txt
10
TODO.txt
@ -1,17 +1,19 @@
|
||||
---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
|
||||
|
||||
- document the code
|
||||
- prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
|
||||
- copyright libdivsufsort i psi-toolkit (in documentation)
|
||||
- mess with gcc performance optimization options (https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html)
|
||||
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
|
||||
- testy zużycia pamięci
|
||||
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.
|
||||
- Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzeba by zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe).
|
||||
- concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
|
||||
|
||||
|
||||
|
||||
---------------------------- Archive -----------------------------
|
||||
DONE - concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
|
||||
DONE - document the code
|
||||
DONE - prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
|
||||
DONE - copyright libdivsufsort i psi-toolkit (in documentation)
|
||||
|
||||
DONE - puścić 100% search test na jrc
|
||||
|
||||
REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek).
|
||||
|
@ -1,21 +0,0 @@
|
||||
|
||||
# Executable that fronts the concordia-server library; it is linked against
# the FastCGI libraries (${FCGIPP_LIB}, ${FCGI_LIB}) in addition to concordia.
add_executable(concordia-server-starter concordia_server_starter.cpp)

target_link_libraries(concordia-server-starter
    concordia-server
    concordia
    ${Boost_LIBRARIES}
    ${FCGIPP_LIB}
    ${FCGI_LIB}
    ${LIBCONFIG_LIB}
)

# Optional regex back-ends. Each one is linked whenever its own switch is on,
# independently of the other; re2 is added before pcrecpp.
if (WITH_RE2)
    target_link_libraries(concordia-server-starter re2)
endif (WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server-starter pcrecpp)
endif (WITH_PCRE)

# =====================================

install(TARGETS concordia-server-starter DESTINATION bin/)
|
||||
|
||||
|
@ -1,99 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <fcgio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "concordia-server/concordia_server.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
static const unsigned long STDIN_MAX = 1000000;
|
||||
|
||||
static long gstdin(FCGX_Request * request, char ** content)
|
||||
{
|
||||
char * clenstr = FCGX_GetParam("CONTENT_LENGTH", request->envp);
|
||||
unsigned long clen = STDIN_MAX;
|
||||
|
||||
if (clenstr)
|
||||
{
|
||||
clen = strtol(clenstr, &clenstr, 10);
|
||||
if (*clenstr)
|
||||
{
|
||||
cerr << "can't parse \"CONTENT_LENGTH="
|
||||
<< FCGX_GetParam("CONTENT_LENGTH", request->envp)
|
||||
<< "\"\n";
|
||||
clen = STDIN_MAX;
|
||||
}
|
||||
|
||||
// *always* put a cap on the amount of data that will be read
|
||||
if (clen > STDIN_MAX) clen = STDIN_MAX;
|
||||
|
||||
*content = new char[clen];
|
||||
|
||||
cin.read(*content, clen);
|
||||
clen = cin.gcount();
|
||||
}
|
||||
else
|
||||
{
|
||||
// *never* read stdin when CONTENT_LENGTH is missing or unparsable
|
||||
*content = 0;
|
||||
clen = 0;
|
||||
}
|
||||
|
||||
// Chew up any remaining stdin - this shouldn't be necessary
|
||||
// but is because mod_fastcgi doesn't handle it correctly.
|
||||
|
||||
// ignore() doesn't set the eof bit in some versions of glibc++
|
||||
// so use gcount() instead of eof()...
|
||||
do cin.ignore(1024); while (cin.gcount() == 1024);
|
||||
|
||||
return clen;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
// Backup the stdio streambufs
|
||||
streambuf * cin_streambuf = cin.rdbuf();
|
||||
streambuf * cout_streambuf = cout.rdbuf();
|
||||
streambuf * cerr_streambuf = cerr.rdbuf();
|
||||
|
||||
ConcordiaServer concordiaServer("concordia.cfg");
|
||||
|
||||
FCGX_Request request;
|
||||
|
||||
FCGX_Init();
|
||||
FCGX_InitRequest(&request, 0, 0);
|
||||
|
||||
while (FCGX_Accept_r(&request) == 0) {
|
||||
fcgi_streambuf cin_fcgi_streambuf(request.in);
|
||||
fcgi_streambuf cout_fcgi_streambuf(request.out);
|
||||
fcgi_streambuf cerr_fcgi_streambuf(request.err);
|
||||
|
||||
cin.rdbuf(&cin_fcgi_streambuf);
|
||||
cout.rdbuf(&cout_fcgi_streambuf);
|
||||
cerr.rdbuf(&cerr_fcgi_streambuf);
|
||||
|
||||
|
||||
char * content;
|
||||
unsigned long clen = gstdin(&request, &content);
|
||||
|
||||
string requestString(content);
|
||||
|
||||
//TODO passing unicode strings
|
||||
//boost::replace_all(requestString, "+", " ");
|
||||
|
||||
cout << concordiaServer.handleRequest(requestString);
|
||||
|
||||
// Note: the fcgi_streambuf destructor will auto flush
|
||||
}
|
||||
|
||||
// restore stdio streambufs
|
||||
cin.rdbuf(cin_streambuf);
|
||||
cout.rdbuf(cout_streambuf);
|
||||
cerr.rdbuf(cerr_streambuf);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
# Shared library built from the ConcordiaServer implementation.
add_library(concordia-server SHARED concordia_server.cpp)

add_subdirectory(t)

# =====================================

install(TARGETS concordia-server DESTINATION lib/)
install(FILES concordia_server.hpp DESTINATION include/concordia-server/)

target_link_libraries(concordia-server log4cpp)
target_link_libraries(concordia-server ${Boost_LIBRARIES})

# Optional regex back-ends. Each one is linked whenever its own switch is on,
# independently of the other; re2 is added before pcrecpp.
if (WITH_RE2)
    target_link_libraries(concordia-server re2)
endif (WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server pcrecpp)
endif (WITH_PCRE)
|
||||
|
@ -1 +0,0 @@
|
||||
Use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as the character encoding in the <form>).
|
@ -1,62 +0,0 @@
|
||||
#include "concordia-server/concordia_server.hpp"
|
||||
|
||||
#include <sstream>
|
||||
#include <concordia/example.hpp>
|
||||
#include <concordia/substring_occurence.hpp>
|
||||
#include <boost/ptr_container/ptr_vector.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
|
||||
// Creates the server together with the Concordia instance it delegates to.
// The instance is constructed from configFilePath and held in _concordia.
// The exception specification allows ConcordiaException to propagate.
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
                                         throw(ConcordiaException):
    _concordia(new Concordia(configFilePath)) {
}
|
||||
|
||||
// Nothing is released by hand here; the members clean up after themselves.
ConcordiaServer::~ConcordiaServer() {}
|
||||
|
||||
// Returns a copy of raw in which the characters that are significant in HTML
// (&, <, >, " and ') are replaced by character references, so the text can be
// embedded in a page without being interpreted as markup.
static std::string escapeHtml(const std::string & raw) {
    std::string escaped;
    escaped.reserve(raw.size());
    for (std::string::size_type i = 0; i < raw.size(); ++i) {
        switch (raw[i]) {
            case '&':  escaped += "&amp;";  break;
            case '<':  escaped += "&lt;";   break;
            case '>':  escaped += "&gt;";   break;
            case '"':  escaped += "&quot;"; break;
            case '\'': escaped += "&#39;";  break;
            default:   escaped += raw[i];   break;
        }
    }
    return escaped;
}

// Builds the complete response for one request: a Content-type header, a blank
// line and an HTML page.
//
// The page shows the Concordia version and echoes the request text. The
// request text and two fixed sentences are then added as examples (ids 0, 1
// and 2), the index is refreshed and "ma kota" is searched; one line is
// printed per occurrence found.
//
// requestString - request text as received. It is not modified. It is stored
//                 as an example unchanged and HTML-escaped only where it is
//                 echoed into the page.
//
// Returns the response text. When a ConcordiaException is thrown, the output
// produced so far is kept and an error heading with the exception message is
// appended instead of the rest of the page.
string ConcordiaServer::handleRequest(string & requestString) {
    stringstream ss;
    try {
        ss << "Content-type: text/html\r\n"
           << "\r\n"
           << "<html>\n"
           << "  <head>\n"
           << "    <title>Hello, World!</title>\n"
           << "  </head>\n"
           << "  <body>\n"
           << "    <h1>Hello, World!</h1>\n"
           << "    The concordia version is: "<< _concordia->getVersion() << "\n"
           << "    <h1>Input data:</h1>\n"
           // The request comes from the client: never emit it as raw markup.
           << escapeHtml(requestString);

        ss << "    <h1>Adding content as example:</h1>\n";

        Example example1(requestString, 0);
        Example example2("Ala ma kota", 1);
        Example example3("Marysia nie ma kota chyba", 2);
        _concordia->addExample(example1);
        _concordia->addExample(example2);
        _concordia->addExample(example3);

        _concordia->refreshSAfromRAM();

        ss << "    <h1>Searching ma kota:</h1>\n";
        boost::ptr_vector<SubstringOccurence> result =
                                 _concordia->simpleSearch("ma kota");
        // Iterate by reference: there is no need to copy each occurrence.
        BOOST_FOREACH(SubstringOccurence & occurence, result) {
            ss << "\t\tfound match in sentence number: "
               << occurence.getId() << "<br/><br/>";
        }
        ss << "  </body>\n"
           << "</html>\n";

    } catch (ConcordiaException & e) {
        // The message may contain request- or configuration-derived text.
        ss << "<h1> Concordia error:" << escapeHtml(e.what()) << "</h1>";
    }
    return ss.str();
}
|
@ -1,30 +0,0 @@
|
||||
#ifndef CONCORDIA_SERVER_HDR
|
||||
#define CONCORDIA_SERVER_HDR
|
||||
|
||||
#include <string>
|
||||
#include <concordia/concordia_exception.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <concordia/concordia.hpp>
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ConcordiaServer {
|
||||
public:
|
||||
/*! Constructor.
|
||||
\param configFilePath path to the Concordia configuration file
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
explicit ConcordiaServer(const std::string & configFilePath)
|
||||
throw(ConcordiaException);
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~ConcordiaServer();
|
||||
|
||||
string handleRequest(string & requestString);
|
||||
|
||||
private:
|
||||
boost::shared_ptr<Concordia> _concordia;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,6 +0,0 @@
|
||||
# Library that bundles the unit-test sources of this directory.
add_library(concordia-server-tests
    test_concordia.cpp
    test_concordia_config.cpp
)

target_link_libraries(concordia-server-tests
    concordia
    ${LIBCONFIG_LIB}
    concordia-tests-common
)
|
@ -1,19 +0,0 @@
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
#include "concordia/concordia.hpp"
|
||||
#include "tests/common/test_resources_manager.hpp"
|
||||
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(concordia_main)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConcordiaVersion )
|
||||
{
|
||||
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||
string version = concordia.getVersion();
|
||||
BOOST_CHECK_EQUAL( version , "0.1");
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
@ -1,52 +0,0 @@
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
|
||||
#include "concordia/concordia_config.hpp"
|
||||
#include "tests/common/test_resources_manager.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(concordia_config)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( ConfigParameters )
|
||||
{
|
||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-test.cfg"));
|
||||
BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" );
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
|
||||
{
|
||||
bool exceptionThrown = false;
|
||||
string message = "";
|
||||
try {
|
||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg"));
|
||||
} catch (ConcordiaException & e) {
|
||||
exceptionThrown = true;
|
||||
message = e.what();
|
||||
}
|
||||
BOOST_CHECK_EQUAL(exceptionThrown, true);
|
||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "I/O error reading config file"), true);
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE( InvalidConfigTest )
|
||||
{
|
||||
bool exceptionThrown = false;
|
||||
string message = "";
|
||||
try {
|
||||
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg"));
|
||||
} catch (ConcordiaException & e) {
|
||||
exceptionThrown = true;
|
||||
message = e.what();
|
||||
}
|
||||
BOOST_CHECK_EQUAL(exceptionThrown, true);
|
||||
BOOST_CHECK_EQUAL(boost::starts_with(message, "Error parsing config file"), true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
@ -1,7 +0,0 @@
|
||||
<!DOCTYPE html>
<html>
    <head>
        <!-- Declare the page encoding so the form content is handled as UTF-8. -->
        <meta charset="UTF-8" />
        <title>concordia-server test form</title>
    </head>
    <body>
        <!-- Posts the text field to the locally running server on port 8081. -->
        <form enctype="multipart/form-data" action="http://localhost:8081" accept-charset="UTF-8" method="POST">
            <input type="text" name="input_field" />
            <!-- Unnamed submit control: adds no extra field to the posted data. -->
            <input type="submit" value="Send" />
        </form>
    </body>
</html>
|
Loading…
Reference in New Issue
Block a user