diff --git a/TODO.txt b/TODO.txt index 1a88419..81d7923 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,17 +1,19 @@ ---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) ----------------------------- -- document the code -- prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author) -- copyright libdivsufsort i psi-toolkit (in documentation) +- mess with gcc performance optimization options (https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html) IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie? - testy zużycia pamięci - Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła. - Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzebaby zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe). -- concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server). ---------------------------- Archive ----------------------------- +DONE - concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server). +DONE - document the code +DONE - prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author) +DONE - copyright libdivsufsort i psi-toolkit (in documentation) + DONE - puścić 100% search test na jrc REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek). diff --git a/concordia-server-starter/CMakeLists.txt b/concordia-server-starter/CMakeLists.txt deleted file mode 100644 index 4546796..0000000 --- a/concordia-server-starter/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ - -add_executable(concordia-server-starter concordia_server_starter.cpp) - -target_link_libraries(concordia-server-starter concordia-server concordia ${Boost_LIBRARIES} ${FCGIPP_LIB} ${FCGI_LIB} ${LIBCONFIG_LIB}) - -if (WITH_RE2) - target_link_libraries(concordia-server-starter re2) - if (WITH_PCRE) - target_link_libraries(concordia-server-starter pcrecpp) - endif(WITH_PCRE) -else(WITH_RE2) - if (WITH_PCRE) - target_link_libraries(concordia-server-starter pcrecpp) - endif(WITH_PCRE) -endif(WITH_RE2) - -# ===================================== - -install(TARGETS concordia-server-starter DESTINATION bin/) - - diff --git a/concordia-server-starter/concordia_server_starter.cpp b/concordia-server-starter/concordia_server_starter.cpp deleted file mode 100644 index 3fa83d0..0000000 --- a/concordia-server-starter/concordia_server_starter.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "concordia-server/concordia_server.hpp" - -using namespace std; - -static const unsigned long STDIN_MAX = 1000000; - -static long gstdin(FCGX_Request * request, char ** content) -{ - char * clenstr = FCGX_GetParam("CONTENT_LENGTH", request->envp); - unsigned long clen = STDIN_MAX; - - if (clenstr) - { - clen = strtol(clenstr, &clenstr, 10); - if (*clenstr) - { - cerr << "can't parse \"CONTENT_LENGTH=" - << FCGX_GetParam("CONTENT_LENGTH", request->envp) - << "\"\n"; - clen = STDIN_MAX; - } - - // *always* put a cap on the amount of data that will be read - if (clen > STDIN_MAX) clen = STDIN_MAX; - - *content = new char[clen]; - - cin.read(*content, clen); - clen = cin.gcount(); - } - else - { - // *never* read stdin when CONTENT_LENGTH is missing or unparsable - *content = 0; - clen = 0; - } - - // Chew up any remaining stdin - this shouldn't be necessary - // but is because mod_fastcgi doesn't handle it correctly. - - // ignore() doesn't set the eof bit in some versions of glibc++ - // so use gcount() instead of eof()... - do cin.ignore(1024); while (cin.gcount() == 1024); - - return clen; -} - -int main(int argc, char** argv) { - - // Backup the stdio streambufs - streambuf * cin_streambuf = cin.rdbuf(); - streambuf * cout_streambuf = cout.rdbuf(); - streambuf * cerr_streambuf = cerr.rdbuf(); - - ConcordiaServer concordiaServer("concordia.cfg"); - - FCGX_Request request; - - FCGX_Init(); - FCGX_InitRequest(&request, 0, 0); - - while (FCGX_Accept_r(&request) == 0) { - fcgi_streambuf cin_fcgi_streambuf(request.in); - fcgi_streambuf cout_fcgi_streambuf(request.out); - fcgi_streambuf cerr_fcgi_streambuf(request.err); - - cin.rdbuf(&cin_fcgi_streambuf); - cout.rdbuf(&cout_fcgi_streambuf); - cerr.rdbuf(&cerr_fcgi_streambuf); - - - char * content; - unsigned long clen = gstdin(&request, &content); - - string requestString(content); - - //TODO passing unicode strings - //boost::replace_all(requestString, "+", " "); - - cout << concordiaServer.handleRequest(requestString); - - // Note: the fcgi_streambuf destructor will auto flush - } - - // restore stdio streambufs - cin.rdbuf(cin_streambuf); - cout.rdbuf(cout_streambuf); - cerr.rdbuf(cerr_streambuf); - - return 0; -} diff --git a/concordia-server/CMakeLists.txt b/concordia-server/CMakeLists.txt deleted file mode 100644 index 95ad56f..0000000 --- a/concordia-server/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -add_library(concordia-server SHARED - concordia_server.cpp - ) - -add_subdirectory(t) -# ===================================== - -install(TARGETS concordia-server DESTINATION lib/) -install(FILES concordia_server.hpp DESTINATION include/concordia-server/) - -target_link_libraries(concordia-server log4cpp) -target_link_libraries(concordia-server ${Boost_LIBRARIES}) - -if (WITH_RE2) - target_link_libraries(concordia-server re2) - if (WITH_PCRE) - target_link_libraries(concordia-server pcrecpp) - endif(WITH_PCRE) -else(WITH_RE2) - if (WITH_PCRE) - target_link_libraries(concordia-server pcrecpp) - endif(WITH_PCRE) -endif(WITH_RE2) - diff --git a/concordia-server/TODO.txt b/concordia-server/TODO.txt deleted file mode 100644 index f28f475..0000000 --- a/concordia-server/TODO.txt +++ /dev/null @@ -1 +0,0 @@ -use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as character encoding in the
) diff --git a/concordia-server/concordia_server.cpp b/concordia-server/concordia_server.cpp deleted file mode 100644 index 2628ac3..0000000 --- a/concordia-server/concordia_server.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "concordia-server/concordia_server.hpp" - -#include -#include -#include -#include -#include - - -ConcordiaServer::ConcordiaServer(const std::string & configFilePath) - throw(ConcordiaException) { - _concordia = boost::shared_ptr ( - new Concordia(configFilePath)); - -} - -ConcordiaServer::~ConcordiaServer() { -} - -string ConcordiaServer::handleRequest(string & requestString) { - stringstream ss; - try { - ss << "Content-type: text/html\r\n" - << "\r\n" - << "\n" - << " \n" - << " Hello, World!\n" - << " \n" - << " \n" - << "

Hello, World!

\n" - << " The concordia version is: "<< _concordia->getVersion() << "\n" - << "

Input data:

\n" - << requestString; - - ss << "

Adding content as example:

\n"; - - Example example1(requestString, 0); - Example example2("Ala ma kota", 1); - Example example3("Marysia nie ma kota chyba", 2); - _concordia->addExample(example1); - _concordia->addExample(example2); - _concordia->addExample(example3); - - _concordia->refreshSAfromRAM(); - - ss << "

Searching ma kota:

\n"; - boost::ptr_vector result = - _concordia->simpleSearch("ma kota"); - BOOST_FOREACH(SubstringOccurence occurence, result) { - ss << "\t\tfound match in sentence number: " - << occurence.getId() << "

"; - } - ss << " \n" - << "\n"; - - } catch (ConcordiaException & e) { - ss << "

Concordia error:" << e.what() << "

"; - - } - return ss.str(); - -} diff --git a/concordia-server/concordia_server.hpp b/concordia-server/concordia_server.hpp deleted file mode 100644 index ffe8b06..0000000 --- a/concordia-server/concordia_server.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef CONCORDIA_SERVER_HDR -#define CONCORDIA_SERVER_HDR - -#include -#include -#include -#include - - -using namespace std; - -class ConcordiaServer { -public: - /*! Constructor. - \param configFilePath path to the Concordia configuration file - \throws ConcordiaException - */ - explicit ConcordiaServer(const std::string & configFilePath) - throw(ConcordiaException); - /*! Destructor. - */ - virtual ~ConcordiaServer(); - - string handleRequest(string & requestString); - -private: - boost::shared_ptr _concordia; -}; - -#endif diff --git a/concordia-server/t/CMakeLists.txt b/concordia-server/t/CMakeLists.txt deleted file mode 100644 index ddc9c13..0000000 --- a/concordia-server/t/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_library(concordia-server-tests - test_concordia.cpp - test_concordia_config.cpp - ) - -target_link_libraries(concordia-server-tests concordia ${LIBCONFIG_LIB} concordia-tests-common) diff --git a/concordia-server/t/test_concordia.cpp b/concordia-server/t/test_concordia.cpp deleted file mode 100644 index 9248238..0000000 --- a/concordia-server/t/test_concordia.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "tests/unit-tests/unit_tests_globals.hpp" -#include "concordia/concordia.hpp" -#include "tests/common/test_resources_manager.hpp" - - -#include - -using namespace std; - -BOOST_AUTO_TEST_SUITE(concordia_main) - -BOOST_AUTO_TEST_CASE( ConcordiaVersion ) -{ - Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); - string version = concordia.getVersion(); - BOOST_CHECK_EQUAL( version , "0.1"); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/concordia-server/t/test_concordia_config.cpp b/concordia-server/t/test_concordia_config.cpp deleted file mode 100644 index 9e6566b..0000000 --- a/concordia-server/t/test_concordia_config.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "tests/unit-tests/unit_tests_globals.hpp" - -#include "concordia/concordia_config.hpp" -#include "tests/common/test_resources_manager.hpp" - -#include -#include -#include - -using namespace std; - -BOOST_AUTO_TEST_SUITE(concordia_config) - -BOOST_AUTO_TEST_CASE( ConfigParameters ) -{ - ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("concordia-test.cfg")); - BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" ); -} - -BOOST_AUTO_TEST_CASE( NonexistentConfigTest ) -{ - bool exceptionThrown = false; - string message = ""; - try { - ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg")); - } catch (ConcordiaException & e) { - exceptionThrown = true; - message = e.what(); - } - BOOST_CHECK_EQUAL(exceptionThrown, true); - BOOST_CHECK_EQUAL(boost::starts_with(message, "I/O error reading config file"), true); -} - - -BOOST_AUTO_TEST_CASE( InvalidConfigTest ) -{ - bool exceptionThrown = false; - string message = ""; - try { - ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg")); - } catch (ConcordiaException & e) { - exceptionThrown = true; - message = e.what(); - } - BOOST_CHECK_EQUAL(exceptionThrown, true); - BOOST_CHECK_EQUAL(boost::starts_with(message, "Error parsing config file"), true); -} - - - - -BOOST_AUTO_TEST_SUITE_END() diff --git a/concordia-server/test.html b/concordia-server/test.html deleted file mode 100644 index c5ba0af..0000000 --- a/concordia-server/test.html +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - -