modified todo, removed concordia-server

This commit is contained in:
rjawor 2015-06-09 13:09:10 +02:00
parent 07d5d4438b
commit 680eb54ae5
11 changed files with 6 additions and 325 deletions

View File

@ -1,17 +1,19 @@
---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) ----------------------------- ---------------------------- Developer's private notes (language may vary, bo tak czasem wygodniej) -----------------------------
- document the code - mess with gcc performance optimization options (https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html)
- prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
- copyright libdivsufsort i psi-toolkit (in documentation)
IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie? IN PROGRESS - concordia search zwraca pozycje tokenów z hash'a. Jak to odnieść do examples w korpusie?
- testy zużycia pamięci - testy zużycia pamięci
- Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła. - Prawdopodobnie długość example w markers będzie potrzebna tylko anubisowi (który, jak się okazuje, jest wolny). Pomyśleć, do czego można wykorzystać markery, bo ich idea wydaje się niezła.
- Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzeba by zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe). - Multi-threading? (przy concordia search jak najbardziej. Tylko wtedy trzeba by zastosować sortowanie po końcach przedziału przed liczeniem best overlay, co nawiasem mówiąc jest gotowe).
- concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
---------------------------- Archive ----------------------------- ---------------------------- Archive -----------------------------
DONE - concordia-server (zastanowić się, czy nie napisać CAT-a oraz nad tym, czy nie oddzielić projektu concordia-server).
DONE - document the code
DONE - prepare website (home, downloads, documentation, tutorial, issues, sourceforge page, about author)
DONE - copyright libdivsufsort i psi-toolkit (in documentation)
DONE - puścić 100% search test na jrc DONE - puścić 100% search test na jrc
REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek). REJECTED - zastanowić się nad empty hash examples (rozwiązanie: w ogóle nie szukać fraz o pustym hashu, rzucać wyjątek).

View File

@ -1,21 +0,0 @@
# Builds the concordia-server-starter executable from concordia_server_starter.cpp
# and installs it into bin/.
add_executable(concordia-server-starter concordia_server_starter.cpp)

target_link_libraries(concordia-server-starter
    concordia-server
    concordia
    ${Boost_LIBRARIES}
    ${FCGIPP_LIB}
    ${FCGI_LIB}
    ${LIBCONFIG_LIB})

# Optional regular-expression back-ends. Each one is linked whenever its own
# switch is enabled, independently of the other switch.
if (WITH_RE2)
    target_link_libraries(concordia-server-starter re2)
endif(WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server-starter pcrecpp)
endif(WITH_PCRE)

# =====================================

install(TARGETS concordia-server-starter DESTINATION bin/)

View File

@ -1,99 +0,0 @@
#include <iostream>
#include <sstream>
#include <string>
#include <boost/algorithm/string/replace.hpp>
#include <fcgio.h>
#include <stdlib.h>
#include <unistd.h>
#include "concordia-server/concordia_server.hpp"
using namespace std;
static const unsigned long STDIN_MAX = 1000000;
/*!
  Reads the body of the current FastCGI request from cin into a newly
  allocated buffer.

  The number of bytes read is taken from the CONTENT_LENGTH request
  parameter and is always capped at STDIN_MAX. When CONTENT_LENGTH is present
  but cannot be parsed as a non-negative decimal number, a diagnostic is
  written to cerr and up to STDIN_MAX bytes are read.

  \param request the FastCGI request whose environment is inspected
  \param content output parameter; receives a buffer allocated with new[]
                 (NUL-terminated, so it can be used as a C string), or 0 when
                 CONTENT_LENGTH is missing. The caller owns the buffer and
                 must release it with delete[].
  \returns the number of bytes actually read (0 when CONTENT_LENGTH is missing)
*/
static long gstdin(FCGX_Request * request, char ** content)
{
    char * clenstr = FCGX_GetParam("CONTENT_LENGTH", request->envp);
    unsigned long clen = 0;

    if (clenstr)
    {
        // Parse with a separate end pointer so the original text stays
        // available for the diagnostic below.
        char * parseEnd = clenstr;
        const long declared = strtol(clenstr, &parseEnd, 10);

        if (*parseEnd || declared < 0)
        {
            cerr << "can't parse \"CONTENT_LENGTH="
                 << clenstr
                 << "\"\n";
            clen = STDIN_MAX;
        }
        else
        {
            clen = static_cast<unsigned long>(declared);
        }

        // *always* put a cap on the amount of data that will be read
        if (clen > STDIN_MAX) clen = STDIN_MAX;

        // One extra byte for the terminating NUL: callers treat the buffer
        // as a C string.
        *content = new char[clen + 1];

        cin.read(*content, clen);
        clen = cin.gcount();
        (*content)[clen] = '\0';
    }
    else
    {
        // *never* read stdin when CONTENT_LENGTH is missing
        *content = 0;
        clen = 0;
    }

    // Chew up any remaining stdin - this shouldn't be necessary
    // but is because mod_fastcgi doesn't handle it correctly.
    // ignore() doesn't set the eof bit in some versions of glibc++
    // so use gcount() instead of eof()...
    do cin.ignore(1024); while (cin.gcount() == 1024);

    return clen;
}
int main(int argc, char** argv) {
// Backup the stdio streambufs
streambuf * cin_streambuf = cin.rdbuf();
streambuf * cout_streambuf = cout.rdbuf();
streambuf * cerr_streambuf = cerr.rdbuf();
ConcordiaServer concordiaServer("concordia.cfg");
FCGX_Request request;
FCGX_Init();
FCGX_InitRequest(&request, 0, 0);
while (FCGX_Accept_r(&request) == 0) {
fcgi_streambuf cin_fcgi_streambuf(request.in);
fcgi_streambuf cout_fcgi_streambuf(request.out);
fcgi_streambuf cerr_fcgi_streambuf(request.err);
cin.rdbuf(&cin_fcgi_streambuf);
cout.rdbuf(&cout_fcgi_streambuf);
cerr.rdbuf(&cerr_fcgi_streambuf);
char * content;
unsigned long clen = gstdin(&request, &content);
string requestString(content);
//TODO passing unicode strings
//boost::replace_all(requestString, "+", " ");
cout << concordiaServer.handleRequest(requestString);
// Note: the fcgi_streambuf destructor will auto flush
}
// restore stdio streambufs
cin.rdbuf(cin_streambuf);
cout.rdbuf(cout_streambuf);
cerr.rdbuf(cerr_streambuf);
return 0;
}

View File

@ -1,24 +0,0 @@
# Builds the concordia-server shared library, descends into the test
# directory and installs the library together with its public header.
add_library(concordia-server SHARED concordia_server.cpp)

add_subdirectory(t)

# =====================================

install(TARGETS concordia-server DESTINATION lib/)
install(FILES concordia_server.hpp DESTINATION include/concordia-server/)

target_link_libraries(concordia-server log4cpp ${Boost_LIBRARIES})

# Optional regular-expression back-ends. Each one is linked whenever its own
# switch is enabled, independently of the other switch.
if (WITH_RE2)
    target_link_libraries(concordia-server re2)
endif(WITH_RE2)

if (WITH_PCRE)
    target_link_libraries(concordia-server pcrecpp)
endif(WITH_PCRE)

View File

@ -1 +0,0 @@
Use the echo.cpp source as an example for concordia-server-starter. It works with the up-to-date version of test.html (the one that specifies UTF-8 as the character encoding in the <form>).

View File

@ -1,62 +0,0 @@
#include "concordia-server/concordia_server.hpp"
#include <sstream>
#include <concordia/example.hpp>
#include <concordia/substring_occurence.hpp>
#include <boost/ptr_container/ptr_vector.hpp>
#include <boost/foreach.hpp>
/*! Creates the server together with the Concordia instance it delegates to.
    \param configFilePath path to the Concordia configuration file, passed
                          unchanged to the Concordia constructor
    \throws ConcordiaException
*/
ConcordiaServer::ConcordiaServer(const std::string & configFilePath)
                                         throw(ConcordiaException)
    : _concordia(new Concordia(configFilePath)) {
}
/*! Destructor. Performs no explicit work; the shared pointer member drops
    its reference to the Concordia instance on its own.
*/
ConcordiaServer::~ConcordiaServer() {}
/*! Replaces the characters that are significant in HTML markup
    (&, <, >, double and single quote) with character references, so that
    arbitrary text can be embedded in a page without being interpreted as
    markup.
    \param text the raw text
    \returns the escaped copy of the text
*/
static string escapeHtml(const string & text) {
    string escaped;
    escaped.reserve(text.size());
    for (string::size_type i = 0; i < text.size(); ++i) {
        switch (text[i]) {
            case '&':  escaped += "&amp;";  break;
            case '<':  escaped += "&lt;";   break;
            case '>':  escaped += "&gt;";   break;
            case '"':  escaped += "&quot;"; break;
            case '\'': escaped += "&#39;";  break;
            default:   escaped += text[i];  break;
        }
    }
    return escaped;
}

/*! Produces the complete response text for a single request.

    The response starts with a "Content-type: text/html" header followed by
    an HTML page that shows the Concordia version and the (HTML-escaped)
    request data. The request string is then added to the index as example 0,
    together with two fixed example sentences (ids 1 and 2), the index is
    refreshed from RAM and a simple search for "ma kota" is run; the id of
    every match is listed in the page.

    If a ConcordiaException is thrown along the way, the text generated so
    far is kept and an error heading containing the (HTML-escaped) exception
    message is appended instead of the rest of the page.

    \param requestString the raw request body; it is not modified
    \returns the response header and HTML body
*/
string ConcordiaServer::handleRequest(string & requestString) {
    stringstream ss;

    try {
        ss << "Content-type: text/html\r\n"
           << "\r\n"
           << "<html>\n"
           << " <head>\n"
           << " <title>Hello, World!</title>\n"
           << " </head>\n"
           << " <body>\n"
           << " <h1>Hello, World!</h1>\n"
           << " The concordia version is: "<< _concordia->getVersion() << "\n"
           << " <h1>Input data:</h1>\n"
           // The request body is untrusted input: escape it before it is
           // echoed into the page.
           << escapeHtml(requestString);

        ss << " <h1>Adding content as example:</h1>\n";

        // The index receives the original, unescaped request text.
        Example example1(requestString, 0);
        Example example2("Ala ma kota", 1);
        Example example3("Marysia nie ma kota chyba", 2);
        _concordia->addExample(example1);
        _concordia->addExample(example2);
        _concordia->addExample(example3);

        _concordia->refreshSAfromRAM();

        ss << " <h1>Searching ma kota:</h1>\n";
        boost::ptr_vector<SubstringOccurence> result =
            _concordia->simpleSearch("ma kota");
        // Iterate by reference to avoid copying every occurrence.
        BOOST_FOREACH(SubstringOccurence & occurence, result) {
            ss << "\t\tfound match in sentence number: "
               << occurence.getId() << "<br/><br/>";
        }

        ss << " </body>\n"
           << "</html>\n";
    } catch (ConcordiaException & e) {
        ss << "<h1> Concordia error:" << escapeHtml(e.what()) << "</h1>";
    }

    return ss.str();
}

View File

@ -1,30 +0,0 @@
#ifndef CONCORDIA_SERVER_HDR
#define CONCORDIA_SERVER_HDR
#include <string>
#include <concordia/concordia_exception.hpp>
#include <boost/shared_ptr.hpp>
#include <concordia/concordia.hpp>
using namespace std;
/*! Wraps a shared Concordia instance and turns request strings into
    response strings.
*/
class ConcordiaServer {
public:
    /*! Constructor.
      \param configFilePath path to the Concordia configuration file
      \throws ConcordiaException
    */
    explicit ConcordiaServer(const std::string & configFilePath)
                                             throw(ConcordiaException);

    /*! Destructor.
    */
    virtual ~ConcordiaServer();

    /*! Handles a single request.
      \param requestString the request data
      \returns the response text produced for the request
    */
    std::string handleRequest(std::string & requestString);

private:
    /*! The Concordia instance created by the constructor. */
    boost::shared_ptr<Concordia> _concordia;
};
#endif

View File

@ -1,6 +0,0 @@
# Library bundling the concordia-server unit tests.
add_library(concordia-server-tests
    test_concordia.cpp
    test_concordia_config.cpp)

target_link_libraries(concordia-server-tests
    concordia
    ${LIBCONFIG_LIB}
    concordia-tests-common)

View File

@ -1,19 +0,0 @@
#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/concordia.hpp"
#include "tests/common/test_resources_manager.hpp"
#include <string>
using namespace std;
BOOST_AUTO_TEST_SUITE(concordia_main)
// Checks that a Concordia instance built from the test configuration file
// "concordia.cfg" reports version "0.1".
BOOST_AUTO_TEST_CASE( ConcordiaVersion )
{
    Concordia concordia(
        TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));

    string version(concordia.getVersion());

    BOOST_CHECK_EQUAL( version , "0.1");
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -1,52 +0,0 @@
#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/concordia_config.hpp"
#include "tests/common/test_resources_manager.hpp"
#include <string>
#include <list>
#include <boost/algorithm/string/predicate.hpp>
using namespace std;
BOOST_AUTO_TEST_SUITE(concordia_config)
// Checks that the puddle tagset path is read from "concordia-test.cfg".
BOOST_AUTO_TEST_CASE( ConfigParameters )
{
    ConcordiaConfig config(
        TestResourcesManager::getTestConcordiaConfigFilePath(
            "concordia-test.cfg"));

    BOOST_CHECK_EQUAL( config.getPuddleTagsetFilePath() , "puddle/tagset.txt" );
}
// Checks that loading the config file "foo.cfg" raises a ConcordiaException
// whose message starts with "I/O error reading config file".
BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
{
    string message;
    bool exceptionThrown = false;

    try {
        ConcordiaConfig config(
            TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg"));
    } catch (ConcordiaException & e) {
        message = e.what();
        exceptionThrown = true;
    }

    BOOST_CHECK_EQUAL(exceptionThrown, true);
    BOOST_CHECK_EQUAL(boost::starts_with(message, "I/O error reading config file"), true);
}
// Checks that loading the config file "invalid.cfg" raises a
// ConcordiaException whose message starts with "Error parsing config file".
BOOST_AUTO_TEST_CASE( InvalidConfigTest )
{
    string message;
    bool exceptionThrown = false;

    try {
        ConcordiaConfig config(
            TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg"));
    } catch (ConcordiaException & e) {
        message = e.what();
        exceptionThrown = true;
    }

    BOOST_CHECK_EQUAL(exceptionThrown, true);
    BOOST_CHECK_EQUAL(boost::starts_with(message, "Error parsing config file"), true);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -1,7 +0,0 @@
<!--
  Manual test page: a form with a single text field ("input_field") that is
  POSTed as multipart/form-data, with UTF-8 as the accepted character set,
  to http://localhost:8081.
-->
<html>
<body>
<form enctype="multipart/form-data" action="http://localhost:8081" accept-charset="UTF-8" method="POST">
<input type="text" name="input_field" />
</form>
</body>
</html>