diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ed8094..2bbf932 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,23 +3,45 @@ cmake_minimum_required(VERSION 2.6) project(concordia C CXX) -set (CONCORDIA_VERSION_MAJOR 0) -set (CONCORDIA_VERSION_MINOR 1) +set (CONCORDIA_VERSION_MAJOR 1) +set (CONCORDIA_VERSION_MINOR 0) # Whether to use stop words set (STOP_WORDS_ENABLED "false") +include(CheckTypeSize) # Type of the characters in SA -set (INDEX_CHARACTER_TYPE "unsigned int") -set (INDEX_CHARACTER_TYPE_MAX_VALUE "ULONG_MAX") -# The above allows for (roughly) 2^32 = 4 294 967 295 words in corpus. +CHECK_TYPE_SIZE("unsigned int" UINT_SIZE) +message(STATUS "UINT_SIZE: ${UINT_SIZE}") +CHECK_TYPE_SIZE("unsigned long" ULONG_SIZE) +message(STATUS "ULONG_SIZE: ${ULONG_SIZE}") +CHECK_TYPE_SIZE("unsigned long long" ULLONG_SIZE) +message(STATUS "ULLONG_SIZE: ${ULLONG_SIZE}") + +if (UINT_SIZE EQUAL 4 AND ULONG_SIZE EQUAL 8) + set (INDEX_CHARACTER_TYPE "unsigned int") + set (INDEX_CHARACTER_TYPE_MAX_VALUE "UINT_MAX") + + # Suffix markers + set (SUFFIX_MARKER_TYPE "unsigned long") + set (SUFFIX_MARKER_TYPE_MAX_VALUE "ULONG_MAX") +elseif(ULONG_SIZE EQUAL 4 AND ULLONG_SIZE EQUAL 8) + set (INDEX_CHARACTER_TYPE "unsigned long") + set (INDEX_CHARACTER_TYPE_MAX_VALUE "ULONG_MAX") + + # Suffix markers + set (SUFFIX_MARKER_TYPE "unsigned long long") + set (SUFFIX_MARKER_TYPE_MAX_VALUE "ULLONG_MAX") +else() + message(FATAL_ERROR "Can not find proper C++ types. Try installing in 64-bit architecture") + +endif() -# Suffix markers -set (SUFFIX_MARKER_TYPE "unsigned long") -set (SUFFIX_MARKER_TYPE_MAX_VALUE "ULLONG_MAX") set (SUFFIX_MARKER_SENTENCE_BYTES 2) -# The above settings assign 4 bytes to sentence id and 2 bytes each for suffix offset and sentence length. + +# The above allows for (roughly) 2^32 = 4 294 967 295 words in corpus. +# It assigns 4 bytes to sentence id and 2 bytes each for suffix offset and sentence length. 
# This allows to store 2^32=4 294 967 296 sentences no longer than 65536 words. # After changing these values be sure to adjust tests (as well as the above calculations). # Also, you might want to run TooLongHashTest from test_hash_generator.cpp @@ -93,17 +115,6 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE}) link_directories(${LIBCONFIG_LIB}) endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE}) -# ---------------------------------------------------- -# Snowball stemmer -# ---------------------------------------------------- -find_library(LIBSTEMMER_LIB NAMES stemmer REQUIRED) -find_path(LIBSTEMMER_INCLUDE libstemmer.h) - -if(EXISTS ${LIBSTEMMER_LIB} AND EXISTS ${LIBSTEMMER_INCLUDE}) - message(STATUS "Found libstemmer") - include_directories(${LIBSTEMMER_INCLUDE}) - link_directories(${LIBSTEMMER_LIB}) -endif(EXISTS ${LIBSTEMMER_LIB} AND EXISTS ${LIBSTEMMER_INCLUDE}) # ---------------------------------------------------- # Logging @@ -149,6 +160,9 @@ set(ALL_DIRECTORIES concordia concordia-console libdivsufsort utf8 utf8case) include_directories("${concordia_SOURCE_DIR}") +# Generated libdivsufsort headers are written to the build tree, so include them from there (works for any build directory) +include_directories("${concordia_BINARY_DIR}/libdivsufsort/include") + foreach(dir ${ALL_DIRECTORIES}) link_directories("${concordia_BINARY_DIR}/${dir}") add_subdirectory(${dir}) @@ -191,4 +205,3 @@ if(DOXYGEN_FOUND) endif(DOXYGEN_FOUND) - diff --git a/INSTALL.txt b/INSTALL.txt index 5e90687..6a8b953 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -1,60 +1,61 @@ Concordia Installation & Build Manual ================================= -This file describes how to compile, build +This page describes how to compile, build and install Concordia library.
-Requirements -============ +========= Requirements =============== +Before you compile, make sure you have these installed: +- g++ compiler +- cmake +- Boost library +- Log4cpp +- (optional) Doxygen +- (optional) TeX -* cmake -* Boost library -* Log4cpp -* libstemmer (Snowball stemming library) -* (optional) Doxygen +========= Ubuntu package list ======== -Boost Ubuntu installation -========================= +On Ubuntu 14.04, the above software comes in standard packages. Here is the complete list of these packages: +- g++ +- cmake +- libboost-dev +- libboost-serialization-dev +- libboost-test-dev +- libboost-filesystem-dev +- libboost-system-dev +- libboost-program-options-dev +- libboost-iostreams-dev +- libboost-regex-dev +- libboost-locale-dev +- liblog4cpp5-dev +- libconfig++-dev +- libconfig-dev +- libpcre3-dev +- doxygen +- texlive-font-utils -sudo apt-get install libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-de libboost-program-options-dev libboost-iostreams-dev +========= Ubuntu requirements install command ======= -Log4cpp Ubuntu installation -=========================== +If you want to install all the above packages at once, simply use the below command (this will also install the optional packages): -sudo apt-get install liblog4cpp5-dev +sudo apt-get install g++ cmake libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-dev libboost-program-options-dev libboost-iostreams-dev libboost-regex-dev libboost-locale-dev liblog4cpp5-dev libconfig++-dev libconfig-dev libpcre3-dev doxygen texlive-font-utils -libconfig Ubuntu installation -============================= +========= Build & installation procedure =========== -sudo apt-get install libconfig++-dev -sudo apt-get install libconfig-dev - -libstemmer Ubuntu installation -============================== -sudo apt-get install libstemmer-dev - -Perl-compatible regular expressions (PCRE) Ubuntu installation 
-======================================================= - -sudo apt-get install libpcre3-dev - -Doxygen Ubuntu installation -======================================================= - -sudo apt-get install doxygen - -Installation procedure -====================== +To build and install Concordia, navigate to its home directory and issue the following commands: mkdir build cd build -cmake -DCMAKE_BUILD_TYPE=RELEASE .. +../cmake.sh make make test -make install +sudo make install -Documentation -============= +After that it is strongly recommended to run ldconfig to update linker info with newly installed shared libraries: + +sudo ldconfig -v + +========= Documentation =========================== If Doxygen is available, a successful compilation generates documentation data in three formats in the build/doc directory. @@ -67,3 +68,24 @@ cd doc/latex make This should generate a single file called refman.pdf in the same directory. + +========= Sample program ============================ + +In order to verify whether Concordia has been installed successfully, run the following minimal example. Prepare the file test.cpp with the following contents (remember to substitute <CONCORDIA_HOME> with the path of the unpacked Concordia package).
+#include <concordia/concordia.hpp> +#include <iostream> + +using namespace std; + +int main() { + + Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg"); + cout << concordia.getVersion() << endl; + +} + +Compilation method: + +g++ test.cpp -lconcordia -lconfig++ -lboost_system -lboost_serialization -lboost_unit_test_framework -lboost_filesystem -lboost_program_options -lboost_iostreams -lboost_regex -lboost_locale -lutf8case + diff --git a/concordia-console/CMakeLists.txt b/concordia-console/CMakeLists.txt index 452ab39..9daf064 100644 --- a/concordia-console/CMakeLists.txt +++ b/concordia-console/CMakeLists.txt @@ -1,7 +1,7 @@ add_executable(concordia-console concordia-console.cpp) -target_link_libraries(concordia-console concordia utf8case ${Boost_LIBRARIES} ${LIBCONFIG_LIB} ${LIBSTEMMER_LIB}) +target_link_libraries(concordia-console concordia utf8case ${Boost_LIBRARIES} ${LIBCONFIG_LIB}) if (WITH_RE2) target_link_libraries(concordia-console re2) diff --git a/concordia-server/CMakeLists.txt b/concordia-server/CMakeLists.txt index 2cdd220..95ad56f 100644 --- a/concordia-server/CMakeLists.txt +++ b/concordia-server/CMakeLists.txt @@ -9,7 +9,6 @@ install(TARGETS concordia-server DESTINATION lib/) install(FILES concordia_server.hpp DESTINATION include/concordia-server/) target_link_libraries(concordia-server log4cpp) -target_link_libraries(concordia-server ${LIBSTEMMER_LIB}) target_link_libraries(concordia-server ${Boost_LIBRARIES}) if (WITH_RE2) diff --git a/concordia/CMakeLists.txt b/concordia/CMakeLists.txt index aac35db..816b27d 100644 --- a/concordia/CMakeLists.txt +++ b/concordia/CMakeLists.txt @@ -72,7 +72,6 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE}) endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE}) target_link_libraries(concordia log4cpp) -target_link_libraries(concordia ${LIBSTEMMER_LIB}) target_link_libraries(concordia ${Boost_LIBRARIES}) target_link_libraries(concordia divsufsort) diff --git a/concordia/compilation.dox
b/concordia/compilation.dox index e4ed8c3..76e82ba 100644 --- a/concordia/compilation.dox +++ b/concordia/compilation.dox @@ -1,49 +1,68 @@ /** \page compilation Concordia Installation & Build Manual -This file describes how to compile, build +This page describes how to compile, build and install Concordia library. \section compilation1 Requirements - +Before you compile, make sure you have these installed: +- g++ compiler - cmake - Boost library - Log4cpp -- libstemmer (Snowball stemming library) - (optional) Doxygen +- (optional) TeX -\subsection compilation1_1 Boost Ubuntu installation +\subsection compilation1_1 Ubuntu package list -sudo apt-get install libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-de libboost-program-options-dev libboost-iostreams-dev +On Ubuntu 14.04, the above software comes in standard packages. Here is the complete list of these packages: +- g++ +- cmake +- libboost-dev +- libboost-serialization-dev +- libboost-test-dev +- libboost-filesystem-dev +- libboost-system-dev +- libboost-program-options-dev +- libboost-iostreams-dev +- libboost-regex-dev +- libboost-locale-dev +- liblog4cpp5-dev +- libconfig++-dev +- libconfig-dev +- libpcre3-dev +- doxygen +- texlive-font-utils -\subsection compilation1_2 Log4cpp Ubuntu installation +\subsection compilation1_2 Ubuntu requirements install command -sudo apt-get install liblog4cpp5-dev +If you want to install all the above packages at once, simply use the below command (this will also install the optional packages): -\subsection compilation1_3 Libconfig Ubuntu installation +\verbatim -sudo apt-get install libconfig++-dev -sudo apt-get install libconfig-dev +sudo apt-get install g++ cmake libboost-dev libboost-serialization-dev libboost-test-dev libboost-filesystem-dev libboost-system-dev libboost-program-options-dev libboost-iostreams-dev libboost-regex-dev libboost-locale-dev liblog4cpp5-dev libconfig++-dev libconfig-dev libpcre3-dev doxygen 
texlive-font-utils -\subsection compilation1_4 Libstemmer Ubuntu installation - -sudo apt-get install libstemmer-dev - -\subsection compilation1_5 Perl-compatible regular expressions (PCRE) Ubuntu installation - -sudo apt-get install libpcre3-dev - -\subsection compilation1_6 Doxygen Ubuntu installation - -sudo apt-get install doxygen +\endverbatim \section compilation2 Build & installation procedure -mkdir build
-cd build
-../cmake.sh
-make
-make test
-make install +To build and install Concordia, navigate to its home directory and issue the following commands: + +\verbatim + +mkdir build +cd build +../cmake.sh +make +make test +sudo make install + +\endverbatim + +After that it is strongly recommended to run ldconfig to update linker info with newly installed shared libraries: + +\verbatim +sudo ldconfig -v +\endverbatim \section compilation3 Documentation @@ -54,24 +73,29 @@ The man files in doc/man will be installed during installation. Open doc/html/in a HTML version of the same documentation. The latex directory contains uncompiled latex files. To generate a single pdf file run +\verbatim + cd doc/latex make +\endverbatim + This should generate a single file called refman.pdf in the same directory. \section compilation4 Sample program -Sample program using the library: +In order to verify whether Concordia has been installed successfully, run the following minimal example. Prepare the file test.cpp with the following contents (remember to substitute \<CONCORDIA_HOME\> with the path of the unpacked Concordia package).
\verbatim #include <concordia/concordia.hpp> +#include <iostream> using namespace std; int main() { - Concordia concordia("concordia.cfg"); + Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg"); cout << concordia.getVersion() << endl; } @@ -82,7 +106,7 @@ Compilation method: \verbatim -g++ test.cpp -lconcordia -lconfig++ -lboost_system -lboost_serialization -lboost_unit_test_framework -lboost_filesystem -lboost_program_options -lboost_iostreams +g++ test.cpp -lconcordia -lconfig++ -lboost_system -lboost_serialization -lboost_unit_test_framework -lboost_filesystem -lboost_program_options -lboost_iostreams -lboost_regex -lboost_locale -lutf8case \endverbatim */ diff --git a/concordia/t/test_concordia.cpp b/concordia/t/test_concordia.cpp index 70ff375..7c2dc47 100644 --- a/concordia/t/test_concordia.cpp +++ b/concordia/t/test_concordia.cpp @@ -15,7 +15,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaVersion ) { Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); std::string version = concordia.getVersion(); - BOOST_CHECK_EQUAL( version , "0.1"); + BOOST_CHECK_EQUAL( version , "1.0"); } diff --git a/concordia/t/test_example.cpp b/concordia/t/test_example.cpp index b72f766..fd4b7af 100644 --- a/concordia/t/test_example.cpp +++ b/concordia/t/test_example.cpp @@ -3,13 +3,14 @@ #include #include +#include "concordia/common/config.hpp" #include "concordia/example.hpp" BOOST_AUTO_TEST_SUITE(exampleTest) BOOST_AUTO_TEST_CASE( ExceedingId ) { - unsigned long maxId = (ULLONG_MAX >> 8) - 1; + SUFFIX_MARKER_TYPE maxId = (SUFFIX_MARKER_TYPE_MAX_VALUE >> 8) - 1; Example example1("Test", maxId); bool exceptionThrown = false; diff --git a/concordia/t/test_utils.cpp b/concordia/t/test_utils.cpp index 36c88e7..75ed024 100644 --- a/concordia/t/test_utils.cpp +++ b/concordia/t/test_utils.cpp @@ -4,7 +4,7 @@ #include "tests/common/test_resources_manager.hpp" #include -#include "divsufsort.h" +#include BOOST_AUTO_TEST_SUITE(utils) diff --git
a/libdivsufsort/CMakeLists.txt b/libdivsufsort/CMakeLists.txt index 7ceace3..04bffc2 100644 --- a/libdivsufsort/CMakeLists.txt +++ b/libdivsufsort/CMakeLists.txt @@ -92,10 +92,3 @@ if(BUILD_EXAMPLES) add_subdirectory(examples) endif(BUILD_EXAMPLES) -## Add 'uninstall' target ## -CONFIGURE_FILE( - "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake" - IMMEDIATE @ONLY) -ADD_CUSTOM_TARGET(uninstall - "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake") diff --git a/libdivsufsort/include/CMakeLists.txt b/libdivsufsort/include/CMakeLists.txt index 2532e56..26a1c23 100644 --- a/libdivsufsort/include/CMakeLists.txt +++ b/libdivsufsort/include/CMakeLists.txt @@ -160,3 +160,4 @@ if(BUILD_DIVSUFSORT64) "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION include) endif(BUILD_DIVSUFSORT64) +