clear index, examples
This commit is contained in:
parent
abbd5b1ae8
commit
07d5d4438b
4
.gitignore
vendored
4
.gitignore
vendored
@ -7,5 +7,5 @@ tests/resources/concordia-config/concordia.cfg
|
|||||||
tests/resources/temp
|
tests/resources/temp
|
||||||
prod/resources/temp
|
prod/resources/temp
|
||||||
prod/resources/text-files/jrc_smaller.txt
|
prod/resources/text-files/jrc_smaller.txt
|
||||||
|
examples/build
|
||||||
|
examples/config.hpp
|
||||||
|
@ -71,9 +71,11 @@ if(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
|||||||
link_directories(${LIBCONFIG_LIB})
|
link_directories(${LIBCONFIG_LIB})
|
||||||
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
endif(EXISTS ${LIBCONFIG_LIB} AND EXISTS ${LIBCONFIG_INCLUDE})
|
||||||
|
|
||||||
|
target_link_libraries(concordia config++)
|
||||||
target_link_libraries(concordia log4cpp)
|
target_link_libraries(concordia log4cpp)
|
||||||
target_link_libraries(concordia ${Boost_LIBRARIES})
|
target_link_libraries(concordia ${Boost_LIBRARIES})
|
||||||
target_link_libraries(concordia divsufsort)
|
target_link_libraries(concordia divsufsort)
|
||||||
|
target_link_libraries(concordia utf8case)
|
||||||
|
|
||||||
if (WITH_RE2)
|
if (WITH_RE2)
|
||||||
target_link_libraries(concordia re2)
|
target_link_libraries(concordia re2)
|
||||||
|
@ -84,29 +84,6 @@ This should generate a single file called refman.pdf in the same directory.
|
|||||||
|
|
||||||
\section compilation4 Sample program
|
\section compilation4 Sample program
|
||||||
|
|
||||||
In order to verify whether Concordia has been installed successfully, run the following minimal example. Prepare the file test.cpp with the following contents (remember to substitute <CONCORDIA_HOME> with the path of the unpacked Concordia package).
|
In order to verify whether Concordia has been installed successfully, proceed to \ref tutorial1 and run the sample programs.
|
||||||
|
|
||||||
\verbatim
|
|
||||||
|
|
||||||
#include <concordia/concordia.hpp>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
|
|
||||||
Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg");
|
|
||||||
cout << concordia.getVersion() << endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
\endverbatim
|
|
||||||
|
|
||||||
Compilation method:
|
|
||||||
|
|
||||||
\verbatim
|
|
||||||
|
|
||||||
g++ test.cpp -lconcordia -lconfig++ -lboost_system -lboost_serialization -lboost_unit_test_framework -lboost_filesystem -lboost_program_options -lboost_iostreams -lboost_regex -lboost_locale -lutf8case
|
|
||||||
|
|
||||||
\endverbatim
|
\endverbatim
|
||||||
*/
|
*/
|
||||||
|
@ -169,4 +169,17 @@ boost::shared_ptr<ConcordiaSearchResult> Concordia::concordiaSearch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Concordia::clearIndex() throw(ConcordiaException) {
|
||||||
|
_hashGenerator->clearWordMap();
|
||||||
|
_T = boost::shared_ptr<std::vector<sauchar_t> >(
|
||||||
|
new std::vector<sauchar_t>);
|
||||||
|
_markers = boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> >(
|
||||||
|
new std::vector<SUFFIX_MARKER_TYPE>);
|
||||||
|
_SA = boost::shared_ptr<std::vector<saidx_t> >(
|
||||||
|
new std::vector<saidx_t>);
|
||||||
|
|
||||||
|
boost::filesystem::remove(_config->getHashedIndexFilePath());
|
||||||
|
boost::filesystem::remove(_config->getMarkersFilePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,6 +108,11 @@ public:
|
|||||||
*/
|
*/
|
||||||
void refreshSAfromRAM() throw(ConcordiaException);
|
void refreshSAfromRAM() throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Clears all the examples from the index
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
|
void clearIndex() throw(ConcordiaException);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void _initializeIndex() throw(ConcordiaException);
|
void _initializeIndex() throw(ConcordiaException);
|
||||||
|
|
||||||
|
@ -59,4 +59,8 @@ void HashGenerator::serializeWordMap() {
|
|||||||
oa << *_wordMap;
|
oa << *_wordMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HashGenerator::clearWordMap() {
|
||||||
|
_wordMap = boost::shared_ptr<WordMap>(new WordMap);
|
||||||
|
boost::filesystem::remove(_wordMapFilePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -63,6 +63,11 @@ public:
|
|||||||
*/
|
*/
|
||||||
void serializeWordMap();
|
void serializeWordMap();
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Clears word map.
|
||||||
|
*/
|
||||||
|
void clearWordMap();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
boost::shared_ptr<WordMap> _wordMap;
|
boost::shared_ptr<WordMap> _wordMap;
|
||||||
|
|
||||||
|
@ -52,9 +52,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
|
|||||||
std::vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia");
|
std::vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia");
|
||||||
std::vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala");
|
std::vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala");
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.size(), 2);
|
BOOST_CHECK_EQUAL(searchResult1.size(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 123);
|
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 123);
|
||||||
@ -107,9 +105,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
|||||||
std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest");
|
std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest");
|
||||||
std::vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno");
|
std::vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno");
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia2.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.size(), 3);
|
BOOST_CHECK_EQUAL(searchResult1.size(), 3);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 312);
|
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 312);
|
||||||
@ -137,9 +133,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
|
|||||||
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
|
||||||
std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
|
std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia2.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
|
|
||||||
BOOST_CHECK_EQUAL(searchResult1.size(), 1);
|
BOOST_CHECK_EQUAL(searchResult1.size(), 1);
|
||||||
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 312);
|
BOOST_CHECK_EQUAL(searchResult1.at(0).getId(), 312);
|
||||||
@ -188,9 +182,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
|
|||||||
BOOST_CHECK_EQUAL(searchResult4.at(1).getExampleId(), 51);
|
BOOST_CHECK_EQUAL(searchResult4.at(1).getExampleId(), 51);
|
||||||
BOOST_CHECK_CLOSE(searchResult4.at(1).getScore(), 0.4707, 0.1);
|
BOOST_CHECK_CLOSE(searchResult4.at(1).getScore(), 0.4707, 0.1);
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
||||||
@ -262,9 +254,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch1 )
|
|||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(6).getPatternOffset(), 2);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(6).getPatternOffset(), 2);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(6).getMatchedLength(), 1);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(6).getMatchedLength(), 1);
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
||||||
@ -308,8 +298,6 @@ BOOST_AUTO_TEST_CASE( ConcordiaSearch2 )
|
|||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getStart(), 4);
|
||||||
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getEnd(), 9);
|
BOOST_CHECK_EQUAL(searchResult1->getFragments().at(0).getEnd(), 9);
|
||||||
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
|
concordia.clearIndex();
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
|
|
||||||
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));
|
|
||||||
}
|
}
|
||||||
BOOST_AUTO_TEST_SUITE_END()
|
BOOST_AUTO_TEST_SUITE_END()
|
||||||
|
@ -90,7 +90,7 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
|||||||
expected2.push_back(3);
|
expected2.push_back(3);
|
||||||
BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());
|
||||||
|
|
||||||
boost::filesystem::remove(config->getWordMapFilePath());
|
hashGenerator1.clearWordMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOST_AUTO_TEST_CASE( TokenVectorTest )
|
BOOST_AUTO_TEST_CASE( TokenVectorTest )
|
||||||
|
@ -2,33 +2,35 @@
|
|||||||
|
|
||||||
\section tutorial1 Code examples
|
\section tutorial1 Code examples
|
||||||
|
|
||||||
This section gives a few examples of programs in C++ which make use of the Concordia library. You can run them after successful installation of Concordia (the installation process is covered in \ref compilation). Each of these sample programs is compiled with the command:
|
This section describes a few examples of programs in C++ which make use of the Concordia library. You can run them after successful installation of Concordia (the installation process is covered in \ref compilation). Their source code is located in the "examples" subfolder of the project's main directory.
|
||||||
|
|
||||||
|
The directory also contains a simple CMakeLists.txt file, which helps to perform compilation and linking of the examples. In order to compile the examples, issue the following commands from within the examples directory:
|
||||||
\verbatim
|
\verbatim
|
||||||
g++ test.cpp -lconcordia -lconfig++ -lboost_system -lboost_serialization -lboost_unit_test_framework -lboost_filesystem -lboost_program_options -lboost_iostreams -lboost_regex -lboost_locale -lutf8case
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make
|
||||||
\endverbatim
|
\endverbatim
|
||||||
|
|
||||||
Do not forget to substitute "<CONCORDIA_HOME>" with the path to unpacked Concordia sources. Also, make sure that the folder: <CONCORDIA_HOME>/tests/resources/temp is empty before running each example (this is explained in \ref tutorial2):
|
After these operations, three executables are created in the build directory: first, simple_search and concordia_search. A small config.hpp file is also generated to store the path to the examples folder.
|
||||||
|
|
||||||
\verbatim
|
|
||||||
rm <CONCORDIA_HOME>/tests/resources/temp/*
|
|
||||||
\endverbatim
|
|
||||||
|
|
||||||
\subsection tutorial1_1 Minimal example
|
\subsection tutorial1_1 Minimal example
|
||||||
|
|
||||||
Only crate the Concordia object and print version of the library.
|
This program only creates the Concordia object and prints the version of the library.
|
||||||
|
|
||||||
|
File first.cpp:
|
||||||
\verbatim
|
\verbatim
|
||||||
#include <concordia/concordia.hpp>
|
#include <concordia/concordia.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg");
|
|
||||||
cout << concordia.getVersion() << endl;
|
cout << concordia.getVersion() << endl;
|
||||||
|
|
||||||
}
|
}
|
||||||
\endverbatim
|
\endverbatim
|
||||||
|
|
||||||
@ -36,19 +38,21 @@ int main() {
|
|||||||
|
|
||||||
This code snippet shows the basic Concordia functionality - simple substring lookup in the index.
|
This code snippet shows the basic Concordia functionality - simple substring lookup in the index.
|
||||||
|
|
||||||
|
File simple_search.cpp:
|
||||||
\verbatim
|
\verbatim
|
||||||
#include <concordia/concordia.hpp>
|
#include <concordia/concordia.hpp>
|
||||||
#include <concordia/substring_occurence.hpp>
|
#include <concordia/substring_occurence.hpp>
|
||||||
#include <concordia/example.hpp>
|
#include <concordia/example.hpp>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg");
|
|
||||||
|
|
||||||
// adding sentences to index
|
// adding sentences to index
|
||||||
concordia.addExample(Example("Alice has a cat", 56));
|
concordia.addExample(Example("Alice has a cat", 56));
|
||||||
@ -67,7 +71,10 @@ int main() {
|
|||||||
for(vector<SubstringOccurence>::iterator it = result.begin();
|
for(vector<SubstringOccurence>::iterator it = result.begin();
|
||||||
it != result.end(); ++it) {
|
it != result.end(); ++it) {
|
||||||
cout << "Found substring in sentence: " << it->getId() << " at offset: " << it->getOffset() << endl;
|
cout << "Found substring in sentence: " << it->getId() << " at offset: " << it->getOffset() << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// clearing index
|
||||||
|
concordia.clearIndex();
|
||||||
}
|
}
|
||||||
\endverbatim
|
\endverbatim
|
||||||
|
|
||||||
@ -91,22 +98,22 @@ Concordia is equipped with a unique functionality of so called Concordia search,
|
|||||||
|
|
||||||
Additionally, the score for this best overlay is computed. The score is a real number between 0 and 1, where 0 indicates, that the pattern is not covered at all (i.e. not a single word from this pattern is found in the index). The score 1 represents the perfect match - pattern is covered completely by just one fragment, which means that the pattern is found in the index as one of the examples.
|
Additionally, the score for this best overlay is computed. The score is a real number between 0 and 1, where 0 indicates, that the pattern is not covered at all (i.e. not a single word from this pattern is found in the index). The score 1 represents the perfect match - pattern is covered completely by just one fragment, which means that the pattern is found in the index as one of the examples.
|
||||||
|
|
||||||
Sample concordia searching:
|
File concordia_search.cpp:
|
||||||
|
|
||||||
\verbatim
|
\verbatim
|
||||||
#include <concordia/concordia.hpp>
|
#include <concordia/concordia.hpp>
|
||||||
#include <concordia/concordia_search_result.hpp>
|
#include <concordia/concordia_search_result.hpp>
|
||||||
#include <concordia/matched_pattern_fragment.hpp>
|
#include <concordia/matched_pattern_fragment.hpp>
|
||||||
#include <concordia/example.hpp>
|
#include <concordia/example.hpp>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
#include <boost/shared_ptr.hpp>
|
#include <boost/shared_ptr.hpp>
|
||||||
#include <boost/foreach.hpp>
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
Concordia concordia("<CONCORDIA_HOME>/tests/resources/concordia-config/concordia.cfg");
|
|
||||||
|
|
||||||
concordia.addExample(Example("Alice has a cat", 56));
|
concordia.addExample(Example("Alice has a cat", 56));
|
||||||
concordia.addExample(Example("Alice has a dog", 23));
|
concordia.addExample(Example("Alice has a dog", 23));
|
||||||
@ -138,7 +145,8 @@ int main() {
|
|||||||
|
|
||||||
cout << "Best overlay score: " << result->getBestOverlayScore() << endl;
|
cout << "Best overlay score: " << result->getBestOverlayScore() << endl;
|
||||||
|
|
||||||
|
// clearing index
|
||||||
|
concordia.clearIndex();
|
||||||
}
|
}
|
||||||
\endverbatim
|
\endverbatim
|
||||||
|
|
||||||
|
44
examples/CMakeLists.txt
Normal file
44
examples/CMakeLists.txt
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# Tutorial: http://www.cmake.org/cmake/help/cmake_tutorial.html
|
||||||
|
|
||||||
|
cmake_minimum_required(VERSION 2.6)
|
||||||
|
project(examples CXX)
|
||||||
|
|
||||||
|
# Put the path to the examples folder in config.hpp
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/config.hpp @ONLY)
|
||||||
|
|
||||||
|
# Find boost libraries
|
||||||
|
find_package(Boost COMPONENTS
|
||||||
|
serialization unit_test_framework system filesystem program_options iostreams regex locale REQUIRED)
|
||||||
|
|
||||||
|
|
||||||
|
# 1. example - minimal program
|
||||||
|
|
||||||
|
add_executable(first first.cpp)
|
||||||
|
target_link_libraries(first concordia)
|
||||||
|
target_link_libraries(first config++)
|
||||||
|
target_link_libraries(first log4cpp)
|
||||||
|
target_link_libraries(first ${Boost_LIBRARIES})
|
||||||
|
target_link_libraries(first divsufsort)
|
||||||
|
target_link_libraries(first utf8case)
|
||||||
|
|
||||||
|
# 2. example - simple substring lookup
|
||||||
|
|
||||||
|
add_executable(simple_search simple_search.cpp)
|
||||||
|
target_link_libraries(simple_search concordia)
|
||||||
|
target_link_libraries(simple_search config++)
|
||||||
|
target_link_libraries(simple_search log4cpp)
|
||||||
|
target_link_libraries(simple_search ${Boost_LIBRARIES})
|
||||||
|
target_link_libraries(simple_search divsufsort)
|
||||||
|
target_link_libraries(simple_search utf8case)
|
||||||
|
|
||||||
|
|
||||||
|
# 3. example - concordia search
|
||||||
|
|
||||||
|
add_executable(concordia_search concordia_search.cpp)
|
||||||
|
target_link_libraries(concordia_search concordia)
|
||||||
|
target_link_libraries(concordia_search config++)
|
||||||
|
target_link_libraries(concordia_search log4cpp)
|
||||||
|
target_link_libraries(concordia_search ${Boost_LIBRARIES})
|
||||||
|
target_link_libraries(concordia_search divsufsort)
|
||||||
|
target_link_libraries(concordia_search utf8case)
|
||||||
|
|
48
examples/concordia_search.cpp
Normal file
48
examples/concordia_search.cpp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#include <concordia/concordia.hpp>
|
||||||
|
#include <concordia/concordia_search_result.hpp>
|
||||||
|
#include <concordia/matched_pattern_fragment.hpp>
|
||||||
|
#include <concordia/example.hpp>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
|
|
||||||
|
concordia.addExample(Example("Alice has a cat", 56));
|
||||||
|
concordia.addExample(Example("Alice has a dog", 23));
|
||||||
|
concordia.addExample(Example("New test product has a mistake", 321));
|
||||||
|
concordia.addExample(Example("This is just testing and it has nothing to do with the above", 14));
|
||||||
|
|
||||||
|
concordia.refreshSAfromRAM();
|
||||||
|
|
||||||
|
cout << "Searching for pattern: Our new test product has nothing to do with computers" << endl;
|
||||||
|
boost::shared_ptr<ConcordiaSearchResult> result =
|
||||||
|
concordia.concordiaSearch("Our new test product has nothing to do with computers");
|
||||||
|
|
||||||
|
cout << "Printing all matched fragments:" << endl;
|
||||||
|
BOOST_FOREACH(MatchedPatternFragment fragment, result->getFragments()) {
|
||||||
|
cout << "Matched pattern fragment found. Pattern fragment: ["
|
||||||
|
<< fragment.getStart() << "," << fragment.getEnd() << "]"
|
||||||
|
<< " in sentence " << fragment.getExampleId()
|
||||||
|
<< ", at offset: " << fragment.getExampleOffset() << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
cout << "Best overlay:" << endl;
|
||||||
|
BOOST_FOREACH(MatchedPatternFragment fragment, result->getBestOverlay()) {
|
||||||
|
cout << "\tPattern fragment: [" << fragment.getStart()
|
||||||
|
<< "," << fragment.getEnd() << "]"
|
||||||
|
<< " in sentence " << fragment.getExampleId()
|
||||||
|
<< ", at offset: " << fragment.getExampleOffset() << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "Best overlay score: " << result->getBestOverlayScore() << endl;
|
||||||
|
|
||||||
|
// clearing index
|
||||||
|
concordia.clearIndex();
|
||||||
|
}
|
1
examples/config.hpp.in
Normal file
1
examples/config.hpp.in
Normal file
@ -0,0 +1 @@
|
|||||||
|
#define EXAMPLES_DIR "@CMAKE_CURRENT_SOURCE_DIR@"
|
12
examples/first.cpp
Normal file
12
examples/first.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include <concordia/concordia.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
|
cout << concordia.getVersion() << endl;
|
||||||
|
}
|
36
examples/simple_search.cpp
Normal file
36
examples/simple_search.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
#include <concordia/concordia.hpp>
|
||||||
|
#include <concordia/substring_occurence.hpp>
|
||||||
|
#include <concordia/example.hpp>
|
||||||
|
|
||||||
|
#include "config.hpp"
|
||||||
|
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
Concordia concordia(EXAMPLES_DIR"/../tests/resources/concordia-config/concordia.cfg");
|
||||||
|
|
||||||
|
// adding sentences to index
|
||||||
|
concordia.addExample(Example("Alice has a cat", 56));
|
||||||
|
concordia.addExample(Example("Alice has a dog", 23));
|
||||||
|
concordia.addExample(Example("New test product has a mistake", 321));
|
||||||
|
concordia.addExample(Example("This is just testing and it has nothing to do with the above", 14));
|
||||||
|
|
||||||
|
// generating index
|
||||||
|
concordia.refreshSAfromRAM();
|
||||||
|
|
||||||
|
// searching
|
||||||
|
cout << "Searching for pattern: has a" << endl;
|
||||||
|
vector<SubstringOccurence> result = concordia.simpleSearch("has a");
|
||||||
|
|
||||||
|
// printing results
|
||||||
|
for(vector<SubstringOccurence>::iterator it = result.begin();
|
||||||
|
it != result.end(); ++it) {
|
||||||
|
cout << "Found substring in sentence: " << it->getId() << " at offset: " << it->getOffset() << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clearing index
|
||||||
|
concordia.clearIndex();
|
||||||
|
}
|
@ -4,4 +4,4 @@ TARGET_DIR=build
|
|||||||
|
|
||||||
./cpplint.py --filter=-legal,-build/namespaces,-whitespace/labels,-build/include_what_you_use,-runtime/int,-readability/streams,-build/include_order `find concordia concordia-console -type f -regextype posix-extended -regex '.*\.(cpp|hpp|h|c)' ! -regex '.*\./build.*' ! -regex '.*concordia/common/config.hpp' ! -regex '.*/(t|tests)/.*'` 2> cpplint-result.txt
|
./cpplint.py --filter=-legal,-build/namespaces,-whitespace/labels,-build/include_what_you_use,-runtime/int,-readability/streams,-build/include_order `find concordia concordia-console -type f -regextype posix-extended -regex '.*\.(cpp|hpp|h|c)' ! -regex '.*\./build.*' ! -regex '.*concordia/common/config.hpp' ! -regex '.*/(t|tests)/.*'` 2> cpplint-result.txt
|
||||||
|
|
||||||
cppcheck -D__cplusplus -D__GNUC__=3 -f --enable=all echo `find . -type d ! -path './.git*' ! -path "./${TARGET_DIR}"'*' | perl -ne 'chomp; print "-I$_ "'` `find . -type f -regextype posix-extended -regex '.*\.(cpp|hpp)' ! -regex '.*\./build.*'` 2> cppcheck-result.txt
|
cppcheck -D__cplusplus -D__GNUC__=3 -f --enable=all echo `find . -type d ! -path './.git*' ! -path "./${TARGET_DIR}"'*' | perl -ne 'chomp; print "-I$_ "'` `find . -type f -regextype posix-extended -regex '.*\.(cpp|hpp)' ! -regex '.*\./build.*' ! -regex '.*\./examples/build.*'` 2> cppcheck-result.txt
|
||||||
|
Loading…
Reference in New Issue
Block a user