removed using namespace std

Former-commit-id: dbb5129e1f94d83eca887ada0f89d6bb45250f1e
This commit is contained in:
rjawor 2015-04-15 14:14:10 +02:00
parent a09999c130
commit 0d4bdf12de
49 changed files with 333 additions and 392 deletions

View File

@ -84,11 +84,11 @@ int main(int argc, char** argv) {
std::string filePath = cli["read-file"].as<std::string>(); std::string filePath = cli["read-file"].as<std::string>();
std::cout << "\tReading sentences from file: " << filePath << std::cout << "\tReading sentences from file: " << filePath <<
std::endl; std::endl;
ifstream text_file(filePath.c_str()); std::ifstream text_file(filePath.c_str());
std::string line; std::string line;
if (text_file.is_open()) { if (text_file.is_open()) {
long lineCount = 0; long lineCount = 0;
vector<Example> buffer; std::vector<Example> buffer;
boost::posix_time::ptime timeStart = boost::posix_time::ptime timeStart =
boost::posix_time::microsec_clock::local_time(); boost::posix_time::microsec_clock::local_time();
while (getline(text_file, line)) { while (getline(text_file, line)) {
@ -147,7 +147,7 @@ int main(int argc, char** argv) {
<< "Terminating execution." << "Terminating execution."
<< std::endl; << std::endl;
return 1; return 1;
} catch(exception & e) { } catch(std::exception & e) {
std::cerr << "Unexpected exception caught with message: " std::cerr << "Unexpected exception caught with message: "
<< std::endl << std::endl
<< e.what() << e.what()

View File

@ -8,8 +8,6 @@
*/ */
using namespace std;
class AnubisSearchResult { class AnubisSearchResult {
public: public:
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId, explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,

View File

@ -18,8 +18,9 @@ std::vector<AnubisSearchResult> AnubisSearcher::anubisSearch(
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const std::vector<INDEX_CHARACTER_TYPE> & pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
boost::shared_ptr<TmMatchesMap> tmMatchesMap = getTmMatches(T, markers, SA, pattern); boost::shared_ptr<TmMatchesMap> tmMatchesMap =
getTmMatches(T, markers, SA, pattern);
// get the tmMatches list sorted descending by score // get the tmMatches list sorted descending by score
std::vector<AnubisSearchResult> result; std::vector<AnubisSearchResult> result;
return result; return result;
@ -31,7 +32,6 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const std::vector<INDEX_CHARACTER_TYPE> & pattern) const std::vector<INDEX_CHARACTER_TYPE> & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
std::vector<sauchar_t> patternVector = std::vector<sauchar_t> patternVector =
Utils::indexVectorToSaucharVector(pattern); Utils::indexVectorToSaucharVector(pattern);
@ -45,7 +45,7 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE); int highResOffset = offset * sizeof(INDEX_CHARACTER_TYPE);
std::vector<sauchar_t> currentPattern( std::vector<sauchar_t> currentPattern(
patternVector.begin()+highResOffset, patternVector.end()); patternVector.begin()+highResOffset, patternVector.end());
saidx_t patternLength = 0; saidx_t patternLength = 0;
saidx_t size = SA->size(); saidx_t size = SA->size();
saidx_t left = 0; saidx_t left = 0;
@ -64,8 +64,8 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
saidx_t localLeft; saidx_t localLeft;
size = sa_search(T->data(), (saidx_t) T->size(), size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength, (const sauchar_t *) patternArray, patternLength,
SAleft, size, &localLeft); SAleft, size, &localLeft);
left += localLeft; left += localLeft;
@ -75,20 +75,24 @@ boost::shared_ptr<TmMatchesMap> AnubisSearcher::getTmMatches(
// Add to tm matches map results surrounding the main stream. // Add to tm matches map results surrounding the main stream.
// from left // from left
for (saidx_t i = prevLeft; i < left; i++) { for (saidx_t i = prevLeft; i < left; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(),
} (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1,
offset);
}
// from right // from right
for (saidx_t i = left+size; i < prevLeft+prevSize; i++) { for (saidx_t i = left+size; i < prevLeft+prevSize; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern.size(), (patternLength / sizeof(INDEX_CHARACTER_TYPE)) -1, offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(),
} (patternLength / sizeof(INDEX_CHARACTER_TYPE)) - 1,
offset);
}
} }
} while (patternLength < currentPattern.size() && size > 0); } while (patternLength < currentPattern.size() && size > 0);
if (size > 0) { if (size > 0) {
for (saidx_t i = left; i < left+size; i++) { for (saidx_t i = left; i < left+size; i++) {
_addToMap(SA, markers, tmMatchesMap, i, pattern.size(), patternLength / sizeof(INDEX_CHARACTER_TYPE), offset); _addToMap(SA, markers, tmMatchesMap, i, pattern.size(),
} patternLength / sizeof(INDEX_CHARACTER_TYPE), offset);
}
} }
} }
@ -121,16 +125,16 @@ std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
saidx_t localLeft; saidx_t localLeft;
size = sa_search(T->data(), (saidx_t) T->size(), size = sa_search(T->data(), (saidx_t) T->size(),
(const sauchar_t *) patternArray, patternLength, (const sauchar_t *) patternArray, patternLength,
SAleft, size, &localLeft); SAleft, size, &localLeft);
left += localLeft; left += localLeft;
SAleft += localLeft; SAleft += localLeft;
} while (patternLength < pattern.size() && size > 0); } while (patternLength < pattern.size() && size > 0);
vector<SubstringOccurence> result; std::vector<SubstringOccurence> result;
if (size == 0) { if (size == 0) {
// The search managed to find exactly the longest common prefixes. // The search managed to find exactly the longest common prefixes.
length = patternLength - sizeof(INDEX_CHARACTER_TYPE); length = patternLength - sizeof(INDEX_CHARACTER_TYPE);
if (length > 0) { if (length > 0) {
// Get the results of the previous search // Get the results of the previous search
@ -149,27 +153,28 @@ std::vector<SubstringOccurence> AnubisSearcher::lcpSearch(
} }
void AnubisSearcher::_collectResults( void AnubisSearcher::_collectResults(
vector<SubstringOccurence> & result, std::vector<SubstringOccurence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size) { saidx_t left, saidx_t size) {
for (saidx_t i = 0; i < size; i++) { for (saidx_t i = 0; i < size; i++) {
saidx_t resultPos = SA->at(left + i); saidx_t resultPos = SA->at(left + i);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) { if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE)); SUFFIX_MARKER_TYPE marker =
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
result.push_back(SubstringOccurence(marker)); result.push_back(SubstringOccurence(marker));
} }
} }
} }
void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA, void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<TmMatchesMap> tmMatchesMap, boost::shared_ptr<TmMatchesMap> tmMatchesMap,
saidx_t sa_pos, saidx_t sa_pos,
SUFFIX_MARKER_TYPE totalPatternLength, SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE matchedFragmentLength,
SUFFIX_MARKER_TYPE patternOffset) { SUFFIX_MARKER_TYPE patternOffset) {
SubstringOccurence occurence; SubstringOccurence occurence;
if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) { if (_getOccurenceFromSA(SA, markers, sa_pos, occurence)) {
_addOccurenceToMap(tmMatchesMap, _addOccurenceToMap(tmMatchesMap,
@ -178,24 +183,24 @@ void AnubisSearcher::_addToMap(boost::shared_ptr<std::vector<saidx_t> > SA,
matchedFragmentLength, matchedFragmentLength,
patternOffset); patternOffset);
} }
}
bool AnubisSearcher::_getOccurenceFromSA(
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
SubstringOccurence & occurence) {
saidx_t resultPos = SA->at(sa_pos);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker = markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
occurence.enterDataFromMarker(marker);
}
} }
void AnubisSearcher::_addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap, bool AnubisSearcher::_getOccurenceFromSA(
boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos,
SubstringOccurence & occurence) {
saidx_t resultPos = SA->at(sa_pos);
if (resultPos % sizeof(INDEX_CHARACTER_TYPE) == 0) {
SUFFIX_MARKER_TYPE marker =
markers->at(resultPos / sizeof(INDEX_CHARACTER_TYPE));
occurence.enterDataFromMarker(marker);
}
}
void AnubisSearcher::_addOccurenceToMap(
boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence, SubstringOccurence & occurence,
SUFFIX_MARKER_TYPE totalPatternLength, SUFFIX_MARKER_TYPE totalPatternLength,
SUFFIX_MARKER_TYPE matchedFragmentLength, SUFFIX_MARKER_TYPE matchedFragmentLength,
@ -213,16 +218,12 @@ void AnubisSearcher::_addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatche
SUFFIX_MARKER_TYPE key = occurence.getId(); SUFFIX_MARKER_TYPE key = occurence.getId();
tmMatchesMap->insert(key, tmMatches); tmMatchesMap->insert(key, tmMatches);
} }
// add intervals to tmMatches // add intervals to tmMatches
tmMatches->addExampleInterval( tmMatches->addExampleInterval(
occurence.getOffset(), occurence.getOffset(),
occurence.getOffset() + matchedFragmentLength occurence.getOffset() + matchedFragmentLength);
);
tmMatches->addPatternInterval( tmMatches->addPatternInterval(
patternOffset, patternOffset,
patternOffset + matchedFragmentLength patternOffset + matchedFragmentLength);
);
} }

View File

@ -10,6 +10,7 @@
#include "concordia/anubis_search_result.hpp" #include "concordia/anubis_search_result.hpp"
#include "concordia/tm_matches.hpp" #include "concordia/tm_matches.hpp"
#include<vector>
#include <divsufsort.h> #include <divsufsort.h>
/*! /*!
@ -17,8 +18,6 @@
*/ */
using namespace std;
class AnubisSearcher { class AnubisSearcher {
public: public:
explicit AnubisSearcher(); explicit AnubisSearcher();
@ -49,7 +48,7 @@ public:
SUFFIX_MARKER_TYPE & length) throw(ConcordiaException); SUFFIX_MARKER_TYPE & length) throw(ConcordiaException);
private: private:
void _collectResults(vector<SubstringOccurence> & result, void _collectResults(std::vector<SubstringOccurence> & result,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
saidx_t left, saidx_t size); saidx_t left, saidx_t size);
@ -63,9 +62,9 @@ private:
SUFFIX_MARKER_TYPE patternOffset); SUFFIX_MARKER_TYPE patternOffset);
bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA, bool _getOccurenceFromSA(boost::shared_ptr<std::vector<saidx_t> > SA,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
saidx_t sa_pos, saidx_t sa_pos,
SubstringOccurence & occurence); SubstringOccurence & occurence);
void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap, void _addOccurenceToMap(boost::shared_ptr<TmMatchesMap> tmMatchesMap,
SubstringOccurence & occurence, SubstringOccurence & occurence,

View File

@ -10,10 +10,10 @@ TextUtils::TextUtils() {
StringCaseConverterManager::getInstance().getUpperCaseConverter("pl"); StringCaseConverterManager::getInstance().getUpperCaseConverter("pl");
} }
string TextUtils::toLowerCase(const string & text) { std::string TextUtils::toLowerCase(const std::string & text) {
return simpleConvert(*_lowerConverter, text); return simpleConvert(*_lowerConverter, text);
} }
string TextUtils::toUpperCase(const string & text) { std::string TextUtils::toUpperCase(const std::string & text) {
return simpleConvert(*_upperConverter, text); return simpleConvert(*_upperConverter, text);
} }

View File

@ -7,9 +7,6 @@
#include "utf8case/case_converter_factory.hpp" #include "utf8case/case_converter_factory.hpp"
#include "utf8case/string_case_converter_manager.hpp" #include "utf8case/string_case_converter_manager.hpp"
using namespace std;
/*! Utility class for performing simple string operations. /*! Utility class for performing simple string operations.
*/ */
class TextUtils { class TextUtils {
@ -26,13 +23,13 @@ public:
\param text input string \param text input string
\returns lower case version of the input string. \returns lower case version of the input string.
*/ */
string toLowerCase(const string & text); std::string toLowerCase(const std::string & text);
/*! A method for converting all string letters to upper case. /*! A method for converting all string letters to upper case.
\param text input string \param text input string
\returns upper case version of the input string. \returns upper case version of the input string.
*/ */
string toUpperCase(const string & text); std::string toUpperCase(const std::string & text);
private: private:
explicit TextUtils(TextUtils const&); // Don't Implement explicit TextUtils(TextUtils const&); // Don't Implement

View File

@ -7,35 +7,35 @@ Utils::Utils() {
Utils::~Utils() { Utils::~Utils() {
} }
void Utils::writeIndexCharacter(ofstream & file, void Utils::writeIndexCharacter(std::ofstream & file,
INDEX_CHARACTER_TYPE character) { INDEX_CHARACTER_TYPE character) {
file.write(reinterpret_cast<char *>(&character), sizeof(character)); file.write(reinterpret_cast<char *>(&character), sizeof(character));
} }
void Utils::writeMarker(ofstream & file, void Utils::writeMarker(std::ofstream & file,
SUFFIX_MARKER_TYPE marker) { SUFFIX_MARKER_TYPE marker) {
file.write(reinterpret_cast<char *>(&marker), sizeof(marker)); file.write(reinterpret_cast<char *>(&marker), sizeof(marker));
} }
INDEX_CHARACTER_TYPE Utils::readIndexCharacter(ifstream & file) { INDEX_CHARACTER_TYPE Utils::readIndexCharacter(std::ifstream & file) {
INDEX_CHARACTER_TYPE character; INDEX_CHARACTER_TYPE character;
file.read(reinterpret_cast<char *>(&character), sizeof(character)); file.read(reinterpret_cast<char *>(&character), sizeof(character));
return character; return character;
} }
SUFFIX_MARKER_TYPE Utils::readMarker(ifstream & file) { SUFFIX_MARKER_TYPE Utils::readMarker(std::ifstream & file) {
SUFFIX_MARKER_TYPE marker; SUFFIX_MARKER_TYPE marker;
file.read(reinterpret_cast<char *>(&marker), sizeof(marker)); file.read(reinterpret_cast<char *>(&marker), sizeof(marker));
return marker; return marker;
} }
sauchar_t * Utils::indexVectorToSaucharArray( sauchar_t * Utils::indexVectorToSaucharArray(
const vector<INDEX_CHARACTER_TYPE> & input) { const std::vector<INDEX_CHARACTER_TYPE> & input) {
const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE); const int kArraySize = input.size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = sauchar_t * patternArray =
new sauchar_t[kArraySize]; new sauchar_t[kArraySize];
int pos = 0; int pos = 0;
for (vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin(); for (std::vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin();
it != input.end(); ++it) { it != input.end(); ++it) {
_insertCharToSaucharArray(patternArray, *it, pos); _insertCharToSaucharArray(patternArray, *it, pos);
pos += sizeof(INDEX_CHARACTER_TYPE); pos += sizeof(INDEX_CHARACTER_TYPE);
@ -44,9 +44,9 @@ sauchar_t * Utils::indexVectorToSaucharArray(
} }
std::vector<sauchar_t> Utils::indexVectorToSaucharVector( std::vector<sauchar_t> Utils::indexVectorToSaucharVector(
const vector<INDEX_CHARACTER_TYPE> & input) { const std::vector<INDEX_CHARACTER_TYPE> & input) {
std::vector<sauchar_t> result; std::vector<sauchar_t> result;
for (vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin(); for (std::vector<INDEX_CHARACTER_TYPE>::const_iterator it = input.begin();
it != input.end(); ++it) { it != input.end(); ++it) {
appendCharToSaucharVector(result, *it); appendCharToSaucharVector(result, *it);
} }

View File

@ -11,8 +11,6 @@
#include "concordia/concordia_exception.hpp" #include "concordia/concordia_exception.hpp"
#include <divsufsort.h> #include <divsufsort.h>
using namespace std;
class Utils { class Utils {
public: public:
explicit Utils(); explicit Utils();
@ -21,21 +19,21 @@ public:
*/ */
virtual ~Utils(); virtual ~Utils();
static void writeIndexCharacter(ofstream & file, static void writeIndexCharacter(std::ofstream & file,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);
static void writeMarker(ofstream & file, static void writeMarker(std::ofstream & file,
SUFFIX_MARKER_TYPE marker); SUFFIX_MARKER_TYPE marker);
static INDEX_CHARACTER_TYPE readIndexCharacter(ifstream & file); static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
static SUFFIX_MARKER_TYPE readMarker(ifstream & file); static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
static sauchar_t * indexVectorToSaucharArray( static sauchar_t * indexVectorToSaucharArray(
const vector<INDEX_CHARACTER_TYPE> & input); const std::vector<INDEX_CHARACTER_TYPE> & input);
static std::vector<sauchar_t> indexVectorToSaucharVector( static std::vector<sauchar_t> indexVectorToSaucharVector(
const vector<INDEX_CHARACTER_TYPE> & input); const std::vector<INDEX_CHARACTER_TYPE> & input);
static void appendCharToSaucharVector( static void appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
@ -70,8 +68,8 @@ private:
template <typename T> template <typename T>
void Utils::printVector(const std::vector<T> & vector) { void Utils::printVector(const std::vector<T> & vector) {
for (int i = 0; i < vector.size(); i++) { for (int i = 0; i < vector.size(); i++) {
cout << static_cast<int>(vector.at(i)) << " "; std::cout << static_cast<int>(vector.at(i)) << " ";
} }
cout << endl; std::cout << std::endl;
} }
#endif #endif

View File

@ -62,12 +62,13 @@ void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
&& boost::filesystem::exists(_config->getMarkersFilePath())) { && boost::filesystem::exists(_config->getMarkersFilePath())) {
// reading index from file // reading index from file
_T->clear(); _T->clear();
ifstream hashedIndexFile; std::ifstream hashedIndexFile;
hashedIndexFile.open(_config->getHashedIndexFilePath().c_str(), ios::in hashedIndexFile.open(
| ios::ate | ios::binary); _config->getHashedIndexFilePath().c_str(), std::ios::in
| std::ios::ate | std::ios::binary);
saidx_t hiFileSize = hashedIndexFile.tellg(); saidx_t hiFileSize = hashedIndexFile.tellg();
if (hiFileSize > 0) { if (hiFileSize > 0) {
hashedIndexFile.seekg(0, ios::beg); hashedIndexFile.seekg(0, std::ios::beg);
while (!hashedIndexFile.eof()) { while (!hashedIndexFile.eof()) {
INDEX_CHARACTER_TYPE character = INDEX_CHARACTER_TYPE character =
@ -82,12 +83,12 @@ void Concordia::loadRAMIndexFromDisk() throw(ConcordiaException) {
// reading markers from file // reading markers from file
_markers->clear(); _markers->clear();
ifstream markersFile; std::ifstream markersFile;
markersFile.open(_config->getMarkersFilePath().c_str(), ios::in markersFile.open(_config->getMarkersFilePath().c_str(), std::ios::in
| ios::ate | ios::binary); | std::ios::ate | std::ios::binary);
saidx_t maFileSize = markersFile.tellg(); saidx_t maFileSize = markersFile.tellg();
if (maFileSize > 0) { if (maFileSize > 0) {
markersFile.seekg(0, ios::beg); markersFile.seekg(0, std::ios::beg);
while (!markersFile.eof()) { while (!markersFile.eof()) {
SUFFIX_MARKER_TYPE marker = SUFFIX_MARKER_TYPE marker =
@ -132,7 +133,7 @@ void Concordia::_initializeIndex() throw(ConcordiaException) {
} }
std::vector<SubstringOccurence> Concordia::simpleSearch( std::vector<SubstringOccurence> Concordia::simpleSearch(
const string & pattern) const std::string & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_T->size() > 0) { if (_T->size() > 0) {
return _searcher->simpleSearch(_hashGenerator, _T, return _searcher->simpleSearch(_hashGenerator, _T,
@ -144,7 +145,7 @@ std::vector<SubstringOccurence> Concordia::simpleSearch(
} }
std::vector<AnubisSearchResult> Concordia::anubisSearch( std::vector<AnubisSearchResult> Concordia::anubisSearch(
const string & pattern) const std::string & pattern)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_T->size() > 0) { if (_T->size() > 0) {
return _searcher->anubisSearch(_hashGenerator, _T, return _searcher->anubisSearch(_hashGenerator, _T,

View File

@ -13,13 +13,13 @@
#define NAMED_ENTITIES_PARAM "named_entities_path" #define NAMED_ENTITIES_PARAM "named_entities_path"
#define STOP_SYMBOLS_PARAM "stop_symbols_path" #define STOP_SYMBOLS_PARAM "stop_symbols_path"
ConcordiaConfig::ConcordiaConfig(const string & configFilePath) ConcordiaConfig::ConcordiaConfig(const std::string & configFilePath)
throw(ConcordiaException) { throw(ConcordiaException) {
try { try {
_config.readFile(configFilePath.c_str()); _config.readFile(configFilePath.c_str());
} catch(ParseException & e) { } catch(libconfig::ParseException & e) {
throw ConcordiaException("Error parsing config file: "+configFilePath); throw ConcordiaException("Error parsing config file: "+configFilePath);
} catch(FileIOException & e) { } catch(libconfig::FileIOException & e) {
throw ConcordiaException("I/O error reading config file: " throw ConcordiaException("I/O error reading config file: "
+configFilePath); +configFilePath);
} }
@ -49,7 +49,7 @@ ConcordiaConfig::ConcordiaConfig(const string & configFilePath)
ConcordiaConfig::~ConcordiaConfig() { ConcordiaConfig::~ConcordiaConfig() {
} }
string ConcordiaConfig::_readConfigParameterStr(const string & name) std::string ConcordiaConfig::_readConfigParameterStr(const std::string & name)
throw(ConcordiaException) { throw(ConcordiaException) {
if (!_config.exists(name)) { if (!_config.exists(name)) {
throw ConcordiaException("Config error: "+name+" setting not found"); throw ConcordiaException("Config error: "+name+" setting not found");

View File

@ -7,9 +7,6 @@
#include "concordia/concordia_exception.hpp" #include "concordia/concordia_exception.hpp"
using namespace std;
using namespace libconfig;
/*! /*!
Class representing the Concordia configuration. Class representing the Concordia configuration.
*/ */
@ -20,8 +17,8 @@ public:
\param configFilePath path of the configuration file (see \ref running3 for file specification). \param configFilePath path of the configuration file (see \ref running3 for file specification).
\throws ConcordiaException \throws ConcordiaException
*/ */
explicit ConcordiaConfig(const string & configFilePath) explicit ConcordiaConfig(const std::string & configFilePath)
throw(ConcordiaException); throw(ConcordiaException);
/*! Destructor. /*! Destructor.
*/ */
@ -30,70 +27,70 @@ public:
/*! Getter for the puddle file path parameter. /*! Getter for the puddle file path parameter.
\returns file path of the puddle tagset \returns file path of the puddle tagset
*/ */
string & getPuddleTagsetFilePath() { std::string & getPuddleTagsetFilePath() {
return _puddleTagsetFilePath; return _puddleTagsetFilePath;
} }
string & getWordMapFilePath() { std::string & getWordMapFilePath() {
return _wordMapFilePath; return _wordMapFilePath;
} }
string & getHashedIndexFilePath() { std::string & getHashedIndexFilePath() {
return _hashedIndexFilePath; return _hashedIndexFilePath;
} }
string & getMarkersFilePath() { std::string & getMarkersFilePath() {
return _markersFilePath; return _markersFilePath;
} }
string & getSuffixArrayFilePath() { std::string & getSuffixArrayFilePath() {
return _suffixArrayFilePath; return _suffixArrayFilePath;
} }
string & getHtmlTagsFilePath() { std::string & getHtmlTagsFilePath() {
return _htmlTagsFilePath; return _htmlTagsFilePath;
} }
string & getSpaceSymbolsFilePath() { std::string & getSpaceSymbolsFilePath() {
return _spaceSymbolsFilePath; return _spaceSymbolsFilePath;
} }
string & getStopWordsFilePath() { std::string & getStopWordsFilePath() {
return _stopWordsFilePath; return _stopWordsFilePath;
} }
string & getNamedEntitiesFilePath() { std::string & getNamedEntitiesFilePath() {
return _namedEntitiesFilePath; return _namedEntitiesFilePath;
} }
string & getStopSymbolsFilePath() { std::string & getStopSymbolsFilePath() {
return _stopSymbolsFilePath; return _stopSymbolsFilePath;
} }
private: private:
Config _config; libconfig::Config _config;
string _puddleTagsetFilePath; std::string _puddleTagsetFilePath;
string _wordMapFilePath; std::string _wordMapFilePath;
string _hashedIndexFilePath; std::string _hashedIndexFilePath;
string _markersFilePath; std::string _markersFilePath;
string _suffixArrayFilePath; std::string _suffixArrayFilePath;
string _htmlTagsFilePath; std::string _htmlTagsFilePath;
string _spaceSymbolsFilePath; std::string _spaceSymbolsFilePath;
string _stopWordsFilePath; std::string _stopWordsFilePath;
string _namedEntitiesFilePath; std::string _namedEntitiesFilePath;
string _stopSymbolsFilePath; std::string _stopSymbolsFilePath;
string _readConfigParameterStr(const string & name) std::string _readConfigParameterStr(const std::string & name)
throw(ConcordiaException); throw(ConcordiaException);
}; };

View File

@ -4,7 +4,7 @@ ConcordiaException::ConcordiaException() throw():
_message("Concordia exception") { _message("Concordia exception") {
} }
ConcordiaException::ConcordiaException(const string & message) throw(): ConcordiaException::ConcordiaException(const std::string & message) throw():
_message(message) { _message(message) {
} }

View File

@ -5,12 +5,10 @@
#include<string> #include<string>
#include<string.h> #include<string.h>
using namespace std;
/*! /*!
Class representing an internal exception thrown in the Concordia library. Class representing an internal exception thrown in the Concordia library.
*/ */
class ConcordiaException : public exception { class ConcordiaException : public std::exception {
public: public:
/*! Constructor. /*! Constructor.
*/ */
@ -19,7 +17,7 @@ public:
/*! Constructor with a message. /*! Constructor with a message.
\param message message of the exception \param message message of the exception
*/ */
explicit ConcordiaException(const string & message) throw(); explicit ConcordiaException(const std::string & message) throw();
/*! Destructor. /*! Destructor.
*/ */
@ -30,7 +28,7 @@ public:
virtual const char* what() const throw(); virtual const char* what() const throw();
private: private:
string _message; std::string _message;
}; };
#endif #endif

View File

@ -7,8 +7,8 @@
#include <iostream> #include <iostream>
#include <climits> #include <climits>
ConcordiaIndex::ConcordiaIndex(const string & hashedIndexFilePath, ConcordiaIndex::ConcordiaIndex(const std::string & hashedIndexFilePath,
const string & markersFilePath) const std::string & markersFilePath)
throw(ConcordiaException) : throw(ConcordiaException) :
_hashedIndexFilePath(hashedIndexFilePath), _hashedIndexFilePath(hashedIndexFilePath),
_markersFilePath(markersFilePath) { _markersFilePath(markersFilePath) {
@ -17,15 +17,15 @@ ConcordiaIndex::ConcordiaIndex(const string & hashedIndexFilePath,
ConcordiaIndex::~ConcordiaIndex() { ConcordiaIndex::~ConcordiaIndex() {
} }
boost::shared_ptr<vector<saidx_t> > ConcordiaIndex::generateSuffixArray( boost::shared_ptr<std::vector<saidx_t> > ConcordiaIndex::generateSuffixArray(
boost::shared_ptr<vector<sauchar_t> > T) { boost::shared_ptr<std::vector<sauchar_t> > T) {
saidx_t * SA_array = new saidx_t[T->size()]; saidx_t * SA_array = new saidx_t[T->size()];
if (divsufsort(T->data(), SA_array, (saidx_t) T->size()) != 0) { if (divsufsort(T->data(), SA_array, (saidx_t) T->size()) != 0) {
throw ConcordiaException("Error creating suffix array."); throw ConcordiaException("Error creating suffix array.");
} }
boost::shared_ptr<vector<saidx_t> > result = boost::shared_ptr<std::vector<saidx_t> > result =
boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>); boost::shared_ptr<std::vector<saidx_t> >(new std::vector<saidx_t>);
for (int i = 0; i < T->size(); i++) { for (int i = 0; i < T->size(); i++) {
result->push_back(SA_array[i]); result->push_back(SA_array[i]);
} }
@ -36,15 +36,15 @@ boost::shared_ptr<vector<saidx_t> > ConcordiaIndex::generateSuffixArray(
void ConcordiaIndex::addExample( void ConcordiaIndex::addExample(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const Example & example) { const Example & example) {
ofstream hashedIndexFile; std::ofstream hashedIndexFile;
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| hashedIndexFile.open(_hashedIndexFilePath.c_str(), std::ios::out|
ios::app|ios::binary); std::ios::app|std::ios::binary);
ofstream markersFile; std::ofstream markersFile;
markersFile.open(_markersFilePath.c_str(), ios::out| markersFile.open(_markersFilePath.c_str(), std::ios::out|
ios::app|ios::binary); std::ios::app|std::ios::binary);
_addSingleExample(hashedIndexFile, markersFile, hashGenerator, _addSingleExample(hashedIndexFile, markersFile, hashGenerator,
T, markers, example); T, markers, example);
hashedIndexFile.close(); hashedIndexFile.close();
@ -54,15 +54,15 @@ void ConcordiaIndex::addExample(
void ConcordiaIndex::addAllExamples( void ConcordiaIndex::addAllExamples(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const vector<Example> & examples) { const std::vector<Example> & examples) {
ofstream hashedIndexFile; std::ofstream hashedIndexFile;
hashedIndexFile.open(_hashedIndexFilePath.c_str(), ios::out| hashedIndexFile.open(_hashedIndexFilePath.c_str(), std::ios::out|
ios::app|ios::binary); std::ios::app|std::ios::binary);
ofstream markersFile; std::ofstream markersFile;
markersFile.open(_markersFilePath.c_str(), ios::out| markersFile.open(_markersFilePath.c_str(), std::ios::out|
ios::app|ios::binary); std::ios::app|std::ios::binary);
BOOST_FOREACH(Example example, examples) { BOOST_FOREACH(Example example, examples) {
_addSingleExample(hashedIndexFile, markersFile, hashGenerator, _addSingleExample(hashedIndexFile, markersFile, hashGenerator,
@ -75,16 +75,16 @@ void ConcordiaIndex::addAllExamples(
} }
void ConcordiaIndex::_addSingleExample( void ConcordiaIndex::_addSingleExample(
ofstream & hashedIndexFile, std::ofstream & hashedIndexFile,
ofstream & markersFile, std::ofstream & markersFile,
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const Example & example) { const Example & example) {
vector<INDEX_CHARACTER_TYPE> hash std::vector<INDEX_CHARACTER_TYPE> hash
= hashGenerator->generateHash(example.getSentence()); = hashGenerator->generateHash(example.getSentence());
int offset = 0; int offset = 0;
for (vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin(); for (std::vector<INDEX_CHARACTER_TYPE>::iterator it = hash.begin();
it != hash.end(); ++it) { it != hash.end(); ++it) {
INDEX_CHARACTER_TYPE character = *it; INDEX_CHARACTER_TYPE character = *it;
Utils::writeIndexCharacter(hashedIndexFile, character); Utils::writeIndexCharacter(hashedIndexFile, character);

View File

@ -18,12 +18,10 @@
*/ */
using namespace std;
class ConcordiaIndex { class ConcordiaIndex {
public: public:
explicit ConcordiaIndex(const string & hashedIndexFilePath, explicit ConcordiaIndex(const std::string & hashedIndexFilePath,
const string & markersFilePath) const std::string & markersFilePath)
throw(ConcordiaException); throw(ConcordiaException);
/*! Destructor. /*! Destructor.
@ -32,31 +30,31 @@ public:
void addExample( void addExample(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const Example & example); const Example & example);
void addAllExamples( void addAllExamples(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const vector<Example> & examples); const std::vector<Example> & examples);
boost::shared_ptr<vector<saidx_t> > generateSuffixArray( boost::shared_ptr<std::vector<saidx_t> > generateSuffixArray(
boost::shared_ptr<vector<sauchar_t> > T); boost::shared_ptr<std::vector<sauchar_t> > T);
private: private:
// Add example to disk index and update RAM index. // Add example to disk index and update RAM index.
void _addSingleExample(ofstream & hashedIndexFile, void _addSingleExample(std::ofstream & hashedIndexFile,
ofstream & markersFile, std::ofstream & markersFile,
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
const Example & example); const Example & example);
string _hashedIndexFilePath; std::string _hashedIndexFilePath;
string _markersFilePath; std::string _markersFilePath;
}; };
#endif #endif

View File

@ -1,8 +1,7 @@
#include "concordia/example.hpp" #include "concordia/example.hpp"
#include <climits> #include <climits>
#include <iostream>
Example::Example(const string & sentence, const SUFFIX_MARKER_TYPE & id) Example::Example(const std::string & sentence, const SUFFIX_MARKER_TYPE & id)
throw(ConcordiaException): throw(ConcordiaException):
_sentence(sentence), _sentence(sentence),
_id(id) { _id(id) {

View File

@ -10,18 +10,17 @@
*/ */
using namespace std;
class Example { class Example {
public: public:
explicit Example(const string & sentence, const SUFFIX_MARKER_TYPE & id) explicit Example(const std::string & sentence,
throw(ConcordiaException); const SUFFIX_MARKER_TYPE & id)
throw(ConcordiaException);
/*! Destructor. /*! Destructor.
*/ */
virtual ~Example(); virtual ~Example();
string getSentence() const { std::string getSentence() const {
return _sentence; return _sentence;
} }
@ -30,7 +29,7 @@ public:
} }
private: private:
string _sentence; std::string _sentence;
SUFFIX_MARKER_TYPE _id; SUFFIX_MARKER_TYPE _id;
}; };

View File

@ -15,7 +15,7 @@ HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
_sentenceAnonymizer(boost::shared_ptr<SentenceAnonymizer>( _sentenceAnonymizer(boost::shared_ptr<SentenceAnonymizer>(
new SentenceAnonymizer(config))) { new SentenceAnonymizer(config))) {
if (boost::filesystem::exists(_wordMapFilePath)) { if (boost::filesystem::exists(_wordMapFilePath)) {
ifstream ifs(_wordMapFilePath.c_str(), std::ios::binary); std::ifstream ifs(_wordMapFilePath.c_str(), std::ios::binary);
boost::archive::binary_iarchive ia(ifs); boost::archive::binary_iarchive ia(ifs);
boost::shared_ptr<WordMap> restoredWordMap(new WordMap); boost::shared_ptr<WordMap> restoredWordMap(new WordMap);
ia >> *_wordMap; ia >> *_wordMap;
@ -25,16 +25,16 @@ HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
HashGenerator::~HashGenerator() { HashGenerator::~HashGenerator() {
} }
vector<INDEX_CHARACTER_TYPE> HashGenerator::generateHash( std::vector<INDEX_CHARACTER_TYPE> HashGenerator::generateHash(
const string & sentence) throw(ConcordiaException) { const std::string & sentence) throw(ConcordiaException) {
vector<INDEX_CHARACTER_TYPE> result; std::vector<INDEX_CHARACTER_TYPE> result;
vector<string> tokenTexts = generateTokenVector(sentence); std::vector<std::string> tokenTexts = generateTokenVector(sentence);
if (tokenTexts.size() > Utils::maxSentenceSize) { if (tokenTexts.size() > Utils::maxSentenceSize) {
throw ConcordiaException("Trying to add too long sentence."); throw ConcordiaException("Trying to add too long sentence.");
} }
for (vector<string>::iterator it = tokenTexts.begin(); for (std::vector<std::string>::iterator it = tokenTexts.begin();
it != tokenTexts.end(); ++it) { it != tokenTexts.end(); ++it) {
string token = *it; std::string token = *it;
INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token); INDEX_CHARACTER_TYPE code = _wordMap->getWordCode(token);
result.push_back(code); result.push_back(code);
} }
@ -42,10 +42,11 @@ vector<INDEX_CHARACTER_TYPE> HashGenerator::generateHash(
return result; return result;
} }
vector<string> HashGenerator::generateTokenVector(const string & sentence) { std::vector<std::string> HashGenerator::generateTokenVector(
string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence); const std::string & sentence) {
std::string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence);
boost::trim(anonymizedSentence); boost::trim(anonymizedSentence);
vector<string> tokenTexts; std::vector<std::string> tokenTexts;
boost::split(tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"), boost::split(tokenTexts, anonymizedSentence, boost::is_any_of(" \t\r\n"),
boost::algorithm::token_compress_on); boost::algorithm::token_compress_on);
return tokenTexts; return tokenTexts;
@ -53,7 +54,7 @@ vector<string> HashGenerator::generateTokenVector(const string & sentence) {
void HashGenerator::serializeWordMap() { void HashGenerator::serializeWordMap() {
ofstream ofs(_wordMapFilePath.c_str(), std::ios::binary); std::ofstream ofs(_wordMapFilePath.c_str(), std::ios::binary);
boost::archive::binary_oarchive oa(ofs); boost::archive::binary_oarchive oa(ofs);
oa << *_wordMap; oa << *_wordMap;
} }

View File

@ -18,8 +18,6 @@
*/ */
using namespace std;
class HashGenerator { class HashGenerator {
public: public:
explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config) explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
@ -29,10 +27,10 @@ public:
*/ */
virtual ~HashGenerator(); virtual ~HashGenerator();
vector<INDEX_CHARACTER_TYPE> generateHash(const string & sentence) std::vector<INDEX_CHARACTER_TYPE> generateHash(const std::string & sentence)
throw(ConcordiaException); throw(ConcordiaException);
vector<string> generateTokenVector(const string & sentence); std::vector<std::string> generateTokenVector(const std::string & sentence);
void serializeWordMap(); void serializeWordMap();
@ -41,7 +39,7 @@ private:
boost::shared_ptr<SentenceAnonymizer> _sentenceAnonymizer; boost::shared_ptr<SentenceAnonymizer> _sentenceAnonymizer;
string _wordMapFilePath; std::string _wordMapFilePath;
}; };
#endif #endif

View File

@ -12,16 +12,17 @@ IndexSearcher::IndexSearcher() {
IndexSearcher::~IndexSearcher() { IndexSearcher::~IndexSearcher() {
} }
vector<SubstringOccurence> IndexSearcher::simpleSearch( std::vector<SubstringOccurence> IndexSearcher::simpleSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException) { const std::string & pattern) throw(ConcordiaException) {
vector<SubstringOccurence> result; std::vector<SubstringOccurence> result;
int left; int left;
vector<INDEX_CHARACTER_TYPE> hash = hashGenerator->generateHash(pattern); std::vector<INDEX_CHARACTER_TYPE> hash =
hashGenerator->generateHash(pattern);
saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE); saidx_t patternLength = hash.size()*sizeof(INDEX_CHARACTER_TYPE);
sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * patternArray = Utils::indexVectorToSaucharArray(hash);
@ -47,12 +48,13 @@ vector<SubstringOccurence> IndexSearcher::simpleSearch(
return result; return result;
} }
vector<AnubisSearchResult> IndexSearcher::anubisSearch( std::vector<AnubisSearchResult> IndexSearcher::anubisSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException) { const std::string & pattern) throw(ConcordiaException) {
vector<INDEX_CHARACTER_TYPE> hash = hashGenerator->generateHash(pattern); std::vector<INDEX_CHARACTER_TYPE> hash =
hashGenerator->generateHash(pattern);
return _anubisSearcher->anubisSearch(T, markers, SA, hash); return _anubisSearcher->anubisSearch(T, markers, SA, hash);
} }

View File

@ -20,8 +20,6 @@
*/ */
using namespace std;
class IndexSearcher { class IndexSearcher {
public: public:
explicit IndexSearcher(); explicit IndexSearcher();
@ -30,19 +28,19 @@ public:
*/ */
virtual ~IndexSearcher(); virtual ~IndexSearcher();
vector<SubstringOccurence> simpleSearch( std::vector<SubstringOccurence> simpleSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException); const std::string & pattern) throw(ConcordiaException);
vector<AnubisSearchResult> anubisSearch( std::vector<AnubisSearchResult> anubisSearch(
boost::shared_ptr<HashGenerator> hashGenerator, boost::shared_ptr<HashGenerator> hashGenerator,
boost::shared_ptr<std::vector<sauchar_t> > T, boost::shared_ptr<std::vector<sauchar_t> > T,
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers, boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
boost::shared_ptr<std::vector<saidx_t> > SA, boost::shared_ptr<std::vector<saidx_t> > SA,
const string & pattern) throw(ConcordiaException); const std::string & pattern) throw(ConcordiaException);
private: private:
boost::shared_ptr<AnubisSearcher> _anubisSearcher; boost::shared_ptr<AnubisSearcher> _anubisSearcher;
}; };

View File

@ -1,9 +1,10 @@
#include "concordia/interval.hpp" #include "concordia/interval.hpp"
Interval::Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end): Interval::Interval(const SUFFIX_MARKER_TYPE start,
_start(start), const SUFFIX_MARKER_TYPE end):
_end(end) { _start(start),
_end(end) {
} }
Interval::~Interval() { Interval::~Interval() {

View File

@ -8,11 +8,10 @@
*/ */
using namespace std;
class Interval { class Interval {
public: public:
explicit Interval(const SUFFIX_MARKER_TYPE start, const SUFFIX_MARKER_TYPE end); explicit Interval(const SUFFIX_MARKER_TYPE start,
const SUFFIX_MARKER_TYPE end);
/*! Destructor. /*! Destructor.
*/ */

View File

@ -3,7 +3,8 @@
#include <boost/exception/all.hpp> #include <boost/exception/all.hpp>
#include <boost/throw_exception.hpp> #include <boost/throw_exception.hpp>
RegexReplacement::RegexReplacement(string patternString, string replacement, RegexReplacement::RegexReplacement(std::string patternString,
std::string replacement,
bool caseSensitive) bool caseSensitive)
throw(ConcordiaException): throw(ConcordiaException):
_replacement(replacement) { _replacement(replacement) {
@ -15,7 +16,7 @@ RegexReplacement::RegexReplacement(string patternString, string replacement,
boost::regex::icase); boost::regex::icase);
} }
} catch(const std::exception & e) { } catch(const std::exception & e) {
stringstream ss; std::stringstream ss;
ss << "Bad regex pattern: " << patternString << ss << "Bad regex pattern: " << patternString <<
" Detailed info: " << e.what(); " Detailed info: " << e.what();
@ -31,7 +32,7 @@ RegexReplacement::RegexReplacement(string patternString, string replacement,
RegexReplacement::~RegexReplacement() { RegexReplacement::~RegexReplacement() {
} }
string RegexReplacement::apply(const string & text) { std::string RegexReplacement::apply(const std::string & text) {
try { try {
return boost::u32regex_replace(text, _pattern, _replacement, return boost::u32regex_replace(text, _pattern, _replacement,
boost::match_default | boost::format_all); boost::match_default | boost::format_all);

View File

@ -14,26 +14,24 @@
*/ */
using namespace std;
typedef boost::error_info<struct my_tag, std::string> my_tag_error_info; typedef boost::error_info<struct my_tag, std::string> my_tag_error_info;
class RegexReplacement { class RegexReplacement {
public: public:
RegexReplacement(string patternString, string replacement, RegexReplacement(std::string patternString, std::string replacement,
bool caseSensitive = true) bool caseSensitive = true)
throw(ConcordiaException); throw(ConcordiaException);
/*! Destructor. /*! Destructor.
*/ */
virtual ~RegexReplacement(); virtual ~RegexReplacement();
string apply(const string & text); std::string apply(const std::string & text);
private: private:
boost::u32regex _pattern; boost::u32regex _pattern;
string _replacement; std::string _replacement;
}; };
#endif #endif

View File

@ -23,8 +23,8 @@ SentenceAnonymizer::SentenceAnonymizer(
SentenceAnonymizer::~SentenceAnonymizer() { SentenceAnonymizer::~SentenceAnonymizer() {
} }
string SentenceAnonymizer::anonymize(const string & sentence) { std::string SentenceAnonymizer::anonymize(const std::string & sentence) {
string result = sentence; std::string result = sentence;
result = _htmlTags->apply(result); result = _htmlTags->apply(result);
@ -41,20 +41,20 @@ string SentenceAnonymizer::anonymize(const string & sentence) {
return result; return result;
} }
void SentenceAnonymizer::_createNeRules(string & namedEntitiesPath) { void SentenceAnonymizer::_createNeRules(std::string & namedEntitiesPath) {
if (boost::filesystem::exists(namedEntitiesPath)) { if (boost::filesystem::exists(namedEntitiesPath)) {
string line; std::string line;
ifstream neFile(namedEntitiesPath.c_str()); std::ifstream neFile(namedEntitiesPath.c_str());
if (neFile.is_open()) { if (neFile.is_open()) {
int lineCounter = 0; int lineCounter = 0;
while (getline(neFile, line)) { while (getline(neFile, line)) {
lineCounter++; lineCounter++;
boost::shared_ptr<vector<string> > boost::shared_ptr<std::vector<std::string> >
tokenTexts(new vector<string>()); tokenTexts(new std::vector<std::string>());
boost::split(*tokenTexts, line, boost::is_any_of(" "), boost::split(*tokenTexts, line, boost::is_any_of(" "),
boost::token_compress_on); boost::token_compress_on);
if (tokenTexts->size() != 2) { if (tokenTexts->size() != 2) {
stringstream ss; std::stringstream ss;
ss << "Invalid line: " << lineCounter ss << "Invalid line: " << lineCounter
<< " in NE file: " << namedEntitiesPath; << " in NE file: " << namedEntitiesPath;
throw ConcordiaException(ss.str()); throw ConcordiaException(ss.str());
@ -72,11 +72,11 @@ void SentenceAnonymizer::_createNeRules(string & namedEntitiesPath) {
} }
} }
void SentenceAnonymizer::_createHtmlTagsRule(string & htmlTagsPath) { void SentenceAnonymizer::_createHtmlTagsRule(std::string & htmlTagsPath) {
string tagsExpression = "<\\/?("; std::string tagsExpression = "<\\/?(";
if (boost::filesystem::exists(htmlTagsPath)) { if (boost::filesystem::exists(htmlTagsPath)) {
string line; std::string line;
ifstream tagsFile(htmlTagsPath.c_str()); std::ifstream tagsFile(htmlTagsPath.c_str());
if (tagsFile.is_open()) { if (tagsFile.is_open()) {
while (getline(tagsFile, line)) { while (getline(tagsFile, line)) {
tagsExpression += "|"; tagsExpression += "|";
@ -95,12 +95,12 @@ void SentenceAnonymizer::_createHtmlTagsRule(string & htmlTagsPath) {
} }
boost::shared_ptr<RegexReplacement> boost::shared_ptr<RegexReplacement>
SentenceAnonymizer::_getMultipleReplacementRule( SentenceAnonymizer::_getMultipleReplacementRule(
string & filePath, string replacement, bool wholeWord) { std::string & filePath, std::string replacement, bool wholeWord) {
string expression = "("; std::string expression = "(";
if (boost::filesystem::exists(filePath)) { if (boost::filesystem::exists(filePath)) {
string line; std::string line;
ifstream ruleFile(filePath.c_str()); std::ifstream ruleFile(filePath.c_str());
if (ruleFile.is_open()) { if (ruleFile.is_open()) {
while (getline(ruleFile, line)) { while (getline(ruleFile, line)) {
if (wholeWord) { if (wholeWord) {

View File

@ -16,8 +16,6 @@
*/ */
using namespace std;
class SentenceAnonymizer { class SentenceAnonymizer {
public: public:
explicit SentenceAnonymizer(boost::shared_ptr<ConcordiaConfig> config) explicit SentenceAnonymizer(boost::shared_ptr<ConcordiaConfig> config)
@ -27,19 +25,19 @@ public:
*/ */
virtual ~SentenceAnonymizer(); virtual ~SentenceAnonymizer();
string anonymize(const string & sentence); std::string anonymize(const std::string & sentence);
private: private:
void _createNeRules(string & namedEntitiesPath); void _createNeRules(std::string & namedEntitiesPath);
void _createHtmlTagsRule(string & htmlTagsPath); void _createHtmlTagsRule(std::string & htmlTagsPath);
boost::shared_ptr<RegexReplacement> _getMultipleReplacementRule( boost::shared_ptr<RegexReplacement> _getMultipleReplacementRule(
string & filePath, std::string & filePath,
string replacement, std::string replacement,
bool wholeWord = false); bool wholeWord = false);
vector<RegexReplacement> _namedEntities; std::vector<RegexReplacement> _namedEntities;
boost::shared_ptr<RegexReplacement> _htmlTags; boost::shared_ptr<RegexReplacement> _htmlTags;

View File

@ -7,10 +7,11 @@ SubstringOccurence::SubstringOccurence() {
SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) { SubstringOccurence::SubstringOccurence(const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker); _id = Utils::getIdFromMarker(marker);
_offset = Utils::getOffsetFromMarker(marker); _offset = Utils::getOffsetFromMarker(marker);
_exampleLength = Utils::getLengthFromMarker(marker); _exampleLength = Utils::getLengthFromMarker(marker);
} }
void SubstringOccurence::enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker) { void SubstringOccurence::enterDataFromMarker(
const SUFFIX_MARKER_TYPE & marker) {
_id = Utils::getIdFromMarker(marker); _id = Utils::getIdFromMarker(marker);
_offset = Utils::getOffsetFromMarker(marker); _offset = Utils::getOffsetFromMarker(marker);
_exampleLength = Utils::getLengthFromMarker(marker); _exampleLength = Utils::getLengthFromMarker(marker);

View File

@ -9,8 +9,6 @@
*/ */
using namespace std;
class SubstringOccurence { class SubstringOccurence {
public: public:
SubstringOccurence(); SubstringOccurence();
@ -35,7 +33,7 @@ public:
SUFFIX_MARKER_TYPE getExampleLength() const { SUFFIX_MARKER_TYPE getExampleLength() const {
return _exampleLength; return _exampleLength;
} }
void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker); void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);
private: private:
@ -43,7 +41,7 @@ private:
SUFFIX_MARKER_TYPE _offset; SUFFIX_MARKER_TYPE _offset;
// the example // the example
SUFFIX_MARKER_TYPE _exampleLength; SUFFIX_MARKER_TYPE _exampleLength;
}; };

View File

@ -12,8 +12,6 @@
#include "concordia/common/logging.hpp" #include "concordia/common/logging.hpp"
#include "tests/common/test_resources_manager.hpp" #include "tests/common/test_resources_manager.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(anubis_searcher) BOOST_AUTO_TEST_SUITE(anubis_searcher)
BOOST_AUTO_TEST_CASE( LcpSearch1 ) BOOST_AUTO_TEST_CASE( LcpSearch1 )
@ -187,7 +185,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern2.push_back(2); pattern2.push_back(2);
SUFFIX_MARKER_TYPE highResLength2; SUFFIX_MARKER_TYPE highResLength2;
vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2); std::vector<SubstringOccurence> result2 = searcher.lcpSearch(T, markers, SA, pattern2, highResLength2);
SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length2 = highResLength2 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA: /* Expecting to get one result from SA:
@ -230,7 +228,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern3.push_back(3); pattern3.push_back(3);
SUFFIX_MARKER_TYPE highResLength3; SUFFIX_MARKER_TYPE highResLength3;
vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3); std::vector<SubstringOccurence> result3 = searcher.lcpSearch(T, markers, SA, pattern3, highResLength3);
SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length3 = highResLength3 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get one result from SA: /* Expecting to get one result from SA:
@ -267,7 +265,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern4.push_back(4); pattern4.push_back(4);
SUFFIX_MARKER_TYPE highResLength4; SUFFIX_MARKER_TYPE highResLength4;
vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4); std::vector<SubstringOccurence> result4 = searcher.lcpSearch(T, markers, SA, pattern4, highResLength4);
SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length4 = highResLength4 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 2 results from SA: /* Expecting to get 2 results from SA:
@ -298,7 +296,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern5.push_back(4); pattern5.push_back(4);
SUFFIX_MARKER_TYPE highResLength5; SUFFIX_MARKER_TYPE highResLength5;
vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5); std::vector<SubstringOccurence> result5 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength5);
SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length5 = highResLength5 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0; /* Expecting to get 0 results from SA, lcp length = 0;
@ -322,7 +320,7 @@ BOOST_AUTO_TEST_CASE( LcpSearch1 )
pattern6.push_back(0); pattern6.push_back(0);
SUFFIX_MARKER_TYPE highResLength6; SUFFIX_MARKER_TYPE highResLength6;
vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6); std::vector<SubstringOccurence> result6 = searcher.lcpSearch(T, markers, SA, pattern5, highResLength6);
SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE); SUFFIX_MARKER_TYPE length6 = highResLength6 / sizeof(INDEX_CHARACTER_TYPE);
/* Expecting to get 0 results from SA, lcp length = 0; /* Expecting to get 0 results from SA, lcp length = 0;
@ -393,38 +391,38 @@ BOOST_AUTO_TEST_CASE( TmMatchesTest )
// example 14 // example 14
// example interval list: [(1,2)] // example interval list: [(1,2)]
vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals(); std::vector<Interval> exampleIntervals14 = tmMatches14->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals14.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2); BOOST_CHECK_EQUAL(exampleIntervals14[0].getEnd(), 2);
// pattern interval list: [(1,2)] // pattern interval list: [(1,2)]
vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals(); std::vector<Interval> patternIntervals14 = tmMatches14->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals14.size(), 1); BOOST_CHECK_EQUAL(patternIntervals14.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals14[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2); BOOST_CHECK_EQUAL(patternIntervals14[0].getEnd(), 2);
// example 51 // example 51
// example interval list: [(1,3)] // example interval list: [(1,3)]
vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals(); std::vector<Interval> exampleIntervals51 = tmMatches51->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1); BOOST_CHECK_EQUAL(exampleIntervals51.size(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3); BOOST_CHECK_EQUAL(exampleIntervals51[0].getEnd(), 3);
// pattern interval list: [(1,3)] // pattern interval list: [(1,3)]
vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals(); std::vector<Interval> patternIntervals51 = tmMatches51->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals51.size(), 1); BOOST_CHECK_EQUAL(patternIntervals51.size(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals51[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3); BOOST_CHECK_EQUAL(patternIntervals51[0].getEnd(), 3);
// example 123 // example 123
// example interval list: [(1,3), (0,1)] // example interval list: [(1,3), (0,1)]
vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals(); std::vector<Interval> exampleIntervals123 = tmMatches123->getExampleIntervals();
BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2); BOOST_CHECK_EQUAL(exampleIntervals123.size(), 2);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(exampleIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3); BOOST_CHECK_EQUAL(exampleIntervals123[0].getEnd(), 3);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0); BOOST_CHECK_EQUAL(exampleIntervals123[1].getStart(), 0);
BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1); BOOST_CHECK_EQUAL(exampleIntervals123[1].getEnd(), 1);
// pattern interval list: [(1,3), (3,4)] // pattern interval list: [(1,3), (3,4)]
vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals(); std::vector<Interval> patternIntervals123 = tmMatches123->getPatternIntervals();
BOOST_CHECK_EQUAL(patternIntervals123.size(), 2); BOOST_CHECK_EQUAL(patternIntervals123.size(), 2);
BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1); BOOST_CHECK_EQUAL(patternIntervals123[0].getStart(), 1);
BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3); BOOST_CHECK_EQUAL(patternIntervals123[0].getEnd(), 3);

View File

@ -9,14 +9,12 @@
#include <string> #include <string>
using namespace std;
BOOST_AUTO_TEST_SUITE(concordia_main) BOOST_AUTO_TEST_SUITE(concordia_main)
BOOST_AUTO_TEST_CASE( ConcordiaVersion ) BOOST_AUTO_TEST_CASE( ConcordiaVersion )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
string version = concordia.getVersion(); std::string version = concordia.getVersion();
BOOST_CHECK_EQUAL( version , "0.1"); BOOST_CHECK_EQUAL( version , "0.1");
} }
@ -51,8 +49,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch1 )
*/ */
vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia"); std::vector<SubstringOccurence> searchResult1 = concordia.simpleSearch("posiada rysia");
vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala"); std::vector<SubstringOccurence> searchResult2 = concordia.simpleSearch("posiada kota Ala");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -73,7 +71,7 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
{ {
// modified stop words to avoid anonymization // modified stop words to avoid anonymization
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
vector<Example> testExamples; std::vector<Example> testExamples;
testExamples.push_back(Example("xto xjest okno",312)); testExamples.push_back(Example("xto xjest okno",312));
testExamples.push_back(Example("czy xjest okno otwarte",202)); testExamples.push_back(Example("czy xjest okno otwarte",202));
testExamples.push_back(Example("chyba xto xjest xtutaj",45)); testExamples.push_back(Example("chyba xto xjest xtutaj",45));
@ -106,8 +104,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
*/ */
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest"); std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("xto xjest");
vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno"); std::vector<SubstringOccurence> searchResult2 = concordia2.simpleSearch("xjest okno");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -131,13 +129,13 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 ) BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
{ {
Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
vector<Example> testExamples; std::vector<Example> testExamples;
testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312)); testExamples.push_back(Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
testExamples.push_back(Example("czy xjest żółte otwarte",202)); testExamples.push_back(Example("czy xjest żółte otwarte",202));
concordia.addAllExamples(testExamples); concordia.addAllExamples(testExamples);
Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")); Concordia concordia2 = Concordia(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia"); std::vector<SubstringOccurence> searchResult1 = concordia2.simpleSearch("on w szczególności prawo do podjęcia");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
@ -176,8 +174,8 @@ BOOST_AUTO_TEST_CASE( ConcordiaAnubisSearch1 )
n: 0 1 2 3 4 5 6 7 8 9 10 11 n: 0 1 2 3 4 5 6 7 8 9 10 11
SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7 SA[n]: 0 4 1 9 5 2 10 6 8 11 3 7
vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba"); std::vector<AnubisSearchResult> searchResult1 = concordia.anubisSearch("posiada rysia chyba");
vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala"); std::vector<AnubisSearchResult> searchResult2 = concordia.anubisSearch("posiada kota Ala");
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS)); boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));

View File

@ -7,8 +7,6 @@
#include <list> #include <list>
#include <boost/algorithm/string/predicate.hpp> #include <boost/algorithm/string/predicate.hpp>
using namespace std;
BOOST_AUTO_TEST_SUITE(concordia_config) BOOST_AUTO_TEST_SUITE(concordia_config)
BOOST_AUTO_TEST_CASE( ConfigParameters ) BOOST_AUTO_TEST_CASE( ConfigParameters )
@ -29,7 +27,7 @@ BOOST_AUTO_TEST_CASE( ConfigParameters )
BOOST_AUTO_TEST_CASE( NonexistentConfigTest ) BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
{ {
bool exceptionThrown = false; bool exceptionThrown = false;
string message = ""; std::string message = "";
try { try {
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg")); ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("foo.cfg"));
} catch (ConcordiaException & e) { } catch (ConcordiaException & e) {
@ -44,7 +42,7 @@ BOOST_AUTO_TEST_CASE( NonexistentConfigTest )
BOOST_AUTO_TEST_CASE( InvalidConfigTest ) BOOST_AUTO_TEST_CASE( InvalidConfigTest )
{ {
bool exceptionThrown = false; bool exceptionThrown = false;
string message = ""; std::string message = "";
try { try {
ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg")); ConcordiaConfig config(TestResourcesManager::getTestConcordiaConfigFilePath("invalid.cfg"));
} catch (ConcordiaException & e) { } catch (ConcordiaException & e) {

View File

@ -7,8 +7,6 @@
#include <boost/algorithm/string/predicate.hpp> #include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
using namespace std;
BOOST_AUTO_TEST_SUITE(concordia_index) BOOST_AUTO_TEST_SUITE(concordia_index)
@ -16,7 +14,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
{ {
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
TestResourcesManager::getTestFilePath("temp","test_markers.bin")); TestResourcesManager::getTestFilePath("temp","test_markers.bin"));
boost::shared_ptr<vector<sauchar_t> > T = boost::shared_ptr<vector<sauchar_t> >(new vector<sauchar_t>()); boost::shared_ptr<std::vector<sauchar_t> > T = boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>());
// Test hashed index: // Test hashed index:
// n: 0 1 2 3 4 5 6 7 8 // n: 0 1 2 3 4 5 6 7 8
// T[n]: 0 1 2 0 1 3 4 1 3 // T[n]: 0 1 2 0 1 3 4 1 3
@ -36,7 +34,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest )
boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T); boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T);
boost::shared_ptr<vector<saidx_t> > expectedSA = boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>()); boost::shared_ptr<std::vector<saidx_t> > expectedSA = boost::shared_ptr<std::vector<saidx_t> >(new std::vector<saidx_t>());
expectedSA->push_back(0); expectedSA->push_back(0);
expectedSA->push_back(3); expectedSA->push_back(3);
expectedSA->push_back(1); expectedSA->push_back(1);
@ -53,7 +51,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest2 )
{ {
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
TestResourcesManager::getTestFilePath("temp","test_markers.bin")); TestResourcesManager::getTestFilePath("temp","test_markers.bin"));
boost::shared_ptr<vector<sauchar_t> > T = boost::shared_ptr<vector<sauchar_t> >(new vector<sauchar_t>()); boost::shared_ptr<std::vector<sauchar_t> > T = boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>());
//Test hashed index: //Test hashed index:
// n: 0 1 2 3 4 5 6 7 8 9 10 11 // n: 0 1 2 3 4 5 6 7 8 9 10 11
@ -77,7 +75,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest2 )
boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T); boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T);
boost::shared_ptr<vector<saidx_t> > expectedSA = boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>()); boost::shared_ptr<std::vector<saidx_t> > expectedSA = boost::shared_ptr<std::vector<saidx_t> >(new std::vector<saidx_t>());
expectedSA->push_back(0); expectedSA->push_back(0);
expectedSA->push_back(4); expectedSA->push_back(4);
expectedSA->push_back(1); expectedSA->push_back(1);
@ -97,7 +95,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest3 )
{ {
ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"), ConcordiaIndex index(TestResourcesManager::getTestFilePath("temp","test_hash_index.bin"),
TestResourcesManager::getTestFilePath("temp","test_markers.bin")); TestResourcesManager::getTestFilePath("temp","test_markers.bin"));
boost::shared_ptr<vector<sauchar_t> > T = boost::shared_ptr<vector<sauchar_t> >(new vector<sauchar_t>()); boost::shared_ptr<std::vector<sauchar_t> > T = boost::shared_ptr<std::vector<sauchar_t> >(new std::vector<sauchar_t>());
//Test hashed index: //Test hashed index:
// n: 0 1 2 3 4 5 // n: 0 1 2 3 4 5
@ -115,7 +113,7 @@ BOOST_AUTO_TEST_CASE( SuffixArrayGenerationTest3 )
boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T); boost::shared_ptr<std::vector<saidx_t> > SA = index.generateSuffixArray(T);
boost::shared_ptr<vector<saidx_t> > expectedSA = boost::shared_ptr<vector<saidx_t> >(new vector<saidx_t>()); boost::shared_ptr<std::vector<saidx_t> > expectedSA = boost::shared_ptr<std::vector<saidx_t> >(new std::vector<saidx_t>());
expectedSA->push_back(0); expectedSA->push_back(0);
expectedSA->push_back(5); expectedSA->push_back(5);
expectedSA->push_back(3); expectedSA->push_back(3);

View File

@ -5,8 +5,6 @@
#include "concordia/example.hpp" #include "concordia/example.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(exampleTest) BOOST_AUTO_TEST_SUITE(exampleTest)
BOOST_AUTO_TEST_CASE( ExceedingId ) BOOST_AUTO_TEST_CASE( ExceedingId )
@ -15,7 +13,7 @@ BOOST_AUTO_TEST_CASE( ExceedingId )
Example example1("Test", maxId); Example example1("Test", maxId);
bool exceptionThrown = false; bool exceptionThrown = false;
string message = ""; std::string message = "";
try { try {
Example example2("Test", maxId+1); Example example2("Test", maxId+1);
} catch (ConcordiaException & e) { } catch (ConcordiaException & e) {

View File

@ -8,9 +8,6 @@
#include "concordia/hash_generator.hpp" #include "concordia/hash_generator.hpp"
#include "tests/common/test_resources_manager.hpp" #include "tests/common/test_resources_manager.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(hash_generator) BOOST_AUTO_TEST_SUITE(hash_generator)
BOOST_AUTO_TEST_CASE( SimpleHashTest ) BOOST_AUTO_TEST_CASE( SimpleHashTest )
@ -23,8 +20,8 @@ BOOST_AUTO_TEST_CASE( SimpleHashTest )
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(config);
vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota"); std::vector<INDEX_CHARACTER_TYPE> hash = hashGenerator.generateHash("Ala posiada kota");
vector<INDEX_CHARACTER_TYPE> expected; std::vector<INDEX_CHARACTER_TYPE> expected;
expected.push_back(0); expected.push_back(0);
expected.push_back(1); expected.push_back(1);
expected.push_back(2); expected.push_back(2);
@ -44,17 +41,17 @@ BOOST_AUTO_TEST_CASE( TooLongHashTest )
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(config);
stringstream ss; std::stringstream ss;
for (int i=0;i<65537;i++) { for (int i=0;i<65537;i++) {
ss << "xx" << i << " "; ss << "xx" << i << " ";
} }
string longSentence = ss.str(); std::string longSentence = ss.str();
bool exceptionThrown = false; bool exceptionThrown = false;
string message = ""; std::string message = "";
try { try {
boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > hash = hashGenerator.generateHash(longSentence); boost::shared_ptr<std::vector<INDEX_CHARACTER_TYPE> > hash = hashGenerator.generateHash(longSentence);
} catch (ConcordiaException & e) { } catch (ConcordiaException & e) {
exceptionThrown = true; exceptionThrown = true;
message = e.what(); message = e.what();
@ -76,8 +73,8 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
HashGenerator hashGenerator1 = HashGenerator(config); HashGenerator hashGenerator1 = HashGenerator(config);
vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota"); std::vector<INDEX_CHARACTER_TYPE> hash1 = hashGenerator1.generateHash("Ala posiada kota");
vector<INDEX_CHARACTER_TYPE> expected1; std::vector<INDEX_CHARACTER_TYPE> expected1;
expected1.push_back(0); expected1.push_back(0);
expected1.push_back(1); expected1.push_back(1);
expected1.push_back(2); expected1.push_back(2);
@ -86,8 +83,8 @@ BOOST_AUTO_TEST_CASE( HashSerializationTest )
hashGenerator1.serializeWordMap(); hashGenerator1.serializeWordMap();
HashGenerator hashGenerator2 = HashGenerator(config); HashGenerator hashGenerator2 = HashGenerator(config);
vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa"); std::vector<INDEX_CHARACTER_TYPE> hash2 = hashGenerator2.generateHash("Ala posiada psa");
vector<INDEX_CHARACTER_TYPE> expected2; std::vector<INDEX_CHARACTER_TYPE> expected2;
expected2.push_back(0); expected2.push_back(0);
expected2.push_back(1); expected2.push_back(1);
expected2.push_back(3); expected2.push_back(3);
@ -106,8 +103,8 @@ BOOST_AUTO_TEST_CASE( TokenVectorTest )
HashGenerator hashGenerator = HashGenerator(config); HashGenerator hashGenerator = HashGenerator(config);
vector<string> tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód."); std::vector<std::string> tokenVector = hashGenerator.generateTokenVector("12.02.2014 o godzinie 17:40 doszło do kolizji na ulicy Grobla; policjanci ustalili, że kierowca zaparkował samochód.");
vector<string> expected; std::vector<std::string> expected;
expected.push_back("ne_date"); expected.push_back("ne_date");
expected.push_back("godzinie"); expected.push_back("godzinie");
expected.push_back("ne_number"); expected.push_back("ne_number");

View File

@ -2,8 +2,6 @@
#include "concordia/interval.hpp" #include "concordia/interval.hpp"
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(interval) BOOST_AUTO_TEST_SUITE(interval)
BOOST_AUTO_TEST_CASE( IntervalIntersects1 ) BOOST_AUTO_TEST_CASE( IntervalIntersects1 )

View File

@ -6,8 +6,6 @@
#define TMP_LOG_FILE "/tmp/concordia.log" #define TMP_LOG_FILE "/tmp/concordia.log"
using namespace std;
BOOST_AUTO_TEST_SUITE(logging) BOOST_AUTO_TEST_SUITE(logging)
BOOST_AUTO_TEST_CASE( LoggingTest ) BOOST_AUTO_TEST_CASE( LoggingTest )

View File

@ -5,8 +5,6 @@
#include <boost/locale.hpp> #include <boost/locale.hpp>
#include <boost/algorithm/string/case_conv.hpp> #include <boost/algorithm/string/case_conv.hpp>
using namespace std;
BOOST_AUTO_TEST_SUITE(regex_replacement) BOOST_AUTO_TEST_SUITE(regex_replacement)
BOOST_AUTO_TEST_CASE( SimpleReplacement ) BOOST_AUTO_TEST_CASE( SimpleReplacement )
@ -18,7 +16,7 @@ BOOST_AUTO_TEST_CASE( SimpleReplacement )
BOOST_AUTO_TEST_CASE( BadRegex ) BOOST_AUTO_TEST_CASE( BadRegex )
{ {
bool exceptionThrown = false; bool exceptionThrown = false;
string message = ""; std::string message = "";
try { try {
RegexReplacement rr("+a","b"); RegexReplacement rr("+a","b");
} catch (ConcordiaException & e) { } catch (ConcordiaException & e) {

View File

@ -8,9 +8,6 @@
#include "concordia/sentence_anonymizer.hpp" #include "concordia/sentence_anonymizer.hpp"
#include "tests/common/test_resources_manager.hpp" #include "tests/common/test_resources_manager.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(sentence_anonymizer) BOOST_AUTO_TEST_SUITE(sentence_anonymizer)
BOOST_AUTO_TEST_CASE( NETest ) BOOST_AUTO_TEST_CASE( NETest )
@ -19,7 +16,7 @@ BOOST_AUTO_TEST_CASE( NETest )
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "Date: 12.04.2012, mail: test@example.com, number: 5.34"; std::string sentence = "Date: 12.04.2012, mail: test@example.com, number: 5.34";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"date ne_date mail ne_email number ne_number"); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"date ne_date mail ne_email number ne_number");
} }
@ -29,7 +26,7 @@ BOOST_AUTO_TEST_CASE( HtmlTagsTest )
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "<a href='http://wp.pl'>link</a> and <b>bold</b> and newline <br/>"; std::string sentence = "<a href='http://wp.pl'>link</a> and <b>bold</b> and newline <br/>";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"link and bold and newline "); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"link and bold and newline ");
} }
@ -40,7 +37,7 @@ BOOST_AUTO_TEST_CASE( StopWordsTest )
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "Aczkolwiek nie wiem, czy to konieczne"; std::string sentence = "Aczkolwiek nie wiem, czy to konieczne";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence)," wiem konieczne"); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence)," wiem konieczne");
} }
@ -51,7 +48,7 @@ BOOST_AUTO_TEST_CASE( StopSymbolsTest )
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "xxx, . xxx # xx $xx@ xx"; std::string sentence = "xxx, . xxx # xx $xx@ xx";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"xxx xxx xx xx xx"); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"xxx xxx xx xx xx");
} }
@ -62,7 +59,7 @@ BOOST_AUTO_TEST_CASE( SpaceSymbolsTest )
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "xxx-xxx xx|xx"; std::string sentence = "xxx-xxx xx|xx";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"xxx xxx xx xx"); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"xxx xxx xx xx");
} }
@ -72,7 +69,7 @@ BOOST_AUTO_TEST_CASE( WeirdSentenceTest )
boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"))); boost::shared_ptr<ConcordiaConfig> config(new ConcordiaConfig(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg")));
SentenceAnonymizer anonymizer(config); SentenceAnonymizer anonymizer(config);
string sentence = "Sony | DXC-M7PKDXC-M7PDXC-M7PHDXC-M7PK/1DXC-M7P/1DXC-M7PH/1DXC-327PKDXC-327PLDXC-327PHDXC-327APKDXC-327APLDXC-327AHDXC-537PKDXC-537PLDXC-537PHDXC-537APKDXC-537APLDXC-537APHEVW-537PKEVW-327PKDXC-637PDXC-637PKDXC-637PLDXC-637PHPVW-637PKPVW-637PLDXC-D30PFDXC-D30PKDXC-D30PLDXC-D30PHDSR-130PFDSR-130PKDSR-130PLPVW-D30PFPVW-D30PKPVW-D30PLDXC-327BPFDXC-327BPKDXC-327BPLDXC-327BPHDXC-D30WSPDXC-D35PHDXC-D35PLDXC-D35PKDXC-D35WSPLDSR-135PL | DXF-3000CEDXF-325CEDXF-501CEDXF-M3CEDXF-M7CEDXF-40CEDXF-40ACEDXF-50CEDXF-601CEDXF-40BCEDXF-50BCEDXF-701CEDXF-WSCEDXF-801CEHDVF-C30W | CCU-M3PCCU-M5PCCU-M7PCUU-M5AP | RM-M7GRM-M7E | — | CA-325PCA-325APCA-325BCA-327PCA-537PCA-511CA-512PCA-513VCT-U14 |"; std::string sentence = "Sony | DXC-M7PKDXC-M7PDXC-M7PHDXC-M7PK/1DXC-M7P/1DXC-M7PH/1DXC-327PKDXC-327PLDXC-327PHDXC-327APKDXC-327APLDXC-327AHDXC-537PKDXC-537PLDXC-537PHDXC-537APKDXC-537APLDXC-537APHEVW-537PKEVW-327PKDXC-637PDXC-637PKDXC-637PLDXC-637PHPVW-637PKPVW-637PLDXC-D30PFDXC-D30PKDXC-D30PLDXC-D30PHDSR-130PFDSR-130PKDSR-130PLPVW-D30PFPVW-D30PKPVW-D30PLDXC-327BPFDXC-327BPKDXC-327BPLDXC-327BPHDXC-D30WSPDXC-D35PHDXC-D35PLDXC-D35PKDXC-D35WSPLDSR-135PL | DXF-3000CEDXF-325CEDXF-501CEDXF-M3CEDXF-M7CEDXF-40CEDXF-40ACEDXF-50CEDXF-601CEDXF-40BCEDXF-50BCEDXF-701CEDXF-WSCEDXF-801CEHDVF-C30W | CCU-M3PCCU-M5PCCU-M7PCUU-M5AP | RM-M7GRM-M7E | — | CA-325PCA-325APCA-325BCA-327PCA-537PCA-511CA-512PCA-513VCT-U14 |";
BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"sony dxc mne_numberpkdxc mne_numberpdxc mne_numberphdxc mne_numberpk ne_numberdxc mne_numberp ne_numberdxc mne_numberph ne_numberdxc ne_numberpkdxc ne_numberpldxc ne_numberphdxc ne_numberapkdxc ne_numberapldxc ne_numberahdxc ne_numberpkdxc ne_numberpldxc ne_numberphdxc ne_numberapkdxc ne_numberapldxc ne_numberaphevw ne_numberpkevw ne_numberpkdxc ne_numberpdxc ne_numberpkdxc ne_numberpldxc ne_numberphpvw ne_numberpkpvw ne_numberpldxc dne_numberpfdxc dne_numberpkdxc dne_numberpldxc dne_numberphdsr ne_numberpfdsr ne_numberpkdsr ne_numberplpvw dne_numberpfpvw dne_numberpkpvw dne_numberpldxc ne_numberbpfdxc ne_numberbpkdxc ne_numberbpldxc ne_numberbphdxc dne_numberwspdxc dne_numberphdxc dne_numberpldxc dne_numberpkdxc dne_numberwspldsr ne_numberpl dxf ne_numbercedxf ne_numbercedxf ne_numbercedxf mne_numbercedxf mne_numbercedxf ne_numbercedxf ne_numberacedxf ne_numbercedxf ne_numbercedxf ne_numberbcedxf ne_numberbcedxf ne_numbercedxf wscedxf ne_numbercehdvf cne_numberw ccu mne_numberpccu mne_numberpccu mne_numberpcuu mne_numberap rm mne_numbergrm mne_numbere — ca ne_numberpca ne_numberapca ne_numberbca ne_numberpca ne_numberpca ne_numberca ne_numberpca ne_numbervct une_number "); BOOST_CHECK_EQUAL(anonymizer.anonymize(sentence),"sony dxc mne_numberpkdxc mne_numberpdxc mne_numberphdxc mne_numberpk ne_numberdxc mne_numberp ne_numberdxc mne_numberph ne_numberdxc ne_numberpkdxc ne_numberpldxc ne_numberphdxc ne_numberapkdxc ne_numberapldxc ne_numberahdxc ne_numberpkdxc ne_numberpldxc ne_numberphdxc ne_numberapkdxc ne_numberapldxc ne_numberaphevw ne_numberpkevw ne_numberpkdxc ne_numberpdxc ne_numberpkdxc ne_numberpldxc ne_numberphpvw ne_numberpkpvw ne_numberpldxc dne_numberpfdxc dne_numberpkdxc dne_numberpldxc dne_numberphdsr ne_numberpfdsr ne_numberpkdsr ne_numberplpvw dne_numberpfpvw dne_numberpkpvw dne_numberpldxc ne_numberbpfdxc ne_numberbpkdxc ne_numberbpldxc ne_numberbphdxc dne_numberwspdxc dne_numberphdxc dne_numberpldxc dne_numberpkdxc dne_numberwspldsr ne_numberpl dxf ne_numbercedxf ne_numbercedxf ne_numbercedxf mne_numbercedxf mne_numbercedxf ne_numbercedxf ne_numberacedxf ne_numbercedxf ne_numbercedxf ne_numberbcedxf ne_numberbcedxf ne_numbercedxf wscedxf ne_numbercehdvf cne_numberw ccu mne_numberpccu mne_numberpccu mne_numberpcuu mne_numberap rm mne_numbergrm mne_numbere — ca ne_numberpca ne_numberapca ne_numberbca ne_numberpca ne_numberpca ne_numberca ne_numberpca ne_numbervct une_number ");
} }

View File

@ -2,19 +2,17 @@
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
#include "concordia/common/text_utils.hpp" #include "concordia/common/text_utils.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(text_utils) BOOST_AUTO_TEST_SUITE(text_utils)
BOOST_AUTO_TEST_CASE( ToLower ) BOOST_AUTO_TEST_CASE( ToLower )
{ {
string str = "ZAŻÓŁĆ GĘŚLĄ JAŹŃ"; std::string str = "ZAŻÓŁĆ GĘŚLĄ JAŹŃ";
BOOST_CHECK_EQUAL(TextUtils::getInstance().toLowerCase(str),"zażółć gęślą jaźń"); BOOST_CHECK_EQUAL(TextUtils::getInstance().toLowerCase(str),"zażółć gęślą jaźń");
} }
BOOST_AUTO_TEST_CASE( ToUpper ) BOOST_AUTO_TEST_CASE( ToUpper )
{ {
string str = "zażółć gęślą jaźń"; std::string str = "zażółć gęślą jaźń";
BOOST_CHECK_EQUAL(TextUtils::getInstance().toUpperCase(str),"ZAŻÓŁĆ GĘŚLĄ JAŹŃ"); BOOST_CHECK_EQUAL(TextUtils::getInstance().toUpperCase(str),"ZAŻÓŁĆ GĘŚLĄ JAŹŃ");
} }

View File

@ -3,8 +3,6 @@
#include "concordia/tm_matches.hpp" #include "concordia/tm_matches.hpp"
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
using namespace std;
BOOST_AUTO_TEST_SUITE(tm_matches) BOOST_AUTO_TEST_SUITE(tm_matches)
BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 ) BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 )

View File

@ -5,23 +5,20 @@
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
#include "divsufsort.h" #include "divsufsort.h"
#include <string>
using namespace std;
BOOST_AUTO_TEST_SUITE(utils) BOOST_AUTO_TEST_SUITE(utils)
BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter ) BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter )
{ {
ofstream testFileOutput; std::ofstream testFileOutput;
testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(), testFileOutput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),
ios::out|ios::binary); std::ios::out|std::ios::binary);
INDEX_CHARACTER_TYPE testCharacter = 123456789; //in hex: 75BCD15 INDEX_CHARACTER_TYPE testCharacter = 123456789; //in hex: 75BCD15
Utils::writeIndexCharacter(testFileOutput,testCharacter); Utils::writeIndexCharacter(testFileOutput,testCharacter);
testFileOutput.close(); testFileOutput.close();
ifstream testFileInput; std::ifstream testFileInput;
testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),ios::in|ios::binary); testFileInput.open(TestResourcesManager::getTestFilePath("temp","temp_file.bin").c_str(),std::ios::in|std::ios::binary);
INDEX_CHARACTER_TYPE retrievedCharacter = Utils::readIndexCharacter(testFileInput); INDEX_CHARACTER_TYPE retrievedCharacter = Utils::readIndexCharacter(testFileInput);
BOOST_CHECK_EQUAL(retrievedCharacter, testCharacter); BOOST_CHECK_EQUAL(retrievedCharacter, testCharacter);
testFileInput.close(); testFileInput.close();
@ -31,7 +28,7 @@ BOOST_AUTO_TEST_CASE( WriteReadSingleCharacter )
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray ) BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
{ {
vector<INDEX_CHARACTER_TYPE> hash; std::vector<INDEX_CHARACTER_TYPE> hash;
hash.push_back(123456789); // in hex: 75BCD15 hash.push_back(123456789); // in hex: 75BCD15
// in memory: 15 cd 5b 07 // in memory: 15 cd 5b 07
// in memory DEC: 21 205 91 7 // in memory DEC: 21 205 91 7
@ -41,13 +38,13 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
// in memory DEC: 177 104 222 58 // in memory DEC: 177 104 222 58
sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash); sauchar_t * dataArray = Utils::indexVectorToSaucharArray(hash);
vector<INDEX_CHARACTER_TYPE> result; std::vector<INDEX_CHARACTER_TYPE> result;
for (int i=0;i<8;i++) { for (int i=0;i<8;i++) {
INDEX_CHARACTER_TYPE a = dataArray[i]; INDEX_CHARACTER_TYPE a = dataArray[i];
result.push_back(a); result.push_back(a);
} }
vector<INDEX_CHARACTER_TYPE> expected; std::vector<INDEX_CHARACTER_TYPE> expected;
expected.push_back(21); expected.push_back(21);
expected.push_back(205); expected.push_back(205);
expected.push_back(91); expected.push_back(91);
@ -62,7 +59,7 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharArray )
BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector ) BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
{ {
vector<INDEX_CHARACTER_TYPE> hash; std::vector<INDEX_CHARACTER_TYPE> hash;
hash.push_back(123456789); // in hex: 75BCD15 hash.push_back(123456789); // in hex: 75BCD15
// in memory: 15 cd 5b 07 // in memory: 15 cd 5b 07
// in memory DEC: 21 205 91 7 // in memory DEC: 21 205 91 7
@ -70,9 +67,9 @@ BOOST_AUTO_TEST_CASE( IndexVectorToSaucharVector )
hash.push_back(987654321); // in hex: 3ADE68B1 hash.push_back(987654321); // in hex: 3ADE68B1
// in memory: b1 68 de 3a // in memory: b1 68 de 3a
// in memory DEC: 177 104 222 58 // in memory DEC: 177 104 222 58
vector<sauchar_t> result = Utils::indexVectorToSaucharVector(hash); std::vector<sauchar_t> result = Utils::indexVectorToSaucharVector(hash);
vector<sauchar_t> expected; std::vector<sauchar_t> expected;
expected.push_back(21); expected.push_back(21);
expected.push_back(205); expected.push_back(205);
expected.push_back(91); expected.push_back(91);

View File

@ -1,10 +1,6 @@
#include "tests/unit-tests/unit_tests_globals.hpp" #include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/word_map.hpp" #include "concordia/word_map.hpp"
#include <string>
using namespace std;
BOOST_AUTO_TEST_SUITE(word_map) BOOST_AUTO_TEST_SUITE(word_map)
BOOST_AUTO_TEST_CASE( WordCodeTest ) BOOST_AUTO_TEST_CASE( WordCodeTest )

View File

@ -54,7 +54,7 @@ void TmMatches::addPatternInterval(int start, int end) {
} }
bool TmMatches::_alreadyIntersects( bool TmMatches::_alreadyIntersects(
const vector<Interval> & intervalList, const std::vector<Interval> & intervalList,
int start, int end) { int start, int end) {
Interval tempInterval(start, end); Interval tempInterval(start, end);
BOOST_FOREACH(Interval oldInterval, intervalList) { BOOST_FOREACH(Interval oldInterval, intervalList) {
@ -66,7 +66,7 @@ bool TmMatches::_alreadyIntersects(
} }
double TmMatches::_getLogarithmicOverlay( double TmMatches::_getLogarithmicOverlay(
const vector<Interval> & intervalList, const std::vector<Interval> & intervalList,
SUFFIX_MARKER_TYPE sentenceSize, SUFFIX_MARKER_TYPE sentenceSize,
double k) { double k) {
double overlayScore = 0; double overlayScore = 0;

View File

@ -14,12 +14,10 @@
*/ */
using namespace std;
class TmMatches { class TmMatches {
public: public:
TmMatches(); TmMatches();
TmMatches(const SUFFIX_MARKER_TYPE exampleId, TmMatches(const SUFFIX_MARKER_TYPE exampleId,
const SUFFIX_MARKER_TYPE exampleSize, const SUFFIX_MARKER_TYPE exampleSize,
const SUFFIX_MARKER_TYPE patternSize); const SUFFIX_MARKER_TYPE patternSize);
@ -32,11 +30,11 @@ public:
return _score; return _score;
} }
vector<Interval> getExampleIntervals() const { std::vector<Interval> getExampleIntervals() const {
return _exampleMatchedRegions; return _exampleMatchedRegions;
} }
vector<Interval> getPatternIntervals() const { std::vector<Interval> getPatternIntervals() const {
return _patternMatchedRegions; return _patternMatchedRegions;
} }
@ -53,18 +51,18 @@ public:
void addPatternInterval(int start, int end); void addPatternInterval(int start, int end);
private: private:
bool _alreadyIntersects(const vector<Interval> & intervalList, bool _alreadyIntersects(const std::vector<Interval> & intervalList,
int start, int end); int start, int end);
double _getLogarithmicOverlay(const vector<Interval> & intervalList, double _getLogarithmicOverlay(const std::vector<Interval> & intervalList,
SUFFIX_MARKER_TYPE sentenceSize, SUFFIX_MARKER_TYPE sentenceSize,
double k); double k);
SUFFIX_MARKER_TYPE _exampleId; SUFFIX_MARKER_TYPE _exampleId;
vector<Interval> _exampleMatchedRegions; std::vector<Interval> _exampleMatchedRegions;
vector<Interval> _patternMatchedRegions; std::vector<Interval> _patternMatchedRegions;
SUFFIX_MARKER_TYPE _patternSize; SUFFIX_MARKER_TYPE _patternSize;

View File

@ -9,7 +9,7 @@ WordMap::WordMap() throw(ConcordiaException) {
WordMap::~WordMap() { WordMap::~WordMap() {
} }
INDEX_CHARACTER_TYPE WordMap::getWordCode(const string & word) INDEX_CHARACTER_TYPE WordMap::getWordCode(const std::string & word)
throw(ConcordiaException) { throw(ConcordiaException) {
if (_map.find(word) == _map.end()) { if (_map.find(word) == _map.end()) {
if (_nextFree == INDEX_CHARACTER_TYPE_MAX_VALUE) { if (_nextFree == INDEX_CHARACTER_TYPE_MAX_VALUE) {

View File

@ -14,8 +14,6 @@
*/ */
using namespace std;
class WordMap { class WordMap {
public: public:
explicit WordMap() throw(ConcordiaException); explicit WordMap() throw(ConcordiaException);
@ -24,8 +22,8 @@ public:
*/ */
virtual ~WordMap(); virtual ~WordMap();
INDEX_CHARACTER_TYPE getWordCode(const string & word) INDEX_CHARACTER_TYPE getWordCode(const std::string & word)
throw(ConcordiaException); throw(ConcordiaException);
private: private:
friend class boost::serialization::access; friend class boost::serialization::access;
@ -37,7 +35,7 @@ private:
ar & _nextFree; ar & _nextFree;
} }
map<string, INDEX_CHARACTER_TYPE> _map; std::map<std::string, INDEX_CHARACTER_TYPE> _map;
INDEX_CHARACTER_TYPE _nextFree; INDEX_CHARACTER_TYPE _nextFree;
}; };

View File

@ -4,24 +4,24 @@
#define CONCORDIA_TAGSET_DIRECTORY "concordia-tagset" #define CONCORDIA_TAGSET_DIRECTORY "concordia-tagset"
#define CONCORDIA_CONFIG_DIRECTORY "concordia-config" #define CONCORDIA_CONFIG_DIRECTORY "concordia-config"
string TestResourcesManager::getPuddleFilePath(const string & filename) { std::string TestResourcesManager::getPuddleFilePath(const std::string & filename) {
string result = string(TEST_RESOURCES_DIRECTORY); std::string result = std::string(TEST_RESOURCES_DIRECTORY);
return result + "/" + PUDDLE_TEST_DIRECTORY + "/" + filename; return result + "/" + PUDDLE_TEST_DIRECTORY + "/" + filename;
} }
string TestResourcesManager::getTestConcordiaConfigFilePath(const string & filename) { std::string TestResourcesManager::getTestConcordiaConfigFilePath(const std::string & filename) {
string result = string(TEST_RESOURCES_DIRECTORY); std::string result = std::string(TEST_RESOURCES_DIRECTORY);
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename; return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
} }
string TestResourcesManager::getProdConcordiaConfigFilePath(const string & filename) { std::string TestResourcesManager::getProdConcordiaConfigFilePath(const std::string & filename) {
string result = string(PROD_RESOURCES_DIRECTORY); std::string result = std::string(PROD_RESOURCES_DIRECTORY);
return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename; return result + "/" + CONCORDIA_CONFIG_DIRECTORY + "/" + filename;
} }
string TestResourcesManager::getTestFilePath(const string & module, const string & filename) { std::string TestResourcesManager::getTestFilePath(const std::string & module, const std::string & filename) {
string result = string(TEST_RESOURCES_DIRECTORY); std::string result = std::string(TEST_RESOURCES_DIRECTORY);
return result + "/" + module + "/" + filename; return result + "/" + module + "/" + filename;
} }

View File

@ -6,17 +6,15 @@
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
using namespace std;
class TestResourcesManager { class TestResourcesManager {
public: public:
static string getPuddleFilePath(const string & filename); static std::string getPuddleFilePath(const std::string & filename);
static string getTestConcordiaConfigFilePath(const string & filename); static std::string getTestConcordiaConfigFilePath(const std::string & filename);
static string getProdConcordiaConfigFilePath(const string & filename); static std::string getProdConcordiaConfigFilePath(const std::string & filename);
static string getTestFilePath(const string & module, const string & filename); static std::string getTestFilePath(const std::string & module, const std::string & filename);
}; };