trimming anonymized sentence
Former-commit-id: 316b76717e4075e466828c628e064076d39481c5
This commit is contained in:
parent
2a3c7eddfe
commit
f83aaef4ed
2
.gitignore
vendored
2
.gitignore
vendored
@ -6,4 +6,6 @@ concordia/common/config.hpp
|
||||
tests/resources/concordia-config/concordia.cfg
|
||||
tests/resources/temp
|
||||
prod/resources/temp
|
||||
prod/resources/text-files/jrc_smaller.txt
|
||||
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
echo "CONCORDIA SEARCHER: searching for pattern: \"Parlamentu Europejskiego\""
|
||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "Parlamentu Europejskiego" -n
|
||||
echo "CONCORDIA SEARCHER: searching for pattern: \"Dostęp do zatrudnienia\""
|
||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "Dostęp do zatrudnienia" -n
|
||||
|
||||
./build/concordia-console/concordia-console -c prod/resources/concordia-config/concordia.cfg -s "$1"
|
||||
|
||||
|
@ -1,8 +1,10 @@
|
||||
#include "concordia/hash_generator.hpp"
|
||||
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/archive/binary_oarchive.hpp>
|
||||
#include <boost/archive/binary_iarchive.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <fstream>
|
||||
|
||||
HashGenerator::HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
|
||||
@ -44,10 +46,10 @@ boost::shared_ptr<vector<INDEX_CHARACTER_TYPE> > HashGenerator::generateHash(
|
||||
/**
 * Splits a sentence into its whitespace-separated tokens.
 *
 * The sentence is first passed through the sentence anonymizer and the
 * result is trimmed, so that leading or trailing whitespace does not
 * produce empty tokens at either end of the returned vector.
 *
 * @param sentence raw input sentence
 * @return shared pointer to the vector of token texts; the vector is empty
 *         when the anonymized sentence is empty or consists of whitespace only
 */
boost::shared_ptr<vector<string> >
HashGenerator::generateTokenVector(const string & sentence) {
    string anonymizedSentence = _sentenceAnonymizer->anonymize(sentence);
    boost::trim(anonymizedSentence);

    boost::shared_ptr<vector<string> > tokenTexts(new vector<string>());

    // boost::split reports a single empty token for an empty input string,
    // so an empty sentence must bypass the split to yield no tokens at all.
    if (!anonymizedSentence.empty()) {
        boost::split(*tokenTexts, anonymizedSentence,
                     boost::is_any_of(" \t\r\n"),
                     boost::algorithm::token_compress_on);
    }

    return tokenTexts;
}
|
||||
|
||||
|
@ -123,9 +123,30 @@ BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch2 )
|
||||
|
||||
BOOST_CHECK_EQUAL(searchResult2.size(), 2);
|
||||
BOOST_CHECK_EQUAL(searchResult2.at(0).getId(), 202);
|
||||
BOOST_CHECK_EQUAL(searchResult2.at(0).getOffset(), 1);
|
||||
BOOST_CHECK_EQUAL(searchResult2.at(0).getOffset(), 0);
|
||||
BOOST_CHECK_EQUAL(searchResult2.at(1).getId(), 312);
|
||||
BOOST_CHECK_EQUAL(searchResult2.at(1).getOffset(), 1);
|
||||
}
|
||||
|
||||
// Indexes two examples with one Concordia instance, then runs a simple
// search from a second instance created from the same configuration file.
// NOTE(review): presumably this exercises reading back the index data
// written by the first instance - confirm against Concordia's implementation.
BOOST_AUTO_TEST_CASE( ConcordiaSimpleSearch3 )
{
    // First instance: used only to add the examples.
    Concordia indexer(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));

    boost::ptr_vector<Example> examples;
    examples.push_back(new Example("2. Ma on w szczególności prawo do podjęcia zatrudnienia dostępnego na terytorium innego Państwa Członkowskiego z takim samym pierwszeństwem, z jakiego korzystają obywatele tego państwa.",312));
    examples.push_back(new Example("czy xjest żółte otwarte",202));
    indexer.addAllExamples(examples);

    // Second instance: performs the actual search.
    Concordia searcher(TestResourcesManager::getTestConcordiaConfigFilePath("concordia.cfg"));
    boost::ptr_vector<SubstringOccurence> hits = searcher.simpleSearch("on w szczególności prawo do podjęcia");

    // Remove the temporary files before asserting, so they are cleaned up
    // regardless of the outcome of the checks below.
    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_WORD_MAP));
    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_MARKERS));
    boost::filesystem::remove(TestResourcesManager::getTestFilePath("temp",TEMP_HASHED_INDEX));

    // Exactly one occurrence is expected: example 312, at offset 1.
    BOOST_CHECK_EQUAL(hits.size(), 1);
    BOOST_CHECK_EQUAL(hits.at(0).getId(), 312);
    BOOST_CHECK_EQUAL(hits.at(0).getOffset(), 1);
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
@ -43,8 +43,8 @@ BOOST_AUTO_TEST_CASE( TooLongHashTest )
|
||||
HashGenerator hashGenerator = HashGenerator(config);
|
||||
|
||||
stringstream ss;
|
||||
for (int i=0;i<256;i++) {
|
||||
ss << "a" << i << " ";
|
||||
for (int i=0;i<257;i++) {
|
||||
ss << "xx" << i << " ";
|
||||
}
|
||||
|
||||
string longSentence = ss.str();
|
||||
|
Loading…
Reference in New Issue
Block a user