working hash generator

This commit is contained in:
rjawor 2013-11-14 15:44:50 +01:00
parent 3aa4091e4d
commit b238995a16
6 changed files with 98 additions and 28 deletions

View File

@ -3,18 +3,18 @@
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/serialization/map.hpp>
#include <fstream>
// Constructs a HashGenerator bound to the word map file `wordMapFilename`.
// Declared as possibly throwing ConcordiaException.
// NOTE(review): this span is a diff hunk, so the previous and the revised
// constructor text appear interleaved (two signatures, two `ia >>` lines).
HashGenerator::HashGenerator(const string & wordMapFilename) throw(ConcordiaException) {
_wordMapFilename = wordMapFilename;
// Revised signature: both members are set in the initialiser list, with
// _wordMap pointing at a freshly allocated WordMap.
// NOTE(review): the class declaration lists _wordMap before _wordMapFilename,
// so that is the actual initialisation order, not the order written here.
HashGenerator::HashGenerator(const string & wordMapFilename)
throw(ConcordiaException) :
_wordMapFilename(wordMapFilename),
_wordMap(boost::shared_ptr<WordMap>(new WordMap)) {
// Only when a file already exists at the given path: open it in binary mode
// and read the word map back from a Boost binary archive.
if (boost::filesystem::exists(_wordMapFilename)) {
ifstream ifs(_wordMapFilename.c_str(), std::ios::binary);
boost::archive::binary_iarchive ia(ifs);
ia >> _wordMap;
}
// NOTE(review): restoredWordMap is not referenced again in the visible lines.
boost::shared_ptr<WordMap> restoredWordMap(new WordMap);
// Reads into the WordMap object that _wordMap points to.
ia >> *_wordMap;
}
}
HashGenerator::~HashGenerator() {
@ -24,18 +24,21 @@ vector<int> HashGenerator::generateHash(const string & sentence) {
vector<int> result;
vector<string> tokenTexts;
boost::split(tokenTexts, sentence, boost::is_any_of(" "));
for(vector<string>::iterator it = tokenTexts.begin(); it != tokenTexts.end(); ++it) {
for (vector<string>::iterator it = tokenTexts.begin();
it != tokenTexts.end(); ++it) {
string token = *it;
}
int code = _wordMap->getWordCode(token);
result.push_back(code);
}
return result;
}
// Writes the word map to the file named by _wordMapFilename, opening the
// output stream in binary mode and streaming through a Boost binary archive.
// NOTE(review): this span is a diff hunk; the two `oa <<` lines look like the
// old statement (streaming the member itself) and its replacement (streaming
// the object the pointer refers to), not two successive writes - confirm
// against the actual file.
void HashGenerator::serializeWordMap() {
ofstream ofs(_wordMapFilename.c_str(), std::ios::binary);
boost::archive::binary_oarchive oa(ofs);
oa << _wordMap;
oa << *_wordMap;
}

View File

@ -4,6 +4,8 @@
#include <string>
#include <map>
#include <vector>
#include <boost/shared_ptr.hpp>
#include "concordia/word_map.hpp"
#include "concordia/concordia_exception.hpp"
/*!
@ -15,8 +17,9 @@ using namespace std;
class HashGenerator {
public:
explicit HashGenerator(const string & wordMapFilename) throw(ConcordiaException);
explicit HashGenerator(const string & wordMapFilename)
throw(ConcordiaException);
/*! Destructor.
*/
virtual ~HashGenerator();
@ -26,11 +29,9 @@ public:
void serializeWordMap();
private:
map<string,int> _wordMap;
string _wordMapFilename;
boost::shared_ptr<WordMap> _wordMap;
string _wordMapFilename;
};
#endif

View File

@ -2,6 +2,7 @@ add_library(concordia-tests
test_concordia.cpp
test_concordia_config.cpp
test_word_map.cpp
test_hash_generator.cpp
)
# Link the concordia-tests library against concordia, the library named by
# ${LIBCONFIG_LIB}, and concordia-tests-common.
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)

View File

@ -0,0 +1,57 @@
#include <boost/filesystem.hpp>
#include "tests/unit-tests/unit_tests_globals.hpp"
#include <string>
#include "concordia/hash_generator.hpp"
#define TEST_WORD_MAP_PATH "/tmp/test_word_map.bin"
using namespace std;
BOOST_AUTO_TEST_SUITE(hash_generator)
// Hashes "Ala ma kota" with a generator created while no word map file
// exists at the test path, and expects the codes 0, 1, 2 in that order.
BOOST_AUTO_TEST_CASE( SimpleHashTest )
{
    // Start clean: drop any word map file left behind at the test path.
    if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
        boost::filesystem::remove(TEST_WORD_MAP_PATH);
    }

    HashGenerator generator(TEST_WORD_MAP_PATH);
    const vector<int> actualCodes = generator.generateHash("Ala ma kota");

    const int wantedCodes[] = {0, 1, 2};
    const vector<int> expectedCodes(wantedCodes, wantedCodes + 3);

    BOOST_CHECK_EQUAL_COLLECTIONS(actualCodes.begin(), actualCodes.end(),
                                  expectedCodes.begin(), expectedCodes.end());
}
// Round-trip check: a first generator hashes "Ala ma kota" (expecting 0, 1, 2)
// and serializes its word map; a second generator created on the same path
// then hashes "Ala ma psa" and is expected to produce 0, 1, 3.
BOOST_AUTO_TEST_CASE( HashSerializationTest )
{
    // Start clean: drop any word map file left behind at the test path.
    if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
        boost::filesystem::remove(TEST_WORD_MAP_PATH);
    }

    // First generator: hash a sentence, verify, then write the map to disk.
    HashGenerator firstGenerator(TEST_WORD_MAP_PATH);
    const vector<int> firstCodes = firstGenerator.generateHash("Ala ma kota");

    const int firstWanted[] = {0, 1, 2};
    const vector<int> firstExpected(firstWanted, firstWanted + 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(firstCodes.begin(), firstCodes.end(),
                                  firstExpected.begin(), firstExpected.end());

    firstGenerator.serializeWordMap();

    // Second generator on the same path: two words repeat, one is new.
    HashGenerator secondGenerator(TEST_WORD_MAP_PATH);
    const vector<int> secondCodes = secondGenerator.generateHash("Ala ma psa");

    const int secondWanted[] = {0, 1, 3};
    const vector<int> secondExpected(secondWanted, secondWanted + 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(secondCodes.begin(), secondCodes.end(),
                                  secondExpected.begin(), secondExpected.end());

    // Remove the file this test wrote.
    boost::filesystem::remove(TEST_WORD_MAP_PATH);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -9,16 +9,14 @@ WordMap::~WordMap() {
}
// Returns the integer code for `word`. A word not yet present in _map is
// assigned the current value of _nextFree (which is then incremented) and
// that new code is returned; a known word returns its stored code.
// NOTE(review): this span is a diff hunk; the doubled `if` and
// `return _map[word];` lines appear to be the old and new (whitespace-only)
// versions of the same statements, so the braces do not balance as shown.
int WordMap::getWordCode(const string & word) {
if (_map.find(word) == _map.end() ) {
if (_map.find(word) == _map.end()) {
int newCode = _nextFree;
_map[word] = newCode;
_nextFree++;
return newCode;
} else {
return _map[word];
return _map[word];
}
}

View File

@ -4,10 +4,13 @@
#include <string>
#include <map>
#include "concordia/concordia_exception.hpp"
#include <boost/archive/text_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/serialization/map.hpp>
/*!
Class for generating a sentence hash.
Class representing a dictionary for word-to-int encoding.
*/
@ -16,7 +19,7 @@ using namespace std;
class WordMap {
public:
explicit WordMap() throw(ConcordiaException);
/*! Destructor.
*/
virtual ~WordMap();
@ -24,11 +27,18 @@ public:
int getWordCode(const string & word);
private:
map<string,int> _map;
int _nextFree;
friend class boost::serialization::access;
// Boost.Serialization hook: passes _map and then _nextFree through the
// archive, in that order. The `version` argument is not used.
template<class Archive>
void serialize(Archive & ar, const unsigned int version) {
    ar & _map & _nextFree;
}
map<string, int> _map;
int _nextFree;
};
#endif