working hash generator
This commit is contained in:
parent
3aa4091e4d
commit
b238995a16
@ -3,17 +3,17 @@
|
||||
#include <boost/archive/binary_oarchive.hpp>
|
||||
#include <boost/archive/binary_iarchive.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/serialization/map.hpp>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
/*! Constructor.
  Creates an empty word map and, if a file already exists at
  wordMapFilename, restores the map contents from it.
  \param wordMapFilename path of the file the word map is read from here
                         and written to by serializeWordMap()
  \throws ConcordiaException (as declared by the exception specification)
*/
HashGenerator::HashGenerator(const string & wordMapFilename)
                                         throw(ConcordiaException) :
    // Initializers follow the member declaration order in the class
    // (_wordMap is declared before _wordMapFilename); members are always
    // constructed in declaration order regardless of how they are listed.
    _wordMap(new WordMap),
    _wordMapFilename(wordMapFilename) {
    if (boost::filesystem::exists(_wordMapFilename)) {
        ifstream ifs(_wordMapFilename.c_str(), std::ios::binary);
        boost::archive::binary_iarchive ia(ifs);
        // Read into the WordMap object itself (not the shared_ptr), which
        // mirrors the "oa << *_wordMap" written by serializeWordMap().
        // NOTE(review): Boost archives report malformed or unreadable input
        // with boost::archive::archive_exception, which is not listed in the
        // exception specification above - confirm how that should be
        // surfaced to callers.
        ia >> *_wordMap;
    }
}
|
||||
|
||||
@ -25,8 +25,11 @@ vector<int> HashGenerator::generateHash(const string & sentence) {
|
||||
vector<string> tokenTexts;
|
||||
boost::split(tokenTexts, sentence, boost::is_any_of(" "));
|
||||
|
||||
for(vector<string>::iterator it = tokenTexts.begin(); it != tokenTexts.end(); ++it) {
|
||||
for (vector<string>::iterator it = tokenTexts.begin();
|
||||
it != tokenTexts.end(); ++it) {
|
||||
string token = *it;
|
||||
int code = _wordMap->getWordCode(token);
|
||||
result.push_back(code);
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -35,7 +38,7 @@ vector<int> HashGenerator::generateHash(const string & sentence) {
|
||||
/*!
  Writes the current word map to the file named by _wordMapFilename
  as a Boost binary archive.
*/
void HashGenerator::serializeWordMap() {
    ofstream ofs(_wordMapFilename.c_str(), std::ios::binary);
    boost::archive::binary_oarchive oa(ofs);
    // Serialize the WordMap object, not the shared_ptr that owns it: the
    // constructor restores the map with "ia >> *_wordMap", so the archive
    // must contain exactly one WordMap written by value.
    oa << *_wordMap;
}
|
||||
|
||||
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include "concordia/word_map.hpp"
|
||||
#include "concordia/concordia_exception.hpp"
|
||||
|
||||
/*!
|
||||
@ -15,7 +17,8 @@ using namespace std;
|
||||
|
||||
class HashGenerator {
|
||||
public:
|
||||
explicit HashGenerator(const string & wordMapFilename) throw(ConcordiaException);
|
||||
explicit HashGenerator(const string & wordMapFilename)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
@ -26,11 +29,9 @@ public:
|
||||
void serializeWordMap();
|
||||
|
||||
private:
|
||||
|
||||
map<string,int> _wordMap;
|
||||
boost::shared_ptr<WordMap> _wordMap;
|
||||
|
||||
string _wordMapFilename;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -2,6 +2,7 @@ add_library(concordia-tests
|
||||
test_concordia.cpp
|
||||
test_concordia_config.cpp
|
||||
test_word_map.cpp
|
||||
test_hash_generator.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
|
||||
|
57
concordia/t/test_hash_generator.cpp
Normal file
57
concordia/t/test_hash_generator.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
#include <boost/filesystem.hpp>
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
#include <string>
|
||||
|
||||
#include "concordia/hash_generator.hpp"
|
||||
|
||||
#define TEST_WORD_MAP_PATH "/tmp/test_word_map.bin"
|
||||
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(hash_generator)
|
||||
|
||||
/*!
  With no word map file on disk, hashing "Ala ma kota" is expected
  to produce the codes 0, 1, 2.
*/
BOOST_AUTO_TEST_CASE( SimpleHashTest )
{
    // Make sure no word map file from an earlier run is picked up.
    if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
        boost::filesystem::remove(TEST_WORD_MAP_PATH);
    }

    HashGenerator generator(TEST_WORD_MAP_PATH);
    const vector<int> hash = generator.generateHash("Ala ma kota");

    const int expectedCodes[] = {0, 1, 2};
    const vector<int> expected(expectedCodes, expectedCodes + 3);

    BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end());
}
|
||||
|
||||
/*!
  Round-trip check: a first generator hashes "Ala ma kota" and writes
  its word map to disk; a second generator constructed on the same file
  is then expected to reuse the codes for "Ala" and "ma" and to give
  the new word "psa" the code 3.
*/
BOOST_AUTO_TEST_CASE( HashSerializationTest )
{
    // Make sure no word map file from an earlier run is picked up.
    if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
        boost::filesystem::remove(TEST_WORD_MAP_PATH);
    }

    // Phase 1: fresh generator, then persist its word map.
    HashGenerator writer(TEST_WORD_MAP_PATH);
    const vector<int> hash1 = writer.generateHash("Ala ma kota");
    const int expectedCodes1[] = {0, 1, 2};
    const vector<int> expected1(expectedCodes1, expectedCodes1 + 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end());

    writer.serializeWordMap();

    // Phase 2: a second generator built on the file written above.
    HashGenerator reader(TEST_WORD_MAP_PATH);
    const vector<int> hash2 = reader.generateHash("Ala ma psa");
    const int expectedCodes2[] = {0, 1, 3};
    const vector<int> expected2(expectedCodes2, expectedCodes2 + 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());

    // Clean up the file created by this test.
    boost::filesystem::remove(TEST_WORD_MAP_PATH);
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
@ -9,8 +9,7 @@ WordMap::~WordMap() {
|
||||
}
|
||||
|
||||
int WordMap::getWordCode(const string & word) {
|
||||
|
||||
if (_map.find(word) == _map.end() ) {
|
||||
if (_map.find(word) == _map.end()) {
|
||||
int newCode = _nextFree;
|
||||
_map[word] = newCode;
|
||||
_nextFree++;
|
||||
@ -18,7 +17,6 @@ int WordMap::getWordCode(const string & word) {
|
||||
} else {
|
||||
return _map[word];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -4,10 +4,13 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "concordia/concordia_exception.hpp"
|
||||
#include <boost/archive/text_oarchive.hpp>
|
||||
#include <boost/archive/text_iarchive.hpp>
|
||||
#include <boost/serialization/map.hpp>
|
||||
|
||||
|
||||
/*!
|
||||
Class for generating a sentence hash.
|
||||
Class representing a dictionary for word-to-int encoding.
|
||||
|
||||
*/
|
||||
|
||||
@ -24,11 +27,18 @@ public:
|
||||
int getWordCode(const string & word);
|
||||
|
||||
private:
|
||||
friend class boost::serialization::access;
|
||||
|
||||
map<string,int> _map;
|
||||
template<class Archive>
|
||||
|
||||
void serialize(Archive & ar, const unsigned int version) {
|
||||
ar & _map;
|
||||
ar & _nextFree;
|
||||
}
|
||||
|
||||
map<string, int> _map;
|
||||
|
||||
int _nextFree;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user