working hash generator
This commit is contained in:
parent
3aa4091e4d
commit
b238995a16
@ -3,17 +3,17 @@
|
|||||||
#include <boost/archive/binary_oarchive.hpp>
|
#include <boost/archive/binary_oarchive.hpp>
|
||||||
#include <boost/archive/binary_iarchive.hpp>
|
#include <boost/archive/binary_iarchive.hpp>
|
||||||
#include <boost/algorithm/string.hpp>
|
#include <boost/algorithm/string.hpp>
|
||||||
#include <boost/serialization/map.hpp>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
|
||||||
|
HashGenerator::HashGenerator(const string & wordMapFilename)
|
||||||
HashGenerator::HashGenerator(const string & wordMapFilename) throw(ConcordiaException) {
|
throw(ConcordiaException) :
|
||||||
_wordMapFilename = wordMapFilename;
|
_wordMapFilename(wordMapFilename),
|
||||||
|
_wordMap(boost::shared_ptr<WordMap>(new WordMap)) {
|
||||||
if (boost::filesystem::exists(_wordMapFilename)) {
|
if (boost::filesystem::exists(_wordMapFilename)) {
|
||||||
ifstream ifs(_wordMapFilename.c_str(), std::ios::binary);
|
ifstream ifs(_wordMapFilename.c_str(), std::ios::binary);
|
||||||
boost::archive::binary_iarchive ia(ifs);
|
boost::archive::binary_iarchive ia(ifs);
|
||||||
ia >> _wordMap;
|
boost::shared_ptr<WordMap> restoredWordMap(new WordMap);
|
||||||
|
ia >> *_wordMap;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -25,8 +25,11 @@ vector<int> HashGenerator::generateHash(const string & sentence) {
|
|||||||
vector<string> tokenTexts;
|
vector<string> tokenTexts;
|
||||||
boost::split(tokenTexts, sentence, boost::is_any_of(" "));
|
boost::split(tokenTexts, sentence, boost::is_any_of(" "));
|
||||||
|
|
||||||
for(vector<string>::iterator it = tokenTexts.begin(); it != tokenTexts.end(); ++it) {
|
for (vector<string>::iterator it = tokenTexts.begin();
|
||||||
|
it != tokenTexts.end(); ++it) {
|
||||||
string token = *it;
|
string token = *it;
|
||||||
|
int code = _wordMap->getWordCode(token);
|
||||||
|
result.push_back(code);
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
@ -35,7 +38,7 @@ vector<int> HashGenerator::generateHash(const string & sentence) {
|
|||||||
void HashGenerator::serializeWordMap() {
|
void HashGenerator::serializeWordMap() {
|
||||||
ofstream ofs(_wordMapFilename.c_str(), std::ios::binary);
|
ofstream ofs(_wordMapFilename.c_str(), std::ios::binary);
|
||||||
boost::archive::binary_oarchive oa(ofs);
|
boost::archive::binary_oarchive oa(ofs);
|
||||||
oa << _wordMap;
|
oa << *_wordMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <boost/shared_ptr.hpp>
|
||||||
|
#include "concordia/word_map.hpp"
|
||||||
#include "concordia/concordia_exception.hpp"
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
@ -15,7 +17,8 @@ using namespace std;
|
|||||||
|
|
||||||
class HashGenerator {
|
class HashGenerator {
|
||||||
public:
|
public:
|
||||||
explicit HashGenerator(const string & wordMapFilename) throw(ConcordiaException);
|
explicit HashGenerator(const string & wordMapFilename)
|
||||||
|
throw(ConcordiaException);
|
||||||
|
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
@ -26,11 +29,9 @@ public:
|
|||||||
void serializeWordMap();
|
void serializeWordMap();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
boost::shared_ptr<WordMap> _wordMap;
|
||||||
map<string,int> _wordMap;
|
|
||||||
|
|
||||||
string _wordMapFilename;
|
string _wordMapFilename;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,6 +2,7 @@ add_library(concordia-tests
|
|||||||
test_concordia.cpp
|
test_concordia.cpp
|
||||||
test_concordia_config.cpp
|
test_concordia_config.cpp
|
||||||
test_word_map.cpp
|
test_word_map.cpp
|
||||||
|
test_hash_generator.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
|
target_link_libraries(concordia-tests concordia ${LIBCONFIG_LIB} concordia-tests-common)
|
||||||
|
57
concordia/t/test_hash_generator.cpp
Normal file
57
concordia/t/test_hash_generator.cpp
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "concordia/hash_generator.hpp"
|
||||||
|
|
||||||
|
#define TEST_WORD_MAP_PATH "/tmp/test_word_map.bin"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_SUITE(hash_generator)
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE( SimpleHashTest )
|
||||||
|
{
|
||||||
|
if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
|
||||||
|
boost::filesystem::remove(TEST_WORD_MAP_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
HashGenerator hashGenerator = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
|
|
||||||
|
vector<int> hash = hashGenerator.generateHash("Ala ma kota");
|
||||||
|
vector<int> expected;
|
||||||
|
expected.push_back(0);
|
||||||
|
expected.push_back(1);
|
||||||
|
expected.push_back(2);
|
||||||
|
|
||||||
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash.begin(), hash.end(), expected.begin(), expected.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_CASE( HashSerializationTest )
|
||||||
|
{
|
||||||
|
if (boost::filesystem::exists(TEST_WORD_MAP_PATH)) {
|
||||||
|
boost::filesystem::remove(TEST_WORD_MAP_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
HashGenerator hashGenerator1 = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
|
vector<int> hash1 = hashGenerator1.generateHash("Ala ma kota");
|
||||||
|
vector<int> expected1;
|
||||||
|
expected1.push_back(0);
|
||||||
|
expected1.push_back(1);
|
||||||
|
expected1.push_back(2);
|
||||||
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash1.begin(), hash1.end(), expected1.begin(), expected1.end());
|
||||||
|
|
||||||
|
hashGenerator1.serializeWordMap();
|
||||||
|
|
||||||
|
HashGenerator hashGenerator2 = HashGenerator(TEST_WORD_MAP_PATH);
|
||||||
|
vector<int> hash2 = hashGenerator2.generateHash("Ala ma psa");
|
||||||
|
vector<int> expected2;
|
||||||
|
expected2.push_back(0);
|
||||||
|
expected2.push_back(1);
|
||||||
|
expected2.push_back(3);
|
||||||
|
BOOST_CHECK_EQUAL_COLLECTIONS(hash2.begin(), hash2.end(), expected2.begin(), expected2.end());
|
||||||
|
|
||||||
|
boost::filesystem::remove(TEST_WORD_MAP_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOST_AUTO_TEST_SUITE_END()
|
@ -9,8 +9,7 @@ WordMap::~WordMap() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int WordMap::getWordCode(const string & word) {
|
int WordMap::getWordCode(const string & word) {
|
||||||
|
if (_map.find(word) == _map.end()) {
|
||||||
if (_map.find(word) == _map.end() ) {
|
|
||||||
int newCode = _nextFree;
|
int newCode = _nextFree;
|
||||||
_map[word] = newCode;
|
_map[word] = newCode;
|
||||||
_nextFree++;
|
_nextFree++;
|
||||||
@ -18,7 +17,6 @@ int WordMap::getWordCode(const string & word) {
|
|||||||
} else {
|
} else {
|
||||||
return _map[word];
|
return _map[word];
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,10 +4,13 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include "concordia/concordia_exception.hpp"
|
#include "concordia/concordia_exception.hpp"
|
||||||
|
#include <boost/archive/text_oarchive.hpp>
|
||||||
|
#include <boost/archive/text_iarchive.hpp>
|
||||||
|
#include <boost/serialization/map.hpp>
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Class for generating a sentence hash.
|
Class representing dictionary for word to int encoding.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -24,11 +27,18 @@ public:
|
|||||||
int getWordCode(const string & word);
|
int getWordCode(const string & word);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend class boost::serialization::access;
|
||||||
|
|
||||||
map<string,int> _map;
|
template<class Archive>
|
||||||
|
|
||||||
|
void serialize(Archive & ar, const unsigned int version) {
|
||||||
|
ar & _map;
|
||||||
|
ar & _nextFree;
|
||||||
|
}
|
||||||
|
|
||||||
|
map<string, int> _map;
|
||||||
|
|
||||||
int _nextFree;
|
int _nextFree;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user