2013-11-12 22:08:37 +01:00
|
|
|
#ifndef WORD_MAP_HDR
|
|
|
|
#define WORD_MAP_HDR
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <map>
|
|
|
|
#include "concordia/concordia_exception.hpp"
|
2013-12-06 22:29:25 +01:00
|
|
|
#include "concordia/common/config.hpp"
|
2013-11-14 15:44:50 +01:00
|
|
|
#include <boost/archive/text_oarchive.hpp>
|
|
|
|
#include <boost/archive/text_iarchive.hpp>
|
|
|
|
#include <boost/serialization/map.hpp>
|
2013-11-12 22:08:37 +01:00
|
|
|
|
|
|
|
/*!
|
2015-05-01 14:52:53 +02:00
|
|
|
Class representing a dictionary used for word-to-integer encoding.
|
2013-11-12 22:08:37 +01:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
class WordMap {
|
|
|
|
public:
|
2015-05-01 14:52:53 +02:00
|
|
|
/*!
|
|
|
|
Constructor.
|
|
|
|
|
|
|
|
*/
|
2013-11-12 22:08:37 +01:00
|
|
|
explicit WordMap() throw(ConcordiaException);
|
2013-11-14 15:44:50 +01:00
|
|
|
|
2013-11-12 22:08:37 +01:00
|
|
|
/*! Destructor.
|
|
|
|
*/
|
|
|
|
virtual ~WordMap();
|
|
|
|
|
2015-05-01 14:52:53 +02:00
|
|
|
/*!
|
|
|
|
Gets the integer code of a token. If the token is found in
|
|
|
|
the dictionary, the dictionary code is returned. If not,
|
|
|
|
the word is added to the dictionary and its newly created
|
|
|
|
code is returned.
|
|
|
|
\param word token to generate the code
|
|
|
|
\returns code of the token
|
|
|
|
*/
|
2015-04-15 14:14:10 +02:00
|
|
|
INDEX_CHARACTER_TYPE getWordCode(const std::string & word)
|
|
|
|
throw(ConcordiaException);
|
2013-11-12 22:08:37 +01:00
|
|
|
|
|
|
|
private:
|
2013-11-14 15:44:50 +01:00
|
|
|
friend class boost::serialization::access;
|
|
|
|
|
|
|
|
template<class Archive>
|
2013-11-12 22:08:37 +01:00
|
|
|
|
2013-11-14 15:44:50 +01:00
|
|
|
void serialize(Archive & ar, const unsigned int version) {
|
|
|
|
ar & _map;
|
|
|
|
ar & _nextFree;
|
|
|
|
}
|
|
|
|
|
2015-04-15 14:14:10 +02:00
|
|
|
std::map<std::string, INDEX_CHARACTER_TYPE> _map;
|
2013-11-14 15:44:50 +01:00
|
|
|
|
2013-12-06 22:29:25 +01:00
|
|
|
INDEX_CHARACTER_TYPE _nextFree;
|
2013-11-12 22:08:37 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|