2013-11-14 20:36:34 +01:00
|
|
|
#ifndef CONCORDIA_INDEX_HDR
|
|
|
|
#define CONCORDIA_INDEX_HDR
|
|
|
|
|
|
|
|
#include <boost/shared_ptr.hpp>
|
2013-11-20 17:43:29 +01:00
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
|
|
|
#include <sstream>
|
|
|
|
|
2013-11-14 20:36:34 +01:00
|
|
|
#include "concordia/hash_generator.hpp"
|
|
|
|
#include "concordia/concordia_exception.hpp"
|
2013-11-29 16:19:49 +01:00
|
|
|
#include "build/libdivsufsort/include/divsufsort.h"
|
2013-11-14 20:36:34 +01:00
|
|
|
|
|
|
|
/*!
  Class for creating and maintaining the index.
*/
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
class ConcordiaIndex {
|
|
|
|
public:
|
2013-12-14 15:23:17 +01:00
|
|
|
explicit ConcordiaIndex(const string & hashedIndexFilePath)
|
2013-11-14 20:36:34 +01:00
|
|
|
throw(ConcordiaException);
|
|
|
|
|
|
|
|
/*! Destructor.
|
|
|
|
*/
|
|
|
|
virtual ~ConcordiaIndex();
|
|
|
|
|
2013-12-14 15:23:17 +01:00
|
|
|
void addSentence(
|
|
|
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
|
|
|
boost::shared_ptr<vector<sauchar_t> > T,
|
|
|
|
const string & sentence);
|
2013-11-14 20:36:34 +01:00
|
|
|
|
2013-12-14 15:23:17 +01:00
|
|
|
void addAllSentences(
|
|
|
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
|
|
|
boost::shared_ptr<vector<sauchar_t> > T,
|
|
|
|
boost::shared_ptr<vector<string> > sentences);
|
2013-11-14 20:36:34 +01:00
|
|
|
|
2013-12-14 15:23:17 +01:00
|
|
|
boost::shared_ptr<vector<saidx_t> > generateSuffixArray(
|
|
|
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
|
|
|
boost::shared_ptr<vector<sauchar_t> > T);
|
2013-11-14 20:36:34 +01:00
|
|
|
|
|
|
|
private:
|
2013-12-14 15:23:17 +01:00
|
|
|
// Add sentence to disk index and update RAM index.
|
|
|
|
void _addSingleSentence(ofstream & hashedIndexFile,
|
|
|
|
boost::shared_ptr<HashGenerator> hashGenerator,
|
|
|
|
boost::shared_ptr<std::vector<sauchar_t> > T,
|
|
|
|
const string & sentence);
|
2013-11-20 17:43:29 +01:00
|
|
|
|
2013-11-28 16:47:57 +01:00
|
|
|
string _hashedIndexFilePath;
|
2013-11-14 20:36:34 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|