144 lines
4.8 KiB
C++
144 lines
4.8 KiB
C++
#ifndef CONCORDIA_HDR
|
|
#define CONCORDIA_HDR
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/filesystem.hpp>
|
|
|
|
#include "concordia/common/config.hpp"
|
|
#include "concordia/example.hpp"
|
|
#include "concordia/substring_occurence.hpp"
|
|
#include "concordia/concordia_config.hpp"
|
|
#include "concordia/concordia_index.hpp"
|
|
#include "concordia/index_searcher.hpp"
|
|
#include "concordia/concordia_search_result.hpp"
|
|
#include "concordia/tokenized_sentence.hpp"
|
|
#include "concordia/anubis_search_result.hpp"
|
|
#include <divsufsort.h>
|
|
|
|
|
|
/*!
    The Concordia class is the main access point to the library.
    This class holds references to three of the four main data
    structures used by Concordia: the hashed index, the markers array
    and the suffix array. The fourth one, the word map, is maintained
    by the HashGenerator class. Concordia holds references to:
    - the hash generator (HashGenerator)
    - the concordia index (ConcordiaIndex)
    - the index searcher (IndexSearcher)
    - the configuration (ConcordiaConfig)

    Whenever necessary, the data structures and tools held by
    Concordia are passed via smart pointers to the methods that
    carry out specific functionality.
*/
|
|
|
|
class Concordia {
|
|
public:
|
|
/*! Constructor.
|
|
\param configFilePath path to the Concordia configuration file
|
|
\throws ConcordiaException
|
|
*/
|
|
explicit Concordia(const std::string & configFilePath)
|
|
throw(ConcordiaException);
|
|
/*! Destructor.
|
|
*/
|
|
virtual ~Concordia();
|
|
|
|
/*! Getter for version.
|
|
\returns version of the Concordia library.
|
|
*/
|
|
std::string & getVersion();
|
|
|
|
/*! Adds an Example to the index.
|
|
\param example example to be added
|
|
\returns tokenized sentence object,
|
|
containing information about original word positions
|
|
\throws ConcordiaException
|
|
*/
|
|
boost::shared_ptr<TokenizedSentence> addExample(const Example & example)
|
|
throw(ConcordiaException);
|
|
|
|
/*! Adds multiple examples to the index.
|
|
\param examples vector of examples to be added
|
|
\returns vector of tokenized sentence objects,
|
|
containing information about original word positions
|
|
\throws ConcordiaException
|
|
*/
|
|
std::vector<TokenizedSentence> addAllExamples(
|
|
const std::vector<Example> & examples)
|
|
throw(ConcordiaException);
|
|
|
|
/*! Performs a simple substring lookup on the index.
|
|
For more info see \ref tutorial1_2.
|
|
\param pattern pattern to be searched in the index
|
|
\returns vector of matched results
|
|
\throws ConcordiaException
|
|
*/
|
|
std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
|
|
throw(ConcordiaException);
|
|
|
|
/*! \deprecated
|
|
Finds the examples from the index, whose resemblance to the
|
|
pattern is maximal. This method may perform very slow,
|
|
try using concordiaSearch instead.
|
|
\param pattern pattern to be searched in the index
|
|
\returns vector of anubis results
|
|
\throws ConcordiaException
|
|
*/
|
|
std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
|
|
throw(ConcordiaException);
|
|
|
|
/*! Performs concordia lookup on the index. This is a unique library
|
|
functionality, designed to facilitate Computer-Aided Translation.
|
|
For more info see \ref tutorial1_3.
|
|
\param pattern pattern to be searched in the index
|
|
\returns concordia result
|
|
\throws ConcordiaException
|
|
*/
|
|
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
|
const std::string & pattern)
|
|
throw(ConcordiaException);
|
|
|
|
/*! Loads HDD stored index files to RAM and generates
|
|
suffix array based on RAM stored data structures.
|
|
For more info see \ref tutorial2.
|
|
\throws ConcordiaException
|
|
*/
|
|
void loadRAMIndexFromDisk() throw(ConcordiaException);
|
|
|
|
/*! Generates suffix array based on RAM stored data structures.
|
|
For more info see \ref tutorial2.
|
|
\throws ConcordiaException
|
|
*/
|
|
void refreshSAfromRAM() throw(ConcordiaException);
|
|
|
|
/*! Clears all the examples from the index
|
|
\throws ConcordiaException
|
|
*/
|
|
void clearIndex() throw(ConcordiaException);
|
|
|
|
private:
|
|
void _initializeIndex() throw(ConcordiaException);
|
|
|
|
static std::string _libraryVersion;
|
|
|
|
boost::shared_ptr<ConcordiaConfig> _config;
|
|
|
|
boost::shared_ptr<ConcordiaIndex> _index;
|
|
|
|
boost::shared_ptr<IndexSearcher> _searcher;
|
|
|
|
boost::shared_ptr<HashGenerator> _hashGenerator;
|
|
|
|
boost::shared_ptr<std::vector<sauchar_t> > _T;
|
|
|
|
boost::shared_ptr<std::vector<saidx_t> > _SA;
|
|
|
|
boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > _markers;
|
|
};
|
|
|
|
#endif
|