more doc
This commit is contained in:
parent
87a26bfa3b
commit
9e550ca1cf
@ -45,7 +45,7 @@ PROJECT_BRIEF =
|
||||
# exceed 55 pixels and the maximum width should not exceed 200 pixels.
|
||||
# Doxygen will copy the logo to the output directory.
|
||||
|
||||
PROJECT_LOGO =
|
||||
PROJECT_LOGO = ../concordia.png
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
BIN
concordia.png
Normal file
BIN
concordia.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.1 KiB |
@ -4,12 +4,17 @@
|
||||
#include "concordia/common/config.hpp"
|
||||
|
||||
/*!
|
||||
Class representing an example found by anubis search.
|
||||
Class representing an example found by anubis search. Contains
|
||||
the id of the example and anubis score of the search.
|
||||
|
||||
*/
|
||||
|
||||
class AnubisSearchResult {
|
||||
public:
|
||||
/*! Constructor.
|
||||
\param exampleId the id of found example
|
||||
\param score score of this example
|
||||
*/
|
||||
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
|
||||
const double score);
|
||||
|
||||
@ -17,14 +22,24 @@ public:
|
||||
*/
|
||||
virtual ~AnubisSearchResult();
|
||||
|
||||
/*! Getter for example id.
|
||||
\returns example id
|
||||
*/
|
||||
SUFFIX_MARKER_TYPE getExampleId() const {
|
||||
return _exampleId;
|
||||
}
|
||||
|
||||
/*! Getter for anubis score.
|
||||
\returns anubis score of the example
|
||||
*/
|
||||
double getScore() const {
|
||||
return _score;
|
||||
}
|
||||
|
||||
/*! Operator "greater than", used to sort objects of this class.
|
||||
\returns true if the score of the current result is larger than
|
||||
the score of another result
|
||||
*/
|
||||
bool operator > (const AnubisSearchResult & other) const {
|
||||
return (_score > other.getScore());
|
||||
}
|
||||
|
@ -11,8 +11,13 @@
|
||||
*/
|
||||
class TextUtils {
|
||||
public:
|
||||
/*! Constructor
|
||||
*/
|
||||
|
||||
TextUtils();
|
||||
|
||||
/*! Method for ensuring singleton.
|
||||
*/
|
||||
static TextUtils & getInstance() {
|
||||
static TextUtils instance; // Guaranteed to be destroyed.
|
||||
// Instantiated on first use.
|
||||
|
@ -13,61 +13,144 @@
|
||||
#include "concordia/matched_pattern_fragment.hpp"
|
||||
#include <divsufsort.h>
|
||||
|
||||
/*!
|
||||
Class offering a range of simple utility methods.
|
||||
|
||||
*/
|
||||
|
||||
class Utils {
|
||||
public:
|
||||
|
||||
/*! Constructor
|
||||
*/
|
||||
explicit Utils();
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~Utils();
|
||||
|
||||
/*! Writes an index character to a binary file.
|
||||
The character is written to the file as bytes, not necessarily in
|
||||
the order in which they come in the character.
|
||||
\param file file to write the character to
|
||||
\param character character to write
|
||||
*/
|
||||
static void writeIndexCharacter(std::ofstream & file,
|
||||
INDEX_CHARACTER_TYPE character);
|
||||
|
||||
/*! Writes a marker to a binary file.
|
||||
The marker is written to the file as bytes, not necessarily in
|
||||
the order in which they come in the marker.
|
||||
\param file file to write the marker to
|
||||
\param marker marker to write
|
||||
*/
|
||||
static void writeMarker(std::ofstream & file,
|
||||
SUFFIX_MARKER_TYPE marker);
|
||||
|
||||
/*! Reads an index character from a binary file.
|
||||
Reading restores the order of the bytes in the original
|
||||
character, which was written to the file.
|
||||
\param file file to read the character from
|
||||
\returns read character
|
||||
*/
|
||||
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
|
||||
|
||||
/*! Reads a marker from a binary file.
|
||||
Reading restores the order of the bytes in the original
|
||||
marker, which was written to the file.
|
||||
\param file file to read the marker from
|
||||
\returns read marker
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
|
||||
|
||||
/*! Converts a vector of index characters to a higher resolution array.
|
||||
\param input vector of index characters
|
||||
\returns array of smaller parts of characters
|
||||
*/
|
||||
static sauchar_t * indexVectorToSaucharArray(
|
||||
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
||||
|
||||
/*! Converts a vector of index characters to a higher resolution vector.
|
||||
\param input vector of index characters
|
||||
\returns vector of smaller parts of characters
|
||||
*/
|
||||
static std::vector<sauchar_t> indexVectorToSaucharVector(
|
||||
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
||||
|
||||
/*! Appends an index character to a high resolution vector held by a shared pointer.
|
||||
\param vector vector to be appended to
|
||||
\param character character to append
|
||||
*/
|
||||
static void appendCharToSaucharVector(
|
||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||
INDEX_CHARACTER_TYPE character);
|
||||
|
||||
/*! Appends an index character to a high resolution vector.
|
||||
\param vector vector to be appended to
|
||||
\param character character to append
|
||||
*/
|
||||
static void appendCharToSaucharVector(
|
||||
std::vector<sauchar_t> & vector,
|
||||
INDEX_CHARACTER_TYPE character);
|
||||
|
||||
/*! Prints a given vector to standard output.
|
||||
\param vector vector to be printed
|
||||
*/
|
||||
template <typename T>
|
||||
static void printVector(const std::vector<T> & vector);
|
||||
|
||||
/*! Retrieves sentence id from a marker.
|
||||
\param marker input marker
|
||||
\returns sentence id
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||
|
||||
/*! Retrieves offset from a marker.
|
||||
\param marker input marker
|
||||
\returns offset
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||
|
||||
/*! Retrieves example length from a marker.
|
||||
\param marker input marker
|
||||
\returns example length
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||
|
||||
/*! Creates a marker from given data.
|
||||
\param id sentence id
|
||||
\param offset offset
|
||||
\param length example length
|
||||
\returns generated marker
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
|
||||
SUFFIX_MARKER_TYPE offset,
|
||||
SUFFIX_MARKER_TYPE length);
|
||||
|
||||
/*! Computes overlay score based on a list of non-intersecting intervals.
|
||||
\param intervalList list of the intervals
|
||||
\param sentenceSize the total size of the sentence (or pattern)
|
||||
\param k significance factor. When set to 1, the significance is neutral.
|
||||
\returns score
|
||||
*/
|
||||
static double getLogarithmicOverlay(
|
||||
const std::vector<Interval> & intervalList,
|
||||
SUFFIX_MARKER_TYPE sentenceSize,
|
||||
double k);
|
||||
|
||||
/*! Computes overlay score based on a list of non-intersecting fragments.
|
||||
\param fragmentList list of the fragments
|
||||
\param patternSize the total size of the pattern
|
||||
\param k significance factor. When set to 1, the significance is neutral.
|
||||
\returns score
|
||||
*/
|
||||
static double getLogarithmicOverlay(
|
||||
const std::vector<MatchedPatternFragment> & fragmentList,
|
||||
SUFFIX_MARKER_TYPE patternSize,
|
||||
double k);
|
||||
|
||||
/*! Field holding the maximum sentence size allowed in the index.
|
||||
*/
|
||||
static SUFFIX_MARKER_TYPE maxSentenceSize;
|
||||
|
||||
private:
|
||||
|
@ -39,23 +39,61 @@ public:
|
||||
*/
|
||||
std::string & getVersion();
|
||||
|
||||
/*! Adds an Example to the index.
|
||||
\param example example to be added
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
void addExample(const Example & example) throw(ConcordiaException);
|
||||
|
||||
/*! Adds multiple examples to the index.
|
||||
\param examples vector of examples to be added
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
void addAllExamples(const std::vector<Example> & examples)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Performs a simple substring lookup on the index.
|
||||
For more info see \ref tutorial1_2.
|
||||
\param pattern pattern to be searched in the index
|
||||
\returns vector of matched results
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! \deprecated
|
||||
Finds the examples from the index, whose resemblance to the
|
||||
pattern is maximal. This method may perform very slowly,
|
||||
try using concordiaSearch instead.
|
||||
\param pattern pattern to be searched in the index
|
||||
\returns vector of anubis results
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Performs concordia lookup on the index. This is a unique library
|
||||
functionality, designed to facilitate Computer-Aided Translation.
|
||||
For more info see \ref tutorial1_3.
|
||||
\param pattern pattern to be searched in the index
|
||||
\returns concordia result
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
||||
const std::string & pattern)
|
||||
throw(ConcordiaException);
|
||||
|
||||
/*! Loads HDD stored index files to RAM and generates
|
||||
suffix array based on RAM stored data structures.
|
||||
For more info see \ref tutorial2.
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
void loadRAMIndexFromDisk() throw(ConcordiaException);
|
||||
|
||||
/*! Generates suffix array based on RAM stored data structures.
|
||||
For more info see \ref tutorial2.
|
||||
\throws ConcordiaException
|
||||
*/
|
||||
void refreshSAfromRAM() throw(ConcordiaException);
|
||||
|
||||
private:
|
||||
|
@ -24,42 +24,82 @@ public:
|
||||
*/
|
||||
virtual ~ConcordiaConfig();
|
||||
|
||||
/*! Getter for word map file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns word map file path
|
||||
*/
|
||||
std::string & getWordMapFilePath() {
|
||||
return _wordMapFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for hashed index file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns hashed index file path
|
||||
*/
|
||||
std::string & getHashedIndexFilePath() {
|
||||
return _hashedIndexFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for markers file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns markers file path
|
||||
*/
|
||||
std::string & getMarkersFilePath() {
|
||||
return _markersFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for html tags file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns html tags file path
|
||||
*/
|
||||
std::string & getHtmlTagsFilePath() {
|
||||
return _htmlTagsFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for space symbols file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns space symbols file path
|
||||
*/
|
||||
std::string & getSpaceSymbolsFilePath() {
|
||||
return _spaceSymbolsFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for stop words enabled parameter.
|
||||
For more information see \ref tutorial3.
|
||||
\returns true if stop words are enabled
|
||||
*/
|
||||
bool & isStopWordsEnabled() {
|
||||
return _stopWordsEnabled;
|
||||
}
|
||||
|
||||
/*! Getter for stop words file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns stop words file path
|
||||
*/
|
||||
std::string & getStopWordsFilePath() {
|
||||
return _stopWordsFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for named entities file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns named entities file path
|
||||
*/
|
||||
std::string & getNamedEntitiesFilePath() {
|
||||
return _namedEntitiesFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for stop symbols file path.
|
||||
For more information see \ref tutorial3.
|
||||
\returns stop symbols file path
|
||||
*/
|
||||
std::string & getStopSymbolsFilePath() {
|
||||
return _stopSymbolsFilePath;
|
||||
}
|
||||
|
||||
/*! Getter for anubis threshold. Anubis search results with
|
||||
scores below that threshold will be discarded.
|
||||
\returns anubis threshold
|
||||
*/
|
||||
double getAnubisThreshold() {
|
||||
return _anubisThreshold;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user