more doc
This commit is contained in:
parent
87a26bfa3b
commit
9e550ca1cf
@ -45,7 +45,7 @@ PROJECT_BRIEF =
|
|||||||
# exceed 55 pixels and the maximum width should not exceed 200 pixels.
|
# exceed 55 pixels and the maximum width should not exceed 200 pixels.
|
||||||
# Doxygen will copy the logo to the output directory.
|
# Doxygen will copy the logo to the output directory.
|
||||||
|
|
||||||
PROJECT_LOGO =
|
PROJECT_LOGO = ../concordia.png
|
||||||
|
|
||||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||||
# base path where the generated documentation will be put.
|
# base path where the generated documentation will be put.
|
||||||
|
@ -282,7 +282,7 @@ int main(int argc, char** argv) {
|
|||||||
if (totalTimeElapsed == 0) {
|
if (totalTimeElapsed == 0) {
|
||||||
totalTimeElapsed++;
|
totalTimeElapsed++;
|
||||||
}
|
}
|
||||||
|
|
||||||
double totalSpeed =
|
double totalSpeed =
|
||||||
static_cast<double>(1000 * lineCount / totalTimeElapsed);
|
static_cast<double>(1000 * lineCount / totalTimeElapsed);
|
||||||
std::cout << "\tReading finished. Read and added to index "
|
std::cout << "\tReading finished. Read and added to index "
|
||||||
|
BIN
concordia.png
Normal file
BIN
concordia.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 9.1 KiB |
@ -4,12 +4,17 @@
|
|||||||
#include "concordia/common/config.hpp"
|
#include "concordia/common/config.hpp"
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
Class representing an example found by anubis search.
|
Class representing an example found by anubis search. Contains
|
||||||
|
the id of the example and anubis score of the search.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class AnubisSearchResult {
|
class AnubisSearchResult {
|
||||||
public:
|
public:
|
||||||
|
/*! Constructor.
|
||||||
|
\param exampleId the id of found example
|
||||||
|
\param score score of this example
|
||||||
|
*/
|
||||||
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
|
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
|
||||||
const double score);
|
const double score);
|
||||||
|
|
||||||
@ -17,14 +22,24 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~AnubisSearchResult();
|
virtual ~AnubisSearchResult();
|
||||||
|
|
||||||
|
/*! Getter for example id.
|
||||||
|
\returns example id
|
||||||
|
*/
|
||||||
SUFFIX_MARKER_TYPE getExampleId() const {
|
SUFFIX_MARKER_TYPE getExampleId() const {
|
||||||
return _exampleId;
|
return _exampleId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for anubis score.
|
||||||
|
\returns anubis score of the example
|
||||||
|
*/
|
||||||
double getScore() const {
|
double getScore() const {
|
||||||
return _score;
|
return _score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Operator "greater than", used to sort objects of this class.
|
||||||
|
\returns true if the score of the current result is larger than
|
||||||
|
the score of another result
|
||||||
|
*/
|
||||||
bool operator > (const AnubisSearchResult & other) const {
|
bool operator > (const AnubisSearchResult & other) const {
|
||||||
return (_score > other.getScore());
|
return (_score > other.getScore());
|
||||||
}
|
}
|
||||||
|
@ -11,8 +11,13 @@
|
|||||||
*/
|
*/
|
||||||
class TextUtils {
|
class TextUtils {
|
||||||
public:
|
public:
|
||||||
|
/*! Constructor
|
||||||
|
*/
|
||||||
|
|
||||||
TextUtils();
|
TextUtils();
|
||||||
|
|
||||||
|
/*! Provides access to the single shared TextUtils instance.
|
||||||
|
*/
|
||||||
static TextUtils & getInstance() {
|
static TextUtils & getInstance() {
|
||||||
static TextUtils instance; // Guaranteed to be destroyed.
|
static TextUtils instance; // Guaranteed to be destroyed.
|
||||||
// Instantiated on first use.
|
// Instantiated on first use.
|
||||||
|
@ -13,61 +13,144 @@
|
|||||||
#include "concordia/matched_pattern_fragment.hpp"
|
#include "concordia/matched_pattern_fragment.hpp"
|
||||||
#include <divsufsort.h>
|
#include <divsufsort.h>
|
||||||
|
|
||||||
|
/*!
|
||||||
|
Class offering a range of simple utility methods.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
class Utils {
|
class Utils {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
/*! Constructor
|
||||||
|
*/
|
||||||
explicit Utils();
|
explicit Utils();
|
||||||
|
|
||||||
/*! Destructor.
|
/*! Destructor.
|
||||||
*/
|
*/
|
||||||
virtual ~Utils();
|
virtual ~Utils();
|
||||||
|
|
||||||
|
/*! Writes an index character to a binary file.
|
||||||
|
The character is written to the file as bytes, not necessarily in
|
||||||
|
the order in which they come in the character.
|
||||||
|
\param file file to write the character to
|
||||||
|
\param character character to write
|
||||||
|
*/
|
||||||
static void writeIndexCharacter(std::ofstream & file,
|
static void writeIndexCharacter(std::ofstream & file,
|
||||||
INDEX_CHARACTER_TYPE character);
|
INDEX_CHARACTER_TYPE character);
|
||||||
|
|
||||||
|
/*! Writes a marker to a binary file.
|
||||||
|
The marker is written to the file as bytes, not necessarily in
|
||||||
|
the order in which they come in the marker.
|
||||||
|
\param file file to write the marker to
|
||||||
|
\param marker marker to write
|
||||||
|
*/
|
||||||
static void writeMarker(std::ofstream & file,
|
static void writeMarker(std::ofstream & file,
|
||||||
SUFFIX_MARKER_TYPE marker);
|
SUFFIX_MARKER_TYPE marker);
|
||||||
|
|
||||||
|
/*! Reads an index character from a binary file.
|
||||||
|
Reading restores the order of the bytes in the original
|
||||||
|
character, which was written to the file.
|
||||||
|
\param file file to read the character from
|
||||||
|
\returns read character
|
||||||
|
*/
|
||||||
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
|
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
|
||||||
|
|
||||||
|
/*! Reads a marker from a binary file.
|
||||||
|
Reading restores the order of the bytes in the original
|
||||||
|
marker, which was written to the file.
|
||||||
|
\param file file to read the marker from
|
||||||
|
\returns read marker
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
|
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
|
||||||
|
|
||||||
|
/*! Converts a vector of index characters to higher resolution array.
|
||||||
|
\param input vector of index characters
|
||||||
|
\returns array of smaller parts of characters
|
||||||
|
*/
|
||||||
static sauchar_t * indexVectorToSaucharArray(
|
static sauchar_t * indexVectorToSaucharArray(
|
||||||
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
||||||
|
|
||||||
|
/*! Converts a vector of index characters to higher resolution vector.
|
||||||
|
\param input vector of index characters
|
||||||
|
\returns vector of smaller parts of characters
|
||||||
|
*/
|
||||||
static std::vector<sauchar_t> indexVectorToSaucharVector(
|
static std::vector<sauchar_t> indexVectorToSaucharVector(
|
||||||
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
||||||
|
|
||||||
|
/*! Appends an index character to a high resolution vector.
|
||||||
|
\param vector vector to be appended to
|
||||||
|
\param character character to append
|
||||||
|
*/
|
||||||
static void appendCharToSaucharVector(
|
static void appendCharToSaucharVector(
|
||||||
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
||||||
INDEX_CHARACTER_TYPE character);
|
INDEX_CHARACTER_TYPE character);
|
||||||
|
|
||||||
|
/*! Appends an index character to a high resolution vector.
|
||||||
|
\param vector vector to be appended to
|
||||||
|
\param character character to append
|
||||||
|
*/
|
||||||
static void appendCharToSaucharVector(
|
static void appendCharToSaucharVector(
|
||||||
std::vector<sauchar_t> & vector,
|
std::vector<sauchar_t> & vector,
|
||||||
INDEX_CHARACTER_TYPE character);
|
INDEX_CHARACTER_TYPE character);
|
||||||
|
|
||||||
|
/*! Prints a given vector to standard output.
|
||||||
|
\param vector vector to be printed
|
||||||
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void printVector(const std::vector<T> & vector);
|
static void printVector(const std::vector<T> & vector);
|
||||||
|
|
||||||
|
/*! Retrieves sentence id from a marker.
|
||||||
|
\param marker input marker
|
||||||
|
\returns sentence id
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
|
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||||
|
|
||||||
|
/*! Retrieves offset from a marker.
|
||||||
|
\param marker input marker
|
||||||
|
\returns offset
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
|
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||||
|
|
||||||
|
/*! Retrieves example length from a marker.
|
||||||
|
\param marker input marker
|
||||||
|
\returns example length
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
|
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
|
||||||
|
|
||||||
|
/*! Creates a marker from given data.
|
||||||
|
\param id sentence id
|
||||||
|
\param offset offset
|
||||||
|
\param length example length
|
||||||
|
\returns generated marker
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
|
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
|
||||||
SUFFIX_MARKER_TYPE offset,
|
SUFFIX_MARKER_TYPE offset,
|
||||||
SUFFIX_MARKER_TYPE length);
|
SUFFIX_MARKER_TYPE length);
|
||||||
|
|
||||||
|
/*! Computes overlay score based on a list of non-intersecting intervals.
|
||||||
|
\param intervalList list of the intervals
|
||||||
|
\param sentenceSize the total size of the sentence (or pattern)
|
||||||
|
\param k significance factor. When set to 1, the significance is neutral.
|
||||||
|
\returns score
|
||||||
|
*/
|
||||||
static double getLogarithmicOverlay(
|
static double getLogarithmicOverlay(
|
||||||
const std::vector<Interval> & intervalList,
|
const std::vector<Interval> & intervalList,
|
||||||
SUFFIX_MARKER_TYPE sentenceSize,
|
SUFFIX_MARKER_TYPE sentenceSize,
|
||||||
double k);
|
double k);
|
||||||
|
|
||||||
|
/*! Computes overlay score based on a list of non-intersecting fragments.
|
||||||
|
\param fragmentList list of the fragments
|
||||||
|
\param patternSize the total size of the pattern (or sentence)
|
||||||
|
\param k significance factor. When set to 1, the significance is neutral.
|
||||||
|
\returns score
|
||||||
|
*/
|
||||||
static double getLogarithmicOverlay(
|
static double getLogarithmicOverlay(
|
||||||
const std::vector<MatchedPatternFragment> & fragmentList,
|
const std::vector<MatchedPatternFragment> & fragmentList,
|
||||||
SUFFIX_MARKER_TYPE patternSize,
|
SUFFIX_MARKER_TYPE patternSize,
|
||||||
double k);
|
double k);
|
||||||
|
|
||||||
|
/*! Field holding the maximum sentence size allowed in the index.
|
||||||
|
*/
|
||||||
static SUFFIX_MARKER_TYPE maxSentenceSize;
|
static SUFFIX_MARKER_TYPE maxSentenceSize;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -39,23 +39,61 @@ public:
|
|||||||
*/
|
*/
|
||||||
std::string & getVersion();
|
std::string & getVersion();
|
||||||
|
|
||||||
|
/*! Adds an Example to the index.
|
||||||
|
\param example example to be added
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
void addExample(const Example & example) throw(ConcordiaException);
|
void addExample(const Example & example) throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Adds multiple examples to the index.
|
||||||
|
\param examples vector of examples to be added
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
void addAllExamples(const std::vector<Example> & examples)
|
void addAllExamples(const std::vector<Example> & examples)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Performs a simple substring lookup on the index.
|
||||||
|
For more info see \ref tutorial1_2.
|
||||||
|
\param pattern pattern to be searched in the index
|
||||||
|
\returns vector of matched results
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
|
std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! \deprecated
|
||||||
|
Finds the examples from the index, whose resemblance to the
|
||||||
|
pattern is maximal. This method may be very slow,
|
||||||
|
try using concordiaSearch instead.
|
||||||
|
\param pattern pattern to be searched in the index
|
||||||
|
\returns vector of anubis results
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
|
std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Performs concordia lookup on the index. This is a unique library
|
||||||
|
functionality, designed to facilitate Computer-Aided Translation.
|
||||||
|
For more info see \ref tutorial1_3.
|
||||||
|
\param pattern pattern to be searched in the index
|
||||||
|
\returns concordia result
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
|
||||||
const std::string & pattern)
|
const std::string & pattern)
|
||||||
throw(ConcordiaException);
|
throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Loads HDD stored index files to RAM and generates
|
||||||
|
suffix array based on RAM stored data structures.
|
||||||
|
For more info see \ref tutorial2.
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
void loadRAMIndexFromDisk() throw(ConcordiaException);
|
void loadRAMIndexFromDisk() throw(ConcordiaException);
|
||||||
|
|
||||||
|
/*! Generates suffix array based on RAM stored data structures.
|
||||||
|
For more info see \ref tutorial2.
|
||||||
|
\throws ConcordiaException
|
||||||
|
*/
|
||||||
void refreshSAfromRAM() throw(ConcordiaException);
|
void refreshSAfromRAM() throw(ConcordiaException);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -24,42 +24,82 @@ public:
|
|||||||
*/
|
*/
|
||||||
virtual ~ConcordiaConfig();
|
virtual ~ConcordiaConfig();
|
||||||
|
|
||||||
|
/*! Getter for word map file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns word map file path
|
||||||
|
*/
|
||||||
std::string & getWordMapFilePath() {
|
std::string & getWordMapFilePath() {
|
||||||
return _wordMapFilePath;
|
return _wordMapFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for hashed index file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns hashed index file path
|
||||||
|
*/
|
||||||
std::string & getHashedIndexFilePath() {
|
std::string & getHashedIndexFilePath() {
|
||||||
return _hashedIndexFilePath;
|
return _hashedIndexFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for markers file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns markers file path
|
||||||
|
*/
|
||||||
std::string & getMarkersFilePath() {
|
std::string & getMarkersFilePath() {
|
||||||
return _markersFilePath;
|
return _markersFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for html tags file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns html tags file path
|
||||||
|
*/
|
||||||
std::string & getHtmlTagsFilePath() {
|
std::string & getHtmlTagsFilePath() {
|
||||||
return _htmlTagsFilePath;
|
return _htmlTagsFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for space symbols file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns space symbols file path
|
||||||
|
*/
|
||||||
std::string & getSpaceSymbolsFilePath() {
|
std::string & getSpaceSymbolsFilePath() {
|
||||||
return _spaceSymbolsFilePath;
|
return _spaceSymbolsFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for stop words enabled parameter.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns true if stop words are enabled
|
||||||
|
*/
|
||||||
bool & isStopWordsEnabled() {
|
bool & isStopWordsEnabled() {
|
||||||
return _stopWordsEnabled;
|
return _stopWordsEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for stop words file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns stop words file path
|
||||||
|
*/
|
||||||
std::string & getStopWordsFilePath() {
|
std::string & getStopWordsFilePath() {
|
||||||
return _stopWordsFilePath;
|
return _stopWordsFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for named entities file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns named entities file path
|
||||||
|
*/
|
||||||
std::string & getNamedEntitiesFilePath() {
|
std::string & getNamedEntitiesFilePath() {
|
||||||
return _namedEntitiesFilePath;
|
return _namedEntitiesFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for stop symbols file path.
|
||||||
|
For more information see \ref tutorial3.
|
||||||
|
\returns stop symbols file path
|
||||||
|
*/
|
||||||
std::string & getStopSymbolsFilePath() {
|
std::string & getStopSymbolsFilePath() {
|
||||||
return _stopSymbolsFilePath;
|
return _stopSymbolsFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*! Getter for anubis threshold. Anubis search results with
|
||||||
|
scores below that threshold will be discarded.
|
||||||
|
\returns anubis threshold
|
||||||
|
*/
|
||||||
double getAnubisThreshold() {
|
double getAnubisThreshold() {
|
||||||
return _anubisThreshold;
|
return _anubisThreshold;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user