This commit is contained in:
rjawor 2015-04-30 22:22:54 +02:00
parent 87a26bfa3b
commit 9e550ca1cf
8 changed files with 184 additions and 3 deletions

View File

@ -45,7 +45,7 @@ PROJECT_BRIEF =
# exceed 55 pixels and the maximum width should not exceed 200 pixels. # exceed 55 pixels and the maximum width should not exceed 200 pixels.
# Doxygen will copy the logo to the output directory. # Doxygen will copy the logo to the output directory.
PROJECT_LOGO = PROJECT_LOGO = ../concordia.png
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put. # base path where the generated documentation will be put.

BIN
concordia.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

View File

@ -4,12 +4,17 @@
#include "concordia/common/config.hpp" #include "concordia/common/config.hpp"
/*! /*!
Class representing an example found by anubis search. Class representing an example found by anubis search. Contains
the id of the example and anubis score of the search.
*/ */
class AnubisSearchResult { class AnubisSearchResult {
public: public:
/*! Constructor.
\param exampleId the id of the found example
\param score anubis score of this example
*/
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId, explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
const double score); const double score);
@ -17,14 +22,24 @@ public:
*/ */
virtual ~AnubisSearchResult(); virtual ~AnubisSearchResult();
/*! Getter for example id.
\returns example id
*/
SUFFIX_MARKER_TYPE getExampleId() const { SUFFIX_MARKER_TYPE getExampleId() const {
return _exampleId; return _exampleId;
} }
/*! Getter for anubis score.
\returns anubis score of the example
*/
double getScore() const { double getScore() const {
return _score; return _score;
} }
/*! Operator "greater than", used to sort objects of this class.
\param other the result to compare against
\returns true if the score of the current result is larger than
the score of the other result
*/
bool operator > (const AnubisSearchResult & other) const { bool operator > (const AnubisSearchResult & other) const {
return (_score > other.getScore()); return (_score > other.getScore());
} }

View File

@ -11,8 +11,13 @@
*/ */
class TextUtils { class TextUtils {
public: public:
/*! Constructor
*/
TextUtils(); TextUtils();
/*! Returns the shared TextUtils instance (singleton accessor).
The instance is a function-local static, instantiated on first use.
*/
static TextUtils & getInstance() { static TextUtils & getInstance() {
static TextUtils instance; // Guaranteed to be destroyed. static TextUtils instance; // Guaranteed to be destroyed.
// Instantiated on first use. // Instantiated on first use.

View File

@ -13,61 +13,144 @@
#include "concordia/matched_pattern_fragment.hpp" #include "concordia/matched_pattern_fragment.hpp"
#include <divsufsort.h> #include <divsufsort.h>
/*!
Class offering a range of simple utility methods.
*/
class Utils { class Utils {
public: public:
/*! Constructor
*/
explicit Utils(); explicit Utils();
/*! Destructor. /*! Destructor.
*/ */
virtual ~Utils(); virtual ~Utils();
/*! Writes an index character to a binary file.
The character is written to the file as bytes, not necessarily in
the order in which they come in the character.
\param file file to write the character to
\param character character to write
*/
static void writeIndexCharacter(std::ofstream & file, static void writeIndexCharacter(std::ofstream & file,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);
/*! Writes a marker to a binary file.
The marker is written to the file as bytes, not necessarily in
the order in which they come in the marker.
\param file file to write the marker to
\param marker marker to write
*/
static void writeMarker(std::ofstream & file, static void writeMarker(std::ofstream & file,
SUFFIX_MARKER_TYPE marker); SUFFIX_MARKER_TYPE marker);
/*! Reads an index character from a binary file.
Reading restores the order of the bytes in the original
character, which was written to the file.
\param file file to read the character from
\returns read character
*/
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file); static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
/*! Reads a marker from a binary file.
Reading restores the order of the bytes in the original
marker, which was written to the file.
\param file file to read the marker from
\returns read marker
*/
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file); static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
/*! Converts a vector of index characters to higher resolution array.
\param input vector of index characters
\returns array of smaller parts of characters
*/
static sauchar_t * indexVectorToSaucharArray( static sauchar_t * indexVectorToSaucharArray(
const std::vector<INDEX_CHARACTER_TYPE> & input); const std::vector<INDEX_CHARACTER_TYPE> & input);
/*! Converts a vector of index characters to higher resolution vector.
\param input vector of index characters
\returns vector of smaller parts of characters
*/
static std::vector<sauchar_t> indexVectorToSaucharVector( static std::vector<sauchar_t> indexVectorToSaucharVector(
const std::vector<INDEX_CHARACTER_TYPE> & input); const std::vector<INDEX_CHARACTER_TYPE> & input);
/*! Appends an index character to a high resolution vector
held by a shared pointer.
\param vector shared pointer to the vector to be appended to
\param character character to append
*/
static void appendCharToSaucharVector( static void appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector, boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);
/*! Appends an index character to a high resolution vector
passed by reference.
\param vector vector to be appended to
\param character character to append
*/
static void appendCharToSaucharVector( static void appendCharToSaucharVector(
std::vector<sauchar_t> & vector, std::vector<sauchar_t> & vector,
INDEX_CHARACTER_TYPE character); INDEX_CHARACTER_TYPE character);
/*! Prints a given vector to standard output.
\param vector vector to be printed
*/
template <typename T> template <typename T>
static void printVector(const std::vector<T> & vector); static void printVector(const std::vector<T> & vector);
/*! Retrieves sentence id from a marker.
\param marker input marker
\returns sentence id
*/
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker); static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
/*! Retrieves offset from a marker.
\param marker input marker
\returns offset
*/
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker); static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
/*! Retrieves example length from a marker.
\param marker input marker
\returns example length
*/
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker); static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
/*! Creates a marker from given data.
\param id sentence id
\param offset offset
\param length example length
\returns generated marker
*/
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id, static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
SUFFIX_MARKER_TYPE offset, SUFFIX_MARKER_TYPE offset,
SUFFIX_MARKER_TYPE length); SUFFIX_MARKER_TYPE length);
/*! Computes overlay score based on a list of non-intersecting intervals.
\param intervalList list of the intervals
\param sentenceSize the total size of the sentence (or pattern)
\param k significance factor. When set to 1, the significance is neutral.
\returns score
*/
static double getLogarithmicOverlay( static double getLogarithmicOverlay(
const std::vector<Interval> & intervalList, const std::vector<Interval> & intervalList,
SUFFIX_MARKER_TYPE sentenceSize, SUFFIX_MARKER_TYPE sentenceSize,
double k); double k);
/*! Computes overlay score based on a list of non-intersecting fragments.
\param fragmentList list of the fragments
\param patternSize the total size of the pattern (or sentence)
\param k significance factor. When set to 1, the significance is neutral.
\returns score
*/
static double getLogarithmicOverlay( static double getLogarithmicOverlay(
const std::vector<MatchedPatternFragment> & fragmentList, const std::vector<MatchedPatternFragment> & fragmentList,
SUFFIX_MARKER_TYPE patternSize, SUFFIX_MARKER_TYPE patternSize,
double k); double k);
/*! Field holding the maximum sentence size allowed in the index.
*/
static SUFFIX_MARKER_TYPE maxSentenceSize; static SUFFIX_MARKER_TYPE maxSentenceSize;
private: private:

View File

@ -39,23 +39,61 @@ public:
*/ */
std::string & getVersion(); std::string & getVersion();
/*! Adds an Example to the index.
\param example example to be added
\throws ConcordiaException
*/
void addExample(const Example & example) throw(ConcordiaException); void addExample(const Example & example) throw(ConcordiaException);
/*! Adds multiple examples to the index.
\param examples vector of examples to be added
\throws ConcordiaException
*/
void addAllExamples(const std::vector<Example> & examples) void addAllExamples(const std::vector<Example> & examples)
throw(ConcordiaException); throw(ConcordiaException);
/*! Performs a simple substring lookup on the index.
For more info see \ref tutorial1_2.
\param pattern pattern to be searched in the index
\returns vector of matched results
\throws ConcordiaException
*/
std::vector<SubstringOccurence> simpleSearch(const std::string & pattern) std::vector<SubstringOccurence> simpleSearch(const std::string & pattern)
throw(ConcordiaException); throw(ConcordiaException);
/*! \deprecated
Finds the examples from the index whose resemblance to the
pattern is maximal. This method may perform very slowly;
try using concordiaSearch instead.
\param pattern pattern to be searched in the index
\returns vector of anubis results
\throws ConcordiaException
*/
std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern) std::vector<AnubisSearchResult> anubisSearch(const std::string & pattern)
throw(ConcordiaException); throw(ConcordiaException);
/*! Performs concordia lookup on the index. This is a unique library
functionality, designed to facilitate Computer-Aided Translation.
For more info see \ref tutorial1_3.
\param pattern pattern to be searched in the index
\returns concordia result
\throws ConcordiaException
*/
boost::shared_ptr<ConcordiaSearchResult> concordiaSearch( boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
const std::string & pattern) const std::string & pattern)
throw(ConcordiaException); throw(ConcordiaException);
/*! Loads HDD-stored index files into RAM and generates
a suffix array based on the RAM-stored data structures.
For more info see \ref tutorial2.
\throws ConcordiaException
*/
void loadRAMIndexFromDisk() throw(ConcordiaException); void loadRAMIndexFromDisk() throw(ConcordiaException);
/*! Generates suffix array based on RAM stored data structures.
For more info see \ref tutorial2.
\throws ConcordiaException
*/
void refreshSAfromRAM() throw(ConcordiaException); void refreshSAfromRAM() throw(ConcordiaException);
private: private:

View File

@ -24,42 +24,82 @@ public:
*/ */
virtual ~ConcordiaConfig(); virtual ~ConcordiaConfig();
/*! Getter for word map file path.
For more information see \ref tutorial3.
\returns word map file path
*/
std::string & getWordMapFilePath() { std::string & getWordMapFilePath() {
return _wordMapFilePath; return _wordMapFilePath;
} }
/*! Getter for hashed index file path.
For more information see \ref tutorial3.
\returns hashed index file path
*/
std::string & getHashedIndexFilePath() { std::string & getHashedIndexFilePath() {
return _hashedIndexFilePath; return _hashedIndexFilePath;
} }
/*! Getter for markers file path.
For more information see \ref tutorial3.
\returns markers file path
*/
std::string & getMarkersFilePath() { std::string & getMarkersFilePath() {
return _markersFilePath; return _markersFilePath;
} }
/*! Getter for html tags file path.
For more information see \ref tutorial3.
\returns html tags file path
*/
std::string & getHtmlTagsFilePath() { std::string & getHtmlTagsFilePath() {
return _htmlTagsFilePath; return _htmlTagsFilePath;
} }
/*! Getter for space symbols file path.
For more information see \ref tutorial3.
\returns space symbols file path
*/
std::string & getSpaceSymbolsFilePath() { std::string & getSpaceSymbolsFilePath() {
return _spaceSymbolsFilePath; return _spaceSymbolsFilePath;
} }
/*! Getter for stop words enabled parameter.
For more information see \ref tutorial3.
\returns true if stop words are enabled
*/
bool & isStopWordsEnabled() { bool & isStopWordsEnabled() {
return _stopWordsEnabled; return _stopWordsEnabled;
} }
/*! Getter for stop words file path.
For more information see \ref tutorial3.
\returns stop words file path
*/
std::string & getStopWordsFilePath() { std::string & getStopWordsFilePath() {
return _stopWordsFilePath; return _stopWordsFilePath;
} }
/*! Getter for named entities file path.
For more information see \ref tutorial3.
\returns named entities file path
*/
std::string & getNamedEntitiesFilePath() { std::string & getNamedEntitiesFilePath() {
return _namedEntitiesFilePath; return _namedEntitiesFilePath;
} }
/*! Getter for stop symbols file path.
For more information see \ref tutorial3.
\returns stop symbols file path
*/
std::string & getStopSymbolsFilePath() { std::string & getStopSymbolsFilePath() {
return _stopSymbolsFilePath; return _stopSymbolsFilePath;
} }
/*! Getter for anubis threshold. Anubis search results with
scores below that threshold will be discarded.
\returns anubis threshold
*/
double getAnubisThreshold() { double getAnubisThreshold() {
return _anubisThreshold; return _anubisThreshold;
} }