diff --git a/Doxyfile.in b/Doxyfile.in index b088f18..704c20e 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -45,7 +45,7 @@ PROJECT_BRIEF = # exceed 55 pixels and the maximum width should not exceed 200 pixels. # Doxygen will copy the logo to the output directory. -PROJECT_LOGO = +PROJECT_LOGO = ../concordia.png # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/concordia-console/concordia-console.cpp b/concordia-console/concordia-console.cpp index a0d1d5a..e670f58 100644 --- a/concordia-console/concordia-console.cpp +++ b/concordia-console/concordia-console.cpp @@ -282,7 +282,7 @@ int main(int argc, char** argv) { if (totalTimeElapsed == 0) { totalTimeElapsed++; } - + double totalSpeed = static_cast(1000 * lineCount / totalTimeElapsed); std::cout << "\tReading finished. Read and added to index " diff --git a/concordia.png b/concordia.png new file mode 100644 index 0000000..52ca27e Binary files /dev/null and b/concordia.png differ diff --git a/concordia/anubis_search_result.hpp b/concordia/anubis_search_result.hpp index bb87efd..5a436e4 100644 --- a/concordia/anubis_search_result.hpp +++ b/concordia/anubis_search_result.hpp @@ -4,12 +4,17 @@ #include "concordia/common/config.hpp" /*! - Class representing an example found by anubis search. + Class representing an example found by anubis search. Contains + the id of the example and anubis score of the search. */ class AnubisSearchResult { public: + /*! Constructor. + \param exampleId the id of found example + \param score score of this example + */ explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId, const double score); @@ -17,14 +22,24 @@ public: */ virtual ~AnubisSearchResult(); + /*! Getter for example id. + \returns example id + */ SUFFIX_MARKER_TYPE getExampleId() const { return _exampleId; } + /*! Getter for anubis score. 
+ \returns anubis score of the example + */ double getScore() const { return _score; } + /*! Operator "greater than", used to sort objects of this class. + \returns true if the score of the current result is larger than + the score of another result + */ bool operator > (const AnubisSearchResult & other) const { return (_score > other.getScore()); } diff --git a/concordia/common/text_utils.hpp b/concordia/common/text_utils.hpp index 98d0087..de83d1e 100644 --- a/concordia/common/text_utils.hpp +++ b/concordia/common/text_utils.hpp @@ -11,8 +11,13 @@ */ class TextUtils { public: + /*! Constructor + */ + TextUtils(); + /*! Method for ensuring singleton. + */ static TextUtils & getInstance() { static TextUtils instance; // Guaranteed to be destroyed. // Instantiated on first use. diff --git a/concordia/common/utils.hpp b/concordia/common/utils.hpp index 3bf6d31..587c97b 100644 --- a/concordia/common/utils.hpp +++ b/concordia/common/utils.hpp @@ -13,61 +13,144 @@ #include "concordia/matched_pattern_fragment.hpp" #include +/*! + Class offering a range of simple utility methods. + +*/ + class Utils { public: + + /*! Constructor + */ explicit Utils(); /*! Destructor. */ virtual ~Utils(); + /*! Writes an index character to a binary file. + The character is written to the file as bytes, not necessarily in + the order in which they come in the character. + \param file file to write the character to + \param character character to write + */ static void writeIndexCharacter(std::ofstream & file, INDEX_CHARACTER_TYPE character); + /*! Writes a marker to a binary file. + The marker is written to the file as bytes, not necessarily in + the order in which they come in the marker. + \param file file to write the marker to + \param marker marker to write + */ static void writeMarker(std::ofstream & file, SUFFIX_MARKER_TYPE marker); + /*! Reads an index character from a binary file. + Reading restores the order of the bytes in the original + character, which was written to the file. 
+ \param file file to read the character from + \returns read character + */ static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file); + /*! Reads a marker from a binary file. + Reading restores the order of the bytes in the original + marker, which was written to the file. + \param file file to read the marker from + \returns read marker + */ static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file); + /*! Converts a vector of index characters to a higher resolution array. + \param input vector of index characters + \returns array of smaller parts of characters + */ static sauchar_t * indexVectorToSaucharArray( const std::vector & input); + /*! Converts a vector of index characters to a higher resolution vector. + \param input vector of index characters + \returns vector of smaller parts of characters + */ static std::vector indexVectorToSaucharVector( const std::vector & input); + /*! Appends an index character to a high resolution vector. + \param vector vector to be appended to + \param character character to append + */ static void appendCharToSaucharVector( boost::shared_ptr > vector, INDEX_CHARACTER_TYPE character); + /*! Appends an index character to a high resolution vector. + \param vector vector to be appended to + \param character character to append + */ static void appendCharToSaucharVector( std::vector & vector, INDEX_CHARACTER_TYPE character); + /*! Prints a given vector to standard output. + \param vector vector to be printed + */ template static void printVector(const std::vector & vector); + /*! Retrieves sentence id from a marker. + \param marker input marker + \returns sentence id + */ static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker); + /*! Retrieves offset from a marker. + \param marker input marker + \returns offset + */ static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker); + /*! Retrieves example length from a marker.
+ \param marker input marker + \returns example length + */ static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker); + /*! Creates a marker from given data. + \param id sentence id + \param offset offset + \param length example length + \returns generated marker + */ static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id, SUFFIX_MARKER_TYPE offset, SUFFIX_MARKER_TYPE length); + /*! Computes overlay score based on a list of non-intersecting intervals. + \param intervalList list of the intervals + \param sentenceSize the total size of the sentence (or pattern) + \param k significance factor. When set to 1, the significance is neutral. + \returns score + */ static double getLogarithmicOverlay( const std::vector & intervalList, SUFFIX_MARKER_TYPE sentenceSize, double k); + /*! Computes overlay score based on a list of non-intersecting fragments. + \param fragmentList list of the fragments + \param patternSize the total size of the sentence (or pattern) + \param k significance factor. When set to 1, the significance is neutral. + \returns score + */ static double getLogarithmicOverlay( const std::vector & fragmentList, SUFFIX_MARKER_TYPE patternSize, double k); + /*! Field holding the maximum sentence size allowed in the index. + */ static SUFFIX_MARKER_TYPE maxSentenceSize; private: diff --git a/concordia/concordia.hpp b/concordia/concordia.hpp index 1ae4187..46f9349 100644 --- a/concordia/concordia.hpp +++ b/concordia/concordia.hpp @@ -39,23 +39,61 @@ public: */ std::string & getVersion(); + /*! Adds an Example to the index. + \param example example to be added + \throws ConcordiaException + */ void addExample(const Example & example) throw(ConcordiaException); + /*! Adds multiple examples to the index. + \param examples vector of examples to be added + \throws ConcordiaException + */ void addAllExamples(const std::vector & examples) throw(ConcordiaException); + /*! Performs a simple substring lookup on the index.
+ For more info see \ref tutorial1_2. + \param pattern pattern to be searched in the index + \returns vector of matched results + \throws ConcordiaException + */ std::vector simpleSearch(const std::string & pattern) throw(ConcordiaException); + /*! \deprecated + Finds the examples in the index whose resemblance to the + pattern is maximal. This method may be very slow; + try using concordiaSearch instead. + \param pattern pattern to be searched in the index + \returns vector of anubis results + \throws ConcordiaException + */ std::vector anubisSearch(const std::string & pattern) throw(ConcordiaException); + /*! Performs concordia lookup on the index. This is a unique library + functionality, designed to facilitate Computer-Aided Translation. + For more info see \ref tutorial1_3. + \param pattern pattern to be searched in the index + \returns concordia result + \throws ConcordiaException + */ boost::shared_ptr concordiaSearch( const std::string & pattern) throw(ConcordiaException); + /*! Loads HDD stored index files to RAM and generates + suffix array based on RAM stored data structures. + For more info see \ref tutorial2. + \throws ConcordiaException + */ void loadRAMIndexFromDisk() throw(ConcordiaException); + /*! Generates suffix array based on RAM stored data structures. + For more info see \ref tutorial2. + \throws ConcordiaException + */ void refreshSAfromRAM() throw(ConcordiaException); private: diff --git a/concordia/concordia_config.hpp b/concordia/concordia_config.hpp index cc8e4c3..983665e 100644 --- a/concordia/concordia_config.hpp +++ b/concordia/concordia_config.hpp @@ -24,42 +24,82 @@ public: */ virtual ~ConcordiaConfig(); + /*! Getter for word map file path. + For more information see \ref tutorial3. + \returns word map file path + */ std::string & getWordMapFilePath() { return _wordMapFilePath; } + /*! Getter for hashed index file path. + For more information see \ref tutorial3.
+ \returns hashed index file path + */ std::string & getHashedIndexFilePath() { return _hashedIndexFilePath; } + /*! Getter for markers file path. + For more information see \ref tutorial3. + \returns markers file path + */ std::string & getMarkersFilePath() { return _markersFilePath; } + /*! Getter for html tags file path. + For more information see \ref tutorial3. + \returns html tags file path + */ std::string & getHtmlTagsFilePath() { return _htmlTagsFilePath; } + /*! Getter for space symbols file path. + For more information see \ref tutorial3. + \returns space symbols file path + */ std::string & getSpaceSymbolsFilePath() { return _spaceSymbolsFilePath; } + /*! Getter for stop words enabled parameter. + For more information see \ref tutorial3. + \returns true if stop words are enabled + */ bool & isStopWordsEnabled() { return _stopWordsEnabled; } + /*! Getter for stop words file path. + For more information see \ref tutorial3. + \returns stop words file path + */ std::string & getStopWordsFilePath() { return _stopWordsFilePath; } + /*! Getter for named entities file path. + For more information see \ref tutorial3. + \returns named entities file path + */ std::string & getNamedEntitiesFilePath() { return _namedEntitiesFilePath; } + /*! Getter for stop symbols file path. + For more information see \ref tutorial3. + \returns stop symbols file path + */ std::string & getStopSymbolsFilePath() { return _stopSymbolsFilePath; } + /*! Getter for anubis threshold. Anubis search results with + scores below that threshold will be discarded. + \returns anubis threshold + */ double getAnubisThreshold() { return _anubisThreshold; }