finished documentation

2015-05-01 14:52:53 +02:00 · 2015-05-01 14:52:53 +02:00 · abbd5b1ae8
commit abbd5b1ae8
parent 9e550ca1cf
19 changed files with 410 additions and 34 deletions
--- a/Doxyfile.in
+++ b/Doxyfile.in
@ -1355,18 +1355,6 @@ GENERATE_XML           = NO

 XML_OUTPUT             = xml

-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA             =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD                =
-
 # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
 # dump the program listings (including syntax highlighting
 # and cross-referencing information) to the XML output. Note that
--- a/concordia/anubis_search_result.hpp
+++ b/concordia/anubis_search_result.hpp
@ -43,6 +43,7 @@ public:
    bool operator > (const AnubisSearchResult & other) const {
        return (_score > other.getScore());
    }
+
 private:
    SUFFIX_MARKER_TYPE _exampleId;

--- a/concordia/common/utils.hpp
+++ b/concordia/common/utils.hpp
@ -20,7 +20,6 @@

 class Utils {
 public:
-
    /*! Constructor
    */
    explicit Utils();
--- a/concordia/concordia.hpp
+++ b/concordia/concordia.hpp
@ -19,6 +19,18 @@

 /*!
  The Concordia class is the main access point to the library.
+  This class holds references to three out of four main data
+  structures used by Concordia: hashed index, markers array
+  and suffix array. Word map is maintained by the class
+  HashGenerator. Concordia has references to:
+  - the hash generator (HashGenerator)
+  - concordia index (ConcordiaIndex)
+  - concordia searcher (ConcordiaSearcher)
+  - configuration (ConcordiaConfig)
+
+  Whenever it is necessary, the data structures and tools
+  held by Concordia are passed by smart pointers to methods which
+  carry out specific functionalities.

 */

@ -85,7 +97,7 @@ public:

    /*! Loads HDD stored index files to RAM and generates
        suffix array based on RAM stored data structures.
-        For more info see \ref tutorial2.
+        For more info see \ref tutorial2.        
      \throws ConcordiaException
    */
    void loadRAMIndexFromDisk() throw(ConcordiaException);
--- a/concordia/concordia_index.hpp
+++ b/concordia/concordia_index.hpp
@ -14,12 +14,22 @@
 #include <divsufsort.h>

 /*!
-  Class for creating and maintaining the index.
+  Class for creating and maintaining the index. This class
+  does not hold the index data structures but only operates on
+  them when they are passed to ConcordiaIndex methods by
+  smart pointers. This class only remembers paths to two
+  files: hashed index and markers array, which are backups
+  of the respective data structures on HDD.

 */

 class ConcordiaIndex {
 public:
+    /*! Constructor.
+      \param hashedIndexFilePath path to the hashed index file
+      \param markersFilePath path to the markers array
+      \throws ConcordiaException
+    */
    explicit ConcordiaIndex(const std::string & hashedIndexFilePath,
                            const std::string & markersFilePath)
                                    throw(ConcordiaException);
@ -28,23 +38,50 @@ public:
    */
    virtual ~ConcordiaIndex();

+    /*! Adds an Example to the index. Example is first hashed using
+        the hash generator passed to this method. Then, hashed index
+        and markers array (also passed to this method) are appended
+        with the hashed example. At the same time, HDD versions of these
+        two data structures are also appended with the same example.
+      \param hashGenerator hash generator to be used to prepare the hash
+             of the example
+      \param T RAM-based hash index to be appended to
+      \param markers RAM-based markers array to be appended to
+      \param example example to be added to index
+      \throws ConcordiaException
+    */
    void addExample(
                boost::shared_ptr<HashGenerator> hashGenerator,
                boost::shared_ptr<std::vector<sauchar_t> > T,
                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                const Example & example);

+    /*! Adds multiple examples to the index. Examples are first hashed using
+        the hash generator passed to this method. Then, hashed index
+        and markers array (also passed to this method) are appended
+        with the hashed examples. At the same time, HDD versions of these
+        two data structures are also appended with the same examples.
+      \param hashGenerator hash generator to be used to prepare the hash
+             of each example
+      \param T RAM-based hash index to be appended to
+      \param markers RAM-based markers array to be appended to
+      \param examples vector of examples to be added to index
+      \throws ConcordiaException
+    */
    void addAllExamples(
                boost::shared_ptr<HashGenerator> hashGenerator,
                boost::shared_ptr<std::vector<sauchar_t> > T,
                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
                const std::vector<Example> & examples);

+    /*! Generates suffix array based on the passed hashed index.
+      \returns the generated suffix array
+      \throws ConcordiaException
+    */
    boost::shared_ptr<std::vector<saidx_t> > generateSuffixArray(
                boost::shared_ptr<std::vector<sauchar_t> > T);

 private:
-    // Add example to disk index and update RAM index.
    void _addSingleExample(std::ofstream & hashedIndexFile,
                std::ofstream & markersFile,
                boost::shared_ptr<HashGenerator> hashGenerator,
--- a/concordia/concordia_search_result.cpp
+++ b/concordia/concordia_search_result.cpp
@ -23,12 +23,11 @@ void ConcordiaSearchResult::sortFragments() {
              std::greater<MatchedPatternFragment>());
 }

-void ConcordiaSearchResult::computeBestOverlay(
-                                     SUFFIX_MARKER_TYPE patternSize) {
+void ConcordiaSearchResult::computeBestOverlay() {
    // the fragments are already sorted by their ends, ascending
    _checkPossibleOverlays(std::vector<MatchedPatternFragment>(),
                           -1,
-                           patternSize);
+                           _tokenVector.size());
 }

 void ConcordiaSearchResult::_checkPossibleOverlays(
--- a/concordia/concordia_search_result.hpp
+++ b/concordia/concordia_search_result.hpp
@ -8,12 +8,22 @@
 #include <string>

 /*!
-  Class representing result of concordia search.
+  Class representing result of concordia search. Contains the following
+  information:
+  - tokenized pattern which was used for searching
+  - list of longest matched fragments sorted in descending order by length
+  - the best overlay
+  - the score of the best overlay.
+  
+  For more info about concordia searching see \ref tutorial1_3.

 */

 class ConcordiaSearchResult {
 public:
+    /*! Constructor.
+      \param tokenVector tokenized pattern which was used for searching
+    */
    explicit ConcordiaSearchResult(
                const std::vector<std::string> & tokenVector);

@ -21,24 +31,45 @@ public:
    */
    virtual ~ConcordiaSearchResult();

+    /*! Adds a matched pattern fragment to the list.
+      \param fragment fragment to be added
+    */
    void addFragment(const MatchedPatternFragment & fragment);

+    /*! Sorts the list of matched pattern fragments in descending order
+        by length.
+    */
    void sortFragments();

-    void computeBestOverlay(SUFFIX_MARKER_TYPE patternSize);
+    /*! Computes the best overlay by choosing appropriate fragments
+        from the fragments list. For more info see \ref tutorial1_3.
+    */
+    void computeBestOverlay();

+    /*! Getter for tokenized pattern.
+        \returns tokenized search pattern
+    */
    std::vector<std::string> getTokenVector() const {
        return _tokenVector;
    }

+    /*! Getter for all matched pattern fragments list.
+        \returns matched pattern fragments list
+    */
    std::vector<MatchedPatternFragment> getFragments() const {
        return _matchedPatternFragments;
    }

+    /*! Getter for best overlay.
+        \returns list of fragments that comprise the best overlay
+    */
    std::vector<MatchedPatternFragment> getBestOverlay() const {
        return _bestOverlay;
    }

+    /*! Getter for best overlay score.
+        \returns score of the best overlay
+    */
    double getBestOverlayScore() const {
        return _bestOverlayScore;
    }
--- a/concordia/concordia_searcher.cpp
+++ b/concordia/concordia_searcher.cpp
@ -46,7 +46,7 @@ void ConcordiaSearcher::concordiaSearch(
    }

    // compute best overlay of the pattern by matched fragments
-    result->computeBestOverlay(pattern.size());
+    result->computeBestOverlay();

    result->sortFragments();
 }
--- a/concordia/concordia_searcher.hpp
+++ b/concordia/concordia_searcher.hpp
@ -16,7 +16,8 @@
 #include <divsufsort.h>

 /*!
-  Class for searching using Concordia algorithm.
+  Class for searching using Concordia algorithm. All searches are performed
+  on data structures passed to the methods of this class by smart pointers.

 */

@ -28,6 +29,18 @@ public:
    */
    virtual ~ConcordiaSearcher();

+    /*! Performs concordia lookup on the RAM-based index.
+        This is a unique library functionality, designed
+        to facilitate Computer-Aided Translation.
+        For more info see \ref tutorial1_3.
+      \param result variable to store the result
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern pattern to be searched in the index.
+             This pattern needs to be hashed.
+      \throws ConcordiaException
+    */
    void concordiaSearch(
                boost::shared_ptr<ConcordiaSearchResult> result,
                boost::shared_ptr<std::vector<sauchar_t> > T,
@ -36,6 +49,20 @@ public:
                const std::vector<INDEX_CHARACTER_TYPE> & pattern)
                                                     throw(ConcordiaException);

+    /*! \deprecated
+        Finds the examples from the index, whose resemblance to the
+        pattern is maximal. This method may be very slow,
+        try using concordiaSearch instead.
+      \param config concordia config object
+             (to read the anubis threshold parameter)
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern pattern to be searched in the index.
+             This pattern needs to be hashed.
+      \returns vector of results
+      \throws ConcordiaException
+    */
    std::vector<AnubisSearchResult> anubisSearch(
                boost::shared_ptr<ConcordiaConfig> config,
                boost::shared_ptr<std::vector<sauchar_t> > T,
@ -44,6 +71,17 @@ public:
                const std::vector<INDEX_CHARACTER_TYPE> & pattern)
                                                     throw(ConcordiaException);

+    /*! Generates map of all examples in the index which have
+        at least one word in common with the pattern. This method
+        is used internally by anubisSearch and may be slow.
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern pattern to be searched in the index.
+             This pattern needs to be hashed.
+      \returns generated map
+      \throws ConcordiaException
+    */
    boost::shared_ptr<TmMatchesMap> getTmMatches(
                boost::shared_ptr<std::vector<sauchar_t> > T,
                boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
@ -51,6 +89,21 @@ public:
                const std::vector<INDEX_CHARACTER_TYPE> & pattern)
                                                     throw(ConcordiaException);

+    /*! Looks for fragments in the index which have the longest
+        common prefix with the pattern. This method returns the list of
+        locations of these longest fragments (as return value) and their
+        length in the length parameter. There is a tight limit on the number
+        of longest fragments (currently set to 3). This method is used in
+        concordiaSearch.
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern pattern to be searched in the index.
+             This pattern needs to be hashed.
+      \param length the returned length of the longest fragments
+      \returns list of locations of the longest fragments
+      \throws ConcordiaException
+    */
    std::vector<SubstringOccurence> lcpSearch(
                    boost::shared_ptr<std::vector<sauchar_t> > T,
                    boost::shared_ptr<std::vector<SUFFIX_MARKER_TYPE> > markers,
--- a/concordia/example.hpp
+++ b/concordia/example.hpp
@ -7,11 +7,17 @@

 /*!
  Class representing a single sentence to be added into index along with its id.
+  For more info see \ref tutorial1_2.

 */

 class Example {
 public:
+    /*!
+      Constructor.
+      \param sentence sentence to be added to index
+      \param id id of this sentence
+    */
    explicit Example(const std::string & sentence,
                     const SUFFIX_MARKER_TYPE & id)
                                           throw(ConcordiaException);
@ -20,10 +26,16 @@ public:
    */
    virtual ~Example();

+    /*! Getter for sentence.
+        \return sentence
+    */
    std::string getSentence() const {
        return _sentence;
    }

+    /*! Getter for sentence id.
+        \return sentence id
+    */
    SUFFIX_MARKER_TYPE getId() const {
        return _id;
    }
--- a/concordia/hash_generator.hpp
+++ b/concordia/hash_generator.hpp
@ -14,12 +14,24 @@


 /*!
-  Class for generating a sentence hash.
+  Class for generating a sentence hash. The hash is generated from a sentence
+  given in raw string. String is first anonymized and tokenized. After these
+  operations, each token is coded as an integer, according to WordMap.
+  Resulting hash is a vector of integers.
+  
+  Sentence hash is used when adding a sentence to index and during searching.
+  
+  HashGenerator holds an instance of WordMap, used to code tokens as integers
+  and SentenceAnonymizer, used to preprocess the sentence string.

 */

 class HashGenerator {
 public:
+    /*!
+      Constructor.
+      \param config pointer to current config object
+    */
    explicit HashGenerator(boost::shared_ptr<ConcordiaConfig> config)
                                            throw(ConcordiaException);

@ -27,11 +39,28 @@ public:
    */
    virtual ~HashGenerator();

+    /*!
+      Generates hash of a sentence.
+      \param sentence sentence to generate hash from
+      \returns vector of integers
+    */
    std::vector<INDEX_CHARACTER_TYPE> generateHash(const std::string & sentence)
                                throw(ConcordiaException);

+    /*!
+      Generates vector of tokens from a sentence. This method is internally
+      used by generateHash. However, for the sake of concordiaSearch
+      (see \ref tutorial1_3), the vector of tokens resulting from sentence
+      anonymizing and tokenization is also needed.
+      \param sentence sentence to tokenize
+      \returns vector of tokens
+    */
    std::vector<std::string> generateTokenVector(const std::string & sentence);

+
+    /*!
+        Saves the contents of current WordMap to HDD.
+    */
    void serializeWordMap();

 private:
--- a/concordia/index_searcher.hpp
+++ b/concordia/index_searcher.hpp
@ -16,18 +16,35 @@
 #include <divsufsort.h>

 /*!
-  Class for searching the index with a sentence.
+  Class for searching the index with a sentence. In all searches the sentence
+  is first hashed and then used as a query.
+  
+  IndexSearcher performs the simpleSearch on its own, but uses a
+  ConcordiaSearcher object to carry out concordiaSearch.

 */

 class IndexSearcher {
 public:
+    /*! Constructor.
+    */
    explicit IndexSearcher();

    /*! Destructor.
    */
    virtual ~IndexSearcher();

+    /*! Performs a simple substring lookup in RAM-based index.
+        For more info see \ref tutorial1_2.
+      \param hashGenerator hash generator to be used to convert
+             input sentence to a hash
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern string pattern to be searched in the index.
+      \returns vector of occurrences of the pattern in the index
+      \throws ConcordiaException
+    */
    std::vector<SubstringOccurence> simpleSearch(
                    boost::shared_ptr<HashGenerator> hashGenerator,
                    boost::shared_ptr<std::vector<sauchar_t> > T,
@ -35,6 +52,21 @@ public:
                    boost::shared_ptr<std::vector<saidx_t> > SA,
                    const std::string & pattern) throw(ConcordiaException);

+    /*! \deprecated
+        Finds the examples from the index, whose resemblance to the
+        pattern is maximal. This method may be very slow,
+        try using concordiaSearch instead.
+      \param config concordia config object
+             (to read the anubis threshold parameter)
+      \param hashGenerator hash generator to be used to convert
+             input sentence to a hash
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern string pattern to be searched in the index.
+      \returns vector of results
+      \throws ConcordiaException
+    */
    std::vector<AnubisSearchResult> anubisSearch(
                    boost::shared_ptr<ConcordiaConfig> config,
                    boost::shared_ptr<HashGenerator> hashGenerator,
@ -43,6 +75,19 @@ public:
                    boost::shared_ptr<std::vector<saidx_t> > SA,
                    const std::string & pattern) throw(ConcordiaException);

+    /*! Performs concordia lookup on the RAM-based index.
+        This is a unique library functionality, designed
+        to facilitate Computer-Aided Translation.
+        For more info see \ref tutorial1_3.
+      \param hashGenerator hash generator to be used to convert
+             input sentence to a hash
+      \param T hashed index to search in
+      \param markers markers array for the needs of searching
+      \param SA suffix array for the needs of searching
+      \param pattern pattern to be searched in the index.
+      \returns result of the search
+      \throws ConcordiaException
+    */
    boost::shared_ptr<ConcordiaSearchResult> concordiaSearch(
                    boost::shared_ptr<HashGenerator> hashGenerator,
                    boost::shared_ptr<std::vector<sauchar_t> > T,
--- a/concordia/interval.hpp
+++ b/concordia/interval.hpp
@ -4,12 +4,20 @@
 #include "concordia/common/config.hpp"

 /*!
-  Class representing word interval.
+  Class representing interval of a sentence, i.e. a sequence of words
+  coming from that sentence. An interval only has its start and end indexes,
+  where the start index is inclusive and end index is exclusive. For example,
+  an interval [2,5) of the sentence "This is just for testing purposes" is:
+  "just for testing".

 */

 class Interval {
 public:
+    /*! Constructor.
+      \param start start index of the interval (0-based)
+      \param end end index of the interval (0-based)
+    */
    explicit Interval(const SUFFIX_MARKER_TYPE start,
                      const SUFFIX_MARKER_TYPE end);

@ -17,14 +25,27 @@ public:
    */
    virtual ~Interval();

+    /*! Checks if this interval intersects another.
+      \param interval another interval
+      \returns true if the two intervals intersect
+    */
    bool intersects(Interval & interval);

+    /*! Getter for interval length.
+      \returns end - start
+    */
    SUFFIX_MARKER_TYPE getLength();

+    /*! Getter for interval start.
+      \returns start
+    */
    SUFFIX_MARKER_TYPE getStart() const {
        return _start;
    }

+    /*! Getter for interval end.
+      \returns end
+    */
    SUFFIX_MARKER_TYPE getEnd() const {
        return _end;
    }
--- a/concordia/matched_pattern_fragment.hpp
+++ b/concordia/matched_pattern_fragment.hpp
@ -7,10 +7,21 @@
 /*!
  Class representing matched pattern fragment in concordia search.
  This fragment can be seen as an interval of the pattern.
+  
+  This class holds information about:
+  - where the pattern fragment was matched (example id and example offset)
+  - where the fragment is located within the pattern
+    (patternOffset, matchedLength)
 */

 class MatchedPatternFragment : public Interval {
 public:
+    /*! Constructor.
+      \param exampleId id of the example where the pattern fragment was matched
+      \param exampleOffset offset of the matched fragment in the example
+      \param patternOffset offset of the matched fragment in the pattern
+      \param matchedLength length of the matched pattern
+    */
    MatchedPatternFragment(const SUFFIX_MARKER_TYPE & exampleId,
                           const SUFFIX_MARKER_TYPE & exampleOffset,
                           const SUFFIX_MARKER_TYPE & patternOffset,
@ -19,22 +30,37 @@ public:
    */
    virtual ~MatchedPatternFragment();

+    /*! Getter for example id.
+      \returns example id
+    */
    SUFFIX_MARKER_TYPE getExampleId() const {
        return _exampleId;
    }

+    /*! Getter for example offset.
+      \returns example offset
+    */
    SUFFIX_MARKER_TYPE getExampleOffset() const {
        return _exampleOffset;
    }

+    /*! Getter for pattern offset.
+      \returns pattern offset
+    */
    SUFFIX_MARKER_TYPE getPatternOffset() const {
        return _patternOffset;
    }

+    /*! Getter for matched length.
+      \returns matched fragment length
+    */
    SUFFIX_MARKER_TYPE getMatchedLength() const {
        return _matchedLength;
    }

+    /*! Operator for comparing fragments by their length.
+      \returns true if current fragment is longer than the other
+    */
    bool operator > (const MatchedPatternFragment & other) const {
        return (_matchedLength > other.getMatchedLength());
    }
--- a/concordia/regex_replacement.hpp
+++ b/concordia/regex_replacement.hpp
@ -9,15 +9,23 @@
 #include <boost/regex/icu.hpp>


-/*!
-  Class for replacing string occurences.
-
-*/

 typedef boost::error_info<struct my_tag, std::string> my_tag_error_info;

+/*!
+  Class for representing a regular expression replacement operation.
+  Holds regex pattern string for matching and replacement string for
+  replacing found matches.
+
+*/
 class RegexReplacement {
 public:
+    /*!
+      Constructor.
+        \param patternString regex pattern to match
+        \param replacement string to substitute the found match
+        \param caseSensitive case sensitivity of the pattern
+    */
    RegexReplacement(std::string patternString, std::string replacement,
                                              bool caseSensitive = true)
                                               throw(ConcordiaException);
@ -26,6 +34,10 @@ public:
    */
    virtual ~RegexReplacement();

+    /*! Applies the operation on input string.
+      \param text the input string
+      \returns altered version of the input string
+    */
    std::string apply(const std::string & text);

 private:
--- a/concordia/sentence_anonymizer.hpp
+++ b/concordia/sentence_anonymizer.hpp
@ -12,12 +12,20 @@


 /*!
-  Class for anonymizing sentence before adding to index.
-
+  Class for anonymizing sentence before generating hash.
+  This operation is used to
+  remove unnecessary symbols and possibly words from sentences added to index
+  and search patterns. Anonymizer removes html tags, substitutes predefined symbols
+  with a single space, removes stop words (if the option is enabled), as well as
+  named entities and special symbols. All these have to be listed in files
+  (see \ref tutorial3).
 */

 class SentenceAnonymizer {
 public:
+    /*! Constructor.
+      \param config config object, holding paths to necessary files
+    */
    explicit SentenceAnonymizer(boost::shared_ptr<ConcordiaConfig> config)
                                                 throw(ConcordiaException);

@ -25,6 +33,10 @@ public:
    */
    virtual ~SentenceAnonymizer();

+    /*! Anonymizes the sentence.
+      \param sentence input sentence
+      \returns altered version of the input sentence
+    */
    std::string anonymize(const std::string & sentence);

 private:
--- a/concordia/substring_occurence.hpp
+++ b/concordia/substring_occurence.hpp
@ -6,15 +6,32 @@

 /*!
  Class representing occurence of a searched substring.
-
+  It holds the following information:
+  - id of the example where the substring was found
+  - offset of the matched substring in this example
+  - length of the example
 */

 class SubstringOccurence {
 public:
+    /*!
+      Constructor.
+
+    */
    SubstringOccurence();

+    /*!
+      Constructor taking data from a marker.
+        \param marker
+    */
    explicit SubstringOccurence(const SUFFIX_MARKER_TYPE & marker);

+    /*!
+      Constructor with three arguments.
+        \param id example id
+        \param offset offset of the substring in the example
+        \param exampleLength length of the example
+    */
    SubstringOccurence(const SUFFIX_MARKER_TYPE & id,
                                const SUFFIX_MARKER_TYPE & offset,
                                const SUFFIX_MARKER_TYPE & exampleLength);
@ -22,18 +39,30 @@ public:
    */
    virtual ~SubstringOccurence();

+    /*! Getter for example id.
+      \returns example id
+    */
    SUFFIX_MARKER_TYPE getId() const {
        return _id;
    }

+    /*! Getter for example offset
+      \returns example offset
+    */
    SUFFIX_MARKER_TYPE getOffset() const {
        return _offset;
    }

+    /*! Getter for example length.
+      \returns example length
+    */
    SUFFIX_MARKER_TYPE getExampleLength() const {
        return _exampleLength;
    }

+    /*! Setter of all the fields, based on input marker.
+      \param marker marker to read the data from
+    */
    void enterDataFromMarker(const SUFFIX_MARKER_TYPE & marker);

 private:
--- a/concordia/tm_matches.hpp
+++ b/concordia/tm_matches.hpp
@ -11,13 +11,25 @@

 /*!
  Class used within Anubis search algorithm to store partial results.
+  Holds information about mutual overlay of the pattern and found
+  example.

 */

 class TmMatches {
 public:
+    /*!
+      Constructor.
+
+    */
    TmMatches();

+    /*!
+      Constructor setting basic information.
+        \param exampleId id of found example
+        \param exampleSize size of the found example
+        \param patternSize size of the searched pattern
+    */
    TmMatches(const SUFFIX_MARKER_TYPE exampleId,
              const SUFFIX_MARKER_TYPE exampleSize,
              const SUFFIX_MARKER_TYPE patternSize);
@ -26,28 +38,74 @@ public:
    */
    virtual ~TmMatches();

+    /*!
+      Getter for score of the mutual overlay.
+        \returns score
+    */
    double getScore() const {
        return _score;
    }

+    /*!
+      Getter for the list of overlays of the example.
+        \returns example overlays list
+    */
    std::vector<Interval> getExampleIntervals() const {
        return _exampleMatchedRegions;
    }

+    /*!
+      Getter for the list of overlays of the pattern.
+        \returns pattern overlays list
+    */
    std::vector<Interval> getPatternIntervals() const {
        return _patternMatchedRegions;
    }

+    /*!
+      Getter for example id.
+        \returns example id
+    */
    SUFFIX_MARKER_TYPE getExampleId() const {
        return _exampleId;
    }

+    /*!
+      Calculates mutual overlay score in the scale [0,1].
+      Uses generalized Jaccard index for the computation.
+      Score 1 - perfect score - is assigned when the whole pattern
+      and the whole example are covered. Result of the computation
+      is stored in the score field, use getScore() to retrieve it.
+    */
    void calculateSimpleScore();

+    /*!
+      Calculates mutual overlay score in the scale [0,1].
+      Takes into account the number and the length of the
+      fragments (the fewer fragments, the better).
+      Score 1 - perfect score - is assigned when the whole pattern
+      and the whole example are covered with only one fragment.
+      Result of the computation is stored in the score field,
+      use getScore() to retrieve it.
+    */
    void calculateScore();

+    /*!
+      Adds information about covering of example. If the new
+      fragment intersects with any previous fragment, it is
+      not added.
+        \param start start of the example overlay fragment
+        \param end end of the example overlay fragment
+    */
    void addExampleInterval(int start, int end);

+    /*!
+      Adds information about covering of pattern. If the new
+      fragment intersects with any previous fragment, it is
+      not added.
+        \param start start of the pattern overlay fragment
+        \param end end of the pattern overlay fragment
+    */
    void addPatternInterval(int start, int end);

 private:
--- a/concordia/word_map.hpp
+++ b/concordia/word_map.hpp
@ -10,18 +10,30 @@
 #include <boost/serialization/map.hpp>

 /*!
-  Class representing dictionary for word to int encoding.
+  Class representing dictionary for word to integer encoding.

 */

 class WordMap {
 public:
+    /*!
+      Constructor.
+
+    */
    explicit WordMap() throw(ConcordiaException);

    /*! Destructor.
    */
    virtual ~WordMap();

+    /*!
+      Gets the integer code of a token. If the token is found in
+      the dictionary, the dictionary code is returned. If not,
+      the word is added to the dictionary and its newly created
+      code is returned.
+        \param word token to generate the code
+        \returns code of the token
+    */
    INDEX_CHARACTER_TYPE getWordCode(const std::string & word)
                                     throw(ConcordiaException);