// concordia-library/concordia/common/utils.hpp

#ifndef UTILS_HDR
#define UTILS_HDR
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <fstream>
#include <iostream>
#include <vector>
#include "concordia/common/config.hpp"
#include "concordia/concordia_exception.hpp"
#include "concordia/interval.hpp"
#include "concordia/matched_pattern_fragment.hpp"
#include <divsufsort.h>
2015-04-30 22:22:54 +02:00
/*!
Class offering a range of simple utility methods.
*/
class Utils {
public:
2015-04-30 22:22:54 +02:00
/*! Constructor
*/
explicit Utils();
/*! Destructor.
*/
virtual ~Utils();
2015-04-30 22:22:54 +02:00
/*! Writes an index character to a binary file.
The character is written to the file as bytes, not necessarily in
the order in which they come in the character.
\param file file to write the character to
\param character character to write
*/
static void writeIndexCharacter(std::ofstream & file,
INDEX_CHARACTER_TYPE character);
2015-04-30 22:22:54 +02:00
/*! Writes a marker to a binary file.
The marker is written to the file as bytes, not necessarily in
the order in which they come in the marker.
\param file file to write the marker to
\param marker marker to write
*/
static void writeMarker(std::ofstream & file,
SUFFIX_MARKER_TYPE marker);
2015-04-30 22:22:54 +02:00
/*! Reads an index character from a binary file.
Reading restores the order of the bytes in the original
character, which was written to the file.
\param file file to read the character from
\returns read character
*/
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
2015-04-30 22:22:54 +02:00
/*! Reads a marker from a binary file.
Reading restores the order of the bytes in the original
marker, which was written to the file.
\param file file to read the marker from
\returns read marker
*/
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
2015-04-30 22:22:54 +02:00
/*! Converts a vector of index characters to higher resolution array.
\param input vector of index characters
\returns array of smaller parts of characters
*/
static sauchar_t * indexVectorToSaucharArray(
const std::vector<INDEX_CHARACTER_TYPE> & input);
2015-04-30 22:22:54 +02:00
/*! Converts a vector of index characters to higher resolution vector.
\param input vector of index characters
\returns vector of smaller parts of characters
*/
static std::vector<sauchar_t> indexVectorToSaucharVector(
const std::vector<INDEX_CHARACTER_TYPE> & input);
2015-04-30 22:22:54 +02:00
/*! Appends an index character to a high resolution vector.
\param vector vector to be appended to
\param character character to append
*/
static void appendCharToSaucharVector(
boost::shared_ptr<std::vector<sauchar_t> > vector,
INDEX_CHARACTER_TYPE character);
2015-04-30 22:22:54 +02:00
/*! Appends an index character to a high resolution vector.
\param vector vector to be appended to
\param character character to append
*/
static void appendCharToSaucharVector(
std::vector<sauchar_t> & vector,
INDEX_CHARACTER_TYPE character);
2015-04-30 22:22:54 +02:00
/*! Prints a given vector to standard output.
\param vector vector to be printed
*/
template <typename T>
static void printVector(const std::vector<T> & vector);
2015-04-30 22:22:54 +02:00
/*! Retrieves sentence id from a marker.
\param marker input marker
\returns sentence id
*/
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
2015-04-30 22:22:54 +02:00
/*! Retrieves offset from a marker.
\param marker input marker
\returns offset
*/
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
2015-04-30 22:22:54 +02:00
/*! Retrieves example length from a marker.
\param marker input marker
\returns example length
*/
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
2015-04-30 22:22:54 +02:00
/*! Creates a marker from given data.
\param id sentence id
\param offset offset
\param length example length
\returns generated marker
*/
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
SUFFIX_MARKER_TYPE offset,
SUFFIX_MARKER_TYPE length);
2015-04-30 22:22:54 +02:00
/*! Computes overlay score based on a list of non-intersecting intervals.
\param intervalList list of the intervals
\param sentenceSize the total size of the sentence (or pattern)
\param k significance factor. When set to 1, the significance is neutral.
\returns score
*/
static double getLogarithmicOverlay(
const std::vector<Interval> & intervalList,
SUFFIX_MARKER_TYPE sentenceSize,
double k);
2015-04-30 22:22:54 +02:00
/*! Computes overlay score based on a list of non-intersecting fragments.
\param intervalList list of the intervals
\param sentenceSize the total size of the sentence (or pattern)
\param k significance factor. When set to 1, the significance is neutral.
\returns score
*/
static double getLogarithmicOverlay(
const std::vector<MatchedPatternFragment> & fragmentList,
SUFFIX_MARKER_TYPE patternSize,
double k);
2015-04-30 22:22:54 +02:00
/*! Field holding the maximum sentence size allowed in the index.
*/
static SUFFIX_MARKER_TYPE maxSentenceSize;
private:
static void _insertCharToSaucharArray(sauchar_t * array,
INDEX_CHARACTER_TYPE character, int pos);
static int _idBytes;
};
/*! Prints a given vector to standard output.
  All elements are written on a single line; each one is converted to
  int and followed by one space. The line is ended with std::endl.
  \param vector vector to be printed
*/
template <typename T>
void Utils::printVector(const std::vector<T> & vector) {
    // Walk the vector with its own iterator instead of an int index:
    // comparing a signed int with vector.size() mixes signedness and the
    // index would overflow for vectors holding more than INT_MAX elements.
    for (typename std::vector<T>::const_iterator it = vector.begin();
         it != vector.end(); ++it) {
        // The cast makes narrow character types print as numbers
        // rather than as raw characters.
        std::cout << static_cast<int>(*it) << " ";
    }
    std::cout << std::endl;
}
#endif