171 lines
5.8 KiB
C++
171 lines
5.8 KiB
C++
#ifndef UTILS_HDR
|
|
#define UTILS_HDR
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
#include <boost/foreach.hpp>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <vector>
|
|
|
|
#include "concordia/common/config.hpp"
|
|
#include "concordia/concordia_exception.hpp"
|
|
#include "concordia/interval.hpp"
|
|
#include "concordia/matched_pattern_fragment.hpp"
|
|
#include <divsufsort.h>
|
|
|
|
/*!
|
|
Class offering a range of simple utility methods.
|
|
|
|
*/
|
|
|
|
class Utils {
|
|
public:
|
|
|
|
/*! Constructor
|
|
*/
|
|
explicit Utils();
|
|
|
|
/*! Destructor.
|
|
*/
|
|
virtual ~Utils();
|
|
|
|
/*! Writes an index character to a binary file.
|
|
The character is written to the file as bytes, not necessarily in
|
|
the order in which they come in the character.
|
|
\param file file to write the character to
|
|
\param character character to write
|
|
*/
|
|
static void writeIndexCharacter(std::ofstream & file,
|
|
INDEX_CHARACTER_TYPE character);
|
|
|
|
/*! Writes a marker to a binary file.
|
|
The marker is written to the file as bytes, not necessarily in
|
|
the order in which they come in the marker.
|
|
\param file file to write the marker to
|
|
\param marker marker to write
|
|
*/
|
|
static void writeMarker(std::ofstream & file,
|
|
SUFFIX_MARKER_TYPE marker);
|
|
|
|
/*! Reads an index character from a binary file.
|
|
Reading restores the order of the bytes in the original
|
|
character, which was written to the file.
|
|
\param file file to read the character from
|
|
\returns read character
|
|
*/
|
|
static INDEX_CHARACTER_TYPE readIndexCharacter(std::ifstream & file);
|
|
|
|
/*! Reads a marker from a binary file.
|
|
Reading restores the order of the bytes in the original
|
|
marker, which was written to the file.
|
|
\param file file to read the marker from
|
|
\returns read marker
|
|
*/
|
|
static SUFFIX_MARKER_TYPE readMarker(std::ifstream & file);
|
|
|
|
/*! Converts a vector of index characters to higher resolution array.
|
|
\param input vector of index characters
|
|
\returns array of smaller parts of characters
|
|
*/
|
|
static sauchar_t * indexVectorToSaucharArray(
|
|
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
|
|
|
/*! Converts a vector of index characters to higher resolution vector.
|
|
\param input vector of index characters
|
|
\returns vector of smaller parts of characters
|
|
*/
|
|
static std::vector<sauchar_t> indexVectorToSaucharVector(
|
|
const std::vector<INDEX_CHARACTER_TYPE> & input);
|
|
|
|
/*! Appends an index character to a high resolution vector.
|
|
\param vector vector to be appended to
|
|
\param character character to append
|
|
*/
|
|
static void appendCharToSaucharVector(
|
|
boost::shared_ptr<std::vector<sauchar_t> > vector,
|
|
INDEX_CHARACTER_TYPE character);
|
|
|
|
/*! Appends an index character to a high resolution vector.
|
|
\param vector vector to be appended to
|
|
\param character character to append
|
|
*/
|
|
static void appendCharToSaucharVector(
|
|
std::vector<sauchar_t> & vector,
|
|
INDEX_CHARACTER_TYPE character);
|
|
|
|
/*! Prints a given vector to standard output.
|
|
\param vector vector to be printed
|
|
*/
|
|
template <typename T>
|
|
static void printVector(const std::vector<T> & vector);
|
|
|
|
/*! Retrieves sentence id from a marker.
|
|
\param marker input marker
|
|
\returns sentence id
|
|
*/
|
|
static SUFFIX_MARKER_TYPE getIdFromMarker(SUFFIX_MARKER_TYPE marker);
|
|
|
|
/*! Retrieves offset from a marker.
|
|
\param marker input marker
|
|
\returns offset
|
|
*/
|
|
static SUFFIX_MARKER_TYPE getOffsetFromMarker(SUFFIX_MARKER_TYPE marker);
|
|
|
|
/*! Retrieves example length from a marker.
|
|
\param marker input marker
|
|
\returns example length
|
|
*/
|
|
static SUFFIX_MARKER_TYPE getLengthFromMarker(SUFFIX_MARKER_TYPE marker);
|
|
|
|
/*! Creates a marker from given data.
|
|
\param id sentence id
|
|
\param offset offset
|
|
\param length example length
|
|
\returns generated marker
|
|
*/
|
|
static SUFFIX_MARKER_TYPE createMarker(SUFFIX_MARKER_TYPE id,
|
|
SUFFIX_MARKER_TYPE offset,
|
|
SUFFIX_MARKER_TYPE length);
|
|
|
|
/*! Computes overlay score based on a list of non-intersecting intervals.
|
|
\param intervalList list of the intervals
|
|
\param sentenceSize the total size of the sentence (or pattern)
|
|
\param k significance factor. When set to 1, the significance is neutral.
|
|
\returns score
|
|
*/
|
|
static double getLogarithmicOverlay(
|
|
const std::vector<Interval> & intervalList,
|
|
SUFFIX_MARKER_TYPE sentenceSize,
|
|
double k);
|
|
|
|
/*! Computes overlay score based on a list of non-intersecting fragments.
|
|
\param intervalList list of the intervals
|
|
\param sentenceSize the total size of the sentence (or pattern)
|
|
\param k significance factor. When set to 1, the significance is neutral.
|
|
\returns score
|
|
*/
|
|
static double getLogarithmicOverlay(
|
|
const std::vector<MatchedPatternFragment> & fragmentList,
|
|
SUFFIX_MARKER_TYPE patternSize,
|
|
double k);
|
|
|
|
/*! Field holding the maximum sentence size allowed in the index.
|
|
*/
|
|
static SUFFIX_MARKER_TYPE maxSentenceSize;
|
|
|
|
private:
|
|
static void _insertCharToSaucharArray(sauchar_t * array,
|
|
INDEX_CHARACTER_TYPE character, int pos);
|
|
|
|
static int _idBytes;
|
|
};
|
|
|
|
template <typename T>
|
|
void Utils::printVector(const std::vector<T> & vector) {
|
|
for (int i = 0; i < vector.size(); i++) {
|
|
std::cout << static_cast<int>(vector.at(i)) << " ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
#endif
|