#include "concordia/tm_matches.hpp"

#include <boost/foreach.hpp>
#include <math.h>

#include <vector>

/**
 * Default constructor. Zero-initializes the scalar members so that a
 * default-constructed object never exposes indeterminate values. The two
 * matched-region lists start out empty.
 */
TmMatches::TmMatches():
    _exampleId(0),
    _exampleSize(0),
    _patternSize(0),
    _score(0) {
}

/**
 * Constructs a match record for one example/pattern pair.
 *
 * \param exampleId   value stored in _exampleId
 * \param exampleSize value stored in _exampleSize; used as the sentence size
 *                    when scoring the example-side intervals
 * \param patternSize value stored in _patternSize; used as the sentence size
 *                    when scoring the pattern-side intervals
 *
 * The score starts at 0 until one of the calculate*Score() methods is called.
 */
TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
                     const SUFFIX_MARKER_TYPE exampleSize,
                     const SUFFIX_MARKER_TYPE patternSize):
    _exampleId(exampleId),
    _exampleSize(exampleSize),
    _patternSize(patternSize),
    _score(0) {
}

/** Destructor. Nothing to release explicitly. */
TmMatches::~TmMatches() {
}

/**
 * Sets _score to the arithmetic mean of the logarithmic overlays of the
 * example side (k = 1.0) and the pattern side (k = 2.0).
 * See _getLogarithmicOverlay() for the per-side formula.
 */
void TmMatches::calculateScore() {
    const double exampleOverlay =
        _getLogarithmicOverlay(_exampleMatchedRegions, _exampleSize, 1.0);
    const double patternOverlay =
        _getLogarithmicOverlay(_patternMatchedRegions, _patternSize, 2.0);

    _score = (exampleOverlay + patternOverlay) / 2.0;
}

/**
 * Sets _score to the ratio
 *   (matched example length + matched pattern length)
 *     / (example size + pattern size).
 *
 * If both sizes are zero, the score is set to 0 instead of dividing by zero.
 */
void TmMatches::calculateSimpleScore() {
    // Accumulate in SUFFIX_MARKER_TYPE rather than unsigned char: an 8-bit
    // accumulator silently wraps around once the summed lengths exceed 255.
    SUFFIX_MARKER_TYPE exampleMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
        exampleMatchedLength += interval.getLength();
    }

    SUFFIX_MARKER_TYPE patternMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
        patternMatchedLength += interval.getLength();
    }

    // Convert each operand to double before adding, so that the sums
    // themselves cannot overflow the integer type.
    const double totalSize = static_cast<double>(_exampleSize)
                             + static_cast<double>(_patternSize);
    if (totalSize == 0.0) {
        // Nothing to match against; avoid producing NaN from 0 / 0.
        _score = 0;
        return;
    }

    const double matchedLength = static_cast<double>(exampleMatchedLength)
                                 + static_cast<double>(patternMatchedLength);
    _score = matchedLength / totalSize;
}

/**
 * Records [start, end] as a matched region of the example, unless it
 * intersects a region that has already been recorded (in which case the
 * call is a no-op).
 */
void TmMatches::addExampleInterval(int start, int end) {
    if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) {
        _exampleMatchedRegions.push_back(Interval(start, end));
    }
}

/**
 * Records [start, end] as a matched region of the pattern, unless it
 * intersects a region that has already been recorded (in which case the
 * call is a no-op).
 */
void TmMatches::addPatternInterval(int start, int end) {
    if (!_alreadyIntersects(_patternMatchedRegions, start, end)) {
        _patternMatchedRegions.push_back(Interval(start, end));
    }
}

/**
 * Checks whether the interval built from (start, end) intersects any
 * interval in intervalList.
 *
 * \return true as soon as one intersecting interval is found, false if none
 *         intersects (including when the list is empty).
 */
bool TmMatches::_alreadyIntersects(
        const std::vector<Interval> & intervalList,
        int start,
        int end) {
    Interval tempInterval(start, end);
    // NOTE(review): elements are copied by value on purpose here; presumably
    // Interval::intersects() is not const-qualified, so it cannot be called
    // on elements of a const vector. Confirm against interval.hpp before
    // switching to a const reference.
    BOOST_FOREACH(Interval oldInterval, intervalList) {
        if (oldInterval.intersects(tempInterval)) {
            return true;
        }
    }
    return false;
}

/**
 * Computes the logarithmic overlay score of a list of intervals relative to
 * a sentence of the given size:
 *
 *   sum over intervals of
 *     (length / sentenceSize)
 *       * pow(log(length + 1) / log(sentenceSize + 1), 1 / k)
 *
 * \param intervalList matched intervals to score
 * \param sentenceSize size of the sentence the intervals belong to
 * \param k            root applied to the logarithmic significance factor
 * \return the summed score; 0 when the list is empty or sentenceSize is 0
 */
double TmMatches::_getLogarithmicOverlay(
        const std::vector<Interval> & intervalList,
        SUFFIX_MARKER_TYPE sentenceSize,
        double k) {
    if (sentenceSize == 0) {
        // Both denominators below would be zero (log(1) == 0), which would
        // yield inf/NaN instead of a usable score.
        return 0;
    }

    // Loop-invariant terms, computed once.
    const double sentenceLength = static_cast<double>(sentenceSize);
    const double sentenceLog = log(sentenceLength + 1.0);
    const double exponent = 1.0 / k;

    double overlayScore = 0;
    // NOTE(review): by-value iteration kept for the same reason as in
    // _alreadyIntersects() (getLength() may not be const-qualified).
    BOOST_FOREACH(Interval interval, intervalList) {
        const double intervalLength =
            static_cast<double>(interval.getLength());
        const double intervalOverlay = intervalLength / sentenceLength;
        const double significanceFactor =
            pow(log(intervalLength + 1.0) / sentenceLog, exponent);

        overlayScore += intervalOverlay * significanceFactor;
    }
    return overlayScore;
}