concordia-library/concordia/tm_matches.cpp
rjawor 8a38831306 logarithmic score
Former-commit-id: ec2704b3a206cc39ed42d19620bef6ce0fedbc7e
2014-03-14 12:05:06 +01:00

82 lines
2.6 KiB
C++

#include "concordia/tm_matches.hpp"
#include <boost/foreach.hpp>
#include <math.h>
/**
 * Builds a match record for one example/pattern pair.
 *
 * \param exampleId   stored in _exampleId
 * \param exampleSize stored in _exampleSize; later used as the sentence size
 *                    of the example side when scoring
 * \param patternSize stored in _patternSize; later used as the sentence size
 *                    of the pattern side when scoring
 *
 * Only the three members above appear in the initializer list. The
 * matched-region containers are not listed and are therefore
 * default-constructed.
 */
TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
const unsigned char exampleSize,
const unsigned char patternSize):
_exampleId(exampleId),
_exampleSize(exampleSize),
_patternSize(patternSize) {
// NOTE(review): _score is not initialized here. Unless the header gives it
// an initializer, it presumably holds an indeterminate value until
// calculateScore() or calculateSimpleScore() is called - confirm.
}
/**
 * Destructor. The body is empty: nothing is released explicitly here.
 *
 * NOTE(review): the Interval objects created with `new` in
 * addExampleInterval()/addPatternInterval() are presumably owned and freed
 * by the boost::ptr_vector members - confirm against the header.
 */
TmMatches::~TmMatches() {
}
/**
 * Computes the logarithmic score and stores it in _score.
 *
 * The score is the arithmetic mean of two logarithmic overlays:
 *  - the example side, over _exampleMatchedRegions and _exampleSize, k = 1.0
 *  - the pattern side, over _patternMatchedRegions and _patternSize, k = 2.0
 */
void TmMatches::calculateScore() {
    // Example side first, then pattern side (same call order as before).
    const double exampleSideOverlay =
        _getLogarithmicOverlay(_exampleMatchedRegions, _exampleSize, 1.0);
    const double patternSideOverlay =
        _getLogarithmicOverlay(_patternMatchedRegions, _patternSize, 2.0);

    const double overlaySum = exampleSideOverlay + patternSideOverlay;
    _score = overlaySum / 2.0;
}
/**
 * Computes the simple (linear) score and stores it in _score.
 *
 * The score is the total length of all matched regions on both sides
 * divided by the combined size of the example and the pattern:
 *
 *   (sum of example interval lengths + sum of pattern interval lengths)
 *   / (_exampleSize + _patternSize)
 *
 * When both sizes are zero the score is defined as 0 instead of dividing
 * by zero.
 */
void TmMatches::calculateSimpleScore() {
    // Accumulate in int: an unsigned char accumulator would silently wrap
    // modulo 256 as soon as the summed lengths exceeded 255.
    int exampleMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
        exampleMatchedLength += interval.getLength();
    }

    int patternMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
        patternMatchedLength += interval.getLength();
    }

    const int totalSize = static_cast<int>(_exampleSize)
                          + static_cast<int>(_patternSize);
    if (totalSize == 0) {
        // Nothing to match against; avoid producing NaN/inf from x / 0.
        _score = 0.0;
        return;
    }

    _score = static_cast<double>(exampleMatchedLength + patternMatchedLength)
             / static_cast<double>(totalSize);
}
/**
 * Records Interval(start, end) as a matched region of the example, unless
 * it intersects a region that is already recorded, in which case the call
 * does nothing.
 *
 * \param start first constructor argument of the new Interval
 * \param end   second constructor argument of the new Interval
 */
void TmMatches::addExampleInterval(int start, int end) {
    const bool overlapsExisting =
        _alreadyIntersects(_exampleMatchedRegions, start, end);
    if (overlapsExisting) {
        return;
    }

    // The container receives the raw pointer; presumably (boost::ptr_vector)
    // it takes ownership of the new Interval.
    _exampleMatchedRegions.push_back(new Interval(start, end));
}
/**
 * Records Interval(start, end) as a matched region of the pattern, unless
 * it intersects a region that is already recorded, in which case the call
 * does nothing.
 *
 * \param start first constructor argument of the new Interval
 * \param end   second constructor argument of the new Interval
 */
void TmMatches::addPatternInterval(int start, int end) {
    const bool overlapsExisting =
        _alreadyIntersects(_patternMatchedRegions, start, end);
    if (overlapsExisting) {
        return;
    }

    // The container receives the raw pointer; presumably (boost::ptr_vector)
    // it takes ownership of the new Interval.
    _patternMatchedRegions.push_back(new Interval(start, end));
}
/**
 * Tells whether Interval(start, end) intersects any interval in the list.
 *
 * \param intervalList intervals to test against
 * \param start        first constructor argument of the candidate Interval
 * \param end          second constructor argument of the candidate Interval
 * \return true as soon as one interval in the list reports an intersection
 *         with the candidate; false if none does (including an empty list)
 *
 * NOTE(review): intervalList is taken by value, so the caller's list is
 * copied on every call. Passing by const reference would avoid that but
 * requires changing the declaration in the header as well.
 */
bool TmMatches::_alreadyIntersects(
    boost::ptr_vector<Interval> intervalList,
    int start, int end) {
    // Automatic storage instead of new/delete: no manual cleanup on each
    // return path and no leak if intersects() throws.
    Interval candidate(start, end);

    BOOST_FOREACH(Interval & existing, intervalList) {
        if (existing.intersects(candidate)) {
            return true;
        }
    }
    return false;
}
/**
 * Computes the logarithmic overlay of a set of intervals over a sentence.
 *
 * For every interval of length len the contribution is
 *
 *   (len / sentenceSize) * (log(len + 1) / log(sentenceSize + 1)) ^ (1 / k)
 *
 * and the result is the sum of all contributions.
 *
 * \param intervalList intervals to score
 * \param sentenceSize size of the sentence the intervals belong to
 * \param k            root applied to the logarithmic ratio (exponent 1/k)
 * \return the summed overlay; 0 when intervalList is empty
 */
double TmMatches::_getLogarithmicOverlay(
    boost::ptr_vector<Interval> intervalList,
    unsigned char sentenceSize,
    double k) {
    // Terms that do not depend on the individual interval.
    const double sentenceLogSize = log(sentenceSize + 1);
    const double exponent = 1 / k;

    double total = 0;
    BOOST_FOREACH(Interval & region, intervalList) {
        // Linear share of the sentence covered by this interval.
        const double coverage = static_cast<double>(region.getLength())
                                / static_cast<double>(sentenceSize);
        // Logarithmic ratio of interval length to sentence length.
        const double logRatio = log(region.getLength() + 1) / sentenceLogSize;
        total += coverage * pow(logRatio, exponent);
    }
    return total;
}