logarithmic score

Former-commit-id: ec2704b3a206cc39ed42d19620bef6ce0fedbc7e
This commit is contained in:
rjawor 2014-03-14 12:05:06 +01:00
parent 4b921decae
commit 8a38831306
3 changed files with 53 additions and 14 deletions

View File

@ -17,4 +17,32 @@ BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 )
BOOST_CHECK_EQUAL(tmMatches.getScore(),0.35);
}
// Logarithmic score with no matched intervals at all: nothing is added to
// either the pattern or the example side before calculateScore() is called,
// and the resulting score is expected to be exactly 0.
BOOST_AUTO_TEST_CASE( TmMatchesLogarithmicScore1 )
{
// Example id 0; both size arguments are 10 (presumably example size and
// pattern size -- confirm against the TmMatches constructor).
TmMatches tmMatches(0,10,10);
tmMatches.calculateScore();
// Exact floating-point comparison: the expected value is exactly 0.0.
BOOST_CHECK_EQUAL(tmMatches.getScore(),0.0);
}
// Logarithmic score when one interval (0,10) is registered on both the
// pattern and the example side of a 10/10 match; the expected score is
// exactly 1.
// NOTE(review): presumably (0,10) covers the whole sentence of size 10 --
// confirm against the Interval class.
BOOST_AUTO_TEST_CASE( TmMatchesLogarithmicScore2 )
{
TmMatches tmMatches(0,10,10);
tmMatches.addPatternInterval(0,10);
tmMatches.addExampleInterval(0,10);
tmMatches.calculateScore();
// Exact floating-point comparison: the expected value is exactly 1.0.
BOOST_CHECK_EQUAL(tmMatches.getScore(),1.0);
}
// Logarithmic score for a partial match: pattern interval (2,5) and example
// interval (1,5) on a 10/10 match.
// The expected value 0.2482 is consistent with interval length = end - start:
//   pattern: (3/10) * sqrt(log(4) / log(11)) ~= 0.2281
//   example: (4/10) * (log(5) / log(11))     ~= 0.2685
//   score  : (0.2281 + 0.2685) / 2           ~= 0.2483
BOOST_AUTO_TEST_CASE( TmMatchesLogarithmicScore3 )
{
TmMatches tmMatches(0,10,10);
tmMatches.addPatternInterval(2,5);
tmMatches.addExampleInterval(1,5);
tmMatches.calculateScore();
// BOOST_CHECK_CLOSE takes its tolerance in percent, so 0.1 means 0.1%.
BOOST_CHECK_CLOSE(tmMatches.getScore(),0.2482, 0.1);
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -1,6 +1,6 @@
#include "concordia/tm_matches.hpp"
#include <boost/foreach.hpp>
#include <math.h>
TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
const unsigned char exampleSize,
@ -14,20 +14,13 @@ TmMatches::~TmMatches() {
}
/**
 * Computes the logarithmic match score and stores it in _score.
 *
 * The score is the arithmetic mean of two logarithmic overlays:
 *  - the overlay of the example by its matched regions, computed with k = 1.0,
 *  - the overlay of the pattern by its matched regions, computed with k = 2.0.
 */
void TmMatches::calculateScore() {
    // Values of k passed to _getLogarithmicOverlay for each side.
    const double exampleK = 1.0;
    const double patternK = 2.0;

    const double exampleCoverage = _getLogarithmicOverlay(
        _exampleMatchedRegions, _exampleSize, exampleK);
    const double patternCoverage = _getLogarithmicOverlay(
        _patternMatchedRegions, _patternSize, patternK);

    // Both sides contribute equally to the final score.
    _score = (exampleCoverage + patternCoverage) / 2.0;
}
void TmMatches::calculateSimpleScore() {
@ -72,3 +65,17 @@ bool TmMatches::_alreadyIntersects(
return false;
}
/**
 * Computes the logarithmically weighted overlay of a sentence by a list of
 * matched intervals.
 *
 * Every interval of length len contributes
 *     (len / sentenceSize) * (log(len + 1) / log(sentenceSize + 1)) ^ (1 / k)
 * and the contributions of all intervals are summed.
 *
 * \param intervalList matched intervals; taken by value to stay in line with
 *                     the declaration in the header
 * \param sentenceSize size of the sentence the intervals refer to
 * \param k            divisor of the exponent: the logarithmic ratio is
 *                     raised to the power 1 / k
 * \return the summed overlay score; 0 if intervalList is empty or
 *         sentenceSize is 0
 */
double TmMatches::_getLogarithmicOverlay(boost::ptr_vector<Interval> intervalList,
                                         unsigned char sentenceSize,
                                         double k) {
    // A sentence of size 0 cannot be covered by anything. Without this guard
    // both quotients below would divide by zero (log(0 + 1) == 0) and the
    // result would be inf or NaN.
    if (sentenceSize == 0) {
        return 0.0;
    }

    // Loop-invariant values, computed once.
    const double sentenceLength = static_cast<double>(sentenceSize);
    const double logSentenceLength = log(sentenceLength + 1.0);
    const double exponent = 1.0 / k;

    double overlayScore = 0.0;
    BOOST_FOREACH(Interval & interval, intervalList) {
        const double intervalLength = static_cast<double>(interval.getLength());
        // Plain fraction of the sentence covered by this interval.
        const double intervalOverlay = intervalLength / sentenceLength;
        // Weight derived from the logarithmic length ratio.
        const double significanceFactor =
            pow(log(intervalLength + 1.0) / logSentenceLength, exponent);
        overlayScore += intervalOverlay * significanceFactor;
    }
    return overlayScore;
}

View File

@ -44,6 +44,10 @@ private:
// NOTE(review): presumably reports whether the range given by start and end
// overlaps an interval already present in intervalList -- the definition is
// not fully visible here; confirm against tm_matches.cpp.
bool _alreadyIntersects(boost::ptr_vector<Interval> intervalList,
int start, int end);
// Sums, over all intervals in intervalList, the value
//     (len / sentenceSize) * (log(len + 1) / log(sentenceSize + 1)) ^ (1 / k)
// where len is the interval's getLength(). calculateScore() calls it with
// k = 1.0 for the example side and k = 2.0 for the pattern side.
double _getLogarithmicOverlay(boost::ptr_vector<Interval> intervalList,
unsigned char sentenceSize,
double k);
SUFFIX_MARKER_TYPE _exampleId;
boost::ptr_vector<Interval> _exampleMatchedRegions;