anubis search stub

Former-commit-id: 41cf0c8811767219f6f58bc06d9729d724269e73
This commit is contained in:
rjawor 2014-03-11 14:32:10 +01:00
parent d5e692ebfd
commit 655087582e
8 changed files with 332 additions and 0 deletions

View File

@ -0,0 +1,12 @@
#include "concordia/anubis_search_result.hpp"
/*!
  Creates a search result holding the given example identifier and score.
  \param exampleId identifier of the example that was found
  \param score score assigned to that example
*/
AnubisSearchResult::AnubisSearchResult(
        const SUFFIX_MARKER_TYPE & exampleId,
        const double score)
    : _exampleId(exampleId), _score(score) {}
/*! Destructor. The body is intentionally empty. */
AnubisSearchResult::~AnubisSearchResult() {}

View File

@ -0,0 +1,36 @@
#ifndef ANUBIS_SEARCH_RESULT_HDR
#define ANUBIS_SEARCH_RESULT_HDR
#include "concordia/common/config.hpp"
/*!
Class representing an example found by anubis search.
*/
using namespace std;
class AnubisSearchResult {
public:
    /*! Constructor.
      \param exampleId identifier of the found example
      \param score score of the found example
    */
    explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
                                const double score);

    /*! Destructor.
    */
    virtual ~AnubisSearchResult();

    /*! \returns the example identifier given at construction */
    SUFFIX_MARKER_TYPE getExampleId() const { return _exampleId; }

    /*! \returns the score given at construction */
    double getScore() const { return _score; }

private:
    /*! Identifier passed to the constructor. */
    SUFFIX_MARKER_TYPE _exampleId;

    /*! Score passed to the constructor. */
    double _score;
};
#endif

20
concordia/interval.cpp Normal file
View File

@ -0,0 +1,20 @@
#include "concordia/interval.hpp"
/*!
  Creates an interval from its two bounds. The bounds are stored exactly
  as given; their relative order is not checked.
  \param start first bound of the interval
  \param end second bound of the interval
*/
Interval::Interval(const unsigned char start, const unsigned char end)
    : _start(start), _end(end) {}
/*! Destructor. The body is intentionally empty. */
Interval::~Interval() {}
/*!
  Checks whether this interval overlaps the given one. The end bound is
  treated as exclusive: two intervals that merely touch (the end of one
  equals the start of the other) do not intersect.
  \param interval the interval to compare against
  \returns true if the two intervals overlap
*/
bool Interval::intersects(Interval & interval) {
    // Both conditions must hold; this is the De Morgan form of
    // "neither interval lies entirely before the other".
    const bool thisStartsBeforeOtherEnds = _start < interval.getEnd();
    const bool otherStartsBeforeThisEnds = interval.getStart() < _end;
    return thisStartsBeforeOtherEnds && otherStartsBeforeThisEnds;
}
/*!
  \returns the difference between the end and start bounds, narrowed to
           unsigned char
*/
unsigned char Interval::getLength() {
    // The subtraction is carried out on promoted ints; the result is then
    // narrowed to the return type.
    const int span = _end - _start;
    return static_cast<unsigned char>(span);
}

37
concordia/interval.hpp Normal file
View File

@ -0,0 +1,37 @@
#ifndef INTERVAL_HDR
#define INTERVAL_HDR
/*!
Class representing word interval.
*/
using namespace std;
class Interval {
public:
    /*! Constructor.
      \param start first bound of the interval
      \param end second bound of the interval
    */
    explicit Interval(const unsigned char start, const unsigned char end);

    /*! Destructor.
    */
    virtual ~Interval();

    /*! Checks whether this interval overlaps the given one.
      \param interval the interval to compare against
      \returns true if the intervals overlap
    */
    bool intersects(Interval & interval);

    /*! \returns the length of the interval (end minus start) */
    unsigned char getLength();

    /*! \returns the start bound given at construction */
    unsigned char getStart() const { return _start; }

    /*! \returns the end bound given at construction */
    unsigned char getEnd() const { return _end; }

private:
    /*! Start bound of the interval. */
    unsigned char _start;

    /*! End bound of the interval. */
    unsigned char _end;
};
#endif

View File

@ -0,0 +1,73 @@
#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/interval.hpp"
#include "concordia/common/config.hpp"
using namespace std;
// Unit tests for Interval::intersects. Every scenario is checked in both
// directions, since the relation is expected to be symmetric.
BOOST_AUTO_TEST_SUITE(interval)

// A gap separates the two intervals; the first one comes earlier.
BOOST_AUTO_TEST_CASE( IntervalIntersects1 )
{
    Interval first(2, 5);
    Interval second(6, 7);

    BOOST_CHECK(!first.intersects(second));
    BOOST_CHECK(!second.intersects(first));
}

// The first interval ends exactly where the second starts: no overlap.
BOOST_AUTO_TEST_CASE( IntervalIntersects2 )
{
    Interval first(2, 5);
    Interval second(5, 7);

    BOOST_CHECK(!first.intersects(second));
    BOOST_CHECK(!second.intersects(first));
}

// The second interval starts inside the first one.
BOOST_AUTO_TEST_CASE( IntervalIntersects3 )
{
    Interval first(2, 5);
    Interval second(4, 7);

    BOOST_CHECK(first.intersects(second));
    BOOST_CHECK(second.intersects(first));
}

// Both intervals share the same start bound.
BOOST_AUTO_TEST_CASE( IntervalIntersects4 )
{
    Interval first(3, 5);
    Interval second(3, 7);

    BOOST_CHECK(first.intersects(second));
    BOOST_CHECK(second.intersects(first));
}

// The first interval lies entirely within the second one.
BOOST_AUTO_TEST_CASE( IntervalIntersects5 )
{
    Interval first(4, 5);
    Interval second(3, 7);

    BOOST_CHECK(first.intersects(second));
    BOOST_CHECK(second.intersects(first));
}

// The first interval starts inside the second one and extends past it.
BOOST_AUTO_TEST_CASE( IntervalIntersects6 )
{
    Interval first(4, 9);
    Interval second(3, 7);

    BOOST_CHECK(first.intersects(second));
    BOOST_CHECK(second.intersects(first));
}

// The first interval starts exactly where the second ends: no overlap.
BOOST_AUTO_TEST_CASE( IntervalIntersects7 )
{
    Interval first(7, 9);
    Interval second(3, 7);

    BOOST_CHECK(!first.intersects(second));
    BOOST_CHECK(!second.intersects(first));
}

// A gap separates the two intervals; the first one comes later.
BOOST_AUTO_TEST_CASE( IntervalIntersects8 )
{
    Interval first(8, 9);
    Interval second(3, 7);

    BOOST_CHECK(!first.intersects(second));
    BOOST_CHECK(!second.intersects(first));
}

BOOST_AUTO_TEST_SUITE_END()

View File

@ -0,0 +1,20 @@
#include "tests/unit-tests/unit_tests_globals.hpp"
#include "concordia/interval.hpp"
#include "concordia/tm_matches.hpp"
#include "concordia/common/config.hpp"
using namespace std;
// Unit tests for TmMatches scoring.
BOOST_AUTO_TEST_SUITE(tm_matches)

// Example id 0, example size 10, pattern size 10.
// Pattern interval (2,5) has length 3 and example interval (1,5) has
// length 4, so the simple score is (4 + 3) / (10 + 10) = 0.35.
BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 )
{
    TmMatches matches(0, 10, 10);

    matches.addPatternInterval(2, 5);
    matches.addExampleInterval(1, 5);
    matches.calculateSimpleScore();

    BOOST_CHECK_EQUAL(matches.getScore(), 0.35);
}

BOOST_AUTO_TEST_SUITE_END()

74
concordia/tm_matches.cpp Normal file
View File

@ -0,0 +1,74 @@
#include "concordia/tm_matches.hpp"
#include <boost/foreach.hpp>
/*!
  Creates an empty set of matches for one example.

  The initializers are listed in the order in which the members are
  declared in the class (_exampleId, _patternSize, _exampleSize, _score),
  which is the order the compiler actually uses. The score starts at 0.0
  so that getScore() returns a defined value even before any of the
  calculate*Score() methods has been called.

  \param exampleId identifier of the example these matches belong to
  \param exampleSize size of the example, used as part of the score
                     denominator
  \param patternSize size of the pattern, used as part of the score
                     denominator
*/
TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
                     const unsigned char exampleSize,
                     const unsigned char patternSize):
                     _exampleId(exampleId),
                     _patternSize(patternSize),
                     _exampleSize(exampleSize),
                     _score(0.0) {
}
/*!
  Destructor. No explicit cleanup is written here; the boost::ptr_vector
  members delete the Interval objects they own when they are destroyed.
*/
TmMatches::~TmMatches() {}
/*!
  Placeholder for a refined scoring function. The entire body is currently
  commented out, so calling this method has no effect and leaves _score
  unchanged. The disabled draft below mirrors calculateSimpleScore().
*/
void TmMatches::calculateScore() {
/* TODO logarithmic function
unsigned char exampleMatchedLength = 0;
BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
exampleMatchedLength += interval.getLength();
}
unsigned char patternMatchedLength = 0;
BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
patternMatchedLength += interval.getLength();
}
_score = (double) (exampleMatchedLength + patternMatchedLength)
/ (double) (_exampleSize + _patternSize);
*/
}
/*!
  Computes the score as the fraction of covered positions:

      (matched example length + matched pattern length)
      -------------------------------------------------
               (example size + pattern size)

  and stores it in _score.

  The matched lengths are accumulated in unsigned int rather than
  unsigned char, so the running sums cannot wrap around at 255. When both
  sizes are zero the score is set to 0.0 instead of dividing zero by zero
  (which would produce NaN).
*/
void TmMatches::calculateSimpleScore() {
    unsigned int exampleMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
        exampleMatchedLength += interval.getLength();
    }

    unsigned int patternMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
        patternMatchedLength += interval.getLength();
    }

    const unsigned int totalSize =
        static_cast<unsigned int>(_exampleSize) + _patternSize;
    if (totalSize == 0) {
        // Nothing to match against; avoid a 0/0 division.
        _score = 0.0;
        return;
    }

    _score = static_cast<double>(exampleMatchedLength + patternMatchedLength)
             / static_cast<double>(totalSize);
}
/*!
  Records a matched region of the example. The interval is silently
  ignored if it intersects a region that has already been recorded.
  The int bounds are implicitly converted to unsigned char by the
  Interval constructor.
  \param start start bound of the matched region
  \param end end bound of the matched region
*/
void TmMatches::addExampleInterval(int start, int end) {
    if (_alreadyIntersects(_exampleMatchedRegions, start, end)) {
        return;
    }
    // The ptr_vector takes ownership of the newly allocated interval.
    _exampleMatchedRegions.push_back(new Interval(start, end));
}
/*!
  Records a matched region of the pattern. The interval is silently
  ignored if it intersects a region that has already been recorded.
  The int bounds are implicitly converted to unsigned char by the
  Interval constructor.
  \param start start bound of the matched region
  \param end end bound of the matched region
*/
void TmMatches::addPatternInterval(int start, int end) {
    if (_alreadyIntersects(_patternMatchedRegions, start, end)) {
        return;
    }
    // The ptr_vector takes ownership of the newly allocated interval.
    _patternMatchedRegions.push_back(new Interval(start, end));
}
/*!
  Checks whether the interval (start, end) intersects any interval already
  present in the given list.

  The candidate interval is an automatic (stack) object, so no manual
  new/delete pairing is needed and nothing can leak on any return path.

  NOTE(review): intervalList is received by value, as declared in the
  class, so the caller's list is copied on every call. Switching to a
  const reference would avoid that, but requires changing the declaration
  in the header together with this definition.

  \param intervalList intervals to check against
  \param start start bound of the candidate interval
  \param end end bound of the candidate interval
  \returns true if at least one interval in the list intersects the
           candidate
*/
bool TmMatches::_alreadyIntersects(
        boost::ptr_vector<Interval> intervalList,
        int start, int end) {
    Interval candidate(start, end);
    BOOST_FOREACH(Interval & oldInterval, intervalList) {
        if (oldInterval.intersects(candidate)) {
            return true;
        }
    }
    return false;
}

60
concordia/tm_matches.hpp Normal file
View File

@ -0,0 +1,60 @@
#ifndef TM_MATCHES_HDR
#define TM_MATCHES_HDR
#include <string>
#include "concordia/common/config.hpp"
#include "concordia/interval.hpp"
#include <boost/ptr_container/ptr_vector.hpp>
/*!
Class used within Anubis search algorithm to store partial results.
*/
using namespace std;
class TmMatches {
public:
    /*! Constructor.
      \param exampleId identifier of the example these matches belong to
      \param exampleSize size of the example
      \param patternSize size of the pattern
    */
    explicit TmMatches(const SUFFIX_MARKER_TYPE exampleId,
                       const unsigned char exampleSize,
                       const unsigned char patternSize);

    /*! Destructor.
    */
    virtual ~TmMatches();

    /*! \returns the score stored by the most recent score calculation */
    double getScore() const { return _score; }

    /*! \returns the example identifier given at construction */
    SUFFIX_MARKER_TYPE getExampleId() const { return _exampleId; }

    /*! Computes the score as the total matched length (example plus
        pattern) divided by the total size (example plus pattern).
    */
    void calculateSimpleScore();

    /*! Intended to compute a refined score; see the implementation file
        for its current state.
    */
    void calculateScore();

    /*! Records a matched region of the example, unless it intersects an
        already recorded one.
      \param start start bound of the region
      \param end end bound of the region
    */
    void addExampleInterval(int start, int end);

    /*! Records a matched region of the pattern, unless it intersects an
        already recorded one.
      \param start start bound of the region
      \param end end bound of the region
    */
    void addPatternInterval(int start, int end);

private:
    /*! Checks whether the interval (start, end) intersects any interval
        in the given list.
    */
    bool _alreadyIntersects(boost::ptr_vector<Interval> intervalList,
                            int start, int end);

    /*! Identifier passed to the constructor. */
    SUFFIX_MARKER_TYPE _exampleId;

    /*! Matched regions of the example, added via addExampleInterval(). */
    boost::ptr_vector<Interval> _exampleMatchedRegions;

    /*! Matched regions of the pattern, added via addPatternInterval(). */
    boost::ptr_vector<Interval> _patternMatchedRegions;

    /*! Pattern size passed to the constructor. */
    unsigned char _patternSize;

    /*! Example size passed to the constructor. */
    unsigned char _exampleSize;

    /*! Score written by the score calculation methods. */
    double _score;
};
#endif