anubis search stub
Former-commit-id: 41cf0c8811767219f6f58bc06d9729d724269e73
This commit is contained in:
parent
d5e692ebfd
commit
655087582e
12
concordia/anubis_search_result.cpp
Normal file
12
concordia/anubis_search_result.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include "concordia/anubis_search_result.hpp"
|
||||
|
||||
|
||||
/*!
  Constructor. Stores the given example identifier and score in the result.
  \param exampleId identifier of the found example, copied into the object
  \param score score of the found example, stored unchanged
*/
AnubisSearchResult::AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
                                       const double score)
    : _exampleId(exampleId), _score(score) {
}
|
||||
|
||||
/*! Destructor. Performs no explicit cleanup. */
AnubisSearchResult::~AnubisSearchResult() {}
|
||||
|
36
concordia/anubis_search_result.hpp
Normal file
36
concordia/anubis_search_result.hpp
Normal file
@ -0,0 +1,36 @@
|
||||
#ifndef ANUBIS_SEARCH_RESULT_HDR
|
||||
#define ANUBIS_SEARCH_RESULT_HDR
|
||||
|
||||
#include "concordia/common/config.hpp"
|
||||
|
||||
/*!
|
||||
Class representing an example found by anubis search.
|
||||
|
||||
*/
|
||||
|
||||
using namespace std;
|
||||
|
||||
class AnubisSearchResult {
|
||||
public:
|
||||
explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId, const double score);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~AnubisSearchResult();
|
||||
|
||||
SUFFIX_MARKER_TYPE getExampleId() const {
|
||||
return _exampleId;
|
||||
}
|
||||
|
||||
double getScore() const {
|
||||
return _score;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
SUFFIX_MARKER_TYPE _exampleId;
|
||||
|
||||
double _score;
|
||||
};
|
||||
|
||||
#endif
|
20
concordia/interval.cpp
Normal file
20
concordia/interval.cpp
Normal file
@ -0,0 +1,20 @@
|
||||
#include "concordia/interval.hpp"
|
||||
|
||||
|
||||
/*!
  Constructor. Stores the two bounds as given; no check is made that
  start does not exceed end.
  \param start first position of the interval
  \param end position just past the interval (see intersects())
*/
Interval::Interval(const unsigned char start, const unsigned char end)
    : _start(start), _end(end) {
}
|
||||
|
||||
/*! Destructor. Performs no explicit cleanup. */
Interval::~Interval() {}
|
||||
|
||||
/*!
  Checks whether this interval overlaps another one. The end bound is
  treated as exclusive, so intervals that merely touch (one ends where the
  other starts) do not intersect.
  \param interval the interval to compare with
  \returns true if the two intervals share at least one position
*/
bool Interval::intersects(Interval & interval) {
    // Operands are promoted to int, so the comparisons are plain integer
    // comparisons with no unsigned wrap-around.
    const int otherStart = interval.getStart();
    const int otherEnd = interval.getEnd();

    const bool startsBeforeOtherEnds = _start < otherEnd;
    const bool endsAfterOtherStarts = otherStart < _end;

    return startsBeforeOtherEnds && endsAfterOtherStarts;
}
|
||||
|
||||
unsigned char Interval::getLength() {
|
||||
return _end - _start;
|
||||
}
|
||||
|
37
concordia/interval.hpp
Normal file
37
concordia/interval.hpp
Normal file
@ -0,0 +1,37 @@
|
||||
#ifndef INTERVAL_HDR
|
||||
#define INTERVAL_HDR
|
||||
|
||||
/*!
|
||||
Class representing word interval.
|
||||
|
||||
*/
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*!
  Word interval described by a start and an end position, each stored as
  an unsigned char. The end position is exclusive: the length is
  end - start and intervals that only touch do not intersect.
*/
class Interval {
public:
    /*! Constructor.
      \param start first position of the interval
      \param end position just past the interval
    */
    explicit Interval(const unsigned char start, const unsigned char end);

    /*! Destructor.
    */
    virtual ~Interval();

    /*! Checks whether this interval overlaps the given one.
      \param interval the interval to compare with
      \returns true if the intervals overlap
    */
    bool intersects(Interval & interval);

    /*! Computes the interval length.
      \returns end minus start
    */
    unsigned char getLength();

    /*! Getter for the start position.
      \returns start position
    */
    unsigned char getStart() const { return _start; }

    /*! Getter for the end position.
      \returns end position
    */
    unsigned char getEnd() const { return _end; }

private:
    unsigned char _start;

    unsigned char _end;
};
|
||||
|
||||
#endif
|
73
concordia/t/test_interval.cpp
Normal file
73
concordia/t/test_interval.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
#include "concordia/interval.hpp"
|
||||
#include "concordia/common/config.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(interval)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects1 )
|
||||
{
|
||||
Interval interval1(2,5);
|
||||
Interval interval2(6,7);
|
||||
BOOST_CHECK(!interval1.intersects(interval2));
|
||||
BOOST_CHECK(!interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects2 )
|
||||
{
|
||||
Interval interval1(2,5);
|
||||
Interval interval2(5,7);
|
||||
BOOST_CHECK(!interval1.intersects(interval2));
|
||||
BOOST_CHECK(!interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects3 )
|
||||
{
|
||||
Interval interval1(2,5);
|
||||
Interval interval2(4,7);
|
||||
BOOST_CHECK(interval1.intersects(interval2));
|
||||
BOOST_CHECK(interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects4 )
|
||||
{
|
||||
Interval interval1(3,5);
|
||||
Interval interval2(3,7);
|
||||
BOOST_CHECK(interval1.intersects(interval2));
|
||||
BOOST_CHECK(interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects5 )
|
||||
{
|
||||
Interval interval1(4,5);
|
||||
Interval interval2(3,7);
|
||||
BOOST_CHECK(interval1.intersects(interval2));
|
||||
BOOST_CHECK(interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects6 )
|
||||
{
|
||||
Interval interval1(4,9);
|
||||
Interval interval2(3,7);
|
||||
BOOST_CHECK(interval1.intersects(interval2));
|
||||
BOOST_CHECK(interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects7 )
|
||||
{
|
||||
Interval interval1(7,9);
|
||||
Interval interval2(3,7);
|
||||
BOOST_CHECK(!interval1.intersects(interval2));
|
||||
BOOST_CHECK(!interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE( IntervalIntersects8 )
|
||||
{
|
||||
Interval interval1(8,9);
|
||||
Interval interval2(3,7);
|
||||
BOOST_CHECK(!interval1.intersects(interval2));
|
||||
BOOST_CHECK(!interval2.intersects(interval1));
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
20
concordia/t/test_tm_matches.cpp
Normal file
20
concordia/t/test_tm_matches.cpp
Normal file
@ -0,0 +1,20 @@
|
||||
#include "tests/unit-tests/unit_tests_globals.hpp"
|
||||
#include "concordia/interval.hpp"
|
||||
#include "concordia/tm_matches.hpp"
|
||||
#include "concordia/common/config.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
BOOST_AUTO_TEST_SUITE(tm_matches)
|
||||
|
||||
BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 )
|
||||
{
|
||||
TmMatches tmMatches(0,10,10);
|
||||
tmMatches.addPatternInterval(2,5);
|
||||
tmMatches.addExampleInterval(1,5);
|
||||
tmMatches.calculateSimpleScore();
|
||||
|
||||
BOOST_CHECK_EQUAL(tmMatches.getScore(),0.35);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
74
concordia/tm_matches.cpp
Normal file
74
concordia/tm_matches.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
#include "concordia/tm_matches.hpp"
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
|
||||
/*!
  Constructor. Creates an object with no matched regions and a score of 0.
  \param exampleId identifier of the example these matches refer to
  \param exampleSize size of the example, used as part of the score denominator
  \param patternSize size of the pattern, used as part of the score denominator
*/
TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
                     const unsigned char exampleSize,
                     const unsigned char patternSize):
                     // Initialisers follow the member declaration order
                     // (_patternSize is declared before _exampleSize).
                     _exampleId(exampleId),
                     _patternSize(patternSize),
                     _exampleSize(exampleSize),
                     // Give the score a defined value so that getScore()
                     // is safe to call before any calculate*Score() call.
                     _score(0.0) {
}
|
||||
|
||||
/*! Destructor. Contains no explicit cleanup code. */
TmMatches::~TmMatches() {}
|
||||
|
||||
/*!
  Placeholder for the final scoring function. Currently does nothing:
  the score is left unchanged. The draft kept in the comment below is
  the same computation as calculateSimpleScore().
*/
void TmMatches::calculateScore() {
    /* TODO logarithmic function
    unsigned char exampleMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
        exampleMatchedLength += interval.getLength();
    }

    unsigned char patternMatchedLength = 0;
    BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
        patternMatchedLength += interval.getLength();
    }

    _score = (double) (exampleMatchedLength + patternMatchedLength)
                    / (double) (_exampleSize + _patternSize);
    */
}
|
||||
|
||||
void TmMatches::calculateSimpleScore() {
|
||||
unsigned char exampleMatchedLength = 0;
|
||||
BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
|
||||
exampleMatchedLength += interval.getLength();
|
||||
}
|
||||
|
||||
unsigned char patternMatchedLength = 0;
|
||||
BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
|
||||
patternMatchedLength += interval.getLength();
|
||||
}
|
||||
|
||||
_score = (double) (exampleMatchedLength + patternMatchedLength)
|
||||
/ (double) (_exampleSize + _patternSize);
|
||||
}
|
||||
|
||||
void TmMatches::addExampleInterval(int start, int end) {
|
||||
if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) {
|
||||
_exampleMatchedRegions.push_back(new Interval(start, end));
|
||||
}
|
||||
}
|
||||
|
||||
void TmMatches::addPatternInterval(int start, int end) {
|
||||
if (!_alreadyIntersects(_patternMatchedRegions, start, end)) {
|
||||
_patternMatchedRegions.push_back(new Interval(start, end));
|
||||
}
|
||||
}
|
||||
|
||||
/*!
  Checks whether the interval (start, end) intersects any interval of
  the given list.
  \param intervalList intervals to check against
  \param start start of the candidate interval
  \param end end of the candidate interval
  \returns true if at least one interval of the list intersects the candidate

  NOTE(review): intervalList is received by value, so the list is copied
  on every call; passing it by const reference would avoid that, but it
  requires changing the declaration in the header together with this
  definition.
*/
bool TmMatches::_alreadyIntersects(
                         boost::ptr_vector<Interval> intervalList,
                         int start, int end) {
    // An automatic object replaces the former new/delete pair: there is
    // no heap allocation and nothing to release on any return path.
    Interval candidate(start, end);

    BOOST_FOREACH(Interval & oldInterval, intervalList) {
        if (oldInterval.intersects(candidate)) {
            return true;
        }
    }

    return false;
}
|
||||
|
60
concordia/tm_matches.hpp
Normal file
60
concordia/tm_matches.hpp
Normal file
@ -0,0 +1,60 @@
|
||||
#ifndef TM_MATCHES_HDR
|
||||
#define TM_MATCHES_HDR
|
||||
|
||||
#include <string>
|
||||
#include "concordia/common/config.hpp"
|
||||
#include "concordia/interval.hpp"
|
||||
#include <boost/ptr_container/ptr_vector.hpp>
|
||||
|
||||
|
||||
/*!
|
||||
Class used within Anubis search algorithm to store partial results.
|
||||
|
||||
*/
|
||||
|
||||
using namespace std;
|
||||
|
||||
class TmMatches {
|
||||
public:
|
||||
explicit TmMatches(const SUFFIX_MARKER_TYPE exampleId,
|
||||
const unsigned char exampleSize,
|
||||
const unsigned char patternSize);
|
||||
|
||||
/*! Destructor.
|
||||
*/
|
||||
virtual ~TmMatches();
|
||||
|
||||
double getScore() const {
|
||||
return _score;
|
||||
}
|
||||
|
||||
SUFFIX_MARKER_TYPE getExampleId() const {
|
||||
return _exampleId;
|
||||
}
|
||||
|
||||
void calculateSimpleScore();
|
||||
|
||||
void calculateScore();
|
||||
|
||||
void addExampleInterval(int start, int end);
|
||||
|
||||
void addPatternInterval(int start, int end);
|
||||
|
||||
private:
|
||||
bool _alreadyIntersects(boost::ptr_vector<Interval> intervalList,
|
||||
int start, int end);
|
||||
|
||||
SUFFIX_MARKER_TYPE _exampleId;
|
||||
|
||||
boost::ptr_vector<Interval> _exampleMatchedRegions;
|
||||
|
||||
boost::ptr_vector<Interval> _patternMatchedRegions;
|
||||
|
||||
unsigned char _patternSize;
|
||||
|
||||
unsigned char _exampleSize;
|
||||
|
||||
double _score;
|
||||
};
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user