From 655087582e58adbdbf017155bec8fb2014a017ac Mon Sep 17 00:00:00 2001
From: rjawor
Date: Tue, 11 Mar 2014 14:32:10 +0100
Subject: [PATCH] anubis search stub

Former-commit-id: 41cf0c8811767219f6f58bc06d9729d724269e73
---
Review notes (this section is not part of the commit message):
 * Interval::intersects() and Interval::getLength() are now const, and
   intersects() takes a const reference.
 * TmMatches::_alreadyIntersects() takes the interval list by const
   reference instead of by value (which deep-copied every stored
   interval on each call) and uses a stack-allocated probe interval.
 * TmMatches initialises _score to 0, lists its member initialisers in
   declaration order, sums matched lengths in unsigned int and guards
   calculateSimpleScore() against a 0/0 division.
 * Header names inside angle brackets were missing from the received
   copy of this patch. <boost/foreach.hpp>,
   <boost/ptr_container/ptr_vector.hpp> and the <Interval> template
   argument are restored from usage. The first include of
   tm_matches.hpp is presumably <string> - TODO confirm against the
   repository.
 * The blob ids on the "index" lines are those of the original
   submission and were not recomputed.

 concordia/anubis_search_result.cpp | 12 ++++
 concordia/anubis_search_result.hpp | 46 +++++++++++++++
 concordia/interval.cpp             | 22 ++++++++
 concordia/interval.hpp             | 56 +++++++++++++++++++
 concordia/t/test_interval.cpp      | 73 ++++++++++++++++++++++++
 concordia/t/test_tm_matches.cpp    | 38 +++++++++++++
 concordia/tm_matches.cpp           | 83 +++++++++++++++++++++++++++
 concordia/tm_matches.hpp           | 90 ++++++++++++++++++++++++++++++
 8 files changed, 420 insertions(+)
 create mode 100644 concordia/anubis_search_result.cpp
 create mode 100644 concordia/anubis_search_result.hpp
 create mode 100644 concordia/interval.cpp
 create mode 100644 concordia/interval.hpp
 create mode 100644 concordia/t/test_interval.cpp
 create mode 100644 concordia/t/test_tm_matches.cpp
 create mode 100644 concordia/tm_matches.cpp
 create mode 100644 concordia/tm_matches.hpp

diff --git a/concordia/anubis_search_result.cpp b/concordia/anubis_search_result.cpp
new file mode 100644
index 0000000..d31c9dc
--- /dev/null
+++ b/concordia/anubis_search_result.cpp
@@ -0,0 +1,12 @@
+#include "concordia/anubis_search_result.hpp"
+
+
+AnubisSearchResult::AnubisSearchResult(
+    const SUFFIX_MARKER_TYPE & exampleId, const double score):
+    _exampleId(exampleId),
+    _score(score) {
+}
+
+AnubisSearchResult::~AnubisSearchResult() {
+}
+
diff --git a/concordia/anubis_search_result.hpp b/concordia/anubis_search_result.hpp
new file mode 100644
index 0000000..b6f2037
--- /dev/null
+++ b/concordia/anubis_search_result.hpp
@@ -0,0 +1,46 @@
+#ifndef ANUBIS_SEARCH_RESULT_HDR
+#define ANUBIS_SEARCH_RESULT_HDR
+
+#include "concordia/common/config.hpp"
+
+/*!
+  Class representing an example found by anubis search.
+  Stores the id of the example together with its score.
+*/
+
+using namespace std;
+
+class AnubisSearchResult {
+public:
+    /*! Constructor.
+      \param exampleId id of the found example
+      \param score score assigned to the example
+    */
+    explicit AnubisSearchResult(const SUFFIX_MARKER_TYPE & exampleId,
+                                const double score);
+
+    /*! Destructor.
+    */
+    virtual ~AnubisSearchResult();
+
+    /*! Getter for the example id.
+      \returns example id passed to the constructor
+    */
+    SUFFIX_MARKER_TYPE getExampleId() const {
+        return _exampleId;
+    }
+
+    /*! Getter for the score.
+      \returns score passed to the constructor
+    */
+    double getScore() const {
+        return _score;
+    }
+
+private:
+    SUFFIX_MARKER_TYPE _exampleId;
+
+    double _score;
+};
+
+#endif
diff --git a/concordia/interval.cpp b/concordia/interval.cpp
new file mode 100644
index 0000000..c2eb5cd
--- /dev/null
+++ b/concordia/interval.cpp
@@ -0,0 +1,22 @@
+#include "concordia/interval.hpp"
+
+
+Interval::Interval(const unsigned char start, const unsigned char end):
+                             _start(start),
+                             _end(end) {
+}
+
+Interval::~Interval() {
+}
+
+bool Interval::intersects(const Interval & interval) const {
+    // Half-open ranges [start, end) overlap iff each one starts
+    // before the other one ends.
+    return _start < interval.getEnd() &&
+           interval.getStart() < _end;
+}
+
+unsigned char Interval::getLength() const {
+    return _end - _start;
+}
+
diff --git a/concordia/interval.hpp b/concordia/interval.hpp
new file mode 100644
index 0000000..38c45be
--- /dev/null
+++ b/concordia/interval.hpp
@@ -0,0 +1,56 @@
+#ifndef INTERVAL_HDR
+#define INTERVAL_HDR
+
+/*!
+  Class representing word interval.
+  The interval is half-open: it covers positions from start
+  (inclusive) to end (exclusive), so its length is end - start.
+*/
+
+using namespace std;
+
+class Interval {
+public:
+    /*! Constructor.
+      \param start first position covered by the interval
+      \param end position directly after the last covered one
+    */
+    explicit Interval(const unsigned char start, const unsigned char end);
+
+    /*! Destructor.
+    */
+    virtual ~Interval();
+
+    /*! Checks whether this interval overlaps the given one.
+        Intervals that merely touch (one ends where the other
+        starts) do not overlap.
+      \param interval interval to compare with
+      \returns true if the intervals overlap
+    */
+    bool intersects(const Interval & interval) const;
+
+    /*! Computes the length of the interval.
+      \returns end - start
+    */
+    unsigned char getLength() const;
+
+    /*! Getter for the start position.
+      \returns start of the interval
+    */
+    unsigned char getStart() const {
+        return _start;
+    }
+
+    /*! Getter for the end position.
+      \returns end of the interval
+    */
+    unsigned char getEnd() const {
+        return _end;
+    }
+
+private:
+    unsigned char _start;
+
+    unsigned char _end;
+};
+
+#endif
diff --git a/concordia/t/test_interval.cpp b/concordia/t/test_interval.cpp
new file mode 100644
index 0000000..3891882
--- /dev/null
+++ b/concordia/t/test_interval.cpp
@@ -0,0 +1,73 @@
+#include "tests/unit-tests/unit_tests_globals.hpp"
+#include "concordia/interval.hpp"
+#include "concordia/common/config.hpp"
+
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(interval)
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects1 )
+{
+    Interval interval1(2,5);
+    Interval interval2(6,7);
+    BOOST_CHECK(!interval1.intersects(interval2));
+    BOOST_CHECK(!interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects2 )
+{
+    Interval interval1(2,5);
+    Interval interval2(5,7);
+    BOOST_CHECK(!interval1.intersects(interval2));
+    BOOST_CHECK(!interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects3 )
+{
+    Interval interval1(2,5);
+    Interval interval2(4,7);
+    BOOST_CHECK(interval1.intersects(interval2));
+    BOOST_CHECK(interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects4 )
+{
+    Interval interval1(3,5);
+    Interval interval2(3,7);
+    BOOST_CHECK(interval1.intersects(interval2));
+    BOOST_CHECK(interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects5 )
+{
+    Interval interval1(4,5);
+    Interval interval2(3,7);
+    BOOST_CHECK(interval1.intersects(interval2));
+    BOOST_CHECK(interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects6 )
+{
+    Interval interval1(4,9);
+    Interval interval2(3,7);
+    BOOST_CHECK(interval1.intersects(interval2));
+    BOOST_CHECK(interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects7 )
+{
+    Interval interval1(7,9);
+    Interval interval2(3,7);
+    BOOST_CHECK(!interval1.intersects(interval2));
+    BOOST_CHECK(!interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_CASE( IntervalIntersects8 )
+{
+    Interval interval1(8,9);
+    Interval interval2(3,7);
+    BOOST_CHECK(!interval1.intersects(interval2));
+    BOOST_CHECK(!interval2.intersects(interval1));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/concordia/t/test_tm_matches.cpp b/concordia/t/test_tm_matches.cpp
new file mode 100644
index 0000000..5cc3108
--- /dev/null
+++ b/concordia/t/test_tm_matches.cpp
@@ -0,0 +1,38 @@
+#include "tests/unit-tests/unit_tests_globals.hpp"
+#include "concordia/interval.hpp"
+#include "concordia/tm_matches.hpp"
+#include "concordia/common/config.hpp"
+
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(tm_matches)
+
+BOOST_AUTO_TEST_CASE( TmMatchesSimpleScore1 )
+{
+    TmMatches tmMatches(0,10,10);
+    tmMatches.addPatternInterval(2,5);
+    tmMatches.addExampleInterval(1,5);
+    tmMatches.calculateSimpleScore();
+
+    BOOST_CHECK_EQUAL(tmMatches.getScore(),0.35);
+}
+
+BOOST_AUTO_TEST_CASE( TmMatchesIntersectingIntervalIgnored )
+{
+    TmMatches tmMatches(0,10,10);
+    tmMatches.addPatternInterval(2,5);
+    tmMatches.addPatternInterval(4,7);
+    tmMatches.addExampleInterval(1,5);
+    tmMatches.calculateSimpleScore();
+
+    BOOST_CHECK_EQUAL(tmMatches.getScore(),0.35);
+}
+
+BOOST_AUTO_TEST_CASE( TmMatchesInitialScore )
+{
+    TmMatches tmMatches(0,10,10);
+
+    BOOST_CHECK_EQUAL(tmMatches.getScore(),0.0);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/concordia/tm_matches.cpp b/concordia/tm_matches.cpp
new file mode 100644
index 0000000..4b7c8ca
--- /dev/null
+++ b/concordia/tm_matches.cpp
@@ -0,0 +1,83 @@
+#include "concordia/tm_matches.hpp"
+#include <boost/foreach.hpp>
+
+
+TmMatches::TmMatches(const SUFFIX_MARKER_TYPE exampleId,
+                     const unsigned char exampleSize,
+                     const unsigned char patternSize):
+                     _exampleId(exampleId),
+                     _patternSize(patternSize),
+                     _exampleSize(exampleSize),
+                     _score(0.0) {
+}
+
+TmMatches::~TmMatches() {
+}
+
+void TmMatches::calculateScore() {
+    /* TODO logarithmic function
+    unsigned char exampleMatchedLength = 0;
+    BOOST_FOREACH(Interval & interval, _exampleMatchedRegions) {
+        exampleMatchedLength += interval.getLength();
+    }
+
+    unsigned char patternMatchedLength = 0;
+    BOOST_FOREACH(Interval & interval, _patternMatchedRegions) {
+        patternMatchedLength += interval.getLength();
+    }
+
+    _score = (double) (exampleMatchedLength + patternMatchedLength)
+                    / (double) (_exampleSize + _patternSize);
+    */
+}
+
+void TmMatches::calculateSimpleScore() {
+    // Sum in unsigned int so that the totals cannot wrap around
+    // the way unsigned char accumulators would.
+    unsigned int exampleMatchedLength = 0;
+    BOOST_FOREACH(const Interval & interval, _exampleMatchedRegions) {
+        exampleMatchedLength += interval.getLength();
+    }
+
+    unsigned int patternMatchedLength = 0;
+    BOOST_FOREACH(const Interval & interval, _patternMatchedRegions) {
+        patternMatchedLength += interval.getLength();
+    }
+
+    const unsigned int totalSize = _exampleSize + _patternSize;
+    if (totalSize == 0) {
+        // Both sizes are zero - avoid the 0/0 division.
+        _score = 0.0;
+        return;
+    }
+
+    _score = static_cast<double>(exampleMatchedLength + patternMatchedLength)
+                    / static_cast<double>(totalSize);
+}
+
+void TmMatches::addExampleInterval(int start, int end) {
+    if (!_alreadyIntersects(_exampleMatchedRegions, start, end)) {
+        _exampleMatchedRegions.push_back(new Interval(start, end));
+    }
+}
+
+void TmMatches::addPatternInterval(int start, int end) {
+    if (!_alreadyIntersects(_patternMatchedRegions, start, end)) {
+        _patternMatchedRegions.push_back(new Interval(start, end));
+    }
+}
+
+bool TmMatches::_alreadyIntersects(
+                    const boost::ptr_vector<Interval> & intervalList,
+                    int start, int end) const {
+    // A stack-allocated probe needs no manual delete on any path.
+    const Interval candidate(start, end);
+    BOOST_FOREACH(const Interval & oldInterval, intervalList) {
+        if (oldInterval.intersects(candidate)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
diff --git a/concordia/tm_matches.hpp b/concordia/tm_matches.hpp
new file mode 100644
index 0000000..fee8f6f
--- /dev/null
+++ b/concordia/tm_matches.hpp
@@ -0,0 +1,90 @@
+#ifndef TM_MATCHES_HDR
+#define TM_MATCHES_HDR
+
+#include <string>
+#include "concordia/common/config.hpp"
+#include "concordia/interval.hpp"
+#include <boost/ptr_container/ptr_vector.hpp>
+
+
+/*!
+  Class used within Anubis search algorithm to store partial results.
+  Collects non-intersecting matched intervals of the pattern and
+  of a single example, and computes a score from them.
+*/
+
+using namespace std;
+
+class TmMatches {
+public:
+    /*! Constructor. The score starts at 0.
+      \param exampleId id of the example
+      \param exampleSize size of the example
+      \param patternSize size of the pattern
+    */
+    explicit TmMatches(const SUFFIX_MARKER_TYPE exampleId,
+                       const unsigned char exampleSize,
+                       const unsigned char patternSize);
+
+    /*! Destructor.
+    */
+    virtual ~TmMatches();
+
+    /*! Getter for the score.
+      \returns score set by the last calculateSimpleScore() call,
+               or 0 if it has not been called yet
+    */
+    double getScore() const {
+        return _score;
+    }
+
+    /*! Getter for the example id.
+      \returns example id passed to the constructor
+    */
+    SUFFIX_MARKER_TYPE getExampleId() const {
+        return _exampleId;
+    }
+
+    /*! Sets the score to the total length of all matched intervals
+        (example and pattern) divided by the sum of the example
+        and pattern sizes. The score is 0 when both sizes are 0.
+    */
+    void calculateSimpleScore();
+
+    /*! Not implemented yet - currently leaves the score unchanged.
+    */
+    void calculateScore();
+
+    /*! Adds a matched interval of the example. The interval is
+        ignored if it intersects an already stored one.
+      \param start start of the interval (inclusive)
+      \param end end of the interval (exclusive)
+    */
+    void addExampleInterval(int start, int end);
+
+    /*! Adds a matched interval of the pattern. The interval is
+        ignored if it intersects an already stored one.
+      \param start start of the interval (inclusive)
+      \param end end of the interval (exclusive)
+    */
+    void addPatternInterval(int start, int end);
+
+private:
+    bool _alreadyIntersects(
+                    const boost::ptr_vector<Interval> & intervalList,
+                    int start, int end) const;
+
+    SUFFIX_MARKER_TYPE _exampleId;
+
+    boost::ptr_vector<Interval> _exampleMatchedRegions;
+
+    boost::ptr_vector<Interval> _patternMatchedRegions;
+
+    unsigned char _patternSize;
+
+    unsigned char _exampleSize;
+
+    double _score;
+};
+
+#endif