concordia-library/concordia/tokenized_sentence.cpp
2015-06-25 10:12:51 +02:00

49 lines
1.9 KiB
C++

#include "concordia/tokenized_sentence.hpp"
#include "concordia/common/text_utils.hpp"
#include <iostream>
// Constructs a tokenized sentence wrapper around the given text.
// The text is copied into the _sentence member.
TokenizedSentence::TokenizedSentence(std::string sentence)
    : _sentence(sentence)
{
}
// Destructor. The body is intentionally empty: nothing is released
// explicitly here.
TokenizedSentence::~TokenizedSentence()
{
}
void TokenizedSentence::addAnnotations(std::vector<TokenAnnotation> annotations) {
std::vector<TokenAnnotation>::iterator newAnnotation = annotations.begin();
std::list<TokenAnnotation>::iterator existingAnnotation = _tokenAnnotations.begin();
while(newAnnotation != annotations.end()) {
if (existingAnnotation != _tokenAnnotations.end()) {
// there are still some existing annotations, so perform checks
if (newAnnotation->intersects(*existingAnnotation)) {
// The new annotation intersects with the existing.
// We can not add it, so let us just move on to the
// next new annoation.
newAnnotation++;
} else {
// it is now important whether the new interval is before
// or after existing
if (newAnnotation->getStart() < existingAnnotation->getStart()) {
// New interval does not intersect and is before existing. We add it.
_tokenAnnotations.insert(existingAnnotation, *newAnnotation);
newAnnotation++;
} else {
// If the new interval is after existing we move to the next existing annoation.
existingAnnotation++;
}
}
} else {
// no more existing annotations, so just add the new annotation
_tokenAnnotations.push_back(*newAnnotation);
newAnnotation++;
}
}
}
void TokenizedSentence::toLowerCase() {
_sentence = TextUtils::getInstance().toLowerCase(_sentence);
}