6ddba32f48
Former-commit-id: fa7407621e839f87613476596c6589aeceb9d796
139 lines
4.4 KiB
C++
139 lines
4.4 KiB
C++
#ifndef GENERAL_CASE_CONVERTER_HDR
|
|
#define GENERAL_CASE_CONVERTER_HDR
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include "range_based_case_converter.hpp"
|
|
#include "special_casing_converter.hpp"
|
|
#include "contextual_case_converter.hpp"
|
|
|
|
#include "utf8/utf8.h"
|
|
|
|
template<typename octet_iterator, typename output_iterator>
|
|
class GeneralCaseConverter {
|
|
|
|
public:
|
|
GeneralCaseConverter(
|
|
boost::shared_ptr<RangeBasedCaseConverter> rangeBasedCaseConverter,
|
|
boost::shared_ptr<SpecialCasingConverter> specialCasingConverter,
|
|
boost::shared_ptr<ContextualCaseConverter> contextualCaseConverter)
|
|
:rangeBasedCaseConverter_(rangeBasedCaseConverter),
|
|
specialCasingConverter_(specialCasingConverter),
|
|
contextualCaseConverter_(contextualCaseConverter) {
|
|
}
|
|
|
|
bool willBeTouchedWhenConverted(octet_iterator start, octet_iterator end) const {
|
|
while (start != end) {
|
|
uint32_t code_point = utf8::unchecked::next(start);
|
|
|
|
if (specialCasingConverter_->convert(code_point)
|
|
|| rangeBasedCaseConverter_->convert(code_point) != code_point)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool willBeTouchedWhenHeadConverted(octet_iterator start, octet_iterator end) const {
|
|
if (start == end)
|
|
return false;
|
|
|
|
octet_iterator prev_start = start;
|
|
utf8::unchecked::next(start);
|
|
return willBeTouchedWhenConverted(prev_start, start);
|
|
}
|
|
|
|
bool willBeTouchedWhenTailConverted(octet_iterator start, octet_iterator end) const {
|
|
if (start == end)
|
|
return false;
|
|
|
|
utf8::unchecked::next(start);
|
|
return willBeTouchedWhenConverted(start, end);
|
|
}
|
|
|
|
void convert(octet_iterator start, octet_iterator end, output_iterator out) const {
|
|
uint32_t prev_prev_code_point = SPECIAL_CODE_POINT;
|
|
uint32_t prev_code_point = SPECIAL_CODE_POINT;
|
|
|
|
while (start != end) {
|
|
uint32_t code_point = utf8::unchecked::next(start);
|
|
|
|
if (prev_code_point != SPECIAL_CODE_POINT)
|
|
convertSingleCodePoint(
|
|
prev_prev_code_point,
|
|
prev_code_point,
|
|
code_point,
|
|
out);
|
|
|
|
prev_prev_code_point = prev_code_point;
|
|
prev_code_point = code_point;
|
|
}
|
|
|
|
if (prev_code_point != SPECIAL_CODE_POINT)
|
|
convertSingleCodePoint(
|
|
prev_prev_code_point,
|
|
prev_code_point,
|
|
SPECIAL_CODE_POINT,
|
|
out);
|
|
}
|
|
|
|
void convertSingleCodePoint(
|
|
uint32_t prev_code_point,
|
|
uint32_t current_code_point,
|
|
uint32_t next_code_point,
|
|
output_iterator out) const {
|
|
|
|
if (const char* contextual = contextualCaseConverter_->convert(
|
|
prev_code_point,
|
|
current_code_point,
|
|
next_code_point)) {
|
|
copyCharArrayToOutputIterator_(contextual, out);
|
|
} else if (const char* special = specialCasingConverter_->convert(current_code_point)) {
|
|
copyCharArrayToOutputIterator_(special, out);
|
|
} else {
|
|
uint32_t converted_code_point = rangeBasedCaseConverter_->convert(current_code_point);
|
|
utf8::unchecked::append(converted_code_point, out);
|
|
}
|
|
}
|
|
|
|
void headConvert(octet_iterator start, octet_iterator end, output_iterator out) const {
|
|
bool first = true;
|
|
|
|
while (start != end) {
|
|
if (first) {
|
|
octet_iterator prev_start = start;
|
|
utf8::unchecked::next(start);
|
|
convert(prev_start, start, out);
|
|
first = false;
|
|
} else {
|
|
*out++ = *start++;
|
|
}
|
|
}
|
|
}
|
|
|
|
void tailConvert(octet_iterator start, octet_iterator end, output_iterator out) const {
|
|
if (start != end) {
|
|
uint32_t code_point = utf8::unchecked::next(start);
|
|
|
|
utf8::unchecked::append(code_point, out);
|
|
|
|
convert(start, end, out);
|
|
}
|
|
}
|
|
|
|
|
|
private:
|
|
void copyCharArrayToOutputIterator_(const char* charVector, output_iterator out) const {
|
|
while (*charVector)
|
|
*out++ = *charVector++;
|
|
}
|
|
|
|
boost::shared_ptr<RangeBasedCaseConverter> rangeBasedCaseConverter_;
|
|
boost::shared_ptr<SpecialCasingConverter> specialCasingConverter_;
|
|
boost::shared_ptr<ContextualCaseConverter> contextualCaseConverter_;
|
|
|
|
const static uint32_t SPECIAL_CODE_POINT = 0xFFFFFFFF;
|
|
};
|
|
|
|
#endif
|