From 6ddba32f486066ed2b6afaf751a8777e02ab3575 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Jaworski?= <rafalj.jaworski@poleng.pl>
Date: Thu, 24 Apr 2014 11:51:04 +0200
Subject: [PATCH] utf8

Former-commit-id: fa7407621e839f87613476596c6589aeceb9d796
---
 CMakeLists.txt                                |   6 +-
 TODO.txt                                      |   1 +
 tests/tests.hpp                               |   7 +
 utf8/CMakeLists.txt                           |   0
 utf8/utf8.h                                   |  45 +
 utf8/utf8/checked.h                           | 327 +++++++
 utf8/utf8/core.h                              | 358 ++++++++
 utf8/utf8/unchecked.h                         | 234 +++++
 utf8case/CMakeLists.txt                       |  28 +
 utf8case/case_converter_factory.hpp           | 149 ++++
 utf8case/case_tables.cpp                      | 804 ++++++++++++++++++
 utf8case/case_tables.hpp                      |  42 +
 utf8case/contextual_case_converter.hpp        |  17 +
 utf8case/general_case_converter.hpp           | 138 +++
 utf8case/generate_case_tables.pl              | 251 ++++++
 utf8case/range_based_case_converter.cpp       |  63 ++
 utf8case/range_based_case_converter.hpp       |  27 +
 .../regular_contextual_case_converter.cpp     |  11 +
 .../regular_contextual_case_converter.hpp     |  16 +
 utf8case/simple_convert.cpp                   |  56 ++
 utf8case/simple_convert.hpp                   |  34 +
 utf8case/special_casing_converter.cpp         |  16 +
 utf8case/special_casing_converter.hpp         |  21 +
 utf8case/string_case_converter_manager.cpp    |  11 +
 utf8case/string_case_converter_manager.hpp    |  16 +
 utf8case/t/CMakeLists.txt                     |   7 +
 .../t/range_based_case_converter_tests.cpp    |  43 +
 utf8case/t/simple_convert_tests.cpp           | 176 ++++
 utf8case/t/special_casing_converter_tests.cpp |  32 +
 ..._azeri_lower_contextual_case_converter.cpp |  29 +
 ..._azeri_lower_contextual_case_converter.hpp |  21 +
 ..._azeri_upper_contextual_case_converter.cpp |  17 +
 ..._azeri_upper_contextual_case_converter.hpp |  20 +
 33 files changed, 3020 insertions(+), 3 deletions(-)
 create mode 100644 tests/tests.hpp
 create mode 100644 utf8/CMakeLists.txt
 create mode 100644 utf8/utf8.h
 create mode 100644 utf8/utf8/checked.h
 create mode 100644 utf8/utf8/core.h
 create mode 100644 utf8/utf8/unchecked.h
 create mode 100644 utf8case/CMakeLists.txt
 create mode 100644 utf8case/case_converter_factory.hpp
 create mode 100644 utf8case/case_tables.cpp
 create mode 100644 utf8case/case_tables.hpp
 create mode 100644 utf8case/contextual_case_converter.hpp
 create mode 100644 utf8case/general_case_converter.hpp
 create mode 100755 utf8case/generate_case_tables.pl
 create mode 100644 utf8case/range_based_case_converter.cpp
 create mode 100644 utf8case/range_based_case_converter.hpp
 create mode 100644 utf8case/regular_contextual_case_converter.cpp
 create mode 100644 utf8case/regular_contextual_case_converter.hpp
 create mode 100644 utf8case/simple_convert.cpp
 create mode 100644 utf8case/simple_convert.hpp
 create mode 100644 utf8case/special_casing_converter.cpp
 create mode 100644 utf8case/special_casing_converter.hpp
 create mode 100644 utf8case/string_case_converter_manager.cpp
 create mode 100644 utf8case/string_case_converter_manager.hpp
 create mode 100644 utf8case/t/CMakeLists.txt
 create mode 100644 utf8case/t/range_based_case_converter_tests.cpp
 create mode 100644 utf8case/t/simple_convert_tests.cpp
 create mode 100644 utf8case/t/special_casing_converter_tests.cpp
 create mode 100644 utf8case/turkish_and_azeri_lower_contextual_case_converter.cpp
 create mode 100644 utf8case/turkish_and_azeri_lower_contextual_case_converter.hpp
 create mode 100644 utf8case/turkish_and_azeri_upper_contextual_case_converter.cpp
 create mode 100644 utf8case/turkish_and_azeri_upper_contextual_case_converter.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b46060..d23e74a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,7 @@ file(MAKE_DIRECTORY ${PROD_RESOURCES_DIRECTORY}/temp)
 
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 
-set(BASE_TARGETS concordia)
+set(BASE_TARGETS concordia utf8case)
 
 
                 
@@ -135,7 +135,7 @@ configure_file (
 # Concordia: sub-projects
 # ================================================
 
-set(ALL_DIRECTORIES concordia concordia-console libdivsufsort)
+set(ALL_DIRECTORIES concordia concordia-console libdivsufsort utf8 utf8case)
 
 include_directories("${concordia_SOURCE_DIR}")
 
@@ -150,7 +150,7 @@ endforeach(dir)
 # Tests
 # ================================================
 
-set(TESTS_TARGETS concordia-tests)
+set(TESTS_TARGETS concordia-tests utf8case-tests)
 
 add_subdirectory(tests)
 
diff --git a/TODO.txt b/TODO.txt
index 2f60d3f..c366fad 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,3 +1,4 @@
 1. lokalizowane to_lower
 2. anonimizacja zdań
 3. Dzielenie zdań (max 255 tokenów)
+4. concordia-server
diff --git a/tests/tests.hpp b/tests/tests.hpp
new file mode 100644
index 0000000..ecbddc8
--- /dev/null
+++ b/tests/tests.hpp
@@ -0,0 +1,7 @@
+#ifndef TESTS_HDR  // Shared include for test sources: pulls in the Boost.Test unit test header.
+#define TESTS_HDR
+
+#define BOOST_TEST_NO_MAIN  // NOTE(review): presumably main() comes from a separate test-runner source; confirm against the tests build
+#include <boost/test/unit_test.hpp>
+
+#endif  // TESTS_HDR
diff --git a/utf8/CMakeLists.txt b/utf8/CMakeLists.txt
new file mode 100644
index 0000000..e69de29
diff --git a/utf8/utf8.h b/utf8/utf8.h
new file mode 100644
index 0000000..836bf69
--- /dev/null
+++ b/utf8/utf8.h
@@ -0,0 +1,45 @@
+/*!
+  ## Character encoding
+
+  UTF-8 is used uniformly throughout the PSI toolkit. All textual data is
+  assumed to be encoded in UTF-8.
+
+  Technically, UTF-8 strings are simply stored in std::string. To handle
+  UTF-8, a small external library was incorporated into the project;
+  see: http://utfcpp.sourceforge.net/
+*/
+
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
diff --git a/utf8/utf8/checked.h b/utf8/utf8/checked.h
new file mode 100644
index 0000000..383c1e5
--- /dev/null
+++ b/utf8/utf8/checked.h
@@ -0,0 +1,327 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+    // Base for the exceptions that may be thrown from the library
+    class exception : public std::exception { // common base: one catch clause covers every error class declared in this header
+    };
+
+    // Exceptions that may be thrown from the library functions.
+    class invalid_code_point : public exception { // thrown by append() and next() for a value rejected as a code point
+        uint32_t cp; // the value supplied by the thrower
+    public:
+        invalid_code_point(uint32_t cp) : cp(cp) {}
+        virtual const char* what() const throw() { return "Invalid code point"; }
+        uint32_t code_point() const {return cp;} // returns the value given to the constructor
+    };
+
+    class invalid_utf8 : public exception { // thrown by next(), prior() and previous() for a malformed UTF-8 sequence
+        uint8_t u8; // the octet supplied by the thrower
+    public:
+        invalid_utf8 (uint8_t u) : u8(u) {}
+        virtual const char* what() const throw() { return "invalid UTF-8, convert file to UTF-8 encoding and run again"; }
+        uint8_t utf8_octet() const {return u8;} // returns the octet given to the constructor
+    };
+
+    class invalid_utf16 : public exception { // thrown by utf16to8() for an unpaired or misplaced surrogate
+        uint16_t u16; // the 16-bit unit supplied by the thrower
+    public:
+        invalid_utf16 (uint16_t u) : u16(u) {}
+        virtual const char* what() const throw() { return "invalid UTF-16, convert file to UTF-16 encoding and run again"; }
+        uint16_t utf16_word() const {return u16;} // returns the unit given to the constructor
+    };
+
+    class not_enough_room : public exception { // thrown when the octet range ends before a complete sequence could be read
+    public:
+        virtual const char* what() const throw() { return "Not enough space"; }
+    };
+
+    /// The library API - functions intended to be called by the users
+
+    // Copies [start, end) to out, writing `replacement` once per invalid sequence; throws not_enough_room if the input ends mid-sequence.
+    template <typename octet_iterator> octet_iterator append(uint32_t cp, octet_iterator result); // declared early: called below, defined later
+    template <typename octet_iterator, typename output_iterator>
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            octet_iterator sequence_start = start;
+            internal::utf_error err_code = internal::validate_next(start, end);
+            switch (err_code) {
+                case internal::UTF8_OK : // valid sequence: copy its octets through unchanged
+                    for (octet_iterator it = sequence_start; it != start; ++it)
+                        *out++ = *it;
+                    break;
+                case internal::NOT_ENOUGH_ROOM:
+                    throw not_enough_room();
+                case internal::INVALID_LEAD:
+                    out = append (replacement, out); // keep the advanced iterator; out is passed by value
+                    ++start;
+                    break;
+                case internal::INCOMPLETE_SEQUENCE: case internal::OVERLONG_SEQUENCE:
+                case internal::INVALID_CODE_POINT:
+                    out = append (replacement, out);
+                    ++start;
+                    while (start != end && internal::is_trail(*start)) // one mark per sequence; test for end before reading *start
+                        ++start;
+                    break;
+            }
+        }
+        return out;
+    }
+
+    template <typename octet_iterator, typename output_iterator>
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) // overload that substitutes U+FFFD
+    {
+        static const uint32_t replacement_marker = internal::mask16(0xfffd); // U+FFFD REPLACEMENT CHARACTER
+        return replace_invalid(start, end, out, replacement_marker);
+    }
+
+    template <typename octet_iterator> // Encodes cp as UTF-8 through result; returns the iterator past the last octet written.
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!internal::is_code_point_valid(cp)) // rejects surrogates and values above CODE_POINT_MAX
+            throw invalid_code_point(cp);
+
+        if (cp < 0x80)                        // one octet
+            *(result++) = static_cast<uint8_t>(cp);
+        else if (cp < 0x800) {                // two octets
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0); // 110xxxxx
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); // 10xxxxxx
+        }
+        else if (cp < 0x10000) {              // three octets
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0); // 1110xxxx
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80); // 10xxxxxx
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); // 10xxxxxx
+        }
+        else {      // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0); // 11110xxx
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80); // 10xxxxxx
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80); // 10xxxxxx
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80); // 10xxxxxx
+        }
+        return result;
+    }
+
+    // Decodes the UTF-8 sequence at `it`, moves `it` past it and returns the code point.
+    // A failed validation is reported by exception: not_enough_room, invalid_utf8 or
+    // invalid_code_point, depending on the error code from internal::validate_next.
+    template <typename octet_iterator>
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t decoded = 0;
+        const internal::utf_error status = internal::validate_next(it, end, &decoded);
+        if (status == internal::UTF8_OK)
+            return decoded;
+        if (status == internal::NOT_ENOUGH_ROOM)
+            throw not_enough_room();
+        if (status == internal::INVALID_CODE_POINT)
+            throw invalid_code_point(decoded);
+        if (status == internal::INVALID_LEAD || status == internal::INCOMPLETE_SEQUENCE ||
+            status == internal::OVERLONG_SEQUENCE)
+            throw invalid_utf8(*it); // validate_next left `it` on the rejected sequence
+        return decoded;
+    }
+
+    template <typename octet_iterator>
+    uint32_t peek_next(octet_iterator it, octet_iterator end) // like next(), but it is taken by value so the caller's iterator does not move
+    {
+        return next(it, end);
+    }
+
+    template <typename octet_iterator>
+    uint32_t prior(octet_iterator& it, octet_iterator start) // moves it back to the preceding lead octet and returns the code point decoded there
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        octet_iterator end = it; // decoding must stop where it originally pointed
+        // Go back until we hit either a lead octet or start
+        while (internal::is_trail(*(--it)))
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        return peek_next(it, end); // decode through a copy so it stays on the lead octet
+    }
+
+    /// Deprecated in versions that include "prior"
+    template <typename octet_iterator>
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start) // steps it back to the preceding lead octet and returns its code point
+    {
+        octet_iterator end = it; // decoding must stop where it originally pointed
+        while (internal::is_trail(*(--it))) // it is decremented before any comparison with pass_start
+            if (it == pass_start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        octet_iterator temp = it; // decode through a copy so it stays on the lead octet
+        return next(temp, end);
+    }
+
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end) // moves it forward by n code points
+    {
+        for (distance_type i = 0; i < n; ++i)
+            next(it, end); // throws if the range ends or the data is malformed before n code points were passed
+    }
+
+    // Counts the code points encoded in [first, last); next() throws on malformed input.
+    template <typename octet_iterator>
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type code_points = 0;
+        while (first < last) { next(first, last); ++code_points; }
+        return code_points;
+    }
+
+    template <typename u16bit_iterator, typename octet_iterator> // Transcodes the UTF-16 units in [start, end) to UTF-8 written through result.
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = internal::mask16(*start++);
+            // Take care of surrogate pairs first
+            if (internal::is_lead_surrogate(cp)) {
+                if (start != end) {
+                    uint32_t trail_surrogate = internal::mask16(*start++);
+                    if (internal::is_trail_surrogate(trail_surrogate))
+                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; // combine the pair into one code point
+                    else
+                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); // lead not followed by a trail surrogate
+                }
+                else
+                    throw invalid_utf16(static_cast<uint16_t>(cp)); // input ended right after a lead surrogate
+
+            }
+            // Lone trail surrogate
+            else if (internal::is_trail_surrogate(cp))
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+
+            result = append(cp, result); // append() validates cp and returns the advanced output iterator
+        }
+        return result;
+    }
+
+    // Transcodes the UTF-8 in [start, end) to UTF-16 units written through `result` and returns
+    // the iterator past the last unit written. Decoding errors surface as exceptions from next().
+    template <typename u16bit_iterator, typename octet_iterator>
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start != end) {
+            const uint32_t code_point = next(start, end);
+            if (code_point > 0xffff)    // does not fit one unit: emit the lead surrogate first
+                *result++ = static_cast<uint16_t>((code_point >> 10) + internal::LEAD_OFFSET);
+            // then either the trail surrogate or the whole code point
+            *result++ = static_cast<uint16_t>(code_point > 0xffff ? (code_point & 0x3ff) + internal::TRAIL_SURROGATE_MIN : code_point);
+        }
+        return result;
+    }
+
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) // encodes each code point in [start, end) as UTF-8
+    {
+        while (start != end)
+            result = append(*(start++), result); // append() throws invalid_code_point for an invalid value
+
+        return result; // iterator past the last octet written
+    }
+
+    template <typename octet_iterator, typename u32bit_iterator>
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) // decodes the UTF-8 in [start, end) into code points
+    {
+        while (start != end)
+            (*result++) = next(start, end); // next() throws on malformed or truncated input
+
+        return result; // iterator past the last code point written
+    }
+
+    // The iterator class
+    template <typename octet_iterator>
+    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { // steps through an octet range one code point at a time
+      octet_iterator it; // current position in the octet range
+      octet_iterator range_start; // lower bound passed to prior()
+      octet_iterator range_end; // upper bound passed to next()
+      public:
+      iterator () {};
+      explicit iterator (const octet_iterator& octet_it,
+                         const octet_iterator& range_start,
+                         const octet_iterator& range_end) :
+               it(octet_it), range_start(range_start), range_end(range_end)
+      {
+          if (it < range_start || it > range_end) // the position must lie within [range_start, range_end]
+              throw std::out_of_range("Invalid utf-8 iterator position");
+      }
+      // the default "big three" are OK
+      octet_iterator base () const { return it; } // the underlying octet iterator
+      uint32_t operator * () const
+      {
+          octet_iterator temp = it; // decode through a copy so dereferencing does not advance
+          return next(temp, range_end);
+      }
+      bool operator == (const iterator& rhs) const
+      {
+          if (range_start != rhs.range_start || range_end != rhs.range_end) // comparing across different ranges is treated as a usage error
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+          return (it == rhs.it);
+      }
+      bool operator != (const iterator& rhs) const
+      {
+          return !(operator == (rhs)); // throws under the same condition as operator ==
+      }
+      iterator& operator ++ ()
+      {
+          next(it, range_end); // advance by one code point; the decoded value is discarded
+          return *this;
+      }
+      iterator operator ++ (int)
+      {
+          iterator temp = *this; // copy returned to the caller holds the old position
+          next(it, range_end);
+          return temp;
+      }
+      iterator& operator -- ()
+      {
+          prior(it, range_start); // step back by one code point; the decoded value is discarded
+          return *this;
+      }
+      iterator operator -- (int)
+      {
+          iterator temp = *this; // copy returned to the caller holds the old position
+          prior(it, range_start);
+          return temp;
+      }
+    }; // class iterator
+
+} // namespace utf8
+
+#endif //header guard
+
+
diff --git a/utf8/utf8/core.h b/utf8/utf8/core.h
new file mode 100644
index 0000000..268cf7c
--- /dev/null
+++ b/utf8/utf8/core.h
@@ -0,0 +1,358 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+namespace utf8
+{
+    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
+    // You may need to change them to match your system.
+    // These typedefs have the same names as ones from cstdint, or boost/cstdint
+    typedef unsigned char   uint8_t;  // declared inside namespace utf8, so these are utf8::uint8_t etc.
+    typedef unsigned short  uint16_t; // assumes unsigned short is 16 bits wide on the target
+    typedef unsigned int    uint32_t; // assumes unsigned int is 32 bits wide on the target
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+    // Unicode constants
+    // Leading (high) surrogates: 0xd800 - 0xdbff
+    // Trailing (low) surrogates: 0xdc00 - 0xdfff
+    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
+    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
+    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10); // utf8to16 adds this to (cp >> 10) to form the lead surrogate
+    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // utf16to8 adds this to (lead << 10) + trail to get the code point
+
+    // Maximum valid value for a Unicode code point
+    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
+
+    template<typename octet_type> // keeps the low 8 bits of oc and returns them as uint8_t
+    inline uint8_t mask8(octet_type oc)
+    {
+        return static_cast<uint8_t>(0xff & oc);
+    }
+    template<typename u16_type> // keeps the low 16 bits of oc and returns them as uint16_t
+    inline uint16_t mask16(u16_type oc)
+    {
+        return static_cast<uint16_t>(0xffff & oc);
+    }
+    template<typename octet_type> // true when the octet's top two bits are 10, the pattern tested for trail octets
+    inline bool is_trail(octet_type oc)
+    {
+        return ((mask8(oc) >> 6) == 0x2);
+    }
+
+    template <typename u16> // true when cp lies in [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX]
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
+    }
+
+    template <typename u16> // true when cp lies in [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX]
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+    }
+
+    template <typename u16> // true for any surrogate: the lead and trail ranges are adjacent, so one range test covers both
+    inline bool is_surrogate(u16 cp)
+    {
+        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
+    }
+
+    template <typename u32> // valid means: not above CODE_POINT_MAX and not a surrogate
+    inline bool is_code_point_valid(u32 cp)
+    {
+        return (cp <= CODE_POINT_MAX && !is_surrogate(cp));
+    }
+
+    // Length in octets announced by the lead octet at lead_it; 0 if it matches no lead pattern.
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        const uint8_t first_octet = mask8(*lead_it);
+        if ((first_octet & 0x80) == 0x00)   // 0xxxxxxx
+            return 1;
+        if ((first_octet & 0xe0) == 0xc0)   // 110xxxxx
+            return 2;
+        if ((first_octet & 0xf0) == 0xe0)   // 1110xxxx
+            return 3;
+        if ((first_octet & 0xf8) == 0xf0)   // 11110xxx
+            return 4;
+        return 0;
+    }
+
+    // Tells whether a code point was encoded with a length other than the one its value calls for.
+    // Values below 0x80 must use 1 octet, below 0x800 2 octets and below 0x10000 3 octets;
+    // larger values are never reported as overlong by this check.
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        if (cp < 0x80)
+            return length != 1;
+
+        if (cp < 0x800)
+            return length != 2;
+
+        if (cp < 0x10000)
+            return length != 3;
+
+        // 0x10000 and above: no length constraint is applied here
+        return false;
+    }
+
+    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; // result codes of validate_next and the get_sequence_x helpers
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+
+    template <typename octet_iterator> // one-octet sequence: stores mask8(*it) through code_point; it is not advanced
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t* code_point)
+    {
+        if (it != end) {
+            if (code_point) // a null code_point means the caller only wants the status
+                *code_point = mask8(*it);
+            return UTF8_OK;
+        }
+        return NOT_ENOUGH_ROOM;
+    }
+
+    template <typename octet_iterator> // two-octet sequence: on success it is left on the trail octet (the last octet of the sequence)
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t* code_point)
+    {
+        utf_error ret_code = NOT_ENOUGH_ROOM; // result when it == end on entry
+
+        if (it != end) {
+            uint32_t cp = mask8(*it);
+            if (++it != end) {
+                if (is_trail(*it)) {
+                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); // 5 payload bits from the lead, 6 from the trail
+
+                    if (code_point)
+                        *code_point = cp;
+                    ret_code = UTF8_OK;
+                }
+                else
+                    ret_code = INCOMPLETE_SEQUENCE; // following octet is not a trail octet
+            }
+            else
+                ret_code = NOT_ENOUGH_ROOM; // range ended after the lead octet
+        }
+
+        return ret_code; // on failure it may have moved; this function does not restore it
+    }
+
+    template <typename octet_iterator> // three-octet sequence: on success it is left on the last octet of the sequence
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t* code_point)
+    {
+        utf_error ret_code = NOT_ENOUGH_ROOM; // result when it == end on entry
+
+        if (it != end) {
+            uint32_t cp = mask8(*it);
+            if (++it != end) {
+                if (is_trail(*it)) {
+                    cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff); // 4 payload bits from the lead, 6 from the first trail
+                    if (++it != end) {
+                        if (is_trail(*it)) {
+                            cp += (*it) & 0x3f; // 6 payload bits from the second trail
+
+                            if (code_point)
+                                *code_point = cp;
+                            ret_code = UTF8_OK;
+                        }
+                        else 
+                            ret_code = INCOMPLETE_SEQUENCE; // third octet is not a trail octet
+                    }
+                    else
+                        ret_code = NOT_ENOUGH_ROOM; // range ended after two octets
+                }
+                else
+                    ret_code = INCOMPLETE_SEQUENCE; // second octet is not a trail octet
+            }
+            else
+                ret_code = NOT_ENOUGH_ROOM; // range ended after the lead octet
+        }
+
+        return ret_code; // on failure it may have moved; this function does not restore it
+    }
+
+    template <typename octet_iterator> // four-octet sequence: on success it is left on the last octet of the sequence
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t* code_point)
+    {
+        utf_error ret_code = NOT_ENOUGH_ROOM; // result when it == end on entry
+
+        if (it != end) {
+            uint32_t cp = mask8(*it);
+            if (++it != end) {
+                if (is_trail(*it)) {
+                    cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff); // 3 payload bits from the lead, 6 from the first trail
+                    if (++it != end) {
+                        if (is_trail(*it)) {
+                            cp += (mask8(*it) << 6) & 0xfff; // 6 payload bits from the second trail
+                            if (++it != end) {
+                                if (is_trail(*it)) {
+                                    cp += (*it) & 0x3f; // 6 payload bits from the third trail
+
+                                    if (code_point)
+                                        *code_point = cp;
+                                    ret_code = UTF8_OK;
+                                }
+                                else
+                                    ret_code = INCOMPLETE_SEQUENCE; // fourth octet is not a trail octet
+                            }
+                            else
+                                ret_code = NOT_ENOUGH_ROOM; // range ended after three octets
+                        }
+                        else
+                            ret_code = INCOMPLETE_SEQUENCE; // third octet is not a trail octet
+                    }
+                    else
+                        ret_code = NOT_ENOUGH_ROOM; // range ended after two octets
+                }
+                else 
+                    ret_code = INCOMPLETE_SEQUENCE; // second octet is not a trail octet
+            }
+            else
+                ret_code = NOT_ENOUGH_ROOM; // range ended after the lead octet
+        }
+
+        return ret_code; // on failure it may have moved; this function does not restore it
+    }
+
+    // Validates and decodes the UTF-8 sequence starting at it. On success it moves past the sequence and UTF8_OK is returned;
+    // on failure it is left unchanged. The decoded value is stored via code_point (if non-null) once all octets were read.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point)
+    {
+        // An empty range has no lead octet: report it before sequence_length() reads *it
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with e.g. stream iterators
+        octet_iterator original_it = it;
+        uint32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        octet_difference_type length = sequence_length(it);
+        if (length == 0)
+            return INVALID_LEAD;
+        // Now that we have a valid sequence length, get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 1:
+                err = get_sequence_1(it, end, &cp);
+                break;
+            case 2:
+                err = get_sequence_2(it, end, &cp);
+            break;
+            case 3:
+                err = get_sequence_3(it, end, &cp);
+            break;
+            case 4:
+                err = get_sequence_4(it, end, &cp);
+            break;
+        }
+
+        if (err == UTF8_OK) {
+            // Decoding succeeded. Hand the value back even if it is rejected below, so callers can report it
+            if (code_point)
+                *code_point = cp;
+            // Now, security checks...
+            if (!is_code_point_valid(cp))
+                err = INVALID_CODE_POINT;
+            else if (is_overlong_sequence(cp, length))
+                err = OVERLONG_SEQUENCE;
+            else {
+                ++it; // get_sequence_x stops on the last octet of the sequence
+                return UTF8_OK;
+            }
+        }
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) { // validation only: passes a null code_point, so no value is stored
+        return validate_next(it, end, 0);
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // Byte order mark
+    const uint8_t bom[] = {0xef, 0xbb, 0xbf}; // compared octet by octet in starts_with_bom and is_bom
+
+    // Scans [start, end) one sequence at a time and returns the position of the first sequence
+    // that fails validation, or `end` when every sequence validates.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        // validate_next advances the cursor on success and leaves it on the bad sequence otherwise
+        while (cursor != end && internal::validate_next(cursor, end) == internal::UTF8_OK)
+            continue;
+        return cursor;
+    }
+
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end) // true when find_invalid reaches end, i.e. no sequence in the range fails validation
+    {
+        return (find_invalid(start, end) == end);
+    }
+
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end) // true when the range begins with the three octets of bom
+    {
+        return (
+            ((it != end) && (internal::mask8(*it++)) == bom[0]) && // each octet is read only after checking it != end
+            ((it != end) && (internal::mask8(*it++)) == bom[1]) &&
+            ((it != end) && (internal::mask8(*it))   == bom[2])
+           );
+    }
+	
+	//Deprecated in release 2.3 
+    template <typename octet_iterator>
+    inline bool is_bom (octet_iterator it) // compares up to three octets with bom; takes no end iterator, so the caller must ensure they are readable
+    {
+        return (
+            (internal::mask8(*it++)) == bom[0] && // && short-circuits: later octets are read only if the earlier ones matched
+            (internal::mask8(*it++)) == bom[1] &&
+            (internal::mask8(*it))   == bom[2]
+           );
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
diff --git a/utf8/utf8/unchecked.h b/utf8/utf8/unchecked.h
new file mode 100644
index 0000000..95a3d74
--- /dev/null
+++ b/utf8/utf8/unchecked.h
@@ -0,0 +1,234 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked 
+    {
+        template <typename octet_iterator>
+        octet_iterator append(uint32_t cp, octet_iterator result)
+        {
+            // Writes cp as 1-4 UTF-8 octets through result and returns the advanced iterator; cp is not validated.
+            if (cp < 0x80) {                      // one octet, written as-is
+                *result++ = static_cast<uint8_t>(cp);
+                return result;
+            }
+            if (cp < 0x800)                       // two octets: lead only, trail is shared below
+                *result++ = static_cast<uint8_t>(0xc0 | (cp >> 6));
+            else if (cp < 0x10000) {              // three octets
+                *result++ = static_cast<uint8_t>(0xe0 | (cp >> 12));
+                *result++ = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
+            }
+            else {                                // four octets
+                *result++ = static_cast<uint8_t>(0xf0 | (cp >> 18));
+                *result++ = static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3f));
+                *result++ = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
+            }
+            *result++ = static_cast<uint8_t>(0x80 | (cp & 0x3f));   // last continuation octet of every multi-octet form
+            return result;
+        }
+
+        template <typename octet_iterator>
+        uint32_t sequence_length(octet_iterator it)
+        {
+            return utf8::internal::sequence_length(it);  // exposes the internal helper under utf8::unchecked
+        }
+
+        template <typename octet_iterator>
+        uint32_t next(octet_iterator& it)  // decodes the sequence at it and leaves it just past that sequence
+        {
+            uint32_t cp = internal::mask8(*it);
+            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
+            switch (length) {  // no default: any other length returns the masked first octet unchanged
+                case 1:
+                    break;
+                case 2:
+                    it++;
+                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);  // 5 payload bits of the lead + 6 of the trail
+                    break;
+                case 3:
+                    ++it;
+                    cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff);  // 4 lead bits + 6 bits
+                    ++it;
+                    cp += (*it) & 0x3f;
+                    break;
+                case 4:
+                    ++it;
+                    cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff);  // 3 lead bits + 6 bits
+                    ++it;
+                    cp += (internal::mask8(*it) << 6) & 0xfff;
+                    ++it;
+                    cp += (*it) & 0x3f;
+                    break;
+            }
+            ++it;  // step past the last octet consumed above
+            return cp;
+        }
+
+        template <typename octet_iterator>
+        uint32_t peek_next(octet_iterator it)
+        {
+            return next(it);  // it is a by-value copy, so the caller's iterator does not move
+        }
+
+        template <typename octet_iterator>
+        uint32_t prior(octet_iterator& it)
+        {
+            while (internal::is_trail(*(--it))) ;  // back up to the nearest non-trail octet; no lower bound is checked
+            octet_iterator temp = it;  // decode from a copy so it stays on the start of the sequence
+            return next(temp);
+        }
+
+        // Deprecated alias of prior(); present only for consistency with utf8::previous
+        template <typename octet_iterator>
+        inline uint32_t previous(octet_iterator& it)
+        {
+            return prior(it);
+        }
+
+        template <typename octet_iterator, typename distance_type>
+        void advance (octet_iterator& it, distance_type n)
+        {
+            for (distance_type i = 0; i < n; ++i)  // calls next() n times; does nothing when n <= 0
+                next(it);
+        }
+
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance (octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type dist;
+            for (dist = 0; first < last; ++dist)  // counts next() calls; the loop test needs operator< on the iterators
+                next(first);
+            return dist;
+        }
+
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                uint32_t cp = internal::mask16(*start++);
+                // Take care of surrogate pairs first
+                if (internal::is_lead_surrogate(cp)) {
+                    uint32_t trail_surrogate = internal::mask16(*start++);  // read without comparing start to end
+                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                }
+                result = append(cp, result);
+            }
+            return result;
+        }
+
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {  // loop test needs operator< on the octet iterators
+                uint32_t cp = next(start);
+                if (cp > 0xffff) { // does not fit in one 16-bit unit: write a lead/trail surrogate pair
+                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+                }
+                else
+                    *result++ = static_cast<uint16_t>(cp);
+            }
+            return result;
+        }
+
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            while (start != end)  // append() each element of [start, end) to result in order
+                result = append(*(start++), result);
+
+            return result;
+        }
+
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            while (start < end)  // one output element per next() call; loop test needs operator< on the iterators
+                (*result++) = next(start);
+
+            return result;
+        }
+
+        // The iterator class: bidirectional adaptor over an octet iterator; dereferencing yields a decoded code point
+        template <typename octet_iterator>
+          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
+            octet_iterator it;  // position of the first octet of the current sequence
+            public:
+            iterator (): it() {}  // value-initialized so a default-constructed iterator never holds an indeterminate value
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+            octet_iterator base () const { return it; }  // underlying octet position
+            uint32_t operator * () const
+            {
+                octet_iterator temp = it;  // decode from a copy so dereferencing does not move this iterator
+                return next(temp);
+            }
+            bool operator == (const iterator& rhs) const
+            {
+                return (it == rhs.it);
+            }
+            bool operator != (const iterator& rhs) const
+            {
+                return !(operator == (rhs));
+            }
+            iterator& operator ++ ()
+            {
+                std::advance(it, internal::sequence_length(it));  // skip the whole current sequence without decoding it
+                return *this;
+            }
+            iterator operator ++ (int)
+            {
+                iterator temp = *this;
+                std::advance(it, internal::sequence_length(it));
+                return temp;
+            }
+            iterator& operator -- ()
+            {
+                prior(it);  // moves it back to the start of the previous sequence; the decoded value is discarded
+                return *this;
+            }
+            iterator operator -- (int)
+            {
+                iterator temp = *this;
+                prior(it);
+                return temp;
+            }
+          }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8 
+
+
+#endif // header guard
+
diff --git a/utf8case/CMakeLists.txt b/utf8case/CMakeLists.txt
new file mode 100644
index 0000000..8c2edbb
--- /dev/null
+++ b/utf8case/CMakeLists.txt
@@ -0,0 +1,28 @@
+add_library(utf8case SHARED  # shared library built from the sources listed below
+    case_tables.cpp
+    range_based_case_converter.cpp
+    regular_contextual_case_converter.cpp
+    simple_convert.cpp
+    special_casing_converter.cpp
+    string_case_converter_manager.cpp
+    turkish_and_azeri_lower_contextual_case_converter.cpp
+    turkish_and_azeri_upper_contextual_case_converter.cpp
+)
+
+add_subdirectory(t)  # t/ holds the *_tests.cpp files and its own CMakeLists.txt
+
+install(TARGETS utf8case DESTINATION lib/)
+install(FILES  # public headers, installed under include/utf8case/
+        case_converter_factory.hpp
+        general_case_converter.hpp
+        simple_convert.hpp
+        turkish_and_azeri_lower_contextual_case_converter.hpp
+        case_tables.hpp
+        range_based_case_converter.hpp
+        special_casing_converter.hpp
+        turkish_and_azeri_upper_contextual_case_converter.hpp
+        contextual_case_converter.hpp
+        regular_contextual_case_converter.hpp
+        string_case_converter_manager.hpp
+
+DESTINATION include/utf8case/)
diff --git a/utf8case/case_converter_factory.hpp b/utf8case/case_converter_factory.hpp
new file mode 100644
index 0000000..34211e8
--- /dev/null
+++ b/utf8case/case_converter_factory.hpp
@@ -0,0 +1,149 @@
+#ifndef CASE_CONVERTER_FACTORY_HDR
+#define CASE_CONVERTER_FACTORY_HDR
+
+#include <boost/shared_ptr.hpp>
+
+#include "general_case_converter.hpp"
+#include "concordia/concordia_exception.hpp"
+#include "regular_contextual_case_converter.hpp"
+#include "turkish_and_azeri_lower_contextual_case_converter.hpp"
+#include "turkish_and_azeri_upper_contextual_case_converter.hpp"
+
+const size_t NUMBER_OF_CASE_TYPES = 3;
+
+template<typename octet_iterator, typename output_iterator>
+class CaseConverterFactory {
+    // Hands out GeneralCaseConverter objects; the table-backed converters are built on first use and reused.
+private:
+    enum {
+        LOWER_INDEX = 0,
+        UPPER_INDEX = 1,
+        TITLE_INDEX = 2
+    };
+    // Per-case caches indexed by LOWER_INDEX / UPPER_INDEX / TITLE_INDEX; a slot stays empty until first requested.
+    boost::shared_ptr<RangeBasedCaseConverter> rangeBasedCaseConverters_[NUMBER_OF_CASE_TYPES];
+    boost::shared_ptr<SpecialCasingConverter> specialCasingConverters_[NUMBER_OF_CASE_TYPES];
+
+    boost::shared_ptr<ContextualCaseConverter> regularContextualCaseConverter_;
+    boost::shared_ptr<ContextualCaseConverter> turkishAndAzeriUpperContextualCaseConverter_;
+    boost::shared_ptr<ContextualCaseConverter> turkishAndAzeriLowerContextualCaseConverter_;
+    // Error type thrown by this factory.
+    class Exception : public ConcordiaException  {
+    public:
+        Exception(const std::string& msg): ConcordiaException(msg) {
+        }
+
+        virtual ~Exception() throw() {}
+    };
+    // Fills the cache slots for case_index if they are still empty; throws Exception on an index outside the enum.
+    void checkRawConverters_(int case_index) {
+        if (case_index < 0 || case_index >= static_cast<int>(NUMBER_OF_CASE_TYPES))  // reject before indexing the arrays
+            throw Exception("case index out of range in CaseConverterFactory");
+        if (!rangeBasedCaseConverters_[case_index]) {
+            boost::shared_ptr<RangeBasedCaseConverter> converter;
+            switch (case_index) {
+            case LOWER_INDEX: converter.reset(
+                new RangeBasedCaseConverter(
+                    LOWER_CASE_RANGES_SIZE,
+                    LOWER_CASE_RANGES));
+                break;
+            case UPPER_INDEX: converter.reset(
+                new RangeBasedCaseConverter(
+                    UPPER_CASE_RANGES_SIZE,
+                    UPPER_CASE_RANGES));
+                break;
+            case TITLE_INDEX: converter.reset(
+                new RangeBasedCaseConverter(
+                    TITLE_CASE_RANGES_SIZE,
+                    TITLE_CASE_RANGES));
+                break;
+            default:
+                throw Exception("no range-based case table for the requested case index");
+            }
+            rangeBasedCaseConverters_[case_index] = converter;
+        }
+
+        if (!specialCasingConverters_[case_index]) {
+            boost::shared_ptr<SpecialCasingConverter> converter;
+
+            switch (case_index) {
+            case LOWER_INDEX: converter.reset(
+                new SpecialCasingConverter(
+                    LOWER_SPECIAL_CASING_SIZE,
+                    LOWER_SPECIAL_CASING));
+                break;
+            case UPPER_INDEX: converter.reset(
+                new SpecialCasingConverter(
+                    UPPER_SPECIAL_CASING_SIZE,
+                    UPPER_SPECIAL_CASING));
+                break;
+            case TITLE_INDEX: converter.reset(
+                new SpecialCasingConverter(
+                    TITLE_SPECIAL_CASING_SIZE,
+                    TITLE_SPECIAL_CASING));
+                break;
+            default:
+                throw Exception("no special-casing table for the requested case index");
+            }
+
+            specialCasingConverters_[case_index] = converter;
+        }
+    }
+    // Combines the cached raw converters for case_index with the contextual converter chosen for language_code.
+    boost::shared_ptr<GeneralCaseConverter<octet_iterator, output_iterator> > getCaseConverter_(
+        int case_index, const std::string& language_code) {
+
+        checkRawConverters_(case_index);
+
+        return boost::shared_ptr<GeneralCaseConverter<octet_iterator, output_iterator> >(
+            new GeneralCaseConverter<octet_iterator, output_iterator> (
+                rangeBasedCaseConverters_[case_index],
+                specialCasingConverters_[case_index],
+                getContextualCaseConverterForLanguage_(language_code, case_index)));
+    }
+
+public:
+    CaseConverterFactory():  // the three contextual converters are created up front
+        regularContextualCaseConverter_(
+            boost::shared_ptr<ContextualCaseConverter>(new RegularContextualCaseConverter())),
+        turkishAndAzeriUpperContextualCaseConverter_(
+            boost::shared_ptr<ContextualCaseConverter>(
+                new TurkishAndAzeriUpperContextualCaseConverter())),
+        turkishAndAzeriLowerContextualCaseConverter_(
+            boost::shared_ptr<ContextualCaseConverter>(
+                new TurkishAndAzeriLowerContextualCaseConverter())) {
+    }
+    // "lt" throws; "tr"/"az" get the Turkish/Azeri converter (lower one only for LOWER_INDEX); others get the regular one.
+    boost::shared_ptr<ContextualCaseConverter> getContextualCaseConverterForLanguage_(
+        const std::string& languageCode, int caseIndex) {
+        if (languageCode == "lt")
+            throw Exception(std::string("language '") + languageCode
+                            + "' is not handled yet in lower/upper/title-casing");
+
+        if (languageCode == "tr" || languageCode == "az")
+            return
+                caseIndex == LOWER_INDEX
+                ? turkishAndAzeriLowerContextualCaseConverter_
+                : turkishAndAzeriUpperContextualCaseConverter_;
+
+        return regularContextualCaseConverter_;
+    }
+
+    boost::shared_ptr<GeneralCaseConverter<octet_iterator, output_iterator> >
+    getLowerCaseConverter(const std::string& language_code) {
+        return getCaseConverter_(LOWER_INDEX, language_code);
+    }
+
+    boost::shared_ptr<GeneralCaseConverter<octet_iterator, output_iterator> >
+    getUpperCaseConverter(const std::string& language_code) {
+        return getCaseConverter_(UPPER_INDEX, language_code);
+    }
+
+    boost::shared_ptr<GeneralCaseConverter<octet_iterator, output_iterator> >
+    getTitleCaseConverter(const std::string& language_code) {
+        return getCaseConverter_(TITLE_INDEX, language_code);
+    }
+
+};
+
+#endif
diff --git a/utf8case/case_tables.cpp b/utf8case/case_tables.cpp
new file mode 100644
index 0000000..4816d59
--- /dev/null
+++ b/utf8case/case_tables.cpp
@@ -0,0 +1,804 @@
+// GENERATED AUTOMATICALLY BY generate_case_tables.pl; DO NOT EDIT.
+
+#include "case_tables.hpp"
+
+const size_t LOWER_CASE_RANGES_SIZE = 151;
+const CaseConversionRecord LOWER_CASE_RANGES[LOWER_CASE_RANGES_SIZE] = {
+    {65, 90, 32},
+    {192, 214, 32},
+    {216, 222, 32},
+    {256, 302, EVEN_ODD_SKIP},
+    {304, 304, -199},
+    {306, 310, EVEN_ODD_SKIP},
+    {313, 327, ODD_EVEN_SKIP},
+    {330, 374, EVEN_ODD_SKIP},
+    {376, 376, -121},
+    {377, 381, ODD_EVEN_SKIP},
+    {385, 385, 210},
+    {386, 388, EVEN_ODD_SKIP},
+    {390, 390, 206},
+    {391, 391, ODD_EVEN},
+    {393, 394, 205},
+    {395, 395, ODD_EVEN},
+    {398, 398, 79},
+    {399, 399, 202},
+    {400, 400, 203},
+    {401, 401, ODD_EVEN},
+    {403, 403, 205},
+    {404, 404, 207},
+    {406, 406, 211},
+    {407, 407, 209},
+    {408, 408, EVEN_ODD},
+    {412, 412, 211},
+    {413, 413, 213},
+    {415, 415, 214},
+    {416, 420, EVEN_ODD_SKIP},
+    {422, 422, 218},
+    {423, 423, ODD_EVEN},
+    {425, 425, 218},
+    {428, 428, EVEN_ODD},
+    {430, 430, 218},
+    {431, 431, ODD_EVEN},
+    {433, 434, 217},
+    {435, 437, ODD_EVEN_SKIP},
+    {439, 439, 219},
+    {440, 440, EVEN_ODD},
+    {444, 444, EVEN_ODD},
+    {452, 452, 2},
+    {453, 453, ODD_EVEN},
+    {455, 455, 2},
+    {456, 456, EVEN_ODD},
+    {458, 458, 2},
+    {459, 475, ODD_EVEN_SKIP},
+    {478, 494, EVEN_ODD_SKIP},
+    {497, 497, 2},
+    {498, 500, EVEN_ODD_SKIP},
+    {502, 502, -97},
+    {503, 503, -56},
+    {504, 542, EVEN_ODD_SKIP},
+    {544, 544, -130},
+    {546, 562, EVEN_ODD_SKIP},
+    {570, 570, 10795},
+    {571, 571, ODD_EVEN},
+    {573, 573, -163},
+    {574, 574, 10792},
+    {577, 577, ODD_EVEN},
+    {579, 579, -195},
+    {580, 580, 69},
+    {581, 581, 71},
+    {582, 590, EVEN_ODD_SKIP},
+    {880, 882, EVEN_ODD_SKIP},
+    {886, 886, EVEN_ODD},
+    {902, 902, 38},
+    {904, 906, 37},
+    {908, 908, 64},
+    {910, 911, 63},
+    {913, 929, 32},
+    {931, 939, 32},
+    {975, 975, 8},
+    {984, 1006, EVEN_ODD_SKIP},
+    {1012, 1012, -60},
+    {1015, 1015, ODD_EVEN},
+    {1017, 1017, -7},
+    {1018, 1018, EVEN_ODD},
+    {1021, 1023, -130},
+    {1024, 1039, 80},
+    {1040, 1071, 32},
+    {1120, 1152, EVEN_ODD_SKIP},
+    {1162, 1214, EVEN_ODD_SKIP},
+    {1216, 1216, 15},
+    {1217, 1229, ODD_EVEN_SKIP},
+    {1232, 1318, EVEN_ODD_SKIP},
+    {1329, 1366, 48},
+    {4256, 4293, 7264},
+    {7680, 7828, EVEN_ODD_SKIP},
+    {7838, 7838, -7615},
+    {7840, 7934, EVEN_ODD_SKIP},
+    {7944, 7951, -8},
+    {7960, 7965, -8},
+    {7976, 7983, -8},
+    {7992, 7999, -8},
+    {8008, 8013, -8},
+    {8025, 8025, -8},
+    {8027, 8027, -8},
+    {8029, 8029, -8},
+    {8031, 8031, -8},
+    {8040, 8047, -8},
+    {8072, 8079, -8},
+    {8088, 8095, -8},
+    {8104, 8111, -8},
+    {8120, 8121, -8},
+    {8122, 8123, -74},
+    {8124, 8124, -9},
+    {8136, 8139, -86},
+    {8140, 8140, -9},
+    {8152, 8153, -8},
+    {8154, 8155, -100},
+    {8168, 8169, -8},
+    {8170, 8171, -112},
+    {8172, 8172, -7},
+    {8184, 8185, -128},
+    {8186, 8187, -126},
+    {8188, 8188, -9},
+    {8486, 8486, -7517},
+    {8490, 8490, -8383},
+    {8491, 8491, -8262},
+    {8498, 8498, 28},
+    {8544, 8559, 16},
+    {8579, 8579, ODD_EVEN},
+    {9398, 9423, 26},
+    {11264, 11310, 48},
+    {11360, 11360, EVEN_ODD},
+    {11362, 11362, -10743},
+    {11363, 11363, -3814},
+    {11364, 11364, -10727},
+    {11367, 11371, ODD_EVEN_SKIP},
+    {11373, 11373, -10780},
+    {11374, 11374, -10749},
+    {11375, 11375, -10783},
+    {11376, 11376, -10782},
+    {11378, 11378, EVEN_ODD},
+    {11381, 11381, ODD_EVEN},
+    {11390, 11391, -10815},
+    {11392, 11490, EVEN_ODD_SKIP},
+    {11499, 11501, ODD_EVEN_SKIP},
+    {42560, 42604, EVEN_ODD_SKIP},
+    {42624, 42646, EVEN_ODD_SKIP},
+    {42786, 42798, EVEN_ODD_SKIP},
+    {42802, 42862, EVEN_ODD_SKIP},
+    {42873, 42875, ODD_EVEN_SKIP},
+    {42877, 42877, -35332},
+    {42878, 42886, EVEN_ODD_SKIP},
+    {42891, 42891, ODD_EVEN},
+    {42893, 42893, -42280},
+    {42896, 42896, EVEN_ODD},
+    {42912, 42920, EVEN_ODD_SKIP},
+    {65313, 65338, 32},
+    {66560, 66599, 40}
+};
+const size_t UPPER_CASE_RANGES_SIZE = 161;
+const CaseConversionRecord UPPER_CASE_RANGES[UPPER_CASE_RANGES_SIZE] = {
+    {97, 122, -32},
+    {181, 181, 743},
+    {224, 246, -32},
+    {248, 254, -32},
+    {255, 255, 121},
+    {257, 303, EVEN_ODD_SKIP},
+    {305, 305, -232},
+    {307, 311, EVEN_ODD_SKIP},
+    {314, 328, ODD_EVEN_SKIP},
+    {331, 375, EVEN_ODD_SKIP},
+    {378, 382, ODD_EVEN_SKIP},
+    {383, 383, -300},
+    {384, 384, 195},
+    {387, 389, EVEN_ODD_SKIP},
+    {392, 392, ODD_EVEN},
+    {396, 396, ODD_EVEN},
+    {402, 402, ODD_EVEN},
+    {405, 405, 97},
+    {409, 409, EVEN_ODD},
+    {410, 410, 163},
+    {414, 414, 130},
+    {417, 421, EVEN_ODD_SKIP},
+    {424, 424, ODD_EVEN},
+    {429, 429, EVEN_ODD},
+    {432, 432, ODD_EVEN},
+    {436, 438, ODD_EVEN_SKIP},
+    {441, 441, EVEN_ODD},
+    {445, 445, EVEN_ODD},
+    {447, 447, 56},
+    {453, 453, EVEN_ODD},
+    {454, 454, -2},
+    {456, 456, ODD_EVEN},
+    {457, 457, -2},
+    {459, 459, EVEN_ODD},
+    {460, 460, -2},
+    {462, 476, ODD_EVEN_SKIP},
+    {477, 477, -79},
+    {479, 495, EVEN_ODD_SKIP},
+    {498, 498, ODD_EVEN},
+    {499, 499, -2},
+    {501, 501, EVEN_ODD},
+    {505, 543, EVEN_ODD_SKIP},
+    {547, 563, EVEN_ODD_SKIP},
+    {572, 572, ODD_EVEN},
+    {575, 576, 10815},
+    {578, 578, ODD_EVEN},
+    {583, 591, EVEN_ODD_SKIP},
+    {592, 592, 10783},
+    {593, 593, 10780},
+    {594, 594, 10782},
+    {595, 595, -210},
+    {596, 596, -206},
+    {598, 599, -205},
+    {601, 601, -202},
+    {603, 603, -203},
+    {608, 608, -205},
+    {611, 611, -207},
+    {613, 613, 42280},
+    {616, 616, -209},
+    {617, 617, -211},
+    {619, 619, 10743},
+    {623, 623, -211},
+    {625, 625, 10749},
+    {626, 626, -213},
+    {629, 629, -214},
+    {637, 637, 10727},
+    {640, 640, -218},
+    {643, 643, -218},
+    {648, 648, -218},
+    {649, 649, -69},
+    {650, 651, -217},
+    {652, 652, -71},
+    {658, 658, -219},
+    {837, 837, 84},
+    {881, 883, EVEN_ODD_SKIP},
+    {887, 887, EVEN_ODD},
+    {891, 893, 130},
+    {940, 940, -38},
+    {941, 943, -37},
+    {945, 961, -32},
+    {962, 962, -31},
+    {963, 971, -32},
+    {972, 972, -64},
+    {973, 974, -63},
+    {976, 976, -62},
+    {977, 977, -57},
+    {981, 981, -47},
+    {982, 982, -54},
+    {983, 983, -8},
+    {985, 1007, EVEN_ODD_SKIP},
+    {1008, 1008, -86},
+    {1009, 1009, -80},
+    {1010, 1010, 7},
+    {1013, 1013, -96},
+    {1016, 1016, ODD_EVEN},
+    {1019, 1019, EVEN_ODD},
+    {1072, 1103, -32},
+    {1104, 1119, -80},
+    {1121, 1153, EVEN_ODD_SKIP},
+    {1163, 1215, EVEN_ODD_SKIP},
+    {1218, 1230, ODD_EVEN_SKIP},
+    {1231, 1231, -15},
+    {1233, 1319, EVEN_ODD_SKIP},
+    {1377, 1414, -48},
+    {7545, 7545, 35332},
+    {7549, 7549, 3814},
+    {7681, 7829, EVEN_ODD_SKIP},
+    {7835, 7835, -59},
+    {7841, 7935, EVEN_ODD_SKIP},
+    {7936, 7943, 8},
+    {7952, 7957, 8},
+    {7968, 7975, 8},
+    {7984, 7991, 8},
+    {8000, 8005, 8},
+    {8017, 8017, 8},
+    {8019, 8019, 8},
+    {8021, 8021, 8},
+    {8023, 8023, 8},
+    {8032, 8039, 8},
+    {8048, 8049, 74},
+    {8050, 8053, 86},
+    {8054, 8055, 100},
+    {8056, 8057, 128},
+    {8058, 8059, 112},
+    {8060, 8061, 126},
+    {8064, 8071, 8},
+    {8080, 8087, 8},
+    {8096, 8103, 8},
+    {8112, 8113, 8},
+    {8115, 8115, 9},
+    {8126, 8126, -7205},
+    {8131, 8131, 9},
+    {8144, 8145, 8},
+    {8160, 8161, 8},
+    {8165, 8165, 7},
+    {8179, 8179, 9},
+    {8526, 8526, -28},
+    {8560, 8575, -16},
+    {8580, 8580, ODD_EVEN},
+    {9424, 9449, -26},
+    {11312, 11358, -48},
+    {11361, 11361, EVEN_ODD},
+    {11365, 11365, -10795},
+    {11366, 11366, -10792},
+    {11368, 11372, ODD_EVEN_SKIP},
+    {11379, 11379, EVEN_ODD},
+    {11382, 11382, ODD_EVEN},
+    {11393, 11491, EVEN_ODD_SKIP},
+    {11500, 11502, ODD_EVEN_SKIP},
+    {11520, 11557, -7264},
+    {42561, 42605, EVEN_ODD_SKIP},
+    {42625, 42647, EVEN_ODD_SKIP},
+    {42787, 42799, EVEN_ODD_SKIP},
+    {42803, 42863, EVEN_ODD_SKIP},
+    {42874, 42876, ODD_EVEN_SKIP},
+    {42879, 42887, EVEN_ODD_SKIP},
+    {42892, 42892, ODD_EVEN},
+    {42897, 42897, EVEN_ODD},
+    {42913, 42921, EVEN_ODD_SKIP},
+    {65345, 65370, -32},
+    {66600, 66639, -40}
+};
+const size_t TITLE_CASE_RANGES_SIZE = 161;
+const CaseConversionRecord TITLE_CASE_RANGES[TITLE_CASE_RANGES_SIZE] = {
+    {97, 122, -32},
+    {181, 181, 743},
+    {224, 246, -32},
+    {248, 254, -32},
+    {255, 255, 121},
+    {257, 303, EVEN_ODD_SKIP},
+    {305, 305, -232},
+    {307, 311, EVEN_ODD_SKIP},
+    {314, 328, ODD_EVEN_SKIP},
+    {331, 375, EVEN_ODD_SKIP},
+    {378, 382, ODD_EVEN_SKIP},
+    {383, 383, -300},
+    {384, 384, 195},
+    {387, 389, EVEN_ODD_SKIP},
+    {392, 392, ODD_EVEN},
+    {396, 396, ODD_EVEN},
+    {402, 402, ODD_EVEN},
+    {405, 405, 97},
+    {409, 409, EVEN_ODD},
+    {410, 410, 163},
+    {414, 414, 130},
+    {417, 421, EVEN_ODD_SKIP},
+    {424, 424, ODD_EVEN},
+    {429, 429, EVEN_ODD},
+    {432, 432, ODD_EVEN},
+    {436, 438, ODD_EVEN_SKIP},
+    {441, 441, EVEN_ODD},
+    {445, 445, EVEN_ODD},
+    {447, 447, 56},
+    {452, 452, EVEN_ODD},
+    {453, 453, 0},
+    {454, 455, ODD_EVEN},
+    {456, 456, 0},
+    {457, 458, EVEN_ODD},
+    {459, 459, 0},
+    {460, 476, ODD_EVEN_SKIP},
+    {477, 477, -79},
+    {479, 495, EVEN_ODD_SKIP},
+    {497, 497, ODD_EVEN},
+    {498, 498, 0},
+    {499, 501, EVEN_ODD_SKIP},
+    {505, 543, EVEN_ODD_SKIP},
+    {547, 563, EVEN_ODD_SKIP},
+    {572, 572, ODD_EVEN},
+    {575, 576, 10815},
+    {578, 578, ODD_EVEN},
+    {583, 591, EVEN_ODD_SKIP},
+    {592, 592, 10783},
+    {593, 593, 10780},
+    {594, 594, 10782},
+    {595, 595, -210},
+    {596, 596, -206},
+    {598, 599, -205},
+    {601, 601, -202},
+    {603, 603, -203},
+    {608, 608, -205},
+    {611, 611, -207},
+    {613, 613, 42280},
+    {616, 616, -209},
+    {617, 617, -211},
+    {619, 619, 10743},
+    {623, 623, -211},
+    {625, 625, 10749},
+    {626, 626, -213},
+    {629, 629, -214},
+    {637, 637, 10727},
+    {640, 640, -218},
+    {643, 643, -218},
+    {648, 648, -218},
+    {649, 649, -69},
+    {650, 651, -217},
+    {652, 652, -71},
+    {658, 658, -219},
+    {837, 837, 84},
+    {881, 883, EVEN_ODD_SKIP},
+    {887, 887, EVEN_ODD},
+    {891, 893, 130},
+    {940, 940, -38},
+    {941, 943, -37},
+    {945, 961, -32},
+    {962, 962, -31},
+    {963, 971, -32},
+    {972, 972, -64},
+    {973, 974, -63},
+    {976, 976, -62},
+    {977, 977, -57},
+    {981, 981, -47},
+    {982, 982, -54},
+    {983, 983, -8},
+    {985, 1007, EVEN_ODD_SKIP},
+    {1008, 1008, -86},
+    {1009, 1009, -80},
+    {1010, 1010, 7},
+    {1013, 1013, -96},
+    {1016, 1016, ODD_EVEN},
+    {1019, 1019, EVEN_ODD},
+    {1072, 1103, -32},
+    {1104, 1119, -80},
+    {1121, 1153, EVEN_ODD_SKIP},
+    {1163, 1215, EVEN_ODD_SKIP},
+    {1218, 1230, ODD_EVEN_SKIP},
+    {1231, 1231, -15},
+    {1233, 1319, EVEN_ODD_SKIP},
+    {1377, 1414, -48},
+    {7545, 7545, 35332},
+    {7549, 7549, 3814},
+    {7681, 7829, EVEN_ODD_SKIP},
+    {7835, 7835, -59},
+    {7841, 7935, EVEN_ODD_SKIP},
+    {7936, 7943, 8},
+    {7952, 7957, 8},
+    {7968, 7975, 8},
+    {7984, 7991, 8},
+    {8000, 8005, 8},
+    {8017, 8017, 8},
+    {8019, 8019, 8},
+    {8021, 8021, 8},
+    {8023, 8023, 8},
+    {8032, 8039, 8},
+    {8048, 8049, 74},
+    {8050, 8053, 86},
+    {8054, 8055, 100},
+    {8056, 8057, 128},
+    {8058, 8059, 112},
+    {8060, 8061, 126},
+    {8064, 8071, 8},
+    {8080, 8087, 8},
+    {8096, 8103, 8},
+    {8112, 8113, 8},
+    {8115, 8115, 9},
+    {8126, 8126, -7205},
+    {8131, 8131, 9},
+    {8144, 8145, 8},
+    {8160, 8161, 8},
+    {8165, 8165, 7},
+    {8179, 8179, 9},
+    {8526, 8526, -28},
+    {8560, 8575, -16},
+    {8580, 8580, ODD_EVEN},
+    {9424, 9449, -26},
+    {11312, 11358, -48},
+    {11361, 11361, EVEN_ODD},
+    {11365, 11365, -10795},
+    {11366, 11366, -10792},
+    {11368, 11372, ODD_EVEN_SKIP},
+    {11379, 11379, EVEN_ODD},
+    {11382, 11382, ODD_EVEN},
+    {11393, 11491, EVEN_ODD_SKIP},
+    {11500, 11502, ODD_EVEN_SKIP},
+    {11520, 11557, -7264},
+    {42561, 42605, EVEN_ODD_SKIP},
+    {42625, 42647, EVEN_ODD_SKIP},
+    {42787, 42799, EVEN_ODD_SKIP},
+    {42803, 42863, EVEN_ODD_SKIP},
+    {42874, 42876, ODD_EVEN_SKIP},
+    {42879, 42887, EVEN_ODD_SKIP},
+    {42892, 42892, ODD_EVEN},
+    {42897, 42897, EVEN_ODD},
+    {42913, 42921, EVEN_ODD_SKIP},
+    {65345, 65370, -32},
+    {66600, 66639, -40}
+};
+const size_t LOWER_SPECIAL_CASING_SIZE = 103;
+const SpecialCasingConversionRecord LOWER_SPECIAL_CASING[LOWER_SPECIAL_CASING_SIZE] = {
+    {223, "\xc3\x9f"},
+    {304, "\x69\xcc\x87"},
+    {64256, "\xef\xac\x80"},
+    {64257, "\xef\xac\x81"},
+    {64258, "\xef\xac\x82"},
+    {64259, "\xef\xac\x83"},
+    {64260, "\xef\xac\x84"},
+    {64261, "\xef\xac\x85"},
+    {64262, "\xef\xac\x86"},
+    {1415, "\xd6\x87"},
+    {64275, "\xef\xac\x93"},
+    {64276, "\xef\xac\x94"},
+    {64277, "\xef\xac\x95"},
+    {64278, "\xef\xac\x96"},
+    {64279, "\xef\xac\x97"},
+    {329, "\xc5\x89"},
+    {912, "\xce\x90"},
+    {944, "\xce\xb0"},
+    {496, "\xc7\xb0"},
+    {7830, "\xe1\xba\x96"},
+    {7831, "\xe1\xba\x97"},
+    {7832, "\xe1\xba\x98"},
+    {7833, "\xe1\xba\x99"},
+    {7834, "\xe1\xba\x9a"},
+    {8016, "\xe1\xbd\x90"},
+    {8018, "\xe1\xbd\x92"},
+    {8020, "\xe1\xbd\x94"},
+    {8022, "\xe1\xbd\x96"},
+    {8118, "\xe1\xbe\xb6"},
+    {8134, "\xe1\xbf\x86"},
+    {8146, "\xe1\xbf\x92"},
+    {8147, "\xe1\xbf\x93"},
+    {8150, "\xe1\xbf\x96"},
+    {8151, "\xe1\xbf\x97"},
+    {8162, "\xe1\xbf\xa2"},
+    {8163, "\xe1\xbf\xa3"},
+    {8164, "\xe1\xbf\xa4"},
+    {8166, "\xe1\xbf\xa6"},
+    {8167, "\xe1\xbf\xa7"},
+    {8182, "\xe1\xbf\xb6"},
+    {8064, "\xe1\xbe\x80"},
+    {8065, "\xe1\xbe\x81"},
+    {8066, "\xe1\xbe\x82"},
+    {8067, "\xe1\xbe\x83"},
+    {8068, "\xe1\xbe\x84"},
+    {8069, "\xe1\xbe\x85"},
+    {8070, "\xe1\xbe\x86"},
+    {8071, "\xe1\xbe\x87"},
+    {8072, "\xe1\xbe\x80"},
+    {8073, "\xe1\xbe\x81"},
+    {8074, "\xe1\xbe\x82"},
+    {8075, "\xe1\xbe\x83"},
+    {8076, "\xe1\xbe\x84"},
+    {8077, "\xe1\xbe\x85"},
+    {8078, "\xe1\xbe\x86"},
+    {8079, "\xe1\xbe\x87"},
+    {8080, "\xe1\xbe\x90"},
+    {8081, "\xe1\xbe\x91"},
+    {8082, "\xe1\xbe\x92"},
+    {8083, "\xe1\xbe\x93"},
+    {8084, "\xe1\xbe\x94"},
+    {8085, "\xe1\xbe\x95"},
+    {8086, "\xe1\xbe\x96"},
+    {8087, "\xe1\xbe\x97"},
+    {8088, "\xe1\xbe\x90"},
+    {8089, "\xe1\xbe\x91"},
+    {8090, "\xe1\xbe\x92"},
+    {8091, "\xe1\xbe\x93"},
+    {8092, "\xe1\xbe\x94"},
+    {8093, "\xe1\xbe\x95"},
+    {8094, "\xe1\xbe\x96"},
+    {8095, "\xe1\xbe\x97"},
+    {8096, "\xe1\xbe\xa0"},
+    {8097, "\xe1\xbe\xa1"},
+    {8098, "\xe1\xbe\xa2"},
+    {8099, "\xe1\xbe\xa3"},
+    {8100, "\xe1\xbe\xa4"},
+    {8101, "\xe1\xbe\xa5"},
+    {8102, "\xe1\xbe\xa6"},
+    {8103, "\xe1\xbe\xa7"},
+    {8104, "\xe1\xbe\xa0"},
+    {8105, "\xe1\xbe\xa1"},
+    {8106, "\xe1\xbe\xa2"},
+    {8107, "\xe1\xbe\xa3"},
+    {8108, "\xe1\xbe\xa4"},
+    {8109, "\xe1\xbe\xa5"},
+    {8110, "\xe1\xbe\xa6"},
+    {8111, "\xe1\xbe\xa7"},
+    {8115, "\xe1\xbe\xb3"},
+    {8124, "\xe1\xbe\xb3"},
+    {8131, "\xe1\xbf\x83"},
+    {8140, "\xe1\xbf\x83"},
+    {8179, "\xe1\xbf\xb3"},
+    {8188, "\xe1\xbf\xb3"},
+    {8114, "\xe1\xbe\xb2"},
+    {8116, "\xe1\xbe\xb4"},
+    {8130, "\xe1\xbf\x82"},
+    {8132, "\xe1\xbf\x84"},
+    {8178, "\xe1\xbf\xb2"},
+    {8180, "\xe1\xbf\xb4"},
+    {8119, "\xe1\xbe\xb7"},
+    {8135, "\xe1\xbf\x87"},
+    {8183, "\xe1\xbf\xb7"}
+};
+const size_t TITLE_SPECIAL_CASING_SIZE = 103;
+const SpecialCasingConversionRecord TITLE_SPECIAL_CASING[TITLE_SPECIAL_CASING_SIZE] = {
+    {223, "\x53\x73"},
+    {304, "\xc4\xb0"},
+    {64256, "\x46\x66"},
+    {64257, "\x46\x69"},
+    {64258, "\x46\x6c"},
+    {64259, "\x46\x66\x69"},
+    {64260, "\x46\x66\x6c"},
+    {64261, "\x53\x74"},
+    {64262, "\x53\x74"},
+    {1415, "\xd4\xb5\xd6\x82"},
+    {64275, "\xd5\x84\xd5\xb6"},
+    {64276, "\xd5\x84\xd5\xa5"},
+    {64277, "\xd5\x84\xd5\xab"},
+    {64278, "\xd5\x8e\xd5\xb6"},
+    {64279, "\xd5\x84\xd5\xad"},
+    {329, "\xca\xbc\x4e"},
+    {912, "\xce\x99\xcc\x88\xcc\x81"},
+    {944, "\xce\xa5\xcc\x88\xcc\x81"},
+    {496, "\x4a\xcc\x8c"},
+    {7830, "\x48\xcc\xb1"},
+    {7831, "\x54\xcc\x88"},
+    {7832, "\x57\xcc\x8a"},
+    {7833, "\x59\xcc\x8a"},
+    {7834, "\x41\xca\xbe"},
+    {8016, "\xce\xa5\xcc\x93"},
+    {8018, "\xce\xa5\xcc\x93\xcc\x80"},
+    {8020, "\xce\xa5\xcc\x93\xcc\x81"},
+    {8022, "\xce\xa5\xcc\x93\xcd\x82"},
+    {8118, "\xce\x91\xcd\x82"},
+    {8134, "\xce\x97\xcd\x82"},
+    {8146, "\xce\x99\xcc\x88\xcc\x80"},
+    {8147, "\xce\x99\xcc\x88\xcc\x81"},
+    {8150, "\xce\x99\xcd\x82"},
+    {8151, "\xce\x99\xcc\x88\xcd\x82"},
+    {8162, "\xce\xa5\xcc\x88\xcc\x80"},
+    {8163, "\xce\xa5\xcc\x88\xcc\x81"},
+    {8164, "\xce\xa1\xcc\x93"},
+    {8166, "\xce\xa5\xcd\x82"},
+    {8167, "\xce\xa5\xcc\x88\xcd\x82"},
+    {8182, "\xce\xa9\xcd\x82"},
+    {8064, "\xe1\xbe\x88"},
+    {8065, "\xe1\xbe\x89"},
+    {8066, "\xe1\xbe\x8a"},
+    {8067, "\xe1\xbe\x8b"},
+    {8068, "\xe1\xbe\x8c"},
+    {8069, "\xe1\xbe\x8d"},
+    {8070, "\xe1\xbe\x8e"},
+    {8071, "\xe1\xbe\x8f"},
+    {8072, "\xe1\xbe\x88"},
+    {8073, "\xe1\xbe\x89"},
+    {8074, "\xe1\xbe\x8a"},
+    {8075, "\xe1\xbe\x8b"},
+    {8076, "\xe1\xbe\x8c"},
+    {8077, "\xe1\xbe\x8d"},
+    {8078, "\xe1\xbe\x8e"},
+    {8079, "\xe1\xbe\x8f"},
+    {8080, "\xe1\xbe\x98"},
+    {8081, "\xe1\xbe\x99"},
+    {8082, "\xe1\xbe\x9a"},
+    {8083, "\xe1\xbe\x9b"},
+    {8084, "\xe1\xbe\x9c"},
+    {8085, "\xe1\xbe\x9d"},
+    {8086, "\xe1\xbe\x9e"},
+    {8087, "\xe1\xbe\x9f"},
+    {8088, "\xe1\xbe\x98"},
+    {8089, "\xe1\xbe\x99"},
+    {8090, "\xe1\xbe\x9a"},
+    {8091, "\xe1\xbe\x9b"},
+    {8092, "\xe1\xbe\x9c"},
+    {8093, "\xe1\xbe\x9d"},
+    {8094, "\xe1\xbe\x9e"},
+    {8095, "\xe1\xbe\x9f"},
+    {8096, "\xe1\xbe\xa8"},
+    {8097, "\xe1\xbe\xa9"},
+    {8098, "\xe1\xbe\xaa"},
+    {8099, "\xe1\xbe\xab"},
+    {8100, "\xe1\xbe\xac"},
+    {8101, "\xe1\xbe\xad"},
+    {8102, "\xe1\xbe\xae"},
+    {8103, "\xe1\xbe\xaf"},
+    {8104, "\xe1\xbe\xa8"},
+    {8105, "\xe1\xbe\xa9"},
+    {8106, "\xe1\xbe\xaa"},
+    {8107, "\xe1\xbe\xab"},
+    {8108, "\xe1\xbe\xac"},
+    {8109, "\xe1\xbe\xad"},
+    {8110, "\xe1\xbe\xae"},
+    {8111, "\xe1\xbe\xaf"},
+    {8115, "\xe1\xbe\xbc"},
+    {8124, "\xe1\xbe\xbc"},
+    {8131, "\xe1\xbf\x8c"},
+    {8140, "\xe1\xbf\x8c"},
+    {8179, "\xe1\xbf\xbc"},
+    {8188, "\xe1\xbf\xbc"},
+    {8114, "\xe1\xbe\xba\xcd\x85"},
+    {8116, "\xce\x86\xcd\x85"},
+    {8130, "\xe1\xbf\x8a\xcd\x85"},
+    {8132, "\xce\x89\xcd\x85"},
+    {8178, "\xe1\xbf\xba\xcd\x85"},
+    {8180, "\xce\x8f\xcd\x85"},
+    {8119, "\xce\x91\xcd\x82\xcd\x85"},
+    {8135, "\xce\x97\xcd\x82\xcd\x85"},
+    {8183, "\xce\xa9\xcd\x82\xcd\x85"}
+};
+const size_t UPPER_SPECIAL_CASING_SIZE = 103;
+const SpecialCasingConversionRecord UPPER_SPECIAL_CASING[UPPER_SPECIAL_CASING_SIZE] = {
+    {223, "\x53\x53"},
+    {304, "\xc4\xb0"},
+    {64256, "\x46\x46"},
+    {64257, "\x46\x49"},
+    {64258, "\x46\x4c"},
+    {64259, "\x46\x46\x49"},
+    {64260, "\x46\x46\x4c"},
+    {64261, "\x53\x54"},
+    {64262, "\x53\x54"},
+    {1415, "\xd4\xb5\xd5\x92"},
+    {64275, "\xd5\x84\xd5\x86"},
+    {64276, "\xd5\x84\xd4\xb5"},
+    {64277, "\xd5\x84\xd4\xbb"},
+    {64278, "\xd5\x8e\xd5\x86"},
+    {64279, "\xd5\x84\xd4\xbd"},
+    {329, "\xca\xbc\x4e"},
+    {912, "\xce\x99\xcc\x88\xcc\x81"},
+    {944, "\xce\xa5\xcc\x88\xcc\x81"},
+    {496, "\x4a\xcc\x8c"},
+    {7830, "\x48\xcc\xb1"},
+    {7831, "\x54\xcc\x88"},
+    {7832, "\x57\xcc\x8a"},
+    {7833, "\x59\xcc\x8a"},
+    {7834, "\x41\xca\xbe"},
+    {8016, "\xce\xa5\xcc\x93"},
+    {8018, "\xce\xa5\xcc\x93\xcc\x80"},
+    {8020, "\xce\xa5\xcc\x93\xcc\x81"},
+    {8022, "\xce\xa5\xcc\x93\xcd\x82"},
+    {8118, "\xce\x91\xcd\x82"},
+    {8134, "\xce\x97\xcd\x82"},
+    {8146, "\xce\x99\xcc\x88\xcc\x80"},
+    {8147, "\xce\x99\xcc\x88\xcc\x81"},
+    {8150, "\xce\x99\xcd\x82"},
+    {8151, "\xce\x99\xcc\x88\xcd\x82"},
+    {8162, "\xce\xa5\xcc\x88\xcc\x80"},
+    {8163, "\xce\xa5\xcc\x88\xcc\x81"},
+    {8164, "\xce\xa1\xcc\x93"},
+    {8166, "\xce\xa5\xcd\x82"},
+    {8167, "\xce\xa5\xcc\x88\xcd\x82"},
+    {8182, "\xce\xa9\xcd\x82"},
+    {8064, "\xe1\xbc\x88\xce\x99"},
+    {8065, "\xe1\xbc\x89\xce\x99"},
+    {8066, "\xe1\xbc\x8a\xce\x99"},
+    {8067, "\xe1\xbc\x8b\xce\x99"},
+    {8068, "\xe1\xbc\x8c\xce\x99"},
+    {8069, "\xe1\xbc\x8d\xce\x99"},
+    {8070, "\xe1\xbc\x8e\xce\x99"},
+    {8071, "\xe1\xbc\x8f\xce\x99"},
+    {8072, "\xe1\xbc\x88\xce\x99"},
+    {8073, "\xe1\xbc\x89\xce\x99"},
+    {8074, "\xe1\xbc\x8a\xce\x99"},
+    {8075, "\xe1\xbc\x8b\xce\x99"},
+    {8076, "\xe1\xbc\x8c\xce\x99"},
+    {8077, "\xe1\xbc\x8d\xce\x99"},
+    {8078, "\xe1\xbc\x8e\xce\x99"},
+    {8079, "\xe1\xbc\x8f\xce\x99"},
+    {8080, "\xe1\xbc\xa8\xce\x99"},
+    {8081, "\xe1\xbc\xa9\xce\x99"},
+    {8082, "\xe1\xbc\xaa\xce\x99"},
+    {8083, "\xe1\xbc\xab\xce\x99"},
+    {8084, "\xe1\xbc\xac\xce\x99"},
+    {8085, "\xe1\xbc\xad\xce\x99"},
+    {8086, "\xe1\xbc\xae\xce\x99"},
+    {8087, "\xe1\xbc\xaf\xce\x99"},
+    {8088, "\xe1\xbc\xa8\xce\x99"},
+    {8089, "\xe1\xbc\xa9\xce\x99"},
+    {8090, "\xe1\xbc\xaa\xce\x99"},
+    {8091, "\xe1\xbc\xab\xce\x99"},
+    {8092, "\xe1\xbc\xac\xce\x99"},
+    {8093, "\xe1\xbc\xad\xce\x99"},
+    {8094, "\xe1\xbc\xae\xce\x99"},
+    {8095, "\xe1\xbc\xaf\xce\x99"},
+    {8096, "\xe1\xbd\xa8\xce\x99"},
+    {8097, "\xe1\xbd\xa9\xce\x99"},
+    {8098, "\xe1\xbd\xaa\xce\x99"},
+    {8099, "\xe1\xbd\xab\xce\x99"},
+    {8100, "\xe1\xbd\xac\xce\x99"},
+    {8101, "\xe1\xbd\xad\xce\x99"},
+    {8102, "\xe1\xbd\xae\xce\x99"},
+    {8103, "\xe1\xbd\xaf\xce\x99"},
+    {8104, "\xe1\xbd\xa8\xce\x99"},
+    {8105, "\xe1\xbd\xa9\xce\x99"},
+    {8106, "\xe1\xbd\xaa\xce\x99"},
+    {8107, "\xe1\xbd\xab\xce\x99"},
+    {8108, "\xe1\xbd\xac\xce\x99"},
+    {8109, "\xe1\xbd\xad\xce\x99"},
+    {8110, "\xe1\xbd\xae\xce\x99"},
+    {8111, "\xe1\xbd\xaf\xce\x99"},
+    {8115, "\xce\x91\xce\x99"},
+    {8124, "\xce\x91\xce\x99"},
+    {8131, "\xce\x97\xce\x99"},
+    {8140, "\xce\x97\xce\x99"},
+    {8179, "\xce\xa9\xce\x99"},
+    {8188, "\xce\xa9\xce\x99"},
+    {8114, "\xe1\xbe\xba\xce\x99"},
+    {8116, "\xce\x86\xce\x99"},
+    {8130, "\xe1\xbf\x8a\xce\x99"},
+    {8132, "\xce\x89\xce\x99"},
+    {8178, "\xe1\xbf\xba\xce\x99"},
+    {8180, "\xce\x8f\xce\x99"},
+    {8119, "\xce\x91\xcd\x82\xce\x99"},
+    {8135, "\xce\x97\xcd\x82\xce\x99"},
+    {8183, "\xce\xa9\xcd\x82\xce\x99"}
+};
diff --git a/utf8case/case_tables.hpp b/utf8case/case_tables.hpp
new file mode 100644
index 0000000..bd91695
--- /dev/null
+++ b/utf8case/case_tables.hpp
@@ -0,0 +1,56 @@
+#ifndef CASE_TABLES_HDR
+#define CASE_TABLES_HDR
+
+// <stddef.h> (not <cstddef>) because the declarations below use the
+// unqualified, global-namespace size_t.
+#include <stddef.h>
+
+#include <boost/cstdint.hpp>
+
+// Symbolic values stored in CaseConversionRecord::delta instead of a plain
+// numeric offset (they are interpreted by RangeBasedCaseConverter):
+//  - EVEN_ODD: the two cases are (even, odd) neighbours, so an even code
+//    point maps to the next one and an odd code point to the previous one;
+//  - ODD_EVEN: the same for (odd, even) neighbours;
+//  - *_SKIP: as above, but only every second code point of the range
+//    (those with the parity of lo_code_point) is converted.
+enum {
+  EVEN_ODD = 1,
+  ODD_EVEN = -1,
+  EVEN_ODD_SKIP = 1<<30,
+  ODD_EVEN_SKIP
+};
+
+// One range of a simple (one code point to one code point) case mapping.
+struct CaseConversionRecord {
+    uint32_t lo_code_point;  // first code point of the range (inclusive)
+    uint32_t hi_code_point;  // last code point of the range (inclusive)
+    int32_t delta;           // offset to add, or one of the enum values above
+};
+
+// One entry of a special-casing table: a code point whose converted form is
+// given directly as a string.
+struct SpecialCasingConversionRecord {
+    uint32_t code_point;
+    const char* replacement;  // NUL-terminated UTF-8 octets
+};
+
+extern const size_t LOWER_CASE_RANGES_SIZE;
+extern const CaseConversionRecord LOWER_CASE_RANGES[];
+
+extern const size_t UPPER_CASE_RANGES_SIZE;
+extern const CaseConversionRecord UPPER_CASE_RANGES[];
+
+extern const size_t TITLE_CASE_RANGES_SIZE;
+extern const CaseConversionRecord TITLE_CASE_RANGES[];
+
+extern const size_t LOWER_SPECIAL_CASING_SIZE;
+extern const SpecialCasingConversionRecord LOWER_SPECIAL_CASING[];
+
+extern const size_t TITLE_SPECIAL_CASING_SIZE;
+extern const SpecialCasingConversionRecord TITLE_SPECIAL_CASING[];
+
+extern const size_t UPPER_SPECIAL_CASING_SIZE;
+extern const SpecialCasingConversionRecord UPPER_SPECIAL_CASING[];
+
+#endif
diff --git a/utf8case/contextual_case_converter.hpp b/utf8case/contextual_case_converter.hpp
new file mode 100644
index 0000000..0a6e9de
--- /dev/null
+++ b/utf8case/contextual_case_converter.hpp
@@ -0,0 +1,21 @@
+#ifndef CONTEXTUAL_CASE_CONVERTER_HDR
+#define CONTEXTUAL_CASE_CONVERTER_HDR
+
+#include <boost/cstdint.hpp>
+
+// Interface of converters whose result for a code point may depend on the
+// neighbouring code points.
+class ContextualCaseConverter {
+public:
+    virtual ~ContextualCaseConverter() {}
+
+    // Returns the replacement for `code_point` as a C string, or a null
+    // pointer when this converter has no replacement for it (the caller
+    // then falls back to its other converters).
+    virtual const char* convert(
+        uint32_t prev_code_point,
+        uint32_t code_point,
+        uint32_t next_code_point) = 0;
+};
+
+#endif
diff --git a/utf8case/general_case_converter.hpp b/utf8case/general_case_converter.hpp
new file mode 100644
index 0000000..6d5439e
--- /dev/null
+++ b/utf8case/general_case_converter.hpp
@@ -0,0 +1,188 @@
+#ifndef GENERAL_CASE_CONVERTER_HDR
+#define GENERAL_CASE_CONVERTER_HDR
+
+#include <boost/shared_ptr.hpp>
+
+#include "range_based_case_converter.hpp"
+#include "special_casing_converter.hpp"
+#include "contextual_case_converter.hpp"
+
+#include "utf8/utf8.h"
+
+// Converts the case of UTF-8 text read through `octet_iterator` and writes
+// the result through `output_iterator`.
+//
+// Every code point is offered to the contextual converter first, then to the
+// special-casing table, and finally to the range-based table. The input is
+// decoded with utf8::unchecked and is therefore expected to be valid UTF-8.
+template<typename octet_iterator, typename output_iterator>
+class GeneralCaseConverter {
+
+public:
+    GeneralCaseConverter(
+        boost::shared_ptr<RangeBasedCaseConverter> rangeBasedCaseConverter,
+        boost::shared_ptr<SpecialCasingConverter> specialCasingConverter,
+        boost::shared_ptr<ContextualCaseConverter> contextualCaseConverter)
+        :rangeBasedCaseConverter_(rangeBasedCaseConverter),
+         specialCasingConverter_(specialCasingConverter),
+         contextualCaseConverter_(contextualCaseConverter) {
+    }
+
+    // Returns true if some code point of [start, end) has a special-casing
+    // entry or is changed by the range-based table.
+    // NOTE(review): the contextual converter is not consulted here, and a
+    // special-casing entry counts even when its replacement equals the input.
+    bool willBeTouchedWhenConverted(octet_iterator start, octet_iterator end) const {
+        while (start != end) {
+            uint32_t code_point = utf8::unchecked::next(start);
+
+            if (specialCasingConverter_->convert(code_point)
+                || rangeBasedCaseConverter_->convert(code_point) != code_point)
+                return true;
+        }
+
+        return false;
+    }
+
+    // Same check restricted to the first code point of [start, end).
+    bool willBeTouchedWhenHeadConverted(octet_iterator start, octet_iterator end) const {
+        if (start == end)
+            return false;
+
+        octet_iterator prev_start = start;
+        utf8::unchecked::next(start);
+        return willBeTouchedWhenConverted(prev_start, start);
+    }
+
+    // Same check for everything after the first code point of [start, end).
+    bool willBeTouchedWhenTailConverted(octet_iterator start, octet_iterator end) const {
+        if (start == end)
+            return false;
+
+        utf8::unchecked::next(start);
+        return willBeTouchedWhenConverted(start, end);
+    }
+
+    // Converts all code points of [start, end) and writes them to `out`.
+    void convert(octet_iterator start, octet_iterator end, output_iterator out) const {
+        convertRange_(start, end, out);
+    }
+
+    // Converts `current_code_point` (its neighbours are passed only as
+    // context for the contextual converter) and writes the result to `out`.
+    void convertSingleCodePoint(
+        uint32_t prev_code_point,
+        uint32_t current_code_point,
+        uint32_t next_code_point,
+        output_iterator out) const {
+
+        convertSingleCodePoint_(
+            prev_code_point, current_code_point, next_code_point, out);
+    }
+
+    // Converts the first code point of [start, end) and copies the remaining
+    // octets unchanged.
+    void headConvert(octet_iterator start, octet_iterator end, output_iterator out) const {
+        if (start == end)
+            return;
+
+        octet_iterator head_end = start;
+        utf8::unchecked::next(head_end);
+        out = convertRange_(start, head_end, out);
+
+        for (; head_end != end; ++head_end)
+            *out++ = *head_end;
+    }
+
+    // Writes the first code point of [start, end) unchanged and converts the
+    // rest.
+    void tailConvert(octet_iterator start, octet_iterator end, output_iterator out) const {
+        if (start == end)
+            return;
+
+        uint32_t first_code_point = utf8::unchecked::next(start);
+        out = utf8::unchecked::append(first_code_point, out);
+
+        convertRange_(start, end, out);
+    }
+
+
+private:
+    // The helpers below return the advanced output iterator. The callers
+    // must continue with the returned value: only insert iterators keep
+    // working when a stale copy is reused, plain pointers or container
+    // iterators would overwrite what has already been written.
+
+    // Implementation of convert(); returns the position past the output.
+    output_iterator convertRange_(
+        octet_iterator start, octet_iterator end, output_iterator out) const {
+
+        // Conversion lags one code point behind decoding so that the
+        // successor of the code point being converted is already known.
+        uint32_t prev_prev_code_point = SPECIAL_CODE_POINT;
+        uint32_t prev_code_point = SPECIAL_CODE_POINT;
+
+        while (start != end) {
+            uint32_t code_point = utf8::unchecked::next(start);
+
+            if (prev_code_point != SPECIAL_CODE_POINT)
+                out = convertSingleCodePoint_(
+                    prev_prev_code_point,
+                    prev_code_point,
+                    code_point,
+                    out);
+
+            prev_prev_code_point = prev_code_point;
+            prev_code_point = code_point;
+        }
+
+        // Flush the last code point, which has no successor.
+        if (prev_code_point != SPECIAL_CODE_POINT)
+            out = convertSingleCodePoint_(
+                prev_prev_code_point,
+                prev_code_point,
+                SPECIAL_CODE_POINT,
+                out);
+
+        return out;
+    }
+
+    // Implementation of convertSingleCodePoint().
+    output_iterator convertSingleCodePoint_(
+        uint32_t prev_code_point,
+        uint32_t current_code_point,
+        uint32_t next_code_point,
+        output_iterator out) const {
+
+        if (const char* contextual = contextualCaseConverter_->convert(
+                prev_code_point,
+                current_code_point,
+                next_code_point))
+            return copyCharArrayToOutputIterator_(contextual, out);
+
+        if (const char* special = specialCasingConverter_->convert(current_code_point))
+            return copyCharArrayToOutputIterator_(special, out);
+
+        return utf8::unchecked::append(
+            rangeBasedCaseConverter_->convert(current_code_point), out);
+    }
+
+    // Copies the NUL-terminated string `charVector` to `out`.
+    output_iterator copyCharArrayToOutputIterator_(
+        const char* charVector, output_iterator out) const {
+
+        while (*charVector)
+            *out++ = *charVector++;
+
+        return out;
+    }
+
+    boost::shared_ptr<RangeBasedCaseConverter> rangeBasedCaseConverter_;
+    boost::shared_ptr<SpecialCasingConverter> specialCasingConverter_;
+    boost::shared_ptr<ContextualCaseConverter> contextualCaseConverter_;
+
+    // Marks "no code point" (before the start / after the end of the text).
+    const static uint32_t SPECIAL_CODE_POINT = 0xFFFFFFFF;
+};
+
+#endif
diff --git a/utf8case/generate_case_tables.pl b/utf8case/generate_case_tables.pl
new file mode 100755
index 0000000..6a163c1
--- /dev/null
+++ b/utf8case/generate_case_tables.pl
@@ -0,0 +1,288 @@
+#!/usr/bin/perl
+
+# Generates case_tables.cpp: the tables of simple case mappings (built from
+# UnicodeData.txt) and of unconditional special casings (built from
+# SpecialCasing.txt). Both input files are downloaded from unicode.org.
+#
+# Based on ideas from re2 library.
+
+use strict;
+use warnings;
+use LWP::Simple;
+use String::Util qw(hascontent);
+use Data::Dumper;
+use Clone qw(clone);
+
+my $UNIDATA_PREFIX= q{http://unicode.org/Public/UNIDATA/};
+my $OUTPUT_CPP_FILE = 'case_tables.cpp';
+
+# Each range is [first code point, last code point, delta].
+my @lower_case_ranges;
+my @upper_case_ranges;
+my @title_case_ranges;
+
+# Each item is [code point, replacement as escaped UTF-8 octets].
+my @lower_special_casing;
+my @upper_special_casing;
+my @title_special_casing;
+
+open my $output_cpp_fh, '>', $OUTPUT_CPP_FILE
+    or die "Cannot open $OUTPUT_CPP_FILE for writing: $!\n";
+generate_intro();
+generate_standard_case_tables();
+generate_special_casing_tables();
+close $output_cpp_fh
+    or die "Cannot write $OUTPUT_CPP_FILE: $!\n";
+
+# Writes the header of the generated file.
+sub generate_intro {
+    print $output_cpp_fh <<'END_OF_INTRO';
+// GENERATED AUTOMATICALLY BY generate_case_tables.pl; DO NOT EDIT.
+
+#include "case_tables.hpp"
+
+END_OF_INTRO
+}
+
+# Builds and writes the lower, upper and title case range tables.
+sub generate_standard_case_tables {
+    my @unicode_data_lines = download_unidata_file('UnicodeData.txt');
+
+    # Fields 12, 13 and 14 of UnicodeData.txt hold the simple uppercase,
+    # lowercase and titlecase mappings.
+    for my $line (@unicode_data_lines) {
+        append_to_case_ranges(\@upper_case_ranges, $line->[0], $line->[12]);
+        append_to_case_ranges(\@lower_case_ranges, $line->[0], $line->[13]);
+        append_to_case_ranges(\@title_case_ranges, $line->[0], $line->[14]);
+    }
+
+    @lower_case_ranges = compactify(\@lower_case_ranges);
+    @upper_case_ranges = compactify(\@upper_case_ranges);
+    @title_case_ranges = compactify(\@title_case_ranges);
+
+    write_case_table('lower_case_ranges', \@lower_case_ranges);
+    print $output_cpp_fh "\n";
+
+    write_case_table('upper_case_ranges', \@upper_case_ranges);
+    print $output_cpp_fh "\n";
+
+    write_case_table('title_case_ranges', \@title_case_ranges);
+    print $output_cpp_fh "\n";
+}
+
+# Builds and writes the lower, title and upper special-casing tables.
+sub generate_special_casing_tables {
+    my @special_casing_lines = download_unidata_file('SpecialCasing.txt');
+
+    for my $line (@special_casing_lines) {
+        # A fifth field holds a condition (language or context); such
+        # entries cannot be expressed in a plain lookup table.
+        if (hascontent($line->[4])) {
+            print STDERR "This cannot be handled: ", join('; ', @{$line}),"\n";
+        } else {
+            append_to_special_casing_table(\@lower_special_casing, $line->[0], $line->[1]);
+            append_to_special_casing_table(\@title_special_casing, $line->[0], $line->[2]);
+            append_to_special_casing_table(\@upper_special_casing, $line->[0], $line->[3]);
+        }
+    }
+
+    write_special_casing_table('lower_special_casing', \@lower_special_casing);
+    print $output_cpp_fh "\n";
+
+    write_special_casing_table('title_special_casing', \@title_special_casing);
+    print $output_cpp_fh "\n";
+
+    write_special_casing_table('upper_special_casing', \@upper_special_casing);
+}
+
+# Downloads a file of the Unicode Character Database and returns its data
+# lines (comments and blank lines removed), each as a reference to the array
+# of its semicolon-separated fields. Dies if the download fails.
+sub download_unidata_file {
+    my ($file_name) = @_;
+
+    my $url = $UNIDATA_PREFIX . $file_name;
+
+    print STDERR "Downloading ${url}...\n";
+
+    my $contents = get($url);
+
+    # LWP::Simple::get returns undef on failure.
+    defined $contents
+        or die "Cannot download ${url}\n";
+
+    return map { [ split/\s*;\s*/ ] }
+           grep { /\S/ }
+           map{ s/\#.*\Z//; $_}
+           split/\r?\n/, $contents;
+}
+
+# Appends a single-code-point range mapping $hex_code_point to
+# $hex_modified_code_point; does nothing if there is no mapping.
+sub append_to_case_ranges {
+    my ($case_ranges_ref, $hex_code_point, $hex_modified_code_point) = @_;
+
+    if (!hascontent($hex_modified_code_point)) {
+        return;
+    }
+
+    my $code_point = hex($hex_code_point);
+    my $modified_code_point = hex($hex_modified_code_point);
+
+    push @{$case_ranges_ref},
+        [ $code_point, $code_point, delta($code_point, $modified_code_point) ];
+}
+
+# Merges neighbouring single-code-point ranges: consecutive code points with
+# the same delta become one range, and EVEN_ODD/ODD_EVEN code points that are
+# two apart become one *_SKIP range.
+sub compactify {
+    my ($case_ranges_ref) = @_;
+
+    my @new_table;
+
+    my $current_compact_range;
+
+    for my $range (@{$case_ranges_ref}) {
+        if (!defined($current_compact_range)) {
+            $current_compact_range = clone($range);
+        } elsif ($range->[2] eq $current_compact_range->[2]
+            && $range->[0] == $current_compact_range->[1] + 1) {
+            ++$current_compact_range->[1];
+        } elsif ($range->[2] eq de_skip($current_compact_range->[2])
+            && $range->[0] == $current_compact_range->[1] + 2) {
+            $current_compact_range->[1] += 2;
+            $current_compact_range->[2] = add_skip($current_compact_range->[2]);
+        } else {
+            push @new_table, $current_compact_range;
+            $current_compact_range = clone($range);
+        }
+    }
+
+    # Nothing to flush if the input table was empty.
+    if (defined($current_compact_range)) {
+        push @new_table, $current_compact_range;
+    }
+
+    return @new_table;
+}
+
+# Writes one case range table together with its size constant.
+sub write_case_table {
+    my ($name, $case_ranges_ref) = @_;
+
+    my $table_name = uc($name);
+    my $size_constant_name = $table_name . "_SIZE";
+    my $table_size = $#{$case_ranges_ref} + 1;
+
+    print $output_cpp_fh <<"END_OF_INTRO";
+const size_t $size_constant_name = $table_size;
+const CaseConversionRecord ${table_name}[$size_constant_name] = {
+END_OF_INTRO
+
+    my $string_to_prepend = '';
+
+    for my $range (@{$case_ranges_ref}) {
+        my $from = $range->[0];
+        my $to = $range->[1];
+        my $delta = $range->[2];
+
+        print $output_cpp_fh "${string_to_prepend}    {$from, $to, $delta}";
+
+        $string_to_prepend = ",\n";
+    }
+
+    print $output_cpp_fh "\n};\n";
+}
+
+# Appends an entry mapping $hex_code_point to the sequence of code points
+# listed in $hex_code_point_vector; does nothing if the sequence is empty.
+sub append_to_special_casing_table {
+    my ($special_casing_table_ref, $hex_code_point, $hex_code_point_vector) = @_;
+
+    if (!hascontent($hex_code_point_vector)) {
+        return;
+    }
+
+    my $code_point = hex($hex_code_point);
+    my @code_point_vector = map { hex($_) } split/\s+/, $hex_code_point_vector;
+
+    # An explicit dereference is required: push on a bare reference was only
+    # ever experimental and is a fatal error in Perl 5.24 and later.
+    push @{$special_casing_table_ref}, [$code_point, cpp_encode(@code_point_vector)];
+}
+
+# Writes one special-casing table, ordered by code point, together with its
+# size constant.
+sub write_special_casing_table {
+    my ($name, $special_casing_table_ref) = @_;
+
+    my $table_name = uc($name);
+    my $size_constant_name = $table_name . "_SIZE";
+    my $table_size = $#{$special_casing_table_ref} + 1;
+
+    # SpecialCasing.txt is not ordered by code point; emit the entries in
+    # ascending order so that the table can be searched with an early exit.
+    my @sorted_items = sort { $a->[0] <=> $b->[0] } @{$special_casing_table_ref};
+
+    print $output_cpp_fh <<"END_OF_INTRO";
+const size_t $size_constant_name = $table_size;
+const SpecialCasingConversionRecord ${table_name}[$size_constant_name] = {
+END_OF_INTRO
+
+    my $string_to_prepend = '';
+
+    for my $item (@sorted_items) {
+        my $code_point = $item->[0];
+        my $replacement = $item->[1];
+
+        print $output_cpp_fh "${string_to_prepend}    {$code_point, \"$replacement\"}";
+
+        $string_to_prepend = ",\n";
+    }
+
+    print $output_cpp_fh "\n};\n";
+}
+
+# Returns the UTF-8 encoding of the given code points as a sequence of \xNN
+# escapes, ready to be put inside a C++ string literal.
+sub cpp_encode {
+    my (@v) = @_;
+
+    my $s = join('', map{ chr($_) } @v);
+
+    return join('', map { "\\x$_" } unpack("U0(H2)*", $s));
+}
+
+# Strips a _SKIP suffix from a symbolic delta; any other delta yields a
+# marker that never equals a real delta.
+sub de_skip {
+    my ($delta) = @_;
+
+    if ($delta =~ /^(EVEN_ODD|ODD_EVEN)(?:_SKIP)?$/) {
+        return $1;
+    }
+
+    return 'CANNOT_BE_SKIPPED';
+}
+
+# Returns the _SKIP variant of a symbolic delta.
+sub add_skip {
+    my ($delta) = @_;
+
+    return de_skip($delta) . '_SKIP';
+}
+
+# Returns the delta stored for a mapping $from -> $to: EVEN_ODD or ODD_EVEN
+# when the two code points are neighbours, the numeric difference otherwise.
+sub delta {
+    my ($from, $to) = @_;
+
+    if ($from + 1 == $to) {
+        return $from % 2 == 0 ? 'EVEN_ODD' : 'ODD_EVEN';
+    } elsif ($from == $to + 1) {
+        return $from % 2 == 0 ? 'ODD_EVEN' : 'EVEN_ODD';
+    }
+
+    return $to - $from;
+}
diff --git a/utf8case/range_based_case_converter.cpp b/utf8case/range_based_case_converter.cpp
new file mode 100644
index 0000000..a6aa8b5
--- /dev/null
+++ b/utf8case/range_based_case_converter.cpp
@@ -0,0 +1,72 @@
+#include "range_based_case_converter.hpp"
+
+// Maps `code_point` through the conversion table. Code points that no record
+// covers, or that a *_SKIP record leaves out, are returned unchanged.
+uint32_t RangeBasedCaseConverter::convert(uint32_t code_point) const {
+    if (const CaseConversionRecord* record = findRecord_(code_point))
+        return applyRecord_(record, code_point);
+
+    return code_point;
+}
+
+// Returns the record whose [lo_code_point, hi_code_point] range contains
+// `code_point`, or a null pointer if there is none. The scan gives up at the
+// first record that starts above `code_point`.
+const CaseConversionRecord* RangeBasedCaseConverter::findRecord_(uint32_t code_point) const {
+    const CaseConversionRecord* const tableEnd = conversionTable_ + tableSize_;
+
+    for (const CaseConversionRecord* record = conversionTable_; record != tableEnd; ++record) {
+        if (code_point < record->lo_code_point)
+            break;
+
+        if (code_point <= record->hi_code_point)
+            return record;
+    }
+
+    return 0;
+}
+
+// Applies `conversionRecord` to a code point lying inside its range.
+uint32_t RangeBasedCaseConverter::applyRecord_(
+    const CaseConversionRecord* conversionRecord, uint32_t code_point) const {
+
+    return shouldBeSkipped_(conversionRecord, code_point)
+        ? code_point
+        : applyDelta_(conversionRecord->delta, code_point);
+}
+
+// A *_SKIP record converts only the code points sharing the parity of its
+// lo_code_point; the ones in between are skipped.
+bool RangeBasedCaseConverter::shouldBeSkipped_(
+    const CaseConversionRecord* conversionRecord, uint32_t code_point) const {
+
+    if (!isSkipRecord_(conversionRecord))
+        return false;
+
+    return code_point % 2 != conversionRecord->lo_code_point % 2;
+}
+
+// Tells whether the record's delta is one of the *_SKIP markers.
+bool RangeBasedCaseConverter::isSkipRecord_(const CaseConversionRecord* conversionRecord) const {
+    switch (conversionRecord->delta) {
+    case EVEN_ODD_SKIP:
+    case ODD_EVEN_SKIP:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Applies either a symbolic delta (move to the other member of an
+// (even, odd) or (odd, even) pair) or a plain numeric offset.
+uint32_t RangeBasedCaseConverter::applyDelta_(int32_t delta, uint32_t code_point) const {
+    const bool isEven = code_point % 2 == 0;
+
+    if (delta == EVEN_ODD || delta == EVEN_ODD_SKIP)
+        return isEven ? code_point + 1 : code_point - 1;
+
+    if (delta == ODD_EVEN || delta == ODD_EVEN_SKIP)
+        return isEven ? code_point - 1 : code_point + 1;
+
+    return code_point + delta;
+}
diff --git a/utf8case/range_based_case_converter.hpp b/utf8case/range_based_case_converter.hpp
new file mode 100644
index 0000000..4451f79
--- /dev/null
+++ b/utf8case/range_based_case_converter.hpp
@@ -0,0 +1,37 @@
+#ifndef RANGE_BASED_CASE_CONVERTED_HDR
+#define RANGE_BASED_CASE_CONVERTED_HDR
+
+#include "case_tables.hpp"
+
+// Simple (one code point to one code point) case conversion driven by a
+// table of CaseConversionRecord ranges.
+class RangeBasedCaseConverter {
+
+public:
+    // `conversionTable` points to `tableSize` records; the table is not
+    // copied, only the pointer is stored.
+    RangeBasedCaseConverter(size_t tableSize, const CaseConversionRecord* conversionTable)
+        : tableSize_(tableSize),
+          conversionTable_(conversionTable) {}
+
+    // Returns the converted code point.
+    uint32_t convert(uint32_t code_point) const;
+
+private:
+    const CaseConversionRecord* findRecord_(uint32_t code_point) const;
+
+    uint32_t applyRecord_(
+        const CaseConversionRecord* conversionRecord, uint32_t code_point) const;
+
+    bool shouldBeSkipped_(
+        const CaseConversionRecord* conversionRecord, uint32_t code_point) const;
+
+    bool isSkipRecord_(const CaseConversionRecord* conversionRecord) const;
+
+    uint32_t applyDelta_(int32_t delta, uint32_t code_point) const;
+
+    size_t tableSize_;
+    const CaseConversionRecord* conversionTable_;
+};
+
+#endif
diff --git a/utf8case/regular_contextual_case_converter.cpp b/utf8case/regular_contextual_case_converter.cpp
new file mode 100644
index 0000000..184b6db
--- /dev/null
+++ b/utf8case/regular_contextual_case_converter.cpp
@@ -0,0 +1,14 @@
+#include "regular_contextual_case_converter.hpp"
+
+RegularContextualCaseConverter::~RegularContextualCaseConverter() {}
+
+// There are no context-dependent rules here: a null pointer is returned for
+// every code point, whatever its neighbours are.
+const char* RegularContextualCaseConverter::convert(
+    uint32_t /*prev_code_point*/,
+    uint32_t /*code_point*/,
+    uint32_t /*next_code_point*/) {
+
+    const char* const noContextualReplacement = 0;
+    return noContextualReplacement;
+}
diff --git a/utf8case/regular_contextual_case_converter.hpp b/utf8case/regular_contextual_case_converter.hpp
new file mode 100644
index 0000000..3b02e10
--- /dev/null
+++ b/utf8case/regular_contextual_case_converter.hpp
@@ -0,0 +1,18 @@
+#ifndef REGULAR_CONTEXTUAL_CASE_CONVERTER_HDR
+#define REGULAR_CONTEXTUAL_CASE_CONVERTER_HDR
+
+#include "contextual_case_converter.hpp"
+
+// ContextualCaseConverter without any contextual rules.
+class RegularContextualCaseConverter : public ContextualCaseConverter {
+public:
+    virtual ~RegularContextualCaseConverter();
+
+    // Always returns a null pointer.
+    virtual const char* convert(
+        uint32_t prev_code_point,
+        uint32_t code_point,
+        uint32_t next_code_point);
+};
+
+#endif
diff --git a/utf8case/simple_convert.cpp b/utf8case/simple_convert.cpp
new file mode 100644
index 0000000..62304e5
--- /dev/null
+++ b/utf8case/simple_convert.cpp
@@ -0,0 +1,65 @@
+#include "simple_convert.hpp"
+
+#include <iterator>
+#include <string>
+
+// Thin std::string wrappers around the iterator-based member functions of
+// the converter.
+
+std::string simpleConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    std::string result;
+    // Only a hint that saves reallocations; the result may be longer.
+    result.reserve(s.size());
+
+    converter.convert(s.begin(), s.end(), std::back_inserter(result));
+
+    return result;
+}
+
+std::string simpleHeadConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    std::string result;
+    result.reserve(s.size());
+
+    converter.headConvert(s.begin(), s.end(), std::back_inserter(result));
+
+    return result;
+}
+
+std::string simpleTailConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    std::string result;
+    result.reserve(s.size());
+
+    converter.tailConvert(s.begin(), s.end(), std::back_inserter(result));
+
+    return result;
+}
+
+bool simpleWillBeTouchedWhenConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    return converter.willBeTouchedWhenConverted(s.begin(), s.end());
+}
+
+bool simpleWillBeTouchedWhenHeadConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    return converter.willBeTouchedWhenHeadConverted(s.begin(), s.end());
+}
+
+bool simpleWillBeTouchedWhenTailConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s) {
+
+    return converter.willBeTouchedWhenTailConverted(s.begin(), s.end());
+}
diff --git a/utf8case/simple_convert.hpp b/utf8case/simple_convert.hpp
new file mode 100644
index 0000000..4e0011e
--- /dev/null
+++ b/utf8case/simple_convert.hpp
@@ -0,0 +1,43 @@
+#ifndef SIMPLE_CONVERT_HDR
+#define SIMPLE_CONVERT_HDR
+
+#include <iterator>
+#include <string>
+
+#include "general_case_converter.hpp"
+
+// Case converter reading from a std::string and appending to a std::string.
+typedef GeneralCaseConverter<std::string::const_iterator,
+                             std::back_insert_iterator<std::string> > StringGeneralCaseConverter;
+
+// Returns `s` with every code point converted.
+std::string simpleConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+// Returns `s` with only its first code point converted.
+std::string simpleHeadConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+// Returns `s` with everything but its first code point converted.
+std::string simpleTailConvert(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+// The functions below forward to the willBeTouchedWhen...Converted member
+// function of `converter` that corresponds to the conversion above.
+
+bool simpleWillBeTouchedWhenConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+bool simpleWillBeTouchedWhenHeadConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+bool simpleWillBeTouchedWhenTailConverted(
+    const StringGeneralCaseConverter& converter,
+    const std::string& s);
+
+#endif
diff --git a/utf8case/special_casing_converter.cpp b/utf8case/special_casing_converter.cpp
new file mode 100644
index 0000000..c23afb9
--- /dev/null
+++ b/utf8case/special_casing_converter.cpp
@@ -0,0 +1,19 @@
+#include "special_casing_converter.hpp"
+
+// Returns the replacement string registered for `code_point`, or a null
+// pointer if the table has no entry for it.
+const char* SpecialCasingConverter::convert(uint32_t code_point) const {
+
+    // The whole table has to be scanned: the generated special-casing
+    // tables are not ordered by code point (they start with 223, 304, 64256,
+    // 64257, ...), so giving up at the first larger entry would hide most of
+    // the entries that follow it.
+    for (size_t i = 0; i < tableSize_; ++i) {
+        const SpecialCasingConversionRecord* currentRecord = &conversionTable_[i];
+
+        if (code_point == currentRecord->code_point)
+            return currentRecord->replacement;
+    }
+
+    return 0;
+}
diff --git a/utf8case/special_casing_converter.hpp b/utf8case/special_casing_converter.hpp
new file mode 100644
index 0000000..08f7e92
--- /dev/null
+++ b/utf8case/special_casing_converter.hpp
@@ -0,0 +1,25 @@
+#ifndef SPECIAL_CASING_CONVERTER_HDR
+#define SPECIAL_CASING_CONVERTER_HDR
+
+#include "case_tables.hpp"
+
+// Looks up code points in a table of SpecialCasingConversionRecord entries.
+class SpecialCasingConverter {
+
+public:
+    // `conversionTable` points to `tableSize` records; the table is not
+    // copied, only the pointer is stored.
+    SpecialCasingConverter(size_t tableSize, const SpecialCasingConversionRecord* conversionTable)
+        : tableSize_(tableSize),
+          conversionTable_(conversionTable) {}
+
+    // Returns the replacement string found for `code_point`, or a null
+    // pointer if there is none.
+    const char* convert(uint32_t code_point) const;
+
+private:
+    size_t tableSize_;
+    const SpecialCasingConversionRecord* conversionTable_;
+};
+
+#endif
diff --git a/utf8case/string_case_converter_manager.cpp b/utf8case/string_case_converter_manager.cpp
new file mode 100644
index 0000000..0af4316
--- /dev/null
+++ b/utf8case/string_case_converter_manager.cpp
@@ -0,0 +1,9 @@
+#include "string_case_converter_manager.hpp"
+
+StringCaseConverterManager::StringCaseConverterManager() {}
+
+// Returns the function-local static instance, created on the first call.
+StringCaseConverterManager& StringCaseConverterManager::getInstance() {
+    static StringCaseConverterManager theInstance;
+    return theInstance;
+}
diff --git a/utf8case/string_case_converter_manager.hpp b/utf8case/string_case_converter_manager.hpp
new file mode 100644
index 0000000..c4dd5e2
--- /dev/null
+++ b/utf8case/string_case_converter_manager.hpp
@@ -0,0 +1,23 @@
+#ifndef STRING_CASE_CONVERTER_MANAGER_HDR
+#define STRING_CASE_CONVERTER_MANAGER_HDR
+
+#include <iterator>
+#include <string>
+
+#include "case_converter_factory.hpp"
+
+// CaseConverterFactory for std::string input and output, reachable through
+// getInstance().
+class StringCaseConverterManager : public CaseConverterFactory<
+    std::string::const_iterator, std::back_insert_iterator<std::string> > {
+
+public:
+    // Returns the shared instance.
+    static StringCaseConverterManager& getInstance();
+
+private:
+    // Private: instances are obtained through getInstance().
+    StringCaseConverterManager();
+};
+
+#endif
diff --git a/utf8case/t/CMakeLists.txt b/utf8case/t/CMakeLists.txt
new file mode 100644
index 0000000..a550899
--- /dev/null
+++ b/utf8case/t/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Boost.Test suites of the utf8case library.
+add_library(utf8case-tests
+    range_based_case_converter_tests.cpp
+    simple_convert_tests.cpp
+    special_casing_converter_tests.cpp
+)
+
+target_link_libraries(utf8case-tests utf8case)
diff --git a/utf8case/t/range_based_case_converter_tests.cpp b/utf8case/t/range_based_case_converter_tests.cpp
new file mode 100644
index 0000000..cd33d3a
--- /dev/null
+++ b/utf8case/t/range_based_case_converter_tests.cpp
@@ -0,0 +1,37 @@
+#include "tests/tests.hpp"
+
+#include "utf8case/range_based_case_converter.hpp"
+
+BOOST_AUTO_TEST_SUITE( utf8case )
+
+// Checks that lower-casing turns `upper_code_point` into `lower_code_point`
+// and leaves `lower_code_point` as it is.
+void lower_single_letter_checker(uint32_t lower_code_point, uint32_t upper_code_point) {
+    const RangeBasedCaseConverter lowerCaser(LOWER_CASE_RANGES_SIZE, LOWER_CASE_RANGES);
+
+    BOOST_CHECK_EQUAL(lowerCaser.convert(upper_code_point), lower_code_point);
+    BOOST_CHECK_EQUAL(lowerCaser.convert(lower_code_point), lower_code_point);
+}
+
+BOOST_AUTO_TEST_CASE( range_based_case_converter ) {
+    const RangeBasedCaseConverter lowerCaser(LOWER_CASE_RANGES_SIZE, LOWER_CASE_RANGES);
+
+    // A character without case comes back unchanged.
+    const uint32_t COMMA_CODE_POINT = 44U;
+    BOOST_CHECK_EQUAL(lowerCaser.convert(COMMA_CODE_POINT), COMMA_CODE_POINT);
+
+    // {lower, upper} code points of the letters checked below.
+    const uint32_t LETTER_PAIRS[][2] = {
+        {102U, 70U},     // f, F
+        {97U, 65U},      // a, A
+        {122U, 90U},     // z, Z
+        {281U, 280U},    // e with ogonek
+        {1097U, 1065U}   // Cyrillic shcha
+    };
+    const size_t pairCount = sizeof(LETTER_PAIRS) / sizeof(LETTER_PAIRS[0]);
+
+    for (size_t i = 0; i < pairCount; ++i)
+        lower_single_letter_checker(LETTER_PAIRS[i][0], LETTER_PAIRS[i][1]);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/utf8case/t/simple_convert_tests.cpp b/utf8case/t/simple_convert_tests.cpp
new file mode 100644
index 0000000..3f72e20
--- /dev/null
+++ b/utf8case/t/simple_convert_tests.cpp
@@ -0,0 +1,176 @@
+#include "tests/tests.hpp"
+
+#include "utf8case/simple_convert.hpp"
+#include "utf8case/case_converter_factory.hpp"
+#include "utf8case/string_case_converter_manager.hpp"
+
+BOOST_AUTO_TEST_SUITE( utf8case )
+
+// Lower-casing with the "pl" converter: whole string, first character only, and all but first.
+BOOST_AUTO_TEST_CASE( simple_convert_lower ) {
+    boost::shared_ptr<StringGeneralCaseConverter> lowerConverter(
+        StringCaseConverterManager::getInstance().getLowerCaseConverter("pl"));
+
+    // Whole-string conversion: ASCII, Polish diacritics, mixed case, Cyrillic.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "KOMPUTER"), std::string("komputer"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "ŹDŹBŁO"), std::string("źdźbło"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "Zażółć gęślą JAŹŃ"), std::string("zażółć gęślą jaźń"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "I"), std::string("i"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "БУКВЫ"), std::string("буквы"));
+
+    // Head conversion is expected to change the first character only, tail conversion the rest.
+    BOOST_CHECK_EQUAL(
+        simpleHeadConvert(*lowerConverter, "ŹDŹBŁO"), std::string("źDŹBŁO"));
+
+    BOOST_CHECK_EQUAL(
+        simpleTailConvert(*lowerConverter, "ŹDŹBŁO"), std::string("Źdźbło"));
+
+    // A single multi-byte character is all head and no tail.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, "Ś"), std::string("ś"));
+
+    BOOST_CHECK_EQUAL(
+        simpleHeadConvert(*lowerConverter, "Ś"), std::string("ś"));
+
+    BOOST_CHECK_EQUAL(
+        simpleTailConvert(*lowerConverter, "Ś"), std::string("Ś"));
+
+    // Empty input is expected to come back empty from all three variants.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*lowerConverter, ""), std::string(""));
+
+    BOOST_CHECK_EQUAL(
+        simpleHeadConvert(*lowerConverter, ""), std::string(""));
+
+    BOOST_CHECK_EQUAL(
+        simpleTailConvert(*lowerConverter, ""), std::string(""));
+}
+
+
+// The simpleWillBeTouchedWhen*Converted helpers, checked against the "pl" upper-case converter.
+BOOST_AUTO_TEST_CASE( will_be_touched ) {
+    boost::shared_ptr<StringGeneralCaseConverter> upperConverter(
+        StringCaseConverterManager::getInstance().getUpperCaseConverter("pl"));
+    BOOST_CHECK(simpleWillBeTouchedWhenConverted(*upperConverter, "KOMPUTEr"));
+    BOOST_CHECK(simpleWillBeTouchedWhenTailConverted(*upperConverter, "KOMPUTEr"));
+    BOOST_CHECK(!simpleWillBeTouchedWhenHeadConverted(*upperConverter, "KOMPUTEr"));
+    // Already upper-case throughout: no variant is expected to change anything.
+    BOOST_CHECK(!simpleWillBeTouchedWhenConverted(*upperConverter, "KOMPUTER"));
+    BOOST_CHECK(!simpleWillBeTouchedWhenTailConverted(*upperConverter, "KOMPUTER"));
+    BOOST_CHECK(!simpleWillBeTouchedWhenHeadConverted(*upperConverter, "KOMPUTER"));
+    // Only the first letter is lower-case: the head is touched, the tail is not.
+    BOOST_CHECK(simpleWillBeTouchedWhenConverted(*upperConverter, "śNIEG"));
+    BOOST_CHECK(!simpleWillBeTouchedWhenTailConverted(*upperConverter, "śNIEG"));
+    BOOST_CHECK(simpleWillBeTouchedWhenHeadConverted(*upperConverter, "śNIEG"));
+    // A single lower-case letter counts as head, not as tail.
+    BOOST_CHECK(simpleWillBeTouchedWhenConverted(*upperConverter, "ź"));
+    BOOST_CHECK(!simpleWillBeTouchedWhenTailConverted(*upperConverter, "ź"));
+    BOOST_CHECK(simpleWillBeTouchedWhenHeadConverted(*upperConverter, "ź"));
+    // Empty input is never reported as touched.
+    BOOST_CHECK(!simpleWillBeTouchedWhenConverted(*upperConverter, ""));
+    BOOST_CHECK(!simpleWillBeTouchedWhenTailConverted(*upperConverter, ""));
+    BOOST_CHECK(!simpleWillBeTouchedWhenHeadConverted(*upperConverter, ""));
+}
+
+// Upper-casing with the "pl" converter, including code points that expand to several letters.
+BOOST_AUTO_TEST_CASE( simple_convert_upper ) {
+    boost::shared_ptr<StringGeneralCaseConverter> upperConverter(
+        StringCaseConverterManager::getInstance().getUpperCaseConverter("pl"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*upperConverter, "komputer"), std::string("KOMPUTER"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*upperConverter, "źdźbło"), std::string("ŹDŹBŁO"));
+
+    // Sharp s and the ffi ligature are each expected to upper-case to more than one letter.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*upperConverter, "daß"), std::string("DASS"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*upperConverter, "ﬃ"), std::string("FFI"));
+}
+
+
+// Title-casing with the "pl" converter; the expected values show every character title-cased.
+BOOST_AUTO_TEST_CASE( simple_convert_title ) {
+    boost::shared_ptr<StringGeneralCaseConverter> titleConverter(
+        StringCaseConverterManager::getInstance().getTitleCaseConverter("pl"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*titleConverter, "źdźbło"), std::string("ŹDŹBŁO"));
+
+    // Multi-letter title forms: sharp s gives "Ss", the ffi ligature gives "Ffi".
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*titleConverter, "daß"), std::string("DASs"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*titleConverter, "ﬃ"), std::string("Ffi"));
+}
+
+// Lower-casing of I-like letters: default ("pl") rules compared with Turkish ("tr") rules.
+BOOST_AUTO_TEST_CASE( simple_turkish_lower ) {
+    boost::shared_ptr<StringGeneralCaseConverter> standardLowerConverter(
+        StringCaseConverterManager::getInstance().getLowerCaseConverter("pl"));
+    boost::shared_ptr<StringGeneralCaseConverter> turkishLowerConverter(
+        StringCaseConverterManager::getInstance().getLowerCaseConverter("tr"));
+
+    // Capital I: dotted i by default, dotless i for Turkish.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardLowerConverter, "YAZICI"), std::string("yazici"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishLowerConverter, "YAZICI"), std::string("yazıcı"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardLowerConverter, "I"), std::string("i"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishLowerConverter, "I"), std::string("ı"));
+
+    // Capital I with dot above: i plus combining dot by default, plain i for Turkish.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardLowerConverter, "İ"), std::string("i̇"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishLowerConverter, "İ"), std::string("i"));
+}
+
+// Upper-casing of i-like letters: default ("pl") rules compared with Turkish ("tr") rules.
+BOOST_AUTO_TEST_CASE( simple_turkish_upper ) {
+    boost::shared_ptr<StringGeneralCaseConverter> standardUpperConverter(
+        StringCaseConverterManager::getInstance().getUpperCaseConverter("pl"));
+    boost::shared_ptr<StringGeneralCaseConverter> turkishUpperConverter(
+        StringCaseConverterManager::getInstance().getUpperCaseConverter("tr"));
+
+    // Small i: plain capital I by default, capital I with dot above for Turkish.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardUpperConverter, "yazici"), std::string("YAZICI"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishUpperConverter, "yazici"), std::string("YAZİCİ"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardUpperConverter, "i"), std::string("I"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishUpperConverter, "i"), std::string("İ"));
+
+    // Dotless i is expected to upper-case to plain capital I under both converters.
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*standardUpperConverter, "ı"), std::string("I"));
+
+    BOOST_CHECK_EQUAL(
+        simpleConvert(*turkishUpperConverter, "ı"), std::string("I"));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/utf8case/t/special_casing_converter_tests.cpp b/utf8case/t/special_casing_converter_tests.cpp
new file mode 100644
index 0000000..f320e86
--- /dev/null
+++ b/utf8case/t/special_casing_converter_tests.cpp
@@ -0,0 +1,32 @@
+#include "tests/tests.hpp"
+
+#include "utf8case/special_casing_converter.hpp"
+
+BOOST_AUTO_TEST_SUITE( utf8case )
+
+// Upper-case special casing: code points without a special entry are expected to yield 0.
+BOOST_AUTO_TEST_CASE( special_casing_converter ) {
+    SpecialCasingConverter converter(UPPER_SPECIAL_CASING_SIZE, UPPER_SPECIAL_CASING);
+
+    const uint32_t COMMA_CODE_POINT = 0x002CU;
+    BOOST_CHECK_EQUAL(converter.convert(COMMA_CODE_POINT), (const char*)0);
+
+    const uint32_t UPPER_F_CODE_POINT = 0x0046U;
+    const uint32_t LOWER_F_CODE_POINT = 0x0066U;
+    BOOST_CHECK_EQUAL(converter.convert(UPPER_F_CODE_POINT), (const char*)0);
+    BOOST_CHECK_EQUAL(converter.convert(LOWER_F_CODE_POINT), (const char*)0);
+
+    const uint32_t UPPER_SHCHA_CODE_POINT = 0x0429U;
+    const uint32_t LOWER_SHCHA_CODE_POINT = 0x0449U;
+    BOOST_CHECK_EQUAL(converter.convert(UPPER_SHCHA_CODE_POINT), (const char*)0);
+    BOOST_CHECK_EQUAL(converter.convert(LOWER_SHCHA_CODE_POINT), (const char*)0);
+    // U+00DF (sharp s) is the one code point here with a multi-letter upper-case form.
+    const uint32_t ESZET_CODE_POINT = 0x00DFU;
+    BOOST_CHECK_EQUAL(converter.convert(ESZET_CODE_POINT), "SS");
+}
+
+BOOST_AUTO_TEST_CASE( special_casing_converter2 ) {
+    // NOTE(review): compares two identical literals; no converter code is exercised.
+    BOOST_CHECK_EQUAL("SS", "SS");
+}
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/utf8case/turkish_and_azeri_lower_contextual_case_converter.cpp b/utf8case/turkish_and_azeri_lower_contextual_case_converter.cpp
new file mode 100644
index 0000000..fed182c
--- /dev/null
+++ b/utf8case/turkish_and_azeri_lower_contextual_case_converter.cpp
@@ -0,0 +1,29 @@
+#include "turkish_and_azeri_lower_contextual_case_converter.hpp"
+
+// Empty out-of-line definition of the virtual destructor declared in the header.
+TurkishAndAzeriLowerContextualCaseConverter::~TurkishAndAzeriLowerContextualCaseConverter() {}
+
+// Turkish/Azeri lower-casing rules for capital I, capital I with dot above and the
+// combining dot above. Yields the replacement text, or 0 when none of the rules match.
+const char* TurkishAndAzeriLowerContextualCaseConverter::convert(
+    uint32_t prev_code_point, uint32_t code_point, uint32_t next_code_point) {
+    if (code_point == LATIN_CAPITAL_LETTER_I) {
+        // Capital I becomes dotless i unless a combining dot above follows it.
+        if (next_code_point == DOT_ABOVE) return 0;
+        return "ı";
+    }
+    if (code_point == DOT_ABOVE) {
+        // A combining dot right after capital I is replaced by the empty string.
+        return (prev_code_point == LATIN_CAPITAL_LETTER_I) ? "" : 0;
+    }
+    // Capital I with dot above becomes plain i; everything else has no rule here.
+    return (code_point == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) ? "i" : 0;
+}
+
+// Code points compared against in TurkishAndAzeriLowerContextualCaseConverter::convert.
+const uint32_t TurkishAndAzeriLowerContextualCaseConverter::LATIN_CAPITAL_LETTER_I = 0x0049;
+
+const uint32_t TurkishAndAzeriLowerContextualCaseConverter::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE =
+    0x0130;
+
+const uint32_t TurkishAndAzeriLowerContextualCaseConverter::DOT_ABOVE = 0x0307;
diff --git a/utf8case/turkish_and_azeri_lower_contextual_case_converter.hpp b/utf8case/turkish_and_azeri_lower_contextual_case_converter.hpp
new file mode 100644
index 0000000..ba14436
--- /dev/null
+++ b/utf8case/turkish_and_azeri_lower_contextual_case_converter.hpp
@@ -0,0 +1,21 @@
+#ifndef TURKISH_AND_AZERI_LOWER_CONTEXTUAL_CASE_CONVERTER_HDR
+#define TURKISH_AND_AZERI_LOWER_CONTEXTUAL_CASE_CONVERTER_HDR
+
+#include "contextual_case_converter.hpp"
+
+// Contextual converter holding the Turkish and Azeri lower-casing rules for capital I.
+class TurkishAndAzeriLowerContextualCaseConverter : public ContextualCaseConverter {
+public:
+    virtual ~TurkishAndAzeriLowerContextualCaseConverter();
+    // Returns the replacement for code_point given its neighbours, or 0 if no rule applies.
+    virtual const char* convert(
+        uint32_t prev_code_point, uint32_t code_point, uint32_t next_code_point);
+
+private:
+    static const uint32_t LATIN_CAPITAL_LETTER_I;
+    static const uint32_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
+    static const uint32_t DOT_ABOVE;
+};
+
+
+#endif
diff --git a/utf8case/turkish_and_azeri_upper_contextual_case_converter.cpp b/utf8case/turkish_and_azeri_upper_contextual_case_converter.cpp
new file mode 100644
index 0000000..7a70c5f
--- /dev/null
+++ b/utf8case/turkish_and_azeri_upper_contextual_case_converter.cpp
@@ -0,0 +1,17 @@
+#include "turkish_and_azeri_upper_contextual_case_converter.hpp"
+
+// Empty out-of-line definition of the virtual destructor declared in the header.
+TurkishAndAzeriUpperContextualCaseConverter::~TurkishAndAzeriUpperContextualCaseConverter() {}
+
+// Turkish/Azeri upper-casing: small i maps to capital I with dot above. The neighbouring
+// code points are ignored. Yields 0 for every other code point.
+const char* TurkishAndAzeriUpperContextualCaseConverter::convert(
+    uint32_t /*prev_code_point*/, uint32_t code_point, uint32_t /*next_code_point*/) {
+    if (code_point != LATIN_SMALL_LETTER_I) {
+        return 0;  // no rule for this code point
+    }
+
+    return "İ";
+}
+
+const uint32_t TurkishAndAzeriUpperContextualCaseConverter::LATIN_SMALL_LETTER_I = 0x0069;  // code point compared against in convert()
diff --git a/utf8case/turkish_and_azeri_upper_contextual_case_converter.hpp b/utf8case/turkish_and_azeri_upper_contextual_case_converter.hpp
new file mode 100644
index 0000000..d5a845a
--- /dev/null
+++ b/utf8case/turkish_and_azeri_upper_contextual_case_converter.hpp
@@ -0,0 +1,20 @@
+#ifndef TURKISH_AND_AZERI_UPPER_CONTEXTUAL_CASE_CONVERTER_HDR
+#define TURKISH_AND_AZERI_UPPER_CONTEXTUAL_CASE_CONVERTER_HDR
+
+#include "contextual_case_converter.hpp"
+
+// Contextual converter holding the Turkish and Azeri upper-casing rule for small i.
+class TurkishAndAzeriUpperContextualCaseConverter : public ContextualCaseConverter {
+public:
+    virtual ~TurkishAndAzeriUpperContextualCaseConverter();
+
+    // Returns the replacement for code_point, or 0 if no rule applies.
+    virtual const char* convert(
+        uint32_t prev_code_point, uint32_t code_point, uint32_t next_code_point);
+
+private:
+    static const uint32_t LATIN_SMALL_LETTER_I;
+};
+
+
+#endif