AIprojekt-wozek/venv/Lib/site-packages/pip/_vendor/pyparsing/unicode.py

# unicode.py

import sys
from itertools import filterfalse
from typing import List, Tuple, Union


class _lazyclassproperty:
    """
    Descriptor that evaluates a class-level property once per class and
    caches the result.

    The wrapped function receives the owning class as its only argument.
    Results are stored in a ``_intern`` dict attached to the class, keyed
    by the wrapped function's name.
    """

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls

        # The owner needs a cache of its own when it has none at all, or
        # when the one it sees is really a superclass's dict reached through
        # inheritance (sharing it would hand back the superclass's values).
        needs_own_cache = not hasattr(owner, "_intern")
        if not needs_own_cache:
            for base in owner.__mro__[1:]:
                if owner._intern is getattr(base, "_intern", []):
                    needs_own_cache = True
                    break
        if needs_own_cache:
            owner._intern = {}

        key = self.fn.__name__
        if key not in owner._intern:
            computed = self.fn(owner)
            owner._intern[key] = computed
        return owner._intern[key]


# Type of a ``_ranges`` list: each entry is either a 2-tuple of code points
# (first, last), inclusive at both ends, or a 1-tuple holding one code point.
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]


class unicode_set:
    """
    Base class for a set of Unicode characters, exposing language-specific
    strings such as ``alphas``, ``nums``, ``alphanums``, and ``printables``.

    A subclass declares the code points it covers in a class attribute
    ``_ranges``, a list whose entries are 2-tuples or 1-tuples, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
            ]

    Both ends of a range are included. A 1-tuple of (x,) means the same
    as (x, x).

    Sets can also be combined through multiple inheritance of other
    unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        # Gather the code points of this class and of every class ahead of
        # ``unicode_set`` in its MRO, then return them as a sorted list of
        # distinct one-character strings.
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in getattr(klass, "_ranges", ()):
                # bounds[-1] is bounds[0] itself for a 1-tuple
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return list(map(chr, sorted(codepoints)))

    @_lazyclassproperty
    def printables(cls):
        "every character in this set that is not whitespace"
        non_space = filterfalse(str.isspace, cls._chars_for_ranges)
        return "".join(non_space)

    @_lazyclassproperty
    def alphas(cls):
        "every alphabetic character in this set"
        return "".join([ch for ch in cls._chars_for_ranges if ch.isalpha()])

    @_lazyclassproperty
    def nums(cls):
        "every numeric digit character in this set"
        return "".join([ch for ch in cls._chars_for_ranges if ch.isdigit()])

    @_lazyclassproperty
    def alphanums(cls):
        "every alphanumeric character in this set (``alphas`` followed by ``nums``)"
        return "".join([cls.alphas, cls.nums])

    @_lazyclassproperty
    def identchars(cls):
        """
        every character in this set that is a valid identifier on its own,
        plus underscore '_' and a fixed group of Latin letters
        """
        starters = {ch for ch in cls._chars_for_ranges if ch.isidentifier()}
        # These characters are always included, whatever ``_ranges`` holds.
        starters.update(
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº",
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ",
            "_",
        )
        return "".join(sorted(starters))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        every character in this set that may appear after the first
        character of an identifier, plus ``identchars`` and the digits 0-9
        """
        body = set(cls.identchars)
        body.update("0123456789")
        # A character is a valid body character when it forms an identifier
        # once something valid (here "_") precedes it.
        body.update(
            ch for ch in cls._chars_for_ranges if ("_" + ch).isidentifier()
        )
        return "".join(sorted(body))


class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class is a ``unicode_set`` whose ``_ranges`` lists the code
    points of one script or language. The namespace class is itself a
    ``unicode_set`` with a single range of its own.
    """

    # From U+0020 (space) up to the largest code point Python supports.
    _ranges: UnicodeRangeList = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0020, 0x007E),
            (0x00A0, 0x00FF),
        ]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0100, 0x017F),
        ]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0180, 0x024F),
        ]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # 1-tuples below denote single code points.
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345),
            (0x0370, 0x0377),
            (0x037A, 0x037F),
            (0x0384, 0x038A),
            (0x038C,),
            (0x038E, 0x03A1),
            (0x03A3, 0x03E1),
            (0x03F0, 0x03FF),
            (0x1D26, 0x1D2A),
            (0x1D5E,),
            (0x1D60,),
            (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15),
            (0x1F18, 0x1F1D),
            (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D),
            (0x1F50, 0x1F57),
            (0x1F59,),
            (0x1F5B,),
            (0x1F5D,),
            (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4),
            (0x1FB6, 0x1FC4),
            (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB),
            (0x1FDD, 0x1FEF),
            (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE),
            (0x2129,),
            (0x2719, 0x271A),
            (0xAB65,),
            (0x10140, 0x1018D),
            (0x101A0,),
            (0x1D200, 0x1D245),
            (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F),
            (0x1C80, 0x1C88),
            (0x1D2B,),
            (0x1D78,),
            (0x2DE0, 0x2DFF),
            (0xA640, 0xA672),
            (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99),
            (0x2E9B, 0x2EF3),
            (0x31C0, 0x31E3),
            (0x3400, 0x4DB5),
            (0x4E00, 0x9FEF),
            (0xA700, 0xA707),
            (0xF900, 0xFA6D),
            (0xFA70, 0xFAD9),
            (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212),
            (0x1F214, 0x1F23B),
            (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6),
            (0x2A700, 0x2B734),
            (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1),
            (0x2CEB0, 0x2EBE0),
            (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # Placeholder: replaced at module level, after this class body, with
        # the concatenated ranges of Kanji, Hiragana and Katakana.
        _ranges: UnicodeRangeList = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x4E00, 0x9FBF),
                (0x3000, 0x303F),
            ]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096),
                (0x3099, 0x30A0),
                (0x30FC,),
                (0xFF70,),
                (0x1B001,),
                (0x1B150, 0x1B152),
                (0x1F200,),
            ]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C),
                (0x30A0, 0x30FF),
                (0x31F0, 0x31FF),
                (0x32D0, 0x32FE),
                (0xFF65, 0xFF9F),
                (0x1B000,),
                (0x1B164, 0x1B167),
                (0x1F201, 0x1F202),
                (0x1F213,),
            ]

    class Hangul(unicode_set):
        "Unicode set for Hangul (Korean) Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF),
            (0x302E, 0x302F),
            (0x3131, 0x318E),
            (0x3200, 0x321C),
            (0x3260, 0x327B),
            (0x327E,),
            (0xA960, 0xA97C),
            (0xAC00, 0xD7A3),
            (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB),
            (0xFFA0, 0xFFBE),
            (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF),
            (0xFFD2, 0xFFD7),
            (0xFFDA, 0xFFDC),
        ]

    # ``Korean`` is a second name for the same class object as ``Hangul``.
    Korean = Hangul

    class CJK(Chinese, Japanese, Hangul):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        # No ranges of its own: the characters come from the base classes,
        # whose ``_ranges`` are collected by walking this class's MRO.
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B),
            (0x061E, 0x06FF),
            (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7),
            (0x05D0, 0x05EA),
            (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36),
            (0xFB38, 0xFB3C),
            (0xFB3E,),
            (0xFB40, 0xFB41),
            (0xFB43, 0xFB44),
            (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]


# Japanese is declared with an empty range list; now that its nested script
# classes exist, fill it with their ranges chained in order
# (Kanji, then Hiragana, then Katakana).
pyparsing_unicode.Japanese._ranges = [
    code_range
    for script in (
        pyparsing_unicode.Japanese.Kanji,
        pyparsing_unicode.Japanese.Hiragana,
        pyparsing_unicode.Japanese.Katakana,
    )
    for code_range in script._ranges
]

# Native-script aliases: expose each language set under its name written in
# that language's own script, e.g. ``pyparsing_unicode.Ελληνικά`` is the same
# class object as ``pyparsing_unicode.Greek``.
# NOTE: these are written as plain attribute assignments on purpose; Python
# NFKC-normalizes identifiers in source code, which a string-based
# ``setattr`` would not do.
pyparsing_unicode.العربية = pyparsing_unicode.Arabic
pyparsing_unicode.中文 = pyparsing_unicode.Chinese
pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic
pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek
pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew
pyparsing_unicode.日本語 = pyparsing_unicode.Japanese
pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji
pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana
pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana
pyparsing_unicode.한국어 = pyparsing_unicode.Korean
pyparsing_unicode.ไทย = pyparsing_unicode.Thai
pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari
srodowisko 2022-03-10 19:45:28 +01:00			`# unicode.py`

			`import sys`
			`from itertools import filterfalse`
			`from typing import List, Tuple, Union`


			`class _lazyclassproperty:`
			`def __init__(self, fn):`
			`self.fn = fn`
			`self.__doc__ = fn.__doc__`
			`self.__name__ = fn.__name__`

			`def __get__(self, obj, cls):`
			`if cls is None:`
			`cls = type(obj)`
			`if not hasattr(cls, "_intern") or any(`
			`cls._intern is getattr(superclass, "_intern", [])`
			`for superclass in cls.__mro__[1:]`
			`):`
			`cls._intern = {}`
			`attrname = self.fn.__name__`
			`if attrname not in cls._intern:`
			`cls._intern[attrname] = self.fn(cls)`
			`return cls._intern[attrname]`


			`UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]`


			`class unicode_set:`
			`"""`
			`A set of Unicode characters, for language-specific strings for`
			``alphas``, ``nums``, ``alphanums``, and ``printables``.
			`A unicode_set is defined by a list of ranges in the Unicode character`
			set, in a class attribute ``_ranges``. Ranges can be specified using
			`2-tuples or a 1-tuple, such as::`

			`_ranges = [`
			`(0x0020, 0x007e),`
			`(0x00a0, 0x00ff),`
			`(0x0100,),`
			`]`

			`Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).`

			`A unicode set can also be defined using multiple inheritance of other unicode sets::`

			`class CJK(Chinese, Japanese, Korean):`
			`pass`
			`"""`

			`_ranges: UnicodeRangeList = []`

			`@_lazyclassproperty`
			`def _chars_for_ranges(cls):`
			`ret = []`
			`for cc in cls.__mro__:`
			`if cc is unicode_set:`
			`break`
			`for rr in getattr(cc, "_ranges", ()):`
			`ret.extend(range(rr[0], rr[-1] + 1))`
			`return [chr(c) for c in sorted(set(ret))]`

			`@_lazyclassproperty`
			`def printables(cls):`
			`"all non-whitespace characters in this range"`
			`return "".join(filterfalse(str.isspace, cls._chars_for_ranges))`

			`@_lazyclassproperty`
			`def alphas(cls):`
			`"all alphabetic characters in this range"`
			`return "".join(filter(str.isalpha, cls._chars_for_ranges))`

			`@_lazyclassproperty`
			`def nums(cls):`
			`"all numeric digit characters in this range"`
			`return "".join(filter(str.isdigit, cls._chars_for_ranges))`

			`@_lazyclassproperty`
			`def alphanums(cls):`
			`"all alphanumeric characters in this range"`
			`return cls.alphas + cls.nums`

			`@_lazyclassproperty`
			`def identchars(cls):`
			`"all characters in this range that are valid identifier characters, plus underscore '_'"`
			`return "".join(`
			`sorted(`
			`set(`
			`"".join(filter(str.isidentifier, cls._chars_for_ranges))`
			`+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"`
			`+ "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"`
			`+ "_"`
			`)`
			`)`
			`)`

			`@_lazyclassproperty`
			`def identbodychars(cls):`
			`"""`
			`all characters in this range that are valid identifier body characters,`
			`plus the digits 0-9`
			`"""`
			`return "".join(`
			`sorted(`
			`set(`
			`cls.identchars`
			`+ "0123456789"`
			`+ "".join(`
			`[c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]`
			`)`
			`)`
			`)`
			`)`


			`class pyparsing_unicode(unicode_set):`
			`"""`
			`A namespace class for defining common language unicode_sets.`
			`"""`

			`_ranges: UnicodeRangeList = [(32, sys.maxunicode)]`

			`class Latin1(unicode_set):`
			`"Unicode set for Latin-1 Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0020, 0x007E),`
			`(0x00A0, 0x00FF),`
			`]`

			`class LatinA(unicode_set):`
			`"Unicode set for Latin-A Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0100, 0x017F),`
			`]`

			`class LatinB(unicode_set):`
			`"Unicode set for Latin-B Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0180, 0x024F),`
			`]`

			`class Greek(unicode_set):`
			`"Unicode set for Greek Unicode Character Ranges"`
			`_ranges: UnicodeRangeList = [`
			`(0x0342, 0x0345),`
			`(0x0370, 0x0377),`
			`(0x037A, 0x037F),`
			`(0x0384, 0x038A),`
			`(0x038C,),`
			`(0x038E, 0x03A1),`
			`(0x03A3, 0x03E1),`
			`(0x03F0, 0x03FF),`
			`(0x1D26, 0x1D2A),`
			`(0x1D5E,),`
			`(0x1D60,),`
			`(0x1D66, 0x1D6A),`
			`(0x1F00, 0x1F15),`
			`(0x1F18, 0x1F1D),`
			`(0x1F20, 0x1F45),`
			`(0x1F48, 0x1F4D),`
			`(0x1F50, 0x1F57),`
			`(0x1F59,),`
			`(0x1F5B,),`
			`(0x1F5D,),`
			`(0x1F5F, 0x1F7D),`
			`(0x1F80, 0x1FB4),`
			`(0x1FB6, 0x1FC4),`
			`(0x1FC6, 0x1FD3),`
			`(0x1FD6, 0x1FDB),`
			`(0x1FDD, 0x1FEF),`
			`(0x1FF2, 0x1FF4),`
			`(0x1FF6, 0x1FFE),`
			`(0x2129,),`
			`(0x2719, 0x271A),`
			`(0xAB65,),`
			`(0x10140, 0x1018D),`
			`(0x101A0,),`
			`(0x1D200, 0x1D245),`
			`(0x1F7A1, 0x1F7A7),`
			`]`

			`class Cyrillic(unicode_set):`
			`"Unicode set for Cyrillic Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0400, 0x052F),`
			`(0x1C80, 0x1C88),`
			`(0x1D2B,),`
			`(0x1D78,),`
			`(0x2DE0, 0x2DFF),`
			`(0xA640, 0xA672),`
			`(0xA674, 0xA69F),`
			`(0xFE2E, 0xFE2F),`
			`]`

			`class Chinese(unicode_set):`
			`"Unicode set for Chinese Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x2E80, 0x2E99),`
			`(0x2E9B, 0x2EF3),`
			`(0x31C0, 0x31E3),`
			`(0x3400, 0x4DB5),`
			`(0x4E00, 0x9FEF),`
			`(0xA700, 0xA707),`
			`(0xF900, 0xFA6D),`
			`(0xFA70, 0xFAD9),`
			`(0x16FE2, 0x16FE3),`
			`(0x1F210, 0x1F212),`
			`(0x1F214, 0x1F23B),`
			`(0x1F240, 0x1F248),`
			`(0x20000, 0x2A6D6),`
			`(0x2A700, 0x2B734),`
			`(0x2B740, 0x2B81D),`
			`(0x2B820, 0x2CEA1),`
			`(0x2CEB0, 0x2EBE0),`
			`(0x2F800, 0x2FA1D),`
			`]`

			`class Japanese(unicode_set):`
			`"Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"`
			`_ranges: UnicodeRangeList = []`

			`class Kanji(unicode_set):`
			`"Unicode set for Kanji Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x4E00, 0x9FBF),`
			`(0x3000, 0x303F),`
			`]`

			`class Hiragana(unicode_set):`
			`"Unicode set for Hiragana Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x3041, 0x3096),`
			`(0x3099, 0x30A0),`
			`(0x30FC,),`
			`(0xFF70,),`
			`(0x1B001,),`
			`(0x1B150, 0x1B152),`
			`(0x1F200,),`
			`]`

			`class Katakana(unicode_set):`
			`"Unicode set for Katakana Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x3099, 0x309C),`
			`(0x30A0, 0x30FF),`
			`(0x31F0, 0x31FF),`
			`(0x32D0, 0x32FE),`
			`(0xFF65, 0xFF9F),`
			`(0x1B000,),`
			`(0x1B164, 0x1B167),`
			`(0x1F201, 0x1F202),`
			`(0x1F213,),`
			`]`

			`class Hangul(unicode_set):`
			`"Unicode set for Hangul (Korean) Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x1100, 0x11FF),`
			`(0x302E, 0x302F),`
			`(0x3131, 0x318E),`
			`(0x3200, 0x321C),`
			`(0x3260, 0x327B),`
			`(0x327E,),`
			`(0xA960, 0xA97C),`
			`(0xAC00, 0xD7A3),`
			`(0xD7B0, 0xD7C6),`
			`(0xD7CB, 0xD7FB),`
			`(0xFFA0, 0xFFBE),`
			`(0xFFC2, 0xFFC7),`
			`(0xFFCA, 0xFFCF),`
			`(0xFFD2, 0xFFD7),`
			`(0xFFDA, 0xFFDC),`
			`]`

			`Korean = Hangul`

			`class CJK(Chinese, Japanese, Hangul):`
			`"Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"`
			`pass`

			`class Thai(unicode_set):`
			`"Unicode set for Thai Unicode Character Range"`
			`_ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]`

			`class Arabic(unicode_set):`
			`"Unicode set for Arabic Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0600, 0x061B),`
			`(0x061E, 0x06FF),`
			`(0x0700, 0x077F),`
			`]`

			`class Hebrew(unicode_set):`
			`"Unicode set for Hebrew Unicode Character Range"`
			`_ranges: UnicodeRangeList = [`
			`(0x0591, 0x05C7),`
			`(0x05D0, 0x05EA),`
			`(0x05EF, 0x05F4),`
			`(0xFB1D, 0xFB36),`
			`(0xFB38, 0xFB3C),`
			`(0xFB3E,),`
			`(0xFB40, 0xFB41),`
			`(0xFB43, 0xFB44),`
			`(0xFB46, 0xFB4F),`
			`]`

			`class Devanagari(unicode_set):`
			`"Unicode set for Devanagari Unicode Character Range"`
			`_ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]`


			`pyparsing_unicode.Japanese._ranges = (`
			`pyparsing_unicode.Japanese.Kanji._ranges`
			`+ pyparsing_unicode.Japanese.Hiragana._ranges`
			`+ pyparsing_unicode.Japanese.Katakana._ranges`
			`)`

			`# define ranges in language character sets`
			`pyparsing_unicode.العربية = pyparsing_unicode.Arabic`
			`pyparsing_unicode.中文 = pyparsing_unicode.Chinese`
			`pyparsing_unicode.кириллица = pyparsing_unicode.Cyrillic`
			`pyparsing_unicode.Ελληνικά = pyparsing_unicode.Greek`
			`pyparsing_unicode.עִברִית = pyparsing_unicode.Hebrew`
			`pyparsing_unicode.日本語 = pyparsing_unicode.Japanese`
			`pyparsing_unicode.Japanese.漢字 = pyparsing_unicode.Japanese.Kanji`
			`pyparsing_unicode.Japanese.カタカナ = pyparsing_unicode.Japanese.Katakana`
			`pyparsing_unicode.Japanese.ひらがな = pyparsing_unicode.Japanese.Hiragana`
			`pyparsing_unicode.한국어 = pyparsing_unicode.Korean`
			`pyparsing_unicode.ไทย = pyparsing_unicode.Thai`
			`pyparsing_unicode.देवनागरी = pyparsing_unicode.Devanagari`