fix: token generation for text containing code points >= 0x10000 (#75)
* Fix token generation for text containing characters with code points >= 0x10000 (outside the Basic Multilingual Plane). Also minor optimizations.
* Missed a variable rename.
This commit is contained in:
parent
48653332ef
commit
759a0baf46
@ -135,6 +135,19 @@ class TokenAcquirer(object):
|
|||||||
return a
|
return a
|
||||||
|
|
||||||
def acquire(self, text):
|
def acquire(self, text):
|
||||||
|
a = []
|
||||||
|
# Convert text to ints
|
||||||
|
for i in text:
|
||||||
|
val = ord(i)
|
||||||
|
if val < 0x10000:
|
||||||
|
a += [val]
|
||||||
|
else:
|
||||||
|
# Python doesn't natively use Unicode surrogates, so account for those
|
||||||
|
a += [
|
||||||
|
math.floor((val - 0x10000)/0x400 + 0xD800),
|
||||||
|
math.floor((val - 0x10000)%0x400 + 0xDC00)
|
||||||
|
]
|
||||||
|
|
||||||
b = self.tkk if self.tkk != '0' else ''
|
b = self.tkk if self.tkk != '0' else ''
|
||||||
d = b.split('.')
|
d = b.split('.')
|
||||||
b = int(d[0]) if len(d) > 1 else 0
|
b = int(d[0]) if len(d) > 1 else 0
|
||||||
@ -143,8 +156,8 @@ class TokenAcquirer(object):
|
|||||||
e = []
|
e = []
|
||||||
g = 0
|
g = 0
|
||||||
size = len(text)
|
size = len(text)
|
||||||
for i, char in enumerate(text):
|
while g < size:
|
||||||
l = ord(char)
|
l = a[g]
|
||||||
# just append if l is less than 128(ascii: DEL)
|
# just append if l is less than 128(ascii: DEL)
|
||||||
if l < 128:
|
if l < 128:
|
||||||
e.append(l)
|
e.append(l)
|
||||||
@ -155,15 +168,16 @@ class TokenAcquirer(object):
|
|||||||
else:
|
else:
|
||||||
# append calculated value if l matches special condition
|
# append calculated value if l matches special condition
|
||||||
if (l & 64512) == 55296 and g + 1 < size and \
|
if (l & 64512) == 55296 and g + 1 < size and \
|
||||||
ord(text[g + 1]) & 64512 == 56320:
|
a[g + 1] & 64512 == 56320:
|
||||||
g += 1
|
g += 1
|
||||||
l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023
|
l = 65536 + ((l & 1023) << 10) + (a[g] & 1023) # This bracket is important
|
||||||
e.append(l >> 18 | 240)
|
e.append(l >> 18 | 240)
|
||||||
e.append(l >> 12 & 63 | 128)
|
e.append(l >> 12 & 63 | 128)
|
||||||
else:
|
else:
|
||||||
e.append(l >> 12 | 224)
|
e.append(l >> 12 | 224)
|
||||||
e.append(l >> 6 & 63 | 128)
|
e.append(l >> 6 & 63 | 128)
|
||||||
e.append(l & 63 | 128)
|
e.append(l & 63 | 128)
|
||||||
|
g += 1
|
||||||
a = b
|
a = b
|
||||||
for i, value in enumerate(e):
|
for i, value in enumerate(e):
|
||||||
a += value
|
a += value
|
||||||
|
Loading…
Reference in New Issue
Block a user