Fix invalid tokenizer due to the complexity of the obfuscated code

This commit solves #14 and bumps the version to 2.1.2
SuHun Han 2017-04-07 22:43:48 +09:00
parent d991f5e3e3
commit 286593607e
3 changed files with 16 additions and 16 deletions


@@ -1,6 +1,6 @@
 """Free Google Translate API for Python. Translates totally free of charge."""
 __all__ = 'Translator',
-__version_info__ = 2, 1, 1
+__version_info__ = 2, 1, 2
 __version__ = '.'.join(str(v) for v in __version_info__)
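
For reference, the join on the new line evaluates to the release number cited in the commit message:

>>> __version_info__ = 2, 1, 2
>>> '.'.join(str(v) for v in __version_info__)
'2.1.2'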


@@ -148,21 +148,21 @@ class TokenAcquirer(object):
             if l < 128:
                 e.append(l)
             # append calculated value if l is less than 2048
-            elif l < 2048:
-                e.append(l >> 6 | 192)
-                e.append(l)
-            # append calculated value if l matches special condition
-            elif (l & 64512) == 55296 and g + 1 < size and \
-                    ord(text[g + 1]) & 64512 == 56320:
-                g += 1
-                l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023
-                e.append(l >> 18 | 240)
-                e.append(l >> 12 & 63 | 128)
             else:
-                e.append(l >> 12 | 224)
-                e.append(l >> 6 & 63 | 128)
+                if l < 2048:
+                    e.append(l >> 6 | 192)
+                else:
+                    # append calculated value if l matches special condition
+                    if (l & 64512) == 55296 and g + 1 < size and \
+                            ord(text[g + 1]) & 64512 == 56320:
+                        g += 1
+                        l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023
+                        e.append(l >> 18 | 240)
+                        e.append(l >> 12 & 63 | 128)
+                    else:
+                        e.append(l >> 12 | 224)
+                    e.append(l >> 6 & 63 | 128)
                 e.append(l & 63 | 128)
         a = b
         for i, value in enumerate(e):
             a += value
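
For context, the restructured branches mirror the byte packing done by Google's obfuscated token script: one byte for ASCII, two bytes below U+0800, four bytes for a surrogate pair, and three bytes otherwise. A standalone sketch of that packing (a hypothetical helper, not part of gtoken.py) can be checked against Python's own encoder:

def encode_like_token_js(text):
    # Sketch of the branch structure above; builds the UTF-8-style byte list.
    e = []
    g = 0
    size = len(text)
    while g < size:
        l = ord(text[g])
        if l < 128:
            e.append(l)
        else:
            if l < 2048:
                e.append(l >> 6 | 192)
            else:
                # recombine a surrogate pair into one code point (4-byte form)
                if (l & 64512) == 55296 and g + 1 < size and \
                        ord(text[g + 1]) & 64512 == 56320:
                    g += 1
                    # parentheses are needed here: & binds looser than + in Python
                    l = 65536 + ((l & 1023) << 10) + (ord(text[g]) & 1023)
                    e.append(l >> 18 | 240)
                    e.append(l >> 12 & 63 | 128)
                else:
                    e.append(l >> 12 | 224)
                e.append(l >> 6 & 63 | 128)
            e.append(l & 63 | 128)
        g += 1
    return e

# For text without lone surrogates this matches CPython's UTF-8 encoder,
# e.g. for the characters used in the updated test below:
assert encode_like_token_js(u"©×《》") == list(bytearray(u"©×《》".encode('utf-8')))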


@@ -28,9 +28,9 @@ def test_unicode(translator):
 def test_special_chars(translator):
-    text = u"Copyright © Google"
+    text = u"©×《》"
-    result = translator.translate(text, src='en', dest='fr')
+    result = translator.translate(text, src='en', dest='en')
     assert result.text == text
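
A quick end-to-end check of the fix, assuming googletrans 2.1.2 is installed and translate.google.com is reachable, mirrors the updated test:

from googletrans import Translator

translator = Translator()
result = translator.translate(u"©×《》", src='en', dest='en')
# with the repaired token the special characters should round-trip unchanged
assert result.text == u"©×《》"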