From 286593607e8a9d43627a00a82cfcde55183b0818 Mon Sep 17 00:00:00 2001 From: SuHun Han Date: Fri, 7 Apr 2017 22:43:48 +0900 Subject: [PATCH] Fix invalid tokenizer due to the complexity of the obfuscated code This commit solves #14 and bumps the version to 2.1.2 --- googletrans/__init__.py | 2 +- googletrans/gtoken.py | 26 +++++++++++++------------- tests/test_client.py | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/googletrans/__init__.py b/googletrans/__init__.py index 9f818df..c5dba57 100644 --- a/googletrans/__init__.py +++ b/googletrans/__init__.py @@ -1,6 +1,6 @@ """Free Google Translate API for Python. Translates totally free of charge.""" __all__ = 'Translator', -__version_info__ = 2, 1, 1 +__version_info__ = 2, 1, 2 __version__ = '.'.join(str(v) for v in __version_info__) diff --git a/googletrans/gtoken.py b/googletrans/gtoken.py index 34294a0..cdf29de 100644 --- a/googletrans/gtoken.py +++ b/googletrans/gtoken.py @@ -148,21 +148,21 @@ class TokenAcquirer(object): if l < 128: e.append(l) # append calculated value if l is less than 2048 - elif l < 2048: - e.append(l >> 6 | 192) - e.append(l) - # append calculated value if l matches special condition - elif (l & 64512) == 55296 and g + 1 < size and \ - ord(text[g + 1]) & 64512 == 56320: - g += 1 - l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023 - e.append(l >> 18 | 240) - e.append(l >> 12 & 63 | 128) else: - e.append(l >> 12 | 224) - e.append(l >> 6 & 63 | 128) + if l < 2048: + e.append(l >> 6 | 192) + else: + # append calculated value if l matches special condition + if (l & 64512) == 55296 and g + 1 < size and \ + ord(text[g + 1]) & 64512 == 56320: + g += 1 + l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023 + e.append(l >> 18 | 240) + e.append(l >> 12 & 63 | 128) + else: + e.append(l >> 12 | 224) + e.append(l >> 6 & 63 | 128) e.append(l & 63 | 128) - a = b for i, value in enumerate(e): a += value diff --git a/tests/test_client.py b/tests/test_client.py index 2407ab0..ccbfae6 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -28,9 +28,9 @@ def test_unicode(translator): def test_special_chars(translator): - text = u"Copyright © Google" + text = u"©×《》" - result = translator.translate(text, src='en', dest='fr') + result = translator.translate(text, src='en', dest='en') assert result.text == text