Fix the invalid tokenizer caused by the complexity of the obfuscated code

This commit solves #14 and bumps the version to 2.1.2
2017-04-07 22:43:48 +09:00 · 2017-04-07 22:43:48 +09:00 · 286593607e
commit 286593607e
parent d991f5e3e3
3 changed files with 16 additions and 16 deletions
--- a/googletrans/__init__.py
+++ b/googletrans/__init__.py
@@ -1,6 +1,6 @@
 """Free Google Translate API for Python. Translates totally free of charge."""
 __all__ = 'Translator',
-__version_info__ = 2, 1, 1
+__version_info__ = 2, 1, 2
 __version__ = '.'.join(str(v) for v in __version_info__)


--- a/googletrans/gtoken.py
+++ b/googletrans/gtoken.py
@@ -148,21 +148,21 @@ class TokenAcquirer(object):
            if l < 128:
                e.append(l)
            # append calculated value if l is less than 2048
-            elif l < 2048:
-                e.append(l >> 6 | 192)
-                e.append(l)
-            # append calculated value if l matches special condition
-            elif (l & 64512) == 55296 and g + 1 < size and \
-                    ord(text[g + 1]) & 64512 == 56320:
-                g += 1
-                l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023
-                e.append(l >> 18 | 240)
-                e.append(l >> 12 & 63 | 128)
            else:
-                e.append(l >> 12 | 224)
-                e.append(l >> 6 & 63 | 128)
+                if l < 2048:
+                    e.append(l >> 6 | 192)
+                else:
+                    # append calculated value if l matches special condition
+                    if (l & 64512) == 55296 and g + 1 < size and \
+                            ord(text[g + 1]) & 64512 == 56320:
+                        g += 1
+                        l = 65536 + ((l & 1023) << 10) + ord(text[g]) & 1023
+                        e.append(l >> 18 | 240)
+                        e.append(l >> 12 & 63 | 128)
+                    else:
+                        e.append(l >> 12 | 224)
+                        e.append(l >> 6 & 63 | 128)
                e.append(l & 63 | 128)
-
        a = b
        for i, value in enumerate(e):
            a += value
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -28,9 +28,9 @@ def test_unicode(translator):


 def test_special_chars(translator):
-    text = u"Copyright © Google"
+    text = u"©×《》"

-    result = translator.translate(text, src='en', dest='fr')
+    result = translator.translate(text, src='en', dest='en')
    assert result.text == text