Add google translate token generator

2017-03-10 19:19:26 +09:00 · 2017-03-10 19:19:26 +09:00 · 59d96f75a0
commit 59d96f75a0
parent 2f76c45d6e
1 changed files with 181 additions and 0 deletions
--- a/googletrans/gtoken.py
+++ b/googletrans/gtoken.py
@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+import ast
+import math
+import re
+import sys
+import time
+
+import requests
+
+
+from googletrans.compat import PY3
+from googletrans.compat import unicode
+from googletrans.utils import rshift
+
+
+class TokenAcquirer(object):
+    """Google Translate API token generator
+
+    translate.google.com uses a token to authorize the requests. If you are
+    not Google, you do not have this token and will have to pay for use.
+    This class is the result of reverse engineering on the obfuscated and
+    minified code used by Google to generate such token.
+
+    The token is based on a seed which is updated once per hour and on the
+    text that will be translated.
+    Both are combined - by some strange math - in order to generate a final
+    token (e.g. 744915.856682) which is used by the API to validate the
+    request.
+
+    This operation will cause an additional request to get an initial
+    token from translate.google.com.
+
+    :Example:
+
+        >>> from googletrans.gtoken import TokenAcquirer
+        >>> acquirer = TokenAcquirer()
+        >>> text = 'test'
+        >>> tk = acquirer.do(text)
+        >>> print(tk)
+        950629.577246
+    """
+
+    # captures the body of the anonymous function whose result becomes TKK
+    RE_TKK = re.compile(r'TKK=eval\(\'\(\(function\(\)\{(.+?)\}\)\(\)\)\'\);', re.DOTALL)
+
+    # operators the TKK snippet may apply to its two keys instead of the
+    # default '+'; a lookup table avoids eval() on text taken from the page
+    _OPERATORS = {
+        ast.Sub: lambda a, b: a - b,
+        ast.Mult: lambda a, b: a * b,
+        ast.Pow: lambda a, b: a ** b,
+        ast.BitXor: lambda a, b: a ^ b,
+    }
+
+    def __init__(self, tkk='0', session=None):
+        """
+        :param tkk: initial seed in ``<hours>.<key>`` form; the default
+                    ``'0'`` never equals the current hour, so the first
+                    :meth:`do` call fetches a real seed
+        :param session: object whose ``get`` method issues the HTTP request;
+                        a new ``requests.Session`` is created when omitted
+        """
+        self.session = session or requests.Session()
+        self.tkk = tkk
+
+    @staticmethod
+    def _numeric_value(node):
+        """Return the number held by a numeric literal node, else ``None``."""
+        # Python 3.8+ parses every literal into ast.Constant, so strings and
+        # booleans have to be filtered out by value type
+        if isinstance(node, getattr(ast, 'Constant', ())):
+            value = node.value
+            if isinstance(value, (int, float, complex)) \
+                    and not isinstance(value, bool):
+                return value
+            return None
+        # older parsers (including Python 2) produce ast.Num instead; it is
+        # only touched there because newer versions deprecate the name
+        if sys.version_info < (3, 8) and isinstance(node, ast.Num):
+            return node.n
+        return None
+
+    def _update(self):
+        """Refresh ``self.tkk`` from translate.google.com when it is outdated.
+
+        Nothing is requested while the hour stored in the current seed is
+        still the current hour. When the page does not contain the expected
+        TKK snippet the seed is left unchanged.
+        """
+        # we don't need to update the base TKK value when it is still valid
+        now = int(time.time() * 1000) // 3600000
+        if self.tkk and int(self.tkk.split('.')[0]) == now:
+            return
+
+        r = self.session.get('https://translate.google.com')
+        # match objects are only subscriptable on Python 3.6+, so use group()
+        found = self.RE_TKK.search(r.text)
+        if found is None:
+            return
+        # this will be the same as python code after stripping out a reserved word 'var'
+        code = unicode(found.group(1)).replace('var ', '')
+        # unescape special ascii characters such like a \x3d(=)
+        if PY3:  # pragma: no cover
+            code = code.encode().decode('unicode-escape')
+        else:  # pragma: no cover
+            code = code.decode('string_escape')
+
+        if not code:
+            return
+
+        tree = ast.parse(code)
+        visit_return = False
+        combine = None  # None means the default operator '+'
+        n, keys = 0, dict(a=0, b=0)
+        # ast.walk is breadth-first, so the literals of the return statement
+        # are visited after those of the assignments; the last one seen is n
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Assign):
+                name = getattr(node.targets[0], 'id', None)
+                if name in keys:
+                    value = node.value
+                    # the value can sometimes be negative
+                    negate = isinstance(value, ast.UnaryOp) and \
+                        isinstance(value.op, ast.USub)
+                    number = self._numeric_value(
+                        value.operand if negate else value)
+                    if number is not None:
+                        keys[name] = -number if negate else number
+            elif isinstance(node, ast.Return):
+                # parameters should be set after this point
+                visit_return = True
+            elif visit_return:
+                number = self._numeric_value(node)
+                if number is not None:
+                    n = number
+                elif n > 0:
+                    # the default operator is '+' but implement some more
+                    # for all possible scenarios; any other node keeps the
+                    # operator found so far
+                    combine = self._OPERATORS.get(type(node), combine)
+
+        a, b = keys['a'], keys['b']
+        value = a + b if combine is None else combine(a, b)
+        self.tkk = '{}.{}'.format(n, value)
+
+    def _lazy(self, value):
+        """like lazy evaluation, this method returns a lambda function that
+        returns value given.
+        We won't be needing this because this seems to have been built for
+        code obfuscation.
+
+        the original code of this method is as follows:
+
+           .. code-block:: javascript
+
+               var ek = function(a) {
+                return function() {
+                    return a;
+                };
+               }
+        """
+        return lambda: value
+
+    def _xr(self, a, b):
+        """Apply the operations encoded in ``b`` to the number ``a``.
+
+        ``b`` is read three characters at a time:
+
+        * the first selects how the shifted value is merged into ``a``:
+          ``'+'`` adds and keeps the low 32 bits, anything else XORs
+        * the second selects the shift: ``'+'`` calls ``rshift``, anything
+          else shifts left
+        * the third is the shift amount: a digit, or a letter from ``'a'``
+          on, where ``'a'`` stands for 10
+
+        :return: the transformed number
+        """
+        for start in range(0, len(b) - 2, 3):
+            merge, direction, amount = b[start], b[start + 1], b[start + 2]
+            amount = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
+            shifted = rshift(a, amount) if '+' == direction else a << amount
+            a = (a + shifted) & 4294967295 if '+' == merge else a ^ shifted
+        return a
+
+    @staticmethod
+    def _utf8_bytes(text):
+        """Return ``text`` as a list of UTF-8 byte values.
+
+        The text is first viewed as UTF-16 code units, the way the original
+        JavaScript reads it with ``charCodeAt``, so a lone surrogate is
+        encoded as three bytes instead of raising an error.
+        """
+        # a code point above 0xFFFF is a single character in Python 3 but a
+        # surrogate pair in JavaScript
+        units = []
+        for char in text:
+            code = ord(char)
+            if code < 65536:
+                units.append(code)
+            else:
+                code -= 65536
+                units.append(55296 + (code >> 10))
+                units.append(56320 + (code & 1023))
+
+        encoded = []
+        index, size = 0, len(units)
+        while index < size:
+            l = units[index]
+            # just append if l is less than 128(ascii: DEL)
+            if l < 128:
+                encoded.append(l)
+            # two bytes if l is less than 2048
+            elif l < 2048:
+                encoded.append(l >> 6 | 192)
+                encoded.append(l & 63 | 128)
+            # four bytes for a high surrogate followed by a low surrogate
+            elif (l & 64512) == 55296 and index + 1 < size and \
+                    (units[index + 1] & 64512) == 56320:
+                index += 1
+                l = 65536 + ((l & 1023) << 10) + (units[index] & 1023)
+                encoded.append(l >> 18 | 240)
+                encoded.append(l >> 12 & 63 | 128)
+                encoded.append(l >> 6 & 63 | 128)
+                encoded.append(l & 63 | 128)
+            # three bytes for everything else
+            else:
+                encoded.append(l >> 12 | 224)
+                encoded.append(l >> 6 & 63 | 128)
+                encoded.append(l & 63 | 128)
+            index += 1
+        return encoded
+
+    def acquire(self, text):
+        """Compute the token for ``text`` from the current ``self.tkk``.
+
+        This method performs no request; call :meth:`do` to refresh the
+        seed first.
+
+        :param text: the text that is going to be translated
+        :return: the token as a ``'<number>.<number>'`` string
+        """
+        b = self.tkk if self.tkk != '0' else ''
+        d = b.split('.')
+        # both halves of the seed are only used when the seed has a '.'
+        b = int(d[0]) if len(d) > 1 else 0
+
+        a = b
+        for value in self._utf8_bytes(text):
+            a += value
+            a = self._xr(a, '+-a^+6')
+        a = self._xr(a, '+-3^+b+-f')
+        a ^= int(d[1]) if len(d) > 1 else 0
+        if a < 0:  # pragma: nocover
+            a = (a & 2147483647) + 2147483648
+        a %= 1000000  # int(1E6)
+
+        return '{}.{}'.format(a, a ^ b)
+
+    def do(self, text):
+        """Refresh the seed if necessary and return the token for ``text``."""
+        self._update()
+        tk = self.acquire(text)
+        return tk