wmt-2020-pl-en/googletrans/gtoken.py
SuHun Han dda50305b7 PEP8
2017-03-10 23:43:06 +09:00

182 lines
6.3 KiB
Python

# -*- coding: utf-8 -*-
import ast
import math
import re
import time
import requests
from googletrans.compat import PY3
from googletrans.compat import unicode
from googletrans.utils import rshift
class TokenAcquirer(object):
    """Google Translate API token generator

    translate.google.com uses a token to authorize the requests. If you are
    not Google, you do not have this token and will have to pay for use.
    This class is the result of reverse engineering on the obfuscated and
    minified code used by Google to generate such token.

    The token is based on a seed which is updated once per hour and on the
    text that will be translated.
    Both are combined - by some strange math - in order to generate a final
    token (e.g. 744915.856682) which is used by the API to validate the
    request.

    This operation will cause an additional request to get an initial
    token from translate.google.com.

    :Example:

    >>> from gtoken import TokenAcquirer
    >>> acquirer = TokenAcquirer()
    >>> text = 'test'
    >>> tk = acquirer.do(text)
    >>> print(tk)
    950629.577246
    """

    # Captures the body of the anonymous JavaScript function that computes TKK.
    RE_TKK = re.compile(r'TKK=eval\(\'\(\(function\(\)\{(.+?)\}\)\(\)\)\'\);',
                        re.DOTALL)

    def __init__(self, tkk='0', session=None):
        """Create a token generator.

        :param tkk: initial seed as a string; ``'0'`` means "no seed yet".
        :param session: object used to perform the HTTP GET in ``_update``;
                        a new ``requests.Session`` is created when omitted.
        """
        self.session = session or requests.Session()
        self.tkk = tkk

    @staticmethod
    def _numeric_literal(node):
        """Return the number held by a numeric-literal AST node, else None.

        ``ast.Num`` and its ``.n`` attribute are deprecated since Python 3.8
        (the parser emits ``ast.Constant`` instead) and removed in 3.14, so
        the node kind is matched by class name to work on every version
        without touching the deprecated aliases.
        """
        kind = type(node).__name__
        if kind == 'Num':
            return node.n
        if kind == 'Constant':
            value = node.value
            # bool is a subclass of int but is not a numeric literal here
            if isinstance(value, (int, float, complex)) and \
                    not isinstance(value, bool):
                return value
        return None

    @staticmethod
    def _utf8_values(text):
        """Return the UTF-8 encoding of ``text`` as a list of byte values.

        A high surrogate immediately followed by a low surrogate is combined
        into a single code point and encoded as 4 bytes; an unpaired
        surrogate is encoded with the generic 3-byte form. Code points at or
        above 0x10000 that arrive as a single character are encoded as
        4 bytes as well.
        """
        values = []
        index = 0
        size = len(text)
        while index < size:
            code = ord(text[index])
            # merge a surrogate pair into the code point it represents
            if (code & 64512) == 55296 and index + 1 < size and \
                    (ord(text[index + 1]) & 64512) == 56320:
                index += 1
                code = 65536 + ((code & 1023) << 10) + \
                    (ord(text[index]) & 1023)

            if code < 128:
                values.append(code)
            elif code < 2048:
                values.append(code >> 6 | 192)
                values.append(code & 63 | 128)
            elif code < 65536:
                values.append(code >> 12 | 224)
                values.append(code >> 6 & 63 | 128)
                values.append(code & 63 | 128)
            else:
                values.append(code >> 18 | 240)
                values.append(code >> 12 & 63 | 128)
                values.append(code >> 6 & 63 | 128)
                values.append(code & 63 | 128)
            index += 1
        return values

    def _update(self):
        """Refresh ``self.tkk`` from translate.google.com when it is stale.

        Returns immediately when the integer part of the current seed equals
        the current hour count (milliseconds since the epoch / 3600000).
        Otherwise the page is fetched through ``self.session``, the TKK
        JavaScript snippet is parsed with ``ast`` and the seed is rebuilt as
        ``'<n>.<a OP b>'``.

        :raises AttributeError: if the fetched page does not contain the
                                snippet matched by ``RE_TKK``.
        """
        # we don't need to update the base TKK value when it is still valid
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        r = self.session.get('https://translate.google.com')
        # this will be the same as python code after stripping out a reserved
        # word 'var'
        code = unicode(self.RE_TKK.search(r.text).group(1)).replace('var ', '')
        # unescape special ascii characters such like a \x3d(=)
        if PY3:  # pragma: no cover
            code = code.encode().decode('unicode-escape')
        else:  # pragma: no cover
            code = code.decode('string_escape')

        if code:
            tree = ast.parse(code)
            visit_return = False
            op_symbol = '+'
            n, keys = 0, dict(a=0, b=0)
            for node in ast.walk(tree):
                node_number = self._numeric_literal(node)
                if isinstance(node, ast.Assign):
                    name = node.targets[0].id
                    if name in keys:
                        assigned = self._numeric_literal(node.value)
                        if assigned is not None:
                            keys[name] = assigned
                        # the value can sometimes be negative
                        elif isinstance(node.value, ast.UnaryOp) and \
                                isinstance(node.value.op, ast.USub):  # pragma: nocover
                            operand = self._numeric_literal(
                                node.value.operand)
                            if operand is not None:
                                keys[name] = -operand
                elif isinstance(node, ast.Return):
                    # parameters should be set after this point
                    visit_return = True
                elif visit_return and node_number is not None:
                    n = node_number
                elif visit_return and n > 0:
                    # the default operator is '+' but implement some more for
                    # all possible scenarios
                    if isinstance(node, ast.Add):  # pragma: nocover
                        pass
                    elif isinstance(node, ast.Sub):  # pragma: nocover
                        op_symbol = '-'
                    elif isinstance(node, ast.Mult):  # pragma: nocover
                        op_symbol = '*'
                    elif isinstance(node, ast.Pow):  # pragma: nocover
                        op_symbol = '**'
                    elif isinstance(node, ast.BitXor):  # pragma: nocover
                        op_symbol = '^'

            # NOTE: eval is only ever given "<number><operator><number>",
            # where both numbers were extracted from the AST above and the
            # operator comes from the fixed set chosen in the loop. The
            # globals key must be spelled '__builtins__' for the builtins to
            # actually be hidden from the evaluated expression.
            clause = compile('{1}{0}{2}'.format(
                op_symbol, keys['a'], keys['b']), '', 'eval')
            value = eval(clause, dict(__builtins__={}))
            result = '{}.{}'.format(n, value)

            self.tkk = result

    def _lazy(self, value):
        """like lazy evaluation, this method returns a lambda function that
        returns value given.
        We won't be needing this because this seems to have been built for
        code obfuscation.

        the original code of this method is as follows:

           ... code-block: javascript

               var ek = function(a) {
                return function() {
                    return a;
                };
               }
        """
        return lambda: value

    def _xr(self, a, b):
        """Mix the integer ``a`` according to the operation string ``b``.

        ``b`` is read three characters at a time:

        * 1st: ``'+'`` adds the shifted value to ``a`` (result masked to
          32 bits); anything else XORs it into ``a``.
        * 2nd: ``'+'`` shifts with ``rshift``; anything else shifts left.
        * 3rd: shift amount; characters at or above ``'a'`` map to
          ``ord(char) - 87`` (so ``'a'`` is 10), others are parsed as
          decimal digits.

        :returns: the mixed integer.
        """
        size_b = len(b)
        c = 0
        while c < size_b - 2:
            d = b[c + 2]
            d = ord(d[0]) - 87 if 'a' <= d else int(d)
            d = rshift(a, d) if '+' == b[c + 1] else a << d
            a = (a + d) & 4294967295 if '+' == b[c] else a ^ d

            c += 3
        return a

    def acquire(self, text):
        """Compute the request token for ``text`` from the current seed.

        The UTF-8 byte values of ``text`` are folded one by one into an
        accumulator that starts at the integer part of ``self.tkk``; the
        result is XORed with the fractional part of the seed, made
        non-negative and reduced modulo 1000000.

        :param text: the text that is going to be translated.
        :returns: a string of the form ``'<a>.<a ^ seed>'``.
        """
        b = self.tkk if self.tkk != '0' else ''
        d = b.split('.')
        b = int(d[0]) if len(d) > 1 else 0

        a = b
        for value in self._utf8_values(text):
            a += value
            a = self._xr(a, '+-a^+6')
        a = self._xr(a, '+-3^+b+-f')
        a ^= int(d[1]) if len(d) > 1 else 0
        if a < 0:  # pragma: nocover
            a = (a & 2147483647) + 2147483648
        a %= 1000000  # int(1E6)

        return '{}.{}'.format(a, a ^ b)

    def do(self, text):
        """Refresh the seed if needed and return the token for ``text``."""
        self._update()
        tk = self.acquire(text)
        return tk