Add google translate token generator
This commit is contained in:
parent
2f76c45d6e
commit
59d96f75a0
181
googletrans/gtoken.py
Normal file
181
googletrans/gtoken.py
Normal file
@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import ast
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
from googletrans.compat import PY3
|
||||
from googletrans.compat import unicode
|
||||
from googletrans.utils import rshift
|
||||
|
||||
|
||||
class TokenAcquirer(object):
    """Google Translate API token generator

    translate.google.com uses a token to authorize the requests. If you are
    not Google, you do not have this token and will have to pay for use.
    This class is the result of reverse engineering on the obfuscated and
    minified code used by Google to generate such token.

    The token is based on a seed which is updated once per hour and on the
    text that will be translated.
    Both are combined - by some strange math - in order to generate a final
    token (e.g. 744915.856682) which is used by the API to validate the
    request.

    This operation will cause an additional request to get an initial
    token from translate.google.com.

    :Example:

    >>> from googletrans.gtoken import TokenAcquirer
    >>> acquirer = TokenAcquirer()
    >>> text = 'test'
    >>> tk = acquirer.do(text)
    >>> print(tk)
    950629.577246
    """

    RE_TKK = re.compile(r'TKK=eval\(\'\(\(function\(\)\{(.+?)\}\)\(\)\)\'\);', re.DOTALL)

    def __init__(self, tkk='0', session=None):
        """Create a token generator.

        :param tkk: initial seed as an ``'<hour>.<number>'`` string; the
                    default ``'0'`` means "no seed fetched yet".
        :param session: object used to issue the HTTP request (only its
                        ``get`` method is called); a new
                        ``requests.Session`` is created when omitted.
        """
        self.session = session or requests.Session()
        self.tkk = tkk

    def _update(self):
        """Refresh ``self.tkk`` from translate.google.com when it is stale.

        The seed is considered valid while its first component equals the
        current hour number (milliseconds since the epoch / 3600000); in
        that case no request is made.

        :raises ValueError: if the seed cannot be located in the response.
        """
        # we don't need to update the base TKK value when it is still valid
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        r = self.session.get('https://translate.google.com')

        # ``group(1)`` instead of ``match[1]``: match objects are only
        # subscriptable on Python 3.6+, and a missing match must not end up
        # as an obscure "NoneType is not subscriptable" error.
        match = self.RE_TKK.search(r.text)
        if match is None:
            raise ValueError(
                'could not find the TKK seed in the response from '
                'translate.google.com')

        # this will be the same as python code after stripping out a
        # reserved word 'var'
        code = unicode(match.group(1)).replace('var ', '')
        # unescape special ascii characters such like a \x3d(=)
        if PY3:  # pragma: no cover
            code = code.encode().decode('unicode-escape')
        else:  # pragma: no cover
            code = code.decode('string_escape')

        if code:
            self.tkk = self._parse_tkk(code)

    @staticmethod
    def _literal_number(node):
        """Return the value of a numeric literal AST node, else ``None``."""
        if sys.version_info >= (3, 8):
            # numeric literals are ast.Constant since 3.8; the ast.Num alias
            # and the ``.n`` attribute are deprecated and later removed
            if not isinstance(node, ast.Constant):
                return None
            value = node.value
            if isinstance(value, bool) or \
                    not isinstance(value, (int, float, complex)):
                return None
            return value
        if isinstance(node, ast.Num):
            return node.n
        return None

    @classmethod
    def _parse_tkk(cls, code):
        """Compute the ``'<hour>.<number>'`` seed from the unescaped snippet.

        ``code`` is expected to look like
        ``a=1;b=-2;return 415354+'.'+(a+b)``: the assignments to ``a`` and
        ``b`` give the operands, the numeric literal inside the ``return``
        gives the first component and the operator joining ``a`` and ``b``
        gives the second one.
        """
        keys = dict(a=0, b=0)
        hour = 0
        symbol = '+'
        # the default operator is '+' but implement some more for
        # all possible scenarios
        symbols = ((ast.Sub, '-'), (ast.Mult, '*'),
                   (ast.Pow, '**'), (ast.BitXor, '^'))

        for node in ast.walk(ast.parse(code)):
            if isinstance(node, ast.Assign):
                # targets that are not plain names have no ``id``
                name = getattr(node.targets[0], 'id', None)
                if name not in keys:
                    continue
                number = cls._literal_number(node.value)
                if number is not None:
                    keys[name] = number
                # the value can sometimes be negative
                elif isinstance(node.value, ast.UnaryOp) and \
                        isinstance(node.value.op, ast.USub):
                    number = cls._literal_number(node.value.operand)
                    if number is not None:
                        keys[name] = -number
            elif isinstance(node, ast.Return):
                # Only look inside the return expression: ast.walk yields
                # nodes in no specified order, so literals belonging to the
                # assignments must not be able to leak into ``hour``.
                for child in ast.walk(node):
                    number = cls._literal_number(child)
                    if number is not None:
                        hour = number
                        continue
                    for op_type, op_symbol in symbols:
                        if isinstance(child, op_type):
                            symbol = op_symbol

        # NOTE(review): eval() on text derived from a remote page. The
        # expression is assembled only from numeric literals and one operator
        # of the fixed set above, and builtins are disabled, but a plain
        # operator dispatch table would avoid eval altogether.
        clause = compile(
            '{1}{0}{2}'.format(symbol, keys['a'], keys['b']), '', 'eval')
        value = eval(clause, dict(__builtins__={}))
        return '{}.{}'.format(hour, value)

    def _lazy(self, value):
        """like lazy evalution, this method returns a lambda function that
        returns value given.
        We won't be needing this because this seems to have been built for
        code obfuscation.

        the original code of this method is as follows:

        ... code-block: javascript

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Apply the sequence of shift/combine steps described by ``b`` to ``a``.

        ``b`` is read three characters at a time:

        * 1st character: ``'+'`` adds the shifted value to ``a`` (truncated
          to 32 bits), anything else XORs it into ``a``;
        * 2nd character: ``'+'`` shifts right through ``rshift``, anything
          else shifts left;
        * 3rd character: shift amount, a decimal digit or a letter from
          ``'a'`` (10) upwards.

        A trailing group of fewer than three characters is ignored.
        """
        for c in range(0, len(b) - 2, 3):
            amount = b[c + 2]
            amount = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
            if '+' == b[c + 1]:
                # presumably the port of JavaScript's unsigned '>>>' -
                # confirm against googletrans.utils.rshift
                shifted = rshift(a, amount)
            else:
                shifted = a << amount
            if '+' == b[c]:
                a = (a + shifted) & 4294967295
            else:
                a = a ^ shifted
        return a

    @staticmethod
    def _utf8_bytes(text):
        """Return the UTF-8 byte values of ``text`` as a list of ints.

        Mirrors the encoder of the original JavaScript: a high/low surrogate
        pair is merged into the code point it represents (4 bytes) and a
        lone surrogate is encoded like any other 3-byte code point.
        """
        codes = []
        size = len(text)
        i = 0
        while i < size:
            cp = ord(text[i])
            if (cp & 0xFC00) == 0xD800 and i + 1 < size:
                low = ord(text[i + 1])
                if (low & 0xFC00) == 0xDC00:
                    # the low surrogate is consumed together with the high one
                    cp = 0x10000 + ((cp & 0x3FF) << 10) + (low & 0x3FF)
                    i += 1
            if cp < 0x80:
                # plain ascii
                codes.append(cp)
            elif cp < 0x800:
                codes.append(cp >> 6 | 0xC0)
                codes.append(cp & 0x3F | 0x80)
            elif cp < 0x10000:
                codes.append(cp >> 12 | 0xE0)
                codes.append(cp >> 6 & 0x3F | 0x80)
                codes.append(cp & 0x3F | 0x80)
            else:
                codes.append(cp >> 18 | 0xF0)
                codes.append(cp >> 12 & 0x3F | 0x80)
                codes.append(cp >> 6 & 0x3F | 0x80)
                codes.append(cp & 0x3F | 0x80)
            i += 1
        return codes

    def acquire(self, text):
        """Compute the token for ``text`` from the current seed ``self.tkk``.

        :param text: the text that is going to be translated.
        :return: the token, formatted as ``'<a>.<a ^ b>'`` where ``b`` is
                 the first component of the seed (0 when there is no seed).
        """
        seed = self.tkk if self.tkk != '0' else ''
        d = seed.split('.')
        has_seed = len(d) > 1
        b = int(d[0]) if has_seed else 0

        # fold every UTF-8 byte of the text into the running value
        a = b
        for value in self._utf8_bytes(text):
            a += value
            a = self._xr(a, '+-a^+6')
        a = self._xr(a, '+-3^+b+-f')
        a ^= int(d[1]) if has_seed else 0
        # JavaScript XORs on 32-bit integers and the original code then maps
        # a negative result to its unsigned value; keeping the low 32 bits is
        # the same thing and also holds for negative or oversized seed parts.
        a &= 0xFFFFFFFF
        a %= 1000000  # int(1E6)

        return '{}.{}'.format(a, a ^ b)

    def do(self, text):
        """Refresh the seed if needed and return the token for ``text``."""
        self._update()
        tk = self.acquire(text)
        return tk
|
Loading…
Reference in New Issue
Block a user