Add google translate token generator
This commit is contained in:
parent
2f76c45d6e
commit
59d96f75a0
181
googletrans/gtoken.py
Normal file
181
googletrans/gtoken.py
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import ast
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
from googletrans.compat import PY3
|
||||||
|
from googletrans.compat import unicode
|
||||||
|
from googletrans.utils import rshift
|
||||||
|
|
||||||
|
|
||||||
|
class TokenAcquirer(object):
    """Google Translate API token generator.

    translate.google.com uses a token to authorize the requests. If you are
    not Google, you do not have this token and will have to pay for use.
    This class is the result of reverse engineering on the obfuscated and
    minified code used by Google to generate such token.

    The token is based on a seed which is updated once per hour and on the
    text that will be translated.
    Both are combined - by some strange math - in order to generate a final
    token (e.g. 744915.856682) which is used by the API to validate the
    request.

    This operation will cause an additional request to get an initial
    token from translate.google.com.

    :Example:

    >>> from googletrans.gtoken import TokenAcquirer
    >>> acquirer = TokenAcquirer()
    >>> text = 'test'
    >>> tk = acquirer.do(text)
    >>> print(tk)
    950629.577246
    """

    # Captures the body of the obfuscated JS function that computes TKK.
    RE_TKK = re.compile(r'TKK=eval\(\'\(\(function\(\)\{(.+?)\}\)\(\)\)\'\);', re.DOTALL)

    def __init__(self, tkk='0', session=None):
        """Create an acquirer.

        :param tkk: initial seed in ``"<hours>.<value>"`` form; ``'0'`` means
                    "no seed yet".
        :param session: object exposing ``get(url)`` (a ``requests.Session``
                        is created when omitted).
        """
        self.session = session or requests.Session()
        self.tkk = tkk

    @staticmethod
    def _number(node):
        """Return the numeric literal held by an AST node, or ``None``.

        ``ast.Num`` is deprecated since Python 3.8 in favour of
        ``ast.Constant``; the version gate keeps both old and new
        interpreters working without touching the deprecated name on
        modern ones.
        """
        if sys.version_info >= (3, 8):
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is an int subclass but is not a numeric literal here
                if isinstance(value, (int, float, complex)) and \
                        not isinstance(value, bool):
                    return value
            return None
        if isinstance(node, ast.Num):
            return node.n
        return None

    def _update(self):
        """Refresh ``self.tkk`` from translate.google.com when it is stale.

        The seed is considered valid while its integer part equals the
        current hour count since the epoch. When the page does not contain
        the expected TKK snippet the previous seed is kept unchanged.
        """
        # we don't need to update the base TKK value when it is still valid
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        r = self.session.get('https://translate.google.com')
        match = self.RE_TKK.search(r.text)
        if match is None:
            # page layout changed or request was blocked: keep the old seed
            return

        # this will be the same as python code after stripping out a
        # reserved word 'var'
        code = unicode(match.group(1)).replace('var ', '')
        # unescape special ascii characters such like a \x3d(=)
        if PY3:  # pragma: no cover
            code = code.encode().decode('unicode-escape')
        else:  # pragma: no cover
            code = code.decode('string_escape')

        if not code:
            return

        tree = ast.parse(code)
        visit_return = False
        operator = '+'
        n, keys = 0, dict(a=0, b=0)
        # '+' is the default; ast.Add therefore leaves ``operator`` untouched
        symbols = (
            (ast.Sub, '-'),
            (ast.Mult, '*'),
            (ast.Pow, '**'),
            (ast.BitXor, '^'),
        )
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign):
                name = node.targets[0].id
                if name in keys:
                    literal = self._number(node.value)
                    if literal is not None:
                        keys[name] = literal
                    # the value can sometimes be negative
                    elif isinstance(node.value, ast.UnaryOp) and \
                            isinstance(node.value.op, ast.USub):
                        keys[name] = -self._number(node.value.operand)
            elif isinstance(node, ast.Return):
                # parameters should be set after this point
                visit_return = True
            elif visit_return and self._number(node) is not None:
                n = self._number(node)
            elif visit_return and n > 0:
                # the default operator is '+' but implement some more for
                # all possible scenarios
                for node_type, symbol in symbols:  # pragma: nocover
                    if isinstance(node, node_type):
                        operator = symbol
                        break

        # NOTE: eval() on data derived from a remote page. The expression is
        # built only from two numeric literals and an operator taken from the
        # fixed set above, and builtins are disabled ('__builtins__' is the
        # key eval() honours).
        clause = compile('{1}{0}{2}'.format(operator, keys['a'], keys['b']), '', 'eval')
        value = eval(clause, {'__builtins__': {}})
        self.tkk = '{}.{}'.format(n, value)

    def _lazy(self, value):
        """like lazy evaluation, this method returns a lambda function that
        returns value given.
        We won't be needing this because this seems to have been built for
        code obfuscation.

        the original code of this method is as follows:

        .. code-block:: javascript

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Apply the shift/add/xor program ``b`` to the integer ``a``.

        ``b`` is read three characters at a time: ``b[i]`` selects add
        (``'+'``, masked to 32 bits) or xor, ``b[i + 1]`` selects ``rshift``
        (``'+'``) or left shift, and ``b[i + 2]`` is the shift amount
        (a digit, or a letter where ``'a'`` means 10).
        """
        for pos in range(0, len(b) - 2, 3):
            amount = b[pos + 2]
            amount = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
            shifted = rshift(a, amount) if '+' == b[pos + 1] else a << amount
            a = (a + shifted) & 4294967295 if '+' == b[pos] else a ^ shifted
        return a

    @staticmethod
    def _utf8_bytes(text):
        """Return the list of UTF-8 byte values of ``text``.

        Mirrors the JavaScript routine, which walks UTF-16 code units:
        code points above 0xFFFF are first split into a surrogate pair so
        that the same pairing logic applies on every Python build.
        """
        units = []
        for char in text:
            point = ord(char)
            if point < 0x10000:
                units.append(point)
            else:
                point -= 0x10000
                units.append(0xD800 + (point >> 10))
                units.append(0xDC00 + (point & 0x3FF))

        out = []
        size = len(units)
        g = 0
        while g < size:
            l = units[g]
            if l < 128:
                # plain ASCII
                out.append(l)
            elif l < 2048:
                # two-byte sequence
                out.append(l >> 6 | 192)
                out.append(l & 63 | 128)
            elif (l & 64512) == 55296 and g + 1 < size and \
                    (units[g + 1] & 64512) == 56320:
                # high surrogate followed by low surrogate: four bytes
                g += 1
                l = 65536 + ((l & 1023) << 10) + (units[g] & 1023)
                out.append(l >> 18 | 240)
                out.append(l >> 12 & 63 | 128)
                out.append(l >> 6 & 63 | 128)
                out.append(l & 63 | 128)
            else:
                # three-byte sequence (also used for a lone surrogate)
                out.append(l >> 12 | 224)
                out.append(l >> 6 & 63 | 128)
                out.append(l & 63 | 128)
            g += 1
        return out

    def acquire(self, text):
        """Compute the token for ``text`` from the current ``self.tkk`` seed.

        :param text: the text that will be translated.
        :return: token string in ``"<a>.<a ^ b>"`` form, where ``b`` is the
                 integer part of the seed.
        """
        b = self.tkk if self.tkk != '0' else ''
        d = b.split('.')
        has_seed = len(d) > 1
        b = int(d[0]) if has_seed else 0

        a = b
        for value in self._utf8_bytes(text):
            a += value
            a = self._xr(a, '+-a^+6')
        a = self._xr(a, '+-3^+b+-f')
        a ^= int(d[1]) if has_seed else 0
        if a < 0:  # pragma: nocover
            # reinterpret as an unsigned 32-bit value
            a = (a & 2147483647) + 2147483648
        a %= 1000000  # int(1E6)

        return '{}.{}'.format(a, a ^ b)

    def do(self, text):
        """Refresh the seed if needed and return the token for ``text``."""
        self._update()
        return self.acquire(text)
|
Loading…
Reference in New Issue
Block a user