Refactor client

This commit is contained in:
SuHun Han 2017-03-10 22:44:39 +09:00 committed by ssut
parent aac87e5f29
commit deb36364f7
3 changed files with 60 additions and 53 deletions

View File

@ -4,23 +4,17 @@ A Translation module.
You can translate text using this module.
"""
import re
import requests
import sys
from collections import namedtuple
from future.moves.urllib.parse import quote
from googletrans import __version__
from googletrans import urls
from googletrans import urls, utils
from googletrans.compat import PY3
from googletrans.compat import unicode
from googletrans.gtoken import TokenAcquirer
from googletrans.utils import format_json
from googletrans.constants import DEFAULT_USER_AGENT, LANGUAGES, SPECIAL_CASES
from googletrans.models import Translated, Detected
EXCLUDES = ['en', 'ca', 'fr']
RE_SRC = re.compile(',\[\["([\w]{2})"\]')
EXCLUDES = ('en', 'ca', 'fr')
class Translator(object):
@ -39,6 +33,27 @@ class Translator(object):
except ImportError: # pragma: nocover
pass
def _translate(self, text, dest='en', src='auto'):
if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys():
src = SPECIAL_CASES[src]
elif src != 'auto' and src not in LANGUAGES.keys():
raise ValueError('invalid source language')
if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys():
dest = SPECIAL_CASES[dest]
elif dest not in LANGUAGES.keys():
raise ValueError('invalid destination language')
if not PY3 and isinstance(text, str): # pragma: nocover
text = text.decode('utf-8')
token = self.token_acquirer.do(text)
params = utils.build_params(query=text, src=src, dest=dest, token=token)
r = self.session.get(urls.TRANSLATE, params=params)
data = utils.format_json(r.text)
return data
def translate(self, text, dest='en', src='auto'):
"""
Translate the passed text into destination language.
@ -75,30 +90,8 @@ class Translator(object):
result.append(translated)
return result
if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys():
src = SPECIAL_CASES[src]
elif src != 'auto' and src not in LANGUAGES.keys():
raise ValueError('invalid source language')
if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys():
dest = SPECIAL_CASES[dest]
elif dest not in LANGUAGES.keys():
raise ValueError('invalid destination language')
result = ''
origin = text
token = self.token_acquirer.do(text)
text = quote(text)
url = urls.TRANSLATE.format(query=text, src=src, dest=dest, token=token)
r = self.session.get(url)
"""
Response Sample (20150605)
$ ./translate "republique" -d ko
[[["공화국","republique"],[,,"gonghwagug"]],,"fr",,,[["republique",1,[["공화국",1000,true,false],["공화국의",0,true,false],["공화국에",0,true,false],["공화국에서",0,true,false]],[[0,10]],"republique",0,1]],0.94949496,,[["fr"],,[0.94949496]],,,[["명사",[[["communauté","démocratie"],""]],"république"]]]
"""
data = format_json(r.text)
data = self._translate(text, dest, src)
# this code will be updated when the format is changed.
translated = data[0][0][0]
@ -107,13 +100,13 @@ class Translator(object):
# src passed is equal to auto.
try:
src = data[-1][0][0]
except: # pragma: nocover
except Exception: # pragma: nocover
pass
pron = origin
try:
pron = data[0][1][-1]
except: # pragma: nocover
except Exception: # pragma: nocover
pass
if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): # pragma: nocover
origin = origin.decode('utf-8')
@ -122,13 +115,16 @@ class Translator(object):
# for python 2.x compatbillity
if not PY3: # pragma: nocover
if isinstance(src, str): src = src.decode('utf-8')
if isinstance(dest, str): dest = dest.decode('utf-8')
if isinstance(translated, str): translated = translated.decode('utf-8')
if isinstance(src, str):
src = src.decode('utf-8')
if isinstance(dest, str):
dest = dest.decode('utf-8')
if isinstance(translated, str):
translated = translated.decode('utf-8')
# put final values into a new Translated object
result = Translated(src=src, dest=dest, origin=origin,
text=translated, pronunciation=pron)
text=translated, pronunciation=pron)
return result
@ -168,23 +164,17 @@ class Translator(object):
result.append(lang)
return result
result = ''
origin = text
token = self.token_acquirer.do(text)
text = quote(text)
url = urls.DETECT.format(query=text, token=token)
r = self.session.get(url)
data = format_json(r.text)
data = self._translate(text, dest='en', src='auto')
# actual source language that will be recognized by Google Translator when the
# src passed is equal to auto.
src = ''
confidence = 0.0
try:
src = ''.join(data[-1][0])
confidence = data[-1][-1][0]
except: # pragma: nocover
src = ''.join(data[8][0])
confidence = data[8][-2][0]
except Exception: # pragma: nocover
pass
result = Detected(lang=src, confidence=confidence)
return result
return result

View File

@ -3,5 +3,4 @@
Predefined URLs used to make google translate requests.
"""
BASE = 'https://translate.google.com'
TRANSLATE = 'https://translate.google.com/translate_a/single?client=t&sl={src}&tl={dest}&hl={dest}&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&tk={token}&q={query}'
DETECT = 'https://translate.google.com/translate_a/single?client=t&sl=auto&tl=en&hl=en&dt=bd&ie=UTF-8&oe=UTF-8&tk={token}&q={query}'
TRANSLATE = 'https://translate.google.com/translate_a/single'

View File

@ -1,9 +1,27 @@
"""A conversion module for googletrans"""
from __future__ import print_function
import re
import traceback
import json
def build_params(query, src, dest, token):
params = {
'client': 't',
'sl': src,
'tl': dest,
'hl': dest,
'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'],
'ie': 'UTF-8',
'oe': 'UTF-8',
'otf': 1,
'ssel': 0,
'tsel': 0,
'tk': token,
'q': query,
}
return params
def format_json(original):
# save state
states = []
@ -37,4 +55,4 @@ def format_json(original):
def rshift(val, n):
"""python port for '>>>'(right shift with padding)
"""
return (val % 0x100000000) >> n
return (val % 0x100000000) >> n