Refactor client
This commit is contained in:
parent
aac87e5f29
commit
deb36364f7
@ -4,23 +4,17 @@ A Translation module.
|
|||||||
|
|
||||||
You can translate text using this module.
|
You can translate text using this module.
|
||||||
"""
|
"""
|
||||||
import re
|
|
||||||
import requests
|
import requests
|
||||||
import sys
|
|
||||||
from collections import namedtuple
|
|
||||||
from future.moves.urllib.parse import quote
|
|
||||||
|
|
||||||
from googletrans import __version__
|
from googletrans import urls, utils
|
||||||
from googletrans import urls
|
|
||||||
from googletrans.compat import PY3
|
from googletrans.compat import PY3
|
||||||
|
from googletrans.compat import unicode
|
||||||
from googletrans.gtoken import TokenAcquirer
|
from googletrans.gtoken import TokenAcquirer
|
||||||
from googletrans.utils import format_json
|
|
||||||
from googletrans.constants import DEFAULT_USER_AGENT, LANGUAGES, SPECIAL_CASES
|
from googletrans.constants import DEFAULT_USER_AGENT, LANGUAGES, SPECIAL_CASES
|
||||||
from googletrans.models import Translated, Detected
|
from googletrans.models import Translated, Detected
|
||||||
|
|
||||||
|
|
||||||
EXCLUDES = ['en', 'ca', 'fr']
|
EXCLUDES = ('en', 'ca', 'fr')
|
||||||
RE_SRC = re.compile(',\[\["([\w]{2})"\]')
|
|
||||||
|
|
||||||
|
|
||||||
class Translator(object):
|
class Translator(object):
|
||||||
@ -39,6 +33,27 @@ class Translator(object):
|
|||||||
except ImportError: # pragma: nocover
|
except ImportError: # pragma: nocover
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _translate(self, text, dest='en', src='auto'):
|
||||||
|
if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys():
|
||||||
|
src = SPECIAL_CASES[src]
|
||||||
|
elif src != 'auto' and src not in LANGUAGES.keys():
|
||||||
|
raise ValueError('invalid source language')
|
||||||
|
|
||||||
|
if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys():
|
||||||
|
dest = SPECIAL_CASES[dest]
|
||||||
|
elif dest not in LANGUAGES.keys():
|
||||||
|
raise ValueError('invalid destination language')
|
||||||
|
|
||||||
|
if not PY3 and isinstance(text, str): # pragma: nocover
|
||||||
|
text = text.decode('utf-8')
|
||||||
|
|
||||||
|
token = self.token_acquirer.do(text)
|
||||||
|
params = utils.build_params(query=text, src=src, dest=dest, token=token)
|
||||||
|
r = self.session.get(urls.TRANSLATE, params=params)
|
||||||
|
|
||||||
|
data = utils.format_json(r.text)
|
||||||
|
return data
|
||||||
|
|
||||||
def translate(self, text, dest='en', src='auto'):
|
def translate(self, text, dest='en', src='auto'):
|
||||||
"""
|
"""
|
||||||
Translate the passed text into destination language.
|
Translate the passed text into destination language.
|
||||||
@ -75,30 +90,8 @@ class Translator(object):
|
|||||||
result.append(translated)
|
result.append(translated)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys():
|
|
||||||
src = SPECIAL_CASES[src]
|
|
||||||
elif src != 'auto' and src not in LANGUAGES.keys():
|
|
||||||
raise ValueError('invalid source language')
|
|
||||||
|
|
||||||
if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys():
|
|
||||||
dest = SPECIAL_CASES[dest]
|
|
||||||
elif dest not in LANGUAGES.keys():
|
|
||||||
raise ValueError('invalid destination language')
|
|
||||||
|
|
||||||
result = ''
|
|
||||||
origin = text
|
origin = text
|
||||||
token = self.token_acquirer.do(text)
|
data = self._translate(text, dest, src)
|
||||||
text = quote(text)
|
|
||||||
url = urls.TRANSLATE.format(query=text, src=src, dest=dest, token=token)
|
|
||||||
r = self.session.get(url)
|
|
||||||
|
|
||||||
"""
|
|
||||||
Response Sample (20150605)
|
|
||||||
$ ./translate "republique" -d ko
|
|
||||||
|
|
||||||
[[["공화국","republique"],[,,"gonghwagug"]],,"fr",,,[["republique",1,[["공화국",1000,true,false],["공화국의",0,true,false],["공화국에",0,true,false],["공화국에서",0,true,false]],[[0,10]],"republique",0,1]],0.94949496,,[["fr"],,[0.94949496]],,,[["명사",[[["communauté","démocratie"],""]],"république"]]]
|
|
||||||
"""
|
|
||||||
data = format_json(r.text)
|
|
||||||
|
|
||||||
# this code will be updated when the format is changed.
|
# this code will be updated when the format is changed.
|
||||||
translated = data[0][0][0]
|
translated = data[0][0][0]
|
||||||
@ -107,13 +100,13 @@ class Translator(object):
|
|||||||
# src passed is equal to auto.
|
# src passed is equal to auto.
|
||||||
try:
|
try:
|
||||||
src = data[-1][0][0]
|
src = data[-1][0][0]
|
||||||
except: # pragma: nocover
|
except Exception: # pragma: nocover
|
||||||
pass
|
pass
|
||||||
|
|
||||||
pron = origin
|
pron = origin
|
||||||
try:
|
try:
|
||||||
pron = data[0][1][-1]
|
pron = data[0][1][-1]
|
||||||
except: # pragma: nocover
|
except Exception: # pragma: nocover
|
||||||
pass
|
pass
|
||||||
if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): # pragma: nocover
|
if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): # pragma: nocover
|
||||||
origin = origin.decode('utf-8')
|
origin = origin.decode('utf-8')
|
||||||
@ -122,13 +115,16 @@ class Translator(object):
|
|||||||
|
|
||||||
# for python 2.x compatibility
|
# for python 2.x compatibility
|
||||||
if not PY3: # pragma: nocover
|
if not PY3: # pragma: nocover
|
||||||
if isinstance(src, str): src = src.decode('utf-8')
|
if isinstance(src, str):
|
||||||
if isinstance(dest, str): dest = dest.decode('utf-8')
|
src = src.decode('utf-8')
|
||||||
if isinstance(translated, str): translated = translated.decode('utf-8')
|
if isinstance(dest, str):
|
||||||
|
dest = dest.decode('utf-8')
|
||||||
|
if isinstance(translated, str):
|
||||||
|
translated = translated.decode('utf-8')
|
||||||
|
|
||||||
# put final values into a new Translated object
|
# put final values into a new Translated object
|
||||||
result = Translated(src=src, dest=dest, origin=origin,
|
result = Translated(src=src, dest=dest, origin=origin,
|
||||||
text=translated, pronunciation=pron)
|
text=translated, pronunciation=pron)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@ -168,23 +164,17 @@ class Translator(object):
|
|||||||
result.append(lang)
|
result.append(lang)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
result = ''
|
data = self._translate(text, dest='en', src='auto')
|
||||||
origin = text
|
|
||||||
token = self.token_acquirer.do(text)
|
|
||||||
text = quote(text)
|
|
||||||
url = urls.DETECT.format(query=text, token=token)
|
|
||||||
r = self.session.get(url)
|
|
||||||
data = format_json(r.text)
|
|
||||||
|
|
||||||
# actual source language that will be recognized by Google Translator when the
|
# actual source language that will be recognized by Google Translator when the
|
||||||
# src passed is equal to auto.
|
# src passed is equal to auto.
|
||||||
src = ''
|
src = ''
|
||||||
confidence = 0.0
|
confidence = 0.0
|
||||||
try:
|
try:
|
||||||
src = ''.join(data[-1][0])
|
src = ''.join(data[8][0])
|
||||||
confidence = data[-1][-1][0]
|
confidence = data[8][-2][0]
|
||||||
except: # pragma: nocover
|
except Exception: # pragma: nocover
|
||||||
pass
|
pass
|
||||||
result = Detected(lang=src, confidence=confidence)
|
result = Detected(lang=src, confidence=confidence)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
@ -3,5 +3,4 @@
|
|||||||
Predefined URLs used to make google translate requests.
|
Predefined URLs used to make google translate requests.
|
||||||
"""
|
"""
|
||||||
BASE = 'https://translate.google.com'
|
BASE = 'https://translate.google.com'
|
||||||
TRANSLATE = 'https://translate.google.com/translate_a/single?client=t&sl={src}&tl={dest}&hl={dest}&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&tk={token}&q={query}'
|
TRANSLATE = 'https://translate.google.com/translate_a/single'
|
||||||
DETECT = 'https://translate.google.com/translate_a/single?client=t&sl=auto&tl=en&hl=en&dt=bd&ie=UTF-8&oe=UTF-8&tk={token}&q={query}'
|
|
||||||
|
@ -1,9 +1,27 @@
|
|||||||
"""A conversion module for googletrans"""
|
"""A conversion module for googletrans"""
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import re
|
import re
|
||||||
import traceback
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def build_params(query, src, dest, token):
|
||||||
|
params = {
|
||||||
|
'client': 't',
|
||||||
|
'sl': src,
|
||||||
|
'tl': dest,
|
||||||
|
'hl': dest,
|
||||||
|
'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'],
|
||||||
|
'ie': 'UTF-8',
|
||||||
|
'oe': 'UTF-8',
|
||||||
|
'otf': 1,
|
||||||
|
'ssel': 0,
|
||||||
|
'tsel': 0,
|
||||||
|
'tk': token,
|
||||||
|
'q': query,
|
||||||
|
}
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
def format_json(original):
|
def format_json(original):
|
||||||
# save state
|
# save state
|
||||||
states = []
|
states = []
|
||||||
@ -37,4 +55,4 @@ def format_json(original):
|
|||||||
def rshift(val, n):
|
def rshift(val, n):
|
||||||
"""python port for '>>>' (unsigned 32-bit right shift, zero-filled)
|
"""python port for '>>>' (unsigned 32-bit right shift, zero-filled)
|
||||||
"""
|
"""
|
||||||
return (val % 0x100000000) >> n
|
return (val % 0x100000000) >> n
|
||||||
|
Loading…
Reference in New Issue
Block a user