def _translate(self, text, dest='en', src='auto'):
    """Send one translate request and return the parsed response data.

    Normalizes the language codes, obtains a token for ``text`` from
    ``self.token_acquirer``, issues a GET against ``urls.TRANSLATE``
    through ``self.session`` and hands the response body to
    ``utils.format_json``.

    :param text: the text to send. On Python 2 a byte ``str`` is decoded
                 as UTF-8 before use.
    :param dest: destination language code; must be a key of
                 ``LANGUAGES`` or of ``SPECIAL_CASES``.
    :param src: source language code, or ``'auto'`` (passed through
                without validation).
    :raises ValueError: if ``src`` (other than ``'auto'``) or ``dest`` is
                        found in neither ``LANGUAGES`` nor
                        ``SPECIAL_CASES``.
    :return: the value produced by ``utils.format_json`` for the
             response text.
    """
    # Membership is tested on the mappings themselves: ``.keys()`` would
    # build a list and scan it linearly on Python 2.
    # NOTE(review): assumes LANGUAGES / SPECIAL_CASES are dict-like.
    if src != 'auto' and src not in LANGUAGES:
        if src not in SPECIAL_CASES:
            raise ValueError('invalid source language')
        # Alias code -> the code that is actually sent to the service.
        src = SPECIAL_CASES[src]

    if dest not in LANGUAGES:
        if dest not in SPECIAL_CASES:
            raise ValueError('invalid destination language')
        dest = SPECIAL_CASES[dest]

    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    token = self.token_acquirer.do(text)
    params = utils.build_params(query=text, src=src, dest=dest, token=token)
    response = self.session.get(urls.TRANSLATE, params=params)

    return utils.format_json(response.text)
@@ -75,30 +90,8 @@ class Translator(object): result.append(translated) return result - if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys(): - src = SPECIAL_CASES[src] - elif src != 'auto' and src not in LANGUAGES.keys(): - raise ValueError('invalid source language') - - if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys(): - dest = SPECIAL_CASES[dest] - elif dest not in LANGUAGES.keys(): - raise ValueError('invalid destination language') - - result = '' origin = text - token = self.token_acquirer.do(text) - text = quote(text) - url = urls.TRANSLATE.format(query=text, src=src, dest=dest, token=token) - r = self.session.get(url) - - """ - Response Sample (20150605) - $ ./translate "republique" -d ko - - [[["공화국","republique"],[,,"gonghwagug"]],,"fr",,,[["republique",1,[["공화국",1000,true,false],["공화국의",0,true,false],["공화국에",0,true,false],["공화국에서",0,true,false]],[[0,10]],"republique",0,1]],0.94949496,,[["fr"],,[0.94949496]],,,[["명사",[[["communauté","démocratie"],""]],"république"]]] - """ - data = format_json(r.text) + data = self._translate(text, dest, src) # this code will be updated when the format is changed. translated = data[0][0][0] @@ -107,13 +100,13 @@ class Translator(object): # src passed is equal to auto. 
try: src = data[-1][0][0] - except: # pragma: nocover + except Exception: # pragma: nocover pass pron = origin try: pron = data[0][1][-1] - except: # pragma: nocover + except Exception: # pragma: nocover pass if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): # pragma: nocover origin = origin.decode('utf-8') @@ -122,13 +115,16 @@ class Translator(object): # for python 2.x compatbillity if not PY3: # pragma: nocover - if isinstance(src, str): src = src.decode('utf-8') - if isinstance(dest, str): dest = dest.decode('utf-8') - if isinstance(translated, str): translated = translated.decode('utf-8') + if isinstance(src, str): + src = src.decode('utf-8') + if isinstance(dest, str): + dest = dest.decode('utf-8') + if isinstance(translated, str): + translated = translated.decode('utf-8') # put final values into a new Translated object result = Translated(src=src, dest=dest, origin=origin, - text=translated, pronunciation=pron) + text=translated, pronunciation=pron) return result @@ -168,23 +164,17 @@ class Translator(object): result.append(lang) return result - result = '' - origin = text - token = self.token_acquirer.do(text) - text = quote(text) - url = urls.DETECT.format(query=text, token=token) - r = self.session.get(url) - data = format_json(r.text) + data = self._translate(text, dest='en', src='auto') # actual source language that will be recognized by Google Translator when the # src passed is equal to auto. src = '' confidence = 0.0 try: - src = ''.join(data[-1][0]) - confidence = data[-1][-1][0] - except: # pragma: nocover + src = ''.join(data[8][0]) + confidence = data[8][-2][0] + except Exception: # pragma: nocover pass result = Detected(lang=src, confidence=confidence) - return result \ No newline at end of file + return result diff --git a/googletrans/urls.py b/googletrans/urls.py index 1094e3e..982c673 100644 --- a/googletrans/urls.py +++ b/googletrans/urls.py @@ -3,5 +3,4 @@ Predefined URLs used to make google translate requests. 
""" BASE = 'https://translate.google.com' -TRANSLATE = 'https://translate.google.com/translate_a/single?client=t&sl={src}&tl={dest}&hl={dest}&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&tk={token}&q={query}' -DETECT = 'https://translate.google.com/translate_a/single?client=t&sl=auto&tl=en&hl=en&dt=bd&ie=UTF-8&oe=UTF-8&tk={token}&q={query}' \ No newline at end of file +TRANSLATE = 'https://translate.google.com/translate_a/single' diff --git a/googletrans/utils.py b/googletrans/utils.py index b7c352d..dc315e3 100644 --- a/googletrans/utils.py +++ b/googletrans/utils.py @@ -1,9 +1,27 @@ """A conversion module for googletrans""" from __future__ import print_function import re -import traceback import json + +def build_params(query, src, dest, token): + params = { + 'client': 't', + 'sl': src, + 'tl': dest, + 'hl': dest, + 'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'], + 'ie': 'UTF-8', + 'oe': 'UTF-8', + 'otf': 1, + 'ssel': 0, + 'tsel': 0, + 'tk': token, + 'q': query, + } + return params + + def format_json(original): # save state states = [] @@ -37,4 +55,4 @@ def format_json(original): def rshift(val, n): """python port for '>>>'(right shift with padding) """ - return (val % 0x100000000) >> n \ No newline at end of file + return (val % 0x100000000) >> n