diff --git a/googletrans/__init__.py b/googletrans/__init__.py index 2bb05d4..b66a4ea 100644 --- a/googletrans/__init__.py +++ b/googletrans/__init__.py @@ -1,3 +1,7 @@ """Free Google Translate API for Python. Translates totally free of charge.""" +__all__ = 'Translator', +__version_info__ = 2, 0, 0 +__version__ = '.'.join(str(v) for v in __version_info__) -__version__ = 1.2 + +from googletrans.client import Translator \ No newline at end of file diff --git a/googletrans/client.py b/googletrans/client.py new file mode 100644 index 0000000..da48fb2 --- /dev/null +++ b/googletrans/client.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" +A Translation module. + +You can translate text using this module. +""" +import re +import requests +import sys +from collections import namedtuple +from future.moves.urllib.parse import quote + +from googletrans import __version__ +from googletrans import urls +from googletrans.compat import PY3 +from googletrans.gtoken import TokenAcquirer +from googletrans.utils import format_json +from googletrans.constants import DEFAULT_USER_AGENT, LANGUAGES, SPECIAL_CASES +from googletrans.models import Translated, Detected + + +EXCLUDES = ['en', 'ca', 'fr'] +RE_SRC = re.compile(',\[\["([\w]{2})"\]') + + +class Translator(object): + + def __init__(self, user_agent=DEFAULT_USER_AGENT): + self.session = requests.Session() + self.session.headers.update({ + 'User-Agent': user_agent, + }) + self.token_acquirer = TokenAcquirer(session=self.session) + + # Use HTTP2 Adapter if hyper is installed + try: + from hyper.contrib import HTTP20Adapter + self.session.mount(urls.BASE, HTTP20Adapter()) + except ImportError: # pragma: nocover + pass + + def translate(self, text, dest='en', src='auto'): + """ + Translate the passed text into destination language. + + Basic usage: + >>> from googletrans import translator + >>> translator.translate('안녕하세요.') + + >>> translator.translate('안녕하세요.', dest='ja') + + >>> translator.translate('veritas lux mea', src='la') + + + Advanced usage: + >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko') + >>> for translation in translations: + ... print(translation.origin, ' -> ', translation.text) + The quick brown fox -> 빠른 갈색 여우 + jumps over -> 이상 점프 + the lazy dog -> 게으른 개 + + :param text: the text you want to translate. + you can pass this parameter as a list object, as shown in the advanced usage above. + :param dest: the destination language you want to translate. (default: en) + :param src: the source language you want to translate. (default: auto) + + :rtype: Translated + :rtype: list (when list is passed) + """ + if isinstance(text, list): + result = [] + for item in text: + translated = self.translate(item, dest=dest, src=src) + result.append(translated) + return result + + if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys(): + src = SPECIAL_CASES[src] + elif src != 'auto' and src not in LANGUAGES.keys(): + raise ValueError('invalid source language') + + if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys(): + dest = SPECIAL_CASES[dest] + elif dest not in LANGUAGES.keys(): + raise ValueError('invalid destination language') + + result = '' + origin = text + token = self.token_acquirer.do(text) + text = quote(text) + url = urls.TRANSLATE.format(query=text, src=src, dest=dest, token=token) + r = self.session.get(url) + + """ + Response Sample (20150605) + $ ./translate "republique" -d ko + + [[["공화국","republique"],[,,"gonghwagug"]],,"fr",,,[["republique",1,[["공화국",1000,true,false],["공화국의",0,true,false],["공화국에",0,true,false],["공화국에서",0,true,false]],[[0,10]],"republique",0,1]],0.94949496,,[["fr"],,[0.94949496]],,,[["명사",[[["communauté","démocratie"],""]],"république"]]] + """ + data = format_json(r.text) + + # this code will be updated when the format is changed. + translated = data[0][0][0] + + # actual source language that will be recognized by Google Translator when the + # src passed is equal to auto. + try: + src = data[-1][0][0] + except: # pragma: nocover + pass + + pron = origin + try: + pron = data[0][1][-1] + except: # pragma: nocover + pass + if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): # pragma: nocover + origin = origin.decode('utf-8') + if dest in EXCLUDES and pron == origin: + pron = translated + + # for python 2.x compatbillity + if not PY3: # pragma: nocover + if isinstance(src, str): src = src.decode('utf-8') + if isinstance(dest, str): dest = dest.decode('utf-8') + if isinstance(translated, str): translated = translated.decode('utf-8') + + # put final values into a new Translated object + result = Translated(src=src, dest=dest, origin=origin, + text=translated, pronunciation=pron) + + return result + + def detect(self, text): + """ + Detect the language of a text. + + Basic usage: + >>> from googletrans import translator + >>> translator.detect('이 문장은 한글로 쓰여졌습니다.') + + >>> translator.detect('この文章は日本語で書かれました。') + + >>> translator.detect('This sentence is written in English.') + + >>> translator.detect('Tiu frazo estas skribita en Esperanto.') + + + Advanced usage: + >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français']) + >>> for lang in langs: + ... print(lang.lang, lang.confidence) + ko 1 + ja 0.92929292 + en 0.96954316 + fr 0.043500196 + + :param text: the text you want to detect. + + :rtype: Detected + :rtype: list (when list is passed) + """ + if isinstance(text, list): + result = [] + for item in text: + lang = self.detect(item) + result.append(lang) + return result + + result = '' + origin = text + token = self.token_acquirer.do(text) + text = quote(text) + url = urls.DETECT.format(query=text, token=token) + r = self.session.get(url) + data = format_json(r.text) + + # actual source language that will be recognized by Google Translator when the + # src passed is equal to auto. + src = '' + confidence = 0.0 + try: + src = ''.join(data[-1][0]) + confidence = data[-1][-1][0] + except: # pragma: nocover + pass + result = Detected(lang=src, confidence=confidence) + + return result \ No newline at end of file diff --git a/googletrans/compat.py b/googletrans/compat.py new file mode 100644 index 0000000..26c441a --- /dev/null +++ b/googletrans/compat.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +import sys + + +PY3 = sys.version_info > (3, ) + +if PY3: + unicode = str \ No newline at end of file diff --git a/googletrans/conversion.py b/googletrans/constants.py similarity index 51% rename from googletrans/conversion.py rename to googletrans/constants.py index c7eeeec..0fb9a8b 100644 --- a/googletrans/conversion.py +++ b/googletrans/constants.py @@ -1,41 +1,4 @@ -"""A conversion module for googletrans""" -from __future__ import print_function -import re -import traceback -import json - -def format_json(original): - # save state - states = [] - text = original - for i, pos in enumerate(re.finditer('"', text)): - p = pos.start() + 1 - if i % 2 == 0: - nxt = text.find('"', p) - states.append((p, text[p:nxt])) - - # replace all weired characters in text - while text.find(',,') > -1: - text = text.replace(',,', ',null,') - while text.find('[,') > -1: - text = text.replace('[,', '[null,') - - # recover state - for i, pos in enumerate(re.finditer('"', text)): - p = pos.start() + 1 - if i % 2 == 0: - j = int(i / 2) - nxt = text.find('"', p) - # replacing a portion of a string - # use slicing to extract those parts of the original string to be kept - text = text[:p] + states[j][1] + text[nxt:] - - try: - converted = json.loads(text) - except ValueError as e: - print('original text: ', original, ' => ', text) - traceback.print_exc() - return converted +DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)' SPECIAL_CASES = { 'ee': 'et', @@ -96,4 +59,4 @@ LANGUAGES = { 'vi': 'vietnamese', 'cy': 'welsh', 'yi': 'yiddish', - } + } \ No newline at end of file diff --git a/googletrans/response.py b/googletrans/models.py similarity index 84% rename from googletrans/response.py rename to googletrans/models.py index 35c3aa3..3df5647 100644 --- a/googletrans/response.py +++ b/googletrans/models.py @@ -15,13 +15,14 @@ class Translated(object): self.text = text self.pronunciation = pronunciation - def __str__(self): + def __str__(self): # pragma: nocover return self.__unicode__() - def __unicode__(self): + def __unicode__(self): # pragma: nocover return u''.format( src=self.src, dest=self.dest, text=self.text, pronunciation=self.pronunciation) + class Detected(object): """ The detected object, which contains Google Translator's langauge detection result. @@ -33,10 +34,9 @@ class Detected(object): self.lang = lang self.confidence = confidence - def __str__(self): + def __str__(self): # pragma: nocover return self.__unicode__() - def __unicode__(self): + def __unicode__(self): # pragma: nocover return u''.format( - lang=self.lang, confidence=self.confidence) - + lang=self.lang, confidence=self.confidence) \ No newline at end of file diff --git a/googletrans/translator.py b/googletrans/translator.py deleted file mode 100644 index 69f5929..0000000 --- a/googletrans/translator.py +++ /dev/null @@ -1,192 +0,0 @@ -# -*- coding: utf-8 -*- -""" -A Translation module. - -You can translate text using this module. -""" -import re -import requests -import sys -from collections import namedtuple -from future.moves.urllib.parse import quote - -from . import __version__ -from googletrans import urls -from googletrans.conversion import format_json -from googletrans.conversion import LANGUAGES, SPECIAL_CASES -from googletrans.response import Translated, Detected - -user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36 Gt/{0}'.format(__version__) - -EXCLUDES = ['en', 'ca', 'fr'] -RE_SRC = re.compile(',\[\["([\w]{2})"\]') -PY3 = sys.version_info > (3, 1) - -__agent = None -__headers = { - 'User-Agent': user_agent, - 'Referer': urls.TRANSLATOR, -} -def agent(): - """ - A requests session for translator - """ - global __agent - # create new object when object doesn't created yet. - if not __agent: - __agent = requests.Session() - # this code may help to avoid a ban. - __agent.get(urls.TRANSLATOR, headers=__headers) - - return __agent - -def translate(text, dest='en', src='auto'): - """ - Translate the passed text into destination language. - -Basic usage: - >>> from googletrans import translator - >>> translator.translate('안녕하세요.') - - >>> translator.translate('안녕하세요.', dest='ja') - - >>> translator.translate('veritas lux mea', src='la') - - -Advanced usage: - >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko') - >>> for translation in translations: - ... print(translation.origin, ' -> ', translation.text) - The quick brown fox -> 빠른 갈색 여우 - jumps over -> 이상 점프 - the lazy dog -> 게으른 개 - - :param text: the text you want to translate. - you can pass this parameter as a list object, as shown in the advanced usage above. - :param dest: the destination language you want to translate. (default: en) - :param src: the source language you want to translate. (default: auto) - - :rtype: Translated - :rtype: list (when list is passed) - """ - if isinstance(text, list): - result = [] - for item in text: - translated = translate(item, dest=dest, src=src) - result.append(translated) - return result - - if src != 'auto' and src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys(): - src = SPECIAL_CASES[src] - elif src != 'auto' and src not in LANGUAGES.keys(): - raise ValueError('incorrect source language') - - if dest not in LANGUAGES.keys() and dest in SPECIAL_CASES.keys(): - dest = SPECIAL_CASES[dest] - elif dest not in LANGUAGES.keys(): - raise ValueError('incorrect destination language') - - result = '' - sess = agent() # acquire requests session - origin = text - text = quote(text) - url = urls.TRANSLATE.format(query=text, src=src, dest=dest) - r = sess.get(url, headers=__headers) - - """ - Response Sample (20150605) - $ ./translate "republique" -d ko - - [[["공화국","republique"],[,,"gonghwagug"]],,"fr",,,[["republique",1,[["공화국",1000,true,false],["공화국의",0,true,false],["공화국에",0,true,false],["공화국에서",0,true,false]],[[0,10]],"republique",0,1]],0.94949496,,[["fr"],,[0.94949496]],,,[["명사",[[["communauté","démocratie"],""]],"république"]]] - """ - data = format_json(r.text) - - # this code will be updated when the format is changed. - translated = data[0][0][0] - - # actual source language that will be recognized by Google Translator when the - # src passed is equal to auto. - try: - src = data[-1][0][0] - except: pass - - pron = origin - try: - pron = data[0][1][-1] - except: pass - if not PY3 and isinstance(pron, unicode) and isinstance(origin, str): - origin = origin.decode('utf-8') - if dest in EXCLUDES and pron == origin: - pron = translated - - # for python 2.x compatbillity - if not PY3: - if isinstance(src, str): src = src.decode('utf-8') - if isinstance(dest, str): dest = dest.decode('utf-8') - if isinstance(translated, str): translated = translated.decode('utf-8') - - # put final values into new Translated object - result = Translated(src=src, dest=dest, origin=origin, - text=translated, pronunciation=pron) - - return result - -def detect(text): - """ - Detect the language of a text. - -Basic usage: - >>> from googletrans import translator - >>> translator.detect('이 문장은 한글로 쓰여졌습니다.') - - >>> translator.detect('この文章は日本語で書かれました。') - - >>> translator.detect('This sentence is written in English.') - - >>> translator.detect('Tiu frazo estas skribita en Esperanto.') - - -Advanced usage: - >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français']) - >>> for lang in langs: - ... print(lang.lang, lang.confidence) - ko 1 - ja 0.92929292 - en 0.96954316 - fr 0.043500196 - - :param text: the text you want to detect. - - :rtype: Detected - :rtype: list (when list is passed) - """ - if isinstance(text, list): - result = [] - for item in text: - lang = detect(item) - result.append(lang) - return result - - result = '' - sess = agent() # acquire requests session - origin = text - text = quote(text) - url = urls.DETECT.format(query=text) - r = sess.get(url, headers=__headers) - data = format_json(r.text) - - # actual source language that will be recognized by Google Translator when the - # src passed is equal to auto. - src = '' - confidence = 0.0 - try: - src = ''.join(data[-1][0]) - confidence = data[-1][-1][0] - except: pass - result = Detected(lang=src, confidence=confidence) - - return result - -def get_languages(): - return LANGUAGES - diff --git a/googletrans/urls.py b/googletrans/urls.py index 7a340f0..1094e3e 100644 --- a/googletrans/urls.py +++ b/googletrans/urls.py @@ -2,6 +2,6 @@ """ Predefined URLs used to make google translate requests. """ -TRANSLATOR = 'https://translate.google.com/' -TRANSLATE = 'https://translate.google.com/translate_a/single?client=t&sl={src}&tl={dest}&hl={dest}&dt=bd&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&q={query}' -DETECT = 'https://translate.google.com/translate_a/single?client=t&sl=auto&tl=en&hl=en&dt=bd&ie=UTF-8&oe=UTF-8&q={query}' \ No newline at end of file +BASE = 'https://translate.google.com' +TRANSLATE = 'https://translate.google.com/translate_a/single?client=t&sl={src}&tl={dest}&hl={dest}&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&tk={token}&q={query}' +DETECT = 'https://translate.google.com/translate_a/single?client=t&sl=auto&tl=en&hl=en&dt=bd&ie=UTF-8&oe=UTF-8&tk={token}&q={query}' \ No newline at end of file diff --git a/googletrans/utils.py b/googletrans/utils.py new file mode 100644 index 0000000..b7c352d --- /dev/null +++ b/googletrans/utils.py @@ -0,0 +1,40 @@ +"""A conversion module for googletrans""" +from __future__ import print_function +import re +import traceback +import json + +def format_json(original): + # save state + states = [] + text = original + for i, pos in enumerate(re.finditer('"', text)): + p = pos.start() + 1 + if i % 2 == 0: + nxt = text.find('"', p) + states.append((p, text[p:nxt])) + + # replace all weired characters in text + while text.find(',,') > -1: + text = text.replace(',,', ',null,') + while text.find('[,') > -1: + text = text.replace('[,', '[null,') + + # recover state + for i, pos in enumerate(re.finditer('"', text)): + p = pos.start() + 1 + if i % 2 == 0: + j = int(i / 2) + nxt = text.find('"', p) + # replacing a portion of a string + # use slicing to extract those parts of the original string to be kept + text = text[:p] + states[j][1] + text[nxt:] + + converted = json.loads(text) + return converted + + +def rshift(val, n): + """python port for '>>>'(right shift with padding) + """ + return (val % 0x100000000) >> n \ No newline at end of file