From 04a0e6dfbbeb7993681a8e2dd656aa40bcb0717b Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Thu, 1 Jun 2017 15:50:13 +0200 Subject: [PATCH] Support language names and RFC 1766 codes (#26) * Update client.py Accept language names as dest/src params in the `translate` routine. Move dest/src params check to the `translate` routine. * Update constants.py Add LANGCODES dict * Update __init__.py Import LANGCODES * Update test_client.py Add tests * Update constants.py Fix typo --- googletrans/__init__.py | 2 +- googletrans/client.py | 41 ++++++++++++++++++++++++---------------- googletrans/constants.py | 4 +++- tests/test_client.py | 10 ++++++++++ 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/googletrans/__init__.py b/googletrans/__init__.py index 353a9e0..9bb489f 100644 --- a/googletrans/__init__.py +++ b/googletrans/__init__.py @@ -4,4 +4,4 @@ __version__ = '2.1.3' from googletrans.client import Translator -from googletrans.constants import LANGUAGES +from googletrans.constants import LANGCODES, LANGUAGES diff --git a/googletrans/client.py b/googletrans/client.py index 56dc6d8..18339b6 100644 --- a/googletrans/client.py +++ b/googletrans/client.py @@ -10,7 +10,7 @@ import random from googletrans import urls, utils from googletrans.compat import PY3 from googletrans.gtoken import TokenAcquirer -from googletrans.constants import DEFAULT_USER_AGENT, LANGUAGES, SPECIAL_CASES +from googletrans.constants import DEFAULT_USER_AGENT, LANGCODES, LANGUAGES, SPECIAL_CASES from googletrans.models import Translated, Detected @@ -50,19 +50,7 @@ class Translator(object): return self.service_urls[0] return random.choice(self.service_urls) - def _translate(self, text, dest='en', src='auto'): - if src != 'auto': - if src not in LANGUAGES.keys() and src in SPECIAL_CASES.keys(): - src = SPECIAL_CASES[src] - elif src not in LANGUAGES.keys(): - raise ValueError('invalid source language') - - if dest not in LANGUAGES.keys(): - if dest in SPECIAL_CASES.keys(): - dest = 
SPECIAL_CASES[dest] - else: - raise ValueError('invalid destination language') - + def _translate(self, text, dest, src): if not PY3 and isinstance(text, str): # pragma: nocover text = text.decode('utf-8') @@ -82,11 +70,13 @@ class Translator(object): :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator) :param dest: The language to translate the source text into. - The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`. + The value should be one of the language codes listed in :const:`googletrans.LANGUAGES` + or one of the language names listed in :const:`googletrans.LANGCODES`. :param dest: :class:`str`; :class:`unicode` :param src: The language of the source text. - The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`. + The value should be one of the language codes listed in :const:`googletrans.LANGUAGES` + or one of the language names listed in :const:`googletrans.LANGCODES`. If a language is not specified, the system will attempt to identify the source language automatically. 
:param src: :class:`str`; :class:`unicode` @@ -112,6 +102,25 @@ class Translator(object): jumps over -> 이상 점프 the lazy dog -> 게으른 개 """ + dest = dest.lower().split('_', 1)[0] + src = src.lower().split('_', 1)[0] + + if src != 'auto' and src not in LANGUAGES: + if src in SPECIAL_CASES: + src = SPECIAL_CASES[src] + elif src in LANGCODES: + src = LANGCODES[src] + else: + raise ValueError('invalid source language') + + if dest not in LANGUAGES: + if dest in SPECIAL_CASES: + dest = SPECIAL_CASES[dest] + elif dest in LANGCODES: + dest = LANGCODES[dest] + else: + raise ValueError('invalid destination language') + if isinstance(text, list): result = [] for item in text: diff --git a/googletrans/constants.py b/googletrans/constants.py index 0fb9a8b..a598ae4 100644 --- a/googletrans/constants.py +++ b/googletrans/constants.py @@ -59,4 +59,6 @@ LANGUAGES = { 'vi': 'vietnamese', 'cy': 'welsh', 'yi': 'yiddish', - } \ No newline at end of file + } + +LANGCODES = dict(map(reversed, LANGUAGES.items())) diff --git a/tests/test_client.py b/tests/test_client.py index ccbfae6..a5116c9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -27,6 +27,16 @@ def test_unicode(translator): assert result.text == u'こんにちは。' +def test_language_name(translator): + result = translator.translate(u'Hello', src='ENGLISH', dest='iRiSh') + assert result.text == u'Dia dhuit' + + +def test_language_rfc1766(translator): + result = translator.translate(u'luna', src='it_ch@euro', dest='en') + assert result.text == u'moon' + + def test_special_chars(translator): text = u"©×《》"