From 15549dcdf8c89b3c0158360b48d29c2f7774da14 Mon Sep 17 00:00:00 2001 From: ssut Date: Mon, 2 Oct 2017 23:26:34 +0900 Subject: [PATCH] Refactor googletrans to work properly (#40) * Fix the invalid token array generated for the text * Demote the compacted-JSON decoder to a legacy fallback, because Google apparently no longer sends responses in that format. This commit refers to issue #37. * Remove the test case for multiline text. This is a temporary fix; it will be reverted once the tests are mocked. * Add pipenv support * Update README --- Pipfile | 24 +++++ Pipfile.lock | 216 ++++++++++++++++++++++++++++++++++++++++++ README.rst | 15 +-- googletrans/gtoken.py | 2 +- googletrans/utils.py | 13 ++- tests/test_client.py | 13 --- tests/test_utils.py | 6 +- 7 files changed, 265 insertions(+), 24 deletions(-) create mode 100644 Pipfile create mode 100644 Pipfile.lock diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..6d7d44a --- /dev/null +++ b/Pipfile @@ -0,0 +1,24 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests = "==2.13.0" + + + + + + + +[dev-packages] + +future = "*" +coveralls = "*" +"pytest-watch" = "*" +"pytest-testmon" = "*" + + +[requires] +python_version = "3.6" \ No newline at end of file diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..2432da0 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,216 @@ +{ + "_meta": { + "hash": { + "sha256": "d324823904cf4298321e5b976f1787c3c9b73eb94ab413e96227109c0efcd33d" + }, + "host-environment-markers": { + "implementation_name": "cpython", + "implementation_version": "3.6.2", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_python_implementation": "CPython", + "platform_release": "4.4.0-91-generic", + "platform_system": "Linux", + "platform_version": "#114-Ubuntu SMP Tue Aug 8 11:56:56 UTC 2017", + "python_full_version": "3.6.2", + "python_version": "3.6", + "sys_platform": "linux" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.6" 
+ }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "requests": { + "hashes": [ + "sha256:1a720e8862a41aa22e339373b526f508ef0c8988baf48b84d3fc891a8e237efb", + "sha256:5722cd09762faa01276230270ff16af7acf7c5c45d623868d9ba116f15791ce8" + ], + "version": "==2.13.0" + } + }, + "develop": { + "argh": { + "hashes": [ + "sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3", + "sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65" + ], + "version": "==0.26.2" + }, + "certifi": { + "hashes": [ + "sha256:54a07c09c586b0e4c619f02a5e94e36619da8e2b053e20f594348c0611803704", + "sha256:40523d2efb60523e113b44602298f0960e900388cf3bb6043f645cf57ea9e3f5" + ], + "version": "==2017.7.27.1" + }, + "chardet": { + "hashes": [ + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" + ], + "version": "==3.0.4" + }, + "colorama": { + "hashes": [ + "sha256:463f8483208e921368c9f306094eb6f725c6ca42b0f97e313cb5d5512459feda", + "sha256:48eb22f4f8461b1df5734a074b57042430fb06e1d61bd1e11b078c0fe6d7a1f1" + ], + "version": "==0.3.9" + }, + "coverage": { + "hashes": [ + "sha256:c1456f66c536010cf9e4633a8853a9153e8fd588393695295afd4d0fc16c1d74", + "sha256:97a7ec51cdde3a386e390b159b20f247ccb478084d925c75f1faa3d26c01335e", + "sha256:83e955b975666b5a07d217135e7797857ce844eb340a99e46cc25525120417c4", + "sha256:483ed14080c5301048128bb027b77978c632dd9e92e3ecb09b7e28f5b92abfcf", + "sha256:ef574ab9640bcfa2f3c671831faf03f65788945fdf8efa4d4a1fffc034838e2a", + "sha256:c5a205b4da3c624f5119dc4d84240789b5906bb8468902ec22dcc4aad8aa4638", + "sha256:5dea90ed140e7fa9bc00463313f9bc4a6e6aff297b4969615e7a688615c4c4d2", + "sha256:f9e83b39d29c2815a38e4118d776b482d4082b5bf9c9147fbc99a3f83abe480a", + "sha256:700040c354f0230287906b1276635552a3def4b646e0145555bc9e2e5da9e365", + 
"sha256:7f1eacae700c66c3d7362a433b228599c9d94a5a3a52613dddd9474e04deb6bc", + "sha256:13ef9f799c8fb45c446a239df68034de3a6f3de274881b088bebd7f5661f79f8", + "sha256:dfb011587e2b7299112f08a2a60d2601706aac9abde37aa1177ea825adaed923", + "sha256:381be5d31d3f0d912334cf2c159bc7bea6bfe6b0e3df6061a3bf2bf88359b1f6", + "sha256:83a477ac4f55a6ef59552683a0544d47b68a85ce6a80fd0ca6b3dc767f6495fb", + "sha256:dfd35f1979da31bcabbe27bcf78d4284d69870731874af629082590023a77336", + "sha256:9681efc2d310cfc53863cc6f63e88ebe7a48124550fa822147996cb09390b6ab", + "sha256:53770b20ac5b4a12e99229d4bae57af0945be87cc257fce6c6c7571a39f0c5d4", + "sha256:8801880d32f11b6df11c32a961e186774b4634ae39d7c43235f5a24368a85f07", + "sha256:16db2c69a1acbcb3c13211e9f954e22b22a729909d81f983b6b9badacc466eda", + "sha256:ef43a06a960b46c73c018704051e023ee6082030f145841ffafc8728039d5a88", + "sha256:c3e2736664a6074fc9bd54fb643f5af0fc60bfedb2963b3d3f98c7450335e34c", + "sha256:17709e22e4c9f5412ba90f446fb13b245cc20bf4a60377021bbff6c0f1f63e7c", + "sha256:a2f7106d1167825c4115794c2ba57cc3b15feb6183db5328fa66f94c12902d8b", + "sha256:2a08e978f402696c6956eee9d1b7e95d3ad042959b71bafe1f3e4557cbd6e0ac", + "sha256:57f510bb16efaec0b6f371b64a8000c62e7e3b3e48e8b0a5745ade078d849814", + "sha256:0f1883eab9c19aa243f51308751b8a2a547b9b817b721cc0ecf3efb99fafbea7", + "sha256:e00fe141e22ce6e9395aa24d862039eb180c6b7e89df0bbaf9765e9aebe560a9", + "sha256:ec596e4401553caa6dd2e3349ce47f9ef82c1f1bcba5d8ac3342724f0df8d6ff", + "sha256:c820a533a943ebc860acc0ce6a00dd36e0fdf2c6f619ff8225755169428c5fa2", + "sha256:b7f7283eb7badd2b8a9c6a9d6eeca200a0a24db6be79baee2c11398f978edcaa", + "sha256:a5ed27ad3e8420b2d6b625dcbd3e59488c14ccc06030167bcf14ffb0f4189b77", + "sha256:d7b70b7b4eb14d0753d33253fe4f121ca99102612e2719f0993607deb30c6f33", + "sha256:4047dc83773869701bde934fb3c4792648eda7c0e008a77a0aec64157d246801", + "sha256:7a9c44400ee0f3b4546066e0710e1250fd75831adc02ab99dda176ad8726f424", + "sha256:0f649e68db74b1b5b8ca4161d08eb2b8fa8ae11af1ebfb80e80e112eb0ef5300", 
+ "sha256:52964fae0fafef8bd283ad8e9a9665205a9fdf912535434defc0ec3def1da26b", + "sha256:36aa6c8db83bc27346ddcd8c2a60846a7178ecd702672689d3ea1828eb1a4d11", + "sha256:9824e15b387d331c0fc0fef905a539ab69784368a1d6ac3db864b4182e520948", + "sha256:4a678e1b9619a29c51301af61ab84122e2f8cc7a0a6b40854b808ac6be604300", + "sha256:8bb7c8dca54109b61013bc4114d96effbf10dea136722c586bce3a5d9fc4e730", + "sha256:1a41d621aa9b6ab6457b557a754d50aaff0813fad3453434de075496fca8a183", + "sha256:0fa423599fc3d9e18177f913552cdb34a8d9ad33efcf52a98c9d4b644edb42c5", + "sha256:e61a4ba0b2686040cb4828297c7e37bcaf3a1a1c0bc0dbe46cc789dde51a80fa", + "sha256:ce9ef0fc99d11d418662e36fd8de6d71b19ec87c2eab961a117cc9d087576e72" + ], + "version": "==4.4.1" + }, + "coveralls": { + "hashes": [ + "sha256:84dd8c88c5754e8db70a682f537e2781366064aa3cdd6b24c2dcecbd3181187c", + "sha256:510682001517bcca1def9f6252df6ce730fcb9831c62d9fff7c7d55b6fdabdf3" + ], + "version": "==1.2.0" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "version": "==0.6.2" + }, + "future": { + "hashes": [ + "sha256:e39ced1ab767b5936646cedba8bcce582398233d6a627067d4c6a454c90cfedb" + ], + "version": "==0.16.0" + }, + "idna": { + "hashes": [ + "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", + "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f" + ], + "version": "==2.6" + }, + "pathtools": { + "hashes": [ + "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" + ], + "version": "==0.1.2" + }, + "py": { + "hashes": [ + "sha256:2ccb79b01769d99115aa600d7eed99f524bf752bba8f041dc1c184853514655a", + "sha256:0f2d585d22050e90c7d293b6451c83db097df77871974d90efd5a30dc12fcde3" + ], + "version": "==1.4.34" + }, + "pytest": { + "hashes": [ + "sha256:b84f554f8ddc23add65c411bf112b2d88e2489fd45f753b1cae5936358bdf314", + "sha256:f46e49e0340a532764991c498244a60e3a37d7424a532b3ff1a6a7653f1a403a" + ], + "version": "==3.2.2" + }, + 
"pytest-testmon": { + "hashes": [ + "sha256:a8e71249b53955030c98f986abaa6d9378e97606ee8c9606d4ad6c17cff8d4de" + ], + "version": "==0.9.6" + }, + "pytest-watch": { + "hashes": [ + "sha256:29941f6ff74e6d85cc0796434a5cbc27ebe51e91ed24fd0757fad5cc6fd3d491" + ], + "version": "==4.1.0" + }, + "pyyaml": { + "hashes": [ + "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f", + "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736", + "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269", + "sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8", + "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4", + "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1", + "sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab", + "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3", + "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8", + "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6", + "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca", + "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8", + "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608", + "sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7" + ], + "version": "==3.12" + }, + "requests": { + "hashes": [ + "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", + "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" + ], + "version": "==2.18.4" + }, + "urllib3": { + "hashes": [ + "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", + "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" + ], + "version": "==1.22" + }, + "watchdog": { + "hashes": [ + "sha256:7e65882adb7746039b6f3876ee174952f8eaaa34491ba34333ddf1fe35de4162" + ], + "version": "==0.8.3" + } + } +} 
diff --git a/README.rst b/README.rst index 86110a0..1d18370 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,8 @@ implemented Google Translate API. This uses the `Google Translate Ajax API `__ to make calls to such methods as detect and translate. -Compatible with Python 2.7+ and 3.4+ (CPython and PyPy. Py 2.6 and 3.3 -are not tested yet.) +Compatible with Python 2.7+ and 3.4+. (Note: Python 2 support will be dropped in the +next major release.) For details refer to the `API Documentation `__. @@ -173,13 +173,16 @@ GoogleTrans as a command line application Note on library usage --------------------- -- The maximum character limit on a single text is 15k. +DISCLAIMER: this is an unofficial library using the web API of translate.google.com +and also is not associated with Google. + +- **The maximum character limit on a single text is 15k.** - Due to limitations of the web version of google translate, this API - does not guarantee that the library would work properly at all times. - (so please use this library if you don't care about stability.) + does not guarantee that the library would work properly at all times + (so please use this library if you don't care about stability). -- If you want to use a stable API, I highly recommend you to use +- **Important:** If you want to use a stable API, I highly recommend you to use `Google's official translate API `__. 
diff --git a/googletrans/gtoken.py b/googletrans/gtoken.py index 298cbc3..0d23a09 100644 --- a/googletrans/gtoken.py +++ b/googletrans/gtoken.py @@ -162,7 +162,7 @@ class TokenAcquirer(object): e.append(l >> 12 & 63 | 128) else: e.append(l >> 12 | 224) - e.append(l >> 6 & 63 | 128) + e.append(l >> 6 & 63 | 128) e.append(l & 63 | 128) a = b for i, value in enumerate(e): diff --git a/googletrans/utils.py b/googletrans/utils.py index dc315e3..4929f2f 100644 --- a/googletrans/utils.py +++ b/googletrans/utils.py @@ -22,11 +22,14 @@ def build_params(query, src, dest, token): return params -def format_json(original): +def legacy_format_json(original): # save state states = [] text = original + + # save position for double-quoted texts for i, pos in enumerate(re.finditer('"', text)): + # pos.start() is a double-quote p = pos.start() + 1 if i % 2 == 0: nxt = text.find('"', p) @@ -52,6 +55,14 @@ def format_json(original): return converted +def format_json(original): + try: + converted = json.loads(original) + except ValueError: + converted = legacy_format_json(original) + return converted + + def rshift(val, n): """python port for '>>>'(right shift with padding) """ diff --git a/tests/test_client.py b/tests/test_client.py index 4370c52..e51bee9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -59,19 +59,6 @@ def test_special_chars(translator): assert result.text == text -def test_multiple_sentences(translator): - text = u"""Architecturally, the school has a Catholic character. -Atop the Main Building's gold dome is a golden statue of the Virgin Mary. -Immediately in front of the Main Building and facing it, -is a copper statue of Christ with arms upraised with the legend Venite Ad Me Omnes.""" - - result = translator.translate(text, src='en', dest='es') - assert result.text == u"""Arquitectónicamente, la escuela tiene un carácter católico. -Encima de la cúpula de oro del edificio principal es una estatua de oro de la Virgen María. 
-Inmediatamente frente al edificio principal y frente a él, -Es una estatua de cobre de Cristo con los brazos levantados con la leyenda Venite Ad Me Omnes.""" - - def test_translate_list(translator): args = (['test', 'exam'], 'ko', 'en') translations = translator.translate(*args) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6ca1745..7bbbf13 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ from pytest import raises def test_format_json(): text = '[,,"en",,,,0.96954316,,[["en"],,[0.96954316]]]' - result = utils.format_json(text) + result = utils.legacy_format_json(text) assert result == [None, None, 'en', None, None, None, 0.96954316, None, [['en'], None, [0.96954316]]] @@ -14,11 +14,11 @@ def test_format_malformed_json(): text = '[,,"en",,,,0.96954316,,[["en"],,0.96954316]]]' with raises(ValueError): - utils.format_json(text) + utils.legacy_format_json(text) def test_rshift(): value, n = 1000, 3 result = utils.rshift(value, n) - assert result == 125 \ No newline at end of file + assert result == 125