add conversion module to process in a better way at least

This commit is contained in:
ssut 2015-06-06 14:43:01 +09:00
parent 840060ae25
commit 5305de1d23
5 changed files with 49 additions and 25 deletions

View File

@ -4,10 +4,18 @@ googletrans package
Submodules
----------
googletrans.translated module
googletrans.conversion module
-----------------------------
.. automodule:: googletrans.translated
.. automodule:: googletrans.conversion
:members:
:undoc-members:
:show-inheritance:
googletrans.response module
---------------------------
.. automodule:: googletrans.response
:members:
:undoc-members:
:show-inheritance:

View File

@ -6,3 +6,4 @@
googletrans
setup
tests

7
docs/tests.rst Normal file
View File

@ -0,0 +1,7 @@
tests module
============
.. automodule:: tests
:members:
:undoc-members:
:show-inheritance:

31
googletrans/conversion.py Normal file
View File

@ -0,0 +1,31 @@
"""A conversion module for googletrans"""
import re
import json
# Matches one complete double-quoted string literal, including any
# backslash-escaped characters (such as \") inside it. The capturing group
# makes re.split() keep the literals in its result.
_STRING_LITERAL = re.compile(r'("(?:\\.|[^"\\])*")', re.DOTALL)


def _fill_elisions(fragment):
    """Insert ``null`` for elided array elements in text outside strings."""
    # Loop until stable: a single replace() pass handles only
    # non-overlapping matches, so ',,,' needs more than one pass.
    while ',,' in fragment:
        fragment = fragment.replace(',,', ',null,')
    while '[,' in fragment:
        fragment = fragment.replace('[,', '[null,')
    return fragment


def format_json(text):
    """Parse JSON-like text whose arrays may contain elided elements.

    Empty array slots written as ``,,`` or ``[,`` are not valid JSON, so
    they are filled with ``null`` before the text is handed to
    ``json.loads``. The contents of double-quoted string literals are left
    untouched, even when they contain ``,,``, ``[,`` or escaped quotes.

    :param text: the JSON-like text to convert
    :return: the decoded Python object (elided slots become ``None``)
    :raises ValueError: if the text is still not valid JSON after the
        elided slots have been filled in
    """
    # With a capturing group, re.split() alternates between the text
    # outside string literals (even indices) and the literals themselves
    # (odd indices). Only the former may be rewritten.
    pieces = _STRING_LITERAL.split(text)
    repaired = ''.join(
        piece if index % 2 else _fill_elisions(piece)
        for index, piece in enumerate(pieces)
    )
    return json.loads(repaired)

View File

@ -1,23 +0,0 @@
class Translated:
    """Container for a single result returned by Google Translator.

    :param src: source language
    :param dest: destination language
    :param origin: original text
    :param text: translated text
    :param pronunciation: the pronunciation provided by Google Translator
    """

    def __init__(self, src, dest, origin, text, pronunciation):
        # Store every constructor argument under the attribute of the same name.
        self.src, self.dest = src, dest
        self.origin, self.text = origin, text
        self.pronunciation = pronunciation

    def __unicode__(self):
        # The original text is kept on the instance but is not part of the
        # textual representation.
        template = '<Translated src={src} dest={dest} text={text} pronunciation={pronunciation}>'
        shown = {
            'src': self.src,
            'dest': self.dest,
            'text': self.text,
            'pronunciation': self.pronunciation,
        }
        return template.format(**shown)

    def __str__(self):
        # str() delegates to the same representation as __unicode__().
        return self.__unicode__()