"""A conversion module for googletrans""" from __future__ import print_function import re import traceback import json def format_json(original): # save state states = [] text = original for i, pos in enumerate(re.finditer('"', text)): p = pos.start() + 1 if i % 2 == 0: nxt = text.find('"', p) states.append((p, text[p:nxt])) # replace all weired characters in text while text.find(',,') > -1: text = text.replace(',,', ',null,') while text.find('[,') > -1: text = text.replace('[,', '[null,') # recover state for i, pos in enumerate(re.finditer('"', text)): p = pos.start() + 1 if i % 2 == 0: j = int(i / 2) nxt = text.find('"', p) # replacing a portion of a string # use slicing to extract those parts of the original string to be kept text = text[:p] + states[j][1] + text[nxt:] try: converted = json.loads(text) except ValueError as e: print('original text: ', original, ' => ', text) traceback.print_exc() return converted SPECIAL_CASES = { 'ee': 'et', } LANGUAGES = { 'af': 'afrikaans', 'sq': 'albanian', 'ar': 'arabic', 'be': 'belarusian', 'bg': 'bulgarian', 'ca': 'catalan', 'zh-CN': 'chinese_simplified', 'zh-TW': 'chinese_traditional', 'hr': 'croatian', 'cs': 'czech', 'da': 'danish', 'nl': 'dutch', 'en': 'english', 'eo': 'esperanto', 'et': 'estonian', 'tl': 'filipino', 'fi': 'finnish', 'fr': 'french', 'gl': 'galician', 'de': 'german', 'el': 'greek', 'iw': 'hebrew', 'hi': 'hindi', 'hu': 'hungarian', 'is': 'icelandic', 'id': 'indonesian', 'ga': 'irish', 'it': 'italian', 'ja': 'japanese', 'ko': 'korean', 'la': 'latin', 'lv': 'latvian', 'lt': 'lithuanian', 'mk': 'macedonian', 'ms': 'malay', 'mt': 'maltese', 'no': 'norwegian', 'fa': 'persian', 'pl': 'polish', 'pt': 'portuguese', 'ro': 'romanian', 'ru': 'russian', 'sr': 'serbian', 'sk': 'slovak', 'sl': 'slovenian', 'es': 'spanish', 'sw': 'swahili', 'sv': 'swedish', 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'vi': 'vietnamese', 'cy': 'welsh', 'yi': 'yiddish', }