2018-04-27 22:44:15 +02:00
|
|
|
import re
|
|
|
|
from colorama import Fore, Back, Style
|
|
|
|
|
2018-05-14 01:51:40 +02:00
|
|
|
# Pattern for an hour of the day in the range 6-22.
#   * First alternative: an hour (6-9 with an optional leading zero, 10-19,
#     or 20-22), a ':' or '.' separator, then either two-digit minutes
#     (00-59) or the literal letters 'oo'.
#     NOTE(review): 'oo' presumably catches "00" typed with the letter o —
#     confirm against the data.
#   * Remaining alternatives: a bare hour without minutes (6, 7, 8, 9,
#     10-19, 20-22).
# Written as a raw string so that `\d` reaches the regex engine verbatim
# instead of relying on Python tolerating an invalid string escape.
hour_regex = re.compile(
    r'(0?[6-9]|1\d|2[0-2])[:.](oo|[0-5]\d)|6|7|8|9|1\d|2[0-2]')
|
2018-04-27 22:44:15 +02:00
|
|
|
|
2018-05-11 23:12:21 +02:00
|
|
|
|
2018-04-27 22:44:15 +02:00
|
|
|
def borders_ok(text, start, end):
    """Check that the span ``text[start:end]`` is delimited on both sides.

    The text is padded with one space on each side, so a span touching the
    beginning or the end of ``text`` sees a space as its neighbour.

    Returns True when all of the following hold:
      * the character just before the span is whitespace or one of ``,(/``;
      * the character just after the span is whitespace or one of ``,;)/``;
      * the span is not wrapped as ``(span)`` — an opening parenthesis
        directly before together with a closing one directly after.
    Otherwise returns False.
    """
    padded = ' ' + text + ' '
    # The one-character left pad shifts every index by one: padded[start]
    # is the character preceding the span, padded[end + 1] the one after it.
    left = padded[start]
    right = padded[end + 1]

    if not (left.isspace() or left in ',(/'):
        return False
    if not (right.isspace() or right in ',;)/'):
        return False
    # Reject a span enclosed in parentheses on both sides.
    return not (left == '(' and right == ')')
|
|
|
|
|
2018-05-11 23:12:21 +02:00
|
|
|
|
2018-05-15 07:13:09 +02:00
|
|
|
def delete_duplicates(text):
    """Collapse repeated spaces and newlines in ``text``.

    The substitutions are applied one after another, in this order:
      1. every run of spaces becomes a single space;
      2. one optional space on either side of a newline is removed;
      3. every run of five or more newlines is shortened to three;
      4. every pair of consecutive newlines becomes a single newline.

    Returns the resulting string.
    """
    substitutions = (
        (' +', ' '),
        (' ?\n ?', '\n'),
        ('\n{5,}', '\n\n\n'),
        ('\n\n', '\n'),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
|
|
|
|
|
|
|
|
|
2018-04-27 22:44:15 +02:00
|
|
|
def get_context(text, start, end, minsize):
    """Return the words surrounding the span ``text[start:end]``.

    The text before and after the span is normalised with
    ``delete_duplicates`` and then split on single spaces, at most
    ``minsize + 12`` times from the right for the leading part and at most
    ``minsize + 2`` times from the left for the trailing part.

    Returns a tuple ``(prefix, hour, suffix)`` where ``hour`` is the raw
    span and ``prefix`` / ``suffix`` are the kept pieces re-joined with
    single spaces.
    """
    hour = text[start:end]

    before = delete_duplicates(text[:start])
    after = delete_duplicates(text[end:])

    # The outermost piece of each split (first for the prefix, last for the
    # suffix) is always thrown away: it holds whatever was left unsplit.
    # NOTE(review): when the text has fewer spaces than the split limit,
    # that piece is a whole word and is dropped as well — confirm this is
    # intended.
    left_pieces = before.rsplit(' ', maxsplit=minsize + 12)
    right_pieces = after.split(' ', maxsplit=minsize + 2)

    prefix = ' '.join(left_pieces[1:])
    suffix = ' '.join(right_pieces[:-1])
    return prefix, hour, suffix
|
|
|
|
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
|
|
def hours_iterator(text, minsize=20, color=False):
    """Yield every hour found in ``text`` together with its context.

    Each ``hour_regex`` match is kept only if ``borders_ok`` accepts its
    surroundings; its context is then obtained from ``get_context`` using
    ``minsize``.

    Yields:
        When ``color`` is falsy: a 3-tuple ``(prefix, hour, suffix)``.
        When ``color`` is truthy: a 2-tuple ``(utterance, colored)`` where
        ``utterance`` is ``prefix&&&hour###suffix`` and ``colored`` is the
        result of ``color_hour`` with bright green highlighting.
    """
    for match in hour_regex.finditer(text):
        first, last = match.span(0)
        if borders_ok(text, first, last):
            prefix, hour, suffix = get_context(text, first, last, minsize)
            if not color:
                yield prefix, hour, suffix
            else:
                # '&&&' and '###' mark where the hour begins and ends
                # inside the plain-text utterance.
                utterance = f'{prefix}&&&{hour}###{suffix}'
                highlighted = color_hour(
                    prefix, hour, suffix, Fore.GREEN, Style.BRIGHT)
                yield utterance, highlighted
|
2018-05-11 23:12:21 +02:00
|
|
|
|
2018-04-27 22:44:15 +02:00
|
|
|
|
|
|
|
# In the classifier, split the text so that \n also ends up as a separate item.
|
|
|
|
|
|
|
|
|
2018-05-24 12:56:02 +02:00
|
|
|
def color_hour(prefix, hour, suffix, color=Fore.GREEN, style=Style.BRIGHT):
    """Return ``prefix + hour + suffix`` with the hour highlighted.

    ``color`` and ``style`` are emitted immediately before ``hour`` and
    ``Style.RESET_ALL`` immediately after it, so only the hour itself is
    affected by the formatting.
    """
    highlighted_hour = color + style + hour + Style.RESET_ALL
    return prefix + highlighted_hour + suffix
|