2018-05-19 01:05:30 +02:00
|
|
|
#!/usr/bin/env python3
|
2018-05-24 12:56:02 +02:00
|
|
|
import sys
|
2018-05-19 01:05:30 +02:00
|
|
|
|
2018-05-25 15:09:23 +02:00
|
|
|
import argparse
|
2018-05-19 01:05:30 +02:00
|
|
|
import redis
|
2018-05-24 12:56:02 +02:00
|
|
|
from extractor.find_hours import color_hour
|
|
|
|
import pickle
|
|
|
|
from colorama import Fore, Back, Style
|
|
|
|
import time
|
|
|
|
import datetime
|
|
|
|
import re
|
|
|
|
|
|
|
|
# Module-wide Redis client used by every command below; configured for
# database 0 via the unix socket at /redis-socket/redis.sock.
r = redis.StrictRedis(unix_socket_path='/redis-socket/redis.sock', db=0)
|
|
|
|
|
|
|
|
|
|
|
|
def load_utterances(filename):
    """Unpickle and return the object stored in ``filename``.

    The file is opened in binary mode and closed again before returning.
    """
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
|
|
|
|
|
|
|
|
|
|
|
# Utterances are unpickled once at import time and shared by the commands
# below through the module-level ``utterances`` name.
_UTTERANCES_PATH = '/home/siulkilulki/gitrepos/mass-scraper/utterances.pkl'
utterances = load_utterances(_UTTERANCES_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
def format_time(timestamp):
    """Render a POSIX timestamp as ``HH:MM:SS.ffffff YYYY-MM-DD``.

    The conversion uses ``datetime.datetime.fromtimestamp`` without a
    timezone argument, so the result is expressed in local time.
    """
    moment = datetime.datetime.fromtimestamp(timestamp)
    return moment.strftime('%H:%M:%S.%f %Y-%m-%d')
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_action(action):
    """Translate a prompt reply into an ``(index_stop, skip_count)`` pair.

    * a plain number -> ``(number, 0)``: jump forward to that index;
    * ``cX`` with X a non-negative integer -> ``(None, X)``: skip X entries;
    * anything else (``c``, empty reply, garbage) -> ``(None, 0)``: just
      show the next entry.
    """
    if action.isdigit():
        return int(action), 0
    count_text = action[1:] if action.startswith('c') else ''
    if count_text:
        try:
            # A negative count would never reach zero in the countdown and
            # would silently skip everything, so clamp it.
            return None, max(int(count_text), 0)
        except ValueError:
            print('Ignoring invalid skip count: {!r}'.format(count_text))
    return None, 0


def investigate_by_cookie(cookie_hash):
    """Interactively step through the annotations stored for one cookie.

    Every Redis key is scanned; keys that contain ``:``, contain no ``.``
    and include ``cookie_hash`` are visited in sorted order. For each one
    the utterance is printed (coloured by the stored value), followed by the
    index taken from the second ``:``-separated field of the key and the
    time parsed from the third ``:``-separated field of the value.

    After each entry the user is prompted:

    * ``c`` or an empty reply -- show the next entry;
    * ``cX`` -- skip the next X matching entries;
    * a number -- skip forward until the entry with that index.

    Args:
        cookie_hash: substring that a key must contain to be shown.
    """
    skip_count = 0
    index_stop = None
    for raw_key in sorted(set(r.scan_iter())):
        key = raw_key.decode('utf-8')
        if ':' not in key or '.' in key or cookie_hash not in key:
            continue
        if skip_count:
            skip_count -= 1
            continue
        index = int(key.split(':')[1])
        # Compare against None explicitly so that "goto index 0" works.
        if index_stop is not None and index_stop != index:
            continue
        annotation_info = r.get(key).decode('utf-8')
        pprint_utterance(index, annotation_info)
        print(index)
        print(format_time(float(annotation_info.split(':')[2])))
        action = input(
            'c: continue, cX: continue Xtimes, number: goto index\n').strip()
        index_stop, skip_count = _parse_action(action)
|
|
|
|
|
|
|
|
|
|
|
|
def pprint_utterance(index, annotation_info=None):
    """Print utterance ``index`` with its hour highlighted.

    The highlight is green when ``annotation_info`` is missing/empty or its
    first element is ``'y'``, and red otherwise. The coloured text itself is
    produced by ``color_hour`` from the utterance's ``prefix``, ``hour`` and
    ``suffix`` entries.
    """
    shows_green = not annotation_info or annotation_info[0] == 'y'
    if shows_green:
        color = Fore.GREEN
    else:
        color = Fore.RED
    utterance = utterances[index]
    rendered = color_hour(utterance['prefix'], utterance['hour'],
                          utterance['suffix'], color)
    print(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
def print_stats():
    """Print annotation counts gathered from the Redis key space.

    A key counts as an annotation when it contains ``:`` and no ``.``.
    Two numbers are printed: how many such keys exist, and how many distinct
    values appear in their second ``:``-separated field.
    """
    decoded_keys = (raw.decode('utf-8') for raw in set(r.scan_iter()))
    annotation_keys = [k for k in decoded_keys if ':' in k and '.' not in k]
    annotated_indices = {k.split(':')[1] for k in annotation_keys}
    print('All annotations: {}'.format(len(annotation_keys)))
    print('Annotated utterances: {}'.format(len(annotated_indices)))
|
|
|
|
|
|
|
|
|
2018-05-25 15:09:23 +02:00
|
|
|
def get_args():
    """Parse ``sys.argv`` and return the resulting namespace.

    The selected sub-command is stored in ``cmd`` and is one of:

    * ``stats`` -- no arguments;
    * ``investigate`` -- positional ``cookie`` (string);
    * ``index`` -- positional ``index`` (int);
    * ``ipdb`` -- no arguments;
    * ``exec`` -- positional ``redis_command`` (string).

    A sub-command is mandatory; when none is given argparse prints a usage
    error and exits instead of returning ``cmd=None``.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='cmd')
    # Python 3 treats sub-commands as optional by default, which made a bare
    # invocation silently do nothing. Set via attribute (not the keyword) so
    # it also works on interpreters older than 3.7.
    subparsers.required = True

    subparsers.add_parser('stats', help='Show annotation stats.')

    investigate_parser = subparsers.add_parser(
        'investigate', help='investigate cookie.')
    investigate_parser.add_argument('cookie', help='User cookie string')

    index_parser = subparsers.add_parser('index', help='Print utterance.')
    index_parser.add_argument('index', type=int, help='Utterance index')

    subparsers.add_parser('ipdb', help='Get into ipdb.')

    exec_parser = subparsers.add_parser(
        'exec', help='Execute redis command.')
    exec_parser.add_argument(
        'redis_command',
        help=
        'Redis command (lowercased). e.g. to get r.zrangebyscore("key", "-inf", "inf", start=0, num=1) pass \'zrangebyscore("key", "-inf", "inf", start=0, num=1)\''
    )

    return parser.parse_args()
|
|
|
|
|
|
|
|
|
2018-05-24 12:56:02 +02:00
|
|
|
def main():
    """Parse the command line and run the selected sub-command.

    ``stats`` prints annotation counts, ``investigate`` starts the
    interactive cookie walk, ``index`` prints a single utterance, ``ipdb``
    drops into the ipdb debugger and ``exec`` evaluates a method call on the
    Redis client and prints its result. Any other value of ``cmd`` does
    nothing.
    """
    args = get_args()
    cmd = args.cmd

    if cmd == 'stats':
        print_stats()
        return

    if cmd == 'investigate':
        investigate_by_cookie(args.cookie)
        return

    if cmd == 'index':
        pprint_utterance(args.index)
        return

    if cmd == 'ipdb':
        # Imported lazily so ipdb is only needed for this sub-command.
        import ipdb
        ipdb.set_trace()
        return

    if cmd == 'exec':
        # NOTE(review): this executes arbitrary Python text taken from the
        # command line; the globals dict below is not a sandbox. Only use
        # with trusted input.
        exec_globals = {'print': print, 'r': r}
        exec('print(r.{})'.format(args.redis_command), exec_globals)
|
2018-05-24 12:56:02 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
|