2018-05-15 07:13:09 +02:00
|
|
|
|
from flask import Flask, render_template, request, make_response, jsonify
|
2018-05-19 01:05:30 +02:00
|
|
|
|
import time
|
2018-05-15 07:13:09 +02:00
|
|
|
|
import os
|
|
|
|
|
from get_utterances import Utterance
|
2018-05-11 23:12:21 +02:00
|
|
|
|
import redis
|
2018-05-15 07:13:09 +02:00
|
|
|
|
import pickle
|
|
|
|
|
import re
|
2018-05-19 01:05:30 +02:00
|
|
|
|
import logging
|
2018-05-15 07:13:09 +02:00
|
|
|
|
|
2018-05-04 23:25:07 +02:00
|
|
|
|
# WSGI application object; the route handler defined further down in this
# file is registered on it.
app = Flask(__name__)
# Let INFO-level records through so messages sent via log() are not dropped.
app.logger.setLevel(logging.INFO)

# Module-wide Redis client: local server, port 6379, database 0.
r = redis.StrictRedis(
    host='localhost',
    port=6379,
    db=0,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def log(msg):
    """Write ``msg`` to the Flask application's logger at INFO level."""
    logger = app.logger
    logger.info(msg)
|
2018-05-15 07:13:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_utterances(filename):
    """Return the object unpickled from the file at ``filename``.

    The file is opened in binary mode and is closed again before the
    function returns.

    NOTE: unpickling can execute arbitrary code, so ``filename`` must only
    ever point at a trusted file.
    """
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the pickled utterances once, at import time; the functions below index
# into this object by integer position.
# The location can be overridden with the UTTERANCES_PATH environment variable
# so the module is importable on machines other than the original author's;
# when the variable is unset the original hard-coded path is used.
utterances = load_utterances(
    os.environ.get(
        'UTTERANCES_PATH',
        '/home/siulkilulki/gitrepos/mass-scraper/utterances.pkl'))
|
|
|
|
|
|
|
|
|
|
# Redis key of the sorted set whose members are utterance indices (as
# strings). Members start at score 0 and annotate_redis() increments a
# member's score the first time a given user annotates that utterance.
UTT_SCORES = 'utterance-scores'
# Name of the browser cookie that carries the annotator's identifier.
COOKIE_NAME = 'cookie-hash'

# --- one-time Redis initialisation ------------------------------------------
# The 'status' key marks whether the sorted set has been populated.
status = r.get('status')
if status:
    status = status.decode('utf-8')
else:
    # No status marker at all: treat the database as uninitialised and wipe
    # it before filling.
    r.flushdb()
if status != 'filled':
    log('filling status')
    for i in range(len(utterances)):
        member = str(i)
        # Only add members that are missing so existing scores are preserved
        # when a previous fill was interrupted.
        if r.zscore(UTT_SCORES, member) is None:
            log(i)
            # NOTE(review): positional (name, score, member) is the redis-py
            # 2.x StrictRedis.zadd signature; redis-py >= 3.0 expects a
            # {member: score} mapping instead -- confirm the installed version
            # before upgrading.
            r.zadd(UTT_SCORES, 0, member)
    r.set('status', 'filled')
    status = 'filled'
|
|
|
|
|
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
2018-05-15 07:13:09 +02:00
|
|
|
|
def get_next():
    """Pick the next utterance to annotate and return its display fields.

    The utterance is chosen from the ``UTT_SCORES`` sorted set, ordered by
    ascending score: the member at rank 1 (the second-lowest score) is used,
    exactly as before.
    NOTE(review): why rank 1 rather than rank 0 is not visible in this file;
    the choice is preserved here -- confirm it is intentional.

    Returns:
        tuple: ``(index, left_context, hour, right_context)`` as produced by
        ``get_by_index``.

    Raises:
        IndexError: if the sorted set is empty.
    """
    # Ask Redis for the two lowest-scored members only (LIMIT 0 2) instead of
    # transferring the whole sorted set on every request.
    lowest = r.zrangebyscore(UTT_SCORES, '-inf', 'inf', start=0, num=2)
    if not lowest:
        raise IndexError('no utterances available in {!r}'.format(UTT_SCORES))
    # With two or more members this is rank 1; with a single member fall back
    # to it rather than failing.
    index = int(lowest[-1])
    log('get_next index: {}, score: {}'.format(index,
                                               r.zscore(UTT_SCORES, index)))
    # Formatting is identical to a lookup by index, so share that code path.
    return get_by_index(index)
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
|
|
|
|
|
2018-05-15 07:13:09 +02:00
|
|
|
|
def get_next_response(cookie_hash):
    """Build the JSON response for the next utterance chosen by ``get_next``.

    The JSON body has the keys ``index``, ``left_context``, ``hour`` and
    ``right_context``. When ``cookie_hash`` is truthy it is also sent back as
    the ``COOKIE_NAME`` cookie with a lifetime of 90 days.
    """
    field_names = ('index', 'left_context', 'hour', 'right_context')
    payload = dict(zip(field_names, get_next()))
    response = jsonify(**payload)
    if not cookie_hash:
        return response
    ninety_days = 60 * 60 * 24 * 90  # cookie lifetime in seconds
    response.set_cookie(COOKIE_NAME, cookie_hash, max_age=ninety_days)
    return response
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
|
|
|
|
|
2018-05-15 07:13:09 +02:00
|
|
|
|
def get_by_index(index):
    """Return the display fields of the utterance stored at ``index``.

    Newlines in every field are replaced with ``<br>``, and the right context
    is limited to the first ten space-separated pieces of the ``suffix``.

    Returns:
        tuple: ``(index, left_context, hour, right_context)``.
    """
    entry = utterances[index]

    def as_html(text):
        # Swap newlines for HTML line breaks.
        return text.replace('\n', '<br>')

    left_context = as_html(entry['prefix'])
    hour = as_html(entry['hour'])
    leading_words = entry['suffix'].split(' ')[:10]
    right_context = as_html(' '.join(leading_words))
    return index, left_context, hour, right_context
|
2018-05-04 23:25:07 +02:00
|
|
|
|
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
2018-05-15 07:13:09 +02:00
|
|
|
|
def get_response_by_index(index, cookie_hash):
    """Build the JSON response for the utterance stored at ``index``.

    The JSON body has the keys ``index``, ``left_context``, ``hour`` and
    ``right_context`` taken from ``get_by_index``. When ``cookie_hash`` is
    truthy it is also sent back as the ``COOKIE_NAME`` cookie with a lifetime
    of 90 days.
    """
    fields = get_by_index(index)
    payload = {
        'index': fields[0],
        'left_context': fields[1],
        'hour': fields[2],
        'right_context': fields[3],
    }
    response = jsonify(**payload)
    if cookie_hash:
        lifetime_seconds = 60 * 60 * 24 * 90  # 90 days
        response.set_cookie(
            COOKIE_NAME, cookie_hash, max_age=lifetime_seconds)
    return response
|
2018-05-11 23:12:21 +02:00
|
|
|
|
|
2018-05-04 23:25:07 +02:00
|
|
|
|
|
2018-05-19 01:05:30 +02:00
|
|
|
|
def annotate_redis(yesno, index, ip_addr):
    """Record a yes/no annotation of one utterance for the current user.

    The user is identified by the ``COOKIE_NAME`` cookie of the current
    request. Redis is updated as follows:

    * string key ``<index>``: one character per annotating user; a new answer
      is appended, a repeated answer by the same user overwrites that user's
      character in place.
    * ``UTT_SCORES`` sorted set: the member ``<index>`` is incremented only
      when this user had not annotated the utterance before.
    * ``<cookie_hash>:<index>`` -> ``yesno:str_index:timestamp``
    * ``<ip_addr>:<index>`` -> ``yesno:str_index:timestamp:cookie_hash``
    * list ``<cookie_hash>``: ``yesno:index:str_index`` is pushed on the left
      (this is what the 'undo' action pops).

    Args:
        yesno: the answer character to store (callers pass ``'y'`` or ``'n'``).
        index: utterance index, as received from the request form.
        ip_addr: client address used for the per-IP record.

    Returns:
        The cookie hash on success, or ``None`` when the request carries no
        cookie or ``index`` is not a valid utterance index.
    """
    cookie_hash = request.cookies.get(COOKIE_NAME)
    if not cookie_hash:
        return None

    # ``index`` is client-controlled and is used below as a Redis key and as a
    # sorted-set member. Reject anything that is not an existing utterance
    # position so a request cannot touch unrelated keys (e.g. 'status') or
    # insert a non-numeric member that get_next() would later fail to parse.
    try:
        position = int(index)
    except (TypeError, ValueError):
        return None
    if not 0 <= position < len(utterances):
        return None
    index = str(position)  # canonical form, e.g. '05' -> '5'

    # Previous annotation of this utterance by this user, if any.
    annotation = r.get('{}:{}'.format(cookie_hash, index))
    if annotation:
        # Reuse the character slot recorded in the earlier annotation and
        # overwrite it with the new answer.
        str_index = int(annotation.decode('utf-8').split(':')[1])
        r.setrange(index, str_index, yesno)
    else:
        r.zincrby(UTT_SCORES, index)
        # APPEND returns the new string length; the answer just written is
        # therefore at offset length - 1.
        str_index = r.append(index, yesno) - 1

    timestamp = time.time()
    r.set('{}:{}'.format(cookie_hash, index), '{}:{}:{}'.format(
        yesno, str_index, timestamp))
    r.set('{}:{}'.format(ip_addr, index), '{}:{}:{}:{}'.format(
        yesno, str_index, timestamp, cookie_hash))
    r.lpush(cookie_hash, '{}:{}:{}'.format(yesno, index, str_index))
    return cookie_hash
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_cookie(js_hash):
    """Return the annotator id for the current request, creating one if needed.

    Lookup order:

    1. the ``COOKIE_NAME`` cookie sent by the client;
    2. the id previously stored in Redis under ``js_hash`` (the log messages
       refer to this value as the client's fingerprint);
    3. a freshly generated random id, which is then stored under ``js_hash``.

    Args:
        js_hash: client-supplied hash used as the Redis key for step 2/3.

    Returns:
        str: the annotator id (the caller is responsible for sending it back
        as a cookie).
    """
    # TODO: add new js_hash values to a list keyed by cookie_hash | is that
    # actually needed?
    cookie_hash = request.cookies.get(COOKIE_NAME)
    if cookie_hash:
        log('cookie found on client side')
        return cookie_hash

    old_cookie_hash = r.get(js_hash)
    if old_cookie_hash:
        log('Cookie not on client side. Getting cookie from fingerprint.')
        return old_cookie_hash.decode('utf-8')

    # 128 random bits rendered as a decimal string (same format as before,
    # which used only 32 bits). The id is used verbatim as a Redis list key,
    # while utterance indices are used verbatim as Redis string keys; a short
    # id could coincide with an index (or with another user's id), a 128-bit
    # one practically cannot.
    cookie_hash = str(int.from_bytes(os.urandom(16), byteorder='little'))
    r.set(js_hash, cookie_hash)
    log('Cookie not on client side. Creating new cookie.')
    return cookie_hash
|
|
|
|
|
|
|
|
|
|
|
2018-05-16 20:33:32 +02:00
|
|
|
|
def http_post():
    """Handle a POST to ``/``: dispatch on the form's ``action`` field.

    Form fields read: ``action`` (required), ``hash`` (required) and
    ``index`` (optional).

    Actions:
        * ``get``  -- make sure the client has an annotator id, then return
          the next utterance.
        * ``undo`` -- pop the user's most recent action from their history
          list and return that utterance again.
        * ``yes`` / ``no`` -- store the answer for ``index`` and return the
          next utterance.

    Returns:
        The JSON response for the selected utterance, or an empty
        ``204 No Content`` response when there is nothing to send (unknown
        action, missing cookie, rejected annotation or empty undo history).
        Previously those paths raised ``UnboundLocalError`` because ``resp``
        was never assigned.
    """
    index = str(request.form.get('index'))
    action = request.form['action']
    js_hash = request.form['hash']
    # Prefer the X-Real-Ip header when present, otherwise the socket's peer
    # address.
    ip_addr = str(request.headers.get('X-Real-Ip', request.remote_addr))

    # Both names must exist on every path below.
    resp = None
    cookie_hash = None
    answers = {'yes': 'y', 'no': 'n'}

    if action == 'get':
        cookie_hash = set_cookie(js_hash)
        resp = get_next_response(cookie_hash)
    elif action == 'undo':
        cookie_hash = request.cookies.get(COOKIE_NAME)
        if cookie_hash:
            last_action = r.lpop(cookie_hash)
            if last_action:
                # History entries have the form 'yesno:index:str_index'.
                index = int(last_action.decode('utf-8').split(':')[1])
                resp = get_response_by_index(index, cookie_hash)
    elif action in answers:
        cookie_hash = annotate_redis(answers[action], index, ip_addr)
        if cookie_hash:
            resp = get_next_response(cookie_hash)

    if resp is None:
        # A Flask view must not return None; answer with an explicit empty
        # response instead.
        return make_response('', 204)

    log(f'ip: {ip_addr}, cookie: {cookie_hash}, hash: {js_hash}, action: {action}, index: {index}'
        )
    return resp
|
2018-05-15 07:13:09 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def http_get():
    """Handle a GET request by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
|
|
|
|
|
|
|
2018-05-04 23:25:07 +02:00
|
|
|
|
@app.route("/", methods=['GET', 'POST'])
def root():
    """Entry point for ``/``: POST requests go to ``http_post``, every other
    method to ``http_get``."""
    handler = http_post if request.method == 'POST' else http_get
    return handler()
|
2018-05-04 23:25:07 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Send the application's log records through whatever handlers the
    # 'gunicorn.error' logger has. The logger level is left as configured at
    # module level rather than copied from gunicorn's logger.
    app.logger.handlers = logging.getLogger('gunicorn.error').handlers
    # Listen on all interfaces with the debugger disabled.
    app.run(host='0.0.0.0', debug=False)
|