"""
### REDIS data structures in annotator:
STRINGS:
'status'
---> {'filled', None, other_string}
'filled'
indicate that utterance-scores are properly filled
with utterance ids
None
clears database
other non empty value
adds non existance utterances and sets their score to 0
'{cookie}:{index}'
---> '{yesno}:{str_index}:{timestamp}:{ip_addr}'
'{ip_addr}:{index}'
---> '{yesno}:{str_index}:{timestamp}:{cookie}'
'{index}'
---> '[yYnNtf]*' e.g. 'ynyNYnynYytf'
y
uncertain yes
t
banned yes
Y
trusted yes (not handled yet)
n
uncertain no
N
trusted no (not handled yet)
f
banned no
'jshash:{js_hash}'
---> '{cookie}'
LISTS:
'{cookie}'
---> '{yesno}:{index}:{str_index}'
'undo:{cookie}'
---> '{yesno}:{index}:{str_index}'
SETS:
'ip-cookies:{ip_addr}'
---> '{cookie}'
expires after 3 hours
'trusted'
---> '{cookie}'
trusted because I know this person
'trusted-checked'
---> '{cookie}'
'banned'
---> '{cookie}'
SORTED SETS:
'utterance-scores'
---> {index} ---> {nr_of_annotations}
"""
from flask import Flask, render_template, request, make_response, jsonify
import secrets
import time
from get_utterances import Utterance
import redis
import pickle
import re
import logging
UTT_SCORES = 'utterance-scores'
COOKIE_NAME = 'cookie-hash'
MAX_COOKIES_PER_IP = 5
app = Flask(__name__)
app.logger.setLevel(logging.INFO)
r = redis.StrictRedis(unix_socket_path='/redis-socket/redis.sock', db=0)
def log(msg):
app.logger.info(msg)
def load_utterances(filename):
with open(filename, 'rb') as f:
utterances = pickle.load(f)
return utterances
utterances = load_utterances(
'/home/siulkilulki/gitrepos/mass-scraper/utterances.pkl')
#initialize_redis_db
status = r.get('status')
if status:
status = status.decode('utf-8')
else:
r.flushdb()
if status != 'filled':
log('filling status')
for i in range(len(utterances)):
if r.zscore(UTT_SCORES, str(i)) == None:
log(i)
r.zadd(UTT_SCORES, 0, str(i))
r.set('status', 'filled')
status = 'filled'
def get_utterance_for_web(index):
left_context = utterances[index]['prefix'].replace('\n', '
')
hour = utterances[index]['hour'].replace('\n', '
')
right_context = ' '.join(
utterances[index]['suffix'].split(' ')[:10]).replace('\n', '
')
return left_context, hour, right_context
def find_not_annotated(cookie_hash):
# XXX: should be effecient enough even though it's O(n)
for index in range(len(utterances)):
if not r.exists(f'{cookie_hash}:{index}'):
return index
def get_next(cookie_hash):
"""returns utterance with minmum annotations if that utterance
wasn't annotated by cookie_hash user
or not yet annotated utterance by cookie_hash user"""
index = int(r.zrangebyscore(UTT_SCORES, '-inf', 'inf', start=0, num=1)[0])
if r.exists(f'{cookie_hash}:{index}'):
index = find_not_annotated(cookie_hash)
log('found unannotated index: {}'.format(index))
left_context, hour, right_context = get_utterance_for_web(index)
# log('get_next index: {}, score: {}'.format(index,
# r.zscore(UTT_SCORES, index)))
return index, left_context, hour, right_context
def get_next_response(cookie_hash):
index, left_context, hour, right_context = get_next(cookie_hash)
resp = jsonify(
index=index,
left_context=left_context,
hour=hour,
right_context=right_context)
if cookie_hash:
resp.set_cookie(COOKIE_NAME, cookie_hash, max_age=60 * 60 * 24 * 90)
return resp
def get_by_index(index):
left_context, hour, right_context = get_utterance_for_web(index)
# log('get_next index: {}, score: {}'.format(index,
# r.zscore(UTT_SCORES, index)))
return index, left_context, hour, right_context
def get_response_by_index(index, cookie_hash):
index, left_context, hour, right_context = get_by_index(index)
resp = jsonify(
index=index,
left_context=left_context,
hour=hour,
right_context=right_context)
if cookie_hash:
resp.set_cookie(COOKIE_NAME, cookie_hash, max_age=60 * 60 * 24 * 90)
return resp
def annotate_redis(yesno, index, ip_addr, cookie_hash):
# log('annotate: {}'.format(cookie_hash))
banned = r.sismember('banned', cookie_hash)
if banned:
yesno = yesno.translate(str.maketrans('yn', 'tf'))
timestamp = time.time()
annotation = r.get('{}:{}'.format(
cookie_hash, index)) # previous annotation of utterance by that user
if annotation:
str_index = int(annotation.decode('utf-8').split(':')[1])
r.setrange(index, str_index, yesno) #sets str_index to yesno value
else:
if not banned:
r.zincrby(UTT_SCORES, index)
str_index = r.append(index, yesno) - 1
r.set('{}:{}'.format(cookie_hash, index), '{}:{}:{}:{}'.format(
yesno, str_index, timestamp, ip_addr))
r.set('{}:{}'.format(ip_addr, index), '{}:{}:{}:{}'.format(
yesno, str_index, timestamp, cookie_hash))
undo_cookie_key = 'undo:' + cookie_hash
first_undo_action = r.rpop(undo_cookie_key)
if first_undo_action:
first_undo_action = first_undo_action.decode('utf-8')
if first_undo_action.split(':')[1] != str(index):
r.lpush(cookie_hash, first_undo_action)
while (r.llen(undo_cookie_key) != 0):
r.rpoplpush(undo_cookie_key, cookie_hash)
r.lpush(cookie_hash, '{}:{}:{}'.format(yesno, index, str_index))
def set_cookie(js_hash):
## TODO: dodawać nowe js_hash do listy z key bedacym cookie_hash | czy trzeba?
old_cookie_hash = None
js_hash_key = 'jshash:' + js_hash
cookie_hash = request.cookies.get(COOKIE_NAME)
if not cookie_hash:
old_cookie_hash = r.get(js_hash_key)
if not old_cookie_hash:
cookie_hash = secrets.token_urlsafe(16)
r.set(js_hash_key, cookie_hash)
log('Cookie not on client side. Creating new cookie.')
else:
log('Cookie not on client side. Getting cookie from fingerprint.')
cookie_hash = old_cookie_hash.decode('utf-8')
else:
log('cookie found on client side')
return cookie_hash
def ip_cookies_violation(ttl):
if ttl <= 0:
return None
else:
m, s = divmod(ttl, 60)
h, m = divmod(m, 60)
hour_str = f'{h} godz. ' if h != 0 else ''
minute_str = f'{m} min ' if m != 0 else ''
wait_time_str = hour_str + minute_str + f'{s} sek.'
return jsonify(wait_time_str=wait_time_str)
def undo(cookie_hash):
last_action = r.lpop(cookie_hash)
if last_action:
last_action = last_action.decode('utf-8')
r.rpush('undo:' + cookie_hash, last_action)
index = int(last_action.split(':')[1])
return get_response_by_index(index, cookie_hash)
log('No last action returning None')
# if no cookie-hash or action list is empty resp = None
def handle_ip_cookies(ip_key, cookie_hash):
"""mechanism for forcing users to use maximum X cookies per ip but no more than X"""
r.sadd(ip_key, cookie_hash)
if int(r.ttl(ip_key)) == -1:
r.expire(ip_key, 60 * 60 * 3)
log('{}, expire started'.format(ip_key))
def get(cookie_hash):
undo_cookie_key = 'undo:' + cookie_hash
while (r.llen(undo_cookie_key) != 0):
r.rpoplpush(undo_cookie_key, cookie_hash)
return get_next_response(cookie_hash)
def http_post():
resp = None
action = request.form['action']
index = int(request.form['index']) if action != 'get' else None
js_hash = request.form['hash']
ip_addr = str(request.headers.get('X-Real-Ip', request.remote_addr))
ip_key = 'ip-cookies:' + ip_addr
ip_key_scard = r.scard(ip_key)
if ip_key_scard >= MAX_COOKIES_PER_IP:
ttl = int(r.ttl(ip_key))
app.logger.warning(
f'MAX_COOKIES_PER_IP violation! ip: {ip_addr}, ttl: {ttl}, scard: {ip_key_scard}, action: {action}, index: {index}'
)
return ip_cookies_violation(ttl)
if action == 'get':
cookie_hash = set_cookie(js_hash)
resp = get(cookie_hash)
else:
cookie_hash = request.cookies.get(COOKIE_NAME)
if not cookie_hash:
log('No cookie hash given by client')
return None
if action == 'undo':
resp = undo(cookie_hash)
elif action == 'yes':
annotate_redis('y', index, ip_addr, cookie_hash)
resp = get_next_response(cookie_hash)
elif action == 'no':
annotate_redis('n', index, ip_addr, cookie_hash)
resp = get_next_response(cookie_hash)
if resp:
# r.sadd
handle_ip_cookies(ip_key, cookie_hash)
log(f'ip: {ip_addr}, cookie: {cookie_hash}, hash: {js_hash}, action: {action}, index: {index}, ip_scard_before_req: {ip_key_scard}'
)
return resp
def http_get():
return render_template('index.html')
@app.route("/", methods=['GET', 'POST'])
def root():
if request.method == 'POST':
return http_post()
else:
return http_get()
@app.route("/hidden", methods=['GET', 'POST'])
def dev_root():
ip_addr = str(request.headers.get('X-Real-Ip', request.remote_addr))
cookie_hash = request.cookies.get(COOKIE_NAME)
if ip_addr != '192.168.1.1' and cookie_hash != '1594469046':
return None
if request.method == 'POST':
return http_post()
else:
return render_template('index-dev.html')
if __name__ == "__main__":
gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
# app.logger.setLevel(gunicorn_logger.level)
app.run(host='0.0.0.0', debug=False)