#!/usr/bin/python
##############################################################################
import ConfigParser, sys
import psycopg2, psycopg2.extras
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
import time
from datetime import timedelta, datetime
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os
import threading
import re
from urlparse import urljoin
from flask import Flask, request, session, url_for, redirect, Response, \
render_template, abort, g, flash, _app_ctx_stack, make_response, \
jsonify
##############################################################################
##############################################################################
# Single optional arg: config file path.
# Exactly one command-line argument selects the config file; any other
# argument count (e.g. when loaded by a WSGI server) uses the default path.
if len(sys.argv[1:]) != 1:
    # Default path for WSGI use (change to yours) :
    config_path = "/home/nsabot/logger/nsabot.conf"
else:
    # Read Config from given conf file
    config_path = sys.argv[1]

#config_path = os.path.abspath(config_path)
cfg = ConfigParser.ConfigParser()
# Parse inside a 'with' so the file handle is released right after reading.
with open(config_path) as cfg_file:
    cfg.readfp(cfg_file)

# Pull every setting out up front; a missing section/option or a
# non-numeric value aborts start-up with a message instead of a traceback.
try:
    # IRCism:
    Nick = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick)  # Add our own bot to the bot list

    # DBism:
    DB_Name = cfg.get("db", "db_name")
    DB_User = cfg.get("db", "db_user")
    DB_DEBUG = int(cfg.get("db", "db_debug"))

    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    App_Root = cfg.get("logotron", "app_root")
    CSS_File = cfg.get("logotron", "css_file")
    Era = int(cfg.get("logotron", "era"))
    DEBUG = int(cfg.get("logotron", "www_dbg"))
    Max_Raw_Ln = int(cfg.get("logotron", "max_raw"))
    Days_Hide = int(cfg.get("logotron", "days_hide"))

    # WWW:
    WWW_Port = int(cfg.get("logotron", "www_port"))
    Max_Search_Results = int(cfg.get("logotron", "max_search"))
except Exception as e:
    # Same text the two-item print statement produced (note the double
    # space), written so it parses under either print syntax.
    print("Invalid config:  %s" % (e,))
    # sys.exit rather than the bare 'exit' helper, which only exists when
    # the 'site' module has been loaded.
    sys.exit(1)
##############################################################################
##############################################################################
### Knobs not made into config yet ###
# Channel shown when a URL names none: the first entry of the configured list.
Default_Chan = Channels[0]
# Search queries shorter than this many characters are not run.
Min_Query_Length = 3
## Format for Date in Log Lines
# Also used to build and to parse the date segment of log page URLs.
Date_Short_Format = "%Y-%m-%d"
##############################################################################
# The Flask application object; the routes in this file are registered on it.
app = Flask(__name__)
# Import this module's upper-case attributes (e.g. DEBUG) as Flask settings.
app.config.from_object(__name__)
def get_db():
    """Return the database connection stored on ``g``, opening it if absent.

    The connection is created with psycopg2 from the configured database
    name and user, and cached as ``g.db`` so later calls reuse it.
    """
    conn = getattr(g, 'db', None)
    if conn is not None:
        return conn
    conn = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
    g.db = conn
    return conn
def close_db():
    """Close the connection stored as ``g.db``; do nothing if none is set."""
    if not hasattr(g, 'db'):
        return
    g.db.close()
@app.before_request
def before_request():
    """Make sure ``g.db`` holds a connection before a view runs."""
    conn = get_db()
    g.db = conn
@app.teardown_request
def teardown_request(exception):
    """Release the database connection when the request is torn down.

    ``exception`` is supplied by Flask and is not inspected here.
    """
    close_db()
def query_db(query, args=(), one=False):
    """Run *query* with *args* and return the fetched rows.

    Rows come from a ``RealDictCursor``, so each one is addressable by
    column name.

    query -- SQL text with ``%s`` placeholders
    args  -- sequence of values bound to the placeholders
    one   -- when true return only the first row (``None`` if there is
             none); otherwise return the list of all rows

    When ``DB_DEBUG`` is set, the query text and its result are printed.
    """
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            print("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if DB_DEBUG:
            print("query res: '{0}'".format(rv))
        return rv
    finally:
        # Release the cursor even when execute/fetch raises, instead of
        # leaving that to garbage collection.
        cur.close()
##############################################################################
## All eggogs redirect to main page
@app.errorhandler(404)
def page_not_found(error):
    """Answer any 404 with a redirect to the default log view."""
    main_page = url_for('log')
    return redirect(main_page)
##############################################################################
# Characters that are significant in HTML text or attribute values, mapped
# to the entity that stands in for each of them.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&#39;",  # numeric form: understood by HTML 4 as well as HTML 5
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Return *text* with HTML-significant characters replaced by entities.

    Each character is looked up on its own, so the ``&`` of an entity that
    was just produced is never escaped a second time.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
##############################################################################
## Get base URL
def get_base():
    """Return the site's base URL without a trailing slash.

    In DEBUG mode the host URL of the current request is used; otherwise
    the configured ``Base_URL``.
    """
    root = request.host_url if DEBUG else Base_URL
    return root.rstrip('/')
# Get perma-URL corresponding to given log line
def line_url(l):
    """Build the permanent URL of log line *l*.

    *l* is a row providing ``chan``, ``t`` (a datetime) and ``idx``; the
    result is base + app root + channel + "/" + date + "#" + index.
    """
    day = l['t'].strftime(Date_Short_Format)
    parts = (get_base(), App_Root, l['chan'], day, l['idx'])
    return "{0}{1}{2}/{3}#{4}".format(*parts)
def gen_chanlist(selected_chan, show_all_chans=False):
    """Describe the configured channels for the channel bar.

    For every channel the most recent log line is looked up. A channel
    whose newest line is more than ``Days_Hide`` days old is left out
    unless it is *selected_chan* or *show_all_chans* is true.

    Returns a list of dicts with the keys ``name``, ``last_time_url``
    (permalink of the newest line), ``last_time_txt`` (its age, e.g.
    "2d 3h 15m") and ``chan_url``. Both ``last_time_*`` values are empty
    strings for a channel with no lines.
    """
    current_time = datetime.now()
    entries = []

    for channel in Channels:
        newest = query_db(
            '''select t, idx from loglines where chan=%s
               and idx = (select max(idx) from loglines where chan=%s) ;''',
            [channel, channel], one=True)

        age_txt = ""
        newest_url = ""

        if newest is not None:
            age = current_time - newest['t']
            age_days = age.days

            # Only add to the list if it should be visible
            stale = age_days > Days_Hide
            if stale and channel != selected_chan and not show_all_chans:
                continue

            age_hours, leftover = divmod(age.seconds, 3600)
            age_minutes = leftover // 60

            pieces = []
            if age_days != 0:
                pieces.append('%dd ' % age_days)
            if age_hours != 0:
                pieces.append('%dh ' % age_hours)
            if age_minutes != 0:
                pieces.append('%dm' % age_minutes)
            age_txt = "".join(pieces)

            newest_url = "{0}{1}{2}/{3}#{4}".format(
                get_base(),
                App_Root,
                channel,
                newest['t'].strftime(Date_Short_Format),
                newest['idx'])

        entries.append({
            'name': channel,
            'last_time_url': newest_url,
            'last_time_txt': age_txt,
            'chan_url': "{0}{1}{2}{3}".format(
                get_base(), App_Root, channel,
                '/' if channel == Default_Chan else ''),
        })

    return entries
# HTML Tag Regex: a '<', one or more characters other than '>', then '>'
tag_regex = re.compile("(<[^>]+>)")


# Find the segments of a block of text which constitute HTML tags
def get_link_intervals(str):
    """Return the (start, end) offsets of every HTML tag in *str*.

    Offsets are ordered left to right and *end* is exclusive, so
    ``str[start:end]`` is the tag itself. Text without tags gives ``[]``.
    """
    return [match.span() for match in tag_regex.finditer(str)]
# Highlight all matched tokens in given text
def highlight_matches(strings, text):
    """Substitute every case-insensitive match of any of *strings* in *text*.

    strings -- regex fragments (callers pass already-escaped tokens); they
               are OR-ed together inside a single capture group
    text    -- the text to process

    Returns the processed text. With no tokens the text is returned as-is:
    the combined pattern would otherwise be "()", which matches the empty
    string at every position.
    """
    if not strings:
        return text
    pattern = '(' + '|'.join(strings) + ')'
    # NOTE(review): the replacement template re-emits the match unchanged,
    # so as written this call does not alter the text -- confirm whether
    # wrapper markup is meant to surround \1.
    return re.sub(pattern,
                  r"""\1""",
                  text,
                  flags=re.I)
# Highlight matched tokens in the display of a search result logline,
# but leave HTML tags alone
def highlight_text(strings, text):
    """Apply ``highlight_matches`` to the parts of *text* outside HTML tags.

    The text is cut at the tag boundaries reported by
    ``get_link_intervals``; the stretches between tags are highlighted and
    the tags themselves are copied through untouched.
    """
    pieces = []
    cursor = 0
    for tag_start, tag_end in get_link_intervals(text):
        # Plain text leading up to the tag
        pieces.append(highlight_matches(strings, text[cursor:tag_start]))
        # The HTML tag itself, copied verbatim
        pieces.append(text[tag_start:tag_end])
        cursor = tag_end
    # Whatever follows the final tag (or all of it when there are no tags)
    pieces.append(highlight_matches(strings, text[cursor:]))
    return "".join(pieces)
# NOTE(review): the text from here down to `return s` appears to be
# truncated. No `def format_logline` header is present although the name is
# registered with the template environment below; boxlinks_re has a single
# capture group while its substitution refers to group 2; and the markup
# templates are empty strings. Presumably text between '<' and '>' inside
# the string literals was lost -- restore this section from a known-good
# copy before editing it.
# Regexps used in format_logline:
boxlinks_re = re.compile(
'\[\s*]*>[^ <]+\s*\]\[([^\[\]]+)\]')
stdlinks_re = re.compile('(http[^ \[\]]+)')
# For era 1 ('bitcoin-assets') links :
era1_re = re.compile('(\1', payload)
# Now also format [link][text] links :
payload = re.sub(boxlinks_re,
r'\2', payload)
# For ancient logs strictly: substitute orig. link with our logger :
if l['era'] < 3:
payload = re.sub(era1_re,
r' ' + speaker
# If 'action', annotate:
if l['self']:
separator = ""
payload = "" + payload + ""
speaker = "" + speaker + ""
# HTMLize the given line :
s = ("").format(l['idx'],
speaker,
l['t'],
line_url(l),
payload,
bot,
dclass,
separator)
return s
# Make above callable from inside htm templater:
app.jinja_env.globals.update(format_logline=format_logline)
@app.route('/rnd/<chan>')
def rnd(chan):
    """Redirect to the permalink of a randomly chosen line of *chan*.

    The route carries the channel as a URL variable because the view needs
    it as an argument. An unknown channel, or one with no logged lines,
    redirects to the default log view.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    rnd_line = query_db(
        '''select * from loglines where chan=%s
           order by random() limit 1 ;''',
        [chan], one=True)

    # Nothing logged for this channel yet: there is no line to link to.
    if rnd_line is None:
        return redirect(url_for('log'))

    return redirect(line_url(rnd_line))
# URL shapes served, matching what line_url and gen_chanlist generate:
#   <App_Root><chan>/<date>   one day of one channel
#   <App_Root><chan>          current day of one channel
#   <App_Root>                current day of the default channel
@app.route('%s<chan>/<date>' % App_Root)
@app.route('%s<chan>' % App_Root, defaults={'date': None})
@app.route('%s' % App_Root, defaults={'chan': Default_Chan, 'date': None})
def log(chan, date):
    """Render one day of the log of *chan*.

    chan -- channel name; an unknown one redirects to the default view
    date -- day in ``Date_Short_Format``, or None for today; a value that
            does not parse redirects to the default view

    Query parameters read from the request:
      ss, se -- start/end of a selection, handed to the template as ``sel``
      rev    -- 1 to list the lines newest-first
      all    -- non-zero to list idle channels in the channel bar too
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Get possible selection start and end
    sel_start = request.args.get('ss', default = 0, type = int)
    sel_end = request.args.get('se', default = 0, type = int)

    # Get possible 'reverse gear'
    rev = request.args.get('rev', default = 0, type = int)

    # Get possible 'show all'
    show_all = request.args.get('all', default = 0, type = int)

    # Get current time
    now = datetime.now()

    # Whether we are viewing 'current' tail
    tail = False

    # If viewing 'current' log:
    if date is None:
        date = now.strftime(Date_Short_Format)
        tail = True

    # Parse given date, and redirect to default log if rubbish.
    # strptime reports an unparsable string with ValueError.
    try:
        day_start = datetime.strptime(date, Date_Short_Format)
    except ValueError:
        return redirect(url_for('log'))

    # Determine the end of the interval being shown
    day_end = day_start + timedelta(days=1)

    # Enable 'tail' if the shown day has not ended yet
    if day_end > now:
        tail = True

    # Get the loglines from DB
    lines = query_db(
        '''select * from loglines where chan=%s
           and t between %s and %s order by idx asc;''',
        [chan, day_start, day_end], one=False)

    # Optional 'reverse gear' knob:
    if rev == 1:
        lines.reverse()

    # Generate navbar for the given date:
    prev_day = ""
    next_day = ""

    # Day of the closest earlier line, if there is one
    prev_t = query_db(
        '''select t from loglines where chan=%s
           and t < %s order by idx desc limit 1;''',
        [chan, day_start], one=True)
    if prev_t is not None:
        prev_day = prev_t['t'].strftime(Date_Short_Format)

    # A 'next' link is only looked up when the shown day is already over
    if not tail:
        next_t = query_db(
            '''select t from loglines where chan=%s
               and t > %s order by idx asc limit 1;''',
            [chan, day_end], one=True)
        if next_t is not None:
            next_day = next_t['t'].strftime(Date_Short_Format)

    # Generate url for css file based on config value
    css_url = url_for('static', filename=CSS_File)

    chan_list = gen_chanlist(chan, show_all)

    # Return the HTMLized text
    return render_template('log.html',
                           css_url = css_url,
                           app_root = App_Root,
                           chan = chan,
                           chan_list = chan_list,
                           loglines = lines,
                           sel = (sel_start, sel_end),
                           date = date,
                           prev_day = prev_day,
                           next_day = next_day,
                           rev = not rev,
                           idle_day = Days_Hide,
                           show_all = show_all)
# Primarily for use with 'era 1' and 'era 2' :
# Get arbitrary log item by chan and raw line index
@app.route('/ilog/<chan>/<idx>')
def ilog(chan, idx):
    """Redirect to the dated log page holding line *idx* of *chan*.

    Both values arrive as URL variables (strings). An unknown channel or
    an index with no matching line redirects to the default log view.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Attempt to locate given chan/idx:
    item = query_db(
        '''select * from loglines where chan=%s and idx = %s ;''',
        [chan, idx], one=True)

    # If given chan/idx not found:
    if item is None:
        return redirect(url_for('log'))

    # Determine date where item appears in log :
    item_date = item['t'].strftime(Date_Short_Format)

    # Go there:
    return redirect(App_Root + chan + "/" + item_date + "#" + idx)
@app.route('/log-raw/<chan>')
def rawlog(chan):
    """Return a range of raw log lines of *chan* as plain text.

    Query parameters ``istart`` and ``iend`` (integers, default 0) give the
    inclusive index range. An unknown channel, a start beyond the end, or
    a range wider than ``Max_Raw_Ln`` yields a plain-text "EGGOG" message
    instead of log lines.

    Each output line is ``idx;time;speaker;payload``; for a row flagged
    ``self`` it is ``idx;time;*;speaker payload``.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return Response("EGGOG: No such Channel!", mimetype='text/plain')

    # Get start and end indices:
    idx_start = request.args.get('istart', default = 0, type = int)
    idx_end = request.args.get('iend', default = 0, type = int)

    # Malformed bounds?
    if idx_start > idx_end:
        return Response("EGGOG: Start must precede End!",
                        mimetype='text/plain')

    # Demanded too many in one burst ?
    if (idx_end - idx_start) > Max_Raw_Ln:
        return Response("EGGOG: May request Max. of %s Lines !" % Max_Raw_Ln,
                        mimetype='text/plain')

    # Get the loglines from DB
    lines = query_db(
        '''select * from loglines where chan=%s
           and idx between %s and %s order by idx asc;''',
        [chan, idx_start, idx_end], one=False)

    # Retrieve raw lines in classical Phf format; the pieces are collected
    # and joined once rather than growing one string per line.
    rows = []
    for l in lines:
        if l['self']:
            action = "*;"
            speaker = "%s " % l['speaker']
        else:
            action = ""
            speaker = "%s;" % l['speaker']
        rows.append("%s;%s;%s%s%s\n" % (l['idx'],
                                        # '%s' is a platform-specific
                                        # strftime extension (epoch seconds
                                        # on glibc)
                                        l['t'].strftime('%s'),
                                        action,
                                        speaker,
                                        l['payload']))

    # Return plain text:
    return Response("".join(rows), mimetype='text/plain')
# Characters allowed to survive in a speaker name: digits, ASCII letters,
# underscore and hyphen.
Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"


def sanitize_speaker(s):
    """Return *s* with every character outside ``Name_Chars`` removed."""
    kept = []
    for ch in s:
        if ch in Name_Chars:
            kept.append(ch)
    return "".join(kept)
def re_escape(s):
    """Return *s* with a backslash placed before each regex metacharacter.

    The characters treated as special are ( ) { } [ ] . * ? | ^ $ \\ + and -.
    """
    special = r"[(){}\[\].*?|^$\\+-]"
    # \g<0> stands for the matched character itself
    escaped = r"\\\g<0>"
    return re.sub(special, escaped, s)
# Search knob. Supports 'chan' parameter.
@app.route('/log-search')
def logsearch():
    """Search log payloads and render one page of results.

    Query parameters read from the request:
      chan  -- channel to search, or 'all' for every logged channel
               (default: ``Default_Chan``)
      q     -- whitespace-separated tokens that must all occur in the
               payload; tokens written ``from:NAME`` or ``f:NAME`` restrict
               the speaker instead (any of the given names)
      after -- number of results to skip, for paging (default 0)

    A query shorter than ``Min_Query_Length`` or an unknown channel
    redirects to the default log view. Results are ordered newest first,
    ``Max_Search_Results`` per page.
    """
    # The query params:
    chan = request.args.get('chan', default = Default_Chan, type = str)
    query = request.args.get('q', default = '', type = str)
    offset = request.args.get('after', default = 0, type = int)

    # The database rejects a negative OFFSET; treat it as 'from the start'.
    offset = max(offset, 0)

    # Forbid query that is too short:
    if len(query) < Min_Query_Length:
        return redirect(url_for('log'))

    if chan == 'all':
        # search in all logged channels, and indicate chan per log line
        chans = Channels
        legend = "all logged channels"
        showchan = True
        show_all = 1
    elif chan in Channels:
        # search in selected channel only
        chans = [chan]
        legend = chan
        showchan = False
        show_all = 0
    else:
        # Handle possible rubbish chan:
        return redirect(url_for('log'))

    # separate out "from:foo" tokens and ordinary:
    tokens_standard = []
    from_users = []
    for t in query.split():
        if t.startswith(("from:", "f:")):
            from_users.append(t.split(':')[1])  # Record user for 'from' query
        else:
            tokens_standard.append(t)

    from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
    # Escaped tokens are also handed to the template as ``tokens``
    tokens_orig = [re_escape(t) for t in tokens_standard]
    tokens_formed = ['%' + t + '%' for t in tokens_orig]

    # Query is usable; perform the search on DB and get the finds
    if not from_users:
        searchres = query_db(
            '''select * from loglines where chan = any(%s)
               and payload ilike all(%s) order by t desc
               limit %s offset %s;''',
            [(chans,),
             tokens_formed,
             Max_Search_Results,
             offset], one=False)
    else:
        searchres = query_db(
            '''select * from loglines where chan = any(%s)
               and speaker ilike any(%s)
               and payload ilike all(%s) order by t desc
               limit %s offset %s;''',
            [(chans,),
             from_users,
             tokens_formed,
             Max_Search_Results,
             offset], one=False)

    # Number of search results returned in this query
    nres = len(searchres)

    # Whether to display 'back' button :
    back = (offset != 0)

    # Whether to display 'forward' button : a full page suggests more follow
    forw = (nres == Max_Search_Results)

    # Starting index of search results
    sres = offset

    # Ending index of search results
    eres = offset + min(nres, Max_Search_Results)

    # Generate url for css file based on config value
    css_url = url_for('static', filename=CSS_File)

    chan_list = gen_chanlist(chan, show_all)

    return render_template('searchres.html',
                           css_url = css_url,
                           query = query,
                           hquery = html_escape(query),
                           legend = legend,
                           sres = sres,
                           eres = eres,
                           back = back,
                           forw = forw,
                           psize = Max_Search_Results,
                           chan = chan,
                           chan_list = chan_list,
                           tokens = tokens_orig,
                           loglines = searchres,
                           showchan = showchan,
                           show_all = show_all)
# Comment this out if you don't have one
@app.route('/favicon.ico')
def favicon():
    """Redirect the conventional favicon path to the static file."""
    icon_url = url_for('static', filename='favicon.ico')
    return redirect(icon_url)
## App Mode
# Only when executed as a script: serve with Flask's built-in server on the
# configured port, with threaded request handling.
if __name__ == '__main__':
    app.run(port=WWW_Port, threaded=True)