#!/usr/bin/python

##############################################################################
# Module bootstrap: imports, interpreter/driver text-encoding setup, and
# loading of the configuration file.  Every capitalised name assigned in the
# config section below is a module-level setting used by the rest of the file.
##############################################################################

import ConfigParser
import os
import re
import sys
import threading
import time
from datetime import timedelta, datetime
from urlparse import urljoin

import psycopg2
import psycopg2.extras
import psycopg2.extensions

# Ask psycopg2 to hand back text (and arrays of text) as unicode objects.
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)

# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module has to be reloaded to reach it again.
reload(sys)
sys.setdefaultencoding('utf8')

# Imported after the encoding setup, as in the original statement order.
from flask import Flask, request, session, url_for, redirect, Response, \
    render_template, abort, g, flash, _app_ctx_stack, make_response, \
    jsonify

##############################################################################

##############################################################################
# Single mandatory arg: config file path
if len(sys.argv[1:]) != 1:
    # Default path for WSGI use (change to yours) :
    config_path = "/home/nsabot/logger/nsabot.conf"
else:
    # Read Config from given conf file
    config_path = sys.argv[1]

cfg = ConfigParser.ConfigParser()

# Use a context manager so the file handle is closed once parsing is done
# (previously the handle returned by open() was never closed).
with open(config_path) as config_file:
    cfg.readfp(config_file)

try:
    # IRCism:
    Nick = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick)  # Add our own bot to the bot list

    # DBism:
    DB_Name = cfg.get("db", "db_name")
    DB_User = cfg.get("db", "db_user")
    DB_DEBUG = cfg.getint("db", "db_debug")

    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    App_Root = cfg.get("logotron", "app_root")
    CSS_File = cfg.get("logotron", "css_file")
    Era = cfg.getint("logotron", "era")
    DEBUG = cfg.getint("logotron", "www_dbg")
    Max_Raw_Ln = cfg.getint("logotron", "max_raw")
    Days_Hide = cfg.getint("logotron", "days_hide")

    # WWW:
    WWW_Port = cfg.getint("logotron", "www_port")
    Max_Search_Results = cfg.getint("logotron", "max_search")
except (ConfigParser.Error, ValueError) as e:
    # ConfigParser.Error covers a missing section/option or bad interpolation;
    # ValueError covers a numeric knob that is not an integer.  Anything else
    # is a programming error and is allowed to surface with its traceback.
    print("Invalid config:  %s" % (e,))
    # sys.exit rather than the bare exit(): the latter is only injected by the
    # site module and is not guaranteed to exist in every interpreter set-up.
    sys.exit(1)
# NOTE(review): everything below this header is kept exactly as found; only
# this header comment is new.  The text is Python 2 (print statements,
# "except Exception, e") but is not loadable in its present form:
#
#   * Statements that belong on separate, indented lines are run together on
#     a few very long lines, so the extent of each def/if/for body has to be
#     inferred from the statements themselves.
#   * Markup inside string literals appears to have been stripped.  Signs of
#     this in the visible text:
#       - html_escape_table maps "&", "'", ">" and "<" to themselves, and its
#         entry for '"' is not a valid string literal;
#       - the route patterns for rnd(), log(), ilog() and rawlog() contain no
#         URL parameters although those views take chan / date / idx
#         arguments;
#       - highlight_matches() substitutes a bare "\1";
#       - boxlinks_re shows one capture group while its replacement uses \2;
#       - the era1_re pattern is cut off mid-call, no "def format_logline"
#         line is present although that name is registered with Jinja, and
#         the final .format() call reads `bot` and `dclass`, which are never
#         assigned in the visible text.
#   * Presumably an intact copy exists in version control -- confirm and
#     restore from there rather than repairing these spans by hand.
#
# Contents, in order of appearance: remaining knobs (Default_Chan,
# Min_Query_Length, Date_Short_Format); the Flask app object; per-request DB
# helpers (get_db, close_db, before_request, teardown_request, query_db); the
# 404 handler; HTML escaping; URL helpers (get_base, line_url); gen_chanlist;
# search-result highlighting helpers; log-line formatting regexps and the
# remains of format_logline; the views rnd, log, ilog, rawlog, logsearch and
# favicon; and the __main__ guard that starts the development server.
############################################################################## ############################################################################## ### Knobs not made into config yet ### Default_Chan = Channels[0] Min_Query_Length = 3 ## Format for Date in Log Lines Date_Short_Format = "%Y-%m-%d" ############################################################################## app = Flask(__name__) app.config.from_object(__name__) def get_db(): db = getattr(g, 'db', None) if db is None: db = g.db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User)) return db def close_db(): if hasattr(g, 'db'): g.db.close() @app.before_request def before_request(): g.db = get_db() @app.teardown_request def teardown_request(exception): close_db() def query_db(query, args=(), one=False): cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor) if (DB_DEBUG): print "query: '{0}'".format(query) cur.execute(query, args) rv = cur.fetchone() if one else cur.fetchall() if (DB_DEBUG): print "query res: '{0}'".format(rv) return rv ############################################################################## ## All eggogs redirect to main page @app.errorhandler(404) def page_not_found(error): return redirect(url_for('log')) ############################################################################## html_escape_table = { "&": "&", '"': """, "'": "'", ">": ">", "<": "<", } def html_escape(text): return "".join(html_escape_table.get(c,c) for c in text) ############################################################################## ## Get base URL def get_base(): if DEBUG: return request.host_url.rstrip('/') return Base_URL.rstrip('/') # Get perma-URL corresponding to given log line def line_url(l): return "{0}{1}{2}/{3}#{4}".format(get_base(), App_Root, l['chan'], l['t'].strftime(Date_Short_Format), l['idx']) def gen_chanlist(selected_chan, show_all_chans=False): # Get current time now = datetime.now() # Data for channel display : chan_list = [] chan_idx = 0 for 
chan in Channels: last_time = query_db( '''select t, idx from loglines where chan=%s and idx = (select max(idx) from loglines where chan=%s) ;''', [chan, chan], one=True) last_time_txt = "" last_time_url = "" if last_time != None: span = (now - last_time['t']) days = span.days # Only add to the list if it should be visible, otherwise continue if days > Days_Hide and chan != selected_chan and not show_all_chans: continue hours = span.seconds/3600 minutes = (span.seconds%3600)/60 if days != 0: last_time_txt += '%dd ' % days if hours != 0: last_time_txt += '%dh ' % hours if minutes != 0: last_time_txt += '%dm' % minutes last_time_url = "{0}{1}{2}/{3}#{4}".format( get_base(), App_Root, chan, last_time['t'].strftime(Date_Short_Format), last_time['idx']) chan_list.append({ 'name': chan }) chan_list[chan_idx]['last_time_url'] = last_time_url chan_list[chan_idx]['last_time_txt'] = last_time_txt chan_list[chan_idx]['chan_url'] = "{0}{1}{2}{3}".format( get_base(), App_Root, chan, '/' if chan == Default_Chan else '') chan_idx += 1 return chan_list # HTML Tag Regex tag_regex = re.compile("(<[^>]+>)") # Find the segments of a block of text which constitute HTML tags def get_link_intervals(str): links = [] span = [] for match in tag_regex.finditer(str): span = match.span() links += [span] return links # Highlight all matched tokens in given text def highlight_matches(strings, text): e = '(' + ('|'.join(strings)) + ')' return re.sub(e, r"""\1""", text, flags=re.I) # Highlight matched tokens in the display of a search result logline, # but leave HTML tags alone def highlight_text(strings, text): result = "" last = 0 for i in get_link_intervals(text): i_start, i_end = i result += highlight_matches(strings, text[last:i_start]) result += text[i_start:i_end] # the HTML tag, leave it alone last = i_end result += highlight_matches(strings, text[last:]) # last block return result # Regexps used in format_logline: boxlinks_re = re.compile( '\[\s*]*>[^ <]+\s*\]\[([^\[\]]+)\]') stdlinks_re 
= re.compile('(http[^ \[\]]+)') # For era 1 ('bitcoin-assets') links : era1_re = re.compile('(\1', payload) # Now also format [link][text] links : payload = re.sub(boxlinks_re, r'\2', payload) # For ancient logs strictly: substitute orig. link with our logger : if l['era'] < 3: payload = re.sub(era1_re, r' ' + speaker # If 'action', annotate: if l['self']: separator = "" payload = "" + payload + "" speaker = "" + speaker + "" # HTMLize the given line : s = ("
" "{1}{7} {4}
").format(l['idx'], speaker, l['t'], line_url(l), payload, bot, dclass, separator) return s # Make above callable from inside htm templater: app.jinja_env.globals.update(format_logline=format_logline) @app.route('/rnd/') def rnd(chan): # Handle rubbish chan: if chan not in Channels: return redirect(url_for('log')) rnd_line = query_db( '''select * from loglines where chan=%s order by random() limit 1 ;''', [chan], one=True) return redirect(line_url(rnd_line)) @app.route('%s/' % App_Root) @app.route('%s' % App_Root, defaults={'date': None}) @app.route('%s' % App_Root, defaults={'chan': Default_Chan, 'date': None}) def log(chan, date): # Handle rubbish chan: if chan not in Channels: return redirect(url_for('log')) # Get possible selection start and end sel_start = request.args.get('ss', default = 0, type = int) sel_end = request.args.get('se', default = 0, type = int) # Get possible 'reverse gear' rev = request.args.get('rev', default = 0, type = int) # Get possible 'show all' show_all = request.args.get('all', default = 0, type = int) # Get current time now = datetime.now() # Whether we are viewing 'current' tail tail = False # If viewing 'current' log: if date == None: date = now.strftime(Date_Short_Format) tail = True # Parse given date, and redirect to default log if rubbish: try: day_start = datetime.strptime(date, Date_Short_Format) except Exception, e: return redirect(url_for('log')) # Determine the end of the interval being shown day_end = day_start + timedelta(days=1) # Enable 'tail' is day_end is after end of current day if day_end > now: tail = True # Get the loglines from DB lines = query_db( '''select * from loglines where chan=%s and t between %s and %s order by idx asc;''', [chan, day_start, day_end], one=False) # Optional 'reverse gear' knob: if rev == 1: lines.reverse() # Generate navbar for the given date: prev_day = "" next_day = "" prev_t = query_db( '''select t from loglines where chan=%s and t < %s order by idx desc limit 1;''', [chan, 
day_start], one=True) if prev_t != None: prev_day = prev_t['t'].strftime(Date_Short_Format) if not tail: next_t = query_db( '''select t from loglines where chan=%s and t > %s order by idx asc limit 1;''', [chan, day_end], one=True) if next_t != None: next_day = next_t['t'].strftime(Date_Short_Format) # Generate url for css file based on config value css_url = url_for('static', filename=CSS_File) chan_list = gen_chanlist(chan, show_all) # Return the HTMLized text return render_template('log.html', css_url = css_url, app_root = App_Root, chan = chan, chan_list = chan_list, loglines = lines, sel = (sel_start, sel_end), date = date, prev_day = prev_day, next_day = next_day, rev = not rev, idle_day = Days_Hide, show_all = show_all) # Primarily for use with 'era 1' and 'era 2' : # Get arbitrary log item by chan and raw line index @app.route('/ilog//') def ilog(chan, idx): # Handle rubbish chan: if chan not in Channels: return redirect(url_for('log')) # Attempt to locate given chan/idx: item = query_db( '''select * from loglines where chan=%s and idx = %s ;''', [chan, idx], one=True) # If given chan/idx not found: if item == None: return redirect(url_for('log')) # Determine date where item appears in log : item_date = item['t'].strftime(Date_Short_Format) # Go there: return redirect(App_Root + chan + "/" + item_date + "#" + idx) @app.route('/log-raw/') def rawlog(chan): res = "" # Handle rubbish chan: if chan not in Channels: return Response("EGGOG: No such Channel!", mimetype='text/plain') # Get start and end indices: idx_start = request.args.get('istart', default = 0, type = int) idx_end = request.args.get('iend', default = 0, type = int) # Malformed bounds? if idx_start > idx_end: return Response("EGGOG: Start must precede End!", mimetype='text/plain') # Demanded too many in one burst ? if (idx_end - idx_start) > Max_Raw_Ln : return Response("EGGOG: May request Max. of %s Lines !" 
% Max_Raw_Ln, mimetype='text/plain') # Get the loglines from DB lines = query_db( '''select * from loglines where chan=%s and idx between %s and %s order by idx asc;''', [chan, idx_start, idx_end], one=False) # Retrieve raw lines in classical Phf format: for l in lines: action = "" speaker = "%s;" % l['speaker'] if l['self']: action = "*;" speaker = "%s " % l['speaker'] res += "%s;%s;%s%s%s\n" % (l['idx'], l['t'].strftime('%s'), action, speaker, l['payload']) # Return plain text: return Response(res, mimetype='text/plain') Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-" def sanitize_speaker(s): return "".join([ch for ch in s if ch in Name_Chars]) def re_escape(s): return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s) # Search knob. Supports 'chan' parameter. @app.route('/log-search') def logsearch(): # The query params: chan = request.args.get('chan', default = Default_Chan, type = str) query = request.args.get('q', default = '', type = str) offset = request.args.get('after', default = 0, type = int) show_all = 0 if len(query) < Min_Query_Length: return redirect(url_for('log')) # channels to search in chans = [] # whether to indicate chan per log line showchan = False if chan == 'all': # search in all logged channels chans = Channels legend = "all logged channels" showchan = True show_all = 1 else: # Handle possible rubbish chan: if chan not in Channels: return redirect(url_for('log')) else: # search in selected channel only chans = [chan] legend = chan nres = 0 searchres = [] tokens_orig = [] search_head = "Query is too short!" 
# Forbid query that is too short: if len(query) >= Min_Query_Length: # Get the search tokens to use: tokens = query.split() tokens_standard = [] from_users = [] # separate out "from:foo" tokens and ordinary: for t in tokens: if t.startswith("from:") or t.startswith("f:"): from_users.append(t.split(':')[1]) # Record user for 'from' query else: tokens_standard.append(t) from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users] tokens_orig = [re_escape(t) for t in tokens_standard] tokens_formed = ['%' + t + '%' for t in tokens_orig] # Query is usable; perform the search on DB and get the finds if from_users == []: searchres = query_db( '''select * from loglines where chan = any(%s) and payload ilike all(%s) order by t desc limit %s offset %s;''', [(chans,), tokens_formed, Max_Search_Results, offset], one=False) else: searchres = query_db( '''select * from loglines where chan = any(%s) and speaker ilike any(%s) and payload ilike all(%s) order by t desc limit %s offset %s;''', [(chans,), from_users, tokens_formed, Max_Search_Results, offset], one=False) # Number of search results returned in this query nres = len(searchres) # Whether to display 'back' button : back = (offset != 0) # Whether to display 'forward' button : forw = (nres == Max_Search_Results) # Starting index of search results sres = offset # Ending index of search results eres = offset + min(nres, Max_Search_Results) # Generate url for css file based on config value css_url = url_for('static', filename=CSS_File) chan_list = gen_chanlist(chan, show_all) return render_template('searchres.html', css_url = css_url, query = query, hquery = html_escape(query), legend = legend, sres = sres, eres = eres, back = back, forw = forw, psize = Max_Search_Results, chan = chan, chan_list = chan_list, tokens = tokens_orig, loglines = searchres, showchan = showchan, show_all = show_all) # Comment this out if you don't have one @app.route('/favicon.ico') def favicon(): return redirect(url_for('static', 
filename='favicon.ico')) ## App Mode if __name__ == '__main__': app.run(threaded=True, port=WWW_Port)