#!/usr/bin/python

##############################################################################
# Log reader web front-end: imports, configuration loading, and knobs.
#
# NOTE: this is Python 2 code (ConfigParser, urlparse, reload(sys),
# flask.ext.cache).  Import order is preserved because some statements
# between the imports have side effects.
##############################################################################

import ConfigParser
import sys
import psycopg2
import psycopg2.extras
import psycopg2.extensions
# Have psycopg2 hand back unicode objects (and arrays of them):
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
import time
import datetime
from datetime import timedelta
import sys
# Python 2 hack: make utf8 the implicit str<->unicode codec.
reload(sys)
sys.setdefaultencoding('utf8')
import os
import threading
import re
from datetime import datetime
from urlparse import urljoin
from flask import Flask, request, session, url_for, redirect, \
    render_template, abort, g, flash, _app_ctx_stack, make_response, \
    jsonify
from flask import Flask
from flask.ext.cache import Cache

##############################################################################

##############################################################################

# Single mandatory arg: config file path
if len(sys.argv[1:]) != 1:
    # If no args, print usage and exit:
    print(sys.argv[0] + " CONFIG")
    sys.exit(0)

# Read Config from given conf file
config_path = os.path.abspath(sys.argv[1])
cfg = ConfigParser.ConfigParser()
# Close the config file once parsed instead of leaking the handle:
with open(config_path) as config_file:
    cfg.readfp(config_file)

try:
    # IRCism:
    Nick = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick)  # Add our own bot to the bot list

    # DBism:
    DB_Name = cfg.get("db", "db_name")
    DB_User = cfg.get("db", "db_user")
    # cfg.get() returns a string, and any non-empty string (even "0" or
    # "false") is truthy; parse the usual "off" spellings explicitly so
    # that debug output can actually be disabled from the config file.
    DB_DEBUG = (cfg.get("db", "db_debug").strip().lower()
                not in ("", "0", "false", "no", "off"))

    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    Era = int(cfg.get("logotron", "era"))

    # WWW:
    WWW_Port = int(cfg.get("logotron", "www_port"))

except Exception as e:
    # Any missing section/option or unparsable number lands here.
    print("Invalid config:  {0}".format(e))
    sys.exit(1)

##############################################################################

##############################################################################
### Knobs not made into config yet ###

# First configured channel is the one shown when none is requested
Default_Chan = Channels[0]

# Shortest search query (in characters) that will be run
Min_Query_Length = 3

# Upper bound on rows returned by a search
Max_Search_Results = 1000

## Format for
## Date in Log Lines
Date_Short_Format = "%Y-%m-%d"

## WWW Debug Knob
DEBUG = False
##############################################################################

app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple'})
app.config.from_object(__name__)


def get_db():
    """Return the DB connection stored on flask.g, opening it if absent."""
    db = getattr(g, 'db', None)
    if db is None:
        db = g.db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
    return db


def close_db():
    """Close the connection stored on flask.g, if there is one."""
    if hasattr(g, 'db'):
        g.db.close()


@app.before_request
def before_request():
    """Make sure a DB connection is attached to flask.g for the request."""
    g.db = get_db()


@app.teardown_request
def teardown_request(exception):
    """Close the DB connection when the request is torn down."""
    close_db()


def _dict_cursor():
    """Open a cursor on the current connection that yields dict rows."""
    return get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)


def query_db(query, args=(), one=False):
    """Run a parameterized query and return its rows as dicts.

    query -- SQL text with %s placeholders
    args  -- values bound to the placeholders
    one   -- if true, return only the first row (None when there is none);
             otherwise return the list of all rows
    """
    cur = _dict_cursor()
    try:
        if (DB_DEBUG): print("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if (DB_DEBUG): print("query res: '{0}'".format(rv))
        return rv
    finally:
        # The rows are already fetched; release the cursor.
        cur.close()


def exec_db(query, args=()):
    """Execute a parameterized statement, discarding any result rows."""
    cur = _dict_cursor()
    try:
        if (DB_DEBUG): print("query: '{0}'".format(query))
        if (DB_DEBUG): print("args: '{0}'".format(args))
        if (DB_DEBUG): print("EXEC:")
        cur.execute(query, args)
    finally:
        cur.close()


def getlast_db():
    """Return the value of 'select lastval()' on the current connection."""
    cur = _dict_cursor()
    try:
        cur.execute('select lastval()')
        return cur.fetchone()['lastval']
    finally:
        cur.close()


def commit_db():
    """Commit the current transaction on the request's connection."""
    # No cursor is needed to commit; the connection does it directly.
    get_db().commit()

##############################################################################

## All eggogs redirect to main page
@app.errorhandler(404)
def page_not_found(error):
    """Send any unknown URL to the default log view."""
    return redirect(url_for('log'))

##############################################################################

# Characters that must not appear raw in HTML output, and their entities.
# (Each character has to map to its entity; mapping it to itself would
# make html_escape() a no-op.)
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Return text with &, ", ', > and < replaced by HTML entities."""
    return "".join(html_escape_table.get(c, c) for c in text)

##############################################################################

## Get base URL
def get_base():
    """Return the URL prefix used when building links.

    With the DEBUG knob on, this is the host URL of the current request;
    otherwise it is the configured Base_URL.
    """
    if DEBUG:
        return request.host_url
    return Base_URL


# Get perma-URL corresponding to
# given log line
def line_url(l):
    """Return the permanent URL of log line l.

    l is a row dict with keys 'chan', 't' (a datetime) and 'idx'.
    """
    return "{0}log/{1}/{2}#{3}".format(get_base(),
                                       l['chan'],
                                       l['t'].strftime(Date_Short_Format),
                                       l['idx'])


# NOTE(review): the HTML inside the string literals of gen_chanlist,
# highlight_matches, format_logline and generate_navbar (and the tag part
# of boxlinks_re) was missing from the text under review -- it had been
# stripped of markup.  The tags below are reconstructed from the format
# arguments and surrounding logic; attribute and class names are a best
# guess and must be confirmed against the templates and stylesheet.

def gen_chanlist(selected_chan):
    """Return an HTML table listing every channel.

    The first row links to each channel's log (selected_chan is marked);
    the second row links to each channel's newest line, labelled with how
    long ago it was spoken.
    """
    # Get current time
    now = datetime.now()
    parts = ["""<table align="center" class="chantable"><tr>"""]

    for chan in Channels:
        chan_formed = chan
        if chan == selected_chan:
            chan_formed = "<span class='highlight'>" + chan + "</span>"
        parts.append("""<th><a href="{0}log/{1}"><b>{2}</b></a></th>""".format(
            get_base(), chan, chan_formed))
    parts.append("</tr><tr>")

    for chan in Channels:
        last_time = query_db(
            '''select t, idx from loglines where chan=%s
               and idx = (select max(idx) from loglines where chan=%s) ;''',
            [chan, chan], one=True)

        if last_time is None:
            # Channel has no lines yet: keep the column, leave it empty.
            parts.append("<td></td>")
            continue

        # Age of the newest line, shown as e.g. "2d 3h 15m":
        span = (now - last_time['t'])
        days = span.days
        hours = span.seconds // 3600
        minutes = (span.seconds % 3600) // 60

        last_time_txt = ""
        if days != 0:
            last_time_txt += '%dd ' % days
        if hours != 0:
            last_time_txt += '%dh ' % hours
        if minutes != 0:
            last_time_txt += '%dm' % minutes

        parts.append(
            """<td><i><a href="{0}log/{1}/{2}#{3}">{4}</a></i></td>""".format(
                get_base(),
                chan,
                last_time['t'].strftime(Date_Short_Format),
                last_time['idx'],
                last_time_txt))

    parts.append("</tr></table>")
    return "".join(parts)


# Make above callable from inside htm templater:
app.jinja_env.globals.update(gen_chanlist=gen_chanlist)


# HTML Tag Regex
tag_regex = re.compile("(<[^>]+>)")


# Find the segments of a block of text which constitute HTML tags
def get_link_intervals(str):
    """Return the (start, end) span of every HTML tag found in str."""
    return [match.span() for match in tag_regex.finditer(str)]


# Highlight all matched tokens in given text
def highlight_matches(strings, text):
    """Wrap each case-insensitive match of any pattern in strings.

    strings are used as regex alternatives, so callers must pass them
    already regex-escaped.
    """
    if not strings:
        # An empty alternation matches the empty string at every position.
        return text
    e = '(' + ('|'.join(strings)) + ')'
    return re.sub(e, r"""<span class='highlight'>\1</span>""",
                  text, flags=re.I)


# Highlight matched tokens in the display of a search result logline,
# but leave HTML tags alone
def highlight_text(strings, text):
    """Apply highlight_matches to the parts of text lying outside tags."""
    result = ""
    last = 0
    for i_start, i_end in get_link_intervals(text):
        result += highlight_matches(strings, text[last:i_start])
        result += text[i_start:i_end]  # the HTML tag, leave it alone
        last = i_end
    result += highlight_matches(strings, text[last:])  # last block
    return result


# Regexps used in format_logline:
# boxlinks_re runs AFTER stdlinks_re has wrapped bare URLs in anchors, so
# it matches "[<a href=URL ...>URL</a>][text]"; group 1 is the URL and
# group 2 the link text.
boxlinks_re = re.compile(
    r'\[\s*<a href="(http[^ \[\]]+)".*>[^ <]+</a>\s*\]\[([^\[\]]+)\]')
stdlinks_re = re.compile(r'(http[^ \[\]]+)')


## Format given log line for display
def format_logline(l, highlights=None):
    """Return the HTML for one log line.

    l          -- row dict with keys 'idx', 'speaker', 't', 'chan', 'payload'
    highlights -- optional list of regex-escaped tokens to highlight in the
                  payload (used for search results)
    """
    payload = html_escape(l['payload'])

    # Format ordinary links:
    payload = re.sub(stdlinks_re,
                     r'<a href="\1" target=\'_blank\'>\1</a>', payload)

    # Now also format [link][text] links :
    payload = re.sub(boxlinks_re,
                     r'<a href="\1" target=\'_blank\'>\2</a>', payload)

    # If this is a search result, illuminate the matched strings:
    if highlights:
        payload = highlight_text(highlights, payload)

    # Lines spoken by known bots get an extra CSS class:
    bot = ""
    if l['speaker'] in Bots:
        bot = " bot"

    # HTMLize the given line :
    s = ("<div id='{0}' class='logline{5}'>"
         "<a class='nick' title='{2}'"
         " href=\"{3}\">{1}</a>: {4}</div>").format(l['idx'],
                                                    l['speaker'],
                                                    l['t'],
                                                    line_url(l),
                                                    payload,
                                                    bot)
    return s


# Make above callable from inside htm templater:
app.jinja_env.globals.update(format_logline=format_logline)


# Generate navbar for the given date:
def generate_navbar(date, tail, chan):
    """Return HTML links to the previous and (unless tail) next day's log.

    date -- the day being shown, as a Date_Short_Format string
    tail -- true when viewing the current log; no next-day link then
    chan -- the channel being shown
    """
    cur_day = datetime.strptime(date, Date_Short_Format)
    prev_day = cur_day - timedelta(days=1)
    prev_day_txt = prev_day.strftime(Date_Short_Format)

    # Arrows are written as entities: this file has no source-encoding
    # declaration, so non-ASCII literals are not allowed under Python 2.
    s = "<a href='{0}log/{1}/{2}'>&larr; {2}</a>".format(
        get_base(), chan, prev_day_txt)

    if not tail:
        next_day = cur_day + timedelta(days=1)
        next_day_txt = next_day.strftime(Date_Short_Format)
        s = s + " | <a href='{0}log/{1}/{2}'>{2} &rarr;</a>".format(
            get_base(), chan, next_day_txt)

    return s


# Make above callable from inside htm templater:
app.jinja_env.globals.update(generate_navbar=generate_navbar)


# The first two rules must capture <chan> (and <date>) from the URL:
# log() takes both arguments and only the last two rules default 'chan'.
@app.route('/log/<chan>/<date>')
@app.route('/log/<chan>', defaults={'date': None})
@app.route('/log/', defaults={'chan': Default_Chan, 'date': None})
@app.route('/log', defaults={'chan': Default_Chan, 'date': None})
def log(chan, date):
    """Render one day of the log of chan.

    date is a Date_Short_Format string, or None for the current day.
    An unknown channel or unparsable date redirects to the default log.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Get current time
    now = datetime.now()

    # Whether we are viewing 'current' tail
    tail = False

    # If viewing 'current' log:
    if date is None:
        date = now.strftime(Date_Short_Format)
        tail = True

    # Parse given date, and redirect to default log if rubbish:
    try:
        day_start = datetime.strptime(date, Date_Short_Format)
    except ValueError:
        return redirect(url_for('log'))

    # Determine the end of the interval being shown
    day_end = day_start + timedelta(days=1)

    # Get the loglines from DB.  The interval is half-open so that a line
    # stamped exactly at midnight belongs to one day only ('between' is
    # inclusive at both ends and would show it on two pages).
    lines = query_db(
        '''select * from loglines where chan=%s
           and t >= %s and t < %s order by idx asc;''',
        [chan, day_start, day_end], one=False)

    # Return the HTMLized text
    return render_template('log.html',
                           chan=chan,
                           loglines=lines,
                           date=date,
                           tail=tail)


# Characters permitted in a speaker name used in a search
Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"


def sanitize_speaker(s):
    """Return s with every character not in Name_Chars removed."""
    return "".join([ch for ch in s if ch in Name_Chars])


def re_escape(s):
    """Return s with regex metacharacters backslash-escaped."""
    return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s)


# Search knob. Supports 'chan' parameter.
@app.route('/log-search')
def logsearch():
    """Search the log of one channel and render the results.

    Query parameters: 'chan' (defaults to Default_Chan) and 'q'.  Tokens
    of the form "from:NAME" or "f:NAME" restrict the speaker; a line must
    contain every other token.  Queries shorter than Min_Query_Length
    are not run.
    """
    # The query params:
    chan = request.args.get('chan', default=Default_Chan, type=str)
    query = request.args.get('q', default='', type=str)
    # page_num = request.args.get('page', default = 0, type = int)

    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    nres = 0
    searchres = []
    tokens_orig = []
    search_head = "Query is too short!"

    # Forbid query that is too short:
    if len(query) >= Min_Query_Length:
        # Get the search tokens to use:
        tokens = query.split()
        tokens_standard = []
        from_users = []

        # separate out "from:foo" tokens and ordinary:
        for t in tokens:
            if t.startswith(("from:", "f:")):
                # Record user for 'from' query
                from_users.append(t.split(':')[1])
            else:
                tokens_standard.append(t)

        from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
        tokens_orig = [re_escape(t) for t in tokens_standard]
        tokens_formed = ['%' + t + '%' for t in tokens_orig]

        # Query is usable; perform the search on DB and get the finds
        if not from_users:
            searchres = query_db(
                '''select * from loglines where chan=%s
                   and payload ilike all(%s)
                   order by idx desc limit %s;''',
                [chan, tokens_formed, Max_Search_Results], one=False)
        else:
            print("from= {0}".format(from_users))
            searchres = query_db(
                '''select * from loglines where chan=%s
                   and speaker ilike any(%s) and payload ilike all(%s)
                   order by idx desc limit %s;''',
                [chan, from_users, tokens_formed, Max_Search_Results],
                one=False)

        # Number of entries found
        nres = len(searchres)
        search_head = "{0} entries found in {1} for '{2}' :".format(
            nres, chan, html_escape(query))

    # No paging support just yet:
    return render_template('searchres.html',
                           query=query,
                           nres=nres,
                           chan=chan,
                           search_head=search_head,
                           tokens=tokens_orig,
                           loglines=searchres)


# Comment this out if you don't have one
@app.route('/favicon.ico')
def favicon():
    """Redirect to the favicon in the static folder."""
    return redirect(url_for('static', filename='favicon.ico'))


## App Mode
if __name__ == '__main__':
    app.run(threaded=True, port=WWW_Port)