#!/usr/bin/python

##############################################################################
import ConfigParser, sys
import psycopg2, psycopg2.extras
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
import time
import datetime
from datetime import timedelta
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os
import threading
import re
from datetime import datetime
from urlparse import urljoin
from flask import Flask, request, session, url_for, redirect, Response, \
     render_template, abort, g, flash, _app_ctx_stack, make_response, \
     jsonify
from flask import Flask
##############################################################################

##############################################################################
# Single optional arg: config file path (a default path is used when it is absent)

# Exactly one command-line argument selects the config file; with any other
# argument count (e.g. when loaded by a WSGI server) the default path is used.
if len(sys.argv[1:]) != 1:
    # Default path for WSGI use (change to yours) :
    config_path = "/home/nsabot/logger/nsabot.conf"
else:
    # Read Config from given conf file
    config_path = sys.argv[1]

#config_path = os.path.abspath(config_path)
cfg = ConfigParser.ConfigParser()
# Close the config file as soon as it has been parsed instead of leaking the
# handle for the life of the process.
with open(config_path) as cfg_file:
    cfg.readfp(cfg_file)

# Pull every setting into a module-level name. Any missing section/option or
# non-integer value aborts start-up with a message and exit status 1.
try:
    # IRCism:
    Nick     = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots     = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick) # Add our own bot to the bot list
    # DBism:
    DB_Name  = cfg.get("db", "db_name")
    DB_User  = cfg.get("db", "db_user")
    DB_DEBUG = int(cfg.get("db", "db_debug"))
    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    Era      = int(cfg.get("logotron", "era"))
    DEBUG    = int(cfg.get("logotron", "www_dbg"))
    Max_Raw_Ln = int(cfg.get("logotron", "max_raw"))
    Days_Hide = int(cfg.get("logotron", "days_hide"))
    # WWW:
    WWW_Port = int(cfg.get("logotron", "www_port"))
    Max_Search_Results = int(cfg.get("logotron", "max_search"))

except Exception as e:
    print("Invalid config:  %s" % e)
    # sys.exit rather than the `exit` helper, which is only injected by the
    # `site` module and is meant for interactive sessions.
    sys.exit(1)

##############################################################################

##############################################################################
### Knobs not made into config yet ###
# Channel used when a request does not name one: the first entry of the
# configured channel list.
Default_Chan       = Channels[0]
# Shortest search query (in characters) that logsearch will actually run.
Min_Query_Length   = 3

## Format for Date in Log Lines
# Also used to build and to parse the <date> component of /log/<chan>/<date>.
Date_Short_Format  = "%Y-%m-%d"
##############################################################################

# The Flask application object; the routes and hooks below are registered on it.
app = Flask(__name__)
# Feed this module's own attributes to Flask as configuration.
# NOTE(review): Flask documents that from_object only copies upper-case names,
# so this presumably picks up DEBUG / DB_DEBUG only -- confirm that is intended.
app.config.from_object(__name__)

def get_db():
    """Return the database connection cached on flask.g, opening it if needed.

    The connection is made with the configured DB_Name / DB_User and stored
    as `g.db`, so later calls reuse it.
    """
    if getattr(g, 'db', None) is not None:
        return g.db
    conn = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
    g.db = conn
    return conn

def close_db():
    """Close the connection stored on flask.g, if one was ever stored."""
    if not hasattr(g, 'db'):
        return
    g.db.close()

@app.before_request
def before_request():
    """Make sure a database connection is available as `g.db` for the request."""
    connection = get_db()
    g.db = connection

@app.teardown_request
def teardown_request(exception):
    """Release the request's database connection when the request ends.

    `exception` is supplied by Flask and is not inspected here.
    """
    close_db()

def query_db(query, args=(), one=False):
    """Run a parameterized query and return its rows as dict-like records.

    query -- SQL text with %s placeholders
    args  -- sequence of values bound to the placeholders
    one   -- when true, return only the first row (None if there is none);
             otherwise return the list of all rows

    When DB_DEBUG is set, the query text and its result are printed.
    """
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            print("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if DB_DEBUG:
            print("query res: '{0}'".format(rv))
        return rv
    finally:
        # Always release the cursor, including when execute/fetch raises.
        cur.close()

##############################################################################

## All eggogs redirect to main page
@app.errorhandler(404)
def page_not_found(error):
    """Send any unknown URL to the main log page instead of showing a 404."""
    main_page = url_for('log')
    return redirect(main_page)

##############################################################################

# Characters with special meaning in HTML, mapped to their entity form.
html_escape_table = {
    "<": "&lt;",
    ">": "&gt;",
    "&": "&amp;",
    "'": "&apos;",
    '"': "&quot;",
}

def html_escape(text):
    """Return `text` with the characters & " ' > < replaced by HTML entities."""
    escaped = []
    for ch in text:
        # Characters without a table entry pass through untouched
        escaped.append(html_escape_table.get(ch, ch))
    return "".join(escaped)

##############################################################################

## Get base URL
def get_base():
    """Return the URL prefix for generated links.

    In DEBUG mode this is the host URL of the current request; otherwise it
    is the configured Base_URL.
    """
    return request.host_url if DEBUG else Base_URL


# Get perma-URL corresponding to given log line
def line_url(l):
    """Return the permanent URL of log line `l`.

    `l` is a row with 'chan', 't' (timestamp) and 'idx'; the result has the
    shape <base>log/<chan>/<YYYY-MM-DD>#<idx>.
    """
    day = l['t'].strftime(Date_Short_Format)
    return "{0}log/{1}/{2}#{3}".format(get_base(), l['chan'], day, l['idx'])

def gen_chanlist(selected_chan, show_all_chans=False):
    """Build the HTML channel-selector table.

    The table has two rows: a link to each channel's log (the selected
    channel is highlighted), and below it a link to that channel's most
    recent line, labelled with how long ago it was said.

    selected_chan  -- channel currently being viewed; always shown
    show_all_chans -- when true, also show channels idle for more than
                      Days_Hide days

    Channels with no log lines at all are never shown. Returns the table as
    an HTML string.
    """
    now = datetime.now()
    base = get_base()

    # Per-channel display data: 'show', 'link' and 'time'
    chan_tbl = {}
    for chan in Channels:
        chan_formed = chan
        if chan == selected_chan:
            chan_formed = "<span class='highlight'>" + chan + "</span>"

        entry = {
            'show': False,
            'link': """<a href="{0}log/{1}"><b>{2}</b></a>""".format(
                base, chan, chan_formed),
            'time': "",
        }
        chan_tbl[chan] = entry

        # Most recent line in this channel (None when the channel is empty)
        last_time = query_db(
            '''select t, idx from loglines where chan=%s
            and idx = (select max(idx) from loglines where chan=%s) ;''',
            [chan, chan], one=True)

        if last_time is None:
            continue

        span = now - last_time['t']
        days = span.days
        # Explicit floor division: whole hours / whole minutes
        hours = span.seconds // 3600
        minutes = (span.seconds % 3600) // 60

        parts = []
        if days != 0:
            parts.append('%dd' % days)
        if hours != 0:
            parts.append('%dh' % hours)
        if minutes != 0:
            parts.append('%dm' % minutes)
        # Activity under a minute ago would otherwise give an empty,
        # unclickable link.
        last_time_txt = ' '.join(parts) or '0m'

        entry['time'] = """<i><a href="{0}log/{1}/{2}#{3}">{4}</a></i>""".format(
            base,
            chan,
            last_time['t'].strftime(Date_Short_Format),
            last_time['idx'],
            last_time_txt)

        if (days <= Days_Hide) or (chan == selected_chan) or show_all_chans:
            entry['show'] = True

    shown = [chan for chan in Channels if chan_tbl[chan]['show']]

    ## Generate channel selector bar :
    out = ["""<table align="center" class="chantable"><tr>"""]
    for chan in shown:
        out.append("""<th>{0}</th>""".format(chan_tbl[chan]['link']))
    out.append("</tr><tr>")
    ## Generate last-activ. links for above :
    for chan in shown:
        out.append("""<td>{0}</td>""".format(chan_tbl[chan]['time']))
    # wrap up:
    out.append("</tr></table>")
    return "".join(out)


# Register gen_chanlist as a Jinja global so the HTML templates can call it:
app.jinja_env.globals.update(gen_chanlist=gen_chanlist)


# Matches a single HTML tag: '<', one or more non-'>' characters, then '>'
tag_regex = re.compile("(<[^>]+>)")


def get_link_intervals(str):
    """Return the (start, end) offsets of every HTML tag in the given text.

    Offsets are in order of appearance; `end` is exclusive. The list is
    empty when the text contains no tags.
    """
    return [found.span() for found in tag_regex.finditer(str)]


# Highlight all matched tokens in given text
def highlight_matches(strings, text):
    """Wrap every case-insensitive match of any token in a highlight <span>.

    strings -- tokens to look for; each one is used as a regular-expression
               fragment, so callers must escape them beforehand
    text    -- text to search

    Returns `text` unchanged when there is nothing to look for.
    """
    # Drop empty tokens: an empty alternative matches at every position and
    # would scatter empty highlight spans through the whole text.
    wanted = [s for s in strings if s]
    if not wanted:
        return text
    pattern = '(' + '|'.join(wanted) + ')'
    return re.sub(pattern,
                  r"""<span class='highlight'>\1</span>""",
                  text,
                  flags=re.I)


# Highlight matched tokens in the display of a search result logline,
# but leave HTML tags alone
def highlight_text(strings, text):
    """Highlight the tokens in `text` everywhere except inside HTML tags.

    The text between tags is passed through highlight_matches; the tags
    themselves are copied over verbatim.
    """
    pieces = []
    cursor = 0
    for tag_start, tag_end in get_link_intervals(text):
        pieces.append(highlight_matches(strings, text[cursor:tag_start]))
        pieces.append(text[tag_start:tag_end]) # the HTML tag, leave it alone
        cursor = tag_end
    # whatever follows the last tag
    pieces.append(highlight_matches(strings, text[cursor:]))
    return "".join(pieces)


# Regexps used in format_logline. Written as raw strings so the backslashes
# reach the regex engine as-is instead of relying on Python leaving unknown
# string escapes alone.

# '[ <a href="URL" ...>...</a> ][text]' : group 1 is the URL, group 2 the text
boxlinks_re = re.compile(
    r'\[\s*<a href="(http[^ \[\]]+)"[^>]*>[^ <]+</a>\s*\]\[([^\[\]]+)\]')

# A bare URL: 'http' followed by anything except spaces and square brackets
stdlinks_re = re.compile(r'(http[^ \[\]]+)')

# For era 1 ('bitcoin-assets') links : group 2 is the line index
era1_re = re.compile(r'(<a href="http[^ \[\]]\/\/log\d*\.bitcoin-assets\.com\/\?date=\d+-\d+-\d+#)(\d+)')

# For era 2 ('btcbase') links : group 2 is the line index
era2_re = re.compile(r'(<a href="http[^ \[\]]\/\/btcbase\.org\/log\/\d+-\d+-\d+#)(\d+)')


## Format given log line for display
def format_logline(l, highlights=None, select=None, showchan=False):
    """Render one log line as an HTML <div>.

    l          -- row with 'payload', 'speaker', 'chan', 't', 'idx', 'era'
                  and 'self' (true for an 'action' line)
    highlights -- search tokens (regex fragments) to illuminate in the
                  payload; none when empty or omitted
    select     -- (start_idx, end_idx) pair; a line whose idx falls inside
                  the inclusive range gets the 'highlight' class
    showchan   -- prefix the speaker with the channel name

    Returns the HTML string.
    """
    payload = html_escape(l['payload'])

    # Format ordinary links. The replacement templates must not escape the
    # single quotes: re.sub leaves an unknown '\'' escape in place, so the
    # backslashes would end up in the emitted HTML.
    payload = re.sub(stdlinks_re,
                     r"""<a href="\1" target='_blank'>\1</a>""", payload)

    # Now also format [link][text] links :
    payload = re.sub(boxlinks_re,
                     r"""<a href="\1"  target='_blank'>\2</a>""", payload)

    # For ancient logs strictly: substitute orig. link with our logger :
    if l['era'] < 3:
        payload = re.sub(era1_re,
                         r'<a href="/ilog/{0}/\2'.format(l['chan']),
                         payload)

    # Adjust era 2 links in all cases:
    payload = re.sub(era2_re,
                     r'<a href="/ilog/{0}/\2'.format(l['chan']),
                     payload)

    # If this is a search result, illuminate the matched strings:
    if highlights:
        payload = highlight_text(highlights, payload)

    bot = ""
    if l['speaker'] in Bots:
        bot = " bot"

    # The speaker comes from logged data and lands both in an attribute and
    # in element text, so escape it like the payload.
    speaker = html_escape(l['speaker'])

    # default -- no selection: the CSS class is the speaker's name
    dclass = speaker

    # If selection is given:
    if select:
        ss, se = select
        if ss <= l['idx'] <= se:
            dclass = "highlight"

    separator = ":"

    if showchan:
        speaker = '<small>(' + l['chan'] + ')</small> ' + speaker

    # If 'action', annotate:
    if l['self']:
        separator = ""
        payload = "<i>" + payload + "</i>"
        speaker = "<i>" + speaker + "</i>"

    # HTMLize the given line :
    s = ("<div id='{0}' class='{6}{5}'>"
         "<a class='nick' title='{2}'"
         " href=\"{3}\">{1}</a>{7} {4}</div>").format(l['idx'],
                                                      speaker,
                                                      l['t'],
                                                      line_url(l),
                                                      payload,
                                                      bot,
                                                      dclass,
                                                      separator)
    return s

# Register format_logline as a Jinja global so the HTML templates can call it:
app.jinja_env.globals.update(format_logline=format_logline) 


@app.route('/rnd/<chan>')
def rnd(chan):
    """Redirect to the permanent URL of a randomly chosen line of `chan`.

    Unknown channels, and channels that have no lines yet, redirect to the
    main log page instead.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    rnd_line = query_db(
        '''select * from loglines where chan=%s
        order by random() limit 1 ;''',
        [chan], one=True)

    # Nothing logged in this channel yet: there is no line to link to
    if rnd_line is None:
        return redirect(url_for('log'))

    return redirect(line_url(rnd_line))


@app.route('/log/<chan>/<date>')
@app.route('/log/<chan>', defaults={'date': None})
@app.route('/log/', defaults={'chan': Default_Chan, 'date': None})
@app.route('/log', defaults={'chan': Default_Chan, 'date': None})
def log(chan, date):
    """Show one day of the log of `chan`.

    chan -- channel name; an unknown channel redirects to the main log page
    date -- day to show as YYYY-MM-DD, or None for today; an unparseable
            date redirects to the main log page

    Query parameters:
      ss, se -- first and last line index of a selection (default 0)
      rev    -- 1 to list the lines newest-first
      all    -- passed through to the template as show_all

    Renders log.html with the day's lines plus the dates of the nearest
    earlier and later days that have lines (empty string when there is none).
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Get possible selection start and end
    sel_start = request.args.get('ss', default = 0, type = int)
    sel_end = request.args.get('se', default = 0, type = int)

    # Get possible 'reverse gear'
    rev = request.args.get('rev', default = 0, type = int)

    # Get possible 'show all'
    show_all = request.args.get('all', default = 0, type = int)

    # Get current time
    now = datetime.now()

    # Whether we are viewing 'current' tail
    tail = False

    # If viewing 'current' log:
    if date is None:
        date = now.strftime(Date_Short_Format)
        tail = True

    # Parse given date, and redirect to default log if rubbish:
    try:
        day_start = datetime.strptime(date, Date_Short_Format)
    except ValueError:
        return redirect(url_for('log'))

    # Determine the end of the interval being shown
    day_end = day_start + timedelta(days=1)

    # Enable 'tail' if day_end is after the current time
    if day_end > now:
        tail = True

    # Get the loglines from DB. The interval is half-open [day_start, day_end)
    # so a line stamped exactly at midnight belongs to one day only.
    lines = query_db(
        '''select * from loglines where chan=%s
        and t >= %s and t < %s order by idx asc;''',
        [chan, day_start, day_end], one=False)

    # Optional 'reverse gear' knob:
    if rev == 1:
        lines.reverse()

    # Generate navbar for the given date:
    prev_day = ""
    next_day = ""

    prev_t = query_db(
        '''select t from loglines where chan=%s
        and t < %s order by idx desc limit 1;''',
        [chan, day_start], one=True)

    if prev_t is not None:
        prev_day = prev_t['t'].strftime(Date_Short_Format)

    # There is nothing after the current tail, so only look when not on it
    if not tail:
        # >= matches the half-open interval above: a line exactly at
        # day_end is the first line of the following day.
        next_t = query_db(
            '''select t from loglines where chan=%s
            and t >= %s order by idx asc limit 1;''',
            [chan, day_end], one=True)

        if next_t is not None:
            next_day = next_t['t'].strftime(Date_Short_Format)

    # Return the HTMLized text
    return render_template('log.html',
                           chan     = chan,
                           loglines = lines,
                           sel      = (sel_start, sel_end),
                           date     = date,
                           prev_day = prev_day,
                           next_day = next_day,
                           rev      = not rev,
                           show_all = show_all,
                           idle_day = Days_Hide)


# Primarily for use with 'era 1' and 'era 2' :
# Get arbitrary log item by chan and raw line index
@app.route('/ilog/<chan>/<idx>')
def ilog(chan, idx):
    """Redirect to the day page containing line `idx` of `chan`.

    chan -- channel name
    idx  -- raw line index, taken from the URL as text

    An unknown channel, a non-numeric index, or an index with no matching
    line all redirect to the main log page.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Handle rubbish index: a non-numeric value would otherwise be handed to
    # the database for comparison with idx
    # (assumes loglines.idx is an integer column -- TODO confirm).
    try:
        idx_num = int(idx)
    except ValueError:
        return redirect(url_for('log'))

    # Attempt to locate given chan/idx:
    item = query_db(
        '''select * from loglines where chan=%s and idx = %s ;''',
        [chan, idx_num], one=True)

    # If given chan/idx not found:
    if item is None:
        return redirect(url_for('log'))

    # Determine date where item appears in log :
    item_date = item['t'].strftime(Date_Short_Format)

    # Go there; the normalized number is what a line's anchor id looks like
    return redirect("/log/" + chan + "/" + item_date + "#" + str(idx_num))
    

@app.route('/log-raw/<chan>')
def rawlog(chan):
    """Return a range of raw log lines of `chan` as plain text.

    Query parameters:
      istart, iend -- first and last line index (inclusive, default 0)

    Each output line is 'idx;epoch;speaker;payload', or
    'idx;epoch;*;speaker payload' for an 'action' line. An unknown channel,
    istart > iend, or a span wider than Max_Raw_Ln yields a plain-text
    'EGGOG' message instead.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return Response("EGGOG: No such Channel!", mimetype='text/plain')

    # Get start and end indices:
    idx_start = request.args.get('istart', default = 0, type = int)
    idx_end = request.args.get('iend', default = 0, type = int)

    # Malformed bounds?
    if idx_start > idx_end:
        return Response("EGGOG: Start must precede End!",
                        mimetype='text/plain')

    # Demanded too many in one burst ?
    if (idx_end - idx_start) > Max_Raw_Ln :
        return Response("EGGOG: May request Max. of %s Lines !" % Max_Raw_Ln,
                        mimetype='text/plain')

    # Get the loglines from DB
    lines = query_db(
        '''select * from loglines where chan=%s
        and idx between %s and %s order by idx asc;''',
        [chan, idx_start, idx_end], one=False)

    # Retrieve raw lines in classical Phf format:
    out = []
    for l in lines:
        if l['self']:
            action = "*;"
            speaker = "%s " % l['speaker']
        else:
            action = ""
            speaker = "%s;" % l['speaker']
        # Epoch seconds of the timestamp read as local time. time.mktime is
        # the documented way to get this; strftime('%s') only works where
        # the C library happens to support that directive.
        epoch = int(time.mktime(l['t'].timetuple()))
        out.append("%s;%s;%s%s%s\n" % (l['idx'],
                                       epoch,
                                       action,
                                       speaker,
                                       l['payload']))

    # Return plain text:
    return Response("".join(out), mimetype='text/plain')


# The only characters allowed to survive in a 'from:' speaker filter
Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"

def sanitize_speaker(s):
    """Return `s` with every character outside Name_Chars removed."""
    kept = []
    for ch in s:
        if ch in Name_Chars:
            kept.append(ch)
    return "".join(kept)


def re_escape(s):
    """Return `s` with each regex metacharacter preceded by a backslash.

    The characters escaped are ( ) { } [ ] . * ? | ^ $ \\ + and -
    """
    def add_backslash(found):
        return "\\" + found.group(0)

    return re.sub(r"[(){}\[\].*?|^$\\+-]", add_backslash, s)

# Search knob. Supports 'chan' parameter.
@app.route('/log-search')
def logsearch():
    """Search the logged lines and render one page of results.

    Query parameters:
      chan  -- channel to search, or 'all' for every logged channel
               (default: Default_Chan); an unknown channel redirects to the
               main log page
      q     -- whitespace-separated tokens that must all occur in a line;
               'from:NICK' / 'f:NICK' tokens restrict the speaker instead
      after -- number of results to skip (paging offset, default 0)

    A query shorter than Min_Query_Length is not run; the template is then
    rendered with no results.
    """
    # The query params:
    chan = request.args.get('chan', default = Default_Chan, type = str)
    query = request.args.get('q', default = '', type = str)
    offset = request.args.get('after', default = 0, type = int)

    # PostgreSQL rejects a negative OFFSET; treat it as the first page
    offset = max(offset, 0)

    if chan == 'all':
        # search in all logged channels, and say which one each line is from
        chans = Channels
        legend = "<i>all logged channels</i>"
        showchan = True
    elif chan in Channels:
        # search in selected channel only
        chans = [chan]
        legend = chan
        showchan = False
    else:
        # Handle possible rubbish chan:
        return redirect(url_for('log'))

    # Values handed to the template when the query is too short to run.
    # All of them must exist, or rendering fails with an unbound local.
    searchres   = []
    tokens_orig = []
    sres = 0
    eres = 0
    back = False
    forw = False

    # Forbid query that is too short:
    if len(query) >= Min_Query_Length:
        # Get the search tokens to use:
        tokens_standard = []
        from_users = []

        # separate out "from:foo" tokens and ordinary:
        for t in query.split():
            if t.startswith(("from:", "f:")):
                from_users.append(t.split(':')[1]) # Record user for 'from' query
            else:
                tokens_standard.append(t)

        from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
        tokens_orig = [re_escape(t) for t in tokens_standard]
        tokens_formed = ['%' + t + '%' for t in tokens_orig]

        # Query is usable; perform the search on DB and get the finds
        if not from_users:
            searchres = query_db(
                '''select * from loglines where chan = any(%s)
                and payload ilike all(%s) order by t desc
                limit %s offset %s;''',
                [(chans,),
                 tokens_formed,
                 Max_Search_Results,
                 offset], one=False)
        else:
            searchres = query_db(
                '''select * from loglines where chan = any(%s)
                and speaker ilike any(%s)
                and payload ilike all(%s) order by t desc
                limit %s offset %s;''',
                [(chans,),
                 from_users,
                 tokens_formed,
                 Max_Search_Results,
                 offset], one=False)

        # Number of search results returned in this query
        nres = len(searchres)

        # Whether to display 'back' button :
        back = (offset != 0)

        # Whether to display 'forward' button (a full page may have more) :
        forw = (nres == Max_Search_Results)

        # Starting index of search results
        sres = offset

        # Ending index of search results
        eres = offset + min(nres, Max_Search_Results)

    # Render this page of results; paging is driven by 'after' and psize
    return render_template('searchres.html',
                           query       = query,
                           hquery      = html_escape(query),
                           legend      = legend,
                           sres        = sres,
                           eres        = eres,
                           back        = back,
                           forw        = forw,
                           psize       = Max_Search_Results,
                           chan        = chan,
                           tokens      = tokens_orig,
                           loglines    = searchres,
                           showchan    = showchan)

# Comment this out if you don't have one
@app.route('/favicon.ico')
def favicon():
    """Redirect /favicon.ico to the favicon.ico served from the static folder."""
    icon_url = url_for('static', filename='favicon.ico')
    return redirect(icon_url)


## App Mode
# Only when this file is executed directly (not when it is imported):
# start Flask's own server on the configured port, with threading enabled.
if __name__ == '__main__':
    app.run(threaded=True, port=WWW_Port)