#!/usr/bin/python

##############################################################################
import ConfigParser, sys
import psycopg2, psycopg2.extras
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
import time
from datetime import timedelta, datetime
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import os
import threading
import re
from urlparse import urljoin
from flask import Flask, request, session, url_for, redirect, Response, \
     render_template, abort, g, flash, _app_ctx_stack, make_response, \
     jsonify
##############################################################################

##############################################################################
# Single optional arg: config file path (a default path is used without it)

if len(sys.argv[1:]) != 1:
    # Default path for WSGI use (change to yours) :
    config_path = "/home/nsabot/logger/nsabot.conf"
else:
    # Read Config from given conf file
    config_path = sys.argv[1]

#config_path = os.path.abspath(config_path)
cfg = ConfigParser.ConfigParser()
cfg.readfp(open(config_path))

try:
    # IRCism:
    Nick     = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots     = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick) # Add our own bot to the bot list
    # DBism:
    DB_Name  = cfg.get("db", "db_name")
    DB_User  = cfg.get("db", "db_user")
    DB_DEBUG = int(cfg.get("db", "db_debug"))
    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    App_Root = cfg.get("logotron", "app_root")
    CSS_File = cfg.get("logotron", "css_file")
    Era      = int(cfg.get("logotron", "era"))
    DEBUG    = int(cfg.get("logotron", "www_dbg"))
    Max_Raw_Ln = int(cfg.get("logotron", "max_raw"))
    Days_Hide = int(cfg.get("logotron", "days_hide"))
    # WWW:
    WWW_Port = int(cfg.get("logotron", "www_port"))
    Max_Search_Results = int(cfg.get("logotron", "max_search"))

except Exception as e:
    print "Invalid config: ", e
    exit(1)

##############################################################################

##############################################################################
### Knobs not made into config yet ###
# First configured channel; used when a request does not name a channel
Default_Chan       = Channels[0]
# Shortest search query (in characters) accepted by the search page
Min_Query_Length   = 3

## Format for Date in Log Lines
# (also used to parse the <date> URL segment and to build permalinks)
Date_Short_Format  = "%Y-%m-%d"
##############################################################################

# The Flask application object; every route below is registered on it
app = Flask(__name__)
# Feed this module's own top-level names to Flask as its configuration
# NOTE(review): Flask is documented to pick up only UPPERCASE names here
# (e.g. DEBUG) -- confirm against the Flask version in use
app.config.from_object(__name__)

def get_db():
    """Return the database connection cached on ``g.db``.

    If no connection has been stored on ``g`` yet (or the stored value is
    None), a new psycopg2 connection is opened with the configured
    database name and user, cached on ``g.db``, and returned.
    """
    conn = getattr(g, 'db', None)
    if conn is not None:
        return conn

    conn = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
    g.db = conn
    return conn

def close_db():
    """Close the connection stored on ``g.db``, if ``g`` has one."""
    if not hasattr(g, 'db'):
        # Nothing was ever attached to g; nothing to close
        return
    g.db.close()

@app.before_request
def before_request():
    """Make sure ``g.db`` holds a database connection before each request."""
    connection = get_db()
    g.db = connection

@app.teardown_request
def teardown_request(exception):
    """Close the database connection when a request is torn down.

    ``exception`` is passed in by Flask and is not used here.
    """
    close_db()

def query_db(query, args=(), one=False):
    """Run a parameterized SQL query and return its rows as dicts.

    Args:
        query: SQL text with ``%s`` placeholders.
        args:  Sequence of values bound to the placeholders.
        one:   If true, return only the first row (or None when there is
               no row); otherwise return the list of all rows.

    Returns:
        A single row or a list of rows, as produced by psycopg2's
        RealDictCursor (column name -> value mappings).

    When DB_DEBUG is set, the query text and its result are printed.
    """
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            print("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if DB_DEBUG:
            print("query res: '{0}'".format(rv))
        return rv
    finally:
        # The rows are already fetched into Python objects, so the cursor
        # can be released right away -- also when execute() raised.
        cur.close()

##############################################################################

## All 'not found' (404) eggogs redirect to main page
@app.errorhandler(404)
def page_not_found(error):
    """Answer any 404 with a redirect to the default log page.

    ``error`` is supplied by Flask and is not inspected.
    """
    main_page = url_for('log')
    return redirect(main_page)

##############################################################################

# Characters that must not appear verbatim in HTML text or attribute
# values, mapped to the entity that replaces each of them.
html_escape_table = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
}

def html_escape(text):
    """Return ``text`` with every character found in html_escape_table
    replaced by its entity; all other characters pass through unchanged."""
    escaped = []
    for ch in text:
        escaped.append(html_escape_table.get(ch, ch))
    return "".join(escaped)

##############################################################################

## Get base URL
def get_base():
    """Return the site's base URL without a trailing slash.

    With DEBUG set, the base is taken from the current request's host URL;
    otherwise the configured Base_URL is used.
    """
    base = request.host_url if DEBUG else Base_URL
    return base.rstrip('/')

# Get perma-URL corresponding to given log line
def line_url(l):
    """Build the permalink for log line ``l``.

    ``l`` must provide 'chan', 't' (a datetime) and 'idx'. The result has
    the form <base><App_Root><chan>/<YYYY-MM-DD>#<idx>.
    """
    base = get_base()
    day = l['t'].strftime(Date_Short_Format)
    return "{0}{1}{2}/{3}#{4}".format(base, App_Root, l['chan'], day, l['idx'])

def gen_chanlist(selected_chan, show_all_chans=False):
    """Build the per-channel data shown in the channel list.

    For every configured channel the newest log line is looked up. A
    channel whose newest line is more than Days_Hide days old is left out,
    unless it is ``selected_chan`` or ``show_all_chans`` is true. Channels
    with no lines at all are always listed, with empty time fields.

    Returns:
        A list of dicts with the keys 'name', 'last_time_url' (permalink
        of the newest line), 'last_time_txt' (age of the newest line, e.g.
        '2d 3h 15m') and 'chan_url'.
    """
    current_time = datetime.now()
    entries = []

    for chan in Channels:
        newest = query_db(
            '''select t, idx from loglines where chan=%s
            and idx = (select max(idx) from loglines where chan=%s) ;''',
            [chan, chan], one=True)

        age_txt = ""
        newest_url = ""

        if newest is not None:
            age = current_time - newest['t']
            days = age.days

            # Idle channels are skipped unless explicitly wanted
            if days > Days_Hide and chan != selected_chan and not show_all_chans:
                continue

            # Break the sub-day remainder into whole hours and minutes
            hours = age.seconds // 3600
            minutes = (age.seconds % 3600) // 60

            # Only non-zero units are mentioned
            parts = []
            if days != 0:
                parts.append('%dd ' % days)
            if hours != 0:
                parts.append('%dh ' % hours)
            if minutes != 0:
                parts.append('%dm' % minutes)
            age_txt = "".join(parts)

            newest_url = "{0}{1}{2}/{3}#{4}".format(
                get_base(),
                App_Root,
                chan,
                newest['t'].strftime(Date_Short_Format),
                newest['idx'])

        # The default channel's URL carries a trailing slash
        trailer = '/' if chan == Default_Chan else ''

        entries.append({
            'name': chan,
            'last_time_url': newest_url,
            'last_time_txt': age_txt,
            'chan_url': "{0}{1}{2}{3}".format(
                get_base(), App_Root, chan, trailer),
        })

    return entries


# HTML Tag Regex: a '<', one or more characters other than '>', then '>'
tag_regex = re.compile("(<[^>]+>)")


# Locate every HTML tag inside a block of text
def get_link_intervals(str):
    """Return the (start, end) offsets of each HTML tag found in ``str``,
    in order of appearance; an empty list when there are none."""
    return [tag.span() for tag in tag_regex.finditer(str)]


# Wrap every occurrence of any given token in a 'highlight' span
def highlight_matches(strings, text):
    """Return ``text`` with each case-insensitive match of any pattern in
    ``strings`` wrapped in <span class='highlight'>...</span>.

    The entries of ``strings`` are joined with '|' and used as a regular
    expression as-is, so callers must escape them beforehand.
    """
    alternation = '(' + '|'.join(strings) + ')'
    matcher = re.compile(alternation, re.I)
    return matcher.sub(r"""<span class='highlight'>\1</span>""", text)


# Highlight matched tokens in the display of a search result logline,
# but leave HTML tags alone
def highlight_text(strings, text):
    """Highlight ``strings`` in ``text`` everywhere except inside HTML tags.

    The text is cut at each tag: the stretches between tags go through
    highlight_matches, while the tags themselves are copied verbatim.
    """
    pieces = []
    cursor = 0
    for tag_start, tag_end in get_link_intervals(text):
        # Text leading up to the tag gets highlighted ...
        pieces.append(highlight_matches(strings, text[cursor:tag_start]))
        # ... the tag itself is passed through untouched
        pieces.append(text[tag_start:tag_end])
        cursor = tag_end
    # Whatever follows the last tag (or the whole text, if no tags)
    pieces.append(highlight_matches(strings, text[cursor:]))
    return "".join(pieces)


# Regexps used in format_logline:

# '[ <a href="URL" ...>text</a> ][label]' -- i.e. a '[link][text]' pair
# whose link part has already been turned into an anchor.
# Group 1 is the URL, group 2 is the label.
boxlinks_re = re.compile(
    '\[\s*<a href="(http[^ \[\]]+)"[^>]*>[^ <]+</a>\s*\]\[([^\[\]]+)\]')

# Any run starting with 'http' and containing no space or square bracket
stdlinks_re = re.compile('(http[^ \[\]]+)')

# For era 1 ('bitcoin-assets') links :
# Group 1 is the anchor opening up to and including '#', group 2 is the
# line number after the '#'.
# NOTE(review): 'http[^ \[\]]' allows exactly one character between
# 'http' and '//', so 'http://' matches but 'https://' does not --
# confirm that this is intended.
era1_re = re.compile('(<a href="http[^ \[\]]\/\/log\d*\.bitcoin-assets\.com\/+\?date=\d+-\d+-\d+#)(\d+)')

# For era 2 ('btcbase') links :
# Same group layout (and same single-character caveat) as era1_re.
era2_re = re.compile('(<a href="http[^ \[\]]\/\/btcbase\.org\/log\/\d+-\d+-\d+#)(\d+)')


## Format given log line for display
def format_logline(l, highlights = [], select=[], showchan=False):
    """Render one log line as an HTML <div>.

    Args:
        l: Log line mapping; the keys 'payload', 'era', 'speaker', 'idx',
           'self', 'chan' and 't' are read.
        highlights: Regex-escaped search tokens to illuminate inside the
           payload; nothing is highlighted when empty.
        select: Pair (start_idx, end_idx); a line whose 'idx' lies within
           it (inclusive) gets the CSS class 'highlight' instead of the
           speaker's name. No selection when empty.
        showchan: If true, prefix the speaker with the channel name.

    Returns:
        The HTML for the line, as a string.
    """
    payload = html_escape(l['payload'])

    # The two link templates below are triple-quoted so that the single
    # quotes around _blank need no backslash. Inside a raw string a
    # backslash before a quote is kept, and re.sub copies it into the
    # output, which used to produce  target=\'_blank\'  in the page.

    # Format ordinary links:
    payload = re.sub(stdlinks_re,
                     r'''<a href="\1" target='_blank'>\1</a>''', payload)

    # Now also format [link][text] links :
    payload = re.sub(boxlinks_re,
                     r'''<a href="\1"  target='_blank'>\2</a>''', payload)

    # For ancient logs strictly: substitute orig. link with our logger :
    if l['era'] < 3:
        payload = re.sub(era1_re,
                         r'<a href="/ilog/trilema/\2', payload)

    # Adjust era 2 links in all cases:
    payload = re.sub(era2_re,
                     r'<a href="/ilog/trilema/\2', payload)

    # If this is a search result, illuminate the matched strings.
    # (Truthiness test: an empty or missing token list must never reach
    # the highlighter, where it would form an empty pattern.)
    if highlights:
        payload = highlight_text(highlights, payload)

    bot = ""
    if l['speaker'] in Bots:
        bot = " bot"

    # The speaker name lands in both markup text and a class attribute,
    # so escape it like the payload.
    speaker = html_escape(l['speaker'])

    # default -- no selection
    dclass = speaker

    # If selection is given:
    if select:
        ss, se = select
        if ss <= l['idx'] <= se:
            dclass = "highlight"

    separator = ":"

    if showchan:
        speaker = '<small>(' + l['chan'] + ')</small> ' + speaker

    # If 'action', annotate:
    if l['self']:
        separator = ""
        payload = "<i>" + payload + "</i>"
        speaker = "<i>" + speaker + "</i>"

    # HTMLize the given line :
    s = ("<div id='{0}' class='logline {6}{5}'>"
         "<a class='nick' title='{2}'"
         " href=\"{3}\">{1}</a>{7} {4}</div>").format(l['idx'],
                                                      speaker,
                                                      l['t'],
                                                      line_url(l),
                                                      payload,
                                                      bot,
                                                      dclass,
                                                      separator)
    return s

# Make format_logline callable from inside the HTML templates:
app.jinja_env.globals.update(format_logline=format_logline)


@app.route('/rnd/<chan>')
def rnd(chan):
    """Redirect to the permalink of a randomly chosen line of ``chan``.

    Unknown channels, and channels that have no logged lines yet,
    redirect to the default log page instead.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    rnd_line = query_db(
        '''select * from loglines where chan=%s
        order by random() limit 1 ;''',
        [chan], one=True)

    # A configured channel may still be empty; without this check the
    # permalink builder would be handed None and the request would fail.
    if rnd_line is None:
        return redirect(url_for('log'))

    return redirect(line_url(rnd_line))


@app.route('%s<chan>/<date>' % App_Root)
@app.route('%s<chan>' % App_Root, defaults={'date': None})
@app.route('%s' % App_Root, defaults={'chan': Default_Chan, 'date': None})
def log(chan, date):
    """Show one day of a channel's log, rendered with 'log.html'.

    Args:
        chan: Channel name from the URL (Default_Chan when absent).
        date: Day to show, formatted per Date_Short_Format, or None for
              the current day.

    Query parameters read: 'ss' / 'se' (selection start / end line
    index), 'rev' (1 = newest line first) and 'all' (show every channel
    in the channel list).

    An unknown channel or an unparseable date redirects to the default
    log page.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Get possible selection start and end
    sel_start = request.args.get('ss', default = 0, type = int)
    sel_end = request.args.get('se', default = 0, type = int)

    # Get possible 'reverse gear'
    rev = request.args.get('rev', default = 0, type = int)

    # Get possible 'show all'
    show_all = request.args.get('all', default = 0, type = int)

    # Get current time
    now = datetime.now()

    # Whether we are viewing 'current' tail
    tail = False

    # If viewing 'current' log:
    if date is None:
        date = now.strftime(Date_Short_Format)
        tail = True

    # Parse given date, and redirect to default log if rubbish:
    try:
        day_start = datetime.strptime(date, Date_Short_Format)
    except ValueError:
        return redirect(url_for('log'))

    # Determine the end of the interval being shown
    day_end = day_start + timedelta(days=1)

    # Enable 'tail' if day_end is after the current time
    if day_end > now:
        tail = True

    # Get the loglines from DB.
    # The day is the half-open interval [day_start, day_end): a line
    # stamped exactly at midnight belongs to the day it starts, and must
    # not also be listed at the bottom of the previous day.
    lines = query_db(
        '''select * from loglines where chan=%s
        and t >= %s and t < %s order by idx asc;''',
                     [chan, day_start, day_end], one=False)

    # Optional 'reverse gear' knob:
    if rev == 1:
        lines.reverse()

    # Generate navbar for the given date:
    prev_day = ""
    next_day = ""

    prev_t = query_db(
        '''select t from loglines where chan=%s
        and t < %s order by idx desc limit 1;''',
        [chan, day_start], one=True)

    if prev_t is not None:
        prev_day = prev_t['t'].strftime(Date_Short_Format)

    if not tail:
        # '>=' so that a line at exactly day_end counts as the next day,
        # matching the half-open interval used above
        next_t = query_db(
            '''select t from loglines where chan=%s
            and t >= %s order by idx asc limit 1;''',
            [chan, day_end], one=True)

        if next_t is not None:
            next_day = next_t['t'].strftime(Date_Short_Format)

    # Generate url for css file based on config value
    css_url = url_for('static', filename=CSS_File)

    chan_list = gen_chanlist(chan, show_all)

    # Return the HTMLized text
    return render_template('log.html',
                           css_url   = css_url,
                           app_root  = App_Root,
                           chan      = chan,
                           chan_list = chan_list,
                           loglines  = lines,
                           sel       = (sel_start, sel_end),
                           date      = date,
                           prev_day  = prev_day,
                           next_day  = next_day,
                           rev       = not rev,
                           idle_day  = Days_Hide,
                           show_all  = show_all)


# Primarily for use with 'era 1' and 'era 2' :
# Get arbitrary log item by chan and raw line index
@app.route('/ilog/<chan>/<idx>')
def ilog(chan, idx):
    """Redirect to the dated log page that contains line ``idx`` of ``chan``.

    Args:
        chan: Channel name from the URL.
        idx:  Raw line index from the URL (arrives as text).

    An unknown channel, a non-numeric index, or an index with no matching
    line redirects to the default log page.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # Handle rubbish idx: the value is arbitrary URL text, so make sure
    # it is a number before it is compared against the idx column.
    # NOTE(review): assumes loglines.idx is an integer column -- confirm.
    try:
        line_idx = int(idx)
    except ValueError:
        return redirect(url_for('log'))

    # Attempt to locate given chan/idx:
    item = query_db(
        '''select * from loglines where chan=%s and idx = %s ;''',
        [chan, line_idx], one=True)

    # If given chan/idx not found:
    if item is None:
        return redirect(url_for('log'))

    # Determine date where item appears in log :
    item_date = item['t'].strftime(Date_Short_Format)

    # Go there. The fragment uses the stored index, so that it matches
    # the id of the rendered line even for input such as '007'.
    return redirect(App_Root + chan + "/" + item_date + "#" + str(item['idx']))


@app.route('/log-raw/<chan>')
def rawlog(chan):
    """Return a range of a channel's lines as plain text.

    The inclusive index range comes from the 'istart' and 'iend' query
    parameters (both default to 0). Each line is emitted as
    'idx;timestamp;speaker;payload', or 'idx;timestamp;*;speaker payload'
    for actions. Bad input yields a plain-text 'EGGOG' message.
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return Response("EGGOG: No such Channel!", mimetype='text/plain')

    # Requested index bounds:
    first = request.args.get('istart', default = 0, type = int)
    last = request.args.get('iend', default = 0, type = int)

    # Malformed bounds?
    if first > last:
        return Response("EGGOG: Start must precede End!",
                        mimetype='text/plain')

    # Demanded too many in one burst ?
    if (last - first) > Max_Raw_Ln :
        return Response("EGGOG: May request Max. of %s Lines !" % Max_Raw_Ln,
                        mimetype='text/plain')

    # Get the loglines from DB
    rows = query_db(
        '''select * from loglines where chan=%s
        and idx between %s and %s order by idx asc;''',
                     [chan, first, last], one=False)

    # Emit raw lines in classical Phf format:
    out = []
    for l in rows:
        is_action = l['self']
        # Actions carry a '*;' marker and a space after the speaker;
        # ordinary lines have no marker and a ';' after the speaker
        action = "*;" if is_action else ""
        speaker = ("%s " if is_action else "%s;") % l['speaker']
        out.append("%s;%s;%s%s%s\n" % (l['idx'],
                                       l['t'].strftime('%s'),
                                       action,
                                       speaker,
                                       l['payload']))

    # Return plain text:
    return Response("".join(out), mimetype='text/plain')


# The only characters allowed to survive in a speaker name used for search
Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"

def sanitize_speaker(s):
    """Return ``s`` with every character not listed in Name_Chars removed."""
    return "".join(letter for letter in s if letter in Name_Chars)


def re_escape(s):
    """Return ``s`` with a backslash placed before each of the characters
    ( ) { } [ ] . * ? | ^ $ \\ + and - ; everything else is unchanged."""
    def add_backslash(match):
        return "\\" + match.group(0)

    return re.sub(r"[(){}\[\].*?|^$\\+-]", add_backslash, s)

# Search knob. Supports 'chan' parameter.
@app.route('/log-search')
def logsearch():
    """Search logged lines and render the finds with 'searchres.html'.

    Query parameters read:
        chan:  Channel to search, or 'all' for every logged channel
               (defaults to Default_Chan).
        q:     Whitespace-separated search tokens. Tokens of the form
               'from:nick' or 'f:nick' restrict the speaker; every other
               token must occur in the line's payload.
        after: Number of results to skip, for paging (defaults to 0).

    A query shorter than Min_Query_Length, or an unknown channel,
    redirects to the default log page.
    """
    # The query params:
    chan = request.args.get('chan', default = Default_Chan, type = str)
    query = request.args.get('q', default = '', type = str)
    offset = request.args.get('after', default = 0, type = int)

    # The value becomes a SQL OFFSET, which must not be negative; treat
    # a negative request as 'start from the first result'.
    offset = max(offset, 0)

    # Forbid query that is too short:
    if len(query) < Min_Query_Length:
        return redirect(url_for('log'))

    if chan == 'all':
        # search in all logged channels, and tag each line with its chan
        chans = Channels
        legend = "<i>all logged channels</i>"
        showchan = True
        show_all = 1
    elif chan in Channels:
        # search in selected channel only
        chans = [chan]
        legend = chan
        showchan = False
        show_all = 0
    else:
        # Handle possible rubbish chan:
        return redirect(url_for('log'))

    # Separate out "from:foo" tokens and ordinary ones:
    tokens_standard = []
    from_users = []
    for t in query.split():
        if t.startswith(("from:", "f:")):
            from_users.append(t.split(':')[1]) # Record user for 'from' query
        else:
            tokens_standard.append(t)

    # Wrap everything in '%' for substring matching with ILIKE
    from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
    tokens_orig = [re_escape(t) for t in tokens_standard]
    tokens_formed = ['%' + t + '%' for t in tokens_orig]

    # Perform the search on DB and get the finds
    if not from_users:
        searchres = query_db(
            '''select * from loglines where chan = any(%s)
            and payload ilike all(%s) order by t desc
            limit %s offset %s;''',
            [(chans,),
             tokens_formed,
             Max_Search_Results,
             offset], one=False)
    else:
        searchres = query_db(
            '''select * from loglines where chan = any(%s)
            and speaker ilike any(%s)
            and payload ilike all(%s) order by t desc
            limit %s offset %s;''',
            [(chans,),
             from_users,
             tokens_formed,
             Max_Search_Results,
             offset], one=False)

    # Number of search results returned in this query
    nres = len(searchres)

    # Whether to display 'back' button :
    back = (offset != 0)

    # Whether to display 'forward' button (a full page may have more) :
    forw = (nres == Max_Search_Results)

    # Starting index of search results
    sres = offset

    # Ending index of search results
    eres = offset + min(nres, Max_Search_Results)

    # Generate url for css file based on config value
    css_url = url_for('static', filename=CSS_File)

    chan_list = gen_chanlist(chan, show_all)

    return render_template('searchres.html',
                           css_url     = css_url,
                           query       = query,
                           hquery      = html_escape(query),
                           legend      = legend,
                           sres        = sres,
                           eres        = eres,
                           back        = back,
                           forw        = forw,
                           psize       = Max_Search_Results,
                           chan        = chan,
                           chan_list   = chan_list,
                           tokens      = tokens_orig,
                           loglines    = searchres,
                           showchan    = showchan,
                           show_all    = show_all)

# Comment this out if you don't have one
@app.route('/favicon.ico')
def favicon():
    """Redirect the browser's favicon request to the static 'favicon.ico'."""
    icon_url = url_for('static', filename='favicon.ico')
    return redirect(icon_url)


## App Mode
# When this file is executed directly, serve the app with Flask's own
# server (threaded) on the configured port.
if __name__ == '__main__':
    app.run(threaded=True, port=WWW_Port)