diff -uNr a/logotron/MANIFEST.TXT b/logotron/MANIFEST.TXT
--- a/logotron/MANIFEST.TXT false
+++ b/logotron/MANIFEST.TXT 824bfece7088516aaadfdb1d0e37fbae915602d0f5ded55cbc39401af9ceefa8a62f8bf04b35b53811d18a4ec8fea553571a384bf9fdda7d4e76bf31a0d7190d
@@ -0,0 +1 @@
+589248 logotron_genesis "Genesis."
diff -uNr a/logotron/README.txt b/logotron/README.txt
--- a/logotron/README.txt false
+++ b/logotron/README.txt 2209f578062ecc5636e67fcb9904cd26204c832ffd170b373773b6b6e1bd1fb85e79e99be44a49bb1aabc99cdae4f6df4cef203bdadbe3773c3586048a61888f
@@ -0,0 +1,73 @@
+This is the Aug 2019 draft of the S.NSA WWW logotron and IRC bot kit,
+as can be seen presently at http://logs.nosuchlabs.com/log .
+
+To make your own installation, you will need:
+
+(1) Traditional 'python' 2.7.
+(2) 'flask' lib for (1).
+(3) 'psycopg2' lib for (1).
+(4) 'postgres' (9 or 10).
+(5) A WWW server that knows how to proxy.
+
+To use the kit, you will first need to create a user and DB, e.g.:
+
+su - postgres
+psql
+
+create user nsabot createdb;
+alter role nsabot superuser;
+create database nsalog;
+grant all privileges on database nsalog to nsabot ;
+
+... you can take 'super' away from this user after the 1st run;
+it is needed only in order to let him load the pg_trgm indexer
+plugin.
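+
+E.g., once 'init_db.sh' (below) has loaded the schema, and with it the
+pg_trgm extension, the following, in psql, ought to do it:
+
+alter role nsabot nosuperuser;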
+
+Next, run 'init_db.sh' (alter the constants to match the
+names of your postgres user and the DB); this creates the schema.
+
+Then see 'eat.sh' and the 'eat_dump.py' it uses, re how to
+fill your log archive DB. 'eat_dump.py' eats in Phf's classical
+format, e.g.:
+
+1926177;1564727032;mp_en_viaje;in the meantime, everyone's invited on trilema & other blogs.
+
+where 1926177 is the absolute line index (in the given chan), 1564727032
+is the unix epochal timestamp, mp_en_viaje is the speaker (if he is
+'actioning', the speaker field contains a '*' and his name begins the
+payload), and the remainder of the line is the payload.
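+
+For reference, here is roughly the split which 'eat_dump.py' performs
+on each such line (illustration only, not part of the kit) :
+
+import re
+line = "1926177;1564727032;mp_en_viaje;in the meantime, everyone's invited on trilema & other blogs."
+idx, t, speaker, payload = re.search("(\d+)\;(\d+)\;([^;]+)\;(.*$)", line).groups()
+# idx = '1926177', t = '1564727032', speaker = 'mp_en_viaje', and
+# payload is the rest; a speaker of '*' marks an 'action', in which
+# case the actual nick is the first word of the payload.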
+
+You WILL need to adjust the constants in 'eat_dump.py'; it is not
+currently capable of eating a config file. Set these to your DB
+and PG user.
+
+Now, adjust the constants in 'nsabot.conf' (rename per taste)
+to specify your IRC params, name of bot, host at which www
+logger will reside, and other knob values.
+
+Adjust the three 'flask' templates in 'templates' subdir to
+give the desired look and feel for the www end. Currently we are using
+Phf's classic style sheet, with minor modifications.
+
+'reader.py' takes one mandatory command-line argument: full path
+to the config above. Same for 'bot.py', which is the IRC bot.
+
+Run these via e.g. 'nohup ./bot.py nsabot.conf &' and
+'nohup ./reader.py nsabot.conf &', and let your proxying WWW server
+know how to reach the latter's port (the 'www_port' knob).
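+
+E.g., if the proxying httpd happens to be nginx (any proxy-capable
+WWW server will do; this block is purely illustrative), something like
+the following, inside the relevant 'server' stanza, will reach
+reader.py at the stock 'www_port' of 5002 :
+
+location / {
+    proxy_pass http://127.0.0.1:5002;
+}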
+
+For bot.py you will need a registered nick on fleanode (or wherever
+it is used). There are no fleanode-specific hacks in the bot, ergo
+it can be stood up behind ZNC (although this has not been tested).
+
+Certain important features are presently unimplemented, in no order:
+(1) Backlinkage.
+(2) Search result pagination.
+(3) Double-quoted search terms.
+(4) Paste archiving.
+(5) Multi-headed IRC bot for weather resistance.
+(6) 'Ecologically clean' export of raw log material.
+(7) Informative eggogology for bot commands.
+(8) Automatic synchronization with mirrors (see 6).
+
+A ZNC log eater is also required, to properly fill in the archives.
+This is not yet available at the time of this writing.
diff -uNr a/logotron/bot.py b/logotron/bot.py
--- a/logotron/bot.py false
+++ b/logotron/bot.py a168b17f05e71bec4ed700eda6ae588d080f41d1b93842044709fc8349d1a44116692fcccdc10861dc1e4e317f02f7456e70dcf66945fd9451d83b83f09a85e4
@@ -0,0 +1,446 @@
+#!/usr/bin/python
+
+import ConfigParser, sys, logging, socket, time, re, requests, urllib
+from urllib import quote
+
+# DBism
+import psycopg2, psycopg2.extras
+import psycopg2.extensions
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
+import time, datetime
+from datetime import datetime
+
+##############################################################################
+
+cfg = ConfigParser.ConfigParser()
+
+##############################################################################
+
+# Single mandatory arg: config file path
+if len(sys.argv[1:]) != 1:
+ # If no args, print usage and exit:
+ print sys.argv[0] + " CONFIG"
+ exit(0)
+
+# Read Config
+cfg.readfp(open(sys.argv[1]))
+
+# Get log path
+logpath = cfg.get("bofh", "log")
+
+# Get IRCism debug toggle
+irc_dbg = int(cfg.get("irc", "irc_dbg"))
+if irc_dbg == 1:
+ log_lvl = logging.DEBUG
+else:
+ log_lvl = logging.INFO
+
+# Init logo
+logging.basicConfig(filename=logpath, filemode='a', level=log_lvl,
+ format='%(asctime)s %(levelname)s %(message)s',
+ datefmt='%d-%b-%y %H:%M:%S')
+
+# Date format used in log lines
+Date_Short_Format = "%Y-%m-%d"
+
+# Date format used in echoes
+Date_Long_Format = "%Y-%m-%d %H:%M:%S"
+
+##############################################################################
+# Get the remaining knob values:
+
+try:
+ # IRCism:
+ Buf_Size = int(cfg.get("tcp", "bufsize"))
+ Timeout = int(cfg.get("tcp", "timeout"))
+ TX_Delay = float(cfg.get("tcp", "t_delay"))
+ Servers = [x.strip() for x in cfg.get("irc", "servers").split(',')]
+ Port = int(cfg.get("irc", "port"))
+ Nick = cfg.get("irc", "nick")
+ Pass = cfg.get("irc", "pass")
+ Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
+ Join_Delay = int(cfg.get("irc", "join_t"))
+ Prefix = cfg.get("control", "prefix")
+ # DBism:
+ DB_Name = cfg.get("db", "db_name")
+ DB_User = cfg.get("db", "db_user")
+    DB_DEBUG = int(cfg.get("db", "db_debug"))
+ # Logism:
+ Base_URL = cfg.get("logotron", "base_url")
+ Era = int(cfg.get("logotron", "era"))
+ NewChan_Idx = int(cfg.get("logotron", "newchan_idx"))
+ Src_URL = cfg.get("logotron", "src_url")
+
+except Exception as e:
+ print "Invalid config: ", e
+ exit(1)
+
+##############################################################################
+
+# Connect to the given DB
+try:
+ db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
+except Exception:
+ print "Could not connect to DB!"
+ logging.error("Could not connect to DB!")
+ exit(1)
+else:
+ logging.info("Connected to DB!")
+
+##############################################################################
+
+def close_db():
+ db.close()
+
+def exec_db(query, args=()):
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ if (DB_DEBUG): logging.debug("query: '{0}'".format(query))
+ if (DB_DEBUG): logging.debug("args: '{0}'".format(args))
+ cur.execute(query, args)
+
+def query_db(query, args=(), one=False):
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ if (DB_DEBUG): logging.debug("query: '{0}'".format(query))
+ cur.execute(query, args)
+ rv = cur.fetchone() if one else cur.fetchall()
+ if (DB_DEBUG): logging.debug("query res: '{0}'".format(rv))
+ return rv
+
+def rollback_db():
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ cur.execute("ROLLBACK")
+ db.commit()
+
+def commit_db():
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ db.commit()
+
+
+##############################################################################
+# IRCism
+##############################################################################
+
+# Used to compute 'uptime'
+time_last_conn = datetime.now()
+
+# Init socket:
+sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+
+# Set keepalive:
+sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+
+# Initially we are not connected to anything
+connected = False
+
+# Connect to given host:port; return whether connected
+def connect(host, port):
+ logging.info("Connecting to %s:%s" % (host, port))
+ sock.settimeout(Timeout)
+ try:
+ sock.connect((host, port))
+ except (socket.timeout, socket.error) as e:
+ logging.warning(e)
+ return False
+ except Exception as e:
+ logging.exception(e)
+ return False
+ else:
+ logging.info("Connected.")
+ return True
+
+
+# Attempt connect to each of hosts, in order, on port; return whether connected
+def connect_any(hosts, port):
+ for host in hosts:
+ if connect(host, port):
+ return True
+ return False
+
+
+# Transmit IRC message
+def send(message):
+ global connected
+ if not connected:
+ logging.warning("Tried to send while disconnected?")
+ return False
+ time.sleep(TX_Delay)
+ logging.debug("> '%s'" % message)
+ message = "%s\r\n" % message
+ try:
+ sock.send(message.encode("utf-8"))
+ except (socket.timeout, socket.error) as e:
+ logging.warning("Socket could not send! Disconnecting.")
+ connected = False
+ return False
+ except Exception as e:
+ logging.exception(e)
+ return False
+
+
+# Speak given message on a selected channel
+def speak(channel, message):
+ send("PRIVMSG #%s :%s" % (channel, message))
+ # Now save what the bot spoke:
+ save_line(datetime.now(), channel, Nick, False, message)
+
+
+# Standard incoming IRC line (excludes fleanode liquishit, etc)
+irc_line_re = re.compile("""^:([^!]+)\!\S+\s+PRIVMSG\s+\#(\S+)\s+\:(.*)""")
+
+# The '#' prevents interaction via PM, this is not a PM-able bot.
+
+# 'Actions'
+irc_act_re = re.compile(""".*ACTION\s+(.*)""")
+
+
+# A line was received from IRC
+def received_line(line):
+ # Process the traditional pingpong
+ if line.startswith("PING"):
+ send("PONG " + line.split()[1])
+ else:
+ logging.debug("< '%s'" % line)
+ standard_line = re.search(irc_line_re, line)
+ if standard_line:
+ # Break this line into the standard segments
+ (user, chan, text) = [s.strip() for s in standard_line.groups()]
+ # Determine whether this line is an 'action' :
+ action = False
+ act = re.search(irc_act_re, line)
+ if act:
+ action = True
+ text = act.group(1)
+ # This line is edible, process it.
+ eat_logline(user, chan, text, action)
+
+
+# IRCate until we get disconnected
+def irc():
+    global connected, time_last_conn
+
+ # Connect to one among the specified servers, in given priority :
+ while not connected:
+ connected = connect_any(Servers, Port)
+
+ # Save time of last successful connect
+ time_last_conn = datetime.now()
+
+ # Auth to server
+ send("NICK %s\r\n" % Nick)
+ send("USER %s %s %s :%s\r\n" % (Nick, Nick, Nick, Nick))
+ send("NICKSERV IDENTIFY %s %s\r\n" % (Nick, Pass))
+
+ time.sleep(Join_Delay) # wait to join until fleanode eats auth
+
+ # Join selected channels
+ for chan in Channels:
+ logging.info("Joining channel '%s'..." % chan)
+ send("JOIN #%s\r\n" % chan)
+
+ while connected:
+ try:
+ data = sock.recv(Buf_Size)
+ except socket.timeout as e:
+ logging.debug("Listen timed out")
+ continue
+ except socket.error as e:
+ logging.warning("Listen socket error, disconnecting.")
+ connected = False
+ continue
+ except Exception as e:
+ logging.exception(e)
+ connected = False
+ continue
+ else:
+ if len(data) == 0:
+ logging.warning("Listen socket closed, disconnecting.")
+ connected = False
+ continue
+ try:
+ data = data.strip(b'\r\n').decode("utf-8")
+ for l in data.splitlines():
+ received_line(l)
+ continue
+ except Exception as e:
+ logging.exception(e)
+ continue
+
+##############################################################################
+
+html_escape_table = {
+ "&": "&",
+ '"': """,
+ "'": "'",
+ ">": ">",
+ "<": "<",
+}
+
+def html_escape(text):
+ res = ("".join(html_escape_table.get(c,c) for c in text))
+ return urllib.quote(res.encode('utf-8'))
+
+
+searcher_re = re.compile("""(\d+) Results""")
+
+# Retrieve a search result count using the WWWistic frontend.
+# This way it is not necessary to have query parser in two places.
+# However it is slightly wasteful of CPU (requires actually loading results.)
+def get_search_res(chan, query):
+ try:
+ esc_q = html_escape(query)
+ url = Base_URL + "log-search?q=" + esc_q + "&chan=" + chan
+ res = requests.get(url).text
+        t = res[res.find('<title>') + 7 : res.find('</title>')].strip()
+ found = searcher_re.match(t)
+ if found:
+ output = "[" + url + "]" + "[" + found.group(1)
+ output += """ results for "%s" in #%s]""" % (query, chan)
+ return output
+ else:
+ return """No results found for "%s" in #%s""" % (query, chan)
+ except Exception as e:
+ logging.exception(e)
+ return "No results returned (is logotron WWW up ?)"
+
+##############################################################################
+
+# Commands:
+
+def cmd_help(arg, user, chan):
+ # Speak the 'help' text
+ speak(chan, "%s: my valid commands are: %s" %
+ (user, ', '.join(Commands.keys())));
+
+def cmd_search(arg, user, chan):
+ logging.debug("search: '%s'" % arg)
+ speak(chan, get_search_res(chan, arg))
+
+def cmd_seen(arg, user, chan):
+ speak(chan, "%s: this command is not yet implemented." % user);
+
+def cmd_src(arg, user, chan):
+ speak(chan, "%s: my source code can be seen at: %s" % (user, Src_URL));
+
+def cmd_uptime(arg, user, chan):
+ uptime_txt = ""
+ uptime = (datetime.now() - time_last_conn)
+ days = uptime.days
+ hours = uptime.seconds/3600
+ minutes = (uptime.seconds%3600)/60
+ uptime_txt += '%dd ' % days
+ uptime_txt += '%dh ' % hours
+ uptime_txt += '%dm' % minutes
+ # Speak the uptime
+ speak(chan, "%s: time since my last reconnect : %s" %
+ (user, uptime_txt));
+
+Commands = {
+ "help" : cmd_help,
+ "s" : cmd_search,
+ "seen" : cmd_seen,
+ "uptime" : cmd_uptime,
+ "src" : cmd_src
+}
+
+##############################################################################
+
+# Save given line to perma-log
+def save_line(time, chan, speaker, action, payload):
+ ## Put in DB:
+ try:
+ # Get index of THIS new line to be saved
+ last_idx = query_db(
+ '''select idx from loglines where chan=%s
+ and idx = (select max(idx) from loglines where chan=%s) ;''',
+ [chan, chan], one=True)
+
+ # Was this chan unseen previously?
+ if last_idx == None:
+ cur_idx = NewChan_Idx # Then use the config'd start index
+ else:
+ cur_idx = last_idx['idx'] + 1 # Otherwise, get the next idx
+
+ logging.debug("Adding log line with index: %s" % cur_idx)
+
+ # Set up the insert
+ exec_db('''insert into loglines (idx, t, chan, era,
+ speaker, self, payload) values (%s, %s, %s, %s, %s, %s, %s) ; ''',
+ [cur_idx, time, chan, Era, speaker, action, payload])
+
+ # Fire
+ commit_db()
+ except Exception as e:
+ rollback_db()
+ logging.warning("DB add failed, rolled back.")
+ logging.exception(e)
+
+
+# RE for finding log refs
+logref_re = re.compile(Base_URL + """log\/([^/]+)/([^/]+)#(\d+)""")
+
+
+# All valid received lines end up here
+def eat_logline(user, chan, text, action):
+ # If somehow received line from channel which isn't in the set:
+ if chan not in Channels:
+ logging.warning(
+ "Received martian : '%s' : '%s'" % (chan, text))
+ return
+
+ # First, add the line to the log:
+ save_line(datetime.now(), chan, user, action, text)
+
+ # Then, see if the line was a command for this bot:
+ if text.startswith(Prefix):
+ cmd = text.partition(Prefix)[2].strip()
+ cmd = [x.strip() for x in cmd.split(' ', 1)]
+ if len(cmd) == 1:
+ arg = ""
+ else:
+ arg = cmd[1]
+ # Dispatch this command...
+ command = cmd[0]
+ logging.debug("Dispatching command '%s' with arg '%s'.." %
+ (command, arg))
+ func = Commands.get(command)
+ # If this command is undefined:
+ if func == None:
+ logging.debug("Invalid command: %s" % command)
+ # Utter the 'help' text as response to the sad command
+ cmd_help("", user, chan)
+ else:
+ # Is defined command, dispatch it:
+ func(arg, user, chan)
+ else:
+ # Finally, see if contains log refs:
+ for ref in re.findall(logref_re, text):
+ ref_chan, ref_date, ref_idx = ref
+ # Find this line in DB:
+ ref_line = query_db(
+ '''select t, speaker, payload from loglines
+ where chan=%s and idx=%s;''',
+ [ref_chan, ref_idx], one=True)
+ # If retrieved line is valid, echo it:
+ if ref_line != None:
+ time_txt = ref_line['t'].strftime(Date_Long_Format)
+ my_line = "Logged on %s %s: %s" % (time_txt,
+ ref_line['speaker'],
+ ref_line['payload'])
+ # Speak the line echo into the chan where ref was seen
+ speak(chan, my_line)
+
+##############################################################################
+
+# IRCate; if disconnected, reconnect
+def run():
+ while 1:
+ irc()
+ logging.warning("Disconnected, will reconnect...")
+
+##############################################################################
+
+# Run continuously.
+run()
+
+##############################################################################
diff -uNr a/logotron/eat.sh b/logotron/eat.sh
--- a/logotron/eat.sh false
+++ b/logotron/eat.sh e1f3e840c65ecaedda97fffe268490f1db0b2d2ed7541dbe00982cd49f01fad23b2d9ca9ba754550e6855051a28afde34e22bd4704a0e0f49abca66e9b58c0a3
@@ -0,0 +1 @@
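+# Feeds each Phf-style dump found in phf/ into chan 'trilema' as era 2
+# (see eat_dump.py : "Usage: ./eat_dump LOGFILE CHAN ERA").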
+for f in phf/*.txt; do ./eat_dump.py $f trilema 2 ; done
diff -uNr a/logotron/eat_dump.py b/logotron/eat_dump.py
--- a/logotron/eat_dump.py false
+++ b/logotron/eat_dump.py 5614d6523b1512656953c12732db5daa56b49288251b879427a9b8e33da7db95847e441d2ad007896182c5acb27f0ed808b072a25c12b4789cf85cc186e68f68
@@ -0,0 +1,104 @@
+#!/usr/bin/python
+
+##############################################################################
+import psycopg2, psycopg2.extras
+import psycopg2.extensions
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
+import re
+import time
+import datetime
+from datetime import datetime
+import sys
+import os
+
+# Debug Knob
+DB_DEBUG = False
+##############################################################################
+
+##############################################################################
+db = psycopg2.connect("dbname=nsalog user=nsabot") ## CHANGE THESE
+
+def close_db():
+ db.close()
+
+def exec_db(query, args=()):
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ if (DB_DEBUG): print "query: '{0}'".format(query)
+ if (DB_DEBUG): print "args: '{0}'".format(args)
+ if (DB_DEBUG): print "EXEC:"
+ cur.execute(query, args)
+
+def rollback_db():
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ cur.execute("ROLLBACK")
+ db.commit()
+
+def commit_db():
+ cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ db.commit()
+
+##############################################################################
+
+# Eat individual line of a Phf-style log dump
+def eat_logline(line, chan, era):
+ match = re.search("(\d+)\;(\d+)\;([^;]+)\;(.*$)", line)
+ if match:
+ g = match.groups()
+ self_speak = False
+
+ try:
+ idx = int(g[0]) # Serial Number of Log Line
+ time = int(g[1]) # Unix Epochal Time of Log Line
+ except Exception, e:
+ print("Malformed Line! '" + line +"' ! : " + e)
+ close_db()
+ exit(1)
+
+ speaker = g[2] # Name of Speaker
+ payload = g[3] # Payload (remainder of line)
+
+ ## If spoken line is of form "* user ..." :
+ if speaker == "*":
+ spl = payload.split(' ', 1)
+ speaker = spl[0]
+ payload = spl[1]
+ self_speak = True
+
+ ## Put in DB:
+ try:
+ exec_db('''insert into loglines (idx, t, chan, era, speaker, self, payload)
+ values (%s, %s, %s, %s, %s, %s, %s) ; ''',
+ [int(idx), datetime.fromtimestamp(time), str(chan), int(era), str(speaker),
+ bool(self_speak), str(payload)])
+ commit_db()
+ except psycopg2.IntegrityError as e:
+ rollback_db()
+ print "Dupe Ignored, Idx=", idx
+ else:
+ print("Malformed Line! '" + line +"' !")
+ close_db()
+ exit(1)
+
+
+# Eat Phf-style log dump at given path
+def eat_dump(path, chan, era):
+ with open(path) as fp:
+ for line in fp:
+ eat_logline(line, chan, era)
+
+
+##############################################################################
+
+if (len(sys.argv) == 4):
+ logdump = sys.argv[1] # Path to Phf-style log dump
+ chan = sys.argv[2] # Chan Name
+ era = sys.argv[3] # Era (integer)
+ # Eat:
+ eat_dump(logdump, chan, era)
+ close_db()
+else:
+ print "Usage: ./eat_dump LOGFILE CHAN ERA"
+ exit(0)
+
+##############################################################################
diff -uNr a/logotron/init_db.sh b/logotron/init_db.sh
--- a/logotron/init_db.sh false
+++ b/logotron/init_db.sh 63546f44db3fd7f6c48a1bee1439f2a67d3a3de6fef7eed536e8e7e95875ec53186e0383608afaf0635e4660d5dbeecaef9e620ee53369a8ea3aa148ae8ccbef
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+psql -U nsabot -d nsalog -a -f nsalog_schem.sql
diff -uNr a/logotron/nsabot.conf b/logotron/nsabot.conf
--- a/logotron/nsabot.conf false
+++ b/logotron/nsabot.conf f447f004041538581919c4d373faa6e957d8ed67e273a93704b952802eeec970213983fb8fbe45e8a913b8831d5c1dbb605d8309df1f70559969b9b1b9b7685d
@@ -0,0 +1,67 @@
+[bofh]
+
+# Path to IRC bot debuggism log
+log = nsabot.log
+
+[irc]
+servers = irc.freenode.net
+port = 6667
+
+# Bot's nick (change to yours, as with all knobs)
+nick = snsabot
+
+# All chans, for both the www end and the bot, go here:
+chans = asciilifeform-test, asciilifeform-test-2
+
+# IRC nick PW
+pass = YOURFLEANODEPW
+
+# How long to wait for fleanode to ack auth of nick before joining chans
+join_t = 20
+
+# Verbose barf of irc tx/rx
+irc_dbg = 0
+
+[tcp]
+bufsize = 4096
+
+# Recv timeout
+timeout = 30
+
+# Delay between IRC transmits - possibly ought to be longer
+t_delay = 0.1
+
+[control]
+# Command Trigger for IRC bot
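+# e.g. '!q help', '!q src', or '!q s foobar' to search the current chan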
+prefix = !q
+
+[logotron]
+# The current era.
+era = 3
+# Convention for these :
+# 1 : Age of #b-a (and earlier dark age material)
+# 2 : Phf's (and several variously-reliable) loggers
+# 3 : Present day.
+
+# Where the source lives (change to yours)
+src_url = http://not.yet
+
+# Index at which a new chan starts, so as to leave room for archive inserts
+newchan_idx = 1000000
+
+# Base URL of the logotron site (change to yours; must end with '/')
+base_url = http://logs.nosuchlabs.com/
+
+# Other people's bots (for colouration strictly)
+bots = a111, deedbot, feedbot, auctionbot, lobbesbot
+
+# On what port the www logotron will sit
+www_port = 5002
+
+[db]
+# Change to your DB (set it up so it only answers locally)
+db_name = nsalog
+db_user = nsabot
+
+# Verbose barf of DB transactions
+db_debug = 0
diff -uNr a/logotron/nsalog_schem.sql b/logotron/nsalog_schem.sql
--- a/logotron/nsalog_schem.sql false
+++ b/logotron/nsalog_schem.sql 2fc6a536ec6c147d53ee7a9ec50e947e4fb9f479bb108f23df3329ed83c3479a622ee618e41e721cb2fa8efac189d142743ab5ce3c3f4a6e90b851bd3dc9dcea
@@ -0,0 +1,29 @@
+drop table if exists loglines;
+create table loglines (
+ ser serial,
+ idx integer not null,
+ t timestamp,
+ chan text not null,
+ era integer not null,
+ speaker text not null,
+ self boolean,
+ payload text not null,
+ backlinks integer[],
+ PRIMARY KEY(idx, chan),
+ UNIQUE(idx, chan)
+);
+
+
+create index logline_idx_i on loglines(idx);
+create index logline_t_i on loglines(t);
+create index logline_chan_i on loglines(chan);
+create index logline_era_i on loglines(era);
+create index logline_speaker_i on loglines(speaker);
+create index logline_payload_i on loglines(payload);
+
+CREATE EXTENSION pg_trgm;
+
+-- drop index payload_search_idx;
+
+create index concurrently payload_search_idx
+ON loglines USING gin (payload gin_trgm_ops);
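+
+-- For reference: the GIN trigram index above is what supports the
+-- '%...%' substring matching that reader.py's /log-search leans on.
+-- Its queries have roughly this shape (illustrative values):
+--
+-- select * from loglines where chan = 'trilema'
+--   and payload ilike all(array['%foo%', '%bar%'])
+--   order by idx desc limit 1000;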
diff -uNr a/logotron/reader.py b/logotron/reader.py
--- a/logotron/reader.py false
+++ b/logotron/reader.py 5de963eb326e8f107264fb5d2dceaf715b8daff649353295ff19bfaa560946bd856f8970c69b1b6360fb003b7548fa78302423ecf83512a4bff43cfd3973f628
@@ -0,0 +1,441 @@
+#!/usr/bin/python
+
+##############################################################################
+import ConfigParser, sys
+import psycopg2, psycopg2.extras
+import psycopg2.extensions
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
+import time
+import datetime
+from datetime import timedelta
+import sys
+reload(sys)
+sys.setdefaultencoding('utf8')
+import os
+import threading
+import re
+from datetime import datetime
+from urlparse import urljoin
+from flask import Flask, request, session, url_for, redirect, \
+ render_template, abort, g, flash, _app_ctx_stack, make_response, \
+ jsonify
+from flask import Flask
+from flask.ext.cache import Cache
+##############################################################################
+
+##############################################################################
+# Single mandatory arg: config file path
+if len(sys.argv[1:]) != 1:
+ # If no args, print usage and exit:
+ print sys.argv[0] + " CONFIG"
+ exit(0)
+
+# Read Config from given conf file
+config_path = os.path.abspath(sys.argv[1])
+cfg = ConfigParser.ConfigParser()
+cfg.readfp(open(config_path))
+
+try:
+ # IRCism:
+ Nick = cfg.get("irc", "nick")
+ Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
+ Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
+ Bots.append(Nick) # Add our own bot to the bot list
+ # DBism:
+ DB_Name = cfg.get("db", "db_name")
+ DB_User = cfg.get("db", "db_user")
+    DB_DEBUG = int(cfg.get("db", "db_debug"))
+ # Logism:
+ Base_URL = cfg.get("logotron", "base_url")
+ Era = int(cfg.get("logotron", "era"))
+ # WWW:
+ WWW_Port = int(cfg.get("logotron", "www_port"))
+
+except Exception as e:
+ print "Invalid config: ", e
+ exit(1)
+
+##############################################################################
+
+##############################################################################
+### Knobs not made into config yet ###
+Default_Chan = Channels[0]
+Min_Query_Length = 3
+Max_Search_Results = 1000
+
+## Format for Date in Log Lines
+Date_Short_Format = "%Y-%m-%d"
+
+## WWW Debug Knob
+DEBUG = False
+##############################################################################
+
+app = Flask(__name__)
+cache = Cache(app,config={'CACHE_TYPE': 'simple'})
+app.config.from_object(__name__)
+
+def get_db():
+ db = getattr(g, 'db', None)
+ if db is None:
+ db = g.db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
+ return db
+
+def close_db():
+ if hasattr(g, 'db'):
+ g.db.close()
+
+@app.before_request
+def before_request():
+ g.db = get_db()
+
+@app.teardown_request
+def teardown_request(exception):
+ close_db()
+
+def query_db(query, args=(), one=False):
+ cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ if (DB_DEBUG): print "query: '{0}'".format(query)
+ cur.execute(query, args)
+ rv = cur.fetchone() if one else cur.fetchall()
+ if (DB_DEBUG): print "query res: '{0}'".format(rv)
+ return rv
+
+def exec_db(query, args=()):
+ cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ if (DB_DEBUG): print "query: '{0}'".format(query)
+ if (DB_DEBUG): print "args: '{0}'".format(args)
+ if (DB_DEBUG): print "EXEC:"
+ cur.execute(query, args)
+
+def getlast_db():
+ cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ cur.execute('select lastval()')
+ return cur.fetchone()['lastval']
+
+def commit_db():
+ cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+ g.db.commit()
+
+##############################################################################
+
+## All eggogs redirect to main page
+@app.errorhandler(404)
+def page_not_found(error):
+ return redirect(url_for('log'))
+
+##############################################################################
+
+html_escape_table = {
+ "&": "&",
+ '"': """,
+ "'": "'",
+ ">": ">",
+ "<": "<",
+}
+
+def html_escape(text):
+ return "".join(html_escape_table.get(c,c) for c in text)
+
+##############################################################################
+
+## Get base URL
+def get_base():
+ if DEBUG:
+ return request.host_url
+ return Base_URL
+
+
+# Get perma-URL corresponding to given log line
+def line_url(l):
+ return "{0}log/{1}/{2}#{3}".format(get_base(),
+ l['chan'],
+ l['t'].strftime(Date_Short_Format),
+ l['idx'])
+
+def gen_chanlist(selected_chan):
+ # Get current time
+ now = datetime.now()
+
+ s = """"""
+ for chan in Channels:
+ chan_formed = chan
+ if chan == selected_chan:
+ chan_formed = "" + chan + ""
+ s += """{2} | """.format(
+ get_base(), chan, chan_formed)
+ s += "
"
+
+ for chan in Channels:
+
+ last_time = query_db(
+ '''select t, idx from loglines where chan=%s
+ and idx = (select max(idx) from loglines where chan=%s) ;''',
+ [chan, chan], one=True)
+
+ last_time_txt = ""
+ if last_time != None:
+ span = (now - last_time['t'])
+ days = span.days
+ hours = span.seconds/3600
+ minutes = (span.seconds%3600)/60
+
+ if days != 0:
+ last_time_txt += '%dd ' % days
+ if hours != 0:
+ last_time_txt += '%dh ' % hours
+ if minutes != 0:
+ last_time_txt += '%dm' % minutes
+
+ s += """{4} | """.format(
+ get_base(),
+ chan,
+ last_time['t'].strftime(Date_Short_Format),
+ last_time['idx'],
+ last_time_txt)
+
+ else:
+ last_time_txt = ""
+ s += " | "
+
+ s += "
"
+ return s
+
+
+# Make above callable from inside htm templater:
+app.jinja_env.globals.update(gen_chanlist=gen_chanlist)
+
+
+# HTML Tag Regex
+tag_regex = re.compile("(<[^>]+>)")
+
+
+# Find the segments of a block of text which constitute HTML tags
+def get_link_intervals(str):
+ links = []
+ span = []
+ for match in tag_regex.finditer(str):
+ span = match.span()
+ links += [span]
+ return links
+
+
+# Highlight all matched tokens in given text
+def highlight_matches(strings, text):
+ e = '(' + ('|'.join(strings)) + ')'
+ return re.sub(e,
+ r"""\1""",
+ text,
+ flags=re.I)
+
+
+# Highlight matched tokens in the display of a search result logline,
+# but leave HTML tags alone
+def highlight_text(strings, text):
+ result = ""
+ last = 0
+ for i in get_link_intervals(text):
+ i_start, i_end = i
+ result += highlight_matches(strings, text[last:i_start])
+ result += text[i_start:i_end] # the HTML tag, leave it alone
+ last = i_end
+ result += highlight_matches(strings, text[last:]) # last block
+ return result
+
+
+# Regexps used in format_logline:
+boxlinks_re = re.compile('\[\s*<a href="([^"]+)">[^ <]+</a>\s*\]\[([^\[\]]+)\]')
+stdlinks_re = re.compile('(http[^ \[\]]+)')
+
+
+## Format given log line for display
+def format_logline(l, highlights = []):
+ payload = html_escape(l['payload'])
+
+ # Format ordinary links:
+    payload = re.sub(stdlinks_re, r'<a href="\1">\1</a>', payload)
+
+ # Now also format [link][text] links :
+    payload = re.sub(boxlinks_re, r'<a href="\1">\2</a>', payload)
+
+ # If this is a search result, illuminate the matched strings:
+ if highlights != []:
+ payload = highlight_text(highlights, payload)
+
+ bot = ""
+ if l['speaker'] in Bots:
+ bot = " bot"
+
+ # HTMLize the given line :
+ s = ("").format(l['idx'],
+ l['speaker'],
+ l['t'],
+ line_url(l),
+ payload,
+ bot)
+
+ return s
+
+# Make above callable from inside htm templater:
+app.jinja_env.globals.update(format_logline=format_logline)
+
+
+# Generate navbar for the given date:
+def generate_navbar(date, tail, chan):
+ cur_day = datetime.strptime(date, Date_Short_Format)
+ prev_day = cur_day - timedelta(days=1)
+ prev_day_txt = prev_day.strftime(Date_Short_Format)
+
+ s = "← {2}".format(
+ get_base(),
+ chan,
+ prev_day_txt)
+
+ if not tail:
+ next_day = cur_day + timedelta(days=1)
+ next_day_txt = next_day.strftime(Date_Short_Format)
+ s = s + " | {2} →".format(
+ get_base(),
+ chan,
+ next_day_txt)
+
+ return s
+
+# Make above callable from inside htm templater:
+app.jinja_env.globals.update(generate_navbar=generate_navbar)
+
+
+@app.route('/log/<chan>/<date>')
+@app.route('/log/<chan>', defaults={'date': None})
+@app.route('/log/', defaults={'chan': Default_Chan, 'date': None})
+@app.route('/log', defaults={'chan': Default_Chan, 'date': None})
+def log(chan, date):
+ # Handle rubbish chan:
+ if chan not in Channels:
+ return redirect(url_for('log'))
+
+ # Get current time
+ now = datetime.now()
+
+ # Whether we are viewing 'current' tail
+ tail = False
+
+ # If viewing 'current' log:
+ if date == None:
+ date = now.strftime(Date_Short_Format)
+ tail = True
+
+ # Parse given date, and redirect to default log if rubbish:
+ try:
+ day_start = datetime.strptime(date, Date_Short_Format)
+ except Exception, e:
+ return redirect(url_for('log'))
+
+ # Determine the end of the interval being shown
+ day_end = day_start + timedelta(days=1)
+
+ # Get the loglines from DB
+ lines = query_db(
+ '''select * from loglines where chan=%s
+ and t between %s and %s order by idx asc;''',
+ [chan, day_start, day_end], one=False)
+
+ # Return the HTMLized text
+ return render_template('log.html',
+ chan = chan,
+ loglines = lines,
+ date = date,
+ tail = tail)
+
+
+
+Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"
+
+def sanitize_speaker(s):
+ return "".join([ch for ch in s if ch in Name_Chars])
+
+
+def re_escape(s):
+ return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s)
+
+# Search knob. Supports 'chan' parameter.
+@app.route('/log-search')
+def logsearch():
+ # The query params:
+ chan = request.args.get('chan', default = Default_Chan, type = str)
+ query = request.args.get('q', default = '', type = str)
+ # page_num = request.args.get('page', default = 0, type = int)
+
+ # Handle rubbish chan:
+ if chan not in Channels:
+ return redirect(url_for('log'))
+
+ nres = 0
+ searchres = []
+ tokens_orig = []
+ search_head = "Query is too short!"
+ # Forbid query that is too short:
+ if len(query) >= Min_Query_Length:
+ # Get the search tokens to use:
+ tokens = query.split()
+ tokens_standard = []
+ from_users = []
+
+ # separate out "from:foo" tokens and ordinary:
+ for t in tokens:
+ if t.startswith("from:") or t.startswith("f:"):
+ from_users.append(t.split(':')[1]) # Record user for 'from' query
+ else:
+ tokens_standard.append(t)
+
+ from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
+ tokens_orig = [re_escape(t) for t in tokens_standard]
+ tokens_formed = ['%' + t + '%' for t in tokens_orig]
+
+ # Query is usable; perform the search on DB and get the finds
+ if from_users == []:
+ searchres = query_db(
+ '''select * from loglines where chan=%s
+ and payload ilike all(%s) order by idx desc limit %s;''',
+ [chan,
+ tokens_formed,
+ Max_Search_Results], one=False)
+ else:
+ print "from=", from_users
+
+ searchres = query_db(
+ '''select * from loglines where chan=%s
+ and speaker ilike any(%s)
+ and payload ilike all(%s) order by idx desc limit %s;''',
+ [chan,
+ from_users,
+ tokens_formed,
+ Max_Search_Results], one=False)
+
+
+ # Number of entries found
+ nres = len(searchres)
+ search_head = "{0} entries found in {1} for '{2}' :".format(
+ nres, chan, html_escape(query))
+
+ # No paging support just yet:
+ return render_template('searchres.html',
+ query = query,
+ nres = nres,
+ chan = chan,
+ search_head = search_head,
+ tokens = tokens_orig,
+ loglines = searchres)
+
+
+# Comment this out if you don't have one
+@app.route('/favicon.ico')
+def favicon():
+ return redirect(url_for('static', filename='favicon.ico'))
+
+
+## App Mode
+if __name__ == '__main__':
+ app.run(threaded=True, port=WWW_Port)
diff -uNr a/logotron/static/README b/logotron/static/README
--- a/logotron/static/README false
+++ b/logotron/static/README d89bc958b029448a46d627da724957403b8db562b3a8756236142341a5a998212440f9e5a5d5679633d89f19c5f07217195165e4c7931ce57697bb678bea6fe4
@@ -0,0 +1 @@
+favicon.ico goes in this dir.
diff -uNr a/logotron/templates/layout.html b/logotron/templates/layout.html
--- a/logotron/templates/layout.html false
+++ b/logotron/templates/layout.html 587ccf877ba1bff6dfa4547ebfed8fae1b071cf243fa4bd77f56f91c0a1b6a692137ff9dc5b458c34783ac632bcb83abac664073ab82d79649709810f4cf9c0a
@@ -0,0 +1,127 @@
+
+
+
+
+ {% block title %}
+ {% endblock %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ |
+
+
+ {{ gen_chanlist( chan ) | safe }}
+ |
+
+
+
+
+
+ |
+
+
+
+
+
+
+
+
+
+
+
+ {% block body %}{% endblock %}
+
+
+
+
diff -uNr a/logotron/templates/log.html b/logotron/templates/log.html
--- a/logotron/templates/log.html false
+++ b/logotron/templates/log.html 44d51aaef738815d01d92cbf6a1d0d44803627579352dbaafe0257346e5fdff9d76ec64fd00ebb435e177869502f97c1b72c7322ee6108c6784011d222966ba7
@@ -0,0 +1,17 @@
+{% extends "layout.html" %}
+
+{% block title %}
+#{{ chan }} | {{ date }}
+{% endblock %}
+
+{% block body %}
+
+{{ generate_navbar(date, tail, chan) | safe }}
+
+{% for l in loglines %}
+{{ format_logline(l) | safe }}
+{% endfor %}
+
+{{ generate_navbar(date, tail, chan) | safe }}
+
+{% endblock %}
diff -uNr a/logotron/templates/searchres.html b/logotron/templates/searchres.html
--- a/logotron/templates/searchres.html false
+++ b/logotron/templates/searchres.html a60ca105a579ed2b256dbfc92a7ca7468d17ba875a3215217bb1bb2ea15ac04e9e56c66205b1a4387263b068c39f9d72f08e76edce2b371f4f1aebe24bbf7f03
@@ -0,0 +1,17 @@
+{% extends "layout.html" %}
+
+{% block title %}
+{{ nres }} Results for {{ query }} in #{{ chan }}
+{% endblock %}
+
+{% block body %}
+
+{{ search_head | safe }}
+
+
+
+{% for l in loglines %}
+{{ format_logline(l, tokens) | safe }}
+{% endfor %}
+
+{% endblock %}