#!/usr/bin/python

import ConfigParser
import sys
import logging
import socket
import time
import re
import requests
import urllib
from urllib import quote

# DBism
import psycopg2
import psycopg2.extras
import psycopg2.extensions
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)

import datetime
from datetime import datetime

##############################################################################
# Version. If changing this program, always set this to same # as in MANIFEST
Ver = 597858
##############################################################################

cfg = ConfigParser.ConfigParser()

##############################################################################

# Single mandatory arg: config file path
if len(sys.argv[1:]) != 1:
    # Wrong number of args: print usage and exit.
    print(sys.argv[0] + " CONFIG")
    sys.exit(0)

# Read config; the file handle is released as soon as parsing is done.
with open(sys.argv[1]) as cfg_file:
    cfg.readfp(cfg_file)


def _cfg_flag(section, option):
    """Return True when the given config option holds an affirmative value.

    ConfigParser.get() always returns a string, so the raw value can neither
    be compared against the integer 1 nor used as a truth value ("0" is a
    non-empty, hence truthy, string).
    """
    value = cfg.get(section, option).strip().lower()
    return value in ("1", "true", "yes", "on")


# Get log path
logpath = cfg.get("bofh", "log")

# Get IRCism debug toggle
irc_dbg = _cfg_flag("irc", "irc_dbg")

if irc_dbg:
    log_lvl = logging.DEBUG
else:
    log_lvl = logging.INFO

# Init logging
logging.basicConfig(filename=logpath, filemode='a', level=log_lvl,
                    format='%(asctime)s %(levelname)s %(message)s',
                    datefmt='%d-%b-%y %H:%M:%S')

# Day-only date format (used in permalinks and cross-channel echoes)
Date_Short_Format = "%Y-%m-%d"

# Full date-and-time format (used in 'seen' replies and same-channel echoes)
Date_Long_Format = "%Y-%m-%d %H:%M:%S"

##############################################################################

# Get the remaining knob values:
try:
    # IRCism:
    Buf_Size = int(cfg.get("tcp", "bufsize"))
    Timeout = int(cfg.get("tcp", "timeout"))
    TX_Delay = float(cfg.get("tcp", "t_delay"))
    Servers = [x.strip() for x in cfg.get("irc", "servers").split(',')]
    Port = int(cfg.get("irc", "port"))
    Nick = cfg.get("irc", "nick")
    Pass = cfg.get("irc", "pass")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Join_Delay = int(cfg.get("irc", "join_t"))
    Discon_TO = int(cfg.get("irc", "disc_t"))
    Prefix = cfg.get("control", "prefix")
    # DBism:
    DB_Name = cfg.get("db", "db_name")
    DB_User = cfg.get("db", "db_user")
    DB_DEBUG = _cfg_flag("db", "db_debug")
    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    Era = int(cfg.get("logotron", "era"))
    NewChan_Idx = int(cfg.get("logotron", "newchan_idx"))
    Src_URL = cfg.get("logotron", "src_url")
except Exception as e:
    # Two spaces after the colon reproduce the original `print "...", e`.
    print("Invalid config:  %s" % e)
    sys.exit(1)

##############################################################################

# Connect to the given DB
try:
    db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
except Exception:
    print("Could not connect to DB!")
    logging.error("Could not connect to DB!")
    sys.exit(1)
else:
    logging.info("Connected to DB!")

##############################################################################


def close_db():
    """Close the module-wide DB connection."""
    db.close()


def exec_db(query, args=()):
    """Execute a statement that yields no rows; the caller commits."""
    cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            logging.debug("query: '{0}'".format(query))
            logging.debug("args: '{0}'".format(args))
        cur.execute(query, args)
    finally:
        cur.close()


def query_db(query, args=(), one=False):
    """Run a query and return its rows as dict-like records.

    With one=True only the first row is returned (None when there is none);
    otherwise a list of all rows.
    """
    cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            logging.debug("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if DB_DEBUG:
            logging.debug("query res: '{0}'".format(rv))
        return rv
    finally:
        cur.close()


def rollback_db():
    """Abandon the current transaction."""
    # Use the driver's own rollback instead of sending "ROLLBACK" as SQL and
    # then committing: it keeps psycopg2's view of the transaction in sync.
    db.rollback()


def commit_db():
    """Make the current transaction permanent."""
    db.commit()

##############################################################################
# IRCism
##############################################################################

# Used to compute 'uptime'
time_last_conn = datetime.now()

# Used to monitor disconnection timeout
time_last_recv = datetime.now()

# Socket will be here:
sock = None

# Initially we are not connected to anything
connected = False


def init_socket():
    """Create a fresh TCP socket in `sock` and mark the bot disconnected."""
    global sock
    global connected  # without this the assignment below sets a dead local
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Disable Nagle's algorithm for transmit operations
    sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    # Request quick ACKs for receive operations, Linux-only
    try:
        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_QUICKACK, 1)
    except Exception as e:
        logging.warning(e)
    connected = False


def deinit_socket():
    """Close the socket and mark the bot disconnected."""
    global connected
    global sock
    sock.close()
    connected = False


# Connect to given host:port; return whether connected
def connect(host, port):
    logging.info("Connecting to %s:%s" % (host, port))
    sock.settimeout(Timeout)
    try:
        sock.connect((host, port))
    except (socket.timeout, socket.error) as e:
        logging.warning(e)
        return False
    except Exception as e:
        logging.exception(e)
        return False
    else:
        logging.info("Connected.")
        return True


# Attempt connect to each of hosts, in order, on port; return whether connected
def connect_any(hosts, port):
    for host in hosts:
        if connect(host, port):
            return True
    return False


# Transmit IRC message
def send(message):
    """Send one IRC message; the trailing CRLF is appended here.

    Returns True when the message was handed to the socket in full and
    False otherwise. A socket-level failure also tears the connection down.
    """
    global connected
    if not connected:
        logging.warning("Tried to send while disconnected?")
        return False
    time.sleep(TX_Delay)
    # NOTE: everything sent is logged at debug level, credentials included.
    logging.debug("> '%s'" % message)
    message = "%s\r\n" % message
    try:
        # sendall() keeps writing until the whole message is out, whereas
        # send() may transmit only a prefix of it.
        sock.sendall(message.encode("utf-8"))
    except (socket.timeout, socket.error) as e:
        logging.warning("Socket could not send! Disconnecting.")
        deinit_socket()
        return False
    except Exception as e:
        logging.exception(e)
        return False
    return True


# Speak given message on a selected channel
def speak(channel, message):
    send("PRIVMSG #%s :%s" % (channel, message))
    # Now save what the bot spoke:
    save_line(datetime.now(), channel, Nick, False, message)


# Standard incoming IRC line (excludes fleanode liquishit, etc)
irc_line_re = re.compile(r"""^:([^!]+)\!\S+\s+PRIVMSG\s+\#(\S+)\s+\:(.*)""")

# The '#' prevents interaction via PM, this is not a PM-able bot.
# 'Actions': a CTCP ACTION payload starts with \x01ACTION. Anchoring on the
# marker keeps ordinary chat that merely contains the word "ACTION" from
# being mistaken for an action.
irc_act_re = re.compile(r"""^\x01ACTION\s+(.*)""")


# A line was received from IRC
def received_line(line):
    """Handle one decoded IRC line: answer PINGs, log channel messages."""
    # Process the traditional pingpong
    if line.startswith("PING"):
        # Echo the token back when there is one; a bare PING gets a bare PONG.
        send(" ".join(["PONG"] + line.split()[1:2]))
        return

    logging.debug("< '%s'" % line)
    standard_line = irc_line_re.search(line)
    if not standard_line:
        return

    # Break this line into the standard segments
    (user, chan, text) = [s.strip() for s in standard_line.groups()]

    # Determine whether this line is an 'action' :
    action = False
    act = irc_act_re.search(text)
    if act:
        action = True
        text = act.group(1)
        # Remove the trailing CTCP delimiter if present:
        if text[-1:] == '\x01':
            text = text[:-1]

    # This line is edible, process it.
    eat_logline(user, chan, text, action)


def _decode_irc_bytes(raw):
    """Decode one raw IRC line: UTF-8 when valid, otherwise Latin-1."""
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        return raw.decode("latin-1")


# IRCate until we get disconnected
def irc():
    """Connect, authenticate, join the channels and process input.

    Returns once the connection has been lost or closed.
    """
    global connected
    global time_last_conn
    global time_last_recv
    global sock

    # Initialize a socket
    init_socket()

    # Connect to one among the specified servers, in given priority :
    while not connected:
        connected = connect_any(Servers, Port)

    # Save time of last successful connect
    time_last_conn = datetime.now()
    # Restart the silence timer too, so that time spent disconnected is not
    # counted against the new connection.
    time_last_recv = time_last_conn

    # Auth to server (send() appends the CRLF terminator itself)
    send("NICK %s" % Nick)
    send("USER %s %s %s :%s" % (Nick, Nick, Nick, Nick))

    # If this is a production bot, rather than test, there will be a PW:
    if Pass != "":
        send("NICKSERV IDENTIFY %s %s" % (Nick, Pass))

    # NOTE(review): assumed to apply with or without a password -- confirm.
    time.sleep(Join_Delay)  # wait to join until fleanode eats auth

    # Join selected channels
    for chan in Channels:
        logging.info("Joining channel '%s'..." % chan)
        send("JOIN #%s" % chan)

    # Bytes received so far that do not yet form a complete line. TCP is a
    # byte stream: one recv() may end in the middle of an IRC line.
    pending = b""
    # Upper bound on unterminated input kept around.
    max_pending = max(Buf_Size, 8192)

    while connected:
        try:
            data = sock.recv(Buf_Size)
        except socket.timeout:
            logging.debug("Receive timed out")
            # Determine whether the connection has timed out:
            since_recv = (datetime.now() - time_last_recv).total_seconds()
            if since_recv > Discon_TO:
                logging.info("Exceeded %d seconds of silence " % Discon_TO
                             + "from server: disconnecting!")
                deinit_socket()
            continue
        except socket.error:
            logging.warning("Receive socket error, disconnecting.")
            deinit_socket()
            continue
        except Exception as e:
            logging.exception(e)
            deinit_socket()
            continue

        # Received anything -- reset timer
        time_last_recv = datetime.now()

        if len(data) == 0:
            logging.warning("Receive socket closed, disconnecting.")
            deinit_socket()
            continue

        # Split off the complete lines; the unterminated tail stays pending.
        pending += data
        raw_lines = pending.split(b"\n")
        pending = raw_lines.pop()
        if len(pending) > max_pending:
            logging.warning("Discarding %d bytes of unterminated input."
                            % len(pending))
            pending = b""

        for raw in raw_lines:
            if not connected:
                break
            raw = raw.strip(b"\r\n")
            if not raw:
                continue
            # Each line is handled on its own, so one failing line does not
            # take the rest of the batch down with it.
            try:
                received_line(_decode_irc_bytes(raw))
            except Exception as e:
                logging.exception(e)

##############################################################################

html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """HTML-escape the text, then percent-encode it for use in a URL."""
    res = ("".join(html_escape_table.get(c, c) for c in text))
    return urllib.quote(res.encode('utf-8'))


searcher_re = re.compile(r"""(\d+) Results""")

# Retrieve a search result count using the WWWistic frontend.
# This way it is not necessary to have query parser in two places.
# However it is slightly wasteful of CPU (requires actually loading results.)
def get_search_res(chan, query):
    """Return a one-line summary of the WWW search results for the query."""
    try:
        esc_q = html_escape(query)
        url = Base_URL + "log-search?q=" + esc_q + "&chan=" + chan
        # Bounded wait: without a timeout a stalled WWW frontend would
        # block the bot indefinitely.
        res = requests.get(url, timeout=Timeout).text
        # The result count is read from the page title.
        start = res.find('<title>')
        end = res.find('</title>')
        found = None
        if start >= 0 and end >= 0:
            t = res[start + 7 : end].strip()  # 7 == len('<title>')
            found = searcher_re.match(t)
        if found:
            output = "[" + url + "]" + "[" + found.group(1)
            output += """ results for "%s" in #%s]""" % (query, chan)
            return output
        else:
            return """No results found for "%s" in #%s""" % (query, chan)
    except Exception as e:
        logging.exception(e)
        return "No results returned (is logotron WWW up ?)"

##############################################################################


# Get perma-URL corresponding to given log line
def line_url(l):
    return "{0}log/{1}/{2}#{3}".format(Base_URL,
                                       l['chan'],
                                       l['t'].strftime(Date_Short_Format),
                                       l['idx'])

##############################################################################

# Commands: each takes (arg, user, chan) and answers in chan.


def cmd_help(arg, user, chan):
    # Speak the 'help' text; sorted so that the listing is stable
    speak(chan, "%s: my valid commands are: %s" %
          (user, ', '.join(sorted(Commands))))


def cmd_search(arg, user, chan):
    logging.debug("search: '%s'" % arg)
    speak(chan, get_search_res(chan, arg))


def cmd_seen(arg, user, chan):
    # Empty query is prohibited:
    if arg == "":
        speak(chan, "Required argument: USER")
        return

    # Perform query:
    seen_line = query_db(
        '''select t, idx, payload, chan from loglines where chan=%s
           and speaker=%s order by t desc limit 1;''',
        [chan, arg], one=True)

    if seen_line is not None:
        # User has been seen in THE CURRENT chan:
        time_txt = seen_line['t'].strftime(Date_Long_Format)
        time_link = "[%s][%s]" % (line_url(seen_line), time_txt)
        result = "%s last seen here on %s: %s" % (arg, time_link,
                                                  seen_line['payload'])
    else:
        # User has never been seen in this chan:
        result = "The user %s has never been seen in #%s." % (arg, chan)

    # Speak the result into the chan where command was issued
    speak(chan, result)


def cmd_seen_anywhere(arg, user, chan):
    # Empty query is prohibited:
    if arg == "":
        speak(chan, "Required argument: USER")
        return

    # Perform query:
    seen_line = query_db(
        '''select t, idx, payload, chan from loglines where speaker=%s
           order by t desc limit 1;''',
        [arg], one=True)

    if seen_line is not None:
        # User has been seen in SOME logged chan:
        time_txt = seen_line['t'].strftime(Date_Long_Format)
        time_link = "[%s][%s]" % (line_url(seen_line), time_txt)
        result = "%s last seen in #%s on %s: %s" % (arg,
                                                    seen_line['chan'],
                                                    time_link,
                                                    seen_line['payload'])
    else:
        # User has never been seen at all:
        result = "The user %s has never been seen by this logger." % arg

    # Speak the result into the chan where command was issued
    speak(chan, result)


def cmd_version(arg, user, chan):
    speak(chan, "I am bot version %s." % Ver)


def cmd_src(arg, user, chan):
    speak(chan, "%s: my source code can be seen at: %s" % (user, Src_URL))


def cmd_uptime(arg, user, chan):
    uptime = (datetime.now() - time_last_conn)
    days = uptime.days
    # Explicit floor division: whole hours and minutes, on any interpreter.
    hours = uptime.seconds // 3600
    minutes = (uptime.seconds % 3600) // 60

    uptime_txt = '%dd ' % days
    uptime_txt += '%dh ' % hours
    uptime_txt += '%dm' % minutes

    # Speak the uptime
    speak(chan, "%s: time since my last reconnect : %s" % (user, uptime_txt))


Commands = {
    "help": cmd_help,
    "s": cmd_search,
    "seen": cmd_seen,
    "seen-anywhere": cmd_seen_anywhere,
    "uptime": cmd_uptime,
    "src": cmd_src,
    "version": cmd_version,
}

##############################################################################


# Save given line to perma-log
def save_line(time, chan, speaker, action, payload):
    """Append one line to the log of chan under the next free index.

    Any failure is logged and the transaction rolled back; nothing is
    raised to the caller for a failed insert.
    """
    ## Put in DB:
    try:
        # Get index of the last line saved for this chan
        last_idx = query_db(
            '''select idx from loglines where chan=%s
               and idx = (select max(idx) from loglines where chan=%s) ;''',
            [chan, chan], one=True)

        # Was this chan unseen previously?
        if last_idx is None:
            cur_idx = NewChan_Idx  # Then use the config'd start index
        else:
            cur_idx = last_idx['idx'] + 1  # Otherwise, get the next idx

        logging.debug("Adding log line with index: %s" % cur_idx)

        # Set up the insert; `action` is stored in the 'self' column
        exec_db('''insert into loglines (idx, t, chan, era, speaker, self, payload)
                   values (%s, %s, %s, %s, %s, %s, %s) ; ''',
                [cur_idx, time, chan, Era, speaker, action, payload])

        # Fire
        commit_db()
    except Exception as e:
        rollback_db()
        logging.warning("DB add failed, rolled back.")
        logging.exception(e)


# RE for finding log refs. Base_URL is escaped so that it is matched
# literally ("." must not stand for "any character").
logref_re = re.compile(re.escape(Base_URL) + r"""log\/([^/]+)/[^/]+#(\d+)""")


# All valid received lines end up here
def eat_logline(user, chan, text, action):
    """Log a received channel line, then act on a command or on log refs."""
    # If somehow received line from channel which isn't in the set:
    if chan not in Channels:
        logging.warning(
            "Received martian : '%s' : '%s'" % (chan, text))
        return

    # First, add the line to the log:
    save_line(datetime.now(), chan, user, action, text)

    # Then, see if the line was a command for this bot:
    if text.startswith(Prefix):
        cmd = text.partition(Prefix)[2].strip()
        cmd = [x.strip() for x in cmd.split(' ', 1)]
        if len(cmd) == 1:
            arg = ""
        else:
            arg = cmd[1]
        # Dispatch this command...
        command = cmd[0]
        logging.debug("Dispatching command '%s' with arg '%s'.." %
                      (command, arg))
        func = Commands.get(command)
        try:
            if func is None:
                # This command is undefined:
                logging.debug("Invalid command: %s" % command)
                # Utter the 'help' text as response to the sad command
                cmd_help("", user, chan)
            else:
                # Is defined command, dispatch it:
                func(arg, user, chan)
        except Exception as e:
            logging.exception(e)
            # A failed query leaves the transaction aborted; clear it so
            # that the next line to be saved is not lost.
            rollback_db()
    else:
        # Finally, see if contains log refs:
        for ref_chan, ref_idx in logref_re.findall(text):
            try:
                # Find this line in DB:
                ref_line = query_db(
                    '''select t, speaker, payload from loglines
                       where chan=%s and idx=%s;''',
                    [ref_chan, int(ref_idx)], one=True)
            except Exception as e:
                logging.exception(e)
                rollback_db()  # same reasoning as for commands above
                continue

            # If retrieved line is valid, echo it:
            if ref_line is None:
                continue

            if ref_chan == chan:
                # Referred line was spoken in THIS chan:
                time_txt = ref_line['t'].strftime(Date_Long_Format)
                my_line = "Logged on %s %s: %s" % (time_txt,
                                                   ref_line['speaker'],
                                                   ref_line['payload'])
            else:
                # This is a cross-chan echo:
                time_txt = ref_line['t'].strftime(Date_Short_Format)
                my_line = "(%s) %s %s: %s" % (ref_chan,
                                              time_txt,
                                              ref_line['speaker'],
                                              ref_line['payload'])

            # Speak the line echo into the chan where ref was seen
            speak(chan, my_line)

##############################################################################


# IRCate; if disconnected, reconnect
def run():
    while True:
        irc()
        logging.warning("Disconnected, will reconnect...")

##############################################################################

# Run continuously.
run()

##############################################################################