"""A wrapper for Analog, a weblog analyzer and reporting tool.

The module is written to run unchanged on Python 2.6+ and Python 3.
"""

import datetime
import ftplib
import os
import re
import subprocess
import sys

try:
    _string_types = basestring  # Python 2: matches both str and unicode.
except NameError:
    _string_types = str  # Python 3 has no basestring.


def _to_bytes(text):
    """Return *text* as bytes, encoding text strings as UTF-8."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def _download(ftp, name, copyloc):
    """Fetch *name* over *ftp* into *copyloc*; drop the file if that fails."""
    complete = False
    try:
        with open(copyloc, 'wb') as local:
            ftp.retrbinary('RETR %s' % name, local.write)
        complete = True
    finally:
        if not complete:
            # mirror_ftp_dir skips files that already exist, so a truncated
            # download left on disk would never be fetched again.
            try:
                os.remove(copyloc)
            except OSError:
                pass


def mirror_ftp_dir(site, user, pw, workdir, destdir):
    """Mirror a remote FTP folder to a local folder.

    Every name listed in ``workdir`` on ``site`` that does not yet exist in
    ``destdir`` is downloaded in binary mode.  The entry named after the
    current day (``access_log.YYYYMMDD``) is always skipped.  One progress
    marker per considered file is written to stdout: ``=`` when the local
    copy already exists, ``+`` when the file is downloaded.

    Args:
        site: Host name of the FTP server.
        user: Login name.
        pw: Login password.
        workdir: Remote directory to mirror.
        destdir: Local target directory; created if it does not exist.

    Raises:
        Any ``ftplib`` or I/O error raised while logging in, listing or
        transferring.  A partially transferred file is removed first.
    """
    if not os.path.exists(destdir):
        os.makedirs(destdir)

    now = datetime.date.today()
    today_log = "access_log.%s%02d%02d" % (now.year, now.month, now.day)

    ftp = ftplib.FTP(site)
    first = True
    try:
        ftp.login(user, pw)
        ftp.cwd(workdir)
        for name in ftp.nlst():
            # Skip today, since it is probably not yet written completely.
            if name == today_log:
                continue
            copyloc = os.path.join(destdir, name)
            present = os.path.exists(copyloc)
            marker = "=" if present else "+"
            # Markers are separated by single spaces on one line.
            sys.stdout.write(marker if first else " " + marker)
            sys.stdout.flush()
            first = False
            if not present:
                _download(ftp, name, copyloc)
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # The connection is already unusable; close it unilaterally so
            # that the error which got us here is the one that propagates.
            ftp.close()
        # Terminate the progress line and leave one blank line after it.
        sys.stdout.write("\n\n")


# Configuration command names.  Analog.write_config() emits one line for
# each of these that has been set as an attribute on the Analog instance.
options = [
    "BARSTYLE",
    "DEBUG",
    "DIREXCLUDE",
    "DNS",
    "DNSFILE",
    "HOSTNAME",
    "IMAGEDIR",
    "PAGEEXCLUDE",
    "OUTFILE",
    "REFEXCLUDE",
    "REQEXCLUDE",
    "SETTINGS",
    "WEEKBEGINSON",
]

# Report names.  Handled exactly like ``options`` by Analog.write_config();
# booleans are written as ON / OFF.
reports = [
    'BROWSERREP', 'BROWSERSUM',
    'DAILYREP', 'DAILYSUM', 'DIRECTORY', 'DOMAIN',
    'FAILURE', 'FAILUSER', 'FAILREF', 'FAILHOST', 'FAILVHOST',
    'FILETYPE', 'FIVEREP', 'FIVESUM',
    'GENERAL',
    'HOST', 'HOURLYREP', 'HOURLYSUM',
    'INTSEARCHQUERY', 'INTSEARCHWORD',
    'MONTHLY',
    'ORGANISATION', 'OSREP',
    'PROCTIME',
    'QUARTERLY', 'QUARTERREP', 'QUARTERSUM',
    'REDIR', 'REDIRHOST', 'REDIRREF', 'REDIRUSER', 'REDIRVHOST',
    'REFERRER', 'REFSITE', 'REQUEST',
    'SEARCHQUERY', 'SEARCHWORD', 'SIZE', 'STATUS',
    'USER',
    'VHOST',
    'WEEKHOUR', 'WEEKLY',
    'YEARLY',
]


def assert_path(path):
    """Ensure that the folder which will contain *path* exists.

    Only the directory part of *path* is created; the final component is
    treated as a file name and left alone.  A bare file name (no directory
    part) refers to the current directory, so nothing is created for it.
    """
    folder, _filename = os.path.split(path)
    if folder and not os.path.exists(folder):
        os.makedirs(folder)


class Log(object):
    """A set of logfiles with the same format(s)."""

    def __init__(self, *filenames):
        self.filenames = list(filenames)

        # "If you specify several formats, analog tries to match each line to
        # the first format first, then if that fails the next, and so on,
        # so the order of the formats is important. Usually you want to
        # specify the most common one first, to minimise the time spent
        # trying to match lines to inappropriate formats."
        self.formats = []
        # Not read anywhere else in this module.
        self.defaultformat = None

    def format(self, newformat):
        """Append one format string, or every item of an iterable of them."""
        if isinstance(newformat, _string_types):
            self.formats.append(newformat)
        else:
            self.formats.extend(newformat)


class Analog(object):
    """A wrapper for Analog, a weblog analyzer and reporting tool.

    Configuration values are set as plain attributes named after the
    entries of ``options`` and ``reports`` (for example ``a.DNS = "NONE"``
    or ``a.GENERAL = False``); write_config() picks them up from there.
    """

    def __init__(self, analog_app):
        self.analog_app = analog_app  # Command used to start Analog.
        self.logs = []                # Log instances, in output order.
        self.includes = []            # Extra files emitted as CONFIGFILE.
        self.configfile = None        # Path of the generated config file.

    def add_logs(self, *filenames):
        """Register a new Log for *filenames* and return it."""
        log = Log(*filenames)
        self.logs.append(log)
        return log

    def write_config(self, *fileparts):
        """Write the Analog configuration file.

        Args:
            *fileparts: Optional path components, joined with os.path.join
                and stored as ``self.configfile``.  When omitted, the
                previously stored ``self.configfile`` is used.

        Raises:
            TypeError: If no config filename is known.
        """
        if fileparts:
            self.configfile = os.path.join(*fileparts)
        if not self.configfile:
            raise TypeError("No config filename supplied.")
        assert_path(self.configfile)

        lines = []
        for log in self.logs:
            # The formats of a log are written before its files.
            lines.extend("LOGFORMAT %s\n" % fmt for fmt in log.formats)
            lines.extend("LOGFILE %s\n" % fname for fname in log.filenames)

        for name in options + reports:
            value = getattr(self, name, None)
            if value is None:
                continue
            if isinstance(value, bool):
                value = "ON" if value else "OFF"
            lines.append("%s %s\n" % (name, value))

        lines.extend("CONFIGFILE %s\n" % fname for fname in self.includes)

        # Binary mode keeps "\n" line endings on every platform.
        with open(self.configfile, 'wb') as f:
            f.writelines(_to_bytes(line) for line in lines)

    def execute(self):
        """Run Analog on the written config file and echo its output.

        The command line is ``<analog_app> -G +g"<configfile>"`` and is run
        through the shell; stdout and stderr of the child are merged and
        printed line by line.

        Returns:
            The exit status of the child process.

        Raises:
            AttributeError: If no config file has been written yet.
        """
        if not self.configfile:
            raise AttributeError("Config file has not been written.")

        print("Producing %s" % self.configfile)
        # NOTE: analog_app and configfile are interpolated unescaped.  The
        # shell string is kept because analog_app may carry arguments of
        # its own; do not feed untrusted values into either attribute.
        cmd = r'%s -G +g"%s"' % (self.analog_app, self.configfile)
        print(cmd)

        proc = subprocess.Popen(cmd, shell=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        # Nothing is sent to the child; close its stdin so it sees EOF
        # instead of waiting on an open pipe.
        proc.stdin.close()
        try:
            for line in proc.stdout:
                print(line.strip('\n'))
        finally:
            proc.stdout.close()
            returncode = proc.wait()
        return returncode

    def analyze(self, reportdir):
        """Report on all of self.logs into reportdir/analog.html.

        The config file is written to reportdir/analog.cfg first.
        """
        self.OUTFILE = os.path.join(reportdir, "analog.html")
        self.configfile = os.path.join(reportdir, "analog.cfg")
        self.write_config()
        return self.execute()


# IIS format tools

def files(dirname):
    """Return the names of the files directly inside *dirname*.

    Sub-directories are not descended into.  An empty list is returned
    when *dirname* cannot be walked (for instance because it is missing).
    """
    for _root, _dirs, filenames in os.walk(dirname):
        return filenames
    return []


def add_dates_to_log(dir, filename):
    """Rewrite a time-only log in place so every entry carries a date.

    The date is taken from characters 2-7 of *filename*, read as YYMMDD
    (for example ``ex080131.log``); the century is fixed to "20".  The
    ``#Fields: time`` header becomes ``#Fields: date time`` and every line
    starting with ``HH:`` gets ``20YY-MM-DD `` prepended.  Lines that are
    already dated do not match, so running the function twice is harmless.

    Raises:
        ValueError: If *filename* has no six digits at positions 2-7.
    """
    if not re.match(r'.{2}\d{6}', filename):
        raise ValueError("Cannot derive a date from log filename %r"
                         % (filename,))

    loc = os.path.join(dir, filename)
    # Work on bytes so the result does not depend on the locale encoding
    # or on newline translation.
    with open(loc, 'rb') as f:
        content = f.read()

    date = "20%s-%s-%s " % (filename[2:4], filename[4:6], filename[6:8])
    content = re.sub(br'(?m)^#Fields: time', br'#Fields: date time', content)
    content = re.sub(br'(?m)^(?=\d\d:)', date.encode("ascii"), content)

    with open(loc, 'wb') as f:
        f.write(content)