User:SecuniBot/source

From Wikipedia, the free encyclopedia

#!/usr/bin/env python

'''
Script to update the number of unpatched vulnerabilities of an operating
system in the "Comparison of operating systems" page based on the secunia.com
advisory database.
'''

__author__  = 'Copyright (c) 2005 Karsten Sperling'
__license__ = 'Distribute under the terms of the PSF license.'

import sys, os
# Run from the script's own directory. The path is made absolute first:
# when the script is started by bare file name, dirname(sys.argv[0]) is the
# empty string and os.chdir('') raises OSError.
os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

import wikipedia
import re, datetime


# Titles of the wiki pages processed when none are given on the command line.
PAGES = [
    "Comparison of operating systems",
]
# Matches one wiki table cell of the form
#     | [http://secunia.com/product/N/ COUNT] / [[DATE]]
# where the separator is "/" or "||" and the date may be a bare "-".
# Groups: 1 = product URL (without any query string), 2 = advisory count,
#         3 = separator, 4 = date (None when the cell holds "-").
#
# The optional "?task=advisories" suffix sits outside group 1 so that cells
# whose link already carries that query string are still recognised; without
# it such cells would never match again. The pattern is compiled with
# re.VERBOSE, so the literal spaces below are not part of the match.
LINK  = re.compile(
    r'^ \| \s* \[(http://secunia\.com/product/\d+/) (?: \?task=advisories )?' +
    r'\s+ (\d+)\]' +
    r'\s+ (/ | \|\|) \s+ (?: - | \[\[ (\d+-\d+-\d+) \]\] ) \s* $',
    re.VERBOSE | re.MULTILINE
)

# Matches one advisory <table> in fetched advisory-list HTML.
# Groups: 1 = status icon name ("patched", "partial" or "unpatched"),
#         2 = release date, 3 = number taken from the criticality icon name.
# DOTALL lets the lazy ".*?" gaps run across line breaks.
ADVISORY = re.compile(
    ''.join([
        r'<table.*?',
        # status icon
        r'<img(?: [^>]*)? src="/gfx/(patched|partial|unpatched)\.gif"(?: [^>]*)?>.*?',
        # release date, possibly preceded by markup tags
        r'Release Date:(?:\s*<[^>]+>)*?\s*(\d+-\d+-\d+)\s*<.*?',
        # criticality icon, possibly preceded by markup tags
        r'Criticality:(?:\s*<[^>]+>)*?\s*<img(?: [^>]*)? src="/gfx/crit_(\d+)\.gif"(?: [^>]*)?>.*?',
        r'</table>',
    ]),
    re.IGNORECASE | re.DOTALL
)

def secunify(title):
    '''
    Refresh the Secunia vulnerability figures on the wiki page `title`.

    Every table cell matched by LINK is re-checked against the advisory list
    downloaded from its product URL: the cell's count and date are replaced
    when they differ from what the download shows. The first
    "[[Secunia]] ... Last updated ... [[date]]" stamp (all on one line) is set
    to today's date. The page is saved only if its text actually changed, and
    the edit is flagged as minor when no cell had to be changed.

    The page is skipped unless its text contains a
    "<!-- SECUNIBOT PERMITTED " marker comment.

    Any exception raised along the way (missing marker, no advisories
    parsed, download or save failure) is caught and reported on stdout as
    "Error: ..."; nothing is raised to the caller and nothing is returned.
    '''
    # [advisories parsed, cells changed]. A list rather than two integers so
    # that the nested callback can update the counters in place.
    stats = [0, 0]
    print("Processing '%s'" % title)

    def update1(match):
        '''Return the replacement text for one cell matched by LINK.'''
        url    = match.group(1) + '?task=advisories'
        count  = int(match.group(2))
        delim  = match.group(3)
        oldest = match.group(4) or '-'

        print("  -> %s" % url)
        http = wikipedia.MyURLopener().open(url)
        try:
            data = http.read()
        finally:
            # Release the connection even when the download fails part-way.
            http.close()

        ncount  = 0
        noldest = '-'
        for m in ADVISORY.finditer(data):
            stats[0] += 1
            # Count only unpatched advisories whose criticality icon number
            # is at least 2.
            if m.group(1) == 'unpatched' and int(m.group(3)) >= 2:
                ncount += 1
                # The last qualifying advisory in page order wins.
                # NOTE(review): presumably the list is sorted newest first,
                # making this the oldest one -- confirm.
                noldest = m.group(2)

        # Nothing changed: hand back the original text untouched.
        if ncount == count and noldest == oldest: return match.group(0)

        stats[1] += 1
        if noldest != '-': noldest = "[[" + noldest + "]]"
        # NOTE(review): the rewritten cell links to the "?task=advisories"
        # URL; LINK has to accept that form for the cell to be picked up
        # again on a later run -- confirm.
        return "| [%s %d] %s %s" % (url, ncount, delim, noldest)

    try:
        page = wikipedia.Page(wikipedia.getSite(), title)
        text = page.get()

        # Only touch pages that explicitly opt in to the bot.
        if not re.search(r'<!--\s+SECUNIBOT\s+PERMITTED\s', text, re.IGNORECASE):
            raise Exception("missing bot tag, page ignored")

        output = LINK.sub(update1, text)
        if not stats[0]: raise Exception("no advisories at all, a parsing bug is very likely")
        print("  parsed %d advisories, performed %d changes" % (stats[0], stats[1]))

        # Stamp the first "Last updated" date following "[[Secunia]]".
        now = datetime.date.today()
        output = re.sub(
            r'(\[\[Secunia\]\].*?Last updated.*?)\[\[\d+-\d+-\d+\]\]',
            r'\1[[%04d-%02d-%02d]]' % (now.year, now.month, now.day),
            output, 1)

        if output == text: return
        print("Performing edit:")
        wikipedia.showDiff(text, output)
        page.put(
            output,
            # "updated" when at least one cell changed, otherwise "checked".
            comment = "SecuniBot: %s vulnerabilities list" % (stats[1] and "updated" or "checked"),
            minorEdit = (stats[1] == 0),
            watchArticle = False
        )

    except Exception:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the statement parses on both Python 2 and 3.
        e = sys.exc_info()[1]
        print("Error: " + str(e))
    
if __name__ == "__main__":
    try:
        # Collect the command-line arguments for which the framework's
        # argument handler returns a false value; these are taken to be
        # page titles.
        # NOTE(review): confirm the return convention of
        # wikipedia.argHandler for the framework version in use.
        pages = []
        for arg in sys.argv[1:]:
            if not wikipedia.argHandler(arg, 'secunibot'):
                pages.append(arg)
        # Fall back to the built-in page list when no title was given.
        if not pages: pages = PAGES

        for p in pages: secunify(p)

    finally:
        # Always sign off from the framework, even after an error.
        wikipedia.stopme()