User:Misza13/spoilerkill.py

From Wikipedia, the free encyclopedia

This is an interactive pywikipedia script that aids in the removal of {{spoiler}} (and related) tags from articles. It works through a list of articles read from a given file, shows the context in which each tag is placed, and asks whether to remove the tags. In this (default) mode it is a manually-assisted tool, which means that it doesn't require any approval whatsoever. The --auto option turns it into a fully automated bot that removes all tags from all given articles.

[edit] Help

usage: spoilerkill.py [options] FILE

options:
  -h, --help            show this help message and exit
  -s ARTICLE, --start=ARTICLE
                        skip articles before ARTICLE
  -a, --auto            don't ask for removal confirmation - remove
                        automatically

FILE - name of a file that contains a list of articles to browse through (one article per line, each enclosed in [[double square brackets]])

A suitable file (updated daily) is located here:

http://tools.wikimedia.de/~misza13/spoilers.txt

[edit] Code

import re, difflib
from optparse import OptionParser
 
 
# Shortcuts accepted at the "Edit summary" prompt: typing one of these keys
# is expanded to the full edit summary it maps to.
summaries = {
    u'c': u'cleanup',
    u'rm': u'Removing redundant template(s) per [[Wikipedia:Spoiler]]',
    u'ue': u'Removing unencyclopedic template(s)',
}
 
def bufline(ch=u'*', clr=10, L=40):
    """Print a separator line through ``wikipedia.output``.

    ch  -- string that is repeated to form the line
    clr -- colour code passed for every position of the line
    L   -- number of repetitions of ``ch`` (and number of colour entries)
    """
    separator = ch * L
    palette = [clr] * L
    wikipedia.output(separator, colors=palette)
 
 
def _read_titles(paths):
    """Return the article titles listed in the files named by ``paths``.

    Every file is decoded as UTF-8 and holds one title per line.  Surrounding
    whitespace and the enclosing ``[[``/``]]`` are removed; lines that are
    empty after that are dropped so they never become empty page titles.
    """
    titles = []
    for path in paths:
        # try/finally (rather than ``with``) closes the handle without
        # requiring newer syntax than the rest of this script uses.
        handle = open(path, 'rb')
        try:
            text = handle.read().decode('utf-8')
        finally:
            handle.close()
        for line in text.splitlines():
            title = line.strip().strip(u'[]').strip()
            if title:
                titles.append(title)
    return titles


def main(options, args):
    """Walk through the listed articles and remove spoiler templates.

    For each article the text around every spoiler-like template is printed
    (template and plot-like section headers highlighted).  Unless
    ``options.auto`` is set, the user is asked whether to remove the
    templates ('y'), edit the page by hand ('e') or skip it ('n').

    options -- parsed command-line options; ``options.start`` (if set) drops
               every title that compares lower than it, ``options.auto``
               removes templates without asking.
    args    -- names of UTF-8 files listing the articles, one per line.
    """
    ctx = 500  # characters of context shown before and after each template

    articles = _read_titles(args)
    if options.start:
        articles = [a for a in articles if a >= options.start]

    # A template whose name contains "spoil", together with the newlines
    # immediately around it.
    RX = re.compile(r'\n*(?P<tpl>{{[^}\|]*?spoil[^}]*?}})\n*', re.IGNORECASE)
    # Section headers that typically contain plot details.
    RXh = re.compile('(?P<hdr>=+.*(?:plot|summary|synopsis|recap|overview|history|character|story|background|biography).*=+)', re.IGNORECASE)

    Site = wikipedia.getSite()
    for a in articles:
        article = wikipedia.Page(Site, a)
        bufline(ch=u'=', clr=13, L=60)
        wikipedia.output(u'Checking for spoilers in [[%s]]...' % article.title())
        try:
            oldtxt = article.get()
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Skipping redirect page...')
            continue
        except wikipedia.NoPage:
            # A listed page may have been deleted since the list was built;
            # skip it instead of aborting the whole run.
            wikipedia.output(u'Skipping non-existent page...')
            continue
        if Site.messages:
            msg = u'You have new messages!'
            wikipedia.output(msg, colors=[10] * len(msg))

        found = False
        for match in RX.finditer(oldtxt):
            found = True
            bufline(clr=14)
            st = match.start('tpl')
            en = match.end('tpl')
            pre = oldtxt[max(0, st - ctx):st]
            tpl = match.group('tpl')
            post = oldtxt[en:en + ctx]
            excerpt = pre + tpl + post
            # One colour entry per character: template in colour 12,
            # matching section headers in colour 11, the rest uncoloured.
            txtcolors = [None] * len(pre) + [12] * len(tpl) + [None] * len(post)
            for hdr in RXh.finditer(excerpt):
                hs = hdr.start('hdr')
                he = hdr.end('hdr')
                txtcolors[hs:he] = [11] * (he - hs)
            wikipedia.output(excerpt, colors=txtcolors)

        if not found:
            wikipedia.output(u'Not found.')
            continue

        bufline(clr=14)

        newtxt = oldtxt
        summary = u''
        if options.auto:
            ch = 'y'
        else:
            ch = wikipedia.inputChoice('Remove spoiler templates?', ['y', 'e', 'n'], ['y', 'e', 'n'])

        if ch == 'y':
            # The third positional parameter of a compiled pattern's sub() is
            # ``count``; the flags are already part of RX, so passing
            # re.IGNORECASE here would limit the number of replacements.
            newtxt = RX.sub('\n\n', oldtxt)
            summary = summaries[u'rm']
        elif ch == 'e':
            edt = editarticle.TextEditor()
            newtxt = edt.edit(oldtxt)
            if newtxt:
                summary = wikipedia.input(u'Edit summary [empty to abort]:')
                # Expand a shortcut key to its full summary, if it is one.
                summary = summaries.get(summary, summary)

        # Save only when there is a summary and the text actually changed.
        if summary and newtxt != oldtxt:
            try:
                article.put(newtxt, comment=summary, minorEdit=True)
            except wikipedia.SpamfilterError:
                wikipedia.output(u'Spamfilter error has occured!')
            except wikipedia.EditConflict:
                wikipedia.output(u'An edit conflict has occured!')
 
 
# Script entry point: parse the command line, then run main() and always
# call wikipedia.stopme() afterwards.
if __name__ == '__main__':
    parser = OptionParser(usage='usage: %prog [options] FILE')

    parser.add_option('-s', '--start', dest='start',
            help='skip articles before ARTICLE', metavar='ARTICLE')
    parser.add_option('-a', '--auto', action='store_true', dest='auto',
            help='don\'t ask for removal confirmation - remove automatically')

    options, args = parser.parse_args()
    if not args:
        # The usage line requires FILE; report its absence instead of
        # silently doing nothing.
        parser.error('no FILE with a list of articles given')

    # Imported outside the try block: if this import fails there is nothing
    # to stop, and the finally clause would otherwise raise a NameError that
    # hides the real ImportError.
    import wikipedia
    try:
        import editarticle
        main(options, args)
    finally:
        wikipedia.stopme()