User:Misza13/spoilerkill.py
From Wikipedia, the free encyclopedia
This is an interactive pywikipedia script that aids in the removal of {{spoiler}} (and related) tags from articles. It works through a list of articles read from a given file, shows the context in which each tag is placed, and asks whether to remove the tags. In this (default) mode it is a manually-assisted tool, which means that it doesn't require any approval whatsoever. The --auto
option turns it into a fully automated bot that would remove all tags from all given articles.
[edit] Help
usage: spoilerkill.py [options] FILE

options:
  -h, --help            show this help message and exit
  -s ARTICLE, --start=ARTICLE
                        skip articles before ARTICLE
  -a, --auto            don't ask for removal confirmation - remove
                        automatically

FILE - name of file that contains a list of articles to browse through (one article per line, inside [[wiki parens]])
A suitable file (updated daily) is located here:
[edit] Code
import re, difflib
from optparse import OptionParser

# NOTE: the pywikipedia modules `wikipedia` and `editarticle` are imported in
# the __main__ section at the bottom (after option parsing).  bufline() and
# main() expect them to be present as module globals.

# Shortcuts accepted at the "Edit summary" prompt, expanded to full summaries.
summaries = {
    u'c' : u'cleanup',
    u'rm' : u'Removing redundant template(s) per [[Wikipedia:Spoiler]]',
    u'ue' : u'Removing unencyclopedic template(s)',
}

# A template whose name contains "spoil" (the name may not contain "|"),
# together with the newlines directly around it.
SPOILER_RX = re.compile(r'\n*(?P<tpl>\{\{[^}|]*?spoil[^}]*?\}\})\n*',
                        re.IGNORECASE)

# Section headers that typically introduce plot details; only used to
# highlight them in the context shown to the operator.
HEADER_RX = re.compile(
    r'(?P<hdr>=+.*(?:plot|summary|synopsis|recap|overview|history|character'
    r'|story|background|biography).*=+)',
    re.IGNORECASE)


def bufline(ch=u'*', clr=10, L=40):
    """Print a separator line made of `ch` repeated `L` times in color `clr`."""
    line = ch * L
    # One color entry per printed character (also correct if `ch` is longer
    # than a single character).
    wikipedia.output(line, colors=[clr] * len(line))


def read_titles(paths):
    """Return the article titles listed in the UTF-8 files `paths`.

    Each file holds one title per line, optionally wrapped in [[...]].
    Surrounding whitespace and brackets are stripped; blank lines are
    skipped so that no empty title is ever produced.
    """
    titles = []
    for path in paths:
        fh = open(path, 'rb')
        try:
            text = fh.read().decode('utf-8')
        finally:
            # Close the file even if reading/decoding fails.
            fh.close()
        for line in text.split(u'\n'):
            title = line.strip().strip(u'[]').strip()
            if title:
                titles.append(title)
    return titles


def remove_spoilers(text):
    """Return `text` with every spoiler template replaced by a blank line."""
    # No third positional argument here: Pattern.sub() takes `count` there,
    # not flags (the flags are already part of the compiled pattern).
    return SPOILER_RX.sub(u'\n\n', text)


def match_context(text, match, ctx=500):
    """Return (snippet, colors) showing `match` within `text`.

    `snippet` is the matched template plus up to `ctx` characters on each
    side.  `colors` has one entry per snippet character: 12 for the template,
    11 for section headers matched by HEADER_RX, None for everything else.
    """
    st = match.start('tpl')
    en = match.end('tpl')
    pre = text[max(0, st - ctx):st]
    tpl = match.group('tpl')
    post = text[en:en + ctx]
    snippet = pre + tpl + post
    colors = [None] * len(pre) + [12] * len(tpl) + [None] * len(post)
    for hdr in HEADER_RX.finditer(snippet):
        # Same-length slice assignment, so the list stays aligned with snippet.
        colors[hdr.start('hdr'):hdr.end('hdr')] = [11] * len(hdr.group('hdr'))
    return snippet, colors


def main(options, args):
    """Walk through the listed articles and remove spoiler templates.

    options -- parsed command-line options; `start` (skip titles that sort
               before it) and `auto` (remove without asking) are used.
    args    -- names of files that list the articles, one title per line.
    """
    ctx = 500  # characters of context shown on each side of a template

    articles = read_titles(args)
    if options.start:
        # Plain string comparison: this only "skips ahead" if the list is
        # sorted.  NOTE(review): options.start is not decoded, so a non-ASCII
        # start title may not compare as expected under Python 2.
        articles = [a for a in articles if a >= options.start]

    Site = wikipedia.getSite()

    for a in articles:
        article = wikipedia.Page(Site, a)
        bufline(ch=u'=', clr=13, L=60)
        wikipedia.output(u'Checking for spoilers in [[%s]]...' % article.title())
        try:
            oldtxt = article.get()
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Skipping redirect page...')
            continue
        except wikipedia.NoPage:
            # A deleted/mistyped title must not abort the whole run.
            wikipedia.output(u'Page does not exist, skipping...')
            continue

        if Site.messages:
            msg = u'You have new messages!'
            wikipedia.output(msg, colors=[10] * len(msg))

        matches = list(SPOILER_RX.finditer(oldtxt))
        if not matches:
            wikipedia.output(u'Not found.')
            continue

        # Show every occurrence with its surrounding text.
        for match in matches:
            bufline(clr=14)
            snippet, colors = match_context(oldtxt, match, ctx)
            wikipedia.output(snippet, colors=colors)
        bufline(clr=14)

        newtxt = oldtxt
        summary = u''
        if options.auto:
            ch = 'y'
        else:
            ch = wikipedia.inputChoice('Remove spoiler templates?',
                                       ['y', 'e', 'n'], ['y', 'e', 'n'])
        if ch == 'y':
            newtxt = remove_spoilers(oldtxt)
            summary = u'Removing redundant template(s) per [[Wikipedia:Spoiler]]'
        elif ch == 'e':
            edt = editarticle.TextEditor()
            newtxt = edt.edit(oldtxt)
            if newtxt:
                summary = wikipedia.input(u'Edit summary [empty to abort]:')
                # Expand a shortcut such as "rm"; anything else is used as-is.
                summary = summaries.get(summary, summary)

        # An empty summary or unchanged text means there is nothing to save.
        if summary and newtxt != oldtxt:
            try:
                article.put(newtxt, comment=summary, minorEdit=True)
            except wikipedia.SpamfilterError:
                wikipedia.output(u'Spamfilter error has occured!')
            except wikipedia.EditConflict:
                wikipedia.output(u'An edit conflict has occured!')


if __name__ == '__main__':
    parser = OptionParser(usage='usage: %prog [options] FILE')
    parser.add_option('-s', '--start', dest='start',
                      help='skip articles before ARTICLE', metavar='ARTICLE')
    parser.add_option('-a', '--auto', action='store_true', dest='auto',
                      help='don\'t ask for removal confirmation - remove automatically')
    options, args = parser.parse_args()
    if not args:
        parser.error('no FILE given')

    # Import outside the try block: if the import itself fails there is
    # nothing to stop, and the finally clause must not hide the ImportError
    # behind a NameError.
    import wikipedia
    try:
        import editarticle
        main(options, args)
    finally:
        wikipedia.stopme()