User:Gdr/dykbot.py
From Wikipedia, the free encyclopedia
< User:Gdr
#!/usr/bin/python
#
#
#                 DYKBOT.PY -- UPDATING "DID YOU KNOW"
#                           Gdr, 2005-05-12
#
#
# INTRODUCTION
#
# This script partially automates the procedure of placing new items on
# [[Template:Did you know]] on the English Wikipedia.
#
# You must have the Python Wikipedia Robot Framework
# (http://sourceforge.net/projects/pywikipediabot/).
#
#
# DOCUMENTATION
#
# [[User:Gdr/DYKbot]]
#
#
# LICENCE
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.

import calendar
import getopt
import history
import protect
import re
import sys
import time
import wikipedia


class DYK:
    """Operator-driven update of the "Did you know" template.

    `run()` walks through the whole procedure: it parses suggestions from
    the talk page, asks the operator which ones to use, rewrites the
    template, protects the new image and unprotects the old one, notifies
    article creators and finally archives the used suggestions on the talk
    page.  Every page write is preceded by a yes/no prompt and is only
    performed when the instance was created with `for_real` true.
    """

    # Carry out operations for real?
    for_real = False

    # These are the pages targeted by the script and their page links
    site = wikipedia.Site('en')
    target = "Template:Did you know"
    target_pl = None
    talk = "Template talk:Did you know"
    talk_pl = None
    new_image = None
    new_image_pl = None
    old_image = None
    old_image_pl = None

    # Minimum number of seconds between two updates of the target page;
    # also used to compute the "earliest next refreshment" time.
    refresh_interval = 6 * 60 * 60

    # The list of suggestions, in the form of dictionaries with keys
    # ('article', 'pagelink', 'fact', 'image', 'suggester', 'creator')
    # where some elements may be None.
    suggestions = []

    def confirm(self, query):
        """Prompt with `query`; return True only if the answer is 'y' or 'Y'."""
        answer = wikipedia.input(query + u' [y|N]')
        return answer in ('Y', 'y')

    def check(self, query):
        """Prompt with `query` and exit with status 1 unless confirmed."""
        if not self.confirm(query):
            print("Stopping.")
            sys.exit(1)

    def __init__(self, for_real = False):
        """Create an updater; pages are only written when `for_real` is true."""
        self.for_real = for_real
        # Per-instance list so instances never share the class-level one.
        self.suggestions = []

    # Format for archiving.
    def format_archive(self, s):
        """Return the talk-page archive line for suggestion dict `s`."""
        if s['image']:
            return u'*...%s ([[:%s]])<br>\n' % (s['fact'], s['image'])
        else:
            return u'*...%s<br>\n' % s['fact']

    # Format for DYK.
    def format_dyk(self, s):
        """Return the <li> line placed on the target template for `s`."""
        return u'<li>...%s</li>\n' % s['fact']

    # Generate edit comment.
    def make_comment(self, comment):
        """Echo `comment` to the operator and return it with the bot prefix."""
        print('-' * 72)
        wikipedia.output(u'>>> ' + comment)
        return u'DYKbot - ' + comment

    # Suggestion regexp.  Useful groups are:
    #  1. Suggestion text
    #  2. Article name
    #  5. Image name
    #  7. Image width
    #  9. Image caption
    # 11. User name.
    #
    # The leading ellipsis may be written as three dots, as the U+2026
    # character, or as an HTML entity.  NOTE(review): the two entity
    # alternatives are reconstructed -- the recovered copy of this script
    # had two identical literal ellipsis characters here, presumably
    # "&hellip;" and "&#8230;" decoded by the page renderer; confirm
    # against the original wikitext.
    #
    # The U+2026 alternative is a separate unicode literal so that Python
    # (not the `re` module) interprets the \u escape; the remaining pieces
    # are plain ASCII raw strings.
    suggestion_re = re.compile(
        r"^\*?\s*(?:\.\.\.|" u"\u2026" r"|&hellip;|&#8230;)\s*"
        r"(.*'''\[\[([^\]|]+)(\|[^\]|]*)?\]\][a-z]*'''.*?\?)"
        r"(.*\(?\[\[:(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]+))?\]\]\)?)?"
        r"(.*\[\[(User:[^\]|]+))?.*$\s*", re.M)

    # String marking the start of the archive in the talk page.
    archive_marker = "<!-- Please place the latest did you know lines on the top. -->"

    # String marking the end of suggestions in the talk page.
    suggestions_end_marker = '== Inform these users =='

    # Positions in the talk page.
    suggestions_end_re = re.compile(re.escape(suggestions_end_marker)
                                    + '|' + re.escape(archive_marker))
    archive_marker_re = re.compile(re.escape(archive_marker) + r'\s*')

    # String matching the line for the time which needs to be updated.
    refresh_string1 = u'* Earliest time for next refreshment is'
    refresh_string2 = u'Wikipedia time ([[UTC]]). <!-- This should be six hours from when new items were last added to the template. -->'
    refresh_re = re.compile('^' + re.escape(refresh_string1) + '.*$', re.M)

    # Acceptable licences for the new image?  (This isn't a thorough test
    # but it will catch typical mistakes such as no licence at all.)
    licence_re = re.compile(r'{{(GFDL|CC|cc|PD)')

    # Old image on target page.
    old_image_re = re.compile(
        r'\[\[(Image:[^\]|]+)(\|([0-9]+)px)?(\|([^\]|]*))*\]\]')
    mprotected_re = re.compile(r'\s*{{mprotected}}\s*')

    # Edittime regexp (YYYYMMDDhhmmss).
    edittime_re = re.compile(r'^([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])'
                             r'([0-9][0-9])([0-9][0-9])([0-9][0-9])$')

    def run(self):
        """Carry out one interactive update of the target template.

        Exits the process with status 1 (via `sys.exit`) when the operator
        declines a safety prompt, when the talk page lacks the expected
        markers, or when no suggestion or no image was selected.
        """
        self.target_pl = wikipedia.Page(self.site, self.target)
        self.talk_pl = wikipedia.Page(self.site, self.talk)

        # Check that it's more than 6 hours since the last time
        # the target page was edited.
        target_orig = self.target_pl.get()
        et = wikipedia.edittime[repr(self.site),
                                wikipedia.link2url(self.target, site = self.site)]
        m = self.edittime_re.match(et)
        if m:
            et_secs = calendar.timegm([int(field) for field in m.groups()[0:6]])
            if time.time() - et_secs < self.refresh_interval:
                self.check(u'%s last updated at %s. Proceed anyway?'
                           % (self.target, et))
        else:
            self.check(u"Unrecognizable edittime '%s' in %s. Proceed anyway?"
                       % (et, self.target))

        # Read talk page, extracting suggestions and deleting them.
        talk_orig = self.talk_pl.get()

        # Only look at suggestions up to the suggestions end marker, and
        # in reverse order (oldest first).
        m = self.suggestions_end_re.search(talk_orig)
        if not m:
            print(u'No archive marker in %s. Stopping.' % self.talk)
            sys.exit(1)
        sugg_end = m.end()
        matches = list(self.suggestion_re.finditer(talk_orig[:sugg_end]))
        # Working from the bottom of the page upwards also keeps the
        # offsets of the not-yet-visited matches valid while used
        # suggestions are cut out of talk_text below.
        matches.reverse()

        talk_text = talk_orig
        for index, m in enumerate(matches):
            s = {
                'article': m.group(2),
                'pagelink': None,
                'fact': m.group(1),
                'image': m.group(5),
                'suggester': m.group(11),
                'creator': None,
                }
            wikipedia.output(u"Parsed suggestion %d as:" % (index + 1))
            wikipedia.output(u"  Article = " + s['article'])
            wikipedia.output(u"  Fact = " + s['fact'])
            if s['image']:
                wikipedia.output(u"  Image = " + s['image'])
            if s['suggester']:
                wikipedia.output(u"  Suggester = " + s['suggester'])
            if self.confirm(u"Use this suggestion?"):
                s['pagelink'] = wikipedia.Page(self.site, m.group(2))
                # Cut out used suggestion.
                talk_text = talk_text[:m.start()] + talk_text[m.end():]
                if s['image'] and self.confirm(u"Use this image?"):
                    self.new_image = s['image']
                    self.new_image_pl = wikipedia.Page(self.site, s['image'])
                    # Item with image needs to appear on top.
                    self.suggestions = [s] + self.suggestions
                else:
                    self.suggestions.append(s)

        if not self.suggestions:
            print("No suggestions. Stopping.")
            sys.exit(1)
        if not self.new_image:
            print("No image! Stopping.")
            sys.exit(1)

        # Check creation times for suggestions.
        for s in self.suggestions:
            h = history.historyPage(s['pagelink'])
            if h:
                # The last history entry is treated as the page creation;
                # presumably the history is ordered newest first and
                # 'date' is in epoch seconds -- confirm against the
                # history module.
                hoursago = (time.time() - h[-1]['date']) / 3600
                msg = (u'%s created %d hours ago by %s.'
                       % (s['article'], hoursago, h[-1]['user'] or 'anon'))
                if hoursago <= 72:
                    wikipedia.output(msg)
                else:
                    self.check(msg + u' OK?')
                s['creator'] = h[-1]['user']
            else:
                wikipedia.output(u'No history for %s.' % s['article'])

        # Check that the new image has a plausible license.  A failed
        # fetch is treated as an empty description page, which then trips
        # the licence prompt below.
        try:
            image_orig = self.new_image_pl.get()
        except Exception:
            image_orig = u''
        if not self.licence_re.search(image_orig):
            print('-' * 72)
            wikipedia.output(image_orig)
            print('-' * 72)
            self.check(u"%s appears not to have an acceptable licence. "
                       u"Use it anyway?" % self.new_image)

        # Update the "next refresh" time in the talk page.
        if self.refresh_re.search(talk_text):
            next_refresh = time.time() + self.refresh_interval
            # %H:%M:%S rather than %T: %T is not a portable directive.
            refresh_line = (self.refresh_string1
                            + time.strftime(u' %Y-%m-%d %H:%M:%S ',
                                            time.gmtime(next_refresh))
                            + self.refresh_string2)
            # A callable replacement inserts the text literally, with no
            # backslash/group-reference processing.
            talk_text = self.refresh_re.sub(lambda _m: refresh_line,
                                            talk_text, 1)
        else:
            self.check(u"No refresh text in %s. Proceed anyway?" % self.talk)

        # Add the new suggestions to the top of the "Archive" section of
        # the talk page, formatted with *...<br>, for
        # [[User:AllyUnion]]'s bot to archive.
        if self.archive_marker_re.search(talk_text):
            archive_block = (self.archive_marker + '\n\n'
                             + ''.join([self.format_archive(s)
                                        for s in self.suggestions])
                             + '\n')
            # The block contains user-written fact text, so it must not be
            # interpreted as a replacement template (a backslash in a fact
            # would otherwise be mangled or raise an error).
            talk_text = self.archive_marker_re.sub(lambda _m: archive_block,
                                                   talk_text, 1)
        else:
            print(u"No archive marker in %s. Stopping." % self.talk)
            sys.exit(1)

        # Add {{mprotected}} to the description page for the new image.
        image_text = image_orig + '\n{{mprotected}}'
        comment = self.make_comment(
            u'added {{mprotected}}: image is about to appear on [[Main Page]]')
        print('-' * 72)
        wikipedia.showDiff(image_orig, image_text)
        print('-' * 72)
        if (self.confirm(u"OK to update %s?" % self.new_image)
            and self.for_real):
            self.new_image_pl.put(image_text, comment)

        # Protect the new image.
        comment = self.make_comment(
            u'image is about to appear on [[Main Page]] via [[%s]]'
            % self.target)
        if (self.confirm(u"OK to protect %s?" % self.new_image)
            and self.for_real):
            print("Protecting new image")
            protect.protectPage(self.new_image_pl, comment)

        # Edit target, formatting lines with <li>...</li>.  The image
        # currently on the target is remembered so that it can be
        # unprotected afterwards.
        m = self.old_image_re.search(target_orig)
        if m:
            self.old_image = m.group(1)
            self.old_image_pl = wikipedia.Page(self.site, self.old_image)
        else:
            print("No image found on %s" % self.target)
        target_text = (
            u"{{subst:User:Gdr/Did you know header|[[%s|100px|]]}}\n"
            % self.new_image
            + u''.join([self.format_dyk(s) for s in self.suggestions])
            + u"{{subst:User:Gdr/Did you know footer}}")
        comment = self.make_comment(
            u'%d new entries: %s'
            % (len(self.suggestions),
               u', '.join([u'[[%s]]' % s['article']
                           for s in self.suggestions])))
        print('-' * 72)
        wikipedia.output(target_text)
        print('-' * 72)
        if self.confirm(u"OK to update %s?" % self.target) and self.for_real:
            self.target_pl.put(target_text, comment)

        # Purge the [[Main Page]] cache.
        if self.for_real:
            print("Purging [[Main Page]] cache")
            wikipedia.getUrl(self.site.hostname(),
                             '/w/wiki.phtml?title=Main_Page&action=purge')

        if self.old_image:
            # Unprotect the old image
            comment = self.make_comment(u'image no longer on [[Main Page]]')
            if (self.confirm(u"OK to unprotect %s?" % self.old_image)
                and self.for_real):
                print("Unprotecting old image")
                protect.unprotectPage(self.old_image_pl, comment)

            # Remove {{mprotected}} from the description page for the
            # old image.  A failed fetch is treated as an empty page.
            try:
                image_orig = self.old_image_pl.get()
            except Exception:
                image_orig = u''
            if self.mprotected_re.search(image_orig):
                image_text = self.mprotected_re.sub('', image_orig, 1)
                comment = self.make_comment(
                    u'removed {{mprotected}}: image no longer on [[Main Page]]')
                print('-' * 72)
                wikipedia.showDiff(image_orig, image_text)
                print('-' * 72)
                if (self.confirm(u"OK to update %s?" % self.old_image)
                    and self.for_real):
                    self.old_image_pl.put(image_text, comment)
            else:
                print("{{mprotected}} not found in %s" % self.old_image)

        self.inform_creators()

        # Update [[Template talk:Did you know]]
        comment = self.make_comment(u'archiving %d suggestions'
                                    % len(self.suggestions))
        print('-' * 72)
        wikipedia.showDiff(talk_orig, talk_text)
        print('-' * 72)
        if self.confirm(u"OK to update %s?" % self.talk) and self.for_real:
            self.talk_pl.put(talk_text, comment)

        print('-' * 72)
        if self.for_real:
            print("Did you know has been updated. Please check the results.")
        else:
            print("Did you know has not been updated.")

    def inform_creators(self):
        """Append a notification to the talk page of each known creator.

        Suggestions without a 'creator' are skipped, as are creators whose
        talk page is a redirect; a missing talk page is treated as empty.
        """
        # Leave {{subst:UpdatedDYK|[[<article>]]}} message on user pages of
        # creators.
        for s in self.suggestions:
            if not s['creator']:
                continue
            # NOTE(review): this assumes the creator name carries a
            # "User:" prefix; without one the name is used unchanged as
            # the page title -- confirm what the history module returns.
            user_talk = re.sub('^User:', 'User talk:', s['creator'])
            user_talk_pl = wikipedia.Page(self.site, user_talk)
            try:
                user_talk_orig = user_talk_pl.get()
            except wikipedia.IsRedirectPage:
                continue
            except wikipedia.NoPage:
                user_talk_orig = u''
            user_talk_text = (user_talk_orig
                              + u'\n\n== Did you know? ==\n\n'
                              + u'{{subst:UpdatedDYK|[[%s]]}}' % s['article'])
            comment = self.make_comment(
                u'your article [[%s]] has been used on [[%s]]'
                % (s['article'], self.target))
            print('-' * 72)
            wikipedia.showDiff(user_talk_orig, user_talk_text)
            print('-' * 72)
            if (self.confirm(u"OK to update %s?" % user_talk)
                and self.for_real):
                user_talk_pl.put(user_talk_text, comment)


if __name__ == '__main__':
    wikipedia.username = 'DYKbot'
    forreal = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], '', ['for-real'])
    except getopt.GetoptError:
        # Unknown option: explain the only supported flag instead of
        # dying with a traceback.
        print("usage: %s [--for-real]" % sys.argv[0])
        sys.exit(2)
    for o, a in opts:
        if o == '--for-real':
            forreal = True
    if not forreal:
        print("RUNNING IN DEBUGGING MODE: ARTICLES WILL NOT BE EDITED")
    try:
        DYK(forreal).run()
    finally:
        wikipedia.stopme()