User:RussBlau/dplcount.py
From Wikipedia, the free encyclopedia
Following is a Python script to count the progress of link-fixing on WP:DPL. This script requires the Pywikipedia framework.
For those not familiar with Python, please be aware that indentation is significant. You can copy-and-paste this text into a plain text file on your local machine, but do not change any of the white space unless you know what you are doing.
Note that the current disambiguation dump page location is hard-coded into the script; this will need to be updated, where indicated, whenever a new dump is started.
# script to count progress on [[WP:DPL]]
#
# Fetches the current disambiguation dump page, counts the list entries
# (articles) and their incoming-link totals in the "To do" and "Done"
# sections, and prints a progress summary.
import re

# Edit the following line whenever a new dump is posted
DPL_PAGE_TITLE = "Wikipedia:Disambiguation pages with links/2006-07-17 dump"

# Section headings that delimit the two lists on the dump page.
TODO_HEADING = "===To do==="
DONE_HEADING = "===Done==="

# One list entry: ": <link count>", optionally followed by a closing </s>
# tag, then the start of a [[Special:Whatlinkshere/...]] link.
COUNTER = re.compile(r": ([0-9]+) ?(?:</s>)? ?\[\[Special:Whatlinkshere/")

# Number of characters of surrounding text shown for a suspicious entry.
CONTEXT_CHARS = 40


def count_section(text, start, end=None):
    """Count entries and links in text[start:end].

    text  -- the full wikitext of the dump page
    start -- index at which to begin scanning
    end   -- index at which to stop scanning (default: end of text)

    Returns a tuple (articles, links, suspicious):
      articles   -- number of entries matched by COUNTER
      links      -- sum of the link counts of those entries
      suspicious -- list of text snippets surrounding each entry whose
                    link count is zero, so the caller can ask the user
                    to check the formatting
    """
    if end is None:
        end = len(text)
    articles = 0
    links = 0
    suspicious = []
    for match in COUNTER.finditer(text, start, end):
        articles += 1
        linkcount = int(match.group(1))
        if not linkcount:
            # Slice (not tuple-index) the text, and clamp the lower bound
            # so a match near the beginning does not wrap around to a
            # negative index.
            lo = max(0, match.start() - CONTEXT_CHARS)
            suspicious.append(text[lo:match.end() + CONTEXT_CHARS])
        links += linkcount
    return articles, links, suspicious


def report_lines(todocount, todolinks, donecount, donelinks):
    """Return the two summary lines for the given totals.

    The percentage is reported as 0.0 when no links were found at all,
    instead of dividing by zero.
    """
    total = donelinks + todolinks
    if total:
        percent = 100.0 * donelinks / total
    else:
        percent = 0.0
    return [
        "Found %i articles, %i links to do; %i articles, %i links done." % (
            todocount, todolinks, donecount, donelinks),
        "Progress: %i out of %i links (%.1f%%)" % (
            donelinks, total, percent),
    ]


def main():
    """Fetch the dump page and print the progress report."""
    # Imported here rather than at module level so that the counting
    # helpers above can be imported without the Pywikipedia framework.
    import wikipedia
    try:
        site = wikipedia.getSite()
        dplpage = wikipedia.Page(site, DPL_PAGE_TITLE)
        text = dplpage.get()
        # str.index raises ValueError if a heading is missing from the page.
        todomark = text.index(TODO_HEADING)
        donemark = text.index(DONE_HEADING)

        todocount, todolinks, suspicious = count_section(
            text, todomark, donemark)
        for snippet in suspicious:
            print("Check formatting:")
            print(snippet)

        donecount, donelinks, suspicious = count_section(text, donemark)
        for snippet in suspicious:
            print("Check formatting:")
            print(snippet)

        for line in report_lines(todocount, todolinks, donecount, donelinks):
            print(line)
    finally:
        wikipedia.stopme()


if __name__ == "__main__":
    main()