User:Drinibot/CapitalizationRedirects
From Wikipedia, the free encyclopedia
The following script is based on pywikipedia and was kindly written by es: user Yrithinnd.
# -*- coding: utf-8 -*- """ This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page. You can run the bot with the following commandline parameters: -xml - Retrieve information from a local XML dump (pages_current, see http://download.wikimedia.org). Argument can also be given as "-xml:filename". -file - Work on all pages given in a local text file. Will read any [[wiki link]] and use these articles. Argument can also be given as "-file:filename". -cat - Work on all pages which are in a specific category. Argument can also be given as "-cat:categoryname". -page - Only edit a single page. Argument can also be given as "-page:pagename". You can give this parameter multiple times to edit multiple pages. -ref - Work on all pages that link to a certain page. Argument can also be given as "-ref:referredpagename". -start - Work on all pages in the wiki, starting at a given page. Choose "-start:!" to start at the beginning. NOTE: You are advised to use -xml instead of this option; this is meant for cases where there is no recent XML dump. -regex - Make replacements using regular expressions. If this argument isn't given, the bot will make simple text replacements. -except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. -fix:XYZ - Perform one of the predefined replacements tasks, which are given in the dictionary 'fixes' defined inside this file. The -regex argument and given replacements will be ignored if you use -fix. Currently available predefined fixes are: * HTML - convert HTML tags to wiki syntax, and fix XHTML -namespace:n - Number of namespace to process. The parameter can be used multiple times. It works in combination with all other parameters, except for the -start parameter. If you e.g. want to iterate over all user pages starting at User:M, use -start:User:M. 
-always - Don't prompt you for each replacement other: - First argument is the old text, second argument is the new text. If the -regex argument is given, the first argument will be regarded as a regular expression, and the second argument might contain expressions like \\1 or \g<name>. NOTE: Only use either -xml or -file or -page, but don't mix them. Examples: If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the new syntax, e.g. {{Stub}}, download an XML dump file (cur table) from http://download.wikimedia.org, then use this command: python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}" If you have a dump called foobar.xml and want to fix typos, e.g. Errror -> Error, use this: python replace.py -xml:foobar.xml "Errror" "Error" If you have a page called 'John Doe' and want to convert HTML tags to wiki syntax, use: python replace.py -page:John_Doe -fix:HTML """ # # (C) Daniel Herding, 2004 # # Distributed under the terms of the MIT license. # __version__='$Id: replace.py,v 1.87 2006/01/26 19:08:27 leogregianin Exp $' from __future__ import generators import sys, re import wikipedia, pagegenerators, catlib, config # Summary messages in different languages # NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes' # below. msg = { 'es':u'Robot: Borrado masivo de artÃculos', } class Drinibot: def __init__(self, generator, acceptall = False): self.generator = generator self.acceptall = False def run(self): """ Starts the robot. """ # Run the generator which will yield Pages which might need to be # changed. 
for page in self.generator: titulo=page.title() np=wikipedia.Page(wikipedia.getSite(), page.title().capitalize()) if not np.exists(): wikipedia.output(u'%s no existe' % np.title()) if not self.acceptall: choice = wikipedia.inputChoice(u'Quieres crear la redireccion asociada?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') if choice in ['a', 'A']: self.acceptall = True if self.acceptall or choice in ['y', 'Y']: np.put(u"#REDIRECT [[%s]]" % titulo, u"capitalization redirect") else: wikipedia.output(u'%s ya existe\nNo se hace nada' % np.title()) # def put(self, newtext, comment=None, watchArticle = None, minorEdit = True): def main(): gen = None # How we want to retrieve information on which pages need to be changed. # Can either be 'xmldump', 'textfile' or 'userinput'. source = None # Array which will collect commandline parameters. # First element is original text, second element is replacement text. textfilename = None # the category name which will be used when source is 'category'. categoryname = None # pages which will be processed when the -page parameter is used pageNames = [] # a page whose referrers will be processed when the -ref parameter is used referredPageName = None # will become True when the user presses a ('yes to all') or uses the -always # commandline paramater. acceptall = False # Which namespaces should be processed? # default to [] which means all namespaces will be processed namespaces = [] # Which page to start startpage = None # Load default summary message. wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg)) # Read commandline parameters. 
for arg in sys.argv[1:]: arg = wikipedia.argHandler(arg, 'replace') if arg: if arg.startswith('-file'): if len(arg) == 5: textfilename = wikipedia.input(u'Please enter the filename:') else: textfilename = arg[6:] source = 'textfile' elif arg.startswith('-cat'): if len(arg) == 4: categoryname = wikipedia.input(u'Please enter the category name:') else: categoryname = arg[5:] source = 'category' elif arg.startswith('-page'): if len(arg) == 5: pageNames.append(wikipedia.input(u'Which page do you want to chage?')) else: pageNames.append(arg[6:]) source = 'singlepage' elif arg.startswith('-ref'): if len(arg) == 4: referredPageName = wikipedia.input(u'Links to which page should be processed?') else: referredPageName = arg[5:] source = 'ref' elif arg.startswith('-start'): if len(arg) == 6: firstPageTitle = wikipedia.input(u'Which page do you want to chage?') else: firstPageTitle = arg[7:] source = 'allpages' elif arg == '-always': acceptall = True elif arg.startswith('-namespace:'): namespaces.append(int(arg[11:])) else: commandline_replacements.append(arg) if source == 'textfile': gen = pagegenerators.TextfilePageGenerator(textfilename) elif source == 'category': cat = catlib.Category(wikipedia.getSite(), categoryname) gen = pagegenerators.CategorizedPageGenerator(cat) elif source == 'singlepage': pages = [wikipedia.Page(wikipedia.getSite(), pageName) for pageName in pageNames] gen = iter(pages) elif source == 'allpages': namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace() gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace) elif source == 'ref': referredPage = wikipedia.Page(wikipedia.getSite(), referredPageName) gen = pagegenerators.ReferringPageGenerator(referredPage) elif source == None or len(commandline_replacements) not in [0, 2]: # syntax error, show help text from the top of this file wikipedia.output(__doc__, 'utf-8') wikipedia.stopme() sys.exit() if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, 
namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20) bot = Drinibot(preloadingGen, acceptall) bot.run() if __name__ == "__main__": try: main() finally: wikipedia.stopme()