User:Drinibot/CapitalizationRedirects

From Wikipedia, the free encyclopedia

The following script is based on pywikipedia's replace.py and was written by the kind es: user Yrithinnd.

# -*- coding: utf-8  -*-
"""
This bot creates capitalization redirects. For every page it is given, it
takes the page title and applies Python's capitalize() to it (first character
upper case, all remaining characters lower case; e.g. 'John Doe' becomes
'John doe'). If no page exists under the resulting title, the bot offers to
create it as a redirect to the original page. If such a page already exists,
nothing is done.

The command-line handling is derived from pywikipedia's replace.py.

You can run the bot with the following commandline parameters:

-file        - Work on all pages given in a local text file.
               Will read any [[wiki link]] and use these articles.
               Argument can also be given as "-file:filename".
-cat         - Work on all pages which are in a specific category.
               Argument can also be given as "-cat:categoryname".
-page        - Only process a single page.
               Argument can also be given as "-page:pagename". You can give this
               parameter multiple times to process multiple pages.
-ref         - Work on all pages that link to a certain page.
               Argument can also be given as "-ref:referredpagename".
-start       - Work on all pages in the wiki, starting at a given page. Choose
               "-start:!" to start at the beginning.
               Argument can also be given as "-start:pagename".
-namespace:n - Number of namespace to process. The parameter can be used
               multiple times. It works in combination with all other
               parameters, except for the -start parameter. If you e.g. want to
               iterate over all user pages starting at User:M, use
               -start:User:M.
-always      - Don't prompt you for each redirect that would be created.

NOTE: Only one page source (-file, -cat, -page, -ref or -start) is used per
run; if several different ones are given, the last one on the command line
wins. If no page source is given at all, this help text is shown.

Examples (replace "thisbot.py" with the name under which you saved this
script):

If you want to check every page of the category 'Physics', use:

    python thisbot.py -cat:Physics

If you want to check the single page 'John Doe', use:

    python thisbot.py -page:John_Doe
"""
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.
#
__version__='$Id: replace.py,v 1.87 2006/01/26 19:08:27 leogregianin Exp $'

from __future__ import generators
import sys, re
import wikipedia, pagegenerators, catlib, config

# Default edit summary, keyed by language code. main() passes this dictionary
# to wikipedia.translate() and installs the result with wikipedia.setAction().
# Drinibot.run() hands its own summary to put(), so this is presumably only a
# fallback.
# NOTE(review): the Spanish text reads "Robot: mass deletion of articles",
# which does not describe what this bot does -- confirm whether it should be
# changed.
msg = {'es': u'Robot: Borrado masivo de artículos'}

class Drinibot:
    """
    Bot that creates capitalization redirects.

    For every page yielded by the generator, the bot builds a second title
    with str.capitalize() (first character upper case, the rest lower case).
    If no page exists under that title, it offers to create one there as a
    redirect to the original page.
    """

    def __init__(self, generator, acceptall = False):
        """
        Arguments:
            * generator - An iterable yielding the pages to process.
            * acceptall - If True, create every missing redirect without
                          asking the user for confirmation.
        """
        self.generator = generator
        # Honour the caller's choice (e.g. the -always switch). This used to
        # be hard-coded to False, which silently discarded the argument.
        self.acceptall = acceptall

    def run(self):
        """
        Starts the robot.

        Walks over all pages of the generator and creates the missing
        capitalization redirects, prompting the user for each one unless
        'accept all' is active. Answering 'All' at a prompt switches
        'accept all' on for the remainder of the run.
        """
        for page in self.generator:
            titulo = page.title()
            np = wikipedia.Page(wikipedia.getSite(), titulo.capitalize())
            if np.exists():
                wikipedia.output(u'%s ya existe\nNo se hace nada' % np.title())
                continue
            wikipedia.output(u'%s no existe' % np.title())
            # Default to 'No' so that the name is always bound, even when no
            # prompt is shown.
            choice = 'N'
            if not self.acceptall:
                choice = wikipedia.inputChoice(u'Quieres crear la redireccion asociada?',  ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                # put(newtext, comment): the second argument is the edit
                # summary.
                np.put(u"#REDIRECT [[%s]]" % titulo, u"capitalization redirect")

    
def main():
    """
    Parse the command line, build the page generator and run the bot.

    Exactly one page source (-file, -cat, -page, -ref or -start) selects the
    generator; if several are given, the last one wins. -namespace:n filters
    the generator and -always is forwarded to Drinibot as 'acceptall'.

    If no page source was given, or if an unrecognised argument was passed,
    the module help text is printed and the program exits via sys.exit().
    """
    gen = None
    # How we want to retrieve information on which pages need to be changed.
    # One of 'textfile', 'category', 'singlepage', 'ref' or 'allpages'.
    source = None
    # Arguments that none of the branches below recognised. This bot takes no
    # positional arguments, so any entry here is a usage error.
    unknown_args = []
    # the file name which will be used when source is 'textfile'.
    textfilename = None
    # the category name which will be used when source is 'category'.
    categoryname = None
    # pages which will be processed when the -page parameter is used
    pageNames = []
    # a page whose referrers will be processed when the -ref parameter is used
    referredPageName = None
    # the page at which to start when the -start parameter is used
    firstPageTitle = None
    # will become True when the -always commandline parameter is used.
    acceptall = False
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Load default summary message.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))

    # Read commandline parameters.
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'replace')
        if not arg:
            continue
        # Each option is accepted either bare (the value is then asked for
        # interactively) or as "-option:value".
        if arg.startswith('-file'):
            if len(arg) == 5:
                textfilename = wikipedia.input(u'Please enter the filename:')
            else:
                textfilename = arg[6:]
            source = 'textfile'
        elif arg.startswith('-cat'):
            if len(arg) == 4:
                categoryname = wikipedia.input(u'Please enter the category name:')
            else:
                categoryname = arg[5:]
            source = 'category'
        elif arg.startswith('-page'):
            if len(arg) == 5:
                pageNames.append(wikipedia.input(u'Which page do you want to chage?'))
            else:
                pageNames.append(arg[6:])
            source = 'singlepage'
        elif arg.startswith('-ref'):
            if len(arg) == 4:
                referredPageName = wikipedia.input(u'Links to which page should be processed?')
            else:
                referredPageName = arg[5:]
            source = 'ref'
        elif arg.startswith('-start'):
            if len(arg) == 6:
                firstPageTitle = wikipedia.input(u'Which page do you want to chage?')
            else:
                firstPageTitle = arg[7:]
            source = 'allpages'
        elif arg == '-always':
            acceptall = True
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        else:
            # Previously this appended to an undefined list and crashed with
            # a NameError; collect the argument and report a usage error
            # below instead.
            unknown_args.append(arg)

    if source is None or unknown_args:
        # syntax error, show help text from the top of this file
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        sys.exit()

    if source == 'textfile':
        gen = pagegenerators.TextfilePageGenerator(textfilename)
    elif source == 'category':
        cat = catlib.Category(wikipedia.getSite(), categoryname)
        gen = pagegenerators.CategorizedPageGenerator(cat)
    elif source == 'singlepage':
        pages = [wikipedia.Page(wikipedia.getSite(), pageName) for pageName in pageNames]
        gen = iter(pages)
    elif source == 'allpages':
        # The namespace to walk through is taken from the start page's title.
        namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
        gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
    elif source == 'ref':
        referredPage = wikipedia.Page(wikipedia.getSite(), referredPageName)
        gen = pagegenerators.ReferringPageGenerator(referredPage)

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = Drinibot(preloadingGen, acceptall)
    bot.run()


if __name__ == "__main__":
    # Run the bot only when this file is executed as a script. The finally
    # clause makes sure wikipedia.stopme() is called even if main() raises
    # an exception or leaves through sys.exit().
    try:
        main()
    finally:
        wikipedia.stopme()