User:MER-C/Wiki.java

From Wikipedia, the free encyclopedia

/**
 *  @(#)Wiki.java 0.15 06/06/2008
 *  Copyright (C) 2007 - 2008 MER-C
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 3
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
 
import java.awt.image.*;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.logging.*;
import java.util.zip.*;
import javax.imageio.*;
import javax.security.auth.login.*; // useful exception types
 
/**
 *  This is a somewhat sketchy bot framework for editing MediaWiki wikis.
 *  Requires JDK 1.5 (5.0) or greater. Uses the [[mw:API|MediaWiki API]] for
 *  most operations. It is recommended that the server runs the latest version
 *  of MediaWiki (1.12), otherwise some functions may not work.
 *
 *  <p>
 *  A typical program would go something like this:
 *
 *  <pre>
 *  Wiki wiki;
 *  File f = new File("wiki.dat");
 *  if (f.exists()) // we already have a copy on disk
 *  {
 *      ObjectInputStream in = new ObjectInputStream(new FileInputStream(f));
 *      wiki = (Wiki)in.readObject();
 *  }
 *  else
 *  {
 *      try
 *      {
 *          wiki = new Wiki("en.wikipedia.org"); // create a new wiki connection to en.wikipedia.org
 *          wiki.setThrottle(5000); // set the edit throttle to 0.2 Hz
 *          wiki.login("ExampleBot", password); // log in as user ExampleBot, with the specified password
 *      }
 *      catch (FailedLoginException ex)
 *      {
 *          // deal with failed login attempt
 *      }
 *  }
 *  String[] titles = . . . ; // fetch a list of titles
 *  for (int i = 0; i < titles.length; i++)
 *  {
 *      new Thread()
 *      {
 *          public void run()
 *          {
 *              try
 *              {
 *                  // do something with titles[i]
 *              }
 *              catch (Exception ex)
 *              {
 *                  if (ex.getClass().equals(CredentialException.class))
 *                      // deal with protected page
 *                  else if (ex.getClass().equals(CredentialNotFoundException.class))
 *                      // deal with trying to do something we can't
 *                  else if (ex instanceof AccountLockedException)
 *                      // deal with being blocked
 *                  else if (ex instanceof IOException)
 *                      // deal with I/O error
 *              }
 *      }.start();
 *  }
 *  </pre>
 *
 *  Don't forget to release system resources held by this object when done.
 *  This may be achieved by logging out of the wiki. Since <tt>logout()</tt> is
 *  entirely offline, we can have a persistent session by simply serializing
 *  this wiki, then logging out as follows:
 *
 *  <pre>
 *  File f = new File("wiki.dat");
 *  ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(f));
 *  out.writeObject(wiki); // if we want the session to persist
 *  out.close();
 *  wiki.logout();
 *  </pre>
 *
 *  Long term storage of data (particularly greater than 20 days) is not
 *  recommended as the cookies may expire on the server.
 *
 *  <h4>Assertions</h4>
 *
 *  Without too much effort, it is possible to emulate assertions supported
 *  by [[mw:Extension:Assert Edit]]. The extension need not be installed
 *  for these assertions to work. Use <tt>setAssertionMode(int mode)</tt>
 *  to set the assertion mode. Checking for login, bot flag or new messages is
 *  supported by default. Other assertions can easily be defined, see {@link
 *  http://java.sun.com/j2se/1.4.2/docs/guide/lang/assert.html Programming
 *  With Assertions}. Assertions are applied on write methods only and are
 *  disabled by default.
 *
 *  <p>
 *  IMPORTANT: You need to run the program with the flag -enableassertions
 *  or -ea to enable assertions, example: <tt>java -ea Mybot</tt>.
 *
 *  <p>
 *  Please file bug reports at [[User talk:MER-C/Wiki.java#Bugs]]. Revision
 *  history is on the same page.
 *  <!-- all wikilinks are relative to the English Wikipedia -->
 *
 *  @author MER-C
 *  @version 0.15
 */
public class Wiki implements Serializable
{
    // namespaces
 
    /**
     *  Denotes the namespace of images and media, such that there is no
     *  description page. Uses the "Media:" prefix.
     *  @see IMAGE_NAMESPACE
     *  @since 0.03
     */
    public static final int MEDIA_NAMESPACE = -2;
 
    /**
     *  Denotes the namespace of pages with the "Special:" prefix. Note
     *  that many methods dealing with special pages may spew due to
     *  raw content not being available.
     *  @since 0.03
     */
    public static final int SPECIAL_NAMESPACE = -1;
 
    /**
     *  Denotes the main namespace, with no prefix.
     *  @since 0.03
     */
    public static final int MAIN_NAMESPACE = 0;
 
    /**
     *  Denotes the namespace for talk pages relating to the main
     *  namespace, denoted by the prefix "Talk:".
     *  @since 0.03
     */
    public static final int TALK_NAMESPACE = 1;
 
    /**
     *  Denotes the namespace for user pages, given the prefix "User:".
     *  @since 0.03
     */
    public static final int USER_NAMESPACE = 2;
 
    /**
     *  Denotes the namespace for user talk pages, given the prefix
     *  "User talk:".
     *  @since 0.03
     */
    public static final int USER_TALK_NAMESPACE = 3;
 
    /**
     *  Denotes the namespace for pages relating to the project,
     *  with prefix "Project:". It also goes by the name of whatever
     *  the project name was.
     *  @since 0.03
     */
    public static final int PROJECT_NAMESPACE = 4;
 
    /**
     *  Denotes the namespace for talk pages relating to project
     *  pages, with prefix "Project talk:". It also goes by the name
     *  of whatever the project name was, + "talk:".
     *  @since 0.03
     */
    public static final int PROJECT_TALK_NAMESPACE = 5;
 
    /**
     *  Denotes the namespace for image description pages. Has the prefix
     *  prefix "Image:". Do not create these directly, use upload() instead.
     *  @see MEDIA_NAMESPACE
     *  @since 0.03
     */
    public static final int IMAGE_NAMESPACE = 6;
 
    /**
     *  Denotes talk pages for image description pages. Has the prefix
     *  "Image talk:".
     *  @since 0.03
     */
    public static final int IMAGE_TALK_NAMESPACE = 7;
 
    /**
     *  Denotes the namespace for (wiki) system messages, given the prefix
     *  "MediaWiki:".
     *  @since 0.03
     */
    public static final int MEDIAWIKI_NAMESPACE = 8;
 
    /**
     *  Denotes the namespace for talk pages relating to system messages,
     *  given the prefix "MediaWiki talk:".
     *  @since 0.03
     */
    public static final int MEDIAWIKI_TALK_NAMESPACE = 9;
 
    /**
     *  Denotes the namespace for templates, given the prefix "Template:".
     *  @since 0.03
     */
    public static final int TEMPLATE_NAMESPACE = 10;
 
    /**
     *  Denotes the namespace for talk pages regarding templates, given
     *  the prefix "Template talk:".
     *  @since 0.03
     */
    public static final int TEMPLATE_TALK_NAMESPACE = 11;
 
    /**
     *  Denotes the namespace for help pages, given the prefix "Help:".
     *  @since 0.03
     */
    public static final int HELP_NAMESPACE = 12;
 
    /**
     *  Denotes the namespace for talk pages regarding help pages, given
     *  the prefix "Help talk:".
     *  @since 0.03
     */
    public static final int HELP_TALK_NAMESPACE = 13;
 
    /**
     *  Denotes the namespace for category description pages. Has the
     *  prefix "Category:".
     *  @since 0.03
     */
    public static final int CATEGORY_NAMESPACE = 14;
 
    /**
     *  Denotes the namespace for talk pages regarding categories. Has the
     *  prefix "Category talk:".
     *  @since 0.03
     */
    public static final int CATEGORY_TALK_NAMESPACE = 15;
 
    /**
     *  Denotes all namespaces.
     *  @since 0.03
     */
    public static final int ALL_NAMESPACES = 0x09f91102;
 
    // user rights
 
    /**
     *  Denotes no user rights.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int IP_USER = -1;
 
    /**
     *  Denotes a registered account.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int REGISTERED_USER = 1;
 
    /**
     *  Denotes a user who has admin rights.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int ADMIN = 2;
 
    /**
     *  Denotes a user who has bureaucrat rights.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int BUREAUCRAT = 4;
 
    /**
     *  Denotes a user who has steward rights.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int STEWARD = 8;
 
    /**
     *  Denotes a user who has a bot flag.
     *  @see User#userRights()
     *  @since 0.05
     */
    public static final int BOT = 16;
 
    // log types
 
    /**
     *  Denotes all logs.
     *  @since 0.06
     */
    public static final String ALL_LOGS = "";
 
    /**
     *  Denotes the user creation log.
     *  @since 0.06
     */
    public static final String USER_CREATION_LOG = "newusers";
 
    /**
     *  Denotes the upload log.
     *  @since 0.06
     */
    public static final String UPLOAD_LOG = "upload";
 
    /**
     *  Denotes the deletion log.
     *  @since 0.06
     */
    public static final String DELETION_LOG = "delete";
 
    /**
     *  Denotes the move log.
     *  @since 0.06
     */
    public static final String MOVE_LOG = "move";
 
    /**
     *  Denotes the block log.
     *  @since 0.06
     */
    public static final String BLOCK_LOG = "block";
 
    /**
     *  Denotes the protection log.
     *  @since 0.06
     */
    public static final String PROTECTION_LOG = "protect";
 
    /**
     *  Denotes the user rights log.
     *  @since 0.06
     */
    public static final String USER_RIGHTS_LOG = "rights";
 
    /**
     *  Denotes the user renaming log.
     *  @since 0.06
     */
    public static final String USER_RENAME_LOG = "renameuser";
 
    /**
     *  Denotes the bot status log.
     *  @since 0.08
     *  @deprecated [[Special:Makebot]] is deprecated, use
     *  <tt>USER_RIGHTS_LOG</tt> instead.
     */
    public static final String BOT_STATUS_LOG = "makebot";
 
    /**
     *  Denotes the page importation log.
     *  @since 0.08
     */
    public static final String IMPORT_LOG = "import";
 
    /**
     *  Denotes the edit patrol log.
     *  @since 0.08
     */
    public static final String PATROL_LOG = "patrol";
 
    // protection levels
 
    /**
     *  Denotes a non-protected page.
     *  @since 0.09
     */
    public static final int NO_PROTECTION = -1;
 
    /**
     *  Denotes semi-protection (i.e. only autoconfirmed users can edit this page)
     *  [edit=autoconfirmed;move=autoconfirmed].
     *  @since 0.09
     */
    public static final int SEMI_PROTECTION = 1;
 
    /**
     *  Denotes full protection (i.e. only admins can edit this page)
     *  [edit=sysop;move=sysop].
     *  @see #ADMIN
     *  @see User#userRights()
     *  @since 0.09
     */
    public static final int FULL_PROTECTION = 2;
 
    /**
     *  Denotes move protection (i.e. only admins can move this page) [move=sysop].
     *  We don't define semi-move protection because only autoconfirmed users
     *  can move pages anyway.
     *
     *  @see #ADMIN
     *  @see User#userRights()
     *  @since 0.09
     */
    public static final int MOVE_PROTECTION = 3;
 
    /**
     *  Denotes move and semi-protection (i.e. autoconfirmed editors can edit the
     *  page, but you need to be a sysop to move) [edit=autoconfirmed;move=sysop].
     *  Naturally, this value (4) is equal to SEMI_PROTECTION (1) +
     *  MOVE_PROTECTION (3).
     *
     *  @see #ADMIN
     *  @see User#userRights()
     *  @since 0.09
     */
    public static final int SEMI_AND_MOVE_PROTECTION = 4;
 
    /**
     *  Denotes protected deleted pages [create=sysop].
     *  @since 0.12
     *  @see #ADMIN
     */
     public static final int PROTECTED_DELETED_PAGE = 5;
 
    // assertion modes
 
    /**
     *  Use no assertions (i.e. 0).
     *  @see #setAssertionMode
     *  @since 0.11
     */
    public static final int ASSERT_NONE = 0;
 
    /**
     *  Assert that we are logged in (i.e. 1).
     *  @see #setAssertionMode
     *  @since 0.11
     */
    public static final int ASSERT_LOGGED_IN = 1;
 
    /**
     *  Assert that we have a bot flag (i.e. 2).
     *  @see #setAssertionMode
     *  @since 0.11
     */
    public static final int ASSERT_BOT = 2;
 
    /**
     *  Assert that we have no new messages. Not defined in Assert Edit, but
     *  some bots have this.
     *  @see #setAssertionMode
     *  @since 0.11
     */
    public static final int ASSERT_NO_MESSAGES = 4;
 
    // the domain of the wiki
    private String domain, query, base;
    private String scriptPath = "/w"; // need this for sites like partyvan.info
 
    // something to handle cookies
    private HashMap cookies = new HashMap(12);
    private HashMap cookies2 = new HashMap(10);
    private User user;
 
    // internal data storage
    private HashMap namespaces = null;
    private int max = 500; // awkward workaround
    private static Logger logger = Logger.getLogger("wiki"); // only one required
    private int throttle = 10000; // throttle
    private int maxlag = 5;
    private long lastlagcheck;
    private int assertion = 0; // assertion mode
 
    // serial version
    private static final long serialVersionUID = -8745212681497644126L;
 
    // constructors and config
 
    /**
     *  Logs which version we're using.
     *  @since 0.12
     */
    static
    {
        logger.logp(Level.CONFIG, "Wiki", "<init>", "Using Wiki.java v0.15.");
    }
 
    /**
     *  Creates a new connection to the English Wikipedia.
     *  @since 0.02
     */
    public Wiki()
    {
        this("");
    }
 
    /**
     *  Creates a new connection to a wiki. WARNING: if the wiki uses a
     *  $wgScriptpath other than the default <tt>/w</tt>, you need to call
     *  <tt>getScriptPath()</tt> to automatically set it. Alternatively, you
     *  can use the constructor below if you know it in advance.
     *
     *  @param domain the wiki domain name e.g. en.wikipedia.org (defaults to
     *  en.wikipedia.org)
     */
    public Wiki(String domain)
    {
        if (domain == null || domain.equals(""))
            domain = "en.wikipedia.org";
        this.domain = domain;
 
        // init variables
        base = "http://" + domain + scriptPath + "/index.php?title=";
        query = "http://" + domain + scriptPath +  "/api.php?format=xml&";
    }
 
    /**
     *  Creates a new connection to a wiki with $wgScriptpath set to
     *  <tt>scriptPath</tt>.
     *
     *  @param domain the wiki domain name
     *  @param scriptPath the script path
     *  @since 0.14
     */
    public Wiki(String domain, String scriptPath)
    {
        this.domain = domain;
        this.scriptPath = scriptPath;
 
        // init variables
        base = "http://" + domain + scriptPath + "/index.php?title=";
        query = "http://" + domain + scriptPath +  "/api.php?format=xml&";
    }
 
    /**
     *  Gets the domain of the wiki, as supplied on construction.
     *  @return the domain of the wiki
     *  @since 0.06
     */
    public String getDomain()
    {
        return domain;
    }
 
    /**
     *  Gets the editing throttle.
     *  @return the throttle value in milliseconds
     *  @see #setThrottle
     *  @since 0.09
     */
    public int getThrottle()
    {
        return throttle;
    }
 
    /**
     *  Sets the editing throttle. Read requests are not throttled or restricted
     *  in any way.
     *  @param throttle the new throttle value in milliseconds
     *  @see #getThrottle
     *  @since 0.09
     */
    public void setThrottle(int throttle)
    {
        this.throttle = throttle;
        log(Level.CONFIG, "Throttle set to " + throttle + " milliseconds", "setThrottle");
    }
 
    /**
     *  Detects the $wgScriptpath wiki variable and sets the bot framework up
     *  to use it. You need not call this if you know the script path is
     *  <tt>/w</tt>. See also [[mw:Manual:$wgScriptpath]].
     *
     *  @throws IOException if a network error occurs
     *  @return the script path, if you have any use for it
     *  @since 0.14
     */
    public String getScriptPath() throws IOException
    {
        scriptPath = parseAndCleanup("{{SCRIPTPATH}}");
        base = "http://" + domain + scriptPath + "/index.php?title=";
        query = "http://" + domain + scriptPath +  "/api.php?format=xml&";
        return scriptPath;
    }
 
    /**
     *  Determines whether this wiki is equal to another object.
     *  @param obj the object to compare
     *  @return whether this wiki is equal to such object
     *  @since 0.10
     */
    public boolean equals(Object obj)
    {
        if (!(obj instanceof Wiki))
            return false;
        return domain.equals(((Wiki)obj).domain);
    }
 
    /**
     *  Returns a hash code of this object.
     *  @return a hash code
     *  @since 0.12
     */
    public int hashCode()
    {
        return domain.hashCode() * maxlag - throttle;
    }
 
    /**
     *   Returns a string representation of this Wiki.
     *   @return a string representation of this Wiki.
     *   @since 0.10
     */
    public String toString()
    {
        try
        {
            // domain
            StringBuilder buffer = new StringBuilder("Wiki[domain=");
            buffer.append(domain);
 
            // user
            buffer.append(",user=");
            if (user != null)
            {
                buffer.append(user.getUsername());
                buffer.append("[rights=");
                buffer.append(user.userRights());
                buffer.append("],");
            }
            else
                buffer.append("null,");
 
            // throttle mechanisms
            buffer.append("throttle=");
            buffer.append(throttle);
            buffer.append(",maxlag=");
            buffer.append(maxlag);
            buffer.append(",assertionMode=");
            buffer.append(assertion);
            buffer.append(",cookies=");
            buffer.append(cookies);
            buffer.append(",cookies2=");
            buffer.append(cookies2);
            return buffer.toString();
        }
        catch (IOException ex)
        {
            // this shouldn't happen due to the user rights cache
            logger.logp(Level.SEVERE, "Wiki", "toString()", "Cannot retrieve user rights!", ex);
            return "";
        }
    }
 
    /**
     *  Gets the maxlag parameter. See [[mw:Manual:Maxlag parameter]], but
     *  this is implemented locally because screen scraping is something that
     *  should be avoided if possible.
     *
     *  @return the current maxlag, in seconds
     *  @see #setMaxLag
     *  @see #getCurrentDatabaseLag
     *  @since 0.11
     */
    public int getMaxLag()
    {
        return maxlag;
    }
 
    /**
     *  Sets the maxlag parameter. See [[mw:Manual:Maxlag parameter]], but
     *  this is implemented locally because screen scraping is something that
     *  should be avoided if possible. A value of less than 1s disables this
     *  mechanism.
     *
     *  @param lag the desired maxlag in seconds
     *  @see #getMaxLag
     *  @see #getCurrentDatabaseLag
     *  @since 0.11
     */
    public void setMaxLag(int lag)
    {
        maxlag = lag;
        log(Level.CONFIG, "Setting maximum allowable database lag to " + lag, "setMaxLag");
    }
 
    /**
     *  Gets the assertion mode. See [[mw:Extension:Assert Edit]] for what
     *  functionality this mimics. Assertion modes are bitmasks.
     *  @return the current assertion mode
     *  @see #setAssertionMode
     *  @since 0.11
     */
    public int getAssertionMode()
    {
        return assertion;
    }
 
    /**
     *  Sets the assertion mode. See [[mw:Extension:Assert Edit]] for what this
     *  functionality this mimics. Assertion modes are bitmasks.
     *  @param an assertion mode
     *  @see #getAssertionMode
     *  @since 0.11
     */
    public void setAssertionMode(int mode)
    {
        assertion = mode;
        log(Level.CONFIG, "Set assertion mode to " + mode, "setAssertionMode");
    }
 
    // meta stuff
 
    /**
     *  Logs in to the wiki. Uses [[Special:Userlogin]] due to security
     *  concerns. This method is thread-safe. If the specified username or
     *  password is incorrect, the thread blocks for 60 seconds then throws an
     *  exception.
     *
     *  @param username a username
     *  @param password a password (as a char[] due to JPasswordField)
     *  @throws FailedLoginException if the login failed due to incorrect
     *  username and/or password
     *  @throws IOException if a network error occurs
     *  @see #logout
     */
    public synchronized void login(String username, char[] password) throws IOException, FailedLoginException
    {
        // @revised 0.11 to remove screen scraping
 
        // sanitize
        String ps = new String(password);
        username = URLEncoder.encode(username, "UTF-8");
        ps = URLEncoder.encode(ps, "UTF-8");
 
        // start
        String url = "http://" + domain + scriptPath + "/api.php";
        URLConnection connection = new URL(url).openConnection();
        logurl(url, "login");
        setCookies(connection, cookies);
        connection.setDoOutput(true);
        connection.connect();
 
        // send
        PrintWriter out = new PrintWriter(connection.getOutputStream());
        out.print("format=xml&action=login&lgname=");
        out.print(username);
        out.print("&lgpassword=");
        out.print(password);
        out.close();
 
        // get the cookies
        grabCookies(connection, cookies);
 
        // determine success
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        boolean success = in.readLine().indexOf("result=\"Success\"") != -1;
        in.close();
        if (success)
        {
            user = new User(username);
            boolean apihighlimit = (user.userRights() & BOT) == BOT || (user.userRights() & ADMIN) == ADMIN;
            max = apihighlimit ? 5000 : 500;
            log(Level.INFO, "Successfully logged in as " + username + ", highLimit = " + apihighlimit, "login");
        }
        else
        {
            log(Level.WARNING, "Failed to log in as " + username, "login");
            try
            {
                Thread.sleep(60000); // to prevent brute force
            }
            catch (InterruptedException e)
            {
                // nobody cares
            }
            throw new FailedLoginException("Login failed.");
        }
    }
 
    /**
     *  Logs out of the wiki. This method is thread safe (so that we don't log
     *  out during an edit). All operations are conducted offline, so you can
     *  serialize this Wiki first.
     *  @see #login
     *  @see #logoutServerSide
     */
    public synchronized void logout()
    {
        cookies.clear();
        cookies2.clear();
        user = null;
        max = 500;
        log(Level.INFO, "Logged out", "logout");
    }
 
    /**
     *  Logs out of the wiki and destroys the session on the server. You will
     *  need to log in again instead of just reading in a serialized wiki.
     *  Equivalent to [[Special:Userlogout]]. This method is thread safe
     *  (so that we don't log out during an edit).
     *
     *  @throws IOException if a network error occurs
     *  @since 0.14
     *  @see #login
     *  @see #logout
     */
    public synchronized void logoutServerSide() throws IOException
    {
        // send the request
        String url = query + "action=logout";
        logurl(url, "logoutServerSide");
        URLConnection connection = new URL(url).openConnection();
        setCookies(connection, cookies);
        connection.connect();
 
        // destroy local cookies
        logout();
    }
 
    /**
     *  Determines whether the current user has new messages. (A human would
     *  notice a yellow bar at the top of the page).
     *  @return whether the user has new messages
     *  @throws IOException if a network error occurs
     *  @since 0.11
     */
    public boolean hasNewMessages() throws IOException
    {
        String url = query + "action=query&meta=userinfo&uiprop=hasmsg";
        logurl(url, "hasNewMessages");
        URLConnection connection = new URL(url).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        return in.readLine().indexOf("messages=\"\"") != -1;
    }
 
    /**
     *  Determines the current database replication lag.
     *  @return the current database replication lag
     *  @throws IOException if a network error occurs
     *  @see #setMaxLag
     *  @see #getMaxLag
     *  @since 0.10
     */
    public int getCurrentDatabaseLag() throws IOException
    {
        String url = query + "action=query&meta=siteinfo&siprop=dbrepllag";
        logurl(url, "getCurrentDatabaseLag");
        URLConnection connection = new URL(url).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        String line = in.readLine();
        int z = line.indexOf("lag=\"") + 5;
        String lag = line.substring(z, line.indexOf("\" />", z));
        log(Level.INFO, "Current database replication lag is " + lag + " seconds", "getCurrentDatabaseLag");
        return Integer.parseInt(lag);
    }
 
    /**
     *  Fetches some site statistics, namely the number of articles, pages,
     *  files, edits, users and admins. Equivalent to [[Special:Statistics]].
     *
     *  @return a map containing the stats. Use "articles", "pages", "files"
     *  "edits", "users" or "admins" to retrieve the respective value
     *  @throws IOException if a network error occurs
     *  @since 0.14
     */
    public HashMap<String, Integer> getSiteStatistics() throws IOException
    {
        // ZOMG hack to avoid excessive substring code
        String text = parseAndCleanup("{{NUMBEROFARTICLES:R}} {{NUMBEROFPAGES:R}} {{NUMBEROFFILES:R}} {{NUMBEROFEDITS:R}} " +
                "{{NUMBEROFUSERS:R}} {{NUMBEROFADMINS:R}}");
        String[] values = text.split("\\s");
        HashMap<String, Integer> ret = new HashMap<String, Integer>();
        String[] keys =
        {
            "version", "articles", "pages", "files", "edits", "users", "admins"
        };
        for (int i = 0; i < values.length; i++)
        {
            Integer value = new Integer(values[i]);
            ret.put(keys[i], value);
        }
        return ret;
    }
 
    /**
     *  Gets the version of MediaWiki this wiki runs e.g. 1.13 alpha (r31567).
     *  The r number corresponds to a revision in MediaWiki subversion
     *  (http://svn.wikimedia.org/viewvc/mediawiki/).
     *  @return the version of MediaWiki used
     *  @throws IOException if a network error occurs
     *  @since 0.14
     */
    public String version() throws IOException
    {
        return parseAndCleanup("{{CURRENTVERSION}}"); // ahh, the magicness of magic words
    }
 
    /**
     *  Renders the specified wiki markup by passing it to the MediaWiki
     *  parser through the API. (Note: this isn't implemented locally because
     *  I can't be stuffed porting Parser.php). One use of this method is to
     *  emulate the previewing functionality of the MediaWiki software.
     *
     *  @param markup the markup to parse
     *  @return the parsed markup as HTML
     *  @throws IOException if a network error occurs
     *  @since 0.13
     */
    public String parse(String markup) throws IOException
    {
        // start
        String url = "http://" + domain + scriptPath + "/api.php";
        URLConnection connection = new URL(url).openConnection();
        logurl(url, "parse");
        setCookies(connection, cookies);
        connection.setDoOutput(true);
        connection.connect();
 
        // send
        PrintWriter out = new PrintWriter(connection.getOutputStream());
        out.print("format=xml&action=parse&prop=text&text=");
        out.print(URLEncoder.encode(markup, "UTF-8"));
        out.close();
 
        // parse
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        String line;
        StringBuilder text = new StringBuilder(100000);
        while ((line = in.readLine()) != null)
        {
            int y = line.indexOf("<text>");
            int z = line.indexOf("</text>");
            if (y != -1)
            {
                text.append(line.substring(y + 6));
                text.append("\n");
            }
            else if (z != -1)
            {
                text.append(line.substring(0, z));
                text.append("\n");
                break; // done
            }
            else
            {
                text.append(line);
                text.append("\n");
            }
        }
        return decode(text.toString());
    }
 
    /**
     *  Fetches a random page in the main namespace. Equivalent to
     *  [[Special:Random]].
     *  @return the title of the page
     *  @throws IOException if a network error occurs
     *  @since 0.13
     */
    public String random() throws IOException
    {
        return random(MAIN_NAMESPACE);
    }
 
    /**
     *  Fetches a random page in the specified namespace. Equivalent to
     *  [[Special:Random]].
     *
     *  @param namespace a namespace
     *  @return the title of the page
     *  @throws IOException if a network error occurs
     *  @since 0.13
     */
    public String random(int namespace) throws IOException
    {
        // fetch
        String url = query + "action=query&list=random";
                url += (namespace == ALL_NAMESPACES ? "" : "&rnnamespace=" + namespace);
                URLConnection connection = new URL(url).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        String line = in.readLine();
 
        // parse
        int a = line.indexOf("title=\"") + 7;
        int b = line.indexOf("\"", a);
        return line.substring(a, b);
    }
 
    // static members
 
    /**
     *   Parses a list of links into its individual elements. Such a list
     *   should be in the form:
     *
     *  <pre>
     *  * [[Main Page]]
     *  * [[Wikipedia:Featured picture candidates]]
     *  * [[:Image:Example.png]]
     *  </pre>
     *
     *  in which case <tt>{ "Main Page", "Wikipedia:Featured picture
     *  candidates", "Image:Example.png" }</tt> is the return value.
     *
     *  @param list a list of pages
     *  @see #formatList
     *  @return an array of the page titles
     *  @since 0.11
     */
    public static String[] parseList(String list)
    {
        StringTokenizer tokenizer = new StringTokenizer(list, "[]");
        ArrayList<String> titles = new ArrayList<String>(667);
        tokenizer.nextToken(); // skip the first token
        while (tokenizer.hasMoreTokens())
        {
            String token = tokenizer.nextToken();
 
            // skip any containing new lines or double letters
            if (token.indexOf('\n') != -1)
                continue;
            if (token.equals(""))
                continue;
 
            // trim the starting colon, if present
            if (token.startsWith(":"))
                token = token.substring(1);
 
            titles.add(token);
        }
        return titles.toArray(new String[0]);
    }
 
    /**
     *  Formats a list of pages, say, generated from one of the query methods
     *  into something that would be editor-friendly. Does the exact opposite
     *  of <tt>parseList()</tt>, i.e. { "Main Page", "Wikipedia:Featured
     *  picture candidates", "Image:Example.png" } becomes the string:
     *
     *  <pre>
     *  *[[:Main Page]]
     *  *[[:Wikipedia:Featured picture candidates]]
     *  *[[:Image:Example.png]]
     *  </pre>
     *
     *  @param pages an array of page titles
     *  @return see above
     *  @see #parseList
     *  @since 0.14
     */
    public static String formatList(String[] pages)
    {
        StringBuilder buffer = new StringBuilder(10000);
        for (int i = 0; i < pages.length; i++)
        {
            buffer.append("*[[:");
            buffer.append(pages[i]);
            buffer.append("]]\n");
        }
        return buffer.toString();
    }
 
    /**
     *  Determines the intersection of two lists of pages a and b, i.e. a ∩ b.
     *  Such lists might be generated from the various list methods below.
     *  Examples from the English Wikipedia:
     *
     *  <pre>
     *  // find all orphaned and unwikified articles
     *  String[] articles = Wiki.intersection(wikipedia.getCategoryMembers("All orphaned articles", Wiki.MAIN_NAMESPACE),
     *      wikipedia.getCategoryMembers("All pages needing to be wikified", Wiki.MAIN_NAMESPACE));
     *
     *  // find all (notable) living people who are related to Barack Obama
     *  String[] people = Wiki.intersection(wikipedia.getCategoryMembers("Living people", Wiki.MAIN_NAMESPACE),
     *      wikipedia.whatLinksHere("Barack Obama", Wiki.MAIN_NAMESPACE));
     *  </pre>
     *
     *  @param a a list of pages
     *  @param b another list of pages
     *  @return a ∩ b (as String[])
     *  @since 0.04
     */
    public static String[] intersection(String[] a, String[] b)
    {
        // @revised 0.11 to take advantage of Collection.retainAll()
        // @revised 0.14 genericised to all page titles, not just category members
 
        List<String> aa = Arrays.asList(a);
        aa.retainAll(Arrays.asList(b));
        return aa.toArray(new String[0]);
    }
 
    /**
     *  Determines the list of articles that are in a but not b, i.e. a \ b.
     *  This is not the same as b \ a. Such lists might be generated from the
     *  various lists below. Some examples from the English Wikipedia:
     *
     *  <pre>
     *  // find all Martian crater articles that do not have an infobox
     *  String[] articles = Wiki.relativeComplement(wikipedia.getCategoryMembers("Craters on Mars"),
     *      wikipedia.whatTranscludesHere("Template:MarsGeo-Crater", Wiki.MAIN_NAMESPACE));
     *
     *  // find all images without a description that haven't been tagged "no license"
     *  String[] images = Wiki.relativeComplement(wikipedia.getCategoryMembers("Images lacking a description"),
     *      wikipedia.getCategoryMembers("All images with unknown copyright status"));
     *  </pre>
     *
     *  @param a a list of pages
     *  @param b another list of pages
     *  @return a \ b
     *  @since 0.14
     */
    public static String[] relativeComplement(String[] a, String[] b)
    {
        List<String> aa = Arrays.asList(a);
        aa.removeAll(Arrays.asList(b));
        return aa.toArray(new String[0]);
    }
 
    // page methods
 
    /**
     *  Returns the corresponding talk page to this page.
     *  @param title the page title
     *  @return the name of the talk page corresponding to <tt>title</tt>
     *  or "" if we cannot recognise it
     *  @throws IllegalArgumentException if given title is in a talk namespace
     *  or we try to retrieve the talk page of a Special: or Media: page.
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String getTalkPage(String title) throws IOException
    {
        int namespace = namespace(title);
        if (namespace % 2 == 1)
            throw new IllegalArgumentException("Cannot fetch talk page of a talk page!");
        if (namespace < 0)
            throw new IllegalArgumentException("Special: and Media: pages do not have talk pages!");
        if (namespace != MAIN_NAMESPACE) // remove the namespace
            title = title.substring(title.indexOf(':') + 1, title.length());
 
        // you should override this to add wiki-specific namespaces e.g.
        // Portal on en.wikipedia.org or UnNews: on Uncyclopedia.
        switch(namespace)
        {
            case MAIN_NAMESPACE:
                return "Talk:" + title;
            case USER_NAMESPACE:
                return "User talk:" + title;
            case PROJECT_NAMESPACE:
                return "Project talk:" + title;
            case TEMPLATE_NAMESPACE:
                return "Template talk:" + title;
            case CATEGORY_NAMESPACE:
                return "Category talk:" + title;
            case MEDIAWIKI_NAMESPACE:
                return "MediaWiki talk:" + title;
            case HELP_NAMESPACE:
                return "Help talk:" + title;
            case IMAGE_NAMESPACE:
                return "Image talk:" + title;
        }
        return "";
    }
 
    /**
     *  Gets the protection status of a page. WARNING: returns NO_PROTECTION
     *  for pages that are protected through the cascading mechanism, e.g.
     *  [[Talk:W/w/index.php]].
     *
     *  @param title the title of the page
     *  @return one of the various protection levels (i.e,. NO_PROTECTION,
     *  SEMI_PROTECTION, MOVE_PROTECTION, FULL_PROTECTION,
     *  SEMI_AND_MOVE_PROTECTION, PROTECTED_DELETED_PAGE)
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public int getProtectionLevel(String title) throws IOException
    {
        // fetch
        String url = query + "action=query&prop=info&inprop=protection&titles=" + URLEncoder.encode(title, "UTF-8");
        logurl(url, "getProtectionLevel");
        checkLag("getProtectionLevel");
        URLConnection connection = new URL(url).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        String line = in.readLine();
 
        // parse
        int z = line.indexOf("type=\"edit\"");
        if (z != -1)
        {
            String s = line.substring(z, z + 30);
            if (s.indexOf("sysop") != -1)
                return FULL_PROTECTION;
            s = line.substring(z + 30, line.length()); // cut out edit tag
            if (line.indexOf("level=\"sysop\"") != -1)
                return SEMI_AND_MOVE_PROTECTION;
            return SEMI_PROTECTION;
        }
        if (line.indexOf("type=\"move\"") != -1)
            return MOVE_PROTECTION;
        if (line.indexOf("type=\"create\"") != -1)
            return PROTECTED_DELETED_PAGE;
        return NO_PROTECTION;
    }
 
    /**
     *  Returns the namespace a page is in.
     *  @param title the title of the page
     *  @return one of namespace types above, or a number for custom
     *  namespaces or ALL_NAMESPACES if we can't make sense of it
     *  @throws IOException if a network error occurs
     *  @since 0.03
     */
    public int namespace(String title) throws IOException
    {
        // sanitise
        title = title.replace('_', ' ');
        if (title.indexOf(':') == -1)
            return MAIN_NAMESPACE;
        String namespace = title.substring(0, title.indexOf(':'));
 
        // all wiki namespace test
        if (namespace.equals("Project talk"))
            return PROJECT_TALK_NAMESPACE;
        if (namespace.equals("Project"))
            return PROJECT_NAMESPACE;
 
        if (namespaces == null) // cache this, as it will be called often
        {
            URLConnection connection = new URL(query + "action=query&meta=siteinfo&siprop=namespaces").openConnection();
            logurl(query + "action=query&meta=siteinfo&siprop=namespaces", "namespace");
            checkLag("namespace");
            setCookies(connection, cookies);
            connection.connect();
 
            // read the first line, as it is the only thing worth paying attention to
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            namespaces = new HashMap(30);
            while (line.indexOf("<ns") != -1)
            {
                int x = line.indexOf("<ns id=");
                if (line.charAt(x + 8) == '0') // skip main, it's a little different
                {
                    line = line.substring(13, line.length());
                    continue;
                }
                int y = line.indexOf("</ns>");
                String working = line.substring(x + 8, y);
                int ns = Integer.parseInt(working.substring(0, working.indexOf('"')));
                String name = working.substring(working.indexOf(">") + 1, working.length());
                namespaces.put(name, new Integer(ns));
                line = line.substring(y + 5, line.length());
            }
            in.close();
            log(Level.INFO, "Successfully retrieved namespace list (" + (namespaces.size() + 1) + " namespaces)", "namespace");
        }
 
        if (!namespaces.containsKey(namespace))
            return MAIN_NAMESPACE; // For titles like UN:NRV
        Iterator i = namespaces.entrySet().iterator();
        while (i.hasNext())
        {
            Map.Entry entry = (Map.Entry)i.next();
            if (entry.getKey().equals(namespace))
                return ((Integer)entry.getValue()).intValue();
        }
        return ALL_NAMESPACES; // unintelligble title
    }
 
    /**
     *  Determines whether a series of pages exist. Requires the
     *  [[mw:Extension:ParserFunctions|ParserFunctions extension]].
     *
     *  @param titles the titles to check
     *  @return whether the pages exist
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public boolean[] exists(String... titles) throws IOException
    {
        // @revised 0.15 optimized for multiple queries, now up to 500x faster!
 
        StringBuilder wikitext = new StringBuilder(15000);
        StringBuilder parsed = new StringBuilder(1000);
        for (int i = 0; i < titles.length; i++)
        {
            // build up the parser string
            wikitext.append("{{#ifexist:");
            wikitext.append(titles[i]);
            wikitext.append("|1|0}}"); // yay! binary! (well, almost)
 
            // Send them off in batches of 500. Change this if your expensive
            // parser function limit is different.
            if (i % 500 == 499 || i == titles.length - 1)
            {
                parsed.append(parseAndCleanup(wikitext.toString()));
                wikitext = new StringBuilder(15000);
                System.out.println(parsed.length());
            }
        }
 
        // now parse the resulting "binary"
        char[] characters = parsed.toString().toCharArray();
        boolean[] ret = new boolean[characters.length];
        for (int i = 0; i < characters.length; i++)
        {
            // we would want to use the ternary operator here but other things can go wrong
            if (characters[i] != '1' && characters[i] != '0')
                throw new UnknownError("Unable to parse output. Perhaps the ParserFunctions extension is not installed, or this is a bug.");
            ret[i] = (characters[i] == '1') ? true : false;
        }
        return ret;
    }
 
    /**
     *  Gets the raw wikicode for a page. WARNING: does not support special
     *  pages. Check [[User talk:MER-C/Wiki.java#Special page equivalents]]
     *  for fetching the contents of special pages. Use <tt>getImage()</tt> to
     *  fetch an image.
     *
     *  @param title the title of the page.
     *  @return the raw wikicode of a page
     *  @throws UnsupportedOperationException if you try to retrieve the text of a
     *  Special: page or a Media: page
     *  @throws IOException if a network error occurs
     *  @see #edit
     */
    public String getPageText(String title) throws IOException
    {
        // pitfall check
        if (namespace(title) < 0)
            throw new UnsupportedOperationException("Cannot retrieve Special: or Media: pages!");
 
        // sanitise the title
        title = URLEncoder.encode(title, "UTF-8");
 
        // go for it
        String URL = base + title + "&action=raw";
        logurl(URL, "getPageText");
        checkLag("getPageText");
        URLConnection connection = new URL(URL).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
 
        // get the text
        String line;
        StringBuilder text = new StringBuilder(100000);
        while ((line = in.readLine()) != null)
        {
            text.append(line);
            text.append("\n");
        }
 
        in.close();
        log(Level.INFO, "Successfully retrieved text of " + title, "getPageText");
        return decode(text.toString());
    }
 
    /**
     *  Gets the contents of a page, rendered in HTML (as opposed to
     *  wikitext). WARNING: only supports special pages in certain
     *  circumstances, for example <tt>getRenderedText("Special:Recentchanges")
     *  </tt> returns the 50 most recent change to the wiki in pretty-print
     *  HTML. You should test any use of this method on-wiki through the text
     *  <tt>{{Special:Specialpage}}</tt>. Use <tt>getImage()</tt> to fetch an
     *  image. Be aware of any transclusion limits, as outlined at
     *  [[Wikipedia:Template limits]].
     *
     *  @param title the title of the page
     *  @return the rendered contents of that page
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String getRenderedText(String title) throws IOException
    {
        // @revised 0.13 genericised to parse any wikitext
        return parse("{{" + title + "}}");
    }
 
    /**
     *  Edits a page by setting its text to the supplied value. This method is
     *  thread safe and blocks for a minimum time as specified by the
     *  throttle.
     *
     *  @param text the text of the page
     *  @param title the title of the page
     *  @param summary the edit summary. See [[Help:Edit summary]]. Summaries
     *  longer than 200 characters are truncated server-side.
     *  @param minor whether the edit should be marked as minor. See
     *  [[Help:Minor edit]].
     *  @throws IOException if a network error occurs
     *  @throws AccountLockedException if user is blocked
     *  @throws CredentialException if page is protected and we can't edit it
     *  @throws UnsupportedOperationException if you try to retrieve the text
     *  of a Special: page or a Media: page
     *  @see #getPageText
     */
    public synchronized void edit(String title, String text, String summary, boolean minor) throws IOException, LoginException
    {
        // throttle
        long start = System.currentTimeMillis();
 
        // pitfall check
        if (namespace(title) < 0)
            throw new UnsupportedOperationException("Cannot edit Special: or Media: pages!");
 
        // check if the page is protected, and if we can edit it
        if (!checkRights(getProtectionLevel(title), false))
        {
            CredentialException ex = new CredentialException("Permission denied: page is protected.");
            logger.logp(Level.WARNING, "Wiki", "edit()", "[" + domain + "] Cannot edit - permission denied.", ex);
            throw ex;
        }
 
        // sanitise
        title = URLEncoder.encode(title, "UTF-8");
        summary = URLEncoder.encode(summary, "UTF-8");
        text = URLEncoder.encode(text, "UTF-8");
 
        // What we need to do is get the edit page and fish out the wpEditToken, wpStartTime and
        // wpEditTime values. See [[mw:Manual:Parameters to index.php]]
        String URL = base + title + "&action=edit";
        logurl(URL, "edit"); // no need to check lag, have already done so in getProtectionLevel()
        URLConnection connection = new URL(URL).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        grabCookies(connection, cookies2);
        cookies2.putAll(cookies);
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
 
        // dump cookies for debug purpose
//        System.out.println(cookies2);
 
        // more specifically, we're looking for "name="wpEditToken"", and related
        String line, wpEditToken = "", wpEdittime = "";
        boolean editRetrieved = false, timeRetrieved = false, watchRetrieved = false;
        boolean watched = false;
        while ((line = in.readLine()) != null)
        {
            if (line.indexOf("name=\"wpEditToken\"") != -1)
            {
                int x = line.indexOf("value=\"") + 7;
                wpEditToken = URLEncoder.encode(line.substring(x, line.indexOf('\"', x)), "UTF-8");
                editRetrieved = true;
            }
            else if (line.indexOf("name=\"wpEdittime\"") != -1)
            {
                int x = line.indexOf("value=\"") + 7;
                wpEdittime = line.substring(x, line.indexOf('\"', x));
                timeRetrieved = true;
            }
            else if (line.indexOf("name=\"wpWatchthis\"") != -1)
            {
                watched = (line.indexOf("checked=\"") != -1);
                watchRetrieved = true;
            }
            else if (editRetrieved && timeRetrieved && watchRetrieved)
            {
                in.close();
                break; // bandwidth hack
            }
        }
 
        // this is what accepts the text
        URL = base + title + "&action=submit";
        logurl(URL, "edit");
        connection = new URL(URL).openConnection();
        setCookies(connection, cookies2);
        connection.setDoOutput(true);
        PrintWriter out = new PrintWriter(connection.getOutputStream());
 
        // now we send the data
        out.print("wpTextbox1=");
        out.print(text);
        out.print("&wpSummary=");
        out.print(summary);
        if (minor)
            out.print("&wpMinoredit=1");
        if (watched)
            out.print("&wpWatchthis=1");
        out.print("&wpEdittime=");
        out.print(wpEdittime);
        out.print("&wpEditToken=");
        out.print(wpEditToken);
        // note that we can compute wpStartTime offline
        out.print("&wpStarttime=");
        String wpStarttime = calendarToTimestamp(new GregorianCalendar(TimeZone.getTimeZone("UTC")));
        out.print(wpStarttime);
 
        //done
        out.close();
        try
        {
            // it's somewhat strange that the edit only sticks when you start reading the response...
            in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            in.readLine();
 
            // debug version
//            while ((line = in.readLine()) != null)
//                 System.out.println(line);
        }
        catch (IOException e)
        {
//            InputStream err = ((HttpURLConnection)connection).getInputStream();
//            if (err == null)
//                throw e;
//            in = new BufferedReader(new InputStreamReader(err));
            logger.logp(Level.SEVERE, "Wiki", "edit()", "[" + domain + "] EXCEPTION:  ", e);
        }
        in.close();
        log(Level.INFO, "Successfully edited " + title, "edit");
 
         // throttle
        try
        {
            long z = throttle - System.currentTimeMillis() + start;
            if (z > 0)
                Thread.sleep(z);
        }
        catch (InterruptedException e)
        {
            // nobody cares
        }
    }
 
    /**
     *  Prepends something to the given page. A convenience method for
     *  adding maintainance templates, rather than getting and setting the
     *  page yourself. Edit summary is automatic, being "+whatever".
     *
     *  @param title the title of the page
     *  @param stuff what to prepend to the page
     *  @param minor whether the edit is minor
     *  @throws AccountLockedException if user is blocked
     *  @throws CredentialException if page is protected and we can't edit it
     *  @throws UnsupportedOperationException if you try to retrieve the text
     *  of a Special: page or a Media: page
     *  @throws IOException if a network error occurs
     */
    public void prepend(String title, String stuff, boolean minor) throws IOException, LoginException
    {
        StringBuilder text = new StringBuilder(100000);
        text.append(stuff);
        text.append(getPageText(title));
        edit(title, text.toString(), "+" + stuff, minor);
    }
 
    // image methods
 
    /**
     *  Fetches a raster image file. This method uses <tt>ImageIO.read()</tt>,
     *  and as such only JPG, PNG, GIF and BMP formats are supported.
     *  Animated GIFs have not been tested yet.
     *
     *  @param title the title of the image (i.e. Image:Example.jpg, not
     *  Example.jpg)
     *  @return the image, encapsulated in a BufferedImage
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public BufferedImage getImage(String title) throws IOException
    {
        return getImage(title, -1, -1);
    }
 
    /**
     *  Fetches a thumbnail of a raster image file. This method uses
     *  <tt>ImageIO.read()</tt>, and as such only JPG, PNG, GIF and BMP
     *  formats are supported. SVG images are supported if and only if
     *  a thumbnail width and height are specified. Animated GIFs have not been
     *  tested yet.
     *
     *  @param title the title of the image without the Image: prefix (i.e.
     *  Example.jpg, not Example.jpg)
     *  @param width the width of the thumbnail (use -1 for actual width)
     *  @param height the height of the thumbnail (use -1 for actual height)
     *  @return the image, encapsulated in a BufferedImage, null if we cannot
     *  read the image
     *  @throws IOException if a network error occurs
     *  @since 0.13
     */
    public BufferedImage getImage(String title, int width, int height) throws IOException
    {
        // sanitise the title
        title = URLEncoder.encode(title, "UTF-8");
 
        // this is a two step process - first we fetch the image url
        StringBuilder url = new StringBuilder(query);
        url.append("&action=query&prop=imageinfo&iiprop=url&titles=Image:");
        url.append(URLEncoder.encode(title, "UTF-8"));
        url.append("&iiurlwidth=");
        url.append(width);
        url.append("&iirulheight=");
        url.append(height);
        logurl(url.toString(), "getImage");
        checkLag("getImage");
        URLConnection connection = new URL(url.toString()).openConnection();
        setCookies(connection, cookies);
        connection.connect();
        BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
        String line = in.readLine();
        int a = line.indexOf("url=\"") + 5;
        int b = line.indexOf("\"", a);
        String url2 = line.substring(a, b);
 
        // then we use ImageIO to read from it
        logurl(url2, "getImage");
        BufferedImage image = ImageIO.read(new URL(url2));
        log(Level.INFO, "Successfully retrieved image \"" + title + "\"", "getImage");
        return image;
    }
 
    /**
     *  Uploads an image. Equivalent to [[Special:Upload]]. Supported
     *  extensions are (case-insensitive) "png", "jpg", "gif" and "svg". You
     *  need to be logged on to do this. This method is thread safe and subject
     *  to the throttle.
     *
     *  @param file the image file
     *  @param filename the target file name (Example.png, not Image:Example.png)
     *  @param contents the contents of the image description page
     *  @throws CredentialNotFoundException if not logged in
     *  @throws CredentialException if page is protected and we can't upload
     *  @throws IOException if a network/local filesystem error occurs
     *  @throws AccountLockedException if user is blocked
     *  @since 0.11
     */
    public synchronized void upload(File file, String filename, String contents) throws IOException, LoginException
    {
        // throttle
        long start = System.currentTimeMillis();
 
        // check for log in
        if (user == null)
        {
            CredentialNotFoundException ex = new CredentialNotFoundException("Permission denied: you need to be registered to upload files.");
            logger.logp(Level.SEVERE, "Wiki", "upload()", "[" + domain + "] Cannot upload - permission denied.", ex);
            throw ex;
        }
 
        // check if the page is protected, and if we can upload
        String filename2 = URLEncoder.encode(filename.replace(" ", "_"), "UTF-8");
        String fname = "Image:" + filename2;
        if (!checkRights(getProtectionLevel(fname), false))
        {
            CredentialException ex = new CredentialException("Permission denied: image is protected.");
            logger.logp(Level.WARNING, "Wiki", "upload()", "[" + domain + "] Cannot upload - permission denied.", ex);
            throw ex;
        }
 
        // prepare some parameters
        String extension = filename2.substring(filename2.length() - 3).toUpperCase().toLowerCase();
        if (extension.equals("jpg"))
            extension = "jpeg";
        else if (extension.equals("svg"))
            extension += "+xml";
 
        // upload the image
        // this is how we do multipart post requests, by the way
        // see also: http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.2
        String url = base + "Special:Upload";
        URLConnection connection = new URL(url).openConnection();
        logurl(url, "upload");
        String boundary = "----------NEXT PART----------";
        connection.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
        setCookies(connection, cookies);
        connection.setDoOutput(true);
        connection.connect();
 
        // send data
        boundary = "--" + boundary + "\r\n";
        DataOutputStream out = new DataOutputStream(connection.getOutputStream());
//        DataOutputStream out = new DataOutputStream(System.out); // debug version
        out.writeBytes(boundary);
        out.writeBytes("Content-Disposition: form-data; name=\"wpIgnoreWarning\"\r\n\r\n");
        out.writeBytes("true\r\n");
        out.writeBytes(boundary);
        out.writeBytes("Content-Disposition: form-data; name=\"wpDestFile\"\r\n\r\n");
        out.writeBytes(filename2);
        out.writeBytes("\r\n");
        out.writeBytes(boundary);
        out.writeBytes("Content-Disposition: form-data; name=\"wpUploadFile\"; filename=\"");
        out.writeBytes(filename);
        out.writeBytes("\"\r\n");
        out.writeBytes("Content-Type: image/");
        out.writeBytes(extension);
        out.writeBytes("\r\n\r\n");
 
        // write image
        FileInputStream fi = new FileInputStream(file);
        byte[] b = new byte[fi.available()];
        fi.read(b);
        out.write(b);
        fi.close();
 
        // write the rest
        out.writeBytes("\r\n");
        out.writeBytes(boundary);
        out.writeBytes("Content-Disposition: form-data; name=\"wpUploadDescription\"\r\n");
        out.writeBytes("Content-Type: text/plain\r\n\r\n");
        out.writeBytes(contents);
        out.writeBytes("\r\n");
        out.writeBytes(boundary);
        out.writeBytes("Content-Disposition: form-data; name=\"wpUpload\"\r\n\r\n");
        out.writeBytes("Upload file\r\n");
        out.writeBytes(boundary.substring(0, boundary.length() - 2) + "--\r\n");
        out.close();
 
        // done
        BufferedReader in;
        try
        {
            // it's somewhat strange that the edit only sticks when you start reading the response...
            in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            in.readLine();
 
            // debug version
//            String line;
//            while ((line = in.readLine()) != null)
//                 System.out.println(line);
        }
        catch (IOException e)
        {
            InputStream err = ((HttpURLConnection)connection).getInputStream();
            if (err == null)
                throw e;
            in = new BufferedReader(new InputStreamReader(err));
            logger.logp(Level.SEVERE, "Wiki", "upload()", "[" + domain + "] EXCEPTION:  ", e);
        }
        in.close();
        log(Level.INFO, "Successfully uploaded " + filename, "upload");
 
        // throttle
        try
        {
            long z = throttle - System.currentTimeMillis() + start;
            if (z > 0)
                Thread.sleep(z);
        }
        catch (InterruptedException e)
        {
            // nobody cares
        }
    }
 
    // lists
 
    /**
     *  Performs a full text search of the wiki. Equivalent to
     *  [[Special:Search]], or that little textbox in the sidebar.
     *
     *  @param search a search string
     *  @param namespaces the namespaces to search. If no parameters are passed
     *  then the default is MAIN_NAMESPACE only.
     *  @return the search results
     *  @throws IOException if a network error occurs
     *  @since 0.14
     */
    public String[] search(String search, int... namespaces) throws IOException
    {
        // this varargs thing is really handy, there's no need to define a
        // separate search(String search) while allowing multiple namespaces
 
        // default to main namespace
        if (namespaces.length == 0)
            namespaces = new int[] { MAIN_NAMESPACE };
        StringBuilder url = new StringBuilder(query);
        url.append("action=query&list=search&srwhat=text&srlimit=");
        url.append(250); // a hack, some searches may not produce results when there are some
        url.append("&srsearch=");
        url.append(URLEncoder.encode(search, "UTF-8"));
        url.append("&srnamespace=");
        for (int i = 0; i < namespaces.length; i++)
        {
            url.append(namespaces[i]);
            if (i != namespaces.length - 1)
                url.append("|");
        }
        url.append("&sroffset=");
 
        // some random variables we need later
        int sroffset = 0;
        boolean done = false;
        ArrayList<String> results = new ArrayList<String>(5000);
 
        // fetch and iterate through the search results
        while (!done)
        {
            String newURL = url.toString() + sroffset;
            URLConnection connection = new URL(newURL).openConnection();
            checkLag("search");
            logurl(newURL, "search");
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // if this is the last page of results then there is no sroffset parameter
            if (line.indexOf("sroffset=\"") == -1)
                done = true;
 
            // strip the search results
            // typical form: <p ns="0" title="Main Page" />
            while (line.indexOf("title=\"") != -1)
            {
                int a = line.indexOf("title=\"") + 7;
                int b = line.indexOf("\"", a);
                results.add(line.substring(a, b));
                line = line.substring(b);
            }
            // increment the offset
            in.close();
            sroffset += 250; // CHANGE ME
        }
 
        log(Level.INFO, "Successfully searched for string \"" + search + "\" (" + results.size() + " items found)", "search");
        return results.toArray(new String[0]);
    }
 
    /**
     *  Returns a list of pages which the use the specified image.
     *  @param image the image (Example.png, not Image:Example.png)
     *  @return the list of pages that use this image
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String[] imageUsage(String image) throws IOException
    {
        return imageUsage(image, ALL_NAMESPACES);
    }
 
    /**
     *  Returns a list of pages in the specified namespace which use the
     *  specified image.
     *  @param image the image (Example.png, not Image:Example.png)
     *  @param namespace a namespace
     *  @return the list of pages that use this image
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String[] imageUsage(String image, int namespace) throws IOException
    {
        String url = query + "action=query&list=imageusage&iutitle=Image:" + URLEncoder.encode(image, "UTF-8") + "&iulimit=" + max;
        if (namespace != ALL_NAMESPACES)
            url += "&iunamespace=" + namespace;
 
        // fiddle
        ArrayList<String> pages = new ArrayList<String>(1333);
        String next = "";
        while ((pages.size() % max) == 0)
        {
            // connect
            if (pages.size() != 0)
                next = "&iucontinue="  + next;
            URLConnection connection = new URL(url + next).openConnection();
            logurl(url + next, "imageUsage");
            checkLag("imageUsage");
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            if (line.indexOf("iucontinue") != -1)
                next = line.substring(line.indexOf("iucontinue") + 12, line.indexOf("\" />"));
            while (line.indexOf("title") != -1)
            {
                int x = line.indexOf("title=\"");
                int y = line.indexOf("\" />", x);
                pages.add(decode(line.substring(x + 7, y)));
                line = line.substring(y + 4, line.length());
            }
 
            // short circuit
            in.close();
            if (pages.size() == 0)
                break;
        }
        log(Level.INFO, "Successfully retrieved usages of Image:" + image + " (" + pages.size() + " items)", "imageUsage");
        return pages.toArray(new String[0]);
    }
 
    /**
     *  Returns a list of all pages linking to this page. Equivalent to
     *  [[Special:Whatlinkshere]].
     *
     *  @param title the title of the page
     *  @return the list of pages linking to the specified page
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String[] whatLinksHere(String title) throws IOException
    {
        return whatLinksHere(title, ALL_NAMESPACES, false);
    }
 
    /**
     *  Returns a list of all pages linking to this page. Equivalent to
     *  [[Special:Whatlinkshere]].
     *
     *  @param title the title of the page
     *  @param namespace a namespace
     *  @return the list of pages linking to the specified page
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String[] whatLinksHere(String title, int namespace) throws IOException
    {
        return whatLinksHere(title, namespace, false);
    }
 
    /**
     *  Returns a list of all pages linking to this page within the specified
     *  namespace. Alternatively, we can retrive a list of what redirects to a
     *  page by setting <tt>redirects</tt> to true. Equivalent to
     *  [[Special:Whatlinkshere]].
     *
     *  @param title the title of the page
     *  @param namespace a namespace
     *  @param redirects whether we should limit to redirects only
     *  @return the list of pages linking to the specified page
     *  @throws IOException if a network error occurs
     *  @since 0.10
     */
    public String[] whatLinksHere(String title, int namespace, boolean redirects) throws IOException
    {
        String url = query + "action=query&list=backlinks&bllimit=" + max + "&bltitle=" + URLEncoder.encode(title, "UTF-8");
        if (namespace != ALL_NAMESPACES)
            url += "&blnamespace=" + namespace;
        if (redirects)
            url += "&blfilterredir=redirects";
 
        // fiddle
        ArrayList<String> pages = new ArrayList<String>(6667); // generally enough
        String next = "";
        while ((pages.size() % max) == 0)
        {
            // connect
            if (pages.size() != 0)
                next = "&blcontinue=" + URLEncoder.encode(next, "UTF-8");
            logurl(url + next, "whatLinksHere");
            checkLag("whatLinksHere");
            URLConnection connection = new URL(url + next).openConnection();
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            if (line.indexOf("blcontinue") != -1)
                next = line.substring(line.indexOf("blcontinue") + 12, line.indexOf("\" />"));
            while (line.indexOf("title") != -1)
            {
                int x = line.indexOf("title=\"");
                int y = line.indexOf("\" ", x);
                pages.add(decode(line.substring(x + 7, y)));
                line = line.substring(y + 4, line.length());
            }
 
            // short circuit
            in.close();
            if (pages.size() == 0)
                break;
        }
        log(Level.INFO, "Successfully retrieved " + (redirects ? "redirects to " : "links to ") + title + " (" + pages.size() + " items)", "whatLinksHere");
        return pages.toArray(new String[0]);
    }
 
    /**
     *  Returns a list of all pages transcluding to a page.
     *
     *  @param title the title of the page, e.g. "Template:Stub"
     *  @return the list of pages transcluding the specified page
     *  @throws IOException if a netwrok error occurs
     *  @since 0.12
     */
    public String[] whatTranscludesHere(String title) throws IOException
    {
        return whatTranscludesHere(title, ALL_NAMESPACES);
    }
 
    /**
     *  Returns a list of all pages transcluding to a page within the specified
     *  namespace.
     *
     *  @param title the title of the page, e.g. "Template:Stub"
     *  @param namespace a namespace
     *  @return the list of pages transcluding the specified page
     *  @throws IOException if a netwrok error occurs
     *  @since 0.12
     */
    public String[] whatTranscludesHere(String title, int namespace) throws IOException
    {
        String url = query + "action=query&list=embeddedin&eilimit=" + max + "&eititle=" + URLEncoder.encode(title, "UTF-8");
        if (namespace != ALL_NAMESPACES)
            url += "&einamespace=" + namespace;
 
        // fiddle
        ArrayList<String> pages = new ArrayList<String>(6667); // generally enough
        String next = "";
        while ((pages.size() % max) == 0)
        {
            // connect
            if (pages.size() != 0)
                next = "&eicontinue=" + URLEncoder.encode(next, "UTF-8");
            logurl(url + next, "whatTranscludesHere");
            checkLag("whatTranscludesHere");
            URLConnection connection = new URL(url + next).openConnection();
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            if (line.indexOf("eicontinue") != -1)
                next = line.substring(line.indexOf("eicontinue") + 12, line.indexOf("\" />"));
            while (line.indexOf("title") != -1)
            {
                int x = line.indexOf("title=\"");
                int y = line.indexOf("\" />", x);
                pages.add(decode(line.substring(x + 7, y)));
                line = line.substring(y + 4, line.length());
            }
 
            // short circuit
            in.close();
            if (pages.size() == 0)
                break;
        }
        log(Level.INFO, "Successfully retrieved transclusions of " + title + " (" + pages.size() + " items)", "whatTranscludesHere");
        return pages.toArray(new String[0]);
    }
 
    /**
     *  Gets the members of a category.
     *
     *  @param name the name of the category (e.g. Candidates for speedy
     *  deletion, not Category:Candidates for speedy deletion)
     *  @return a String[] containing page titles of members of the category
     *  @throws IOException if a network error occurs
     *  @since 0.02
     */
    public String[] getCategoryMembers(String name) throws IOException
    {
        return getCategoryMembers(name, ALL_NAMESPACES);
    }
 
    /**
     *  Gets the members of a category.
     *
     *  @param name the name of the category (e.g. Candidates for speedy
     *  deletion, not Category:Candidates for speedy deletion)
     *  @param namespace filters by namespace, returns empty if namespace
     *  does not exist
     *  @return a String[] containing page titles of members of the category
     *  @throws IOException if a network error occurs
     *  @since 0.03
     */
    public String[] getCategoryMembers(String name, int namespace) throws IOException
    {
        String url = query + "action=query&list=categorymembers&cmprop=title&cmlimit=" + max + "&cmtitle=Category:" + URLEncoder.encode(name, "UTF-8");
        if (namespace != ALL_NAMESPACES)
            url += "&cmnamespace=" + namespace;
 
        // work around an arbitrary and silly limitation
        ArrayList<String> members = new ArrayList<String>(6667); // enough for most cats
        String next = "";
        while ((members.size() % max) == 0)
        {
            if (members.size() != 0)
                next = "&cmcontinue=" + URLEncoder.encode(next, "UTF-8");
            logurl(url + next, "getCategoryMembers");
            checkLag("getCategoryMembers");
            URLConnection connection = new URL(url + next).openConnection();
            setCookies(connection, cookies);
            connection.connect();
 
            // read the first line, as it is the only thing worth paying attention to
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            if (line.indexOf("cmcontinue") != -1)
                next = line.substring(line.indexOf("cmcontinue") + 12, line.indexOf("\" />"));
            while (line.indexOf("title") != -1)
            {
                int x = line.indexOf("title=\"");
                int y = line.indexOf("\" />", x);
                members.add(decode(line.substring(x + 7, y)));
                line = line.substring(y + 4, line.length());
            }
 
            // short circuit
            in.close();
            if (members.size() == 0)
                break;
        }
        log(Level.INFO, "Successfully retrieved contents of Category:" + name + " (" + members.size() + " items)", "getCategoryMembers");
        return members.toArray(new String[0]);
    }
 
    /**
     *  Searches the wiki for external links. Equivalent to [[Special:Linksearch]].
     *  Returns two lists, where the first is the list of pages and the
     *  second is the list of urls. The index of a page in the first list
     *  corresponds to the index of the url on that page in the second list.
     *  Wildcards (*) are only permitted at the start of the search string.
     *
     *  @param pattern the pattern (String) to search for (e.g. example.com,
     *  *.example.com)
     *  @throws IOException if a network error occurs
     *  @return two lists - index 0 is the list of pages (String), index 1 is
     *  the list of urls (instance of <tt>java.net.URL</tt>)
     *  @since 0.06
     */
    public ArrayList[] spamsearch(String pattern) throws IOException
    {
        return spamsearch(pattern, ALL_NAMESPACES);
    }
 
    /**
     *  Searches the wiki for external links. Equivalent to [[Special:Linksearch]].
     *  Returns two lists, where the first is the list of pages and the
     *  second is the list of urls. The index of a page in the first list
     *  corresponds to the index of the url on that page in the second list.
     *  Wildcards (*) are only permitted at the start of the search string.
     *
     *  @param pattern the pattern (String) to search for (e.g. example.com,
     *  *.example.com)
     *  @param namespace filters by namespace, returns empty if namespace
     *  does not exist
     *  @throws IOException if a network error occurs
     *  @return two lists - index 0 is the list of pages (String), index 1 is
     *  the list of urls (instance of <tt>java.net.URL</tt>)
     *  @since 0.06
     */
    public ArrayList[] spamsearch(String pattern, int namespace) throws IOException
    {
        // set it up
        StringBuilder url = new StringBuilder(query);
        url.append("action=query&list=exturlusage&euprop=title|url&euquery=");
        url.append(pattern);
        url.append("&eulimit=");
        url.append(max);
        if (namespace != ALL_NAMESPACES)
        {
            url.append("&eunamespace=");
            url.append(namespace);
        }
        url.append("&euoffset=");
 
        // some variables we need later
        int euoffset = 0;
        boolean done = false;
        ArrayList[] ret = new ArrayList[] // no reason for more than 500 spamlinks
        {
            new ArrayList<String>(667), // page titles
            new ArrayList<URL>(667) // urls
        };
 
        // begin
        while (!done)
        {
            // fetch the results
            String newURL = url.toString() + euoffset;
            URLConnection connection = new URL(newURL).openConnection();
            logurl(newURL, "spamsearch");
            checkLag("spamsearch");
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // if this is the last page of results then there is no euoffset parameter
            if (line.indexOf("euoffset=\"") == -1)
                done = true;
 
            // parse
            // typical form: <eu ns="0" title="Main Page" url="http://example.com" />
            while (line.indexOf("<eu ") != -1)
            {
                int x = line.indexOf("title=\"");
                int y = line.indexOf("\" url=\"");
                int z = line.indexOf("\" />", y);
 
                String title = line.substring(x + 7, y);
                String link = line.substring(y + 7, z);
                ret[0].add(decode(title));
                ret[1].add(new URL(link));
                line = line.substring(z + 3);
            }
 
            // update the offset
            euoffset += max;
            in.close();
        }
 
        // return value
        log(Level.INFO, "Successfully returned instances of external link " + pattern + " (" + ret[0].size() + " links)", "spamsearch");
        return ret;
    }
 
    /**
     *  Looks up a particular user in the IP block list, i.e. whether a user
     *  is currently blocked. Equivalent to [[Special:Ipblocklist]].
     *
     *  @param user a username or IP (e.g. "127.0.0.1")
     *  @return the block log entry
     *  @throws IOException if a network error occurs
     *  @since 0.12
     */
    public LogEntry[] getIPBlockList(String user) throws IOException
    {
        return getIPBlockList(user, null, null, 1);
    }
 
    /**
     *  Lists currently operating blocks that were made in the specified
     *  interval. Equivalent to [[Special:Ipblocklist]].
     *
     *  @param start the start date
     *  @param end the end date
     *  @return the currently operating blocks that were made in that interval
     *  @throws IOException if a network error occurs
     *  @since 0.12
     */
    public LogEntry[] getIPBlockList(Calendar start, Calendar end) throws IOException
    {
        return getIPBlockList("", start, end, Integer.MAX_VALUE);
    }
 
    /**
     *  Fetches part of the list of currently operational blocks. Equivalent to
     *  [[Special:Ipblocklist]]. WARNING: cannot tell whether a particular IP
     *  is autoblocked as this is non-public data (see also [[bugzilla:12321]]
     *  and [[foundation:Privacy policy]]). Don't call this directly, use one
     *  of the two above methods instead.
     *
     *  @param user a particular user that might have been blocked. Use "" to
     *  not specify one. May be an IP (e.g. "127.0.0.1") or a CIDR range (e.g.
     *  "127.0.0.0/16") but not an autoblock (e.g. "#123456").
     *  @param start what timestamp to start. Use null to not specify one.
     *  @param end what timestamp to end. Use null to not specify one.
     *  @param amount the number of blocks to retrieve. Use
     *  <tt>Integer.MAX_VALUE</tt> to not specify one.
     *  @return a LogEntry[] of the blocks
     *  @throws IOException if a network error occurs
     *  @throws IllegalArgumentException if start date is before end date
     *  @since 0.12
     */
    protected LogEntry[] getIPBlockList(String user, Calendar start, Calendar end, int amount) throws IOException
    {
        // quick param check
        if (start != null && end != null)
            if (start.before(end))
                throw new IllegalArgumentException("Specified start date is before specified end date!");
        String bkstart = start != null ? calendarToTimestamp(start) : "";
        String bkend = end != null ? calendarToTimestamp(end) : "";
 
        // url base
        StringBuilder urlBase = new StringBuilder(query);
        urlBase.append("action=query&list=blocks");
        if (end != null)
        {
            urlBase.append("&bkend=");
            urlBase.append(bkend);
        }
        if (!user.equals(""))
        {
            urlBase.append("&bkusers=");
            urlBase.append(user);
        }
        urlBase.append("&bklimit=");
        urlBase.append(amount < max ? amount : max);
        if (start != null) // start is only null when a user is specified
            urlBase.append("&bkstart=");
 
        // connection
        ArrayList<LogEntry> entries = new ArrayList<LogEntry>(1333);
                while ((entries.size() % max) == 0)
        {
            String url = urlBase.toString() + bkstart;
                logurl(url, "getIPBlockList");
            checkLag("getIPBlockList");
                URLConnection connection = new URL(url).openConnection();
                setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // set start parameter to new value if required
            if (amount - entries.size() >= max)
            {
                int a = line.indexOf("bkstart=\"") + 9;
                bkstart = line.substring(a, line.indexOf("\"", a));
            }
 
            // parse xml
            while (entries.size() < amount  && line.indexOf("<block ") != -1)
            {
                // find entry
                int a = line.indexOf("<block ");
                int b = line.indexOf("/>", a);
                String entry = line.substring(a, b);
                line = line.substring(b);
 
                // target
                String target = "";
                a = entry.indexOf("user=\"") + 6;
                if (a < 6) // it's an autoblock, use block id instead (#zzzz)
                {
                    a = entry.indexOf("id=\"") + 4;
                    target = "#";
                }
                b = entry.indexOf("\"", a);
                target += entry.substring(a, b);
 
                // blocking admin
                a = entry.indexOf("by=\"") + 4;
                b = entry.indexOf("\"", a);
                User admin = new User(entry.substring(a, b));
 
                // timestamp
                a = entry.indexOf("timestamp=\"") + 11;
                b = entry.indexOf("\"", a);
                String timestamp = convertTimestamp(entry.substring(a, b));
 
                // reason
                a = entry.indexOf("reason=\"") + 8;
                b = entry.indexOf("\"", a);
                String reason = decode(entry.substring(a, b));
 
                // details see LogEntry.getDetails()
                Object[] details = new Object[5];
                details[0] = new Boolean(entry.indexOf("anononly") != -1); // anons only
                details[1] = new Boolean(entry.indexOf("nocreate") != -1); // account creation disabled
                details[2] = new Boolean(entry.indexOf("noautoblock") != -1); // autoblock disabled
                details[3] = new Boolean(entry.indexOf("noemail") != -1); // [[Special:Emailuser]] disabled
                a = entry.indexOf("expiry=\"") + 8;
                int c = entry.indexOf("\"", a);
                details[4] = entry.substring(a, c);
 
                // add
                entries.add(new LogEntry(BLOCK_LOG, "block", reason, admin, target, timestamp, details));
            }
 
            // short circuit
            if (entries.size() == 0)
                break;
        }
 
        // log statement
        StringBuilder logRecord = new StringBuilder("Successfully fetched IP block list ");
        if (!user.equals(""))
        {
            logRecord.append(" for ");
            logRecord.append(user);
        }
        if (start != null)
        {
            logRecord.append(" from ");
            logRecord.append(start.getTime().toString());
        }
        if (end != null)
        {
            logRecord.append(" to ");
            logRecord.append(end.getTime().toString());
        }
        logRecord.append(" (");
        logRecord.append(entries.size());
        logRecord.append(" entries)");
        log(Level.INFO, logRecord.toString(), "getIPBlockList");
        return entries.toArray(new LogEntry[0]);
     }
 
    /**
     *  Gets the most recent set of log entries up to the given amount.
     *  Equivalent to [[Special:Log]].
     *
     *  @param amount the amount of log entries to get
     *  @return the most recent set of log entries
     *  @throws IOException if a network error occurs
     *  @throws IllegalArgumentException if amount < 1
     *  @since 0.08
     */
    public LogEntry[] getLogEntries(int amount) throws IOException
    {
        return getLogEntries(null, null, amount, ALL_LOGS, null, "", ALL_NAMESPACES);
    }
 
    /**
     *  Gets log entries for a specific user. Equivalent to [[Special:Log]]. Dates
     *  and timestamps are in UTC.
     *
     *  @param user the user to get log entries for
     *  @throws IOException if a network error occurs
     *  @return the set of log entries created by that user
     *  @since 0.08
     */
    public LogEntry[] getLogEntries(User user) throws IOException
    {
        return getLogEntries(null, null, Integer.MAX_VALUE, ALL_LOGS, user, "", ALL_NAMESPACES);
    }
 
    /**
     *  Gets the log entries representing actions that were performed on a
     *  specific target. Equivalent to [[Special:Log]]. Dates and timestamps are
     *  in UTC.
     *
     *  @param target the target of the action(s).
     *  @throws IOException if a network error occurs
     *  @return the specified log entries
     *  @since 0.08
     */
    public LogEntry[] getLogEntries(String target) throws IOException
    {
        return getLogEntries(null, null, Integer.MAX_VALUE, ALL_LOGS, null, target, ALL_NAMESPACES);
    }
 
    /**
     *  Gets all log entries that occurred between the specified dates.
     *  WARNING: the start date is the most recent of the dates given, and
     *   the order of enumeration is from newest to oldest. Equivalent to
     *  [[Special:Log]]. Dates and timestamps are in UTC.
     *
     *  @param start what timestamp to start. Use null to not specify one.
     *  @param end what timestamp to end. Use null to not specify one.
     *  @throws IOException if something goes wrong
     *  @throws IllegalArgumentException if start &lt; end
     *  @return the specified log entries
     *  @since 0.08
     */
    public LogEntry[] getLogEntries(Calendar start, Calendar end) throws IOException
    {
        return getLogEntries(start, end, Integer.MAX_VALUE, ALL_LOGS, null, "", ALL_NAMESPACES);
    }
 
    /**
     *  Gets the last how ever many log entries in the specified log. Equivalent
     *  to [[Special:Log]]. Dates and timestamps are in UTC.
     *
     *  @param amount the number of entries to get
     *  @param type what log to get (e.g. DELETION_LOG)
     *  @throws IOException if a network error occurs
     *  @throws IllegalArgumentException if the log type doesn't exist
     *  @return the specified log entries
     */
    public LogEntry[] getLogEntries(int amount, String type) throws IOException
    {
        return getLogEntries(null, null, amount, type, null, "", ALL_NAMESPACES);
    }
 
    /**
     *  Gets the specified amount of log entries between the given times by
     *  the given user on the given target. Equivalent to [[Special:Log]].
     *  WARNING: the start date is the most recent of the dates given, and
     *  the order of enumeration is from newest to oldest. Dates and timestamps
     *  are in UTC.
     *
     *  @param start what timestamp to start. Use null to not specify one.
     *  @param end what timestamp to end. Use null to not specify one.
     *  @param amount the amount of log entries to get. If both start and
     *  end are defined, this is ignored. Use Integer.MAX_VALUE to not
     *  specify one.
     *  @param log what log to get (e.g. DELETION_LOG)
     *  @param user the user performing the action. Use null not to specify
     *  one.
     *  @param target the target of the action. Use "" not to specify one.
     *  @param namespace filters by namespace. Returns empty if namespace
     *  doesn't exist.
     *  @throws IOException if a network error occurs
     *  @throws IllegalArgumentException if start &lt; end or amount &lt; 1
     *  @return the specified log entries
     *  @since 0.08
     */
    public LogEntry[] getLogEntries(Calendar start, Calendar end, int amount, String log, User user, String target, int namespace) throws IOException
    {
        // construct the query url from the parameters given
        StringBuilder url = new StringBuilder(query);
        url.append("action=query&list=logevents&leprop=title|type|user|timestamp|comment|details");
        StringBuilder console = new StringBuilder("Successfully retrieved "); // logger statement
 
        // check for amount
        if (amount < 1)
            throw new IllegalArgumentException("Tried to retrieve less than one log entry!");
 
        // log type
        if (!log.equals(ALL_LOGS))
        {
            url.append("&letype=");
            url.append(log);
        }
 
        // specific log types
        if (log.equals(USER_CREATION_LOG))
            console.append("user creation");
        else if (log.equals(DELETION_LOG))
            console.append("deletion");
        else if (log.equals(PROTECTION_LOG))
            console.append("protection");
        else if (log.equals(USER_RIGHTS_LOG))
            console.append("user rights");
        else if (log.equals(USER_RENAME_LOG))
            console.append("user rename");
        else if (log.equals(BOT_STATUS_LOG))
            console.append("bot status");
        else
        {
            console.append(" ");
            console.append(log);
        }
        console.append(" log ");
 
        // check for user parameter
        if (user != null)
        {
            url.append("&leuser=");
            url.append(URLEncoder.encode(user.getUsername(), "UTF-8"));
            console.append("for ");
            console.append(user.getUsername());
            console.append(" ");
        }
 
        // check for target
        if (!target.equals(""))
        {
            url.append("&letitle=");
            url.append(URLEncoder.encode(target, "UTF-8"));
            console.append("on ");
            console.append(target);
            console.append(" ");
        }
 
        // Check for namespace. Should be done server-side.
//        if (namespace != ALL_NAMESPACES)
//        {
//            url.append("&lenamespace=");
//            url.append(namespace);
//        }
 
        // set maximum
        url.append("&lelimit=");
        url.append(amount > max || namespace != ALL_NAMESPACES ? max : amount);
 
        // check for start/end dates
        String lestart = ""; // we need to account for lestart being the continuation parameter too.
        if (start != null)
        {
            if (end != null && start.before(end)) //aargh
                throw new IllegalArgumentException("Specified start date is before specified end date!");
            lestart = new String(calendarToTimestamp(start));
            console.append("from ");
            console.append(start.getTime().toString());
            console.append(" ");
        }
        if (end != null)
        {
            url.append("&leend=");
            url.append(calendarToTimestamp(end));
            console.append("to ");
            console.append(end.getTime().toString());
            console.append(" ");
        }
 
        // only now we can actually start to retrieve the logs
        ArrayList<LogEntry> entries = new ArrayList<LogEntry>(6667); // should be enough
        while (entries.size() < amount && !lestart.equals("a"))
        {
            String str = url.toString() + "&lestart=" + lestart;
            logurl(str, "getLogEntries");
            checkLag("getLogEntries");
            URLConnection connection = new URL(str).openConnection();
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // set start parameter to new value
            int ab = line.indexOf("lestart=\"") + 9;
            if (ab != 8)
                lestart = line.substring(ab, line.indexOf("\"", ab));
            else
                lestart = "a"; //set a signal so we know when to stop
 
            // parse xml. We need to repeat the test because the XML may contain more than the required amount.
            while (entries.size() < amount)
            {
                // find entry
                int a = line.indexOf("<item");
                if (a < 0)
                    break;
                // end may be " />" or "</item>", followed by next item
                int b = line.indexOf("><item", a);
                if (b < 0) // last entry
                    b = line.length();
                String entry = line.substring(a, b);
                line = line.substring(b);
 
                // page title
                a = entry.indexOf("title=\"") + 7;
                b = entry.indexOf("\" ", a);
                String title = decode(entry.substring(a, b));
 
                // log type
                String entryType;
                if (log.equals(ALL_LOGS))
                {
                    a = entry.indexOf("type=\"") + 6;
                    b = entry.indexOf("\" ", a);
                    entryType = entry.substring(a, b);
                }
                else
                    entryType = log;
 
                // action
                a = entry.indexOf("action=\"") + 8;
                b = entry.indexOf("\" ", a);
                String action = entry.substring(a, b);
 
                // user
                User performer;
                if (user == null)
                {
                    a = entry.indexOf("user=\"") + 6;
                    b = entry.indexOf("\" ", a);
                    performer = new User(entry.substring(a, b));
                }
                else
                    performer = user;
 
                // reason
                String reason;
                if (entryType.equals(USER_CREATION_LOG)) // there is no reason for creating a user
                    reason = "";
                else
                {
                    a = entry.indexOf("comment=\"") + 9;
                    b = entry.indexOf("\"", a);
                    reason = decode(entry.substring(a, b));
                }
 
                // timestamp
                a = entry.indexOf("timestamp=\"") + 11;
                b = a + 20;
                String timestamp = convertTimestamp(entry.substring(a, b));
 
                // details
                Object details = null;
                if (entryType.equals(MOVE_LOG))
                {
                    a = entry.indexOf("new_title=\"") + 11;
                    b = entry.indexOf("\" />", a);
                    details = decode(entry.substring(a, b)); // the new title
                }
                else if (entryType.equals(BLOCK_LOG))
                {
                    a = entry.indexOf("<block") + 7;
                    String s = entry.substring(a);
                    int c = s.indexOf("duration=") + 10;
                    int d = s.indexOf("\"", c);
                    details = new Object[]
                    {
                        new Boolean(s.indexOf("anononly") > -1), // anon-only
                        new Boolean(s.indexOf("nocreate") > -1), // account creation blocked
                        new Boolean(s.indexOf("noautoblock") > -1), // autoblock disabled
                        new Boolean(s.indexOf("noemail") > -1), // email disabled
                        s.substring(c, d) // duration
                    };
                }
                else if (entryType.equals(USER_RENAME_LOG))
                {
                    a = entry.indexOf("<param>") + 7;
                    b = entry.indexOf("</param>", a);
                        details = entry.substring(a, b); // the new username
                }
                else if (entryType.equals(USER_RIGHTS_LOG))
                {
                    a = entry.indexOf("new=\"") + 5;
                    b = entry.indexOf("\"", a);
                    String z = entry.substring(a, b);
                    int rights = 1; // no ips in user rights log
                    rights += (z.indexOf("sysop") != -1 ? ADMIN : 0); // sysop
                    rights += (z.indexOf("bureaucrat") != -1 ? BUREAUCRAT : 0); // bureaucrat
                    rights += (z.indexOf("steward") != -1 ? STEWARD : 0); // steward
                    rights += (z.indexOf("bot") != -1 ? BOT : 0); // bot
                    details = new Integer(rights);
                }
 
                // namespace processing
                if (namespace == ALL_NAMESPACES || namespace(title) == namespace)
                    entries.add(new LogEntry(entryType, action, reason, performer, title, timestamp.toString(), details));
            }
            in.close();
        }
 
        // log the success
        console.append(" (");
        console.append(entries.size());
        console.append(" entries)");
        log(Level.INFO, console.toString(), "getLogEntries");
        return entries.toArray(new LogEntry[0]);
    }
 
    /**
     *  Lists pages that start with a given prefix. Equivalent to
     *  [[Special:Prefixindex]].
     *
     *  @param prefix the prefix
     *  @return the list of pages with that prefix
     *  @throws IOException if a network error occurs
     *  @since 0.15
     */
    public String[] prefixIndex(String prefix) throws IOException
    {
        return listPages(prefix, NO_PROTECTION, ALL_NAMESPACES, -1, -1);
    }
 
    /**
     *  List pages below a certain size in the main namespace. Equivalent to
     *  [[Special:Shortpages]].
     *  @param cutoff the maximum size in bytes these short pages can be
     *  @return pages below that size
     *  @throws IOException if a network error occurs
     *  @since 0.15
     */
    public String[] shortPages(int cutoff) throws IOException
    {
        return listPages("", NO_PROTECTION, MAIN_NAMESPACE, -1, cutoff);
    }
 
    /**
     *  List pages below a certain size in any namespace. Equivalent to
     *  [[Special:Shortpages]].
     *  @param cutoff the maximum size in bytes these short pages can be
     *  @param namespace a namespace
     *  @throws IOException if a network error occurs
     *  @return pages below that size in that namespace
     *  @since 0.15
     */
    public String[] shortPages(int cutoff, int namespace) throws IOException
    {
        return listPages("", NO_PROTECTION, namespace, -1, cutoff);
    }
 
    /**
     *  List pages above a certain size in the main namespace. Equivalent to
     *  [[Special:Longpages]].
     *  @param cutoff the minimum size in bytes these long pages can be
     *  @return pages above that size
     *  @throws IOException if a network error occurs
     *  @since 0.15
     */
    public String[] longPages(int cutoff) throws IOException
    {
        return listPages("", NO_PROTECTION, MAIN_NAMESPACE, cutoff, -1);
    }
 
    /**
     *  List pages above a certain size in any namespace. Equivalent to
     *  [[Special:Longpages]].
     *  @param cutoff the minimum size in nbytes these long pages can be
     *  @param namespace a namespace
     *  @return pages above that size
     *  @throws IOException if a network error occurs
     *  @since 0.15
     */
    public String[] longPages(int cutoff, int namespace) throws IOException
    {
        return listPages("", NO_PROTECTION, namespace, cutoff, -1);
    }
 
    /**
     *  Lists pages with titles containing a certain prefix with a certain
     *  protection level and in a certain namespace. Equivalent to
     *  [[Special:Allpages]], [[Special:Prefixindex]], [[Special:Protectedpages]]
     *  and [[Special:Allmessages]] (if namespace == MEDIAWIKI_NAMESPACE).
     *  WARNING: Limited to 500 values (5000 for bots), unless a prefix is
     *  specified.
     *
     *  @param prefix the prefix of the title. Use "" to not specify one.
     *  @param level a protection level. Use NO_PROTECTION to not specify one.
     *  WARNING: it is not currently possible to specify a combination of both
     *  semi and move protection
     *  @param namespace a namespace
     *  @return the specified list of pages
     *  @since 0.09
     *  @throws IOException if a network error occurs
     */
    public String[] listPages(String prefix, int level, int namespace) throws IOException
    {
        return listPages(prefix, level, namespace, -1, -1);
    }
 
    /**
     *  Lists pages with titles containing a certain prefix with a certain
     *  protection level and in a certain namespace. Equivalent to
     *  [[Special:Allpages]], [[Special:Prefixindex]], [[Special:Protectedpages]]
     *  [[Special:Allmessages]] (if namespace == MEDIAWIKI_NAMESPACE),
     *  [[Special:Shortpages]] and [[Special:Longpages]]. WARNING: Limited to
     *  500 values (5000 for bots), unless a prefix or (max|min)imum size is
     *  specified.
     *
     *  @param prefix the prefix of the title. Use "" to not specify one.
     *  @param level a protection level. Use NO_PROTECTION to not specify one.
     *  WARNING: it is not currently possible to specify a combination of both
     *  semi and move protection
     *  @param namespace a namespace
     *  @param minimum the minimum size in bytes these pages can be. Use -1 to
     *  not specify one.
     *  @param maximum the maximum size in bytes these pages can be. Use -1 to
     *  not specify one.
     *  @return the specified list of pages
     *  @since 0.09
     *  @throws IOException if a network error occurs
     */
    public String[] listPages(String prefix, int level, int namespace, int minimum, int maximum) throws IOException
    {
        // @revised 0.15 to add short/long pages
        StringBuilder url = new StringBuilder(query);
        url.append("action=query&list=allpages&aplimit=");
        url.append(max);
        if (!prefix.equals("")) // prefix
        {
            url.append("&apprefix=");
            url.append(URLEncoder.encode(prefix, "UTF-8"));
          }
        if (namespace != ALL_NAMESPACES) // check for namespace
        {
            url.append("&apnamespace=");
            url.append(namespace);
        }
        switch (level) // protection level
        {
            case NO_PROTECTION: // squelch, this is the default
                break;
            case SEMI_PROTECTION:
                url.append("&apprlevel=autoconfirmed&apprtype=edit");
                break;
            case FULL_PROTECTION:
                url.append("&apprlevel=sysop&apprtype=edit");
                break;
            case MOVE_PROTECTION:
                url.append("&apprlevel=sysop&apprtype=move");
                break;
            case SEMI_AND_MOVE_PROTECTION: // squelch, not implemented
                break;
            default:
                throw new IllegalArgumentException("Invalid protection level!");
        }
                // max and min
        if (minimum != -1)
        {
            url.append("&apminsize=");
                        url.append(minimum);
        }
        if (maximum != -1)
        {
            url.append("&apmaxsize=");
                        url.append(maximum);
        }
 
        // parse
        ArrayList<String> pages = new ArrayList<String>(6667);
        String next = "";
        while (pages.size() < max)
        {
            // connect and read
            String s;
            if (next.equals("") && pages.size() != 0) // done
                break;
            else if (next.equals(""))
                s = url.toString();
            else
                s = url.toString() + "&apfrom=" + next;
            URLConnection connection = new URL(s).openConnection();
            logurl(s, "listPages");
            checkLag("listPages");
            setCookies(connection, cookies);
            connection.connect();
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // find next value
            int a = line.indexOf("apfrom=\"");
            int b;
            if (a > -1)
            {
                a += 8;
                b = line.indexOf("\"", a);
                next = line.substring(a, b).replace(' ', '_');
            }
 
            // find the pages
            while (line.indexOf("<p ") > -1)
            {
                a = line.indexOf("title=\"") + 7;
                b = line.indexOf("\" />", a);
                pages.add(decode(line.substring(a, b)));
                line = line.substring(b, line.length());
            }
            in.close();
            if (pages.size() == 0)
                break; // short circuit
            if (prefix.equals(""))
                break; // break if prefix not specified
        }
 
        // tidy up
        log(Level.INFO, "Successfully retrieved page list (" + pages.size() + " pages)", "listPages");
        return pages.toArray(new String[0]);
    }
 
    // user methods
 
    /**
     *  Determines whether a specific user exists. Should evaluate to false
     *  for anons.
     *
     *  @param username a username
     *  @return whether the user exists
     *  @throws IOException if a network error occurs
     *  @since 0.05
     */
    public boolean userExists(String username) throws IOException
    {
        return allUsers(username, 1)[0].equals(username);
    }
 
    /**
     *  Gets the specified number of users (as a String) starting at the
     *  given string, in alphabetical order. Equivalent to [[Special:Listusers]].
     *
     *  @param start the string to start enumeration
     *  @param number the number of users to return
     *  @return a String[] containing the usernames
     *  @throws IOException if a network error occurs
     *  @since 0.05
     */
    public String[] allUsers(String start, int number) throws IOException
    {
        // sanitise
        String url = query + "action=query&list=allusers&aulimit=" + (number > max ? max : number) + "&aufrom=";
 
        // work around an arbitrary and silly limitation
        ArrayList<String> members = new ArrayList<String>(6667); // enough for most requests
        String next = URLEncoder.encode(start.replace(" ", "_"), "UTF-8");
        while ((members.size() % max) == 0 && members.size() < number)
        {
            next = URLEncoder.encode(next, "UTF-8");
            logurl(url + next, "allUsers");
            checkLag("allUsers");
            URLConnection connection = new URL(url + next).openConnection();
            setCookies(connection, cookies);
            connection.connect();
 
            // read the first line, as it is the only thing worth paying attention to
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            next = line.substring(line.indexOf("aufrom=\"") + 8, line.indexOf("\" />"));
            while (line.indexOf("<u ") != -1 && members.size() < number)
            {
                int x = line.indexOf("name=");
                int y = line.indexOf(" />", x);
                members.add(line.substring(x + 6, y - 1));
                line = line.substring(y + 2, line.length());
            }
 
            // short circuit
            in.close();
            if (members.size() == 0)
                break;
        }
        log(Level.INFO, "Successfully retrieved user list (" + number + " users starting at " + start + ")", "allUsers");
        return members.toArray(new String[0]);
    }
 
    /**
     *  Gets the user with the given username. Returns null if it doesn't
     *  exist.
     *  @param username a username
     *  @return the user with that username
     *  @since 0.05
     *  @throws IOException if a network error occurs
     */
    public User getUser(String username) throws IOException
    {
        return userExists(username) ? new User(username) : null;
    }
 
    /**
     *  Gets the user we are currently logged in as. If not logged in, returns
     *  null.
     *  @return the current logged in user
     *  @since 0.05
     */
    public User getCurrentUser()
    {
        return user;
    }
 
    /**
     *  Subclass for wiki users.
     *  @since 0.05
     */
    public class User implements Cloneable
    {
        private String username;
        private int rights = -484; // cache for userRights()
        private int count; // cache for isBlocked()
        private boolean blocked; // cache for isBlocked()
 
        /**
         *  Creates a new user object. Does not create a new user on the
         *  wiki (we don't implement this for a very good reason). Shouldn't
         *  be called for anons.
         *
         *  @param username the username of the user
         *  @since 0.05
         */
        protected User(String username)
        {
            this.username = username;
        }
 
        /**
         *  Gets a user's rights. Returns a bitmark of the user's rights.
         *  See fields above (be aware that IP_USER = -1, but you shouldn't
         *  be calling from a null object anyway). Uses the cached value for
         *  speed.
         *
         *  @return a bitwise mask of the user's rights.
         *  @throws IOException if a network error occurs
         *  @since 0.05
         */
        public int userRights() throws IOException
        {
            return userRights(true);
        }
 
        /**
         *  Gets a user's rights. Returns a bitmark of the user's rights.
         *  See fields above (be aware that IP_USER = -1, but you
         *  shouldn't be calling from a null object anyway). The value
         *  returned is cached which is returned by default, specify
         *  <tt>cache = false</tt> to retrieve a new one.
         *
         *  @return a bitwise mask of the user's rights.
         *  @throws IOException if a network error occurs
         *  @param cache whether we should use the cached value
         *  @since 0.07
         */
        public int userRights(boolean cache) throws IOException
        {
            // retrieve cache (if valid)
            if (cache && rights != -484)
                return rights;
 
            // begin
            String url = query + "action=query&list=allusers&aulimit=1&auprop=groups&aufrom=" + URLEncoder.encode(username, "UTF-8");
            URLConnection connection = new URL(url).openConnection();
            logurl(url, "User.userRights");
            checkLag("User.userRights");
            setCookies(connection, cookies);
            connection.connect();
 
            // read the first line, as it is the only thing worth paying attention to
            BufferedReader in = new BufferedReader(new InputStreamReader(new GZIPInputStream(connection.getInputStream()), "UTF-8"));
            String line = in.readLine();
 
            // parse
            ArrayList<String> members = new ArrayList<String>();
            while (line.indexOf("<g>") != -1)
            {
                int x = line.indexOf("<g>");
                int y = line.indexOf("</g>");
                members.add(line.substring(x + 3, y));
                line = line.substring(y + 4, line.length());
            }
            log(Level.INFO, "Successfully retrived user rights for " + username, "User.userRights");
 
            int ret = 1;
            if (members.indexOf("sysop") != -1)
                ret += 4;
            if (members.indexOf("bureaucrat") != -1)
                ret += 8;
            if (members.indexOf("steward") != -1)
                ret += 16;
            if (members.indexOf("bot") != -1)
                ret += 32;
 
            // store
            rights += (ret + 484);
            in.close();
            return ret;
        }
 
        /**
         *  Gets this user's username. (Should have implemented this earlier).
         *  @return this user's username
         *  @since 0.08
         */
        public String getUsername()
        {
            return username;
        }
 
        /**
         *  Returns a log of the times when the user has been blocked.
         *  @return records of the occasions when this user has been blocked
         *  @throws IOException if something goes wrong
         *  @since 0.08
         */
        public LogEntry[] blockLog() throws IOException
        {
            return getLogEntries(null, null, Integer.MAX_VALUE, BLOCK_LOG, null, "User:" + username, USER_NAMESPACE);
        }
 
        /**
         *  Determines whether this user is blocked by looking it up on the IP
         *  block list.
         *  @return whether this user is blocked
         *  @throws IOException if we cannot retrieve the IP block list
         *  @since 0.12
         */
        public boolean isBlocked() throws IOException
        {
            // if logged in as this user, it's only worth fetching this every 100 edits or so
            if (this != user || count > 100)
            {
                blocked = getIPBlockList(username, null, null, 1).length != 0;
                count = 0;
            }
            else
                count++;
            return blocked;
        }
 
        /**
         *  Copies this user object.
         *  @return the copy
         *  @since 0.08
         */
        public Object clone()
        {
            return new User(username);
        }
 
        /**
         *   Tests whether this user is equal to another one.
         *   @return whether the users are equal
         *   @since 0.08
         */
        public boolean equals(Object x)
        {
            if (!(x instanceof User))
                return false;
            return username.equals(((User)x).username);
        }
    }
 
    /**
     *  A wrapper class for an entry in a wiki log, which represents an action
     *  performed on the wiki.
     *
     *  @see #getLogEntries
     *  @since 0.08
     */
    public class LogEntry
    {
        // internal data storage
        private String type;
        private String action;
        private String reason;
        private User user;
        private String target;
        private Calendar timestamp;
        private Object details;
 
        /**
         *  Creates a new log entry. WARNING: does not perform the action
         *  implied. Use Wiki.class methods to achieve this.
         *
         *  @param type the type of log entry, one of USER_CREATION_LOG,
         *  DELETION_LOG, BLOCK_LOG, etc.
         *  @param action the type of action that was performed e.g. "delete",
         *  "unblock", "overwrite", etc.
         *  @param reason why the action was performed
         *  @param user the user who performed the action
         *  @param target the target of the action
         *  @param timestamp the local time when the action was performed.
         *  We will convert this back into a Calendar.
         *  @param details the details of the action (e.g. the new title of
         *  the page after a move was performed).
         *  @since 0.08
         */
        protected LogEntry(String type, String action, String reason, User user, String target, String timestamp, Object details)
        {
            this.type = type;
            this.action = action;
            this.reason = reason;
            this.user = user;
            this.target = target;
            this.timestamp = timestampToCalendar(timestamp);
            this.details = details;
        }
 
        /**
         *  Gets the type of log that this entry is in.
         *  @return one of DELETION_LOG, USER_CREATION_LOG, BLOCK_LOG, etc.
         *  @since 0.08
         */
        public String getType()
        {
            return type;
        }
 
        /**
         *  Gets a string description of the action performed, for example
         *  "delete", "protect", "overwrite", ...
         *  @return the type of action performed
         *  @since 0.08
         */
        public String getAction()
        {
            return action;
        }
 
        /**
         *  Gets the reason supplied by the perfoming user when the action
         *  was performed.
         *  @return the reason the action was performed
         *  @since 0.08
         */
        public String getReason()
        {
            return reason;
        }
 
        /**
         *  Gets the user object representing who performed the action.
         *  @return the user who performed the action.
         *  @since 0.08
         */
        public User getUser()
        {
            return user;
        }
 
        /**
         *  Gets the target of the action represented by this log entry.
         *  @return the target of this log entry
         *  @since 0.08
         */
        public String getTarget()
        {
            return target;
        }
 
        /**
         *  Gets the timestamp of this log entry.
         *  @return the timestamp of this log entry
         *  @since 0.08
         */
        public Calendar getTimestamp()
        {
            return timestamp;
        }
 
        /**
         *  Gets the details of this log entry. Return values are as follows:
         *
         *  <table>
         *  <tr><th>Log type <th>Return value
         *  <tr><td>MOVE_LOG <td>The new page title
         *  <tr><td>USER_RENAME_LOG <td>The new username
         *  <tr><td>BLOCK_LOG <td>new Object[] { boolean anononly, boolean nocreate, boolean noautoblock, boolean noemail, String duration }
         *  <tr><td>USER_RIGHTS_LOG <td>The new user rights (Integer)
         *  <tr><td>Others <td>null
         *  </table>
         *
         *  Note that the duration of a block may be given as a period of time
         *  (e.g. "31 hours") or a timestamp (e.g. 20071216160302). To tell
         *  these apart, feed it into <tt>Long.parseLong()</tt> and catch any
         *  resulting exceptions.
         *
         *  @return the details of the log entry
         *  @since 0.08
         */
        public Object getDetails()
        {
            return details;
        }
 
        /**
         *  Returns a string representation of this log entry. This is similar to
         *  on-wiki logs, which are of the form {{timestamp}} {{user}} {{action}}
         *  {{target}} ({{reason}}). <!-- yes, this is crude, fixing it will
         *  add about 200 lines of code -->
         *
         *  @return a string representation of this object
         *  @since 0.08
         */
        public String toString()
        {
            StringBuilder s = new StringBuilder(timestamp.getTime().toString());
            s.append(" ");
            s.append(user.getUsername());
            s.append(" ");
            s.append(action);
            s.append(" ");
            s.append(target);
            s.append(" (");
            s.append(reason);
            s.append(")");
            return s.toString();
        }
    }
 
    // internals
 
    // miscellany
 
    /**
     *  Strips entity references like &quot; from the supplied string.
     *  @param in the string to remove URL encoding from
     *  @return that string without URL encoding
     *  @since 0.11
     */
    private String decode(String in)
    {
        // Remove entity references. Oddly enough, URLDecoder doesn't nuke these.
        in = in.replace("&lt;", "<").replace("&gt;", ">"); // html tags
        in = in.replace("&amp;", "&");
        in = in.replace("&quot;", "\"");
        in = in.replace("&#039;", "'");
        return in;
    }
 
    /**
     *  Same as <tt>parse()</tt>, but also strips out unwanted crap.
     *
     *  @param in the string to parse
     *  @return that string without the crap
     *  @throws IOException if a network error occurs
     *  @since 0.14
     */
    private String parseAndCleanup(String in) throws IOException
    {
        String output = parse(in);
        output = output.replace("<p>", "").replace("</p>", ""); // remove paragraph tags
        output = output.replace("\n", ""); // remove new lines
        return output;
    }
 
    /**
     *  Finalizes the object on garbage collection.
     *  @since 0.14
     */
    protected void finalize()
    {
        // I have no idea why this is called when getLogEntries() needs to make four
        // queries. Silly Java.
        logout();
        namespaces = null;
    }
 
    // user rights methods
 
    /**
     *  Checks whether the currently logged on user has sufficient rights to
     *  edit/move a protected page (this includes
     *
     *  @param level a protection level
     *  @param move whether the action is a move
     *  @return whether the user can perform the specified action
     *  @throws IOException if we can't get the user rights
     *  @throws AccountLockedException if user is blocked
     *  @throws AssertionError if any defined assertions are false
     *  @since 0.10
     */
    private boolean checkRights(int level, boolean move) throws IOException, AccountLockedException
    {
        // check our assertions
        assertions();
 
        // check if logged out
        if (user == null)
            return level == NO_PROTECTION;
 
        // check if blocked
        if (user.isBlocked())
        {
            AccountLockedException ex = new AccountLockedException("Permission denied: Current user is blocked!");
            logger.logp(Level.SEVERE, "Wiki", "upload()", "[" + domain + "] Cannot upload - user is blocked!.", ex);
            throw ex;
        }
 
        // admins can do anything, this also covers FULL_PROTECTION
        if ((user.userRights() & ADMIN)  == ADMIN)
            return true;
        switch (level)
        {
            case NO_PROTECTION:
            case SEMI_PROTECTION:
                return true;
            case MOVE_PROTECTION:
            case SEMI_AND_MOVE_PROTECTION:
                return !move; // fall through is OK: the user cannot move a protected page
            // cases PROTECTED_DELETED_PAGE and FULL_PROTECTION are unnecessary
            default:
                return false;
        }
    }
 
    /**
     *  Checks the database lag and blocks for 30s if it exceeds our current
     *  maximum value. This method is thread safe as it allows us to block
     *  all requests (read and write) if <tt>maxlag</tt> is exceeded.
     *  See [[mw:Manual:Maxlag parameter]] for the server-side analog.
     *
     *  @param method what we are doing at the moment
     *  @throws IOException if we cannot retrieve the database lag
     *  @since 0.11
     */
    private void checkLag(String method) throws IOException
    {
        if (maxlag < 1) // disabled
            return;
        // only bother to check every 30 seconds
        if ((System.currentTimeMillis() - lastlagcheck) < 30000)
            return;
        try
        {
            // if we use this, this can block unrelated read requests while we edit a page
            synchronized(domain)
            {
               int lag = getCurrentDatabaseLag();
               while (lag > maxlag)
               {
                   log(Level.WARNING, "Sleeping for 30s as current database lag exceeds the maximum allowed value of " + maxlag + " s", method);
                   Thread.sleep(30000);
                   lag = getCurrentDatabaseLag();
               }
 
               // update counter
               lastlagcheck = System.currentTimeMillis();
            }
        }
        catch (InterruptedException ex)
        {
            // nobody cares
        }
    }
 
    /**
     *  Performs assertions.
     *  @throws AssertionError if any assertions are false
     *  @see #setAssertionMode
     *  @since 0.11
     */
    private void assertions()
    {
        try
        {
            if (assertion == ASSERT_NONE)
                return;
            if ((assertion & ASSERT_LOGGED_IN) == ASSERT_LOGGED_IN)
                assert (user != null && cookies.containsValue(user.getUsername())): "Not logged in";
            if ((assertion & ASSERT_BOT) == ASSERT_BOT)
                assert (user.userRights() & BOT) == BOT : "Not a bot";
            if ((assertion & ASSERT_NO_MESSAGES) == ASSERT_NO_MESSAGES)
                // it's best if we avoid expensive API queries if possible
                assert !hasNewMessages() : "User has new messages";
        }
        catch (IOException ex)
        {
            throw new AssertionError(ex);
        }
    }
 
    // cookie methods
 
    /**
     *  Sets cookies to an unconnected URLConnection and enables gzip
     *  compression of returned text.
     *  @param u an unconnected URLConnection
     *  @param map the cookie store
     */
    private void setCookies(URLConnection u, Map map)
    {
        Iterator i = map.entrySet().iterator();
        StringBuilder cookie = new StringBuilder(100);
        while (i.hasNext())
        {
            Map.Entry entry = (Map.Entry)i.next();
            cookie.append(entry.getKey());
            cookie.append("=");
            cookie.append(entry.getValue());
            cookie.append("; ");
        }
        u.setRequestProperty("Cookie", cookie.toString());
 
        // enable gzip compression
        u.setRequestProperty("Accept-encoding", "gzip");
    }
 
    /**
     *  Grabs cookies from the URL connection provided.
     *  @param u an unconnected URLConnection
     *  @param map the cookie store
     */
    private void grabCookies(URLConnection u, Map map)
    {
        // reset the cookie store
        map.clear();
        String headerName = null;
        for (int i = 1; (headerName = u.getHeaderFieldKey(i)) != null; i++)
            if (headerName.equals("Set-Cookie"))
            {
                String cookie = u.getHeaderField(i);
 
                // _session cookie is for cookies2, otherwise this causes problems
                if (cookie.indexOf("_session") != -1 && map == cookies)
                    continue;
 
                cookie = cookie.substring(0, cookie.indexOf(";"));
                String name = cookie.substring(0, cookie.indexOf("="));
                String value = cookie.substring(cookie.indexOf("=") + 1, cookie.length());
                map.put(name, value);
            }
    }
 
    // logging methods
 
    /**
     *  Logs a successful result.
     *  @param text string the string to log
     *  @param method what we are currently doing
     *  @param level the level to log at
     *  @since 0.06
     */
    private void log(Level level, String text, String method)
    {
        StringBuilder sb = new StringBuilder(100);
        sb.append('[');
        sb.append(domain);
        sb.append("] ");
        sb.append(text);
        sb.append('.');
        logger.logp(level, "Wiki", method + "()", sb.toString());
    }
 
    /**
     *  Logs a url fetch.
     *  @param url the url we are fetching
     *  @param method what we are currently doing
     *  @since 0.08
     */
    private void logurl(String url, String method)
    {
        logger.logp(Level.FINE, "Wiki", method + "()", "Fetching URL " + url);
    }
 
    // calendar/timestamp methods
 
    /**
     *  Turns a calendar into a timestamp of the format yyyymmddhhmmss.
     *  @param c the calendar to convert
     *  @return the converted calendar
     *  @see #timestampToCalendar
     *  @since 0.08
     */
    private String calendarToTimestamp(Calendar c)
    {
        StringBuilder x = new StringBuilder();
        x.append(c.get(Calendar.YEAR));
        int i = c.get(Calendar.MONTH) + 1; // January == 0!
        if (i < 10)
            x.append("0"); // add a zero if required
        x.append(i);
        i = c.get(Calendar.DATE);
        if (i < 10)
            x.append("0");
        x.append(i);
        i = c.get(Calendar.HOUR_OF_DAY);
        if (i < 10)
            x.append("0");
        x.append(i);
        i = c.get(Calendar.MINUTE);
        if (i < 10)
            x.append("0");
        x.append(i);
        i = c.get(Calendar.SECOND);
        if (i < 10)
            x.append("0");
        x.append(i);
        return x.toString();
    }
 
    /**
     *  Turns a timestamp of the format yyyymmddhhmmss into a Calendar object.
     *  @param timestamp the timestamp to convert
     *  @return the converted Calendar
     *  @see #calendarToTimestamp
     *  @since 0.08
     */
    private Calendar timestampToCalendar(String timestamp)
    {
        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
        int year = Integer.parseInt(timestamp.substring(0, 4));
        int month = Integer.parseInt(timestamp.substring(4, 6)) - 1; // January == 0!
        int day = Integer.parseInt(timestamp.substring(6, 8));
        int hour = Integer.parseInt(timestamp.substring(8, 10));
        int minute = Integer.parseInt(timestamp.substring(10, 12));
        int second = Integer.parseInt(timestamp.substring(12, 14));
        calendar.set(year, month, day, hour, minute, second);
        return calendar;
    }
 
    /**
     *  Converts a timestamp of the form used by the API
     *  (yyyy-mm-ddThh:mm:ssZ) to the form
     *  yyyymmddhhmmss, which can be fed into <tt>timestampToCalendar()</tt>.
     *
     *  @param timestamp the timestamp to convert
     *  @return the converted timestamp
     *  @see #timestampToCalendar
     *  @since 0.12
     */
    private String convertTimestamp(String timestamp)
    {
        StringBuilder ts = new StringBuilder(timestamp.substring(0, 4));
        ts.append(timestamp.substring(5, 7));
        ts.append(timestamp.substring(8, 10));
        ts.append(timestamp.substring(11, 13));
        ts.append(timestamp.substring(14, 16));
        ts.append(timestamp.substring(17, 19));
        return ts.toString();
    }
 
    // serialization
 
    /**
     *  Writes this wiki to a file.
     *  @param out an ObjectOutputStream to write to
     *  @throws IOException if there are local IO problems
     *  @since 0.10
     */
    private void writeObject(ObjectOutputStream out) throws IOException
    {
        out.writeObject(user.getUsername());
        out.writeObject(cookies);
        out.writeInt(throttle);
        out.writeInt(maxlag);
        out.writeInt(assertion);
        out.writeObject(scriptPath);
        out.writeObject(domain);
        out.writeObject(namespaces);
    }
 
    /**
     *  Reads a copy of a wiki from a file.
     *  @param in an ObjectInputStream to read from
     *  @throws IOException if there are local IO problems
     *  @throws ClassNotFoundException if we can't recognize the input
     *  @since 0.10
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException
    {
        String z = (String)in.readObject();
        user = new User(z);
        cookies = (HashMap)in.readObject();
        throttle = in.readInt();
        maxlag = in.readInt();
        assertion = in.readInt();
        scriptPath = (String)in.readObject();
        domain = (String)in.readObject();
        namespaces = (HashMap)in.readObject();
 
        // various other intializations
        cookies2 = new HashMap(10);
        base = "http://" + domain + scriptPath + "/index.php?title=";
        query = "http://" + domain + scriptPath + "/api.php?format=xml&";
    }
}
User:MER-C/Wiki.java

From Wikipedia, the free encyclopedia

Views

Navigation

Interaction

Search