User:Wmahan/despam.js

From Wikipedia, the free encyclopedia

Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences, see the instructions for Opera. Konqueror and Safari users can just click the Reload button.

// Title of the wiki page that hosts the despam report.
// NOTE: doDespam() later reuses this variable for the title of the
// article whose history is being scanned.
var despamPage = 'User:Wmahan/despam';
// query interface used to list the revisions of a page
var query_php = '/w/query.php';
// Site-relative bases for article URLs and for index.php.  They carry no
// scheme or host (like query_php above) so that every request and link
// stays on the origin the script is running on; an absolute http:// URL
// makes the raw-text requests cross-origin when the wiki is served over
// https.
var wiki = '/wiki/';
var wPage = '/w/index.php';
// indentation prefix for nested log messages
var tab = '     ';

// number of history entries to fetch at a time
var revPage = 50;
// stop after searching this many revisions
var maxRevs = 5000;

// request object shared by doDespam() and fetchHistory()
var http_request;
// offset of the next page of history entries to request
var rvoffset;
// link being searched for and user-name pattern of the scan in progress
var despamUrl, despamUserRe;
// element that log() appends its messages to
var logDiv;
 
// Parse the parameters passed in the query string of the current page.
// Returns a plain object that maps each decoded parameter name to its
// decoded value.  Entries without "=" or with an empty name are skipped.
function parseParams() {
  // A plain object, not an Array: assigning a key such as "length" to an
  // Array throws a RangeError instead of storing the value.
  var params = {};
  var pairs = window.location.search.substring(1).split('&');
  for (var i = 0; i < pairs.length; i++) {
    var pos = pairs[i].indexOf('=');
    if (pos > 0) {
      // unescape() is the counterpart of the escape() calls that the rest
      // of this script uses when it builds these query strings
      var key = unescape(pairs[i].substring(0, pos));
      params[key] = unescape(pairs[i].substring(pos + 1));
    }
  }
  return params;
}
 
// HTML snippets written to the log after an attempt to remove a link:
// green text when the page text changed, red text when it did not
var removeSuccess = '<span style="color: #008000">link removed</span>\n',
    removeFailure = '<span style="color: #a00000">unable to remove link</span>\n';
 
 
// Submit the edit form through its save button, after arranging for this
// window to close itself when the page is unloaded.
function saveAndClose() {
  var saveButton = document.getElementById('wpSave');
  window.onunload = function closeOnUnload() {
    window.close();
  };
  saveButton.click();
}
 
// Press the "show changes" (diff) button of the edit form.
function doDiff() {
  var diffButton = document.getElementById('wpDiff');
  diffButton.click();
}
 
// Backslash-escape every character of "text" that is special in a regular
// expression, so that the result matches "text" literally.
function despamEscapeRegex(text) {
  return text.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&');
}

// Replace the characters that are special in HTML with entities, so that
// "text" is shown literally when it is written through innerHTML.
function despamEscapeHtml(text) {
  return String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;')
    .replace(/>/g, '&gt;').replace(/"/g, '&quot;');
}

// On the edit page, remove every line that contains a bracketed external
// link to "url", and note in the edit summary that the link was added by
// "user" on "timestamp".  A log panel with "save" and "show changes"
// buttons is appended to the page.  Nothing is done unless the edit form
// is present and its summary field is still empty.
function removeLink(url, user, timestamp) {
  var es = document.getElementById('wpSummary');
  var tb = document.getElementById('wpTextbox1');
  var cont = document.getElementById('content');
  if (!es || es.value !== '' || !tb || !cont) {
    return;
  }

  // the minor-edit checkbox is optional; skip it when it is not on the page
  var minor = document.getElementById('wpMinoredit');
  if (minor) {
    minor.checked = true;
  }

  logDiv = document.createElement('div');
  logDiv.id = 'despamLog';

  // XXX
  logDiv.innerHTML = "<hr />\n<a name=\"despam\"></a>\n"
    + "<h2>Removing link</h2><br />\n"
    + "<input type='button' value='Save page and close window'"
    + " onclick='saveAndClose()' /> ***"
    + " <input type='button' value='Show changes'"
    + " onclick='doDiff()' />\n";

  cont.appendChild(logDiv);

  // Match a whole line of the form
  //   *[http://www.example.com link] plus any following text
  // The line's own leading newline is part of the match, but the newline
  // that ends it is only looked at, not consumed: that leaves it available
  // as the leading newline of the next line, so consecutive lines that
  // contain the link are all removed.
  // Bare URLs that are not inside [brackets] are not handled.
  var urlRe = despamEscapeRegex(url);
  var linkLine = new RegExp('\\n\\**[^\\n]*\\[' + urlRe
    + '[^\\n]*?\\][^\\n]*?(?=\\n|$)', 'g');

  var newText = tb.value.replace(linkLine, function(line) {
    // the deleted wikitext is arbitrary page content: escape it for the log
    log('deleted line: <span style="color: #c00000">'
      + despamEscapeHtml(line) + '</span>');
    return '';
  });

  if (newText !== tb.value) {
    log(removeSuccess);
    tb.value = newText;
    es.value = 'rm linkspam by [[Special:Contributions/' + user + '|'
      + user + ']] on ' + timestamp;
  }
  else {
    log(removeFailure);
  }
  // scroll to the log panel
  document.location.href = '#despam';
}
 
 
// Append the HTML message "msg" to the logging area as a new <div>.
// The message is discarded when no logging area has been set up.
function log(msg) {
  var entry = document.createElement('div');
  entry.innerHTML = msg; // XXX

  if (!logDiv) {
    return;
  }
  logDiv.appendChild(entry);
}
 
// Fetch "url" with a synchronous (blocking) GET request and return the
// response text.  The URL is written to the log first.
function sync_fetch(url) {
  // The URL can contain text taken from the query string of the page, so
  // it is escaped before being handed to log(), which writes HTML.
  var shownUrl = String(url).replace(/&/g, '&amp;')
    .replace(/</g, '&lt;').replace(/>/g, '&gt;');
  log('fetching ' + shownUrl + '...');

  var request = new XMLHttpRequest();
  request.open("GET", url, false); // synchronous
  request.send(null);
  return request.responseText;
}
 
// Check whether the link "url" is present in revision "afterid" of "page"
// but absent from revision "beforeid", i.e. whether the edit between the
// two added it.  Both revisions are fetched synchronously as raw wikitext.
// Returns true only when the link was added.
function wasLinkAdded(page, url, beforeid, afterid) {
  var qpage = wiki + page + '?action=raw&oldid=';

  var beforeText = sync_fetch(qpage + beforeid);
  if (beforeText.indexOf(url) != -1) {
    // link already existed; it was not added in the next edit
    return false;
  }

  var afterText = sync_fetch(qpage + afterid);
  // XXX doesn't consider vandalism: a link that was removed and later
  // restored looks like a fresh addition
  return afterText.indexOf(url) != -1;
}

// Examine the list of history entries "results" for an edit by a user
// matching "userRe" that adds the link "url" to "page".
// Returns true when such an edit was found (a message with "diff" and
// "remove link" links is logged), false when the search is over without a
// match, and nothing when the next page of history entries was requested.
function processHistory(results, url, page, userRe) {
  var pages = results ? results['pages'] : null;
  var info = null;
  for (var key in pages) { // XXX hack to get first element of associative array
    info = pages[key];
    break;
  }
  var revs = info ? info['revisions'] : null;

  if (!revs || !revs.length) {
    // unknown page, or no history entries left
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }

  // Each entry is compared with the entry that follows it in the list,
  // which is treated as the revision it was made on top of.
  // NOTE(review): this assumes the list is ordered newest first - confirm.
  var match = null;
  for (var n = 0; n + 1 < revs.length; n++) {
    var curuser = revs[n]['user'];
    if (!curuser || !curuser.match(userRe)) {
      continue;
    }
    // found a possible match
    var afterid = revs[n]['revid'];
    var beforeid = revs[n + 1]['revid'];
    log('checking possible match: ' + curuser + ' on ' + revs[n]['timestamp']);
    if (wasLinkAdded(page, url, beforeid, afterid)) {
      match = {
        user: curuser,
        timestamp: revs[n]['timestamp'],
        afterid: afterid,
        beforeid: beforeid
      };
      break;
    }
    log(tab + 'no match');
  }

  if (match) {
    log('found addition of link by ' + match.user + ' on ' + match.timestamp
      + ' (<a target="_blank" href="' + wPage + '?title=' + escape(page)
      + '&diff=' + match.afterid + '&oldid=' + match.beforeid
      + '">diff</a>, <a href="'
      + wPage + '?title=' + escape(page) + '&action=edit&fakeaction=rmlink'
      + '&user=' + escape(match.user) + '&timestamp=' + escape(match.timestamp)
      + '&url=' + escape(url) + '">remove link</a>)');
    return true;
  }

  // Advance by one entry less than a full page, so that the last entry of
  // this page is fetched again as the first entry of the next one.
  // Otherwise the edit on the page boundary is never compared with the
  // revision before it.
  rvoffset += Math.max(1, revPage - 1);
  if (rvoffset > maxRevs || revs.length < revPage) {
    // we reached the end without finding anything
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }

  // go on to the next page
  fetchHistory();
}
 
// Set everything up and start fetching pages of history entries.
//   url   - the external link to look for
//   page  - title of the page whose history is scanned
//   users - user names or IP addresses separated by ":"; a "*" stands for
//           a run of digits (e.g. 192.168.0.*)
// Returns false, without fetching anything, when "users" contains no name.
function doDespam(url, page, users) {
  // every character that is special in a regular expression, except "*"
  var metaExceptStar = /[\\^$.+?()[\]{}|]/g;

  var entries = users.split(':');
  var patterns = [];
  for (var i = 0; i < entries.length; i++) {
    // remove extraneous spaces
    var name = entries[i].replace(/ +/g, ' ').replace(/^ +| +$/g, '');
    if (name === '') {
      // an empty alternative would make the pattern match every user
      continue;
    }
    // escape the name, then turn wildcards into regexes
    patterns.push(name.replace(metaExceptStar, '\\$&').replace(/\*/g, '\\d+'));
  }

  if (patterns.length === 0) {
    log('Error: invalid user list');
    return false;
  }
  // NOTE: the pattern is not anchored, so a name matches when it occurs
  // anywhere inside the user name of a revision
  var userRe = new RegExp('(' + patterns.join('|') + ')');

  var nicePage = page.replace(/_/g, ' ');
  var title = page.replace(/ /g, '_');
  // the title comes from the query string: escape it before logging as HTML
  var shownTitle = nicePage.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;').replace(/>/g, '&gt;');
  log('<h2>Scanning history for <a href="'
    + wiki + escape(title) + '">' + shownTitle + '</a> (<a href="'
    + wPage + '?title=' + escape(title) + '&action=history">history</a>)</h2>\n');

  // state shared with fetchHistory()
  rvoffset = 0;
  despamPage = title;
  despamUrl = url;
  despamUserRe = userRe;

  http_request = new XMLHttpRequest();

  fetchHistory();
}
 
// Request the page of history entries that starts at "rvoffset" for
// "despamPage", asynchronously, and hand the decoded answer to
// processHistory().  A failed request or an undecodable answer is logged.
// Returns true once the request has been sent.
function fetchHistory() {
  log('fetching history entries #' + rvoffset + ' through #' + (rvoffset + revPage));

  // encodeURIComponent() rather than escape(): escape() leaves "+" alone
  // (which is read as a space in a query string) and produces %uXXXX
  // sequences for characters outside Latin-1
  var qpage = query_php + '?what=revisions&format=json&rvlimit=' + revPage
    + '&rvoffset=' + rvoffset + '&titles=' + encodeURIComponent(despamPage);

  var problem = '<span style="color: #aa0000">There was a problem querying the page history.</span>';

  http_request.open("GET", qpage, true);
  http_request.onreadystatechange = function () {
    if (http_request.readyState != 4) {
      // request still in progress
      return;
    }
    if (http_request.status != 200) {
      log(problem);
      return;
    }

    var results;
    try {
      // NOTE(review): eval() runs whatever the server sends back as code.
      // Consider JSON.parse() once the response is confirmed to be strict
      // JSON.
      results = eval("(" + http_request.responseText + ")");
    } catch (e) {
      log(problem);
      return;
    }
    processHistory(results, despamUrl, despamPage, despamUserRe);
  };
  http_request.send(null);

  return true;
}
 
// Click handler of a "despam" link: open the despam report page in a new
// window, passing it the link "url", the title "page" and the user names
// typed into the "despamUsers" field.
// Always returns false so that the clicked link itself is not followed.
function despamClick(url, page) {
  var field = document.getElementById('despamUsers');
  var users = field ? field.value : '';

  // reject an empty or all-whitespace entry
  if (users.replace(/^\s+|\s+$/g, '') === '') {
    alert('No user name or IP address was entered');
    return false;
  }

  // escape() pairs with the unescape() calls in parseParams()
  var reportUrl = '/wiki/' + despamPage
    + '?url=' + escape(url)
    + '&page=' + escape(page)
    + '&users=' + escape(users);

  window.open(reportUrl);

  return false;
}
 
// Page-load entry point.  Depending on the address of the current page it
//  1. adds a "despam" link to each search result on a Linksearch page,
//     plus a text field for the user names to look for;
//  2. starts a history scan on the despam report page, using the url, page
//     and users parameters of the query string;
//  3. removes a link on an edit page that was opened with
//     action=edit&fakeaction=rmlink.
addOnloadHook( function() {
  var href = document.location.href;
  var params;

  if ((href.indexOf(':Linksearch') != -1
    || href.indexOf('%3ALinksearch') != -1)
    && href.indexOf('target=') != -1)
  {
    var cont = document.getElementById("content");
    if (!cont) {
      return;
    }

    // one search result: <li><a ...>url</a> ... <a ...>page</a></li>
    var re = new RegExp('<li>(<a href.*?>(.*?)</a>.*?<a .*?>(.*?)</a>)</li>', 'g');
    var quoteChars = new RegExp('([\'\\\\])', 'g');
    var skipPages = new RegExp('(talk|user):', 'i');

    // Quote "text" for a single-quoted JavaScript string that itself sits
    // inside a double-quoted HTML attribute: backslash-escape quotes and
    // backslashes, and turn double quotes into entities so that they do
    // not end the attribute.
    var quoteForHandler = function(text) {
      return text.replace(quoteChars, '\\$1').replace(/"/g, '&quot;');
    };

    // a = whole <li>, b = its contents, c = link text, d = page text
    var cb = function(a, b, c, d) {
      if (d.match(skipPages)) {
        return a; // skip entry
      }
      return '<li>' + b + '  [<a href="" onclick="return despamClick(\''
        + quoteForHandler(c) + '\',\'' + quoteForHandler(d)
        + '\')">despam</a>]</li>';
    };

    // label and input field for the user names
    var div = document.createElement('div');
    div.innerHTML = 'IPs or usernames for despam (e.g. <i>SpamUser</i>, <i>192.168.0.*</i>):<br />'; // XXX
    var inp = document.createElement('input');
    inp.type = 'text';
    inp.id = 'despamUsers';
    inp.size = 40;
    div.appendChild(inp);

    cont.innerHTML = cont.innerHTML.replace(re, cb); // XXX un-DOM

    // Put the field in front of the first list of the page.  Inserting
    // through the list's own parent works wherever that list is nested.
    var firstList = document.getElementsByTagName('ol')[0];
    var bc = document.getElementById('bodyContent');
    if (firstList && firstList.parentNode) {
      firstList.parentNode.insertBefore(div, firstList);
    }
    else if (bc) {
      bc.appendChild(div);
    }
  }
  else if (href.indexOf(despamPage) != -1
    && href.indexOf('action=') == -1)
  {
    logDiv = document.getElementById('despamLog');

    params = parseParams();
    if (params['url'] && params['page'] && params['users']) {
      doDespam(params['url'], params['page'], params['users']);
    }
    else {
      log('<span style="color: #aa0000">The url, page, or users parameter was missing.</span>');
    }
  }
  else if (href.indexOf('action=edit&fakeaction=rmlink') != -1) {
    params = parseParams();
    if (params['url'] && params['user'] && params['timestamp']) {
      removeLink(params['url'], params['user'], params['timestamp']);
    }
  }

} );