User:Wmahan/despam.js
From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences (see the instructions for Opera). Konqueror and Safari users can just click the Reload button.
// despam: a MediaWiki user script with three entry modes, chosen from the
// current URL by despamInit():
//   1. on a Linksearch result page, add a [despam] link to every entry;
//   2. on the despam tool page, scan a page's history for the edit in which
//      one of the given users added a link;
//   3. on an edit page opened with fakeaction=rmlink, delete the lines that
//      contain the link and pre-fill the edit summary.

// name of the wiki page that hosts the tool's output
var despamPage = 'User:Wmahan/despam';
var query_php = '/w/query.php';
// Site-relative so that requests stay same-origin whatever protocol the wiki
// is served over (query_php and the tool URL were already relative).
var wiki = '/wiki/';
var wPage = '/w/index.php';
// prefix for indented log lines
// NOTE(review): may originally have been an HTML non-breaking-space entity
// that was lost when the page was rendered -- confirm.
var tab = ' ';
// number of history entries to fetch at a time
var revPage = 50;
// stop after searching this many revisions
var maxRevs = 5000;

var http_request;
var rvoffset;
// state of the history scan in progress: page being scanned, link, user regex
var despamTargetPage, despamUrl, despamUserRe;
var logDiv;

// messages indicating success or failure in removing a link
var removeSuccess = '<span style="color: #008000">link removed</span>\n';
var removeFailure = '<span style="color: #a00000">unable to remove link</span>\n';

// true if "obj" has "key" as an own (non-inherited) property
function hasOwn(obj, key) {
    return Object.prototype.hasOwnProperty.call(obj, key);
}

// escape "text" for insertion into HTML element content or a quoted attribute
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

// backslash-escape the regex metacharacters in "text"; when "keepStar" is
// true, "*" is left alone so the caller can treat it as a wildcard
function escapeRegex(text, keepStar) {
    var chars = keepStar ? /([!+?^\\$\]\[.(){}|])/g : /([!*+?^\\$\]\[.(){}|])/g;
    return String(text).replace(chars, '\\$1');
}

// decode one percent-encoded query-string component
function decodeParam(text) {
    try {
        return decodeURIComponent(text);
    } catch (e) {
        // malformed input or legacy %uXXXX escapes produced by escape()
        return unescape(text);
    }
}

// parse parameters passed in the query string; returns an object that maps
// each decoded key to its decoded value
function parseParams() {
    var query = window.location.search.substring(1);
    var params = {};
    var qlist = query.split('&');
    for (var i = 0; i < qlist.length; i++) {
        var pos = qlist[i].indexOf('=');
        if (pos > 0) {
            var key = decodeParam(qlist[i].substring(0, pos));
            params[key] = decodeParam(qlist[i].substring(pos + 1));
        }
    }
    return params;
}

// save the edit page and close this window
function saveAndClose() {
    window.onunload = function() { window.close(); };
    document.getElementById('wpSave').click();
}

// click the diff button on the edit page
function doDiff() {
    document.getElementById('wpDiff').click();
}

// Return "text" without the lines that contain a bracketed external link to
// "url" (e.g. "*[http://www.example.com link] plus any following text").
// "onDelete", if given, is called with each removed line. The first line of
// "text" is never removed because a match must start at a newline. Bare URLs
// (not inside [...]) are not handled.
function stripLinkLines(text, url, onDelete) {
    // The lookahead leaves the terminating newline in place so that two
    // consecutive matching lines are both removed.
    var re = new RegExp('\\n\\**[^\\n]*\\[' + escapeRegex(url, false) +
        '[^\\n]*?\\][^\\n]*(?=\\n|$)', 'g');
    return text.replace(re, function(line) {
        if (onDelete) {
            onDelete(line);
        }
        return '';
    });
}

// on the edit page, remove the link "url", noting in the
// edit summary that it was added by "user" on "timestamp";
// does nothing if the summary is already filled in or the
// edit form is not present
function removeLink(url, user, timestamp) {
    var es = document.getElementById('wpSummary');
    var tb = document.getElementById('wpTextbox1');
    var cont = document.getElementById('content');
    if (!es || es.value !== '' || !tb || !cont) {
        return;
    }
    var minor = document.getElementById('wpMinoredit');
    if (minor) {
        minor.checked = true;
    }

    logDiv = document.createElement('div');
    logDiv.id = 'despamLog';
    // static markup only; the buttons call the global handlers defined above
    logDiv.innerHTML = "<hr />\n<a name=\"despam\"></a>\n" +
        "<h2>Removing link</h2><br />\n" +
        "<input type='button' value='Save page and close window'" +
        " onclick='saveAndClose()' /> ***" +
        " <input type='button' value='Show changes'" +
        " onclick='doDiff()' />\n";
    cont.appendChild(logDiv);

    var newText = stripLinkLines(tb.value, url, function(line) {
        // the deleted line is page wikitext, so escape it before logging
        log('deleted line: <span style="color: #c00000">' + escapeHtml(line) + '</span>');
    });

    if (newText != tb.value) {
        log(removeSuccess);
        tb.value = newText;
        es.value = 'rm linkspam by [[Special:Contributions/' + user + '|' +
            user + ']] on ' + timestamp;
    } else {
        log(removeFailure);
    }
    document.location.href = '#despam';
}

// write the log message "msg" (HTML markup) to the logging area;
// a no-op when no logging area has been set up
function log(msg) {
    if (!logDiv) {
        return;
    }
    var div = document.createElement('div');
    div.innerHTML = msg;
    logDiv.appendChild(div);
}

// fetch "url" with a synchronous (blocking) call; returns the response
// text, or null if the request failed or did not return status 200
function sync_fetch(url) {
    log('fetching ' + escapeHtml(url) + '...');
    var request = new XMLHttpRequest();
    try {
        request.open("GET", url, false); // synchronous
        request.send(null);
    } catch (e) {
        log('<span style="color: #aa0000">request failed</span>');
        return null;
    }
    if (request.status != 200) {
        // an error page must not be mistaken for revision text
        log('<span style="color: #aa0000">request failed</span>');
        return null;
    }
    return request.responseText;
}

// Check whether the link "url" is in "page" revision
// "afterid", but not in "beforeid"
function wasLinkAdded(page, url, beforeid, afterid) {
    var qpage = wPage + '?title=' + encodeURIComponent(page) + '&action=raw&oldid=';
    var beforeText = sync_fetch(qpage + beforeid);
    if (beforeText === null || beforeText.indexOf(url) != -1) {
        // fetch failed, or the link already existed and so was not
        // added in the next edit
        return false;
    }
    var afterText = sync_fetch(qpage + afterid);
    // If the link is absent here too it was not added by this edit.
    // XXX doesn't consider vandalism
    return afterText !== null && afterText.indexOf(url) != -1;
}

// examine the list of history entries "results"
// for edits by a user matching "userRe" that add
// the link "url".
// Returns true if such an edit was found, false if the search ended without
// a match, and undefined if the next page of history was requested.
function processHistory(results, url, page, userRe) {
    var noMatch = '<span style="color: #aa0000">No match found!</span>';
    var pages = results && results['pages'];

    // "pages" is keyed by an id we do not know; take its first entry
    var info = null;
    for (var pageKey in pages) {
        if (hasOwn(pages, pageKey)) {
            info = pages[pageKey];
            break;
        }
    }
    var revs = info && info['revisions'];
    if (!revs) {
        log(noMatch);
        return false;
    }

    var revKeys = [];
    for (var revKey in revs) {
        if (hasOwn(revs, revKey)) {
            revKeys.push(revKey);
        }
    }

    // Walk consecutive pairs: each entry is compared with the entry that
    // follows it in the list, which is passed as the "before" revision.
    // The last entry of a batch therefore has no partner and is not checked.
    var match = null;
    for (var n = 0; n + 1 < revKeys.length && !match; n++) {
        var cur = revs[revKeys[n]];
        var older = revs[revKeys[n + 1]];
        var curuser = cur['user'];
        if (typeof curuser != 'string' || !curuser.match(userRe)) {
            continue;
        }
        // found a possible match
        log('checking possible match: ' + escapeHtml(curuser) + ' on ' +
            escapeHtml(cur['timestamp']));
        if (wasLinkAdded(page, url, older['revid'], cur['revid'])) {
            match = {
                user: curuser,
                timestamp: cur['timestamp'],
                afterid: cur['revid'],
                beforeid: older['revid']
            };
        } else {
            log(tab + 'no match');
        }
    }

    if (match) {
        // "&amp;" keeps the HTML parser from reading "&times" in
        // "&timestamp" as a character reference
        var base = wPage + '?title=' + encodeURIComponent(page);
        log('found addition of link by ' + escapeHtml(match.user) + ' on ' +
            escapeHtml(match.timestamp) +
            ' (<a target="_blank" href="' + base +
            '&amp;diff=' + encodeURIComponent(match.afterid) +
            '&amp;oldid=' + encodeURIComponent(match.beforeid) +
            '">diff</a>, <a href="' + base +
            '&amp;action=edit&amp;fakeaction=rmlink' +
            '&amp;user=' + encodeURIComponent(match.user) +
            '&amp;timestamp=' + encodeURIComponent(match.timestamp) +
            '&amp;url=' + encodeURIComponent(url) + '">remove link</a>)');
        return true;
    }

    rvoffset += revPage;
    if (rvoffset > maxRevs || revs.length < revPage) {
        // we reached the end without finding anything
        log(noMatch);
        return false;
    }
    // go on to the next page
    fetchHistory();
}

// Build the regex that recognises the users to look for. "users" is a
// ":"-separated list of names or IP addresses in which "*" stands for a run
// of digits. Returns null when the list contains no usable entry.
function buildUserRegex(users) {
    var pattern = escapeRegex(users, true);
    // turn wildcards into regexes
    pattern = pattern.replace(/\*/g, '\\d+');
    // remove extraneous spaces
    pattern = pattern.replace(/ +/g, ' ');
    var parts = pattern.split(':');
    var userlist = [];
    for (var i = 0; i < parts.length; i++) {
        var part = parts[i].replace(/(^ +| +$)/g, '');
        // an empty alternative would match every user
        if (part !== '') {
            userlist.push(part);
        }
    }
    if (userlist.length === 0) {
        return null;
    }
    return new RegExp('(' + userlist.join('|') + ')');
}

// set everything up and start fetching pages of history entries;
// returns false if "users" contains no usable entry
function doDespam(url, page, users) {
    var userRe = buildUserRegex(users);
    if (!userRe) {
        log('Error: invalid user list');
        log('debugging info: (' + escapeHtml(users) + ')');
        return false;
    }

    var nicePage = page.replace(/_/g, ' ');
    page = page.replace(/ /g, '_');
    // "page" comes from the query string, so it is escaped before display
    log('<h2>Scanning history for <a href="' + wiki + encodeURIComponent(page) + '">' +
        escapeHtml(nicePage) + '</a> (<a href="' + wPage + '?title=' +
        encodeURIComponent(page) + '&amp;action=history">history</a>)</h2>\n');

    rvoffset = 0;
    despamTargetPage = page;
    despamUrl = url;
    despamUserRe = userRe;
    http_request = new XMLHttpRequest();
    fetchHistory();
}

// asynchronously fetch the next page of history entries (starting at
// "rvoffset") for the page being scanned and hand it to processHistory
function fetchHistory() {
    var problem = '<span style="color: #aa0000">There was a problem querying the page history.</span>';
    log('fetching history entries #' + rvoffset + ' through #' + (rvoffset + revPage));
    // fetch the query page
    var qpage = query_php + '?what=revisions&format=json&rvlimit=' +
        revPage + '&rvoffset=' + rvoffset + '&titles=' +
        encodeURIComponent(despamTargetPage);
    http_request.open("GET", qpage, true);
    http_request.onreadystatechange = function() {
        if (http_request.readyState != 4) {
            return;
        }
        if (http_request.status != 200) {
            log(problem);
            return;
        }
        var results;
        try {
            // NOTE: this used to eval() the response; JSON.parse does not
            // execute the server's reply as code
            results = JSON.parse(http_request.responseText);
        } catch (e) {
            log(problem);
            return;
        }
        processHistory(results, despamUrl, despamTargetPage, despamUserRe);
    };
    http_request.send(null);
    return true;
}

// click handler of a [despam] link: open the despam tool page for the link
// "url" on "page", using the users typed into the "despamUsers" field;
// always returns false so the link itself is not followed
function despamClick(url, page) {
    var field = document.getElementById('despamUsers');
    var users = field ? field.value : '';
    if (users == '') {
        alert('No user name or IP address was entered');
        return false;
    }
    window.open('/wiki/' + despamPage +
        '?url=' + encodeURIComponent(url) +
        '&page=' + encodeURIComponent(page) +
        '&users=' + encodeURIComponent(users));
    return false;
}

// quote "text" (taken from serialized HTML) for use inside a single-quoted
// JavaScript string that itself sits in a double-quoted HTML attribute
function quoteForHandler(text) {
    return text.replace(/(['\\])/g, '\\$1').replace(/"/g, '&quot;');
}

// on a Linksearch result page, append a [despam] link to every list entry
// (except talk and user pages) and insert the user-name input field
function addDespamLinks() {
    var textLabel = 'IPs or usernames for despam (e.g. <i>SpamUser</i>, <i>192.168.0.*</i>):<br />';
    var cont = document.getElementById("content");
    if (!cont) {
        return;
    }
    // XXX un-DOM: entries are rewritten by matching the serialized HTML
    var re = new RegExp('<li>(<a href.*?>(.*?)</a>.*?<a .*?>(.*?)</a>)</li>', 'g');
    var skipPages = new RegExp('(talk|user):', 'i');

    var div = document.createElement('div');
    div.innerHTML = textLabel;
    var inp = document.createElement('input');
    inp.type = 'text';
    inp.id = 'despamUsers';
    inp.size = 40;
    div.appendChild(inp);

    cont.innerHTML = cont.innerHTML.replace(re, function(item, inner, linkText, pageTitle) {
        if (pageTitle.match(skipPages)) {
            return item; // skip entry
        }
        return '<li>' + inner + ' [<a href="" onclick="return despamClick(\'' +
            quoteForHandler(linkText) + '\',\'' + quoteForHandler(pageTitle) +
            '\')">despam</a>]</li>';
    });

    var list = document.getElementsByTagName('ol')[0];
    var bc = document.getElementById('bodyContent');
    if (list && list.parentNode) {
        list.parentNode.insertBefore(div, list);
    } else if (bc) {
        bc.appendChild(div);
    }
}

// page-load entry point: pick the mode that matches the current URL
function despamInit() {
    var href = document.location.href;
    var params;
    if ((href.indexOf(':Linksearch') != -1 || href.indexOf('%3ALinksearch') != -1) &&
            href.indexOf('target=') != -1) {
        addDespamLinks();
    } else if (href.indexOf(despamPage) != -1 && href.indexOf('action=') == -1) {
        logDiv = document.getElementById('despamLog');
        params = parseParams();
        if (params['url'] && params['page'] && params['users']) {
            doDespam(params['url'], params['page'], params['users']);
        } else {
            log('<span style="color: #aa0000">The url, page, or users parameter was missing.</span>');
        }
    } else if (href.indexOf('action=edit&fakeaction=rmlink') != -1) {
        params = parseParams();
        if (params['url'] && params['user'] && params['timestamp']) {
            removeLink(params['url'], params['user'], params['timestamp']);
        }
    }
}

// register with the wiki's onload helper when it exists, otherwise fall
// back to a plain load listener
if (typeof addOnloadHook === 'function') {
    addOnloadHook(despamInit);
} else if (typeof window !== 'undefined' && window.addEventListener) {
    window.addEventListener('load', despamInit, false);
}