User:Interiot2/Tool2/code.js
From Wikipedia, the free encyclopedia
< User:Interiot2 | Tool2
Note: After saving, you have to bypass your browser's cache to see the changes. Mozilla/Safari: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Internet Explorer: press Ctrl-F5, Opera/Konqueror: press F5.
// see http://paperlined.org/apps/wikipedia/Tool2/ for instructions on adding this to your monobook.js

// To run this tool on other servers:
//   1. copy this script to the target server (this is required because of javascript
//      cross-site security restrictions)
//   2. update the following URL
//      for example: "User:Interiot/Tool2/code.js"
var tool2_url = "User:Interiot2/Tool2/code.js";

//   3. update this namespace list, extracted from something like
//      http://en.wikipedia.org/wiki/Special:Export//
//      These *should not* have colons after them.
var namespaces = [
    "Talk",
    "User",
    "User talk",
    "Wikiquote",
    "Wikiquote talk",
    "Image",
    "Image talk",
    "MediaWiki",
    "MediaWiki talk",
    "Template",
    "Template talk",
    "Help",
    "Help talk",
    "Category",
    "Category talk",
    // 3b. these two project entries are not added by Special:Export, and might or might
    //     not need to be updated
    "Wikipedia",
    "Wikipedia talk"
];

//   4. update this date-parser to match the format and language of your specific wiki.
//      Feel free to contact Interiot regarding this, if you can't find another
//      copy of this script that uses the same language.
// input: a text string from Special:Contributions.
// output: a javascript Date object (or null when the text is not a recognizable timestamp)
// documentation: http://www.quirksmode.org/js/introdate.html#parse,
//                http://www.elated.com/tutorials/programming/javascript/dates/
//
// date_parse(text)
//   text    - a timestamp of the form "<time>, <day> <month name> <year>",
//             for example "12:34, 5 June 2005"
//   returns - a Date built from those fields in the browser's local time zone,
//             or null when the text does not have that form
function date_parse(text) {
    var matches = text.match(/^([0-9:]+), +([0-9]+) +([a-z]+) +([0-9]+)$/i);
    if (!matches) {
        return matches;
    }

    // English month names and their three-letter abbreviations, mapped to Date month indexes.
    var month_index = {
        january: 0, jan: 0, february: 1, feb: 1, march: 2, mar: 2,
        april: 3, apr: 3, may: 4, june: 5, jun: 5, july: 6, jul: 6,
        august: 7, aug: 7, september: 8, sep: 8, october: 9, oct: 9,
        november: 10, nov: 10, december: 11, dec: 11
    };
    var month_name = matches[3].toLowerCase();

    if (!Object.prototype.hasOwnProperty.call(month_index, month_name)) {
        // Unknown month name: hand the rearranged string to the browser's own parser.
        // The result of that is implementation-dependent and may be an invalid Date.
        var parseme = matches[3] + ", " + matches[2] + " " + matches[4] + " " + matches[1] + ":00";
        return new Date(Date.parse(parseme));
    }

    // Build the date from its numeric parts instead of relying on Date.parse,
    // whose handling of non-ISO strings differs between browsers.
    var clock = matches[1].split(":");
    return new Date(
        Number(matches[4]),
        month_index[month_name],
        Number(matches[2]),
        Number(clock[0] || 0),
        Number(clock[1] || 0),
        Number(clock[2] || 0)
    );
}

// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ end of server-specific configuration ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

// TODO:
//   - the current document.location method doesn't work when the page is accessed sans-mod_rewrite
//   - test with non-ASCII characters
//   - non-ascii usernames
//   - ??

// URL prefix under which wiki pages live; filled in below once we know we are on the tool's page.
var prefix = "";
// Query-string parameters of the current page.
var params = parse_params();
var path_len = document.location.pathname.length;

// trigger once we view the right page (i.e. the path ends with tool2_url)
if (document.location.pathname.substring(path_len - tool2_url.length, path_len) == tool2_url) {
    // get the prefix (needs to be fixed to work sans-mod_rewrite)
    prefix = document.location.protocol + "//" + document.location.host + "/"
           + document.location.pathname.substring(1, path_len - tool2_url.length);

    writeCSS();

    addOnloadFunction(function () {
        // blank the inner contents of the page
        var bodyContent = document.getElementById("bodyContent");
        while (bodyContent.childNodes.length > 0) {
            bodyContent.removeChild(bodyContent.lastChild);
        }

        // Without a username there is nothing to report on, so show the form
        // (this also covers query strings that carry other parameters only).
        if (!params["username"]) {
            generate_input_form(bodyContent);
        } else {
            generate_main_report(bodyContent);
        }
    });
}

// Replace the contents of bodyContent with a one-field form asking for a username.
// The form submits via GET back to the current page.
function generate_input_form(bodyContent) {
    bodyContent.innerHTML =
        "<form><table class='IT_form'>" +
        " <tr><td>Username <td><input maxlength=128 name=username value='' id=username title='username'>" +
        " <tr><td> <td><input type=submit value='Submit'>" +
        "</table></form>";

    var form = bodyContent.getElementsByTagName("form")[0];
    form.method = "get";
    form.action = document.location.href;

    document.getElementById("username").focus();
}

// Start fetching the contribution history of params["username"]; output_main_report
// is called with the parsed history once all pages have been retrieved.
function generate_main_report() {
    fetch_data(params["username"], "", output_main_report, 0, []);
}

// Append a two-cell row to the element with id "basic_stats".
// Both values are inserted as text nodes, so they are never interpreted as HTML.
// Returns the new row element.
function add_stats_row(left_col, right_col) {
    var row = document.createElement("tr");
    var left = document.createElement("td");
    var right = document.createElement("td");

    document.getElementById("basic_stats").appendChild(row);
    row.appendChild(left);
    row.appendChild(right);

    left.appendChild(document.createTextNode(left_col));
    right.appendChild(document.createTextNode(right_col));
    return row;
}

// Render the summary table for a parsed edit history.
//   history - list of edit entries as produced by parse_data; each entry is read
//             for its "title", "namespace" and "date_text" fields. The last entry
//             is reported as the first edit.
function output_main_report(history) {
    // -- generate summary statistics
    var seen_titles = {};
    var distinct_pages = 0;
    var namespace_numedits = {};
    var i;

    for (i = 0; i < namespaces.length; i++) {
        namespace_numedits[namespaces[i]] = 0;
    }
    namespace_numedits[""] = 0;

    for (i = 0; i < history.length; i++) {
        var entry = history[i];

        // Prefix the key so that titles such as "length" or "__proto__"
        // cannot collide with built-in property names.
        var title_key = "page:" + entry["title"];
        if (!seen_titles[title_key]) {
            seen_titles[title_key] = true;
            distinct_pages++;
        }

        namespace_numedits[entry["namespace"]]++;
    }

    // With no edits there is no average and no first edit to show.
    var average = "n/a";
    var first_edit = "n/a";
    if (history.length > 0) {
        average = (history.length / distinct_pages).toFixed(3);
        first_edit = history[history.length - 1]["date_text"];
    }

    // -- output report
    var table = document.createElement("table");
    table.id = "basic_stats";
    document.getElementById("bodyContent").appendChild(table);

    add_stats_row("Username", params["username"]);
    add_stats_row("Total edits", history.length);
    add_stats_row("Distinct pages edited", distinct_pages);
    add_stats_row("Average edits/page", average);
    add_stats_row("First edit", first_edit);

    // add a blank row
    add_stats_row("", "").childNodes[0].style.height = "1em";

    add_stats_row("(main)", namespace_numedits[""]);
    for (i = 0; i < namespaces.length; i++) {
        var nmspc = namespaces[i];
        // only list namespaces that actually have edits
        if (namespace_numedits[nmspc]) {
            add_stats_row(nmspc, namespace_numedits[nmspc]);
        }
    }
}

// ===================================== HTML-scraping backend =========================================
// Show a "Retrieving data..." notice at the end of bodyContent.
// Does nothing when a notice is already present.
function add_loading_notice() {
    if (document.getElementById("loading_notice")) return;

    var loading = document.createElement("div");
    loading.id = "loading_notice";
    loading.innerHTML = "<br><br>Retrieving data<blink>...</blink>";
    document.getElementById("bodyContent").appendChild(loading);
}

// Remove the notice added by add_loading_notice, if there is one.
function remove_loading_notice() {
    var loading = document.getElementById("loading_notice");
    if (!loading) return;

    loading.parentNode.removeChild(loading);
}

// Matches links back to Special:Contributions that carry an offset= parameter.
var offset_regexp = /href="[^"]+:Contributions[^"]+offset=(\d+)/gi;

// Download the Special:Contributions listing for a user, one page at a time,
// then hand all downloaded pages to parse_data.
//   username  - user whose contributions are fetched (URL-encoded here)
//   end_date  - passed through to the recursive call; not otherwise used
//   handler   - callback given to parse_data; receives the parsed edit list
//   offset    - offset= value of the page to fetch; 0 for the first page
//   page_list - accumulator; the HTML of each retrieved page is pushed onto it
function fetch_data(username, end_date, handler, offset, page_list) {
    add_loading_notice();

    var url = prefix + "Special:Contributions/" + encodeURIComponent(username)
            + "?offset=" + offset + "&limit=5000";

    loadXMLDoc(url, function (request) {
        if (request.readyState != 4) return;

        var next_offset = 0;
        if (request.status == 200) {
            page_list.push(request.responseText);

            // see if there's another pageful to get;
            // match() returns null when the page has no offset links at all
            var links = request.responseText.match(offset_regexp) || [];
            for (var i = 0; i < links.length; i++) {
                var v = Number(links[i].match(/(\d+)$/)[0]);
                // take the first offset that moves past the page just fetched
                if (v != 0 && (offset == 0 || v < offset)) {
                    next_offset = v;
                    break;
                }
            }
        }

        //next_offset = 0;    // for testing only, retrieve just the first page of results

        if (next_offset == 0) {
            // no further page (or the request failed): parse what we have
            parse_data(page_list, handler);
        } else {
            // tail recurse
            fetch_data(username, end_date, handler, next_offset, page_list);
        }
    });
}

// Turn one "<li>..." line of a contributions listing into an edit entry.
// Returns null when the line has no leading date text or no title attribute.
function parse_history_line(history_text) {
    var date_match = history_text.match(/^<li>([^(<]+)/i);
    var title_match = history_text.match(/title="([^"]+)"/i);
    if (!date_match || !title_match) return null;

    var history_entry = {};

    history_entry["date_text"] = date_match[1].replace(/ +$/, "");
    history_entry["date"] = date_parse(history_entry["date_text"]);

    // The title comes out of an HTML attribute, so decode the entities that can
    // occur there; &amp; goes last so "&amp;quot;" is not decoded twice.
    history_entry["title"] = title_match[1]
        .replace(/&quot;/g, "\"")
        .replace(/&amp;/g, "&");

    // Drop the automatic section comment before looking for the user's own comment.
    var find_comment = history_text.replace(/<span class="autocomment">.*?<\/span> ?/, "");
    history_entry["comment"] = ifmatch(find_comment.match(/<span class='comment'>(.*?)<\/span>/))
        .replace(/^\((.*)\)$/, "$1");

    history_entry["minor"] = /<span class="minor"/.test(history_text);
    history_entry["oldid"] = ifmatch(history_text.match(/oldid=([0-9]+)/i));

    // The namespace is the entry of `namespaces` whose "Name:" prefix starts the title;
    // "" stands for the main namespace.
    history_entry["namespace"] = "";
    for (var nmspc_ctr = 0; nmspc_ctr < namespaces.length; nmspc_ctr++) {
        var nmspc = namespaces[nmspc_ctr] + ":";
        if (history_entry["title"].substring(0, nmspc.length) == nmspc) {
            history_entry["namespace"] = namespaces[nmspc_ctr];
            break;
        }
    }

    return history_entry;
}

// input: a list of strings, each string containing the HTML from a single page
// output: (passed to handler) a list, where each individual entry is a specific edit
//         from history, with the fields date_text, date, title, comment, minor, oldid
//         and namespace
function parse_data(page_list, handler) {
    var last_history_ent = { title: "", oldid: "" };
    var edit_history = [];

    for (var pagecnt = 0; pagecnt < page_list.length; pagecnt++) {
        // one match per listing line; null (no edits on this page) is treated as empty
        var matches = page_list[pagecnt].match(/^<li>[^(]+\(<a href="[^"]+action=history.*/gim) || [];

        for (var matchcnt = 0; matchcnt < matches.length; matchcnt++) {
            var history_entry = parse_history_line(matches[matchcnt]);
            if (!history_entry) continue;

            // skip an entry that repeats the one directly before it
            if (history_entry["title"] != last_history_ent["title"]
                    || history_entry["oldid"] != last_history_ent["oldid"]) {
                edit_history.push(history_entry);
            }
            last_history_ent = history_entry;
        }
    }

    remove_loading_notice();
    handler(edit_history);
}

// ===================================== test/debug functions =========================================

// Append the given text to bodyContent inside a scrollable <pre> box.
function dump_text(text) {
    var pre = document.createElement("pre");
    var div = document.createElement("div");
    div.style.width = "60em";
    div.style.maxHeight = "40em";
    div.style.overflow = "auto";

    pre.appendChild(document.createTextNode(text));
    div.appendChild(pre);
    document.getElementById("bodyContent").appendChild(div);
}

// Dump a list of strings, one per line, each prefixed with "--> ".
function dump_lines(ary) {
    dump_text("--> " + ary.join("\n--> "));
}

// Dump every enumerable property of obj as "name: value" lines.
function dump_object(obj) {
    var text = "";
    for (var prop in obj) {
        text += prop + ": " + obj[prop] + "\n";
    }
    dump_text(text);
}

// ===================================== utility functions =========================================

// Run f when the window's load event fires, without displacing handlers that
// were registered earlier.
function addOnloadFunction(f) {
    if (window.addEventListener) {
        window.addEventListener("load", f, false);
    } else if (window.attachEvent) {
        window.attachEvent("onload", f);
    } else {
        // No event API available: chain onto whatever window.onload holds now.
        // The previous handler is kept in this closure, so every registration
        // has its own link in the chain.
        var previous_onload = window.onload;
        window.onload = function () {
            if (typeof previous_onload == "function") previous_onload();
            f();
        };
    }
}

// Decode one query-string component: "+" means space, the rest is percent-encoded.
function decode_param(text) {
    var spaced = text.replace(/\+/g, " ");
    try {
        return decodeURIComponent(spaced);
    } catch (e) {
        // not valid UTF-8 percent-encoding; fall back to the legacy decoder
        return unescape(spaced);
    }
}

// Parse document.location.search into an object mapping parameter names to
// decoded values. A parameter without "=" maps to the empty string.
function parse_params() {
    var ret = {};
    var pairs = document.location.search.substring(1).split("&");

    for (var i = 0; i < pairs.length; i++) {
        var pair = pairs[i];
        if (pair.length == 0) continue;

        // split on the first "=" only, so values may themselves contain "="
        var eq = pair.indexOf("=");
        var name = eq < 0 ? pair : pair.substring(0, eq);
        var value = eq < 0 ? "" : pair.substring(eq + 1);
        ret[decode_param(name)] = decode_param(value);
    }
    return ret;
}

// Issue an asynchronous GET for url. handler is called with the request object
// on every readystatechange, so it has to check readyState itself.
// Does nothing when neither XMLHttpRequest nor ActiveXObject is available.
function loadXMLDoc(url, handler) {
    var req;

    if (window.XMLHttpRequest) {
        // branch for native XMLHttpRequest object
        req = new XMLHttpRequest();
        req.onreadystatechange = function () { handler(req); };
        req.open("GET", url, true);
        req.send(null);
    } else if (window.ActiveXObject) {
        // branch for IE/Windows ActiveX version
        req = new ActiveXObject("Microsoft.XMLHTTP");
        if (req) {
            req.onreadystatechange = function () { handler(req); };
            req.open("GET", url, true);
            req.send();
        }
    }
}

// see http://search.cpan.org/dist/perl/pod/perlfunc.pod#map
// Returns a new list holding handler(item) for every item of list, in order.
function map(handler, list) {
    var ret = [];
    for (var i = 0; i < list.length; i++) {
        ret[i] = handler(list[i]);
    }
    return ret;
}

// see http://search.cpan.org/dist/perl/pod/perlfunc.pod#keys
// Returns the enumerable property names of obj, as visited by for-in.
function keys(obj) {
    var ret = [];
    for (var key in obj) {
        ret.push(key);
    }
    return ret;
}

// Given the result of String.match, return the first capture group,
// or "" when there was no match or no capture group.
function ifmatch(ary) {
    if (ary && ary.length >= 2) {
        return ary[1];
    }
    return "";
}

// ===================================== other =========================================

// Emit the tool's stylesheet with document.write.
function writeCSS() {
    document.write(
        "<style>" +
        " table.IT_form td {background-color:purple} " +
        "</style>"
    );
}