User:Quarl/diff.js

From Wikipedia, the free encyclopedia

Note: After saving, you have to bypass your browser's cache to see the changes. Mozilla/Safari: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Internet Explorer: press Ctrl-F5, Opera/Konqueror: press F5.

// [[User:Quarl/diff.js]] - utility functions for doing diffs

// quarl 2006-01-29 initial version

// requires: util.js (trimspaces)

// <pre><nowiki>

// if more than this many words of changes, use overflow string
var diff_wikisummary_maxwords = 30;
var diff_wikisummary_overflow = "$1 words changed";

/*
 * diff() and diffString() are based on
 *  http://ejohn.org/projects/javascript-diff-algorithm/
 *   Copyright John Resig
 */

function diff_split(s) {
    //return trimspaces(s).split(/(?:\s|[.,;\'\"`])+/);
    return trimspaces(s).split(/\s+/);
}

function diffString( o, n ) {
    var out = diff( diff_split(o), diff_split(n) );
    var str = "";

    for ( var i = 0; i < out.n.length - 1; i++ ) {
        if ( out.n[i].text == null ) {
            if ( out.n[i].indexOf('"') == -1 && out.n[i].indexOf('<') == -1 )
                str += "<ins style='background:#E6FFE6;'> " + out.n[i] +"</ins>";
            else
                str += " " + out.n[i];
        } else {
            var pre = "";
            if ( out.n[i].text.indexOf('"') == -1 && out.n[i].text.indexOf('<') == -1 ) {

                var n = out.n[i].row + 1;
                while ( n < out.o.length && out.o[n].text == null ) {
                    if ( out.o[n].indexOf('"') == -1 && out.o[n].indexOf('<') == -1 && out.o[n].indexOf(':') == -1 && out.o[n].indexOf(';') == -1 )
                        pre += " <del style='background:#FFE6E6;'>" + out.o[n] +" </del>";
                    n++;
                }
            }
            str += " " + out.n[i].text + pre;
        }
    }

    return str;
}

function diff( o, n ) {
    var ns = {};
    var os = {};

    for ( var i = 0; i < n.length; i++ ) {
        // note we have to check that it is in fact an object with "rows", in
        // case ns[i] happens to match a javascript member function of class
        // Array, e.g. "some"!
        if ( ns[ n[i] ] == null || !ns[n[i]].rows )
            ns[ n[i] ] = { rows: new Array(), o: null };
        ns[ n[i] ].rows.push( i );
    }

    for ( var i = 0; i < o.length; i++ ) {
        if ( os[ o[i] ] == null || !os[o[i]].rows )
            os[ o[i] ] = { rows: new Array(), n: null };
        os[ o[i] ].rows.push( i );
    }

    for ( var i in ns ) {
        if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
            n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
            o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
        }
    }

    for ( var i = 0; i < n.length - 1; i++ ) {
        if ( n[i].text != null && n[i+1].text == null &&
             0 <= n[i].row+1 && n[i].row+1 < o.length &&
             o[ n[i].row + 1 ].text == null &&
             n[i+1] == o[ n[i].row + 1 ] )
        {
            n[i+1] = { text: n[i+1], row: n[i].row + 1 };
            o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
        }
    }

    for ( var i = n.length - 1; i > 0; i-- ) {
        if ( n[i].text != null && n[i-1].text == null &&
             0 <= n[i].row-1 && n[i].row-1 < o.length &&
             o[ n[i].row - 1 ].text == null &&
             n[i-1] == o[ n[i].row - 1 ] )
        {
            n[i-1] = { text: n[i-1], row: n[i].row - 1 };
            o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
        }
    }

    return { o: o, n: n };
}

function diffAggregate(words) {
    var phrases = new Array();
    var cur = null;
    var wordcount = 0;

    // start at virtual index -1 to check for removed words at beginning of
    // text
    for ( var i = -1; i < words.n.length; i++ ) {
        if ( i!=-1 && words.n[i].text == null ) {
            if (!cur) {
                cur = { o: "", n: "" };
                phrases.push(cur);
            }
            cur.n += " " + words.n[i];
            wordcount ++;
        } else {
            var pre = "";
            var j = i==-1 ? 0 : words.n[i].row + 1;
            while ( j < words.o.length && words.o[j].text == null ) {
                pre += " " + words.o[j];
                j++;
                wordcount ++;
            }
            if (pre) {
                if (!cur) {
                    cur = { o: "", n: "" };
                    phrases.push(cur);
                }
                cur.o += pre;
            }
            if (pre && words.n[i+1] && words.n[i+1].text == null) {
                // If there's an addition following, treat this as part of the
                // same change.
            } else {
                cur = null;
            }
        }
    }

    for (var i in phrases) {
        phrases[i].n = trimspaces(phrases[i].n);
        phrases[i].o = trimspaces(phrases[i].o);
    }

    return { phrases: phrases, wordcount: wordcount };
}

function diffWikiQuote(s) {
    if (!s) return s;
    if (s.match(/^\{\{.*\}\}$/)) return s;
    s = s.replace(/\"/g, "'");
    return '"'+s+'"';
}


function reverse(s) {
    var ret = '';
    for (var i = s.length-1; i >= 0; --i) {
        ret += s[i];
    }
    return ret;
}

// trim the equal chars from the front and back of o,n at a word boundary
function diffStringTrim(o, n) {
    var r = diffStringTrim0(reverse(o), reverse(n));
    return diffStringTrim0(reverse(r.o), reverse(r.n));
}

function diffStringTrim0(o, n) {
    var i = 0;
    while (i < o.length && i < n.length && o[i] == n[i]) {
        ++i;
    }

    // find index of last non-word character
    var prefix = o.substr(0, i);

    // if prefix ends with word characters and suffix starts with non-word,
    // then erase entire prefix
    if (prefix.match(/\w$/) &&
        !o.substr(i, 1).match(/^\w/) && !n.substr(i, 1).match(/^\w/))
    {
        o = o.substr(i);
        n = n.substr(i);
    } else if (prefix.match(/.*\W/)) {
        i = RegExp.lastMatch.length;
        o = o.substr(i);
        n = n.substr(i);
    } else {
        // keep entire prefix
    }
    return { o: o, n: n };
}

function diffSummary(o, n) {
    if (o == n) return "";
    if (!o) return "new";
    if (!n) return "blank";
    var words = diff( diff_split(o), diff_split(n) );
    var r = diffAggregate(words);
    if (!r.wordcount) return "";
    if (r.wordcount > diff_wikisummary_maxwords) {
        return diff_wikisummary_overflow.replace('$1', r.wordcount);
    }

    var phrases = r.phrases;
    var str = [];
    for (var i in phrases) {
        var r = diffStringTrim(phrases[i].o, phrases[i].n);
        var o = diffWikiQuote(r.o), n = diffWikiQuote(r.n);
        if (o && n) {
            str.push(o + ' → ' + n);
        } else if (o) {
            str.push('-' + o);
        } else if (n) {
            str.push('+' + n);
        } else {
            alert("## internal error 15e1b13f-bae3-4399-86c5-721786822fa2");
        }
    }

    return str.join(", ");
}

// </nowiki></pre>