User:Lupin/diff.js

From Wikipedia, the free encyclopedia

Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences, see the instructions for Opera. Konqueror and Safari users can just click the Reload button.

/*
 * Javascript Diff Algorithm
 *  By John Resig (http://ejohn.org/) and [[:en:User:Lupin]]
 *
 * More Info:
 *  http://ejohn.org/projects/javascript-diff-algorithm/
 */
 
function diffEscape(n) {
    return n.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
      .replace(RegExp('"','g'), "&quot;");
}
function delFmt(x) { 
  if (!x.length) return ''; 
  return "<del style='background:#FFE6E6;'>" + diffEscape(x.join('')) +"</del>"; 
}
function insFmt(x) { 
  if (!x.length) return '';
  return "<ins style='background:#AAFFEE;'>" + diffEscape(x.join('')) +"</ins>"; 
}
 
function copyDiffObj(x){
  var ret=[];
  for (var i=0; i<x.length; ++i) {
    if (typeof x[i] == 'string') ret[i]=x[i];
    else {
      ret[i]={};
      for (var prop in x[i]) ret[i][prop]=x[i][prop];
    }
  }
  return ret;
}
 
function countCrossings(a, b, i) {
  // count the crossings on the edge starting at b[i]
  if (b[i].row==null) return -1;
  var count=0;
  for (var j=0; j<a.length; ++j) {
    if (a[j].row==null) continue;
    if ( (j-b[i].row)*(i-a[j].row) > 0) count++;
  }
  return count;
}
 
//function debug(s){
//  try {document.getElementById('debug').value+=s+'\n'; } catch(foo) {};
//}
 
function untangle( a, b) {
  // try to remove crossing edges from an ordered bipartite graph,
  // removing the least number possible
  var aa=copyDiffObj(a);
  var bb=copyDiffObj(b);
 
  // remove the edge with the largest number of crossings until no
  // crossings remain
  do {
    var maxCrossings=0;
    var worstEdge=null;
    for (var i=0; i<bb.length; ++i) {
      var c=countCrossings(aa,bb,i);
      if (c > maxCrossings) { maxCrossings=c; worstEdge=i; }
    }
    if (worstEdge!=null) {
      aa[ bb[worstEdge].row ] = aa[ bb[worstEdge].row ].text;
      bb[worstEdge] = bb[worstEdge].text;
    }
  } while (maxCrossings > 0);
  return { a: aa, b: bb };
}
 
window.max=function(a,b){return a<b ? b : a;}
window.min=function(a,b){return a>b ? b : a;}
 
function shortenDiffString(str, context) {
  var output=[];
  var s=str;
  var m=true;
  while ( true ) {
    var re=RegExp('(<del[\\s\\S]*?</del>|<ins[\\s\\S]*?</ins>)+');
    m=re.exec(s);
    if(!m) break;
    var t=(s.substring(max(0,m.index-context), min(s.length, m.index+m[0].length+context)));
    //alert(s+'\n\n\n'+m[0] +'\n\n'+(m.index-context) + '\n\n'+t); 
    output.push(t);
    s=s.substring(min(s.length, m.index+m[0].length));
  }
  return output;
}
 
function diffString( o, n ) {
  var out = diff( o.split(/\b/), n.split(/\b/) );
  var str = "";
  var acc=[]; // accumulator for prettier output
 
  // crossing pairings -- 'A B' vs 'B A' -- cause problems, so let's iron them out 
  //var untangled=untangle(out.o, out.n); <-- too slow!
  //out.o=untangled.a; out.n=untangled.b;
  var maxOutputPair=0;
  for (var i=0; i<out.n.length; ++i) {
    if ( out.n[i].row != null) { 
      if( maxOutputPair > out.n[i].row ) {
        // tangle - delete pairing
        out.o[ out.n[i].row ]=out.o[ out.n[i].row ].text;
        out.n[i]=out.n[i].text;
      }
      if (maxOutputPair < out.n[i].row) maxOutputPair = out.n[i].row;
    }
  }
 
 
  // output the stuff preceding the first paired old line 
  for (var i=0; i<out.o.length && out.o[i].text == null; ++i) acc.push( out.o[i] );
  str += delFmt(acc); acc=[];
 
  // main loop 
  for ( var i = 0; i < out.n.length; ++i ) {
    // output unpaired new "lines" 
    while ( i < out.n.length && out.n[i].text == null ) acc.push( out.n[i++] ); 
    str += insFmt(acc); acc=[]; 
    if ( i < out.n.length ) { // this new "line" is paired with the (out.n[i].row)th old "line" 
      str += out.n[i].text;
      // output unpaired old rows starting after this new line's partner 
      var m = out.n[i].row + 1; 
      while ( m < out.o.length && out.o[m].text == null ) acc.push ( out.o[m++] );
      str += delFmt(acc); acc=[];
    }
  }
  return str;
}
 
function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
 
  // pass 1: make hashtable ns with new rows as keys
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
 
  // pass 2: make hashtable os with old rows as keys
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
 
  // pass 3: pair unique new rows and matching unique old rows
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
 
  // pass 4: pair matching rows immediately following paired rows (not necessarily unique)
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && 
         n[i].row < o.length - 1 && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
 
  // pass 5: pair matching rows immediately preceding paired rows (not necessarily unique)
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && 
         n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
 
  return { o: o, n: n };
}