Wikipedia:WikiProject User scripts/Scripts/Formatter

From Wikipedia, the free encyclopedia

/*

*/

// Runs every clean-up pass, in a fixed order, over the contents of the edit
// box (document.editform.wpTextbox1). The textbox is updated after each pass,
// so each pass reads the output of the one before it.
function format() {
    var box = document.editform.wpTextbox1;

    // Order matters: the passes are applied first to last.
    // imagefixer is not part of the pipeline.
    var passes = [
        catFixer,
        entities,
        fixheadings,
        fixsyntax,
        function (text) { return linkfixer(text, false); },
        whitespace,
        linksimplifyer,
        trim
    ];

    for (var i = 0; i < passes.length; i++) {
        box.value = passes[i](box.value);
    }
}

// Normalises whitespace in wikitext and returns the trimmed result.
//
// str: the wikitext to clean up.
// Returns: the text with tabs, blank-line runs, bullet spacing, heading
// spacing and dash spacing normalised, passed through trim().
function whitespace(str){
    // tabs become single spaces
    str = str.replace(/\t/g, " ");

    // lines holding only one to three spaces become empty lines
    str = str.replace(/^ ? ? \n/gm, "\n");
    // three or more consecutive newlines collapse to two
    str = str.replace(/(\n\n)\n+/g, "$1");
    // no blank line between the end of one heading and the start of the next
    str = str.replace(/== ? ?\n\n==/g, "==\n==");
    // no blank line in front of a bullet that starts with an http link
    str = str.replace(/\n\n(\* ?\[?http)/g, "\n$1");

    str = str.replace(/^ ? ? \n/gm, "\n");
    // no blank line in front of any bullet
    str = str.replace(/\n\n\*/g, "\n*");
    // runs of spaces/tabs collapse to one space
    str = str.replace(/[ \t][ \t]+/g, " ");
    // drop extra newlines that follow "=\n" or "\n\n"
    str = str.replace(/([=\n]\n)\n+/g, "$1");
    // drop one space directly before a newline
    str = str.replace(/ \n/g, "\n");

    //* bullet points
    // remove the space after the list markers, then put exactly one back
    str = str.replace(/^([\*#]+) /gm, "$1");
    str = str.replace(/^([\*#]+)/gm, "$1 ");

    //==Headings==
    // remove one space on either side of the heading text
    str = str.replace(/^(={1,4}) ?(.*?) ?(={1,4})$/gm, "$1$2$3");

    //dash — spacing
    // En dash (U+2013) and em dash (U+2014) are written as escapes so the
    // patterns do not depend on how this file is encoded.
    // First remove one space on either side of every dash ...
    str = str.replace(/ ?([\u2013\u2014]) ?/g, "$1");
    // ... then put a space on both sides unless a digit touches the dash, so
    // number ranges such as 1990-95 or 2000-01 stay tight. The character after
    // the dash is only looked at, not consumed, so it can also serve as the
    // character before the next dash ("a-b-c" gets both dashes spaced).
    str = str.replace(/([^0-9])([\u2013\u2014])(?=[^0-9])/g, "$1 $2 ");

    return trim(str);
}

// Converts a few HTML character references to the characters they stand for
// and returns the trimmed result.
//
// str: the wikitext to convert.
// Returns: the text with en dash, em dash, superscript two and degree sign
// references replaced by the literal characters, passed through trim().
//
// NOTE(review): the previous patterns matched the literal characters
// themselves, so every replacement swapped a character for itself and the
// function did nothing. The spellings below are the standard named, decimal
// and hexadecimal references for those characters; 150 and 151 are the
// Windows-1252 code points that HTML also maps to the two dashes.
function entities(str){
    // en dash, U+2013
    str = str.replace(/&ndash;|&#(?:150|8211);|&#[xX]2013;/g, "\u2013");
    // em dash, U+2014
    str = str.replace(/&mdash;|&#(?:151|8212);|&#[xX]2014;/g, "\u2014");
    // superscript two, U+00B2
    str = str.replace(/&sup2;/g, "\u00B2");
    // degree sign, U+00B0
    str = str.replace(/&deg;/g, "\u00B0");

    return trim(str);
}

// Normalises the wording of common section headings ("See also",
// "External links", "External link", "References", "Sources",
// "Further reading") and returns the updated text.
function fixheadings(str)
{
  // Variants of "See also" are only rewritten when the text does not already
  // contain a "See also" heading.
  var alreadyHasSeeAlso = /= ?See also ?=/.test(str);
  if (!alreadyHasSeeAlso)
  {
    str = str.replace(/(== ?)(see also:?|related topics:?|related articles:?|internal links:?|also see:?)( ?==)/gi, "$1See also$3");
  }

  // [pattern, replacement] pairs, applied from first to last
  var rewrites = [
    [/(== ?)(external links:?|outside links|web ?links:?|exterior links:?)( ?==)/gi, "$1External links$3"],
    [/(== ?)(external link:?|web ?link:?|exterior link:?)( ?==)/gi, "$1External link$3"],
    [/(== ?)(reference:?)(s? ?==)/gi, "$1Reference$3"],
    [/(== ?)(source:?)(s? ?==)/gi, "$1Source$3"],
    [/(== ?)(further readings?:?)( ?==)/gi, "$1Further reading$3"]
  ];

  for (var r = 0; r < rewrites.length; r++)
  {
    str = str.replace(rewrites[r][0], rewrites[r][1]);
  }

  return str;
}

// Tidies the opening of category links: "[[ category : " (optional spaces,
// upper- or lower-case initial) becomes "[[Category:". Returns the trimmed text.
function catFixer(str){
    var looseCategoryPrefix = /\[\[ ?[Cc]ategory ?: ?/g;
    var tidied = str.replace(looseCategoryPrefix, "[[Category:");

    return trim(tidied);
}

// Replaces some HTML markup with its wiki equivalent and returns the trimmed
// text: <i>/<b> pairs become ''/''' quotes, and <br> / <br/> become <br />.
function fixsyntax(str)
{
  // An <i>/<b> tag sitting directly next to an apostrophe anywhere in the
  // text switches the italic/bold conversion off for the whole text.
  var tagNextToApostrophe = str.match(/'<\/?[ib]>|<\/?[ib]>'/gi);
  if (tagNextToApostrophe === null)
  {
    str = str
      .replace(/<i>(.*?)<\/i>/gi, "''$1''")
      .replace(/<b>(.*?)<\/b>/gi, "'''$1'''");
  }

  // line breaks are always normalised
  var normalised = str
    .replace(/<br\/>/gi, "<br />")
    .replace(/<br>/gi, "<br />");

  return trim(normalised);
}

//formats links in standard fashion
//
// str:         the wikitext to process.
// checkImages: accepted for compatibility with existing callers; this
//              function does not read it.
// Returns: the trimmed text with internal links tidied (underscores in the
// target turned into spaces, spaces around the first pipe removed, stray
// spaces inside the brackets and before "#" removed) and external http/https
// links reduced to the single-bracket "[url label]" form.
function linkfixer(str, checkImages)
{
  // make sure adjacent bracketed items are separated by a space
  str = str.replace(/\]\[/g, "] [");

  // Each bracketed item is rewritten in place through a callback. A callback's
  // return value is inserted literally, so link text containing "$$" or "$&"
  // is not mangled the way a string replacement argument would mangle it.
  str = str.replace(/\[?\[[^\]]*?\]\]?/g, function (link)
  {
    //internal links only: external (http/https) and image links pass through
    if (/^\[?\[https?:\/\//i.test(link) || /^\[?\[image:/i.test(link))
      return link;

    var fixed = link;

    // Underscores become spaces only when the link has no colon, does not
    // start with "[[_" and has no "|_"; with a pipe, only the part before
    // the pipe is touched.
    if (fixed.indexOf(":") == -1 && fixed.substr(0, 3) != "[[_" && fixed.indexOf("|_") == -1)
    {
      var pipeAt = fixed.indexOf("|");
      if (pipeAt == -1)
        fixed = fixed.replace(/_/g, " ");
      else
        fixed = fixed.substr(0, pipeAt).replace(/_/g, " ") + fixed.substr(pipeAt);
    }

    // no spaces around the first pipe; a pipe directly before "]]" gets a
    // space after it
    return fixed.replace(/ ?\| ?/, "|").replace("|]]", "| ]]");
  });

  //repair bad internal links
  str = str.replace(/\[\[ ?([^\]]*?) ?\]\]/g, "[[$1]]");
  str = str.replace(/\[\[([^\]]*?)( |_)#([^\]]*?)\]\]/g, "[[$1#$3]]");

  //repair bad external links
  // single brackets only, and a space instead of a pipe before the label
  str = str.replace(/\[?\[http(s?):\/\/([^\]]*?)\]\]?/gi, "[http$1://$2]");
  str = str.replace(/\[http(s?):\/\/([^\]]*?)\|([^\]]*?)\]/gi, "[http$1://$2 $3]");

  return trim(str);
}

//fixes images
//
// Rewrites image links in str and returns the result passed through trim().
// The only call to this function visible in this file (inside format) is
// commented out.
// NOTE(review): the span regex below stops at the first run of "]" it can
// reach, so an image whose caption contains a nested [[link]] may be matched
// only up to the end of that nested link - verify before re-enabling.
function imagefixer(str)
{

  //remove external images
  // "[image:http://...]" with one or two brackets on either side becomes the
  // plain external link "[http://...]"
  str = str.replace(/\[?\[image:http:\/\/([^\]]*?)\]\]?/gi, "[http://$1]");

  //fix links within internal images
  // collect every remaining "[image:" / "[[image:" span (case-insensitive)
  var m = str.match(/\[?\[image:[^\[\]]*?(\[?\[[^\]]*?\]*?[^\[\]]*?)*?\]+/gi);
  if (m)
  {
    for (var i = 0; i < m.length; i++)
    {
      var x = m[i].toString();
      var y = x;

      // drop the leading "[[" (turning the first letter into a capital "I")
      // and a trailing "]]"
      y = y.replace(/^\[\[i/i, "I").replace(/\]\]$/, "");
      // if the text ends with a "[" that has no "]" after it, append one "]"
      y = y.replace(/(\[[^\]]*?)$/, "$1]");
      // run the ordinary link clean-up over what is left
      y = linkfixer(y, true);
      // put the outer brackets back
      y = "[[" + y + "]]";

      // x is a string pattern, so only its first occurrence in str is replaced
      str = str.replace(x, y);
    }
  }

  return trim(str);
}

//simplifies some links e.g. [[Dog|dog]] to [[dog]] and [[Dog|dogs]] to [[dog]]s
//
// str: the wikitext to process.
// Returns: the text with redundant piped links shortened and, for links that
// are still piped, letters that directly follow the link pulled into its label.
function linksimplifyer(str){
  // Each piped link is rewritten in place through a callback. A callback's
  // return value is inserted literally, so link text containing "$$" or "$&"
  // is not mangled the way a string replacement argument would mangle it.
  str = str.replace(/\[\[([^[]*?)\|([^[]*?)\]\]/g, function (link, target, label)
  {
    // only links whose label starts with the target (as written, or with its
    // first letter lower-cased) are candidates
    if (label.indexOf(target) != 0 && label.indexOf(TurnFirstToLower(target)) != 0)
      return link;

    // keep the label's spelling of the target and move any word characters
    // that follow it out behind the link: [[Dog|dogs]] -> [[dog]]s
    return link.replace(/\[\[([^\]\|]*?)\|(\1)([\w]*?)\]\]/i, "[[$2]]$3");
  });

  // [[a|b]]xyz followed by punctuation or whitespace -> [[a|bxyz]]
  str = str.replace(/\[\[([^\]\|]+)\|([^\]\|]+)\]\]([A-Za-z\'][A-Za-z]*)([\.\,\;\:\"\!\?\s\n])/g, "[[$1|$2$3]]$4");

  return str;
}

//trim start and end, trim spaces from the end of lines
//
// str: the text to trim.
// Returns: the text with every run of trailing spaces removed from each line
// and all leading/trailing whitespace removed from the text as a whole.
function trim(str) {
   // " +" rather than a single space: a line ending in several spaces must
   // lose all of them, not just the last one
   str = str.replace(/ +$/gm, "");
   return str.replace(/^\s*|\s*$/g, "");
}

// Returns the trimmed input with its first character lower-cased.
// An empty input yields an empty string without being trimmed.
function TurnFirstToLower(input) {
  if (input == "")
    return "";

  var text = trim(input);
  var head = text.charAt(0);
  var rest = text.slice(1);
  return head.toLowerCase() + rest;
}

//entities that should never be unicoded
//
// Applies a fixed list of replacements to str and returns the result.
// Except for the last two, every call passes a string (not a regex) as the
// pattern, so it affects only the first occurrence it finds.
// NOTE(review): nothing in the visible part of this file calls this function.
// NOTE(review): several statements appear to replace text with identical text
// (the " & " line and both numeric-reference regexes, whose replacement
// "&$1" rebuilds exactly what was matched). The literals look as if entity
// references were decoded somewhere on the way here - compare with the
// original script before relying on this function.
function noUnicodify(str) {
  str = str.replace(" & ", " & ");
  // escape the first "&", then the first occurrence of each listed entity
  str = str.replace("&", "&amp;").replace("&lt;", "&amp;lt;").replace("&gt;", "&amp;gt;").replace("&quot;", "&amp;quot;").replace("&apos;", "&amp;apos;");
  // literal minus sign and multiplication sign back to their entity names
  str = str.replace("−", "&minus;").replace("×", "&times;");

  // presumably the three patterns are the literal no-break space, thin space
  // and soft hyphen characters (hard to tell apart from ordinary spaces in an
  // editor) - TODO confirm
  str = str.replace(" ", "&nbsp;").replace(" ", "&thinsp;").replace("­", "&shy;");
  // literal prime character back to its entity name
  str = str.replace("′", "&prime;");
  // numeric references 91/93 and 123-125, with an optional leading zero
  str = str.replace(/&(#0?9[13];)/, "&$1");
  str = str.replace(/&(#0?12[345];)/, "&$1");

  return str;
}

// On page load, register a "format" link that runs format(), but only on
// pages that contain the edit form. The edit tab element (ca-edit) is passed
// as the final argument of addPortletLink.
addOnloadHook(function () {
  if (!document.forms.editform) {
    return;
  }

  var editTab = document.getElementById('ca-edit');
  addPortletLink('p-cactions', 'javascript:format()', 'format', 'ca-format', 'Format article', '', editTab);
});

/*
  • /