User:Grim Revenant/Formatter.js

From Wikipedia, the free encyclopedia

Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences, see the instructions for Opera. Konqueror and Safari users can just click the Reload button.

/*<pre><nowiki>*/
 
function format() {
    // The wikitext edit box of the page's edit form.
    var box = document.editform.wpTextbox1;

    // Clean-up passes, listed in the order they are applied. Each pass takes
    // the current text and returns the rewritten text.
    var passes = [
        catFixer,
        entities,
        fixheadings,
        fixsyntax,
        function (text) { return linkfixer(text, false); },
        // The imagefixer pass is disabled and therefore not listed here.
        whitespace,
        linksimplifyer,
        trim
    ];

    // Write the result back after every pass, so the box always holds the
    // output of the last pass that completed.
    for (var p = 0; p < passes.length; p++) {
        box.value = passes[p](box.value);
    }
}
 
// Normalises whitespace in wikitext: blank lines, runs of spaces, list
// markers, heading padding and the spacing around en/em dashes.
//   str     - the wikitext to clean up
//   returns - the cleaned text, passed through trim()
// The rules are order-dependent; later rules rely on what earlier ones did.
function whitespace(str){
    // Every tab becomes a single space.
    str = str.replace(/\t/g, " ");
 
    // Lines consisting of one to three spaces become empty lines.
    str = str.replace(/^ ? ? \n/gm, "\n");
    // Three or more consecutive newlines collapse to two.
    str = str.replace(/(\n\n)\n+/g, "$1");
    // No blank line between a line ending in "==" and one starting with "==".
    str = str.replace(/== ? ?\n\n==/g, "==\n==");
    // No blank line in front of a bulleted external (http) link.
    str = str.replace(/\n\n(\* ?\[?http)/g, "\n$1");
 
    // Second round, after the rules above may have exposed new matches.
    str = str.replace(/^ ? ? \n/gm, "\n");
    // No blank line in front of any bullet.
    str = str.replace(/\n\n\*/g, "\n*");
    // Runs of spaces/tabs collapse to one space.
    str = str.replace(/[ \t][ \t]+/g, " ");
    // Drop surplus newlines that follow "=\n" or "\n\n".
    str = str.replace(/([=\n]\n)\n+/g, "$1");
    // Remove a space that sits directly before a newline.
    str = str.replace(/ \n/g, "\n");
 
    //* bullet points
    // Strip one space after the leading run of * / # markers, then put
    // exactly one back, so every list marker is followed by a space.
    str = str.replace(/^([\*#]+) /gm, "$1");
    str = str.replace(/^([\*#]+)/gm, "$1 ");
 
    //==Headings==
    // Remove one optional space just inside the heading markers.
    str = str.replace(/^(={1,4}) ?(.*?) ?(={1,4})$/gm, "$1$2$3");
 
    //dash — spacing
    // First strip one space on either side of every en dash and em dash
    // (literal character or entity form) ...
    str = str.replace(/ ?(–|&#150;|&ndash;|&#8211;|&#x2013;) ?/g, "$1");
    str = str.replace(/ ?(—|&#151;|&mdash;|&#8212;|&#x2014;) ?/g, "$1");
    // ... then put a space on both sides again, but only where neither
    // neighbour is a digit. The digit class includes 0 so that numeric ranges
    // ending or starting in zero (e.g. 2000–01) stay unspaced like every
    // other numeric range.
    str = str.replace(/([^0-9])(—|&#151;|&mdash;|&#8212;|&#x2014;|–|&#150;|&ndash;|&#8211;|&#x2013;)([^0-9])/g, "$1 $2 $3");
 
    return trim(str);
}
 
// Rewrites a few HTML entities: numeric en/em dash entities become the named
// ones, and &sup2; / &deg; become the literal characters.
//   str     - the wikitext to process
//   returns - the rewritten text, passed through trim()
function entities(str){
    // [pattern, replacement] pairs, applied top to bottom.
    // A further rule (unit followed by <sup>2</sup> -> ²) is disabled and
    // therefore not part of this table.
    var rules = [
        [/&#150;|&#8211;|&#x2013;/g, "&ndash;"],
        [/&#151;|&#8212;|&#x2014;/g, "&mdash;"],
        [/&sup2;/g, "²"],
        [/&deg;/g, "°"]
    ];

    var text = str;
    for (var r = 0; r < rules.length; r++) {
        text = text.replace(rules[r][0], rules[r][1]);
    }

    return trim(text);
}
 
//Normalise the wording of common closing-section headings
//("See also", "External links", "References", "Sources", "Further reading").
//  str     - the wikitext to process
//  returns - the text with matching headings renamed; not trimmed
function fixheadings(str)
{
  var text = str;

  //Variants of "See also" are only renamed when the text does not already
  //contain a correctly spelled "See also" heading.
  var alreadyHasSeeAlso = text.search(/= ?See also ?=/) !== -1;
  if (!alreadyHasSeeAlso)
  {
    text = text.replace(/(== ?)(see also:?|related topics:?|related articles:?|internal links:?|also see:?)( ?==)/gi, "$1See also$3");
  }

  //The remaining headings are always renamed: [pattern, replacement] pairs,
  //applied in order. $1 / $3 keep the original "==" markers and their spacing.
  var renames = [
    [/(== ?)(external links?:?|outside links?|web ?links?:?|exterior links?:?)( ?==)/gi, "$1External links$3"],
    [/(== ?)(references?:?)( ?==)/gi, "$1References$3"],
    [/(== ?)(sources?:?)( ?==)/gi, "$1Sources$3"],
    [/(== ?)(further readings?:?)( ?==)/gi, "$1Further reading$3"]
  ];

  for (var r = 0; r < renames.length; r++)
  {
    text = text.replace(renames[r][0], renames[r][1]);
  }

  return text;
}
 
// Normalises the opening of category links to exactly "[[Category:",
// dropping optional single spaces around the name and colon and
// capitalising a lower-case "category".
//   str     - the wikitext to process
//   returns - the rewritten text, passed through trim()
function catFixer(str){
    var looseCategoryPrefix = /\[\[ ?[Cc]ategory ?: ?/g;
    var normalised = str.replace(looseCategoryPrefix, "[[Category:");

    return trim(normalised);
}
 
//Replaces simple HTML markup with the wiki equivalent and normalises <br>.
//  str     - the wikitext to process
//  returns - the rewritten text, passed through trim()
function fixsyntax(str)
{
  var text = str;

  //<i>/<b> are only converted when no such tag sits directly next to an
  //apostrophe anywhere in the text; otherwise all of them are left alone.
  var tagNextToApostrophe = text.search(/'<\/?[ib]>|<\/?[ib]>'/gi) !== -1;
  if (!tagNextToApostrophe)
  {
    text = text
      .replace(/<i>(.*?)<\/i>/gi, "''$1''")
      .replace(/<b>(.*?)<\/b>/gi, "'''$1'''");
  }

  //Both <br/> and <br> become "<br />".
  text = text
    .replace(/<br\/>/gi, "<br />")
    .replace(/<br>/gi, "<br />");

  return trim(text);
}
 
//formats links in standard fashion
//  str         - the wikitext to process
//  checkImages - accepted for callers that pass it, but not read by this
//                function
//  returns     - the rewritten text, passed through trim()
function linkfixer(str, checkImages)
{ 
  //separate directly adjacent bracket groups: "][" -> "] ["
  str = str.replace(/\]\[/g, "] [");

  //every single- or double-bracketed span without a "]" inside
  var m = str.match(/\[?\[[^\]]*?\]\]?/g);
  if (m)
  {
    for (var i = 0; i < m.length; i++)
    {
      var x = m[i].toString();   //the link as found in the text
      var y = x;                 //the link after clean-up
 
      //internal links only (anything not starting with http:// or image:)
      if ( !y.match(/^\[?\[http:\/\//i) && !y.match(/^\[?\[image:/i) )
      {
        //underscores become spaces, unless the link contains a colon or has
        //an underscore right after "[[" or right after the pipe
        if (y.indexOf(":") == -1 && y.substr(0,3) != "[[_" && y.indexOf("|_") == -1)
        {
          var pipeAt = y.indexOf("|");
          if (pipeAt == -1)
            y = y.replace(/_/g, " ");
          else
            //only the part before the pipe is changed; it is rebuilt by
            //concatenation so that a "$" in the link is never read as a
            //replacement pattern
            y = y.substr(0, pipeAt).replace(/_/g, " ") + y.substr(pipeAt);
        }  
 
        //no spaces around the first pipe; an empty label keeps one space
        y = y.replace(/ ?\| ?/, "|").replace("|]]", "| ]]");
 
      }
 
      //Swap the first remaining occurrence of the original link for the
      //cleaned one. A function is used as the replacement because a plain
      //string would have "$$", "$&" etc. interpreted as replacement patterns,
      //corrupting links that contain a dollar sign. Unchanged links are
      //skipped entirely.
      if (y !== x)
        str = str.replace(x, function () { return y; });
    }
  }
 
  //repair bad internal links
  //drop one space just inside the double brackets
  str = str.replace(/\[\[ ?([^\]]*?) ?\]\]/g, "[[$1]]");
  //drop a space or underscore directly before the "#" of a section link
  str = str.replace(/\[\[([^\]]*?)( |_)#([^\]]*?)\]\]/g, "[[$1#$3]]");
 
  //repair bad external links
  //external links get exactly one bracket on each side
  str = str.replace(/\[?\[http:\/\/([^\]]*?)\]\]?/gi, "[http://$1]");
  //a pipe between URL and label becomes a space
  str = str.replace(/\[http:\/\/([^\]]*?)\|([^\]]*?)\]/gi, "[http://$1 $2]");
 
  return trim(str);
}
 
//fixes images
//  str     - the wikitext to process
//  returns - the rewritten text, passed through trim()
//NOTE: format() has its call to this function commented out.
function imagefixer(str)
{
 
  //remove external images: an image link whose target is an http:// URL
  //becomes a plain external link
  str = str.replace(/\[?\[image:http:\/\/([^\]]*?)\]\]?/gi, "[http://$1]");
 
  //fix links within internal images
  var m = str.match(/\[?\[image:[^\[\]]*?(\[?\[[^\]]*?\]*?[^\[\]]*?)*?\]+/gi);
  if (m)
  {
    for (var i = 0; i < m.length; i++)
    {
      var x = m[i].toString();   //the image link as found in the text
      var y = x;                 //the image link after clean-up
 
      //unwrap: a leading "[[i"/"[[I" becomes "I" and a trailing "]]" is dropped
      y = y.replace(/^\[\[i/i, "I").replace(/\]\]$/, "");
      //if the text now ends in a "[" span with no "]" after it, append one
      //(presumably re-closing a nested link cut open by the unwrap above)
      y = y.replace(/(\[[^\]]*?)$/, "$1]");
      //clean up the links inside the image link, then wrap it again
      y = linkfixer(y, true);
      y = "[[" + y + "]]";
 
      //Swap the first remaining occurrence of the original for the cleaned
      //version. A function is used as the replacement because a plain string
      //would have "$$", "$&" etc. interpreted as replacement patterns,
      //corrupting captions that contain a dollar sign.
      if (y !== x)
        str = str.replace(x, function () { return y; });
    }
  }
 
  return trim(str);
}
 
//simplifies some links e.g. [[Dog|dog]] to [[dog]] and [[Dog|dogs]] to [[dog]]s
//  str     - the wikitext to process
//  returns - the rewritten text; not trimmed
function linksimplifyer(str){
  //every piped link [[target|label]]
  var m = str.match(/\[\[([^[]*?)\|([^[]*?)\]\]/g);
  if (m)
  {
    for (var i = 0; i < m.length; i++)
    {
      var n_arr = m[i].toString().match(/\[\[([^[]*?)\|([^[]*?)\]\]/);
      var n = n_arr[0];   //whole link
      var a = n_arr[1];   //target
      var b = n_arr[2];   //label
 
      //only when the label starts with the target, as written or with its
      //first letter lower-cased
      if (b.indexOf(a) == 0 || b.indexOf(TurnFirstToLower(a)) == 0)
      {
        //label = target (case-insensitive) plus word characters: keep the
        //label's spelling as the link and move the extra characters behind it
        var k = n.replace(/\[\[([^\]\|]*?)\|(\1)([\w]*?)\]\]/i, "[[$2]]$3");

        //Swap the first remaining occurrence of the link for its short form.
        //A function is used as the replacement because a plain string would
        //have "$$", "$&" etc. interpreted as replacement patterns, corrupting
        //links that contain a dollar sign.
        if (k !== n)
          str = str.replace(n, function () { return k; });
      }
    }
  }
 
  //letters glued to the end of a piped link are pulled into its label when
  //they are followed by punctuation or whitespace: [[a|b]]s. -> [[a|bs]].
  str = str.replace(/\[\[([^\]\|]+)\|([^\]\|]+)\]\]([A-Za-z\'][A-Za-z]*)([\.\,\;\:\"\!\?\s\n])/g, "[[$1|$2$3]]$4");
 
  return str;
}
 
//trim start and end, trim spaces from the end of lines
//  str     - the text to trim
//  returns - the text without trailing spaces on any line and without
//            leading/trailing whitespace overall
function trim(str) {
   //" +" rather than a single space: a line may end in several spaces and
   //all of them have to go, not just the last one
   str = str.replace(/ +$/gm, "");
   return str.replace(/^\s*|\s*$/g, "");
}
 
//Returns the trimmed input with its first character lower-cased.
//  input   - the text to convert
//  returns - "" for empty input, otherwise the trimmed, converted text
function TurnFirstToLower(input) {
  //nothing to convert
  if (input == "")
    return "";

  var text = trim(input);
  var head = text.charAt(0);
  var tail = text.substr(1, text.length);

  return head.toLowerCase() + tail;
}
 
//entities that should never be unicoded
//Escapes the leading "&" of a fixed set of entities as "&amp;" (presumably so
//that a later entity-to-character conversion skips them - no such step is
//visible in this file) and turns a spaced " &amp; " into " & ".
//  str     - the text to process
//  returns - the text with EVERY occurrence of the listed entities rewritten
function noUnicodify(str) {
  //Literal [search, replacement] pairs, applied in this order.
  var literalRules = [
    [" &amp; ", " & "],
    ["&amp;", "&amp;amp;"],
    ["&amp;lt;", "&amp;amp;lt;"],
    ["&amp;gt;", "&amp;amp;gt;"],
    ["&amp;quot;", "&amp;amp;quot;"],
    ["&amp;apos;", "&amp;amp;apos;"],
    ["&minus;", "&amp;minus;"],
    ["&times;", "&amp;times;"],
    ["&nbsp;", "&amp;nbsp;"],
    ["&thinsp;", "&amp;thinsp;"],
    ["&shy;", "&amp;shy;"],
    ["&prime;", "&amp;prime;"]
  ];

  //split/join replaces all occurrences; String.replace with a string pattern
  //would stop after the first one and leave the rest of the text untouched.
  for (var r = 0; r < literalRules.length; r++)
  {
    str = str.split(literalRules[r][0]).join(literalRules[r][1]);
  }

  //Numeric entities &#91; &#93; and &#123; &#124; &#125; (with or without a
  //leading zero); the g flag makes these cover every occurrence as well.
  str = str.replace(/&(#0?9[13];)/g, "&amp;$1");
  str = str.replace(/&(#0?12[345];)/g, "&amp;$1");
 
  return str;
}
 
// Once the page has loaded, add a "format" link that runs format().
addOnloadHook(function () {
  // Without an edit form there is no text box for format() to work on.
  if (!document.forms.editform) {
    return;
  }

  // Passed as the last argument to addPortletLink (presumably the item the
  // new link is placed next to - see the addPortletLink documentation).
  var editTab = document.getElementById('ca-edit');

  addPortletLink('p-cactions', 'javascript:format()', 'format', 'ca-format', 'Format article', '', editTab);
});
 
/*</nowiki></pre>
 
[[Category:Wikipedia scripts]]
*/