User:Pilaf/InstaView/Devel
From Wikipedia, the free encyclopedia
< User:Pilaf | InstaView
/*
This version is not functional. Cacycle 01:48, 1 April 2007 (UTC)
InstaView - a Mediawiki to HTML converter in JavaScript Version 0.6.2 Last update: 11:11, 24 July 2006 (UTC) by Shtriter Andrew, http://meta.wikimedia.org/wiki/User:Shtriter Copyright (C) Pedro Fayolle 2005-2006 http://en.wikipedia.org/wiki/User:Pilaf Distributed under the BSD license
Contents |
[edit] Changelog:
[edit] 0.6.4
- MD5 code is moved to md5.js
- Multiple changes to parse_inline_wiki(str) function:
- Changed categories handling: categories are displayed as catlinks now, not just deleted...
- Added title attribute for all internal links
- Apostrophes (') in a tags changed to double quotes (")
- Fixed 2 bugs with trails of internal links (watch the Known bugs).
- Fixed misrendering of short external links ([1] -> [1]): the [#] placeholder in the label is replaced with the link's ordinal number.
- Multiple changes to make_image(..) function
- Fixed undefined width bug: if the width isn't defined, it is no longer added to the output (previously the word "undefined" was emitted).
- Apostrophes (') in tags changed to double quotes (")
- Added longdesc attribute
- onError attribute moved from the beginning to the end of the img tag
- Removed px suffix from the width attribute
[edit] 0.6.3
- Multiple changes to IV's output. There are (almost) no diffs between MW and IV for:
- paragraphs
- tables
- < pre > tags
- simple lists (not heavily tested yet)
- Lots of unnecessary linebreaks deleted.
[edit] 0.6.2
- The linebreaks are preserved now (customisable, can be used to output endline's tokens)
- Added closing tags - almost XHTML compatible
- Fix misrendering of the following elements:
- Full support for < nowiki >
- Content between < pre > tags
- Tables on the lines that start with space (if there's no sp_lines before - 2do)
[edit] 0.6.1
- Fixed problem caused by \r characters
- Improved inline formatting parser
[edit] 0.6
- Changed name to InstaView
- Some major code reorganizations and factored out some common functions
- Handled conversion of relative links (i.e. /foo)
- Fixed misrendering of adjacent definition list items !!! Not fully
- Fixed bug in table headings handling
- Changed date format in signatures to reflect Mediawiki's output
- Fixed handling of Image: ...
- Updated MD5 function (hopefully it will work with UTF-8)
- Fixed bug in handling of links inside images
[edit] To do:
- Add standard namespaces handling in addition to the local ns (i.e. '..' + InstaView.conf.locale.image + '|Image')
- The reason is that Image, Category and other standard namespaces work in non-English wikis too.
- Urlencode the article name in href attribute
- Improve html-tags handling (i.e. don't parse block elements and < html > tag)
- Improve transcluding (through AJAX)
- Handle < noinclude > and < includeonly > tags depending on the URL (the page is transcluded or not)...
- Substitute MagicWords (store the list in iw.conf?) and template's arguments
- Fix misrendering of the following elements:
- Nested definition lists
- Validate the XHTML of output
- Support for coloured links (AJAX)
- Better support for < math > (Ajax).
- Parser-based (as opposed to RegExp-based) inline wikicode handling (make it one-pass and bullet-proof)
[edit] Known bugs - for discussion
- Non-english words are ignored due to \w
- Solved for Cyrillic: \w -> [\wа-яё]
- Use \0n syntax for other alphabets?
- Uppercase chars should be ignored, but they are not
- Solved: \w - > [a-z]
- Compare with MW's regexp
//
// End of the header comment (changelog, to-do list, known bugs). */

/*
 * Script to embed InstaView in MediaWiki's edit page (kept for reference, not executed):
 *
 * addOnloadHook(function(){
 *   if (document.getElementById('editpage-copywarn')) {
 *     var oldPreview = document.getElementById('wpPreview');
 *     var newPreview = document.createElement('input');
 *     newPreview.setAttribute('type', 'button');
 *     newPreview.setAttribute('style', 'font-style: italic');
 *     newPreview.setAttribute('value', 'InstaView');
 *     newPreview.setAttribute('onclick', "InstaView.dump('wpTextbox1', 'InstaViewDump')");
 *     oldPreview.parentNode.insertBefore(newPreview, oldPreview);
 *     oldPreview.parentNode.innerHTML += '<div style="margin: 5px 0 5px 0; padding: 5px; border: 2px solid orange;" id="InstaViewDump"></div>';
 *     oldPreview.value = 'Classic Preview';
 *   }
 * });
 * document.write('<script type="text/javascript" src="md5.js"> </script>\n');
 */

var InstaView = {};

// options
InstaView.conf = {
  user: {},

  wiki: {
    lang: 'en',
    interwiki: 'ab|aa|af|ak|sq|als|am|ang|ar|an|arc|hy|roa-rup|as|ast|av|ay|az|bm|ba|eu|be|bn|bh|bi|bs|br|bg|my|ca|ch|ce|chr|chy|ny|zh|zh-tw|zh-cn|cho|cv|kw|co|cr|hr|cs|da|dv|nl|dz|en|eo|et|ee|fo|fj|fi|fr|fy|ff|gl|ka|de|got|el|kl|gn|gu|ht|ha|haw|he|hz|hi|ho|hu|is|io|ig|id|ia|ie|iu|ik|ga|it|ja|jv|kn|kr|csb|ks|kk|km|ki|rw|rn|tlh|kv|kg|ko|kj|ku|ky|lo|la|lv|li|ln|lt|jbo|nds|lg|lb|mk|mg|ms|ml|mt|gv|mi|minnan|mr|mh|zh-min-nan|mo|mn|mus|nah|na|nv|ne|se|no|nn|oc|or|om|pi|fa|pl|pt|pa|ps|qu|ro|rm|ru|sm|sg|sa|sc|gd|sr|sh|st|tn|sn|scn|simple|sd|si|sk|sl|so|st|es|su|sw|ss|sv|tl|ty|tg|ta|tt|te|th|bo|ti|tpi|to|tokipona|ts|tum|tr|tk|tw|uk|ur|ug|uz|ve|vi|vo|wa|cy|wo|xh|ii|yi|yo|za|zu',
    default_thumb_width: 180
  },

  paths: {
    articles: '/wiki/',
    math: '/math/',
    images: '',
    images_fallback: 'http://upload.wikimedia.org/wikipedia/commons/',
    magnify_icon: 'skins/common/images/magnify-clip.png'
  },

  locale: {
    user: 'User',
    image: 'Image',
    category: 'Category',
    months: ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
  }
};

// options with default values or backreferences
// (written out explicitly: the `with` statement is a SyntaxError in strict mode / ES modules)
InstaView.conf.user.name = InstaView.conf.user.name || 'Wikipedian';
InstaView.conf.user.signature = '[[' + InstaView.conf.locale.user + ':' + InstaView.conf.user.name +
                                '|' + InstaView.conf.user.name + ']]';
InstaView.conf.paths.images = 'http://upload.wikimedia.org/wikipedia/' + InstaView.conf.wiki.lang + '/';

// internal parser state (reset at the start of every top-level convert() call)
// preformatted text flag: true while between <pre> and its closing tag
InstaView.inPre = false;
// the level of nowiki nesting
InstaView.nestlev = 0;

// new line token
// One of the following strings (or a combination):
// '\n' | '<br style="display:none;"/>' | '<br class="hidden"/>' | ''
InstaView.br = '\n';

// Regular expression recognising a line that starts with a block-level image
InstaView.BLOCK_IMAGE = new RegExp('^\\[\\[' + InstaView.conf.locale.image +
                                   ':.*?\\|.*?(?:frame|thumbnail|thumb|none|right|left|center)', 'i');

/**
 * Resolve an element reference.
 * @param aID an element id (string) or anything else, which is returned unchanged
 * @returns the element looked up through document.getElementById, document.layers
 *          or document.all (whichever exists), or aID itself when it is not a string
 */
InstaView.el = function(aID) {
  if (typeof aID != 'string') return aID;
  if (document.getElementById) return document.getElementById(aID);
  if (document.layers) return document.layers[aID];
  return document.all[aID];
};

/**
 * Convert the wikitext held in the `value` of element `from` and write the
 * resulting HTML into the innerHTML of element `to`.
 * @param from id or element providing the wikitext
 * @param to   id or element receiving the HTML
 */
InstaView.dump = function(from, to) {
  this.el(to).innerHTML = this.convert(this.el(from).value);
};

/**
 * Convert wikitext to HTML.
 *
 * Needs a global hex_md5() (md5.js) when the text contains images or <math>.
 *
 * @param wiki   wikitext as a string, or an array of lines (the array is consumed)
 * @param nested optional; true for the recursive calls made for table cell
 *               content. Nested calls keep the nowiki/pre state of the caller
 *               and skip the document-level post-processing (category links
 *               box, numbering of short external links).
 * @returns the generated HTML string
 */
InstaView.convert = function(wiki, nested) {
  var ll = (typeof wiki == 'string') ? wiki.replace(/\r/g, '').split(/\n/) : wiki, // lines of wikicode
      o = '',  // output
      p = 0,   // paragraph flag (1 while a <p> is open)
      $r;      // result of passing a regexp to $()

  // a new document must not inherit an unclosed <nowiki>/<pre> from a previous run
  if (!nested) {
    InstaView.inPre = false;
    InstaView.nestlev = 0;
  }

  // some shorthands
  function remain() { return ll.length; }
  function sh() { return ll.shift(); }  // shift
  function ps(s) { o += s; }            // push

  // similar to C's printf, uses ? as placeholders, ?? to escape question marks
  function f() {
    var i = 1, a = arguments, fmt = a[0], out = '', esc, pos;
    for (; i < a.length; i++) {
      if ((pos = fmt.indexOf('?')) + 1) {
        // an escaped question mark does not consume an argument
        esc = (fmt.charAt(pos + 1) == '?') ? 1 : 0;
        i -= esc;
        out += fmt.substring(0, pos) + (esc ? '?' : a[i]);
        fmt = fmt.substr(pos + 1 + esc);
      } else break;
    }
    return out + fmt;
  }

  // escape the characters that are special in HTML ('&' first, so entities are not double-escaped)
  function html_entities(s) {
    return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  }

  // compare current line against a string or regexp
  // if passed a string it will compare only the first string.length characters
  // if passed a regexp the result is stored in $r
  function $(c) {
    return (typeof c == 'string') ? (ll[0].substr(0, c.length) == c) : ($r = ll[0].match(c));
  }
  function $$(c) { return ll[0] == c; }        // compare current line against a string
  function _(pos) { return ll[0].charAt(pos); } // return char at position pos

  // push s followed by the new line token and drop the current line
  function endl(s) { ps(s + InstaView.br); sh(); }

  // close the paragraph if one is open; always returns true
  function endP() {
    if (p) { p = 0; ps('</p>'); }
    return !p;
  }

  function trim(s) { return s.replace(/^\s+|\s+$/g, ''); }

  // strip the leading whitespace of the current line and copy it to the output
  function ltrim() {
    if (!remain()) return; // nothing left to trim
    var m = ll[0].match(/^(\s*)([\s\S]*)$/);
    ll[0] = m[2];
    ps(m[1]);
  }

  // Delete leading empty lines (like MW does)
  while ($$('')) sh();

  // Handle <nowiki> sections starting on the current line (or still open from a
  // previous line). The content is HTML-escaped and wrapped in
  // <span class="nowiki_N"> where N is the nesting level. Lines touched by this
  // function are consumed completely; text outside the tags is copied verbatim.
  function parse_nowiki() {
    var start = 0, open, close, lower, text;

    while (remain() && (InstaView.nestlev || /<nowiki>/i.test(ll[0].substring(start)))) {
      lower = ll[0].toLowerCase();
      open = lower.indexOf('<nowiki>', start);
      // a closing tag only matters while a nowiki section is open
      close = InstaView.nestlev ? lower.indexOf('</' + 'nowiki>', start) : -1;

      if (close >= 0 && (open == -1 || close < open)) {
        // the closing tag comes first: emit the escaped content and close the span
        ps(html_entities(ll[0].substring(start, close)));
        ps('</span>');
        InstaView.nestlev--;
        start = close + 9; // skip the 9 characters of the closing tag
      } else if (open >= 0) {
        // an opening tag comes first: emit the text before it and open a new span
        text = ll[0].substring(start, open);
        ps(InstaView.nestlev ? html_entities(text) : text);
        ps('<span class="nowiki_' + (++InstaView.nestlev) + '">');
        start = open + 8; // skip the 8 characters of the opening tag
      } else {
        // inside a multi-line nowiki and no tag left on this line
        endl(html_entities(ll[0].substring(start)));
        start = 0;
        continue;
      }

      // outside nowiki with no further opening tag: flush the rest of the line
      if (!InstaView.nestlev && !/<nowiki>/i.test(ll[0].substring(start))) {
        endl(ll[0].substring(start));
        start = 0;
      }
    }
  }

  // Handle <pre> blocks: everything between the tags is HTML-escaped.
  function parse_pre() {
    var m;
    if (!remain()) return;

    if (!InstaView.inPre) {
      m = ll[0].match(/^(.*?)(<pre>)(.*)$/i);
      if (!m) return;
      InstaView.inPre = true;
      // text before the tag is copied as it is; the rest of the line is
      // handled below, so a closing tag on the same line is recognised
      ps(m[1] + m[2]);
      ll[0] = m[3];
    }

    while (InstaView.inPre) {
      parse_nowiki();
      if (!remain()) return;
      m = ll[0].match(/^(.*?)(<\/pre>)(.*)$/i);
      if (m) {
        InstaView.inPre = false;
        endl(html_entities(m[1]) + m[2] + m[3]);
      } else {
        endl(html_entities(ll[0]));
      }
    }
  }

  // TODO: block-level HTML elements (<table>, <h1>.., <p>, <ul>..) are not
  // recognised yet and get wrapped into paragraphs like ordinary text.

  // Handle a run of list lines (prefixes made of '*', '#', ':' and ';').
  function parse_list() {
    var prev = '';  // prefix of the previous list line
    var stack = []; // currently open lists, outermost first: { list: tag, item: tag }

    function listTag(ch) { return (ch == '*') ? 'ul' : (ch == '#') ? 'ol' : 'dl'; }
    function itemTag(ch) { return (ch == ';') ? 'dt' : (ch == ':') ? 'dd' : 'li'; }

    // open a new list level
    function open(ch) {
      var level = { list: listTag(ch), item: itemTag(ch) };
      stack.push(level);
      ps('<' + level.list + '><' + level.item + '>');
    }

    // close the current item of the innermost list and start the next one
    function next(ch) {
      var level = stack[stack.length - 1];
      ps('</' + level.item + '>');
      level.item = itemTag(ch);
      ps('<' + level.item + '>');
    }

    // close the innermost list
    function close() {
      var level = stack.pop();
      ps('</' + level.item + '></' + level.list + '>' + InstaView.br);
    }

    while (remain() && $(/^([*#:;]+)(.*)$/)) {
      var cur = $r[1], text = $r[2], common = 0, dt_match;
      sh();

      // number of leading levels shared with the previous line
      // (':' and ';' belong to the same kind of list)
      while (common < prev.length && common < cur.length &&
             listTag(prev.charAt(common)) == listTag(cur.charAt(common))) common++;

      // close uncontinued lists
      while (stack.length > common) close();
      // continue the innermost shared list
      if (common > 0 && cur.length == common) next(cur.charAt(common - 1));
      // open new lists
      for (var i = common; i < cur.length; i++) open(cur.charAt(i));

      // handle the ";dt :dd" one-line format: emit the term now and queue the
      // definition as a separate line with the same outer prefix
      if (cur.charAt(cur.length - 1) == ';' && (dt_match = text.match(/(.*?) (:.*?)$/))) {
        ps(parse_inline_wiki(dt_match[1]));
        ll.unshift(cur.substring(0, cur.length - 1) + dt_match[2]);
      } else {
        ps(parse_inline_wiki(text));
      }

      // print the line-break
      ps(InstaView.br);
      prev = cur;
    }

    // the list block is over: close everything that is still open
    while (stack.length) close();
  }

  // Handle a table ("{|" ... "|}").
  function parse_table() {
    ltrim();
    endl(f('<table?>', $(/^\s*\{\|( .*)$/) ? $r[1] : ''));

    while (remain()) {
      ltrim();
      if ($('|')) switch (_(1)) {
        // close table and parse inline wiki after it
        case '}':
          endl('</table>' + parse_inline_wiki(ll[0].substr(ll[0].indexOf('}') + 1)));
          return;
        case '-':
          endl(f('<tr ?>', $(/\|-*(.*)/)[1]));
          break;
        default:
          parse_table_data();
      }
      else if ($('!')) parse_table_data();
      // add new line token and shift the array of lines
      else endl('');
    }
  }

  // Handle one table cell, header cell or caption, including nested content.
  function parse_table_data() {
    var td_line, match_i;

    // 1: "|+", '|' or '!'
    // 2: everything after the marker
    // 3: attributes - the shortest run of characters other than "[" and "|"
    //    that is followed by a single "|" (not "||"); undefined when absent
    // 4: the rest of the line after the attributes
    var td_match = sh().match(/^\s*(\|\+|\||!)((?:([^[|]*?)\|(?!\|))?(.*))$/);
    var tag = (td_match[1] == '|+') ? 'caption' : (td_match[1] == '|') ? 'td' : 'th';

    ltrim();

    ps('<' + tag);

    // there was some attribute
    if (typeof td_match[3] != 'undefined') {
      ps(' ' + td_match[3]);
      match_i = 4;
    } else match_i = 2;

    ps('>');

    if (td_match[1] != '|+') {
      // use || or !! as a cell separator depending on context
      // NOTE: when split() is passed a regexp make sure to use non-capturing brackets
      td_line = td_match[match_i].split((td_match[1] == '|') ? '||' : /(?:\|\||!!)/);

      ps(parse_inline_wiki(td_line.shift()));

      // queue the remaining cells of this line as separate lines
      while (td_line.length) ll.unshift(td_match[1] + td_line.pop());
    } else ps(td_match[match_i]);

    // collect the lines that belong to this cell (tc counts nested tables)
    var tc = 0, td = [];

    while (remain()) {
      if ($('|')) {
        if (!tc) break; // we're at the outer-most level (no nested tables), skip to td parse
        else if (_(1) == '}') tc--;
      }
      else if (!tc && $('!')) break;
      else if ($('{|')) tc++;

      td.push(sh());
      ltrim();
    }

    if (td.length) ps(InstaView.convert(td, true));

    // add the closing tag and new line token
    ps('</' + tag + '>' + InstaView.br);
  }

  // Handle lines that start with a space: they form a preformatted block.
  function parse_sp_lines() {
    // close paragraph if it was opened
    endP();
    ps('<pre>');
    while (remain() && _(0) == ' ') {
      endl(parse_inline_wiki(ll[0].substring(1)));
    }
    ps('</' + 'pre>' + InstaView.br);
  }

  function parse_block_image() {
    ps(parse_image(sh()) + InstaView.br);
  }

  // Convert "[[Image:name|options|caption]]" to HTML.
  function parse_image(str) {
    // get what's in between "[[Image:" and "]]"
    var tag = str.substring(InstaView.conf.locale.image.length + 3, str.length - 2);
    var width;
    var attr = [], filename, caption = '';
    var thumb = false, frame = false, center = false;
    var align = '';

    if (tag.match(/\|/)) {
      // manage nested links: find the last "|" that is not inside [[...]]
      var nesting = 0;
      var last_attr;
      for (var i = tag.length - 1; i > 0; i--) {
        if (tag.charAt(i) == '|' && !nesting) {
          last_attr = tag.substr(i + 1);
          tag = tag.substring(0, i);
          break;
        } else switch (tag.substr(i - 1, 2)) {
          case ']]':
            nesting++;
            i--;
            break;
          case '[[':
            nesting--;
            i--;
        }
      }

      attr = tag.split(/\s*\|\s*/);
      // every "|" may be inside a nested link, in which case there is no last attribute
      if (typeof last_attr != 'undefined') attr.push(last_attr);
      filename = attr.shift();

      var w_match;

      for (; attr.length; attr.shift()) {
        if ((w_match = attr[0].match(/^(\d*)px$/))) width = w_match[1];
        else switch (attr[0]) {
          case 'thumb':
          case 'thumbnail':
            thumb = true;
            // falls through: a thumbnail is always framed
          case 'frame':
            frame = true;
            break;
          case 'none':
          case 'right':
          case 'left':
            center = false;
            align = attr[0];
            break;
          case 'center':
            center = true;
            align = 'none';
            break;
          default:
            // only the last attribute can be the caption
            if (attr.length == 1) caption = attr[0];
        }
      }
    } else filename = tag;

    var out = '';

    if (frame) {
      if (align == '') align = 'right';

      out += f('<div class="thumb t?">', align);

      if (thumb) {
        if (!width) width = InstaView.conf.wiki.default_thumb_width;

        out += f('<div style="width:?px;">?', 2 + width * 1, make_image(filename, caption, width)) +
          f('<div class="thumbcaption"><div class="magnify" style="float:right"><a href="?" class="internal" title="Enlarge"><img src="?"></a></div>?</div>',
            InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename,
            InstaView.conf.paths.magnify_icon,
            parse_inline_wiki(caption)
          );
      } else {
        out += '<div>' + make_image(filename, caption) + f('<div class="thumbcaption">?</div>', parse_inline_wiki(caption));
      }

      out += '</div></div>';
    } else if (align != '') {
      out += f('<div class="float?"><span>?</span></div>', align, make_image(filename, caption, width));
    } else {
      return make_image(filename, caption, width);
    }

    return center ? f('<div class="center">?</div>', out) : out;
  }

  // Build the <a><img></a> markup of an image; uses the global hex_md5() from md5.js.
  function make_image(filename, caption, width) {
    // uppercase first letter in file name
    filename = filename.charAt(0).toUpperCase() + filename.substr(1);
    // replace spaces with underscores
    filename = filename.replace(/ /g, '_');

    caption = strip_inline_wiki(caption);

    var md5 = hex_md5(filename);

    var source = md5.charAt(0) + '/' + md5.substr(0, 2) + '/' + filename;

    width = (width) ? 'width="' + width + '"' : '';

    var img = f('<img src="?" ? longdesc="?" ? onerror="this.onerror=null;this.src=\'?\'">',
                InstaView.conf.paths.images + source,
                (caption != '') ? 'alt="' + caption + '"' : '',
                InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename,
                width,
                InstaView.conf.paths.images_fallback + source);

    return f('<a href="?" class="image" ?>?</a>',
             InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename,
             (caption != '') ? 'title="' + caption + '"' : '',
             img);
  }

  // Replace every [[Image:...]] inside str (links nested in the caption are allowed).
  function parse_inline_images(str) {
    var start, substart = 0, nestlev = 0;
    var loop, close, open, html;
    var imagePrefix = RegExp('^' + InstaView.conf.locale.image + ':', 'i');

    while (-1 != (start = str.indexOf('[[', substart))) {
      if (!str.substr(start + 2).match(imagePrefix)) {
        // an ordinary link: keep looking for images after it
        substart = start + 2;
        continue;
      }

      loop = true;
      substart = start;
      do {
        substart += 2;
        close = str.indexOf(']]', substart);
        open = str.indexOf('[[', substart);
        if (close <= open || open == -1) {
          if (close == -1) return str; // unbalanced brackets: leave the text alone
          substart = close;
          if (nestlev) {
            nestlev--;
          } else {
            html = parse_image(str.substring(start, close + 2));
            str = str.substring(0, start) + html + str.substring(close + 2);
            substart = start + html.length;
            loop = false;
          }
        } else {
          substart = open;
          nestlev++;
        }
      } while (loop);
    }

    return str;
  }

  // the output of this function doesn't respect the FILO structure of HTML
  // but since most browsers can handle it I'll save myself the hassle
  function parse_inline_formatting(str) {
    var em = false, st = false, i, li = 0, out = '';

    while ((i = str.indexOf("''", li)) + 1) {
      out += str.substring(li, i);
      li = i + 2;
      if (str.charAt(i + 2) == "'") {
        li++;
        st = !st;
        // MW uses <b> and <i>
        out += st ? '<b>' : '</b>';
      } else {
        em = !em;
        out += em ? '<i>' : '</i>';
      }
    }
    return out + str.substr(li);
  }

  // Convert inline wikicode (images, bold/italic, math, signatures, links).
  function parse_inline_wiki(str) {
    var aux_match;

    str = parse_inline_images(str);
    str = parse_inline_formatting(str);

    // math (uses the global hex_md5() from md5.js)
    while ((aux_match = str.match(/<(?:)math>(.*?)<\/math>/i))) {
      var math_md5 = hex_md5(aux_match[1]);
      str = str.replace(aux_match[0], f('<img src="?.png">', InstaView.conf.paths.math + math_md5));
    }

    // Build a Mediawiki-formatted date string
    var now = new Date();
    var minutes = now.getUTCMinutes();
    if (minutes < 10) minutes = '0' + minutes;
    var date = f("?:?, ? ? ? (UTC)", now.getUTCHours(), minutes, now.getUTCDate(),
                 InstaView.conf.locale.months[now.getUTCMonth()], now.getUTCFullYear());

    // base for relative links; empty when there is no browser location
    var here = (typeof location != 'undefined') ? location : '';

    // text formatting
    return str.
      // signatures
      replace(/~{5}(?!~)/g, date).
      replace(/~{4}(?!~)/g, InstaView.conf.user.name + ' ' + date).
      replace(/~{3}(?!~)/g, InstaView.conf.user.name).

      // TODO: {{{arguments}}} and {{templates}} are not substituted yet

      // 2Do: Urlencode the article name in ''href'' attribute
      // [[:Category:...]], [[:Image:...]], etc...
      replace(RegExp('\\[\\[:((?:' + InstaView.conf.locale.category + '|' + InstaView.conf.locale.image + '|' + InstaView.conf.wiki.interwiki + '):.*?)\\]\\]', 'gi'),
              '<a href="' + InstaView.conf.paths.articles + '$1" title="$1">$1</a>').
      // [[Category:...]] and interwiki links: hidden here, collected into the catlinks box later
      replace(RegExp('\\[\\[(' + InstaView.conf.locale.category + '|' + InstaView.conf.wiki.interwiki + '):(.*?)\\]\\]', 'gi'),
              '<span dir="ltr" style="display:none"><a href="' + InstaView.conf.paths.articles + '$1:$2" title="$1:$2">$2</a></span>').

      // [[/Relative links]]
      replace(/\[\[(\/[^|]*?)\]\]/g, f('<a href="?$1" title="$1">$1</a>', here)).

      // [[/Replaced|Relative links]]
      replace(/\[\[(\/.*?)\|(.+?)\]\]/g, f('<a href="?$1" title="$1">$2</a>', here)).

      // Bug: Non-english words are ignored due to \w
      //   Solved for Cyrillic: \w -> [\wа-яё]
      // Bug: Uppercase chars should be ignored, but they are not
      //   Solved: \w -> [a-z]

      // [[Common links]]with_trail
      replace(/\[\[([^|]*?)\]\]([a-zа-яё]*)/g, f('<a href="?$1" title="$1">$1$2</a>', InstaView.conf.paths.articles)).

      // [[Replaced|Links]]with_trail
      replace(/\[\[(.*?)\|([^\]]+?)\]\]([a-zа-яё]*)/g, f('<a href="?$1" title="$1">$2$3</a>', InstaView.conf.paths.articles)).

      // [[Stripped:Namespace|]]
      replace(/\[\[([^\]]*?:)?(.*?)( *\(.*?\))?\|\]\]/g, f('<a href="?$1$2$3" title="$1$2$3">$2</a>', InstaView.conf.paths.articles)).

      // External links
      replace(/\[(https?|news|ftp|mailto|gopher|irc):(\/*)([^\]]*?) (.*?)\]/g, '<a href="$1:$2$3">$4</a>').
      // short external links; the [#] placeholder is numbered after the whole text is converted
      replace(/\[(https?):\/\/(.*?)\]/g, '<a href="$1://$2">[#]</a>').
      replace(/\[(news|ftp|mailto|gopher|irc):(\/*)(.*?)\]/g, '<a href="$1:$2$3">$1:$2$3</a>').
      replace(/(^| )(https?|news|ftp|mailto|gopher|irc):(\/*)([^ $]*)/g, '$1<a href="$2:$3$4">$2:$3$4</a>').

      replace('__NOTOC__', '').
      replace('__NOEDITSECTION__', '');
  }

  // Remove link and italic/bold markup, keeping only the visible text.
  function strip_inline_wiki(str) {
    return str
      .replace(/\[\[[^\]]*\|(.*?)\]\]/g, '$1')
      .replace(/\[\[(.*?)\]\]/g, '$1')
      .replace(/''(.*?)''/g, '$1');
  }

  // begin parsing
  do {
    parse_nowiki();
    parse_pre();
    if (!remain()) break;

    if ($(/^(={1,6})(.*)\1(.*)$/)) {
      // headings
      endP();
      endl(f('<h?>?</h?>?', $r[1].length, parse_inline_wiki($r[2]), $r[1].length, $r[3]));

    } else if ($(/^[*#:;]/)) {
      // lists
      endP();
      ps(InstaView.br);
      parse_list();

    } else if ($(/^(?:\s*)\{\|/)) {
      // tables
      endP();
      parse_table();

    } else if (_(0) == ' ') {
      // lines that start with space
      parse_sp_lines();

    } else if ($(/^----+$/)) {
      // horizontal rule
      endP();
      endl('<hr>');

    } else if ($(InstaView.BLOCK_IMAGE)) {
      // block images
      endP();
      parse_block_image();

    } else {
      // escape a closing pre tag: there's no opening tag, so it must be treated as text
      ll[0] = ll[0].replace('</' + 'pre>', '&lt;/pre>');

      // handle paragraphs
      if (trim(ll[0]) == '') {
        // blank line: close the open paragraph and start a new one
        endP();
        if (remain() > 1) {
          ps('<p>');
          p = 1;
          // two blank lines in a row: add a hard line break
          if (trim(ll[1]) == '') {
            sh();
            ps('<br>');
          }
        } else break;
      } else {
        if (!p) {
          ps('<p>');
          p = 1;
        }
        ps(parse_inline_wiki(ll[0]));
        if (remain() < 2) {
          // the last line: close the paragraph without a trailing new line token
          endP();
          break;
        }
      }

      // add new line token and shift the array of lines
      endl('');
    }
  } while (remain());

  // a paragraph may still be open when the remaining lines were consumed by <nowiki>/<pre>
  endP();

  // add closing </tr>
  o = o.replace(/(<\/t[dh]>\s*)(<tr (.*)>|<\/table>)/gim, '$1</tr>' + InstaView.br + '$2')
       .replace(/<tr >/gim, '<tr>')
       // escape stray closing nowiki tags
       .replace(/<\/nowiki>/gi, '&lt;/' + 'nowiki>');

  // document-level post-processing, done once on the complete output
  if (!nested) {
    // show the collected category / interwiki links in a box at the end
    var catLinks = o.match(/<span dir="ltr" style="display:none">(.*?)<\/span>/gim);
    if (catLinks) {
      var shown = [];
      for (var c = 0; c < catLinks.length; c++) {
        shown.push(catLinks[c].replace(' style="display:none"', ''));
      }
      o += '<div id="catlinks"><p class="catlinks">' + shown.join(' | ') + '</p></div>';
    }

    // number the short external links: [#] -> [1], [2], ...
    var linkNo = 0;
    o = o.replace(/\[#\]/g, function() { return '[' + (++linkNo) + ']'; });
  }

  return o;
};