// From Wikipedia, the free encyclopedia
/*
WikiFunctions
Copyright (C) 2006 Martin Richards
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Configuration;
using System.Collections;
using System.Web;
[assembly: CLSCompliant(true)]
namespace WikiFunctions.Parse
{
/// <summary>
/// Provides functions for editing wiki text, such as formatting and re-categorisation.
/// </summary>
public class Parsers
{
#region constructor etc.
/// <summary>
/// Creates a parser with the default settings: attaches a metadata sorter
/// to this instance and fills the regex replacement tables.
/// </summary>
public Parsers()
{
    this.metaDataSorter = new MetaDataSorter(this);
    this.MakeRegexes();
}
/// <summary>
/// Creates a parser like the default constructor does, then stores a custom
/// stub word limit and the category-key setting.
/// </summary>
/// <param name="StubWordCount">Stored as the maximum word count for a stub.</param>
/// <param name="AddHumanKey">Initial value of <see cref="addCatKey"/>.</param>
public Parsers(int StubWordCount, bool AddHumanKey)
    : this()
{
    // MakeRegexes (run by the chained constructor) does not read either value,
    // so assigning them afterwards gives the same end state.
    this.StubMaxWordCount = StubWordCount;
    this.addCatKey = AddHumanKey;
}
/// <summary>
/// Fills the three replacement tables (RegexUnicode, RegexTagger, RegexConversion).
/// Called once from each constructor.
/// </summary>
private void MakeRegexes()
{
    // RegexUnicode is applied by Unicodify just before HttpUtility.HtmlDecode.
    // Entities that must survive decoding get their leading '&' turned into "&amp;",
    // so the decoder restores the original entity instead of converting it.

    //look bad if changed
    RegexUnicode.Add(new Regex("&(ndash|mdash|minus|times|lt|gt|nbsp|thinsp|shy|lrm|rlm|[Pp]rime);", RegexOptions.Compiled), "&amp;$1;");
    //IE6 does not like these (NOTE(review): the original comment read "does like"; presumably a typo)
    RegexUnicode.Add(new Regex("&#(705|803|596|620|699|700|8652|9408|9848|12288|160|61|x27|39);", RegexOptions.Compiled), "&amp;#$1;");
    //Decoder doesn't like these
    RegexUnicode.Add(new Regex("&#(x109[0-9A-Z]{2});", RegexOptions.Compiled), "&amp;#$1;");
    RegexUnicode.Add(new Regex("&#((?:277|119|84|x1D|x100)[A-Z0-9a-z]{2,3});", RegexOptions.Compiled), "&amp;#$1;");
    RegexUnicode.Add(new Regex("&#(x12[A-Za-z0-9]{3});", RegexOptions.Compiled), "&amp;#$1;");
    //interfere with wiki syntax
    RegexUnicode.Add(new Regex("&#(126|x5D|x5B|x7b|x7c|x7d|0?9[13]|0?12[345]|0?0?3[92]);", RegexOptions.Compiled | RegexOptions.IgnoreCase), "&amp;#$1;");
    //not entity, but still wrong
    RegexUnicode.Add(new Regex("(cm| m|mm|km|mi)<sup>2</sup>", RegexOptions.Compiled), "$1²");
    RegexUnicode.Add(new Regex("(cm| m|mm|km|mi)<sup>3</sup>", RegexOptions.Compiled), "$1³");

    // Undated maintenance templates and their aliases -> canonical dated template.
    // NOTE(review): the date is hard-coded to February 2007.
    RegexTagger.Add(new Regex("\\{\\{(template:)?(wikify|wikify-date|wfy|wiki)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Wikify|February 2007}}");
    RegexTagger.Add(new Regex("\\{\\{(template:)?(Clean ?up|CU|Clean|Tidy)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Cleanup|February 2007}}");
    RegexTagger.Add(new Regex("\\{\\{(template:)?(Linkless|Orphan)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Linkless|February 2007}}");
    RegexTagger.Add(new Regex("\\{\\{(template:)?(Uncategori[sz]ed|Uncat|Classify|Category needed|Catneeded|categori[zs]e|nocats?)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Uncategorized|February 2007}}");
    RegexTagger.Add(new Regex("\\{\\{(template:)?(Unreferenced|add references|cite[ -]sources?|cleanup-sources?|needs? references|no sources|no references?|not referenced|references|sources|unref|Unreferencedsect|unsourced)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Unreferenced|date=February 2007}}");

    // Template aliases -> canonical template name (or replacement wikitext).
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(Dab|Disamb|Disambiguation)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Disambig}}");
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(2cc|2LAdisambig|2LCdisambig|2LC)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{2CC}}");
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(3cc|3LW|Tla|Tla-dab|TLA-disambig|TLAdisambig|3LC)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{3CC}}");
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(4cc|4LW|4LA|4LC)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{4CC}}");
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(Bio-dab|Hndisambig)", RegexOptions.IgnoreCase | RegexOptions.Compiled), "{{Hndis");
    // the quotes inside the replacement must be escaped for the literal to compile
    RegexConversion.Add(new Regex("\\{\\{(?:Template:)?(Prettytable|Prettytable100|Pt)\\}\\}", RegexOptions.IgnoreCase | RegexOptions.Compiled), "class=\"wikitable\"");
    RegexConversion.Add(new Regex("\\{\\{(?:[Tt]emplate:)?(PAGENAMEE?\\}\\}|[Ll]ived\\||[Bb]io-cats\\|)", RegexOptions.Compiled), "{{subst:$1");
    // {{Lifetime|birth|death|sortkey}} -> explicit birth/death categories
    RegexConversion.Add(new Regex(@"\{\{[Ll]ife(?:time|span)\|([0-9]{4})\|([0-9]{4})\|(.*?)\}\}", RegexOptions.Compiled), "[[Category:$1 births|$3]]\r\n[[Category:$2 deaths|$3]]");
    RegexConversion.Add(new Regex(@"\{\{[Ll]ife(?:time|span)\|\|([0-9]{4})\|(.*?)\}\}", RegexOptions.Compiled), "[[Category:Year of birth unknown|$2]]\r\n[[Category:$1 deaths|$2]]");
    RegexConversion.Add(new Regex(@"\{\{[Ll]ife(?:time|span)\|([0-9]{4})\|\|(.*?)\}\}", RegexOptions.Compiled), "[[Category:$1 births|$2]]\r\n[[Category:Year of death unknown|$2]]");
}
// Entity rules applied, in enumeration order, by Unicodify; filled in MakeRegexes.
Dictionary<Regex, string> RegexUnicode = new Dictionary<Regex, string>();
// Template alias -> canonical template rules; filled in MakeRegexes.
Dictionary<Regex, string> RegexConversion = new Dictionary<Regex, string>();
// Maintenance-tag rules; filled in MakeRegexes.
Dictionary<Regex, string> RegexTagger = new Dictionary<Regex, string>();
// Used by BoldTitle to mask text (HideMore) and to restore it afterwards (AddBackMore).
HideText hider = new HideText();
// Does the work behind SortMetaData and backs the InterWikiOrder property; created in the constructors.
MetaDataSorter metaDataSorter;
// Scratch copy of the input text that the "out NoChange" overloads compare against.
string testText = "";
// Maximum word count for a stub; replaced by the two-argument constructor.
int StubMaxWordCount = 500;
// backing store for sortInterwikiOrder; sorting is on by default
private bool boolInterwikiOrder = true;
/// <summary>
/// Gets or sets the flag that says whether interwiki links should be sorted.
/// </summary>
public bool sortInterwikiOrder
{
    set { this.boolInterwikiOrder = value; }
    get { return this.boolInterwikiOrder; }
}
/// <summary>
/// Gets or sets the interwiki link order; the value lives on the metadata sorter.
/// </summary>
public InterWikiOrderEnum InterWikiOrder
{
    get { return this.metaDataSorter.InterWikiOrder; }
    set { this.metaDataSorter.InterWikiOrder = value; }
}
// backing store for addCatKey; off by default
private bool boolAddCatKey = false;
/// <summary>
/// Gets or sets whether a key is added to categories (for people only) when parsed.
/// </summary>
public bool addCatKey
{
    set { this.boolAddCatKey = value; }
    get { return this.boolAddCatKey; }
}
#endregion
#region General Parse
/// <summary>
/// Re-organises the Person Data, stub/disambig templates, categories and interwikis
/// by handing the text to the metadata sorter.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The article title.</param>
/// <returns>The re-organised text.</returns>
public string SortMetaData(string ArticleText, string ArticleTitle)
{
    string sortedText = this.metaDataSorter.Sort(ArticleText, ArticleTitle);
    return sortedText;
}
// --- Patterns used by FixDates ---
// A decade written with an apostrophe, e.g. "1980's"; group 1 is the year.
readonly Regex regexFixDates0 = new Regex("([12][0-9][0-9]0)'s", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "Month 3rd": group 1 is the month, group 2 the day number without its ordinal suffix.
readonly Regex regexFixDates1 = new Regex("(January|February|March|April|May|June|July|August|September|October|November|December) ([1-9][0-9]?)(?:st|nd|rd|th)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "3rd Month": group 1 is the day number, group 2 the month.
readonly Regex regexFixDates2 = new Regex("([1-9][0-9]?)(?:st|nd|rd|th) (January|February|March|April|May|June|July|August|September|October|November|December)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// --- Patterns used by FixHeadings ---
// In each pattern group 1 is the opening "==" (plus optional space) and the last group the closing part.
// Variants of "See also".
readonly Regex regexHeadings0 = new Regex("(== ?)(see also:?|related topics:?|related articles:?|internal links:?|also see:?)( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Variants of "External links" (plural).
readonly Regex regexHeadings1 = new Regex("(== ?)(external links:?|external sites:?|outside links|web ?links:?|exterior links:?)( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Variants of "External link" (singular).
readonly Regex regexHeadings2 = new Regex("(== ?)(external link:?|external site:?|web ?link:?|exterior link:?)( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "Reference(s)"; an optional plural "s" is captured in the closing group so it survives the replacement.
readonly Regex regexHeadings3 = new Regex("(== ?)(reference:?)(s? ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "Source(s)"; same plural handling as above.
readonly Regex regexHeadings4 = new Regex("(== ?)(source:?)(s? ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "Further reading(s)".
readonly Regex regexHeadings5 = new Regex("(== ?)(further readings?:?)( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "<Early|Personal|Adult|Later> Life"; group 2 is the first word.
readonly Regex regexHeadings6 = new Regex("(== ?)(Early|Personal|Adult|Later) Life( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "<Current|Past|Prior> Members"; group 2 is the first word.
readonly Regex regexHeadings7 = new Regex("(== ?)(Current|Past|Prior) Members( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// "<word> Career"; group 2 is the single alphabetic word before "Career".
readonly Regex regexHeadingsCareer = new Regex("(== ?)([a-zA-Z]+) Career( ?==)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// A generic introductory heading at the very start of the text ("^" without Multiline anchors to the start of the string).
readonly Regex RegexBadHeader = new Regex("^(={1,4} ?(about|description|overview|definition|profile|(?:general )?information|background|intro(?:duction)?|summary|bio(?:graphy)?) ?={1,4})", RegexOptions.IgnoreCase | RegexOptions.Compiled);
/// <summary>
/// Fixes ==See also== and similar common section-heading errors and reports whether
/// the heading fixes altered the text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The article title.</param>
/// <param name="NoChange">Set to true when the heading fixes left the text unchanged.</param>
/// <returns>The modified article text, trimmed.</returns>
public string FixHeadings(string ArticleText, string ArticleTitle, out bool NoChange)
{
    testText = ArticleText;
    string fixedText = FixHeadings(ArticleText, ArticleTitle);
    // the comparison is made before the final Trim, so trimming alone does not count as a change
    NoChange = (testText == fixedText);
    return fixedText.Trim();
}
/// <summary>
/// Fixes ==See also== and similar common section-heading errors. Removes unnecessary
/// introductory headings at the start of the text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The article title; a leading heading repeating it is removed.</param>
/// <returns>The modified article text.</returns>
public string FixHeadings(string ArticleText, string ArticleTitle)
{
    // drop a heading at the very start that only repeats the title, then a generic intro heading
    Regex titleHeading = new Regex("^={1,4} ?" + Regex.Escape(ArticleTitle) + " ?={1,4}", RegexOptions.IgnoreCase);
    string text = titleHeading.Replace(ArticleText, "");
    text = RegexBadHeader.Replace(text, "");

    // only rename look-alike headings when there is no "See also" heading yet
    bool hasSeeAlso = Regex.IsMatch(text, "= ?See also ?=");
    if (!hasSeeAlso)
    {
        text = regexHeadings0.Replace(text, "$1See also$3");
    }

    // remaining heading rules, applied in this fixed order
    Regex[] headingRules = new Regex[]
    {
        regexHeadings1, regexHeadings2, regexHeadings3, regexHeadings4,
        regexHeadings5, regexHeadings6, regexHeadings7, regexHeadingsCareer
    };
    string[] headingFixes = new string[]
    {
        "$1External links$3", "$1External link$3", "$1Reference$3", "$1Source$3",
        "$1Further reading$3", "$1$2 life$3", "$1$2 members$3", "$1$2 career$3"
    };
    for (int i = 0; i < headingRules.Length; i++)
    {
        text = headingRules[i].Replace(text, headingFixes[i]);
    }
    return text;
}
/// <summary>
/// Fixes date and decade formatting errors: "1980's" becomes "1980s" and ordinal
/// suffixes are removed from day numbers next to a month name.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixDates(string ArticleText)
{
    string decadesFixed = regexFixDates0.Replace(ArticleText, "$1s");
    string monthDayFixed = regexFixDates1.Replace(decadesFixed, "$1 $2");
    return regexFixDates2.Replace(monthDayFixed, "$1 $2");
}
/// <summary>
/// Fixes footnote formatting errors per [[WP:FN]]: removes whitespace in front of
/// citation templates and ref tags, trims trailing spaces inside named ref tags,
/// removes punctuation between adjacent refs and moves punctuation that follows a
/// citation so that it precedes it.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixFootnotes(string ArticleText)
{
    string factTag = "({{[ ]*fact[ ]*}}|{{[ ]*fact[ ]*[\\|][^}]*}}|{{[ ]*facts[ ]*}}|{{[ ]*citequote[ ]*}}|{{[ ]*citation needed[ ]*}}|{{[ ]*cn[ ]*}}|{{[ ]*verification needed[ ]*}}|{{[ ]*verify source[ ]*}}|{{[ ]*verify credibility[ ]*}}|{{[ ]*who[ ]*}}|{{[ ]*failed verification[ ]*}}|{{[ ]*nonspecific[ ]*}}|{{[ ]*dubious[ ]*}}|{{[ ]*or[ ]*}}|{{[ ]*lopsided[ ]*}}|{{[ ]*GR[ ]*[\\|][ ]*[^ ]+[ ]*}}|{{[ ]*[c]?r[e]?f[ ]*[\\|][^}]*}}|{{[ ]*ref[ _]label[ ]*[\\|][^}]*}}|{{[ ]*ref[ _]num[ ]*[\\|][^}]*}})";
    // remove whitespace in front of citation templates
    // (the character class had lost its opening bracket and escapes, so it never matched)
    ArticleText = Regex.Replace(ArticleText, "[\\n\\r\\f\\t ]+?" + factTag, "$1");
    // remove whitespace in front of ref tags
    ArticleText = Regex.Replace(ArticleText, "[\\n\\r\\f\\t ]+?<ref([ >])", "<ref$1");
    // remove trailing spaces from named refs; the captured part must end on a
    // non-space character, otherwise the greedy class swallows the spaces itself
    ArticleText = Regex.Replace(ArticleText, "<ref ([^>]*?[^> ])[ ]*>", "<ref $1>");
    // remove superscripted punctuation between refs
    ArticleText = Regex.Replace(ArticleText, "(</ref>|<ref[^>]*?/>)<sup>[ ]*[,;-]?[ ]*</sup><ref", "$1<ref");
    ArticleText = Regex.Replace(ArticleText, "(</ref>|<ref[^>]*?/>)[ ]*[,;-]?[ ]*<ref", "$1<ref");

    // building blocks for the "punctuation goes before the citation" rules
    string LacksPunctuation = "([^\\.,;:!\\?\"'’])";
    string QuestionOrExclam = "([!\\?])";
    string MinorPunctuation = "([\\.,;:])";
    string AnyPunctuation = "([\\.,;:!\\?])";
    string MajorPunctuation = "([,;:!\\?])";
    string Period = "([\\.])";
    string Quote = "([\"'’]*)";
    string Space = "[ ]*";
    string RefTag1 = "(<ref>([^<]|<[^/]|</[^r]|</r[^e]|</re[^f]|</ref[^>])*?</ref>)";
    string RefTag2 = "(<ref[^>]*?[^/]>([^<]|<[^/]|</[^r]|</r[^e]|</re[^f]|</ref[^>])*?</ref>)";
    string RefTag3 = "(<ref[^>]*?/>)";

    // factTag and RefTag3 contribute one capture group, so the trailing punctuation is $4;
    // RefTag1 and RefTag2 contain an inner group, so there it is $5.
    string[] citations = new string[] { factTag, RefTag1, RefTag2, RefTag3 };
    string[] punctuationGroup = new string[] { "$4", "$5", "$5", "$4" };

    string[] patterns = new string[citations.Length * 4];
    string[] replacements = new string[citations.Length * 4];
    for (int c = 0; c < citations.Length; c++)
    {
        // a: no punctuation before the citation -> move the following punctuation in front
        patterns[c * 4] = LacksPunctuation + Quote + citations[c] + Space + AnyPunctuation;
        replacements[c * 4] = "$1$2" + punctuationGroup[c] + "$3";
        // b: "?" or "!" before, major punctuation after -> keep both, citation last
        patterns[c * 4 + 1] = QuestionOrExclam + Quote + citations[c] + Space + MajorPunctuation;
        replacements[c * 4 + 1] = "$1$2" + punctuationGroup[c] + "$3";
        // c: minor punctuation on both sides -> keep only the trailing one, moved in front
        patterns[c * 4 + 2] = MinorPunctuation + Quote + citations[c] + Space + AnyPunctuation;
        replacements[c * 4 + 2] = "$2" + punctuationGroup[c] + "$3";
        // d: "?" or "!" before, period after -> drop the period
        patterns[c * 4 + 3] = QuestionOrExclam + Quote + citations[c] + Space + Period;
        replacements[c * 4 + 3] = "$1$2$3";
    }

    // repeat for multiple refs together; stop early once a pass changes nothing,
    // because every later pass would then be a no-op as well
    for (int pass = 0; pass < 10; pass++)
    {
        string beforePass = ArticleText;
        for (int i = 0; i < patterns.Length; i++)
        {
            ArticleText = Regex.Replace(ArticleText, patterns[i], replacements[i]);
        }
        if (ArticleText == beforePass)
        {
            break;
        }
    }
    return ArticleText;
}
/// <summary>
/// Removes some excess whitespace from the article: collapses runs of blank lines,
/// removes the blank line between an empty section heading and the next heading and
/// in front of bulleted external links, and strips a trailing horizontal rule or
/// line break tag.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text, trimmed.</returns>
public static string RemoveWhiteSpace(string ArticleText)
{
    ArticleText = Regex.Replace(ArticleText, "\r\n(\r\n)+", "\r\n\r\n");
    ArticleText = Regex.Replace(ArticleText, "== ? ?\r\n\r\n==", "==\r\n==");
    // This is a regex rule (capture group plus "$1"), so it has to go through
    // Regex.Replace; as a literal String.Replace it could never match.
    ArticleText = Regex.Replace(ArticleText, "\r\n\r\n(\\* ?\\[?http)", "\r\n$1");
    // trailing horizontal rule / <br> at the very end of the text
    ArticleText = Regex.Replace(ArticleText.Trim(), "----+$", "");
    ArticleText = Regex.Replace(ArticleText.Trim(), "<br ?/?>$", "", RegexOptions.IgnoreCase);
    return ArticleText.Trim();
}
/// <summary>
/// Removes all excess whitespace from the article: runs <see cref="RemoveWhiteSpace"/>,
/// then collapses tabs and repeated spaces, tightens list and heading spacing and
/// normalises the spacing around en and em dashes.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text, trimmed.</returns>
public string RemoveAllWhiteSpace(string ArticleText)
{
    ArticleText = ArticleText.Replace("\t", " ");
    ArticleText = RemoveWhiteSpace(ArticleText);
    ArticleText = ArticleText.Replace("\r\n\r\n*", "\r\n*");
    ArticleText = Regex.Replace(ArticleText, " +", " ");
    ArticleText = Regex.Replace(ArticleText, " \r\n", "\r\n");
    ArticleText = Regex.Replace(ArticleText, "==\r\n\r\n", "==\r\n");
    //fix bullet points: drop any space after the markers, then add exactly one
    ArticleText = Regex.Replace(ArticleText, "^([\\*#]+) ", "$1", RegexOptions.Multiline);
    ArticleText = Regex.Replace(ArticleText, "^([\\*#]+)", "$1 ", RegexOptions.Multiline);
    //fix heading space
    ArticleText = Regex.Replace(ArticleText, "^(={1,4}) ?(.*?) ?(={1,4})$", "$1$2$3", RegexOptions.Multiline);
    //fix dash spacing: strip the spaces around every dash form, then put one space on each side.
    // The dash is matched as a literal character (written as an escape so the file
    // encoding cannot damage it) and as its named, decimal and hex entities.
    ArticleText = Regex.Replace(ArticleText, " ?(\u2013|\u2014|&#15[01];|&[nm]dash;|&#821[12];|&#x201[34];) ?", "$1");
    ArticleText = Regex.Replace(ArticleText, "(\u2014|&#151;|&mdash;|&#8212;|&#x2014;|\u2013|&#150;|&ndash;|&#8211;|&#x2013;)", " $1 ");
    return ArticleText.Trim();
}
/// <summary>
/// Fixes and improves syntax (such as html markup) and reports whether anything changed.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The modified article text.</returns>
public string FixSyntax(string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string fixedText = FixSyntax(ArticleText);
    NoChange = (testText == fixedText);
    return fixedText;
}
// --- Patterns used by FixSyntax ---
// NOTE(review): "[^][]" is presumably read by .NET as "any character except ']' or '['" — confirm.
// External link opened with "[[" but closed with a single "]".
readonly Regex SyntaxRegex1 = new Regex("\\[\\[http:\\/\\/([^][]*?)\\]", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// External link opened with a single "[" but closed with "]]".
readonly Regex SyntaxRegex2 = new Regex("\\[http:\\/\\/([^][]*?)\\]\\]", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// External link wrapped in double brackets on both sides.
readonly Regex SyntaxRegex3 = new Regex("\\[\\[http:\\/\\/(.*?)\\]\\]", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Internal link opened with "[[" but closed with a single "]"; group 2 is the text that follows.
readonly Regex SyntaxRegex4 = new Regex("\\[\\[([^][]*?)\\]([^][][^\\]])", RegexOptions.Compiled);
// Internal link opened with a single "[" but closed with "]]"; groups 1 and 3 are the surrounding characters.
readonly Regex SyntaxRegex5 = new Regex("([^][])\\[([^][]*?)\\]\\]([^\\]])", RegexOptions.Compiled);
// An http URL written as an image link; group 1 is the URL.
readonly Regex SyntaxRegex6 = new Regex("\\[?\\[image:(http:\\/\\/.*?)\\]\\]?", RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Internal link with a space right after the opening brackets.
readonly Regex SyntaxRegex7 = new Regex("\\[\\[ (.*)?\\]\\]", RegexOptions.Compiled);
// Single alphabetic word link with a space right before the closing brackets.
readonly Regex SyntaxRegex8 = new Regex("\\[\\[([A-Za-z]*) \\]\\]", RegexOptions.Compiled);
// Link whose section anchor is preceded by an underscore ("_#").
readonly Regex SyntaxRegex9 = new Regex("\\[\\[(.*)?_#(.*)\\]\\]", RegexOptions.Compiled);
// Template call with an explicit "Template:" prefix; group 1 is the opening braces plus whitespace, group 2 the rest.
readonly Regex SyntaxRegexTemplate = new Regex("(\\{\\{[\\s]*)[Tt]emplate:(.*?\\}\\})", RegexOptions.Singleline | RegexOptions.Compiled);
// List item (# or *) that ends in a <br> tag.
readonly Regex SyntaxRegex11 = new Regex("^((#|\\*).*?)<br ?/?>\r\n", RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled);
// html italic and bold tag pairs; group 1 is the enclosed text.
readonly Regex SyntaxRegexItalic = new Regex("<i>(.*?)</i>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
readonly Regex SyntaxRegexBold = new Regex("<b>(.*?)</b>", RegexOptions.IgnoreCase | RegexOptions.Compiled);
/// <summary>
/// Fixes and improves syntax: converts html bold/italic/rule markup to wiki markup,
/// drops the redundant "Template:" prefix, removes line break tags from list items
/// and repairs unevenly bracketed or badly spaced links.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text, trimmed.</returns>
public string FixSyntax(string ArticleText)
{
    string text = ArticleText;

    // html -> wiki markup; skipped when an <i>/<b> tag touches an apostrophe
    bool tagTouchesApostrophe = Regex.IsMatch(text, "'</?[ib]>|</?[ib]>'", RegexOptions.IgnoreCase);
    if (!tagTouchesApostrophe)
    {
        text = SyntaxRegexItalic.Replace(text, "''$1''");
        text = SyntaxRegexBold.Replace(text, "'''$1'''");
    }

    Regex ruleAtLineStart = new Regex("^<hr>|^----+", RegexOptions.Multiline);
    text = ruleAtLineStart.Replace(text, "----");

    // a rule straight after a heading looks like a double line break
    Regex ruleAfterHeading = new Regex("(^==?[^=]*==?)\r\n(\r\n)?----+", RegexOptions.Multiline);
    text = ruleAfterHeading.Replace(text, "$1");

    // unnecessary namespace
    text = SyntaxRegexTemplate.Replace(text, "$1$2");

    // <br> at the end of list items
    text = SyntaxRegex11.Replace(text, "$1\r\n");

    // html headings (h2-h4) are deliberately left alone: converting them can cause problems

    // uneven bracketing on links, unless an image link contains a URL
    bool imageLinkWithUrl = Regex.IsMatch(text, "\\[\\[[Ii]mage:[^]]*http");
    if (!imageLinkWithUrl)
    {
        text = SyntaxRegex1.Replace(text, "[http://$1]");
        text = SyntaxRegex2.Replace(text, "[http://$1]");
        text = SyntaxRegex3.Replace(text, "[http://$1]");
        text = SyntaxRegex4.Replace(text, "[[$1]]$2");
        text = SyntaxRegex5.Replace(text, "$1[[$2]]$3");
    }

    // bad external links
    text = SyntaxRegex6.Replace(text, "[$1]");

    // bad internal links
    text = SyntaxRegex7.Replace(text, "[[$1]]");
    text = SyntaxRegex8.Replace(text, "[[$1]]");
    text = SyntaxRegex9.Replace(text, "[[$1#$2]]");

    text = Regex.Replace(text, "ISBN: ?([0-9])", "ISBN $1");
    return text.Trim();
}
/// <summary>
/// Fixes link syntax: in ordinary wiki links underscores become spaces and the
/// spaces around the pipe are removed; every link is URL-decoded (plus signs are kept).
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The modified article text.</returns>
public string FixLinks(string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string categoryPrefix = "[[" + Variables.Namespaces[14];

    foreach (Match link in WikiRegexes.SimpleWikiLink.Matches(ArticleText))
    {
        string original = link.Value;

        // category and image links, and links whose target or label starts with an
        // underscore, keep their underscores and pipe spacing
        bool keepUnderscores = original.StartsWith(categoryPrefix)
            || original.StartsWith("[[Image:")
            || original.StartsWith("[[image:")
            || original.StartsWith("[[_")
            || original.Contains("|_");

        string cleaned = original;
        if (!keepUnderscores)
        {
            cleaned = Regex.Replace(original.Replace("_", " "), " ?\\| ?", "|");
        }

        // protect "+" so that URL-decoding does not turn it into a space
        cleaned = HttpUtility.UrlDecode(cleaned.Replace("+", "%2B"));
        ArticleText = ArticleText.Replace(original, cleaned);
    }

    NoChange = (testText == ArticleText);
    return ArticleText;
}
/// <summary>
/// Simplifies some links in article wiki text, such as changing [[Dog|Dogs]] to [[Dog]]s,
/// and reports whether anything changed.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The simplified article text.</returns>
public string LinkSimplifier(string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string simplified = LinkSimplifier(ArticleText);
    NoChange = (testText == simplified);
    return simplified;
}
/// <summary>
/// Simplifies some links in article wiki text, such as changing [[Dog|Dogs]] to [[Dog]]s
/// and [[Dog|dog]] to [[dog]].
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The simplified article text.</returns>
public string LinkSimplifier(string ArticleText)
{
    foreach (Match link in WikiRegexes.PipedWikiLink.Matches(ArticleText))
    {
        string whole = link.Value;
        string target = link.Groups[1].Value;
        string label = link.Groups[2].Value;

        // label equals the target (apart from the case of the first letter): the pipe is redundant
        if (target == label || Tools.TurnFirstToLower(target) == label)
        {
            string unpiped = WikiRegexes.PipedWikiLink.Replace(whole, "[[$2]]");
            ArticleText = ArticleText.Replace(whole, unpiped);
            continue;
        }

        // label is the target plus a trailing "s": move the "s" outside the link
        if (target + "s" == label || Tools.TurnFirstToLower(target) + "s" == label)
        {
            string labelOnly = WikiRegexes.PipedWikiLink.Replace(whole, "$2");
            string singular = labelOnly.Substring(0, labelOnly.Length - 1);
            ArticleText = ArticleText.Replace(whole, "[[" + singular + "]]s");
        }
    }
    return ArticleText;
}
/// <summary>
/// Adds bullet points to external links after the "external links" header and
/// reports whether anything changed.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The modified article text.</returns>
public string BulletExternalLinks(string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string bulleted = BulletExternalLinks(ArticleText);
    NoChange = (testText == bulleted);
    return bulleted;
}
/// <summary>
/// Adds bullet points to external links after the "external links" header. Only
/// the text from the last such header onwards is touched; a blank line directly
/// in front of a link is removed as well.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text, or the input when there is no such header.</returns>
public string BulletExternalLinks(string ArticleText)
{
    // RightToLeft: find the last matching header in the text
    Match header = Regex.Match(ArticleText, "= ? ?external links? ? ?=", RegexOptions.IgnoreCase | RegexOptions.RightToLeft);
    if (header.Success)
    {
        string head = ArticleText.Substring(0, header.Index);
        string linksSection = ArticleText.Substring(header.Index);
        // a line starting with a bare or single-bracketed http link gets a bullet
        string bulleted = Regex.Replace(linksSection, "(\r\n)?(\r\n)(\\[?http)", "$2* $3");
        return head + bulleted;
    }
    return ArticleText;
}
/// <summary>
/// Fixes common spacing/capitalisation errors in category links: each match is
/// rewritten with the canonical category namespace prefix, and underscores in the
/// rest of the link become spaces.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string FixCategories(string ArticleText)
{
    Regex categoryLink = new Regex("\\[\\[ ?" + Variables.NamespacesCaseInsensitive[14] + " ?(.*?)\\]\\]");
    string canonicalPrefix = "[[" + Variables.Namespaces[14];

    foreach (Match found in categoryLink.Matches(ArticleText))
    {
        string remainder = found.Groups[1].Value.Replace("_", " ");
        ArticleText = ArticleText.Replace(found.Value, canonicalPrefix + remainder + "]]");
    }
    return ArticleText;
}
#endregion
#region other functions
/// <summary>
/// Converts HTML entities to unicode, with some deliberate exceptions, and reports
/// whether anything changed.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The modified article text.</returns>
public string Unicodify(string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string converted = Unicodify(ArticleText);
    NoChange = (testText == converted);
    return converted;
}
/// <summary>
/// Converts HTML entities to unicode, with some deliberate exceptions. Text that
/// contains a math tag is returned untouched.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The modified article text.</returns>
public string Unicodify(string ArticleText)
{
    if (Regex.IsMatch(ArticleText, "<[Mm]ath>"))
        return ArticleText;

    // Normalise numeric dash entities to the named form. (These lines and the two
    // below had been reduced to self-replacements, which did nothing.)
    ArticleText = Regex.Replace(ArticleText, "&#150;|&#8211;|&#x2013;", "&ndash;");
    ArticleText = Regex.Replace(ArticleText, "&#151;|&#8212;|&#x2014;", "&mdash;");

    // A free-standing ampersand is decoded; every other "&amp;" is doubled so that
    // the decoder below turns it back into "&amp;".
    ArticleText = ArticleText.Replace(" &amp; ", " & ");
    ArticleText = ArticleText.Replace("&amp;", "&amp;amp;");

    // entity rules registered in MakeRegexes
    foreach (KeyValuePair<Regex, string> k in RegexUnicode)
    {
        ArticleText = k.Key.Replace(ArticleText, k.Value);
    }

    try
    {
        ArticleText = HttpUtility.HtmlDecode(ArticleText);
    }
    catch (Exception ex)
    {
        // best effort: report the problem and carry on with the text as it stands
        System.Windows.Forms.MessageBox.Show(ex.ToString());
    }
    return ArticleText;
}
/// <summary>
/// '''Emboldens''' the first occurrence of the title, if it isn't already.
/// Self links to the title are bolded or unlinked first. Only the first 80
/// characters of the (masked) text are searched for the title.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The title of the article.</param>
/// <param name="NoChange">Value that indicates whether the title was left un-bolded by the final search step.
/// NOTE(review): it is also set to true on the early returns below, even though the
/// self-link handling may already have altered the text — confirm callers expect that.</param>
/// <returns>The modified article text.</returns>
public string BoldTitle(string ArticleText, string ArticleTitle, out bool NoChange)
{
//ignore date articles
if (WikiRegexes.Dates2.IsMatch(ArticleTitle))
{
NoChange = true;
return ArticleText;
}
string escTitle = Regex.Escape(ArticleTitle);
//remove self links first
// tregex matches a link whose whole target is the title
// (presumably first-letter case-insensitive via Tools.CaseInsensitive — verify)
Regex tregex = new Regex("\\[\\[(" + Tools.CaseInsensitive(escTitle) + ")\\]\\]");
if (!ArticleText.Contains("'''"))
{
// nothing is bold yet: turn the first self link into bold text
ArticleText = tregex.Replace(ArticleText, "'''$1'''", 1);
}
else
{
// something is already bold: just unlink self links (exact title and lower-case-first variant)
ArticleText = ArticleText.Replace("[[" + ArticleTitle + "]]", ArticleTitle);
ArticleText = ArticleText.Replace("[[" + Tools.TurnFirstToLower(ArticleTitle) + "]]", Tools.TurnFirstToLower(ArticleTitle));
}
// give up when the text starts with a link, list item or indent, or the title is already bold
if (Regex.IsMatch(ArticleText, "^(\\[\\[|\\*|:)") || Regex.IsMatch(ArticleText, "''' ?" + escTitle + " ?'''", RegexOptions.IgnoreCase))
{
NoChange = true;
return ArticleText;
}
// mask parts of the text; restored with AddBackMore before every return below
ArticleText = hider.HideMore(ArticleText);
// search for the title without a trailing parenthesised part, e.g. "Foo (band)" -> "Foo"
escTitle = Regex.Replace(ArticleTitle, " \\(.*?\\)$", "");
escTitle = Regex.Escape(escTitle);
// title not directly preceded by "[" and followed by a space or punctuation
Regex regexBold = new Regex("([^\\[]|^)(" + escTitle + ")([ ,.:;])", RegexOptions.IgnoreCase);
// only the first 80 characters are searched; the rest is set aside and re-attached afterwards
string strSecondHalf = "";
if (ArticleText.Length > 80)
{
strSecondHalf = ArticleText.Substring(80);
ArticleText = ArticleText.Substring(0, 80);
}
// bold text already present in the searched part: leave the article alone
if (ArticleText.Contains("'''"))
{
ArticleText = ArticleText + strSecondHalf;
ArticleText = hider.AddBackMore(ArticleText);
NoChange = true;
return ArticleText;
}
if (regexBold.IsMatch(ArticleText))
{
NoChange = false;
// embolden the first occurrence only
ArticleText = regexBold.Replace(ArticleText, "$1'''$2'''$3", 1);
}
else
NoChange = true;
ArticleText = ArticleText + strSecondHalf;
ArticleText = hider.AddBackMore(ArticleText);
return ArticleText;
}
/// <summary>
/// Replaces an image in the article and reports whether anything changed.
/// </summary>
/// <param name="OldImage">The old image to replace.</param>
/// <param name="NewImage">The new image.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The new article text.</returns>
public string ReplaceImage(string OldImage, string NewImage, string ArticleText, out bool NoChange)
{
    testText = ArticleText;
    string replaced = ReplaceImage(OldImage, NewImage, ArticleText);
    NoChange = (testText == replaced);
    return replaced;
}
/// <summary>
/// Replaces an image in the article. The old image name is matched with spaces and
/// underscores treated as interchangeable.
/// </summary>
/// <param name="OldImage">The old image to replace, with or without the image namespace prefix.</param>
/// <param name="NewImage">The new image, with or without the image namespace prefix.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The new article text.</returns>
public string ReplaceImage(string OldImage, string NewImage, string ArticleText)
{
    //remove image prefix
    OldImage = Regex.Replace(OldImage, "^" + Variables.Namespaces[6], "", RegexOptions.IgnoreCase).Replace("_", " ");
    NewImage = Regex.Replace(NewImage, "^" + Variables.Namespaces[6], "", RegexOptions.IgnoreCase).Replace("_", " ");

    // build the search pattern: literal name, space and underscore interchangeable
    OldImage = Regex.Escape(OldImage).Replace("\\ ", "[ _]");
    OldImage = Variables.NamespacesCaseInsensitive[6] + Tools.CaseInsensitive(OldImage);

    NewImage = Variables.Namespaces[6] + NewImage;
    // The new name is inserted literally: "$" is doubled so that Regex.Replace does
    // not read "$1", "$&" etc. in a file name as substitutions.
    string literalReplacement = NewImage.Replace("$", "$$");
    ArticleText = Regex.Replace(ArticleText, OldImage, literalReplacement);
    return ArticleText;
}
/// <summary>
/// Removes an image from the article, or comments it out. Image links
/// ([[Image:...]]) are handled first; when there is none, bare occurrences of the
/// image name (with or without the namespace prefix) are handled instead.
/// </summary>
/// <param name="Image">The image to remove, with or without the image namespace prefix.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="CommentOut">True to wrap the image in an html comment instead of deleting it.</param>
/// <param name="Comment">Text placed at the start of the html comment when commenting out.</param>
/// <returns>The new article text.</returns>
public string RemoveImage(string Image, string ArticleText, bool CommentOut, string Comment)
{
//remove image prefix
Image = Regex.Replace(Image, "^" + Variables.Namespaces[6], "", RegexOptions.IgnoreCase).Replace("_", " ");
// match the name literally, with space and underscore interchangeable
Image = Regex.Escape(Image).Replace("\\ ", "[ _]");
Image = Tools.CaseInsensitive(Image);
// greedy: runs to the last "]]" on the line; the bracket count below finds the real end of the link
Regex r = new Regex("\\[\\[" + Variables.NamespacesCaseInsensitive[6] + Image + ".*\\]\\]");
MatchCollection n = r.Matches(ArticleText);
if (n.Count > 0)
{
foreach (Match m in n)
{
string match = m.Value;
// i = number of characters consumed, j = current bracket nesting depth
int i = 0;
int j = 0;
foreach (char c in match)
{
if (c == '[')
j++;
else if (c == ']')
j--;
i++;
// depth back at zero: the image link ends here
if (j == 0)
{
if (match.Length > i)
match = match.Remove(i);
Regex t = new Regex(Regex.Escape(match));
// NOTE(review): m.Index refers to the text as it was before any replacement in this
// loop, and Comment/match are used as a replacement pattern ("$" is special) — confirm
if (CommentOut)
ArticleText = t.Replace(ArticleText, "<!-- " + Comment + " " + match + " -->", 1, m.Index);
else
ArticleText = t.Replace(ArticleText, "", 1);
break;
}
}
}
}
else
{
// no image link found: look for the bare image name, optionally with the namespace prefix
r = new Regex("(" + Variables.NamespacesCaseInsensitive[6] + ")?" + Image);
n = r.Matches(ArticleText);
foreach (Match m in n)
{
Regex t = new Regex(Regex.Escape(m.Value));
// NOTE(review): m.Index was computed against the unmodified text; after earlier
// replacements in this loop it may no longer point at this occurrence — confirm
if (CommentOut)
ArticleText = t.Replace(ArticleText, "<!-- " + Comment + " $0 -->", 1, m.Index);
else
ArticleText = t.Replace(ArticleText, "", 1, m.Index);
}
}
return ArticleText;
}
/// <summary>
/// Removes an image from the article (or comments it out) and reports whether
/// anything changed.
/// </summary>
/// <param name="Image">The image to remove.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="CommentOut">True to comment the image out instead of deleting it.</param>
/// <param name="Comment">Text placed inside the html comment when commenting out.</param>
/// <param name="NoChange">Set to true when the returned text equals the input.</param>
/// <returns>The new article text.</returns>
public string RemoveImage(string Image, string ArticleText, bool CommentOut, string Comment, out bool NoChange)
{
    testText = ArticleText;
    string withoutImage = RemoveImage(Image, ArticleText, CommentOut, Comment);
    NoChange = (testText == withoutImage);
    return withoutImage;
}
/// <summary>
/// Adds the category to the article and reports whether the text was altered.
/// </summary>
/// <param name="NewCategory">The new category.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The title of the article, passed on to the three-argument overload.</param>
/// <param name="NoChange">Set to true when the returned text equals the text passed in.</param>
/// <returns>The article text.</returns>
public string AddCategory(string NewCategory, string ArticleText, string ArticleTitle, out bool NoChange)
{
    // keep the input in the shared field so it can be compared with the result
    testText = ArticleText;
    string updated = AddCategory(NewCategory, ArticleText, ArticleTitle);
    NoChange = (testText == updated);
    return updated;
}
/// <summary>
/// Appends the category to the article unless the article already contains it.
/// </summary>
/// <param name="NewCategory">The new category, without the namespace prefix.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The title of the article; used for key word substitution and to detect template pages.</param>
/// <returns>The article text.</returns>
public string AddCategory(string NewCategory, string ArticleText, string ArticleTitle)
{
    // The name has to be followed by a sort key or the closing brackets; without that an existing
    // category that merely starts with NewCategory would wrongly count as already present.
    // Both the wiki's own namespace name and the English one are accepted.
    string existing = "\\[\\[ ?(?:" + Variables.NamespacesCaseInsensitive[14] + "|[Cc]ategory ?:) ?"
        + Regex.Escape(NewCategory) + " ?(?:\\||\\]\\])";
    if (Regex.IsMatch(ArticleText, existing))
        return ArticleText;

    string cat = "\r\n[[" + Variables.Namespaces[14] + NewCategory + "]]";
    cat = Tools.ApplyKeyWords(ArticleTitle, cat);

    // on pages in namespace 10 the category is wrapped in noinclude tags
    if (ArticleTitle.StartsWith(Variables.Namespaces[10]))
        ArticleText += "<noinclude>" + cat + "\r\n</noinclude>";
    else
        ArticleText += cat;

    return ArticleText;
}
/// <summary>
/// Re-categorises the article: replaces the old category with the new one, or just removes the
/// old one when the new category is already present.
/// </summary>
/// <param name="OldCategory">The old category to replace, with or without the namespace prefix.</param>
/// <param name="NewCategory">The new category, with or without the namespace prefix.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the result equals the text as it was after category formatting.</param>
/// <returns>The re-categorised article text.</returns>
public string ReCategoriser(string OldCategory, string NewCategory, string ArticleText, out bool NoChange)
{
    //remove category prefix
    OldCategory = Regex.Replace(OldCategory, "^" + Variables.Namespaces[14], "", RegexOptions.IgnoreCase);
    NewCategory = Regex.Replace(NewCategory, "^" + Variables.Namespaces[14], "", RegexOptions.IgnoreCase);

    //format categories properly
    ArticleText = FixCategories(ArticleText);

    // the snapshot is taken after formatting, so formatting alone does not count as a change
    testText = ArticleText;

    if (Regex.IsMatch(ArticleText, "\\[\\[" + Variables.NamespacesCaseInsensitive[14] + Tools.CaseInsensitive(Regex.Escape(NewCategory)) + "( ?\\|| ?\\]\\])"))
    {
        // the new category is already there, so only the old one has to go
        ArticleText = RemoveCategory(OldCategory, ArticleText);
    }
    else
    {
        string oldPattern = Regex.Escape(OldCategory);
        oldPattern = Tools.CaseInsensitive(oldPattern);
        oldPattern = Variables.Namespaces[14] + oldPattern + "( ?\\|| ?\\]\\])";

        // "$" is special in a replacement pattern; doubling it inserts the name literally.
        // The trailing $1 puts back whatever followed the old name (sort key bar or closing brackets).
        string replacement = (Variables.Namespaces[14] + NewCategory).Replace("$", "$$") + "$1";

        ArticleText = Regex.Replace(ArticleText, oldPattern, replacement);
    }

    NoChange = (testText == ArticleText);
    return ArticleText;
}
/// <summary>
/// Removes a category from an article and reports whether the text was altered.
/// </summary>
/// <param name="strOldCat">The old category to remove.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the text passed in.</param>
/// <returns>The article text without the old category.</returns>
public string RemoveCategory(string strOldCat, string ArticleText, out bool NoChange)
{
    // keep the input in the shared field so it can be compared with the result
    testText = ArticleText;
    string updated = RemoveCategory(strOldCat, ArticleText);
    NoChange = (testText == updated);
    return updated;
}
/// <summary>
/// Removes a category, with or without a sort key, from an article.
/// </summary>
/// <param name="strOldCat">The old category to remove.</param>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The article text without the old category.</returns>
public string RemoveCategory(string strOldCat, string ArticleText)
{
    //format categories properly
    ArticleText = FixCategories(ArticleText);

    string name = Tools.CaseInsensitive(Regex.Escape(strOldCat));
    string categoryLink = "\\[\\[" + Variables.NamespacesCaseInsensitive[14] + " ?" + name + "( ?\\]\\]| ?\\|[^\\|]*?\\]\\])";

    // First pass: when the category is directly followed by a line break and another "[",
    // the line break is removed along with it. Second pass: every remaining occurrence is
    // removed on its own, leaving any following line break in place.
    string withoutFollowedLinks = Regex.Replace(ArticleText, categoryLink + "\r\n\\[", "[");
    return Regex.Replace(withoutFollowedLinks, categoryLink, "");
}
/// <summary>
/// Appends [[Category:Living people]] to an article that has a "births" category for 1910 or
/// later and no category for death, disappearance, missing year of death or living people.
/// The sort key of the births category, if any, is reused.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the text is returned unaltered.</param>
/// <returns>The new article text.</returns>
public string LivingPeople(string ArticleText, out bool NoChange)
{
    NoChange = true;
    testText = ArticleText;

    // nothing to do when the article is already sorted into one of these categories
    bool alreadyClassified = Regex.IsMatch(ArticleText, "\\[\\[ ?Category ?:[ _]?([0-9]{1,2}[ _]century[ _]deaths|[0-9s]{4,5}[ _]deaths|Disappeared[ _]people|Living[ _]people|Year[ _]of[ _]death[ _]missing|Possibly[ _]living[ _]people)", RegexOptions.IgnoreCase);
    if (alreadyClassified)
        return ArticleText;

    Match births = Regex.Match(ArticleText, "\\[\\[ ?Category ?:[ _]?([0-9]{4})[ _]births(\\|.*?)?\\]\\]", RegexOptions.IgnoreCase);
    if (!births.Success)
        return ArticleText;

    // people born before 1910 are not tagged
    if (int.Parse(births.Groups[1].Value) < 1910)
        return ArticleText;

    // carry over the "|sort key]]" tail of the births category when it has one
    string birthCategory = births.Value;
    string tail = birthCategory.Contains("|")
        ? Regex.Match(birthCategory, "\\|.*?\\]\\]").Value
        : "]]";

    ArticleText += "[[Category:Living people" + tail;

    NoChange = (testText == ArticleText);
    return ArticleText;
}
/// <summary>
/// Converts/substs some deprecated templates and reports whether the text was altered.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the text passed in.</param>
/// <returns>The new article text.</returns>
public string Conversions(string ArticleText, out bool NoChange)
{
    // keep the input in the shared field so it can be compared with the result
    testText = ArticleText;
    string updated = Conversions(ArticleText);
    NoChange = (testText == updated);
    return updated;
}
/// <summary>
/// Rewrites some interwiki prefixes and the "msg:" template prefix, then applies every
/// pattern/replacement pair held in RegexConversion.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The new article text.</returns>
public string Conversions(string ArticleText)
{
    //Use proper codes
    ArticleText = ArticleText
        .Replace("[[zh-tw:", "[[zh:")
        .Replace("[[nb:", "[[no:")
        .Replace("[[dk:", "[[da:")
        .Replace("{{msg:", "{{");

    foreach (KeyValuePair<Regex, string> conversion in RegexConversion)
        ArticleText = conversion.Key.Replace(ArticleText, conversion.Value);

    return ArticleText;
}
/// <summary>
/// Substs some user talk templates (test, 3RR, spam and welcome families).
/// </summary>
/// <param name="TalkPageText">The wiki text of the talk page.</param>
/// <returns>The new text.</returns>
public string SubstUserTemplates(string TalkPageText)
{
    // Applied in this order; group 2 of each pattern is the template call without any
    // "template:" prefix, and it is what ends up after "subst:".
    string[] templatePatterns = new string[]
    {
        "\\{\\{(template:)?(test[n0-6]?[ab]?)\\}\\}",
        "\\{\\{(template:)?(test[n0-6]?[ab]?-n\\|.*?)\\}\\}",
        "\\{\\{(template:)?(3RR[0-5]?)\\}\\}",
        "\\{\\{(template:)?(spam[0-5][ab]?)\\}\\}",
        "\\{\\{(template:)?(spam[0-5]?-n\\|.*?)\\}\\}",
        "\\{\\{(template:)?(welcome[0-6]|welcomeip|anon|welcome-anon)\\}\\}"
    };

    foreach (string pattern in templatePatterns)
        TalkPageText = Regex.Replace(TalkPageText, pattern, "{{subst:$2}}", RegexOptions.IgnoreCase);

    return TalkPageText;
}
/// <summary>
/// If necessary, adds/removes wikify, uncategorised or stub tags, and reports whether the
/// text was altered.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The title of the article.</param>
/// <param name="NoChange">Set to true when the returned text equals the text passed in.</param>
/// <param name="Summary">Edit summary; passed by reference to the three-argument overload.</param>
/// <returns>The new article text.</returns>
public string Tagger(string ArticleText, string ArticleTitle, out bool NoChange, ref string Summary)
{
    // keep the input in the shared field so it can be compared with the result
    testText = ArticleText;
    string tagged = Tagger(ArticleText, ArticleTitle, ref Summary);
    NoChange = (testText == tagged);
    return tagged;
}
/// <summary>
/// Updates by-date tags, removes stub tags from long articles and, where the article has no
/// templates other than stub templates, adds one of: an uncategorised tag, a wikify tag or a stub tag.
/// Redirects and pages outside the main namespace are returned untouched.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <param name="ArticleTitle">The title of the article.</param>
/// <param name="Summary">Edit summary; a description of any tag that is added gets appended to it.</param>
/// <returns>The new article text.</returns>
public string Tagger(string ArticleText, string ArticleTitle, ref string Summary)
{
    if (Tools.IsRedirect(ArticleText) || !Tools.IsMainSpace(ArticleTitle))
        return ArticleText;

    // these measurements are taken from the text as passed in, before any of the edits below
    double textLength = ArticleText.Length + 1;
    string commentFree = WikiRegexes.Comments.Replace(ArticleText, "");
    int wordCount = Tools.WordCount(commentFree);

    //update by-date tags
    foreach (KeyValuePair<Regex, string> tagFix in RegexTagger)
        ArticleText = tagFix.Key.Replace(ArticleText, tagFix.Value);

    //remove stub tags from long articles
    if (wordCount > StubMaxWordCount && WikiRegexes.Stub.IsMatch(commentFree))
    {
        MatchEvaluator keepSectionStubs = new MatchEvaluator(stubChecker);
        ArticleText = WikiRegexes.Stub.Replace(ArticleText, keepSectionStubs).Trim();
    }

    // no tag is added once the article carries any template whose text lacks "stub"
    foreach (Match template in WikiRegexes.Template.Matches(ArticleText))
    {
        if (!template.Value.Contains("stub"))
            return ArticleText;
    }

    double linkCount = Tools.LinkCount(commentFree);
    double linkRatio = linkCount / textLength;

    if (wordCount > 6 && !WikiRegexes.Category.IsMatch(commentFree) && !Regex.IsMatch(ArticleText, @"\{\{[Uu]ncategori[zs]ed"))
    {
        // no category and not yet tagged as uncategorised
        if (WikiRegexes.Stub.IsMatch(commentFree))
        {
            ArticleText = ArticleText + "\r\n\r\n{{Uncategorizedstub|February 2007}}";
            Summary = Summary + ", added [[:Category:Uncategorized stubs|uncategorised]] tag";
        }
        else
        {
            ArticleText = ArticleText + "\r\n\r\n{{Uncategorized|February 2007}}";
            Summary = Summary + ", added [[:Category:Category needed|uncategorised]] tag";
        }
    }
    else if (linkCount < 3 && linkRatio < 0.0025)
    {
        // very few links, both in absolute terms and relative to the text length
        ArticleText = "{{Wikify|February 2007}}\r\n\r\n" + ArticleText;
        Summary = Summary + ", added [[:Category:Articles that need to be wikified|wikify]] tag";
    }
    else if (commentFree.Length <= 300 && !WikiRegexes.Stub.IsMatch(commentFree))
    {
        // short article without a stub tag
        ArticleText += "\r\n\r\n\r\n{{stub}}";
        Summary = Summary + ", added stub tag";
    }

    return ArticleText;
}
private string stubChecker(Match m)
{// Match evaluator: keeps a match that fits Variables.SectStub, blanks any other.
    return Regex.IsMatch(m.Value, Variables.SectStub) ? m.Value : "";
}
#endregion
#region unused
/// <summary>
/// Bypasses all redirects in the article: an unpiped link whose target is a redirect is
/// rewritten as a piped link to the redirect's target, keeping the original link text.
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The new article text.</returns>
public string BypassRedirects(string ArticleText)
{//checks links to make them bypass redirects and (TODO) disambigs
    // Links already looked up. One replacement below rewrites every occurrence of a link,
    // so repeating the lookup for a later occurrence would achieve nothing.
    Dictionary<string, bool> checkedLinks = new Dictionary<string, bool>();

    foreach (Match m in WikiRegexes.WikiLinksOnly.Matches(ArticleText))
    {
        string link = m.Value;
        if (checkedLinks.ContainsKey(link))
            continue;

        string article = m.Groups[1].Value;

        //get text
        string text;
        try
        {
            text = Tools.GetArticleText(article);
        }
        catch
        {
            // best effort: a link whose text cannot be fetched is left as it is
            // (and may be retried if it occurs again)
            continue;
        }
        checkedLinks[link] = true;

        //test if redirect
        if (Tools.IsRedirect(text))
        {
            string directLink = Tools.RedirectTarget(text).Replace("_", " ");
            directLink = "[[" + directLink + "|" + article + "]]";
            ArticleText = ArticleText.Replace(link, directLink);
        }
    }
    return ArticleText;
}
/// <summary>
/// Fixes minor problems, such as abbreviations and miscapitalisations
/// </summary>
/// <param name="ArticleText">The wiki text of the article.</param>
/// <returns>The new article text.</returns>
public string MinorThings(string ArticleText)
{
    ArticleText = Regex.Replace(ArticleText, "[Aa]\\.[Kk]\\.[Aa]\\.?", "also known as");
    ArticleText = ArticleText.Replace("e.g.", "for example");
    ArticleText = ArticleText.Replace("i.e.", "that is");

    // Capitalise lower-case day and month names. The \b anchors restrict this to whole words and
    // the evaluator rewrites only the matched positions, so a name embedded in a longer token
    // (for instance "my_june_photo.jpg") is no longer altered.
    // "march" and "may" are not in the list (presumably because they are also ordinary words).
    return Regex.Replace(
        ArticleText,
        "\\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday|january|february|april|june|july|august|september|october|november|december)\\b",
        delegate(Match m) { return Tools.TurnFirstToUpper(m.Value); });
}
//[http://en.wikipedia.org/wiki/Dog] to [[Dog]]
//private string ExtToInternalLinks(string ArticleText)
//{
// foreach (Match m in Regex.Matches(ArticleText, "\\[http://en\\.wikipedia\\.org/wiki/.*?\\]"))
// {
// string a = HttpUtility.UrlDecode(m.ToString());
// if (a.Contains(" "))
// {
// int intP;
// //string a = n;
// intP = a.IndexOf(" ");
// string b = a.Substring(intP);
// a = a.Remove(intP);
// b = b.TrimStart();
// a = a.Replace("_", " ");
// ArticleText = ArticleText.Replace(m.ToString(), a);
// }
// }
// ArticleText = Regex.Replace(ArticleText, "\\[http://en\\.wikipedia\\.org/wiki/(.*?)\\]", "[[$1]]");
// return ArticleText;
//}
#endregion
}
}