User:AllyUnion/did you know.pl

From Wikipedia, the free encyclopedia

The following code is licensed under the GPL and the Creative Commons Attribution License. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)

  • This code is not working -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
  • FYI: The system calls to python2.3 are calls to the pywikipediabot framework... as perl takes like so many packages just to download files and I don't have access to an English module version that allows me to post to the English wikipedia. -- AllyUnion (talk) 03:27, 9 Apr 2005 (UTC)

Basic idea:

  1. Leave three blocks of "...that" on Template talk:Did you know
  2. Move all other blocks of "...that" to Wikipedia:Recent additions
  3. Move oldest blocks on Wikipedia:Recent additions to an archive page if Wikipedia:Recent additions exceeds 50 "...that" lines.

[edit] didyouknow.pl

#!/usr/bin/perl -w

# Author: Jason Y. Lee
# Purpose: Wikipedia's Did you know archival process

# Special thanks to dysprosia, and the person who helped me in #wikipedia

# Assumptions:

# On Template talk:Did you know:
# A did you know line is in the following format:
# *...that <TEXT><br>
# Where <TEXT> is any text of any length, no matter if there is a newline or not.

# On Wikipedia:Recent additions and any archive pages after Wikipedia:Recent additions 25:
# A did you know line starts either with:
# [[Image: OR ...that
# A did you know line ends with either:
# A question mark or a HTML line break (<br>)

use Tie::File;

# Filesystem layout. The pieces are concatenated as-is (no separator is
# added), so each directory component carries its own trailing slash.
my $HOME = '<insert home directory>';
my $BOTDIR = 'wikipedia/bots/kurando-san/';
my $LOGDIR = 'dyklogs/';

# Configuration file holding the number of the last archive page.
my $configfile = "${HOME}${BOTDIR}didyouknow.cfg";

# Working files:
#   $logfile1 - downloaded copy of the talk page
#   $logfile2 - downloaded copy of Wikipedia:Recent additions
#   $logfile3 - downloaded copy of the current archive page
#   $logfile4 - rebuilt Wikipedia:Recent additions
#   $logfile5 - prefix for rebuilt archive pages (number and ".log" are appended)
my ($logfile1, $logfile2, $logfile3, $logfile4, $logfile5) =
        map { "${HOME}${BOTDIR}${LOGDIR}$_" } qw(dyk1.log dyk2.log dyk3.log newra.log newra-);

# Wiki page titles.
my $talkpage = 'Template talk:Did you know';
my $pagename = 'Wikipedia:Recent additions';
my $archive = 'Wikipedia:Recent additions';
my $archivenum = '';

# Four header lines written at the top of every freshly created archive page.
my $archiveheader = join("\n",
        '{{DYK archive header}}',
        '{{DYK archive nav}}',
        '',
        '==Did you know...==',
        '');

# Python page-existence program (currently disabled).
# The two fragments were meant to be wrapped around a page title; the
# resulting Python exits with 0 if the page exists and 1 otherwise.
#$pageexist1 = "\"import config, wikipedia\nimport sys\nmysite = wikipedia.getSite()\nif (wikipedia.PageLink(mysite, \'";
#$pageexist2 = "\').exists()):\n\tsys.exit(0)\nelse:\n\tsys.exit(1)\n\"";

# Posting a page.
# Fragments of a Python program handed to "python2.3 -c" further down.
# $postprog1 opens the shell double quote that wraps the whole program
# (the code that uses these fragments appends the closing quote itself).
# $postprog2 .. $postprog5 are interleaved with a log file path, a page
# title and an edit comment; the generated Python reads the log file and
# puts its content on the page.  $postprog2 .. $postprog5 can be repeated
# in one program to post several pages.
$postprog1 = "\"import config, wikipedia\nmysite = wikipedia.getSite()\n";
$postprog2 = "logfile = \'";
$postprog3 = "\'\npagename = \'";
$postprog4 = "\'\ncomment = \'";
$postprog5 = "\'\nlog = file(logfile, 'r')\npage = log.read()\nlog.close()\nwikipedia.PageLink(mysite, pagename).put(page, comment)\n";

# Read the number of the last archive page from the configuration file.
#
# The file is tied rather than slurped so that the matching line can be
# rewritten in place at the end of the run: $lastarchive keeps a reference
# to that line.
tie @config, 'Tie::File', $configfile or die "Cannot tie $configfile: $!";
$lastarchive = "";
foreach $line (@config)
{
        # Anchored so that comment lines mentioning "lastarchive =" are not
        # mistaken for the setting, and tolerant of any spacing around '='.
        if ($line =~ /^\s*lastarchive\s*=\s*(\d+)\s*$/)
        {
                $archivenum = $1;
                $archive = "Wikipedia:Recent additions " . $archivenum;
                $lastarchive = \$line;
        }
}

# Without the setting there is no archive page to work on and nowhere to
# store the new archive number, so stop before touching any wiki page.
die "No 'lastarchive = <number>' line found in $configfile\n" unless ref $lastarchive;

# Get the pages
# Fragments of a Python program (run through "python2.3 -c") that downloads
# one wiki page and writes it, encoded as iso-8859-1, to a log file.
$pythonprog1 = "\"import config, wikipedia\nmysite = wikipedia.getSite()\n";
$pythonprog2 = "logfile = ";
$pythonprog3 = "\npagename = ";
$pythonprog4 = "\nlog = file(logfile, 'w')\nlog.write(wikipedia.getPage(mysite, pagename, True, True, False).encode('iso-8859-1'))\nlog.close()\n\"";

# Download the talk page, Wikipedia:Recent additions and the current archive.
# A failed download is fatal: everything below parses these files and posts
# the result back to the wiki, so continuing with a missing or stale file
# would overwrite live pages with wrong content.
foreach my $fetch ([$logfile1, $talkpage], [$logfile2, $pagename], [$logfile3, $archive])
{
        my ($fetchlog, $fetchpage) = @$fetch;
        system("python2.3 -c " . $pythonprog1 . $pythonprog2 . '\'' . $fetchlog . '\'' . $pythonprog3 . '\'' . $fetchpage . '\'' . $pythonprog4) == 0
                or die "Failed to download '$fetchpage' into $fetchlog (status $?)\n";
}

# Analysis of 'Template talk:Did you know'
tie @dyklog, 'Tie::File', $logfile1 or die "Cannot tie $logfile1: $!";

# @dykmat is a matrix of blocks: $dykmat[block][0] holds the block size
# (number of entries + 1) and $dykmat[block][1..] hold the entries.
@dykmat = ();
$dykblockcount = 0;
$y = 0;
$line = "";
$x = 0;

# Skip forward to the "ARCHIVE USED SUGGESTIONS HERE" heading.  The scan is
# bounded so that a page without the heading stops the run instead of
# looping forever past the end of the file.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/=+ARCHIVE USED SUGGESTIONS HERE=+/i))
{
#       print $x, ". ", $dyklog[$x], "\n";
        $x++;
}
die "Heading 'ARCHIVE USED SUGGESTIONS HERE' not found in $logfile1\n" if ($x >= scalar(@dyklog));

# Skip forward to the last line of the instructions under that heading.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/\'\'include a link to the used picture behind the fact in which it has been \'\'\'used\'\'\' on the front page\.\'\'<br>/i))
{
#       print $x, ". ", $dyklog[$x], "\n";
        $x++;
}
die "Instruction line ending the archive heading not found in $logfile1\n" if ($x >= scalar(@dyklog));

# Step past the instruction line; $x now points at the first candidate line.
#print $x, ". ", $dyklog[$x], "\n";
$x++;
#print $x, ". ", $dyklog[$x], "\n";

# Walk the used-suggestions section of the talk page, line by line, until the
# "All older items have been archived at ..." line or the end of the file.
#
# Entries ("*...that" lines plus their continuation lines) are collected into
# @dykmat, grouped into blocks that are separated by empty or whitespace-only
# lines.  $dykblockcount is the index of the block being filled and $y the
# number of entries stored in it so far.
#
# While $dykblockcount is below 3 the lines are kept and $x advances.  From the
# fourth block on, every visited line is spliced out of the tied array (and
# therefore out of the talk page log) instead, so $x stays where it is.
while ((not ($dyklog[$x] =~ m/All older items have been archived at \[\[Wikipedia:Recent additions\]\]/ig)) && ($x < scalar(@dyklog)))
{
        # Normalise the line in place:
        #  - "* ... that" with any spacing becomes "*...that"
        #  - a signature (optional dash, "[[User...:" up to "(UTC)") is removed
        #  - empty parentheses are removed
        #  - a whitespace character between "?" and "<br" is removed
        #  - "[[Image" becomes "[[:Image" (a link instead of an inline image)
        $dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
        $dyklog[$x] =~ s/(?:--|&[mn]dash;|&#15[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
        $dyklog[$x] =~ s/\(\)//g;
        $dyklog[$x] =~ s/\?\s<br/\?<br/ig;
        $dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
        $loopflag = 1;
        $line = "";

#       print "$x. 1\n";

        # Case 1: the line starts a new entry.
        if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
        {
#               print "$x. 2\n";
                $line = $dyklog[$x];

                # Keep the line for the first three blocks, remove it otherwise.
                if ($dykblockcount >= 3)
                {
                        splice(@dyklog, $x, 1);
                }
                else
                {
                        $x++;
                }

                # Gather continuation lines until the next entry, an empty or
                # whitespace-only line, or the end of the file.  The line that
                # stops the scan is left for the outer loop to handle.
                while (($loopflag == 1) && ($x < scalar(@dyklog)))
                {
#                       print "$x. 3\n";
                        # Same normalisation as above, applied to the next line.
                        $dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
                        $dyklog[$x] =~ s/(?:--|&[mn]dash;|&#15[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
                        $dyklog[$x] =~ s/\(\)//g;
                        $dyklog[$x] =~ s/\?\s<br/\?<br/ig;
                        $dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
                        if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
                        {
#                               print "$x. 4\n";
                                # Next entry starts here.
                                $loopflag = 0;
                        }
                        elsif ($dyklog[$x] eq "")
                        {
#                               print $x, ". Block detected\n";
#                               print "$x. 5\n";
                                # Empty line: end of the block.
                                $loopflag = 0;
                        }
                        else
                        {
#                               print "$x. 6\n";
                                $test = $dyklog[$x];
                                $test =~ s/\s+//g;
                                if ($test eq "")
                                {
#                                       print "$x. 7\n";
#                                       print $x, ". Block detected\n";
                                        # Whitespace-only line: also ends the block.
                                        $loopflag = 0;
                                }
                                else
                                {
#                                       print "$x. 8\n";
                                        # Continuation text: join it to the entry
                                        # with a single space.
                                        $line .= " " . $dyklog[$x];
                                        if ($dykblockcount >= 3)
                                        {
                                                splice(@dyklog, $x, 1);
                                        }
                                        else
                                        {
                                                $x++;
                                        }
                                }
                        }
                }
#               print "$x. 9\n";
#               print $x, ". -> (", $dykblockcount, ", ", $y, "): ", $line, "\n\n";
                # Entries are stored from index 1; index 0 is reserved for the size.
                $y++;
                $dykmat[$dykblockcount][$y] = $line;
        }
        # Case 2: an empty line closes the current block.
        elsif ($dyklog[$x] eq "")
        {
#               print "$x. 10\n";
                if ($y == 0)
                {
                        # No entry was collected since the last boundary: cancel
                        # out the increment below so empty blocks are not counted.
                        $dykblockcount--;
                }
                else
                {
                        # Record the block size as (number of entries + 1).
                        $dykmat[$dykblockcount][0] = $y + 1;
                }
                $y = 0;
                $dykblockcount++;
                # The check uses the block count after the increment.
                if ($dykblockcount >= 3)
                {
                        splice(@dyklog, $x, 1);
                }
                else
                {
                        $x++;
                }
        }
        # Case 3: any other line.
        else
        {
#               print "$x. 11\n";
                $test = $dyklog[$x];
                $test =~ s/\s+//g;
                if ($test eq "")
                {
#                       print "$x. 12\n";
                        # Whitespace-only line: a block boundary, handled like
                        # the empty-line case above.
                        if ($y == 0)
                        {
                                $dykblockcount--;
                        }
                        else
                        {
                                $dykmat[$dykblockcount][0] = $y + 1;
                        }
                        $y = 0;
                        $dykblockcount++;
                }
                # Lines that are neither an entry nor a boundary are not stored;
                # they are kept or removed like every other line.
                if ($dykblockcount >= 3)
                {
                        splice(@dyklog, $x, 1);
                }
                else
                {
                        $x++;
                }
        }

#       print "Exit";
}

#print $dykblockcount, "\n";

#die;

#for ($x = 0; $x < $dykblockcount; $x++)
#{
#       for ($y = 1; $y < $dykmat[$x][0]; $y++)
#       {
#               print $y, ". ", $dykmat[$x][$y], "\n";
#       }
#       print "\n";
#}

# Three blocks always stay on the talk page.  With three or fewer blocks
# there is nothing to move, so stop here, before any page is posted, and say
# why instead of dying silently.
if ($dykblockcount <= 3)
{
        die "Nothing to archive: only $dykblockcount block(s) found on '$talkpage'\n";
}

# Find the image left and right.
#
# Looks at the first image on Wikipedia:Recent additions to decide on which
# side the first newly added image is placed: "left" if that image line
# mentions "left", "right" otherwise.

tie @wralog, 'Tie::File', $logfile2 or die "Cannot tie $logfile2: $!";

$side = "left";

# Bounded scan: a page without any image must not loop past the end of the
# file forever.
$x = 0;
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/\[\[Image:/i))
{
        $x++;
}

# Only inspect the line if an image was found; otherwise keep the default.
if ($x < scalar(@wralog))
{
        $side = ($wralog[$x] =~ m/left/i) ? "left" : "right";
}

# Process the talk page.
#
# Rewrites every collected talk page entry into the Recent additions format:
# the leading "*" is dropped, and an entry that carries exactly one image
# link gets a 100px image line in front of it, alternating between the left
# and the right side from one image to the next.
foreach my $blk (0 .. $dykblockcount - 1)
{
        for (my $idx = 1; $idx < $dykmat[$blk][0]; $idx++)
        {
                my $entry = $dykmat[$blk][$idx];
                my $prefix = "";

                $entry =~ s/^\*\.\.\.that/\.\.\.that/ig;

                # Exactly two pieces means the entry contains one image link.
                my @pieces = split /\(?\[\[:?Image:/i, $entry;
                if (@pieces == 2)
                {
                        # Reduce the second piece to the bare image name.
                        my $name = $pieces[1];
                        $name =~ s/\]\]\)?<br\s*\/?>//ig;
                        $name =~ s/\]\]\)?//g;
                        $name =~ s/\n//g;

                        $prefix = "[[Image:" . $name . "|100px|" . $side . "]]" . "\n";
                        $side = ($side eq "left") ? "right" : "left";

                        # Remove the image link from the entry text itself.
                        $entry =~ s/\(*?\s*?\[\[:*?Image:.*?\]\]\s*?\)*?//ig;
                }

                # Tidy whatever is left between the question mark and the <br>.
                $entry =~ s/\?\s*?\)*?<br/\?<br/ig;
                $dykmat[$blk][$idx] = $prefix . $entry;
        }
}

# Analysis of 'Wikipedia:Recent additions'

# @wramat has the same layout as @dykmat: $wramat[block][0] is the block
# size and $wramat[block][1..] are the entries.  $wrablockcount stays at -1
# until the first block is seen.
@wramat = ();
$wrablockcount = -1;
$y = 1;
$line = "";
$wracount = 0;
#$limitflag = False;
$x = 0;
#$wralast = 0;

# Find the first "newly archived items" marker.  The bounds test comes first
# so that no element past the end of the file is read, and a page without
# the marker stops the run instead of being rebuilt from nothing.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
        $x++;
}
die "Marker 'newly archived items should go in at the top' not found in $logfile2\n" if ($x >= scalar(@wralog));

# Continue with the line after the marker.
$x++;

# Collect the entries currently on Wikipedia:Recent additions into @wramat,
# reading from the line after the first marker up to the next occurrence of
# the marker (or the end of the file).
#
# An entry starts with a line containing "[[Image:" or with a "...that" line
# and runs until a line containing "?" or a <br> tag.  Empty or
# whitespace-only lines separate blocks.  $y is the next free slot in the
# current block (slot 0 is reserved for the size) and $wracount the total
# number of entries seen.
while ((not ($wralog[$x] =~ m/<!-- newly archived items should go in at the top -->/g)) && ($x < scalar(@wralog)))
{
        # Normalise "... that" with any spacing to "...that".
        $wralog[$x] =~ s/^\s*?\.\.\.\s*?that/\.\.\.that/ig;
#       print "Processing: ", $wralog[$x], "\n";
        # Entry that starts with an image line.
        if ($wralog[$x] =~ m/\[\[Image:/ig)
        {
                # The first entry opens block 0.
                if ($wrablockcount == -1)
                {
                        $wrablockcount++;
                }
#               print $x, ". (nimage). ", $wralog[$x], "\n";
                $line = $wralog[$x] . "\n";
                # Append the following lines until one ends the entry.
                while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
                {
#                       if ($limitflag)
#                       {
#                               splice(@wralog, $x, 1);
#                       }
#                       else
#                       {
                        $x++;
#                       }
#                       print $x, ". (image). ", $wralog[$x], "\n";
                        $line .= $wralog[$x] . "\n";
                }
                $wramat[$wrablockcount][$y] = $line;
                $y++;
                $wracount++;
        }
        # Entry without an image.
        elsif ($wralog[$x] =~ m/^\.\.\.that/ig)
        {
                if ($wrablockcount == -1)
                {
                        $wrablockcount++;
                }
                $line = "";
                # Append lines up to, but not including, the one ending the entry.
                while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
                {
#                       print $x, ". (that). ", $wralog[$x], "\n";
                        $line .= $wralog[$x] . "\n";
#                       if ($limitflag)
#                       {
#                               splice(@wralog, $x, 1);
#                       }
#                       else
#                       {
                        $x++;
#                       }
                }
#               print $x, ". (that). ", $wralog[$x], "\n";
                # The closing line belongs to the entry as well.
                $line .= $wralog[$x] . "\n";
                $wramat[$wrablockcount][$y] = $line;
                $y++;
                $wracount++;
        }
        # Empty line: block boundary.
        elsif ($wralog[$x] eq "")
        {
#               print $x, ". Block detected!\n";
#               if ($wracount > 50)
#               {
#                       $limitflag = True;
#               }
                if ($wrablockcount != -1)
                {
                        $wramat[$wrablockcount][0] = $y;        # Save the size
                }
                $y = 1;                         # Reset the line count for the block
                $wrablockcount++;
        }
        else
        {
                # A whitespace-only line is a block boundary too; any other
                # line is ignored.
                $test = $wralog[$x];
                $test =~ s/\s+//g;
                if ($test eq "")
                {
#                       print $x, ". Block detected!\n";
#                       if ($wracount > 50)
#                       {
#                               $limitflag = True;
#                       }
                        if ($wrablockcount != -1)
                        {
                                $wramat[$wrablockcount][0] = $y;
                        }
                
                        $y = 1;
                        $wrablockcount++;
                }
        }
#       if ($limitflag)
#       {
#               splice(@wralog, $x, 1);
#       }
#       else
#       {
        $x++;
#       }
}

# Replace the line the scan stopped on with the marker, preceded by a newline.
splice(@wralog, $x, 1, "\n<!-- newly archived items should go in at the top -->");

# Append the blocks read from "Wikipedia:Recent additions" to the "Did you
# know" matrix, so that the talk page blocks come first and the existing
# Recent additions blocks follow them.  Each block is copied slot by slot,
# including slot 0, which holds the block size.
foreach my $src (0 .. $wrablockcount - 1)
{
        my $dest = $dykblockcount++;
        for (my $slot = 0; $slot < $wramat[$src][0]; $slot++)
        {
                $dykmat[$dest][$slot] = $wramat[$src][$slot];
        }
}

# Write the rebuilt Wikipedia:Recent additions page to $logfile4:
#   1. everything up to and including the first marker,
#   2. blocks 3.. of @dykmat until the running size total reaches 60,
#   3. everything from the second marker to the end of the page.
# $i is left pointing at the first block that did not fit; the archive
# steps further down start from there.
$x = 0;
open(my $ralog, ">", $logfile4) or die "Cannot write $logfile4: $!";
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
        print {$ralog} $wralog[$x], "\n";
        $x++;
}
# The marker line itself (skipped if the page has no marker at all).
if ($x < scalar(@wralog))
{
        print {$ralog} $wralog[$x], "\n";
}
$x++;

# Blocks 0-2 stay on the talk page, so output starts at block 3.
$total = 0;
for ($i = 3; (($i < $dykblockcount) && ($total < 60)); $i++)
{
        for ($j = 1; $j < $dykmat[$i][0]; $j++)
        {
                print {$ralog} $dykmat[$i][$j], "\n";
#               print "(", $i, ", ", $j, "). ", $dykmat[$i][$j], "\n";
        }
        print {$ralog} "\n";
        $total += $dykmat[$i][0];
}

# Skip the old entries (they were merged into @dykmat and written above or
# are left for the archive) and copy the rest of the page unchanged.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
        $x++;
}
while ($x < scalar(@wralog))
{
        print {$ralog} $wralog[$x], "\n";
        $x++;
}
# Buffered write errors only show up at close time.
close($ralog) or die "Cannot close $logfile4: $!";

#print "----\n";

#print $wracount, "\n";

# Post the new pages
# One Python program uploads both the trimmed talk page ($logfile1) and the
# rebuilt Recent additions page ($logfile4).  A failed upload is fatal: the
# archive steps below assume these pages are already live, and the working
# files are kept on disk for inspection because the cleanup is never reached.
$postcmd = "python2.3 -c " . $postprog1
        . $postprog2 . $logfile1 . $postprog3 . $talkpage . $postprog4 . "Testing archival bot" . $postprog5
        . $postprog2 . $logfile4 . $postprog3 . $pagename . $postprog4 . "Testing archival bot" . $postprog5
        . "\"";
system($postcmd) == 0
        or die "Failed to post '$talkpage' and '$pagename' (status $?)\n";

# Analysis of the archive pages
#
# Counts the entries already on the current archive page ($arccount).  This
# is only needed when some blocks did not fit on Wikipedia:Recent additions,
# i.e. when $i stopped short of $dykblockcount.

$arccount = 0;

if ($i < $dykblockcount)
{
        tie @arclog, 'Tie::File', $logfile3 or die "Cannot tie $logfile3: $!";

        # Skip to the "==Did you know...==" heading.
        $x = 0;
        while (($x < scalar(@arclog)) && ($arclog[$x] !~ m/==Did you know\.\.\.==/i))
        {
                $x++;
        }

        # The loop header is the only place that steps to the next line; a
        # second increment at the end of the body would skip every other line
        # and undercount the entries.
        for ($arccount = 0; $x < scalar(@arclog); $x++)
        {
                # An entry starts with an image line or with a "...that" line.
                if (($arclog[$x] =~ m/\[\[Image:/i) || ($arclog[$x] =~ m/^\.\.\.that/i))
                {
                        # Move to the line that ends the entry ("?" or <br>),
                        # without running past the last line of the page.
                        while ((($x + 1) < scalar(@arclog))
                                && !(($arclog[$x] =~ m/\?/i) || ($arclog[$x] =~ m/<br\s*\/{0,1}>/i)))
                        {
                                $x++;
                        }
                        $arccount++;
                }
        }
}

# Build the archive page logs and post them.
#
# Blocks $i .. $dykblockcount-1 did not fit on Wikipedia:Recent additions and
# go to the archive.  They are processed from the last block down to $i; each
# block is inserted right below the four header lines of the archive log, so
# blocks processed later end up above the ones processed earlier.
#
# $testcount tracks the fill level of the archive being written, using the
# block sizes stored in slot 0.  Once it reaches 50 the next archive page is
# started.
#
# Nothing is built or posted when no block overflowed ($i >= $dykblockcount):
# in that case the archive page was never read, and posting a log made only
# of the header would wipe the existing archive.
if ($i < $dykblockcount)
{
        $startingarchive = $archivenum;
        $testcount = $arccount;

        # With fewer than 50 entries the current archive is extended and its
        # old content re-appended; otherwise it is left alone and the next
        # archive is started, which is then also the first page to post.
        $appendold = ($arccount < 50);
        if (not $appendold)
        {
                $archivenum++;
                $startingarchive = $archivenum;
                $testcount = 0;
        }

        # Start the first archive log with the standard header.
        $currentlogfile = $logfile5 . $archivenum . ".log";
        open(ARCLOG, ">", $currentlogfile) or die "Cannot create $currentlogfile: $!";
        print ARCLOG $archiveheader;
        close(ARCLOG) or die "Cannot close $currentlogfile: $!";

        tie @newlog, 'Tie::File', $currentlogfile or die "Cannot tie $currentlogfile: $!";

        for ($a = $dykblockcount - 1; $a >= $i; $a--)
        {
                # Insert the block, followed by an empty line, below the header.
                $start = 4;
                for ($b = 1; $b < $dykmat[$a][0]; $b++)
                {
                        splice(@newlog, $start, 0, $dykmat[$a][$b]);
                        $start++;
                }
                splice(@newlog, $start, 0, "");
                $testcount += $dykmat[$a][0];

                # Archive full: switch to a fresh log for the next archive page.
                if ($testcount >= 50)
                {
                        $testcount = 0;
                        $archivenum++;
                        $currentlog = $logfile5 . $archivenum . ".log";
                        open(ARCLOG, ">", $currentlog) or die "Cannot create $currentlog: $!";
                        print ARCLOG $archiveheader;
                        close(ARCLOG) or die "Cannot close $currentlog: $!";
                        untie @newlog or die "Cannot untie archive log: $!";
                        tie @newlog, 'Tie::File', $currentlog or die "Cannot tie $currentlog: $!";
                }
        }

        # Release the log before it is appended to and read by the uploader.
        untie @newlog;

        if ($appendold)
        {
                # Re-append the old archive content (everything below its four
                # header lines) to the first archive log.
                open(ARCLOG, ">>", $currentlogfile) or die "Cannot append to $currentlogfile: $!";
#               print ARCLOG "\n";
                for ($x = 4; $x < scalar(@arclog); $x++)
                {
                        print ARCLOG $arclog[$x], "\n";
                }
                close(ARCLOG) or die "Cannot close $currentlogfile: $!";
        }
#       print "\n$testcount\n";
#       print $archivenum, "\n";

        # Post every archive page that has a log, oldest first.
        for ($x = $startingarchive; $x <= $archivenum; $x++)
        {
                $currentlog = $logfile5 . $x . ".log";
                system("python2.3 -c " . $postprog1 . $postprog2 . $currentlog . $postprog3 . "Wikipedia:Recent additions " . $x . $postprog4 . "Testing archival bot" . $postprog5 . "\"") == 0
                        or die "Failed to post 'Wikipedia:Recent additions $x' from $currentlog (status $?)\n";
        }
}

# Store the number of the last archive page back into the configuration
# file.  $lastarchive refers to the matching line of the tied config array,
# so assigning through it rewrites that line.  If no such line was found it
# is not a reference and there is nothing to write to.
if (ref $lastarchive)
{
        $$lastarchive = "lastarchive = " . $archivenum;
}
else
{
        warn "No 'lastarchive' line in $configfile; archive number $archivenum was not saved\n";
}

# Remove the working files.  Passing a "*.log" pattern to the list form of
# system() never reaches a shell, so the pattern would not be expanded and
# nothing would be deleted; the directory is read and matched here instead.
$logdirpath = $HOME . $BOTDIR . $LOGDIR;
if (opendir(LOGDIRHANDLE, $logdirpath))
{
        foreach $logname (grep { /\.log$/ && !/^\./ } readdir(LOGDIRHANDLE))
        {
                next unless -f $logdirpath . $logname;
                unlink($logdirpath . $logname)
                        or warn "Cannot remove $logdirpath$logname: $!\n";
        }
        closedir(LOGDIRHANDLE);
}
else
{
        warn "Cannot open $logdirpath for cleanup: $!\n";
}

[edit] didyouknow.cfg

# Last archive that the program is on. Do not attempt to update manually.
lastarchive = 25