User:AllyUnion/did you know.pl
From Wikipedia, the free encyclopedia
The following code is licensed under the GPL and the Creative Commons Attribution License. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
This code is not working. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC) FYI: The system calls to python2.3 are calls to the pywikipediabot framework, because Perl needs a large number of extra packages just to download files, and I don't have access to a version of the English module that allows me to post to the English Wikipedia. -- AllyUnion (talk) 03:27, 9 Apr 2005 (UTC)
Basic idea:
- Leave three blocks of "...that" on Template talk:Did you know
- Move all other blocks of "...that" to Wikipedia:Recent additions
- Move oldest blocks on Wikipedia:Recent additions to an archive page if Wikipedia:Recent additions exceeds 50 "...that" lines.
[edit] didyouknow.pl
#!/usr/bin/perl -w
# Author: Jason Y. Lee
# Purpose: Wikipedia's Did you know archival process
# Special thanks to dysprosia, and the person who helped me in #wikipedia
#
# What the script does, in order:
#   1. Reads the number of the current archive page from didyouknow.cfg.
#   2. Downloads the talk page, Wikipedia:Recent additions and the current
#      archive page into local log files by running pywikipediabot code
#      through python2.3.
#   3. Keeps the first three blocks of "...that" entries on the talk page and
#      removes every later block from the local copy of the talk page.
#   4. Rebuilds Wikipedia:Recent additions from the removed talk page blocks
#      followed by the blocks that were already on that page, until the
#      running size counter reaches 60.
#   5. Moves every block that did not fit to the archive page(s), starting a
#      new archive page whenever the running counter reaches 50.
#   6. Posts the changed pages, stores the archive number back into the
#      configuration file and deletes the local log files.
#
# Assumptions:
#   On Template talk:Did you know:
#     A did you know line is in the following format:
#       *...that <TEXT><br>
#     Where <TEXT> is any text of any length, no matter if there is a newline
#     or not.
#   On Wikipedia:Recent additions and any archive pages after
#   Wikipedia:Recent additions 25:
#     A did you know line starts either with:
#       [[Image: OR ...that
#     A did you know line ends with either:
#       A question mark or a HTML line break (<br>)

use strict;
use warnings;
use Tie::File;

my $HOME   = '<insert home directory>';
my $BOTDIR = 'wikipedia/bots/kurando-san/';
my $LOGDIR = 'dyklogs/';

my $configfile = $HOME . $BOTDIR . 'didyouknow.cfg';
my $logfile1   = $HOME . $BOTDIR . $LOGDIR . 'dyk1.log';     # talk page
my $logfile2   = $HOME . $BOTDIR . $LOGDIR . 'dyk2.log';     # Recent additions
my $logfile3   = $HOME . $BOTDIR . $LOGDIR . 'dyk3.log';     # current archive
my $logfile4   = $HOME . $BOTDIR . $LOGDIR . 'newra.log';    # rebuilt Recent additions
my $logfile5   = $HOME . $BOTDIR . $LOGDIR . 'newra-';       # prefix of rebuilt archives

my $talkpage      = "Template talk:Did you know";
my $pagename      = "Wikipedia:Recent additions";
my $archive       = "Wikipedia:Recent additions";
my $archivenum    = "";
my $archiveheader = "{{DYK archive header}}\n{{DYK archive nav}}\n\n==Did you know...==\n";

my $KEEP_BLOCKS   = 3;     # blocks that stay on the talk page
my $RECENT_LIMIT  = 60;    # size counter at which Recent additions is full
my $ARCHIVE_LIMIT = 50;    # size counter at which an archive page is full
my $HEADER_LINES  = 4;     # number of lines in $archiveheader
my $EDIT_COMMENT  = "Testing archival bot";

my $TOP_MARKER    = "<!-- newly archived items should go in at the top -->";
my $top_marker_re = qr/<!-- newly archived items should go in at the top -->/;

# Quote a string as a single-quoted Python literal.
sub py_string {
    my ($text) = @_;
    $text =~ s/([\\'])/\\$1/g;
    return "'" . $text . "'";
}

# Run a Python program with python2.3. The list form of system() is used so
# that no shell ever interprets the program text. A failure is reported but
# is not fatal, because fetching an archive page that does not exist yet is
# expected to fail.
sub run_python {
    my ($program, $what) = @_;
    my $status = system('python2.3', '-c', $program);
    warn "python2.3 failed while $what (status $status)\n" if $status != 0;
    return $status == 0;
}

# Download the wiki page $page into the local file $file.
sub fetch_page {
    my ($page, $file) = @_;
    my $program =
        "import config, wikipedia\n"
      . "mysite = wikipedia.getSite()\n"
      . "logfile = " . py_string($file) . "\n"
      . "pagename = " . py_string($page) . "\n"
      . "log = file(logfile, 'w')\n"
      . "log.write(wikipedia.getPage(mysite, pagename, True, True, False).encode('iso-8859-1'))\n"
      . "log.close()\n";
    return run_python($program, "fetching '$page'");
}

# Upload local files to wiki pages in a single Python run. Every argument is
# an array reference of the form [ local file, page name ].
sub post_pages {
    my (@jobs) = @_;
    my $program = "import config, wikipedia\nmysite = wikipedia.getSite()\n";
    for my $job (@jobs) {
        my ($file, $page) = @{$job};
        $program .=
            "logfile = " . py_string($file) . "\n"
          . "pagename = " . py_string($page) . "\n"
          . "comment = " . py_string($EDIT_COMMENT) . "\n"
          . "log = file(logfile, 'r')\n"
          . "page = log.read()\n"
          . "log.close()\n"
          . "wikipedia.PageLink(mysite, pagename).put(page, comment)\n";
    }
    return run_python($program, "posting " . join(', ', map { "'" . $_->[1] . "'" } @jobs));
}

# Return the index of the first line at or after $from that matches $pattern,
# or the number of lines when no such line exists.
sub find_line {
    my ($lines, $from, $pattern) = @_;
    my $idx = $from;
    $idx++ while $idx < scalar(@{$lines}) && $lines->[$idx] !~ $pattern;
    return $idx;
}

# True when the text contains nothing but whitespace.
sub is_blank {
    my ($text) = @_;
    return !defined($text) || $text !~ /\S/;
}

# True when the text ends a did you know entry (question mark or <br>).
sub ends_entry {
    my ($text) = @_;
    return 0 unless defined $text;
    return ($text =~ m/\?/ || $text =~ m/<br\s*\/{0,1}>/i) ? 1 : 0;
}

# Slot 0 of a block holds the index one past its last entry; entries live in
# slots 1 .. (slot 0) - 1. Returns 0 for a block without a recorded size.
sub block_end {
    my ($block) = @_;
    return 0 unless ref($block) eq 'ARRAY' && defined $block->[0];
    return $block->[0];
}

# Bring one talk page line into canonical form: "*...that" prefix, no user
# signature, no empty parentheses, "?<br" without a space and images turned
# into links ([[:Image).
sub normalize_dyk_line {
    my ($text) = @_;
    $text =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
    $text =~ s/(?:--|&[mn]dash;|[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
    $text =~ s/\(\)//g;
    $text =~ s/\?\s<br/\?<br/ig;
    $text =~ s/\[\[Image/\[\[:Image/ig;
    return $text;
}

# Name of the local file that holds the rebuilt archive page number $num.
sub archive_log_path {
    my ($prefix, $num) = @_;
    return $prefix . $num . ".log";
}

# Create (or truncate) an archive log file that contains only the header.
sub start_archive_log {
    my ($path, $header) = @_;
    open my $fh, '>', $path or die "Cannot create $path: $!\n";
    print {$fh} $header;
    close $fh or die "Cannot close $path: $!\n";
    return;
}

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
my @config;
tie(@config, 'Tie::File', $configfile) or die "Cannot tie $configfile: $!\n";

# Remember the index of the setting instead of a reference to the loop
# variable, so the value can be written back reliably at the end.
my $lastarchive_index;
for my $idx (0 .. $#config) {
    my $entry = $config[$idx];
    next unless defined $entry && $entry =~ /lastarchive =/;
    $archivenum = $entry;
    $archivenum =~ s/lastarchive\s*=\s*//g;
    $archivenum =~ s/\s+\z//;
    $archive           = "Wikipedia:Recent additions " . $archivenum;
    $lastarchive_index = $idx;
}
die "No 'lastarchive =' entry found in $configfile\n"
    unless defined $lastarchive_index;
die "Archive number '$archivenum' in $configfile is not a number\n"
    unless $archivenum =~ /\A\d+\z/;

# ---------------------------------------------------------------------------
# Get the pages
# ---------------------------------------------------------------------------
fetch_page($talkpage, $logfile1);
fetch_page($pagename, $logfile2);
fetch_page($archive,  $logfile3);

# ---------------------------------------------------------------------------
# Analysis of 'Template talk:Did you know'
#
# @dykmat is a list of blocks. Entries of a block are stored in slots
# 1 .. n and slot 0 holds n + 1 (see block_end). Lines belonging to block
# number $KEEP_BLOCKS or later are removed from the tied talk page file.
# ---------------------------------------------------------------------------
my @dyklog;
tie(@dyklog, 'Tie::File', $logfile1) or die "Cannot tie $logfile1: $!\n";

my @dykmat;
my $dykblockcount = 0;
my $entries_in_block = 0;

my $older_items_re =
    qr/All older items have been archived at \[\[Wikipedia:Recent additions\]\]/i;

my $line_no = find_line(\@dyklog, 0, qr/=+ARCHIVE USED SUGGESTIONS HERE=+/i);
$line_no = find_line(
    \@dyklog, $line_no,
    qr/\'\'include a link to the used picture behind the fact in which it has been \'\'\'used\'\'\' on the front page\.\'\'<br>/i
);
die "Cannot find the start of the used suggestions on '$talkpage'\n"
    if $line_no >= scalar(@dyklog);
$line_no++;

# Either remove the current line (blocks that get archived) or step over it
# (blocks that stay on the talk page).
my $take_line = sub {
    if ($dykblockcount >= $KEEP_BLOCKS) {
        splice(@dyklog, $line_no, 1);
    }
    else {
        $line_no++;
    }
};

while ($line_no < scalar(@dyklog) && $dyklog[$line_no] !~ $older_items_re) {
    my $raw     = $dyklog[$line_no];
    my $current = normalize_dyk_line($raw);
    $dyklog[$line_no] = $current if $current ne $raw;

    if ($current =~ m/^\*\.\.\.that/i) {
        my $entry = $current;
        $take_line->();

        # Collect continuation lines until the next entry, a blank line or
        # the end marker of the section.
        while ($line_no < scalar(@dyklog)) {
            my $raw_next = $dyklog[$line_no];
            last if $raw_next =~ $older_items_re;
            my $next = normalize_dyk_line($raw_next);
            $dyklog[$line_no] = $next if $next ne $raw_next;
            last if $next =~ m/^\*\.\.\.that/i || is_blank($next);
            $entry .= " " . $next;
            $take_line->();
        }

        $entries_in_block++;
        $dykmat[$dykblockcount][$entries_in_block] = $entry;
    }
    elsif (is_blank($current)) {
        # A blank line closes the current block, unless the block is empty.
        if ($entries_in_block > 0) {
            $dykmat[$dykblockcount][0] = $entries_in_block + 1;
            $dykblockcount++;
        }
        $entries_in_block = 0;
        $take_line->();
    }
    else {
        $take_line->();
    }
}

# Close a final block that was not followed by a blank line; otherwise its
# entries would be removed from the talk page without being carried over.
if ($entries_in_block > 0) {
    $dykmat[$dykblockcount][0] = $entries_in_block + 1;
    $dykblockcount++;
    $entries_in_block = 0;
}

if ($dykblockcount <= $KEEP_BLOCKS) {
    die "Nothing to archive: only $dykblockcount block(s) on '$talkpage'\n";
}

# ---------------------------------------------------------------------------
# Find the image left and right.
# The side is taken from the first image line on Wikipedia:Recent additions.
# ---------------------------------------------------------------------------
my @wralog;
tie(@wralog, 'Tie::File', $logfile2) or die "Cannot tie $logfile2: $!\n";

my $side = "left";
my $first_image = find_line(\@wralog, 0, qr/\[\[Image:/i);
if ($first_image < scalar(@wralog)) {
    $side = ($wralog[$first_image] =~ m/left/i) ? "left" : "right";
}

# ---------------------------------------------------------------------------
# Process the talk page.
# Strip the leading "*" and turn an image link inside an entry into a
# thumbnail line placed in front of the entry, alternating the side.
# ---------------------------------------------------------------------------
for my $block_no (0 .. $dykblockcount - 1) {
    my $end = block_end($dykmat[$block_no]);
    for my $entry_no (1 .. $end - 1) {
        my $entry = $dykmat[$block_no][$entry_no];
        $entry =~ s/^\*\.\.\.that/\.\.\.that/ig;

        my @parts = split /\(?\[\[:?Image:/i, $entry;
        if (scalar(@parts) == 2) {
            my $image = $parts[1];
            $image =~ s/\]\]\)?<br\s*\/?>//ig;
            $image =~ s/\]\]\)?//g;
            $image =~ s/\n//g;
            my $imagelink = "[[Image:" . $image . "|100px|" . $side . "]]";
            $side = ($side eq "left") ? "right" : "left";

            $entry =~ s/\(*?\s*?\[\[:*?Image:.*?\]\]\s*?\)*?//ig;
            $entry =~ s/\?\s*?\)*?<br/\?<br/ig;
            $dykmat[$block_no][$entry_no] = "$imagelink\n$entry";
        }
        else {
            $entry =~ s/\?\s*?\)*?<br/\?<br/ig;
            $dykmat[$block_no][$entry_no] = "$entry";
        }
    }
}

# ---------------------------------------------------------------------------
# Analysis of 'Wikipedia:Recent additions'
#
# The entries live between two identical marker comments. @wramat uses the
# same layout as @dykmat, except that slot 0 is set when a block is closed
# to the index of the next free slot.
# ---------------------------------------------------------------------------
my @wramat;
my $wrablockcount = -1;
my $next_slot     = 1;

my $opening = find_line(\@wralog, 0, $top_marker_re);
die "Cannot find the insertion marker on '$pagename'\n"
    if $opening >= scalar(@wralog);

my $scan = $opening + 1;
while ($scan < scalar(@wralog) && $wralog[$scan] !~ $top_marker_re) {
    my $current = $wralog[$scan];
    $current =~ s/^\s*?\.\.\.\s*?that/\.\.\.that/ig;

    if ($current =~ m/\[\[Image:/i || $current =~ m/^\.\.\.that/i) {
        $wrablockcount = 0 if $wrablockcount == -1;

        # An entry runs up to and including the first line that contains a
        # question mark or a <br>. The lines are joined without a trailing
        # newline so that writing the entry back adds no blank line.
        my @entry_lines = ($current);
        while (!ends_entry($wralog[$scan]) && $scan + 1 < scalar(@wralog)) {
            $scan++;
            push @entry_lines, $wralog[$scan];
        }
        $wramat[$wrablockcount][$next_slot] = join("\n", @entry_lines);
        $next_slot++;
    }
    elsif (is_blank($current)) {
        if ($wrablockcount != -1) {
            $wramat[$wrablockcount][0] = $next_slot;    # Save the size
        }
        $next_slot = 1;    # Reset the line count for the block
        $wrablockcount++;
    }
    $scan++;
}
my $closing = $scan;    # index of the closing marker, or the line count

# Close a final block that was not followed by a blank line.
if ($wrablockcount != -1 && $next_slot > 1) {
    $wramat[$wrablockcount][0] = $next_slot;
    $wrablockcount++;
}

# Copy everything from the "Wikipedia:Recent additions" matrix into the
# "Did you know" matrix, behind the blocks that came from the talk page.
for my $block_no (0 .. $wrablockcount - 1) {
    my $end = block_end($wramat[$block_no]);
    for my $slot (0 .. $end - 1) {
        $dykmat[$dykblockcount][$slot] = $wramat[$block_no][$slot];
    }
    $dykblockcount++;
}

# ---------------------------------------------------------------------------
# Write the new 'Wikipedia:Recent additions'
# ---------------------------------------------------------------------------
open my $ralog, '>', $logfile4 or die "Cannot create $logfile4: $!\n";

# Everything up to and including the opening marker is kept as it is.
for my $idx (0 .. $opening) {
    print {$ralog} $wralog[$idx], "\n";
}

# $first_archived ends up as the index of the first block that did not fit.
my $total = 0;
my $first_archived;
for (
    $first_archived = $KEEP_BLOCKS;
    $first_archived < $dykblockcount && $total < $RECENT_LIMIT;
    $first_archived++
    )
{
    my $end = block_end($dykmat[$first_archived]);
    for my $entry_no (1 .. $end - 1) {
        print {$ralog} $dykmat[$first_archived][$entry_no], "\n";
    }
    print {$ralog} "\n";
    $total += $end;
}

# Closing marker (preceded by an empty line) and the rest of the page.
print {$ralog} "\n", $TOP_MARKER, "\n";
for my $idx ($closing + 1 .. $#wralog) {
    print {$ralog} $wralog[$idx], "\n";
}
close $ralog or die "Cannot close $logfile4: $!\n";

# Release the tied files so that all pending changes are on disk before
# python reads them.
untie @dyklog;
untie @wralog;

# Post the new pages
post_pages([ $logfile1, $talkpage ], [ $logfile4, $pagename ]);

# ---------------------------------------------------------------------------
# Archive pages. Only touched when some block did not fit on
# Wikipedia:Recent additions; otherwise the current archive stays as it is.
# ---------------------------------------------------------------------------
if ($first_archived < $dykblockcount) {
    my @arclog;
    tie(@arclog, 'Tie::File', $logfile3) or die "Cannot tie $logfile3: $!\n";

    # Count the entries that are already on the current archive page.
    my $arccount = 0;
    my $pos = find_line(\@arclog, 0, qr/==Did you know\.\.\.==/i);
    while ($pos < scalar(@arclog)) {
        my $text = $arclog[$pos];
        if ($text =~ m/\[\[Image:/i || $text =~ m/^\.\.\.that/i) {
            $pos++ while !ends_entry($arclog[$pos]) && $pos + 1 < scalar(@arclog);
            $arccount++;
        }
        $pos++;
    }

    # A full archive page is left alone and a new page is started.
    my $extend_current = ($arccount < $ARCHIVE_LIMIT) ? 1 : 0;
    my $testcount      = $arccount;
    if (!$extend_current) {
        $archivenum++;
        $testcount = 0;
    }
    my $startingarchive = $archivenum;

    my $first_log = archive_log_path($logfile5, $archivenum);
    start_archive_log($first_log, $archiveheader);

    my @newlog;
    tie(@newlog, 'Tie::File', $first_log) or die "Cannot tie $first_log: $!\n";

    # Walk from the last block to the first archived one and insert each
    # block directly below the header, so the first archived block ends up
    # at the top of its page.
    for (my $block_no = $dykblockcount - 1; $block_no >= $first_archived; $block_no--) {
        my $end       = block_end($dykmat[$block_no]);
        my $insert_at = $HEADER_LINES;
        for my $entry_no (1 .. $end - 1) {
            # An entry may span several lines; Tie::File does not support
            # records that contain the record separator, so every line is
            # inserted as a record of its own.
            my @records = split /\n/, $dykmat[$block_no][$entry_no];
            splice(@newlog, $insert_at, 0, @records);
            $insert_at += scalar(@records);
        }
        splice(@newlog, $insert_at, 0, "");

        $testcount += $end;
        if ($testcount >= $ARCHIVE_LIMIT) {
            $testcount = 0;
            $archivenum++;
            my $next_log = archive_log_path($logfile5, $archivenum);
            start_archive_log($next_log, $archiveheader);
            untie @newlog;
            tie(@newlog, 'Tie::File', $next_log) or die "Cannot tie $next_log: $!\n";
        }
    }
    untie @newlog;

    # The entries that were already on the current archive page follow the
    # new ones; the old header lines are skipped.
    if ($extend_current) {
        open my $arc_out, '>>', $first_log or die "Cannot append to $first_log: $!\n";
        for my $idx ($HEADER_LINES .. $#arclog) {
            print {$arc_out} $arclog[$idx], "\n";
        }
        close $arc_out or die "Cannot close $first_log: $!\n";
    }
    untie @arclog;

    for my $num ($startingarchive .. $archivenum) {
        post_pages(
            [ archive_log_path($logfile5, $num), "Wikipedia:Recent additions " . $num ]
        );
    }
}

# Remember the archive page for the next run.
$config[$lastarchive_index] = "lastarchive = " . $archivenum;
untie @config;

# Remove the local log files. The directory is read directly because the
# list form of system() never expands a "*.log" wildcard.
my $log_dir = $HOME . $BOTDIR . $LOGDIR;
if (opendir my $dh, $log_dir) {
    my @stale = map { $log_dir . $_ } grep { /\A[^.].*\.log\z/s } readdir $dh;
    closedir $dh;
    for my $path (@stale) {
        unlink $path or warn "Cannot remove $path: $!\n";
    }
}
else {
    warn "Cannot open $log_dir: $!\n";
}
[edit] didyouknow.cfg
# Last archive that the program is on; do not attempt to update manually.
lastarchive = 25