User:Shadowbot2/Source

From Wikipedia, the free encyclopedia

This page contains a cleaned-up version of the Shadowbot2 source.

#!/usr/bin/perl
use Perlwikipedia;
use HTML::Entities;

# ---------------------------------------------------------------------------
# Main program flow:
#   1. log in and purge the Main Page,
#   2. read the mailing list of users to alert,
#   3. build the list of pages transcluded on the Main Page and scan them,
#      their images, and tomorrow's dated subpages,
#   4. e-mail the subscribers if anything unprotected was found,
#   5. print run statistics (the per-page report follows below).
# ---------------------------------------------------------------------------
my $editor=Perlwikipedia->new;

my $starting_time=time; #What time did we start?

#NOTE(review): credentials are hard-coded in the source; consider loading
#them from a private config file or the environment instead.
my $username='Shadowbot2';
my $password='**********';


$editor->login($username,$password);

#Filled in by scan(): titles found protected / unprotected during this run
my (@protected,@unprotected);

$editor->purge_page('Main Page'); #Make sure we have a clean copy of the Main Page

my $mailing_list_text=$editor->get_text('User:Shadowbot2/Mailing list');

my @mailing_list=$mailing_list_text=~m/\* \[\[User:(.+?)\|.+?\]\]/g; #Parse for all users listed

#Fetch the rendered Main Page so the login check below has something to look
#at. Previously $main_page_html was never assigned, so the check always
#failed and the bot logged in a second time on every run.
my $main_page_html=$editor->{mech}->get("http://en.wikipedia.org/wiki/Main_Page")->content;

unless (defined $main_page_html && $main_page_html=~m/Shadowbot2/i) { #Make sure we're still logged in
        $editor->login($username,$password);
}

#Titles of every page transcluded on the Main Page; filled in by
#load_template_list() and read by the scanning subs. (This declaration used
#to be misspelled "@tranclusion_list", so it declared nothing useful.)
my @transclusion_list;

load_template_list();

do_main_page_scan();

do_images_scan();

do_tomorrow_templates_scan();

my $sent_email=0;

if (@unprotected) { #Send email if needed and mark as such
        send_mail();
        $sent_email=1;
}

my $ending_time=time; #Get various statistics about the run
my $total_time=$ending_time-$starting_time;
my $total_scanned=(scalar @protected)+(scalar @unprotected);

print "I just scanned $total_scanned pages/images. I detected " . (scalar @protected) ." protected pages and " . (scalar @unprotected) . " unprotected pages. The job took $total_time seconds to run.\n\n";
if($sent_email) {print "I sent an alert email to " . (scalar @mailing_list) . " users.\n\n";}
else {print "I did not send an alert email.\n\n";}

print "Pages I scanned and their protection status:\n\n";
#Header printed by write() at the top of each page of the report
format STDOUT_TOP=
Name of page                                                            Result of scan
--------------------------------------------------------------------------------------
.
#One report line: the page title on the left, its scan result on the right.
#Both fields are read from the package globals $page and $result.
format STDOUT=
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<  @<<<<<<<<<<<<<
$page,                                                                  $result
.

#Emit the protected pages first, then the unprotected ones. Each title is
#wrapped in [[...]] in place before it is written.
foreach my $report_group ([\@protected,"Protected"],[\@unprotected,"Unprotected"]) {
        my ($titles,$status)=@$report_group;
        foreach $page (@$titles) {
                $page="[[$page]]";
                $result=$status;
                write;
        }
}

#Scan every page in @transclusion_list for protection.
#
#Side effects: appends today's "POTD protected" subpage title to
#@transclusion_list (built from the $CURRENTYEAR/$CURRENTMONTH/$CURRENTDAY2
#globals) and calls scan() for each title that passes the filter below.
sub do_main_page_scan {
        #Save us the hassle of finding the POTD subpage
        my $potd_title="Template:POTD protected/$CURRENTYEAR-$CURRENTMONTH-$CURRENTDAY2";
        push @transclusion_list,$potd_title;

        for my $title (@transclusion_list) {
                #Skip anything containing a dot followed by 2-3 word characters
                #(meant to leave out .js and .css pages)
                next if $title=~/\.\w{2,3}/;
                scan($title);
        }
}

#Scan every image used by the pages in @transclusion_list.
#
#For each title the wikitext is fetched with $editor->get_text, HTML comments
#and <noinclude> sections are dropped, and every [[Image:...]] link left in
#the text (including those inside <includeonly>) is collected. Each distinct
#image is then passed to scan() once.
#
#Changes from the earlier version:
#  * all image links in a page are collected, not just the first one;
#  * an image is scanned only once even if it appears several times;
#  * the image name stops at the first "|" or "]", so a link without a pipe
#    is picked up and a match can no longer run on into a following link;
#  * names containing template/parameter braces are skipped, since they are
#    not real titles.
sub do_images_scan {
        my @images;
        my %already_queued;
        foreach my $title (@transclusion_list) {
                my $wikitext=$editor->get_text($title);
                next unless defined $wikitext; #Nothing to parse for this title

                #Get rid of unrendered stuff
                $wikitext=~s/<!--.+?-->//gs;
                $wikitext=~s/<noinclude>.+?<\/noinclude>//gs;

                #Grab every image link, whether or not it is inside <includeonly>
                while ($wikitext=~m/\[\[\s*(Image:[^\]\|\n]+)/g) {
                        my $image=$1;
                        $image=~s/\s+$//;
                        next if $image=~m/[{}]/; #Name is built from a template/parameter
                        next if $already_queued{$image}++;
                        push (@images,$image);
                }
        }
        foreach my $image (@images) {scan($image);}
}


#Scan tomorrow's version of every dated page in @transclusion_list.
#
#A title ending in "Month D" or "Month D, YYYY" has that trailing date
#advanced by one day with do_date_increment() and the resulting title is
#passed to scan(). Titles without a trailing date are ignored.
#
#Changes from the earlier version:
#  * the year is now actually captured (the old pattern had no third group,
#    so $3 was always undefined);
#  * the substitution is anchored to the end of the title, so only the
#    trailing date is rewritten;
#  * the new title is built on a copy, so @transclusion_list is left intact;
#  * titles for which do_date_increment() yields no date are skipped.
sub do_tomorrow_templates_scan {
        foreach my $today_title (@transclusion_list) {
                my $tomorrow_title=$today_title;

                if ($today_title=~m/(\w+) (\d{1,2}), (\d{4})$/) { #"Month D, YYYY"
                        my ($month,$day,$year)=do_date_increment("$1 $2 $3");
                        next unless defined $month && defined $day && defined $year;
                        $tomorrow_title=~s/\w+ \d{1,2}, \d{4}$/$month $day, $year/;
                }
                elsif ($today_title=~m/(\w+) (\d{1,2})$/) { #"Month D"
                        my ($month,$day)=do_date_increment("$1 $2");
                        next unless defined $month && defined $day;
                        $tomorrow_title=~s/\w+ \d{1,2}$/$month $day/;
                }
                else {
                        next; #No trailing date, nothing to do
                }

                scan($tomorrow_title);
        }
}

#Check whether one page is protected and record the result.
#
#Argument: the page title. The page's action=edit form is fetched through
#the bot's session; if the form's <textarea> is marked readonly the title is
#appended to @protected, otherwise to @unprotected.
sub scan {
        my ($page)=@_;

        #Grab the edit form and decode its HTML entities
        my $edit_url="http://en.wikipedia.org/w/index.php?title=$page&action=edit";
        my $edit_page_contents=$editor->{mech}->get($edit_url)->content;
        decode_entities($edit_page_contents);

        #Remove unrendered content
        for ($edit_page_contents) {
                s/<!--.+?-->//gs;
                s/<noinclude>.+?<\/noinclude>//gs;
        }

        #Replace MediaWiki variables with their real values
        $edit_page_contents=do_replace($edit_page_contents);

        #A read-only textarea means the text is not editable
        my $is_read_only=$edit_page_contents =~ m/<textarea .+? readonly='readonly'/;
        push @{ $is_read_only ? \@protected : \@unprotected },$page;
}

#E-mail the list of unprotected pages to every user in @mailing_list.
#
#The message body lists each title in @unprotected with a link to it. For
#each subscriber the Special:Emailuser form is fetched, its subject and text
#fields are filled in, and the wpSend button is clicked.
sub send_mail {
        #Build the message body once; it is the same for every recipient
        my $email_text="This is an automated report of unprotected templates and images I detected during my scan of the Main Page. This list also includes tomorrow's templates, but not tomorrow's images.\n\n Unprotected templates/images I found were:\n\n";
        $email_text.=join('',map { "[[$_]] -- http://en.wikipedia.org/wiki/$_\n" } @unprotected);
        $email_text.="\nThese templates/images should be protected immediately.";

        for my $recipient (@mailing_list) { #Perform for each user subscribed
                my $mech=$editor->{mech};
                $mech->get("http://en.wikipedia.org/wiki/Special:Emailuser/$recipient");
                #Fill out the form fields
                $mech->field('wpSubject','Shadowbot2 report of unprotected templates');
                $mech->field('wpText',$email_text);
                $mech->click('wpSend'); #Send the email
        }
}

#Build @transclusion_list from the wikitext of the Main Page.
#
#Also sets the globals $CURRENTMONTHNAME, $CURRENTDAY, $CURRENTYEAR,
#$CURRENTMONTH and $CURRENTDAY2 from the rendered
#User:Shadowbot2/Variables page (which presumably prints them as NAME=value
#pairs - confirm against that page). A global is left untouched when its
#pattern does not match.
sub load_template_list {

    my $main_page_wikitext=$editor->get_text("Main Page");
    my $variable_page_raw=$editor->{mech}->get("http://en.wikipedia.org/wiki/User:Shadowbot2/Variables")->content;

    #Drop HTML comments and <noinclude> sections; they are not rendered
    for ($main_page_wikitext) {
        s/<!--.+?-->//gs;
        s/<noinclude>.+?<\/noinclude>//gs;
    }

    #Grab the current MediaWiki variable values into the globals
    $CURRENTMONTHNAME=$1 if $variable_page_raw=~m/CURRENTMONTHNAME=(\w+)/;
    $CURRENTDAY=$1       if $variable_page_raw=~m/CURRENTDAY=(\d+)/;
    $CURRENTYEAR=$1      if $variable_page_raw=~m/CURRENTYEAR=(\d+)/;
    $CURRENTMONTH=$1     if $variable_page_raw=~m/CURRENTMONTH=(\d+)/;
    $CURRENTDAY2=$1      if $variable_page_raw=~m/CURRENTDAY2=(\d+)/;

    #Expand the variables so dated subpage names become concrete titles
    $main_page_wikitext=do_replace($main_page_wikitext);

    #Everything between double curly brackets is a candidate transclusion
    my @candidates=$main_page_wikitext=~m/\{\{(.+?)\}\}/g;
    for my $name (@candidates) {
        next if $name =~ m/^[A-Z]+$/; #A variable we missed (all uppercase)
        $name="Template:".$name unless $name =~ m/Wikipedia:/; #Make sure it has a namespace
        push @transclusion_list,$name;
    }

    #Take care of other templates that would be difficult to regex for
    push @transclusion_list,
        "Template:TFAfooter",
        "Template:SelAnnivFooter",
        "Template:*mp",
        "Template:Click";

}

#Expand the MediaWiki date variables in a piece of text.
#
#Argument: the text to process. Returns a copy in which {{CURRENTDAY}},
#{{CURRENTYEAR}}, {{CURRENTMONTH}}, {{CURRENTDAY2}} and {{CURRENTMONTHNAME}}
#are replaced by the values of the globals of the same name.
sub do_replace {
    my ($text)=@_;

    #Variable name => current value, applied in this order
    my @substitutions=(
        [ 'CURRENTDAY',       $CURRENTDAY ],
        [ 'CURRENTYEAR',      $CURRENTYEAR ],
        [ 'CURRENTMONTH',     $CURRENTMONTH ],
        [ 'CURRENTDAY2',      $CURRENTDAY2 ],
        [ 'CURRENTMONTHNAME', $CURRENTMONTHNAME ],
    );

    for my $substitution (@substitutions) {
        my ($name,$value)=@$substitution;
        $text=~s/\{\{\Q$name\E\}\}/$value/g;
    }

    return $text;

}

#Return the day after a given date.
#
#Argument: a string "Month D" or "Month D YYYY" (a comma after the day is
#also accepted), where Month is a full English month name.
#Returns: the list (month name, day, year) for the following day, or an
#empty list if the month name is unknown or the day is not valid for that
#month. When no year is given the current UTC year is assumed.
#
#Changes from the earlier version:
#  * rolling over from December 31 now moves to the next year (it used to
#    reset the year to a hard-coded 2007);
#  * February has 29 days in leap years;
#  * the default year is the current year rather than 2007;
#  * invalid input gives an empty list instead of falling off the end of the
#    if/elsif chain.
sub do_date_increment {
        my $date  = shift;
        return unless defined $date;

        my @month_names   = qw(January February March April May June
                               July August September October November December);
        my @days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

        #Month name => position in the two arrays above
        my %index_of;
        @index_of{@month_names} = (0 .. $#month_names);

        #Split into month, day and optional year
        $date =~ s/^\s+//;
        $date =~ s/\s+$//;
        my ($month, $day, $year) = split(/,?\s+/, $date);

        return unless defined $month && exists $index_of{$month};
        return unless defined $day && $day =~ /^\d+$/;

        #No usable year given: assume the current (UTC) year
        unless (defined $year && $year =~ /^\d+$/) {
                $year = (gmtime)[5] + 1900;
        }

        my $index        = $index_of{$month};
        my $last_day     = $days_in_month[$index];
        my $is_leap_year = ($year % 4 == 0) && ($year % 100 != 0 || $year % 400 == 0);
        $last_day++ if $month eq 'February' && $is_leap_year;

        #Reject days that do not exist in this month
        return if $day < 1 || $day > $last_day;

        #Ordinary case: same month, next day
        return ($month, $day + 1, $year) if $day < $last_day;

        #Last day of the month: roll over to the first of the next month,
        #and to the next year after December
        if ($month eq 'December') {
                return ('January', 1, $year + 1);
        }
        return ($month_names[$index + 1], 1, $year);
}