User:Shadowbot2/Source
From Wikipedia, the free encyclopedia
This page contains a cleaned-up version of the Shadowbot2 source.
#!/usr/bin/perl
#
# Shadowbot2 -- scans the English Wikipedia Main Page, every template it
# transcludes, the images those templates use, and "tomorrow's" dated
# templates, and checks whether each one is protected.  If anything is
# unprotected, an alert email is sent to every user on the bot's mailing
# list.  A plain-text report of the run is printed to STDOUT.

use strict;
use warnings;

use Perlwikipedia;
use HTML::Entities;

my $editor        = Perlwikipedia->new;
my $starting_time = time;    # What time did we start?

my $username = 'Shadowbot2';
my $password = '**********';
$editor->login($username, $password);

# State shared between the subs below.
my (@protected, @unprotected);    # Titles sorted by scan result
my @transclusion_list;            # Pages transcluded by the Main Page
my %already_scanned;              # Titles scan() has already classified

# Current values of the MediaWiki date variables; filled in by
# load_template_list().  Empty strings keep do_replace() warning-free if a
# value cannot be read.
my ($CURRENTMONTHNAME, $CURRENTDAY, $CURRENTYEAR, $CURRENTMONTH, $CURRENTDAY2)
    = ('') x 5;

# Variables read by the report formats; formats need package variables (or
# lexicals that are in scope where the format is declared).
our ($page, $result);

$editor->purge_page('Main Page');    # Make sure we have a clean copy of the Main Page

my $mailing_list_text = $editor->get_text('User:Shadowbot2/Mailing list');
$mailing_list_text = '' unless defined $mailing_list_text;
my @mailing_list = $mailing_list_text =~ m/\* \[\[User:(.+?)\|.+?\]\]/g;    # Parse for all users listed

# Make sure we're still logged in.  The rendered page is fetched here so the
# check has something to look at.
# NOTE(review): assumes the HTML served to a logged-in session contains the
# account name -- confirm against the live site.
my $main_page_html
    = $editor->{mech}->get('http://en.wikipedia.org/wiki/Main_Page')->content;
$main_page_html = '' unless defined $main_page_html;
unless ($main_page_html =~ m/Shadowbot2/i) {
    $editor->login($username, $password);
}

load_template_list();
do_main_page_scan();
do_images_scan();
do_tomorrow_templates_scan();

# Send email if needed and mark as such
my $sent_email = 0;
if (@unprotected) {
    send_mail();
    $sent_email = 1;
}

# Get various statistics about the run
my $ending_time   = time;
my $total_time    = $ending_time - $starting_time;
my $total_scanned = (scalar @protected) + (scalar @unprotected);

print "I just scanned $total_scanned pages/images. I detected " . (scalar @protected) ." protected pages and " . (scalar @unprotected) . " unprotected pages. The job took $total_time seconds to run.\n\n";

if ($sent_email) {
    print "I sent an alert email to " . (scalar @mailing_list) . " users.\n\n";
}
else {
    print "I did not send an alert email.\n\n";
}

print "Pages I scanned and their protection status:\n\n";

# Format for the top of the report.  The header text is padded so that
# "Result of scan" starts above the second field of the picture line below.
format STDOUT_TOP =
Name of page                                                           Result of scan
--------------------------------------------------------------------------------------
.

# Format for each report row: page title, then scan result.
format STDOUT =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<<<<<<
$page, $result
.

# Write out the protected pages, then the unprotected ones.  The titles are
# copied into $page rather than rewritten in place, so the arrays keep the
# bare titles.
foreach my $title (@protected) {
    $page   = "[[$title]]";
    $result = "Protected";
    write;
}
foreach my $title (@unprotected) {
    $page   = "[[$title]]";
    $result = "Unprotected";
    write;
}

# Scan every page in @transclusion_list (plus today's protected POTD
# subpage), skipping .js/.css style pages.
sub do_main_page_scan {

    # Save us the hassle of finding the POTD subpage
    push @transclusion_list,
        "Template:POTD protected/$CURRENTYEAR-$CURRENTMONTH-$CURRENTDAY2";

    # If it's not a .js or .css page, scan it.
    foreach my $title (@transclusion_list) {
        next if $title =~ /\.\w{2,3}/;
        scan($title);
    }
    return;
}

# Collect every [[Image:...]] link in the wikitext of the transcluded pages
# and scan each distinct image once.
sub do_images_scan {
    my @images;
    my %seen;

    foreach my $title (@transclusion_list) {
        my $wikitext = $editor->get_text($title);
        next unless defined $wikitext;

        # Get rid of unrendered stuff
        $wikitext =~ s/<!--.+?-->//gs;
        $wikitext =~ s/<noinclude>.+?<\/noinclude>//gs;

        # Grab every image link, with or without a caption.  <includeonly>
        # text is left in place above, so images inside it are found here
        # too.  The name stops at the first "|" or "]" so one match can
        # never run on into a neighbouring link.
        while ($wikitext =~ m/\[\[\s*(Image:[^\]\|]+)/g) {
            my $image = $1;
            $image =~ s/\s+$//;

            # An image name built from a template or parameter cannot be
            # resolved here, so skip it.
            next if $image =~ m/\{\{/;
            push @images, $image unless $seen{$image}++;
        }
    }

    foreach my $image (@images) {
        scan($image);
    }
    return;
}

# For every transcluded page whose title ends in a date ("Month D" or
# "Month D, YYYY"), scan the page for the following day as well.
sub do_tomorrow_templates_scan {
    foreach my $title (@transclusion_list) {

        # Work on a copy: the foreach variable aliases the list element and
        # the list itself must keep today's titles.
        my $tomorrow = $title;

        if ($title =~ /(\w+) (\d{1,2})$/) {    # Check for one date format
            my @next = do_date_increment("$1 $2");
            next unless @next;                 # Not a month name; nothing to do
            my ($month, $day) = @next;
            $tomorrow =~ s/\w+ \d{1,2}$/$month $day/;    # Substitute the new date
            scan($tomorrow);
        }
        elsif ($title =~ /(\w+) (\d{1,2}), (\d{4})$/) {    # The other format
            my @next = do_date_increment("$1 $2 $3");
            next unless @next;
            my ($month, $day, $year) = @next;
            $tomorrow =~ s/\w+ \d{1,2}, \d{4}$/$month $day, $year/;
            scan($tomorrow);
        }
    }
    return;
}

# Turn a page title into a form that is safe inside a URL: spaces become
# underscores and every byte outside a conservative safe set is
# percent-encoded.
sub _url_title {
    my ($title) = @_;
    $title =~ tr/ /_/;
    utf8::encode($title) if utf8::is_utf8($title);
    $title =~ s/([^A-Za-z0-9_.~:\/()-])/sprintf('%%%02X', ord $1)/ge;
    return $title;
}

# Fetch the edit form for one page and file the title under @protected or
# @unprotected, depending on whether the edit box is read-only for the bot.
# Each title is classified at most once per run.
sub scan {
    my $title = shift;
    return if $already_scanned{$title}++;

    # Grab page and decode it
    my $url = 'http://en.wikipedia.org/w/index.php?title='
        . _url_title($title)
        . '&action=edit';
    my $edit_page_contents = $editor->{mech}->get($url)->content;
    $edit_page_contents = '' unless defined $edit_page_contents;
    decode_entities($edit_page_contents);

    # Remove unrendered content
    $edit_page_contents =~ s/<!--.+?-->//gs;
    $edit_page_contents =~ s/<noinclude>.+?<\/noinclude>//gs;

    # Replace MediaWiki variables with their real values
    $edit_page_contents = do_replace($edit_page_contents);

    # Check if the text is editable.  [^>] keeps the match inside the
    # <textarea ...> tag itself, so page text that follows the tag cannot
    # fake a "protected" result.
    if ($edit_page_contents =~ m/<textarea [^>]+? readonly='readonly'/) {
        push @protected, $title;
    }
    else {
        push @unprotected, $title;
    }
    return;
}

# Email the list of unprotected pages to every user on the mailing list,
# using each user's Special:Emailuser form.
sub send_mail {
    my $email_text = '';
    $email_text .= "This is an automated report of unprotected templates and images I detected during my scan of the Main Page. This list also includes tomorrow's templates, but not tomorrow's images.\n\n Unprotected templates/images I found were:\n\n";

    # Write out the unprotected templates
    foreach my $title (@unprotected) {
        $email_text .= "[[$title]] -- http://en.wikipedia.org/wiki/"
            . _url_title($title) . "\n";
    }
    $email_text .= "\nThese templates/images should be protected immediately.";

    # Perform for each user subscribed
    foreach my $user (@mailing_list) {
        $editor->{mech}->get(
            'http://en.wikipedia.org/wiki/Special:Emailuser/' . _url_title($user));

        # Fill out variables
        $editor->{mech}->field('wpSubject', 'Shadowbot2 report of unprotected templates');
        $editor->{mech}->field('wpText',    $email_text);
        $editor->{mech}->click('wpSend');    # Send the email
    }
    return;
}

# Read the Main Page wikitext and the bot's variables page, record the
# current date values, and fill @transclusion_list with every template the
# Main Page transcludes.
sub load_template_list {
    my $main_page_wikitext = $editor->get_text("Main Page");
    $main_page_wikitext = '' unless defined $main_page_wikitext;
    my $variable_page_raw
        = $editor->{mech}->get("http://en.wikipedia.org/wiki/User:Shadowbot2/Variables")->content;
    $variable_page_raw = '' unless defined $variable_page_raw;

    # Remove unrendered content
    $main_page_wikitext =~ s/<!--.+?-->//gs;
    $main_page_wikitext =~ s/<noinclude>.+?<\/noinclude>//gs;

    # Grab the current MediaWiki variable values
    if ($variable_page_raw =~ m/CURRENTMONTHNAME=(\w+)/) { $CURRENTMONTHNAME = $1; }
    if ($variable_page_raw =~ m/CURRENTDAY=(\d+)/)       { $CURRENTDAY       = $1; }
    if ($variable_page_raw =~ m/CURRENTYEAR=(\d+)/)      { $CURRENTYEAR      = $1; }
    if ($variable_page_raw =~ m/CURRENTMONTH=(\d+)/)     { $CURRENTMONTH     = $1; }
    if ($variable_page_raw =~ m/CURRENTDAY2=(\d+)/)      { $CURRENTDAY2      = $1; }

    # Do the replace
    $main_page_wikitext = do_replace($main_page_wikitext);

    # Match text inside of curly brackets
    my @temp_list = $main_page_wikitext =~ m/\{\{(.+?)\}\}/g;
    foreach my $link (@temp_list) {

        # Keep only the template name: drop any "|parameter" part and
        # surrounding whitespace.
        $link =~ s/\|.*//s;
        $link =~ s/^\s+//;
        $link =~ s/\s+$//;
        next if $link eq '';

        # Make sure it's not a variable we missed (all uppercase)
        next if $link =~ m/^[A-Z]+$/;

        # Make sure it has a namespace (without doubling up "Template:")
        if ($link !~ m/Wikipedia:/ && $link !~ m/^Template:/) {
            $link = "Template:" . $link;
        }
        push @transclusion_list, $link;
    }

    # Take care of other templates that would be difficult to regex for
    push @transclusion_list, "Template:TFAfooter";
    push @transclusion_list, "Template:SelAnnivFooter";
    push @transclusion_list, "Template:*mp";
    push @transclusion_list, "Template:Click";
    return;
}

# Return a copy of the given text with the {{CURRENT...}} date variables
# replaced by the values read in load_template_list().
sub do_replace {
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/\{\{CURRENTDAY\}\}/$CURRENTDAY/g;
    $text =~ s/\{\{CURRENTYEAR\}\}/$CURRENTYEAR/g;
    $text =~ s/\{\{CURRENTMONTH\}\}/$CURRENTMONTH/g;
    $text =~ s/\{\{CURRENTDAY2\}\}/$CURRENTDAY2/g;
    $text =~ s/\{\{CURRENTMONTHNAME\}\}/$CURRENTMONTHNAME/g;
    return $text;
}

# Advance a date by one day.
#
# Takes one string, "Month D" or "Month D YYYY" (space separated, English
# month name).  Returns the list (month name, day, year) for the next day.
# When no year is given the current local year is used to decide whether
# February has 29 days.  Returns an empty list when the month name or the
# day is not recognisable, so callers can skip titles that merely look like
# dates.
sub do_date_increment {
    my $date = shift;

    my @month_names = qw(
        January February March     April   May      June
        July    August   September October November December
    );
    my %month_index;
    @month_index{@month_names} = (0 .. $#month_names);
    my @days_in_month = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

    # Split on a space; fall back to the current year if none was given
    my ($month, $day, $year) = split / /, (defined $date ? $date : '');
    if (!defined $year || $year !~ /^\d+$/) {
        $year = (localtime)[5] + 1900;
    }

    return unless defined $month && exists $month_index{$month};
    return unless defined $day   && $day =~ /^\d+$/;

    my $index     = $month_index{$month};
    my $last_day  = $days_in_month[$index];
    my $leap_year = ($year % 4 == 0 && $year % 100 != 0) || $year % 400 == 0;
    $last_day = 29 if $month eq 'February' && $leap_year;

    if ($day < $last_day) {
        $day++;
        return ($month, $day, $year);
    }

    # Last day of the month (or beyond): roll over to the 1st of the next
    # month, and to the next year after December.
    $day = 1;
    if ($index == $#month_names) {
        $index = 0;
        $year++;
    }
    else {
        $index++;
    }
    return ($month_names[$index], $day, $year);
}