User:OrphanBot/orphanbot.pl
From Wikipedia, the free encyclopedia
The source code for OrphanBot's image-removal task. Requires libBot.pl and libPearle2.pl.
#!/usr/bin/perl
# OrphanBot
#
# A bot to remove images from pages in preparation for deletion.
#
# Usage: orphanbot.pl --task=<task>
#
# The tasks handled below are "source", "copyright", "replaceable" and
# "special"; any other value is logged as unknown and the bot exits.
#
# Helper routines that are called without a package prefix (userwarnlog,
# notelog, config, FixupLinks, MakeWikiRegex, getDate, getUploader,
# isNotified, isDated, GetPageList, GetFullPageList, RemoveImageFromPage,
# DoIHaveMessages, loadNotificationList, saveNotificationList) are not
# defined in this file; they are expected to come from libBot.pl.

use strict;
use warnings;

use Date::Calc qw(Delta_Days Decode_Month Month_to_Text Today);
use Getopt::Long;

require "libBot.pl";

my $permit_interruptions = 0;   # Allow talkpage messages to stop the bot?
my $last_image = undef;
my @last_images;
my $task = "";                  # Set from --task; see the task table in the main block
my %users_notified;             # List of users notified. 0, undef = no; 1 = notified once; 2 = notified and second notice
my %notifications;              # List of user,image pairs, used to ensure that no user is ever notified about an image twice.
my %dont_notify = ();           # List of users to never notify
my %image_whitelist = ();       # Images to never remove

# Params for changing tasks
my ($remove_type, $removal_comment, $removal_prefix, $template_match,
    $uploader_warning, $uploader_warning_summary, $write_remove_log,
    $limit_by_date, $test_only);

GetOptions('task=s' => \$task);

# Generate a signature
sub sig {
    return " ~~~~~";
}

# No-error-checking removal routine, for special requests.
#
# Arguments (positional):
#   $image           - image title, used in log messages and the length check
#   $page            - title of the article to edit
#   $image_regex     - regex fragment matching the image name inside [[...]]
#   $removal_prefix  - if defined, the image markup is wrapped in an HTML
#                      comment starting with this text; if undef, the markup
#                      is deleted outright
#   $removal_comment - edit summary passed to Pearle::postPage
#
# Returns 0 if the page is a redirect or nothing matched, otherwise the
# number of substitutions made.  Exits the whole program if the first match
# is implausibly short or if there are more than 100 matches.
sub RemoveImageSpecial {
    my $image           = shift;
    my $page            = shift;
    my $image_regex     = shift;
    my $removal_prefix  = shift;
    my $removal_comment = shift;

    my $match_count = 0;
    my $match_len   = 0;

    # Fetch an article page
    my ($text, $editTime, $startTime, $token) = Pearle::getPage($page);

    if ($text =~ /#redirect/i) {
        Pearle::myLog("Redirect found for page [[$page]] (image [[:$image]])\n");
        userwarnlog("*Redirect found for page [[$page]] (image [[:$image]])\n");
        return 0;
    }

    # Remove the image
    my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)";   # Regex to match images
    Pearle::myLog("Regex 3: $regex3\n");
    notelog("Regex 3: $regex3\n");

    if ($text =~ /$regex3/) {
        # Length of the first match, used below as a plausibility check.
        $match_len = length($1);

        if (defined($removal_prefix)) {
            $match_count = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g;
        }
        else {
            $match_count = $text =~ s/$regex3//g;
        }

        if ($match_count) {
            # A match shorter than "[[" + name + "]]" cannot be a real image link.
            if ($match_len < (4 + length($image))) {
                notelog("*Short replacement of $match_len bytes in [[$page]]\n");
                Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in [[$page]] ($match_count matches). Exiting.\n");
                Pearle::myLog("Text:\n$text\n");
                exit;
            }
            if ($match_count > 100) {
                # NOTE(review): the message says "Skipping" but the program
                # exits here; behaviour left as in the original -- confirm intent.
                Pearle::myLog("Too many matches ($match_count) in page [[$page]]. Skipping.\n");
                notelog("Too many matches ($match_count) in page [[$page]]. Skipping.\n");
                exit;
            }
            if ($text =~ /-->\]/) {
                Pearle::myLog("Possible bracket mixup in page [[$page]]\n");
                userwarnlog(FixupLinks("*Possible bracket mixup in page [[$page]]\n"));
            }
        }
    }

    print "Num: $match_count Len: $match_len\n";

    if ($test_only) {
        notelog("Special removal for page\n");
    }
    else {
        Pearle::postPage($page, $editTime, $startTime, $token, $text, $removal_comment, "no");
    }

    return ($match_count);
}

%notifications   = loadNotificationList("./orphanbot.note");
%dont_notify     = loadNotificationList("./orphanbot.whitelist");
%image_whitelist = loadNotificationList("./orphanbot.imagewhitelist");

# The << INSERT ... >> markers are placeholders and must be replaced with
# real (quoted) credentials before the script can run.
Pearle::init(<< INSERT BOT NAME HERE >>, << INSERT BOT PASSWORD HERE >>, "./orphanbot.log","./cookies.pearle.txt");
Pearle::config(nullOK => 1);
config(username => << INSERT BOT NAME HERE >>);

if (!Pearle::login()) {
    exit;
}

my $last_run = 0;
my @stbot_images;

#while(1)
{
    $last_image =~ s/^[Ii]mage:// if (defined($last_image));   # Remove any prepended namespacing

    my @images;
    my $image;
    my $edited         = 0;
    my $images_removed = 0;
    my $nolimits       = 0;
    # my ($remove_type, $removal_comment, $removal_prefix, $template_match, $uploader_warning, $uploader_warning_summary, $write_remove_log, $limit_by_date); # Params for changing tasks

    @images = ();

    userwarnlog("=== Beginning set at " . time() . " for task '$task' ===\n");

    # Per-task configuration: where the image list comes from, what to say
    # when removing, which template text must appear on the image page, and
    # what (if anything) to tell the uploader.
    {
        if ($task eq "source") {
            my $cat = "Category:All images with unknown source";
            @images = Pearle::getCategoryImages($cat);
            $remove_type = 'normal';
            $removal_comment = "Removing image with no source information. Such images that are older than seven days may be deleted at any time.";
            $removal_prefix = "Unsourced image removed:";
            $template_match = "Unless the copyright status is provided|Unless this information is added to this page";
            $uploader_warning = "{{subst:User:OrphanBot/nosource|";
            $uploader_warning_summary = "You've uploaded an unsourced image";
            $write_remove_log = 1;
            $limit_by_date = 1;
            $nolimits = 0;
            $test_only = 0;
        }
        elsif ($task eq "copyright") {
            my $cat = "Category:All images with unknown copyright status";
            @images = Pearle::getCategoryImages($cat);
            $remove_type = 'normal';
            $removal_comment = "Removing image with no copyright information. Such images that are older than seven days may be deleted at any time.";
            $removal_prefix = "Image with unknown copyright status removed:";
            $template_match = "Unless the copyright status is provided|Unless this information is added to this page|This image was uploaded under good faith using the above tag";
            $uploader_warning = "{{subst:User:OrphanBot/nocopyright|";
            $uploader_warning_summary = "You've uploaded an image with unknown copyright";
            $write_remove_log = 1;
            $limit_by_date = 1;
            $nolimits = 0;
            $test_only = 0;
        }
        elsif ($task eq 'replaceable') {
            my $cat = "Category:All replaceable fair use images";
            @images = Pearle::getCategoryImages($cat);
            $remove_type = 'normal';
            $removal_comment = "Removing replaceable fair-use image.";
            $removal_prefix = "Replaceable fair-use image removed:";
            $template_match = "for which a free image might reasonably be found";
            $uploader_warning = undef;
            $uploader_warning_summary = undef;
            $write_remove_log = 1;
            $limit_by_date = 1;
            $nolimits = 0;
            $test_only = 0;
        }
        elsif ($task eq 'special') {
            # Special requests
            @images = Pearle::getLogArticles("upload", 408, 10, "Johnsatchmo");
            @images = map {$_->[0]} @images;
            notelog("Found " . scalar(@images) . " images\n");
            $remove_type = 'normal';    # Use the standard removal system;
            $removal_comment = "Removing image by request; see [[User talk:Carnildo#Bot help.3F]]" ;
            $removal_prefix = "Image with questionable copyright removed:";
            $template_match = undef;
            $uploader_warning = undef;
            $uploader_warning_summary = undef;
            $write_remove_log = 1;
            $limit_by_date = 0;
            $nolimits = 0;
            $test_only = 0;
        }
        else {
            notelog("Unknown task: $task\n");
            exit;
        }
    }

    if (scalar(@images) == 0) {
        print "Finished with category.\n";
        Pearle::myLog("Finished with category.\n");
        exit;
    }

    IMAGE:
    foreach $image (@images) {
        my $image_url;
        my $image_regex = $image;
        my $page;
        my @pages = ();
        my $page_remove_log;
        my ($day, $month, $year);

        # Fetch an image page
        my $query = "http://en.wikipedia.org/wiki/$image";
        my $image_text = Pearle::getURL(Pearle::escapeUrl($query));
        my $full_comment = "";

        $page_remove_log = '';
        $last_image = $image;

        if ($permit_interruptions and DoIHaveMessages($image_text)) {
            print "Talkpage message found; exiting on image $image.\n";
            Pearle::myLog("Talkpage message found; exiting on image $image.\n");
            exit;
        }

        if ($image_whitelist{$image}) {
            userwarnlog("*Image $image on whitelist\n");
            next IMAGE;
        }

        # Images from Commons
        if ($image_text =~ /Wikimedia Commons<\/a>. The description on its /) {
            userwarnlog("*Commons image [[:$image]] found\n");
            next IMAGE;
        }

        # The odd case of an image description page without an image.
        # $template_match is undef for some tasks; an undef pattern would
        # interpolate to the empty pattern, which Perl treats as "reuse the
        # last successful pattern", so it must be checked for definedness.
        if ($image_text =~ /<p>No file by this name exists; you can <a href=/
            and defined($template_match)
            and $image_text =~ /$template_match/)
        {
            userwarnlog("*Image [[:$image]] does not appear to exist.\n");
            my @historylist = Pearle::parseHistory($image);
            my $first_entry = pop @historylist;
            # The history may be empty or the entry may lack the field
            # compared here, so guard before dereferencing.
            if (defined($first_entry)
                and defined($first_entry->[4])
                and $first_entry->[4] eq 'STBotI')
            {
                push @stbot_images, $image;
            }
            next IMAGE;
        }

        # Check for image existance
        if ($image_text =~ /<p>No file by this name exists; you can <a href=/) {
            Pearle::myLog("Image [[:$image]] has been deleted.\n");
            notelog("Image [[:$image]] has been deleted.\n");
            next IMAGE;
        }

        # Check for image copyright tag
        if (defined($template_match) and ($image_text !~ /$template_match/)) {
            userwarnlog("*Image [[:$image]] in category does not have an appropriate template\n");
            next IMAGE;
        }

        if ($task eq 'source') {
            if ($image_text =~ /I, the creator of this work/) {
                next IMAGE;
            }
            if ($image_text =~ /title="Category:[^"]*[Ll]ogos"|title="Category:[^"]*[Cc]overs"/) {
                Pearle::myLog("*Image [[:$image]] with self-sourcing template found\n");
                next IMAGE;
            }
        }

        if ($task eq 'replaceable') {
            if ($image_text =~ /It is disputed whether or not this image violates/) {
                Pearle::myLog("*Disputed replaceable fair-use image [[:$image]] found\n");
                next IMAGE;
            }
        }

        my ($raw_image) = $image =~ /Image:(.*)/;
        $raw_image = MakeWikiRegex($raw_image) if (defined($raw_image));

        # Sanity check -- done before $raw_image is interpolated anywhere, so
        # a title without an "Image:" prefix never reaches the regex builder.
        if (!defined($raw_image) or $image !~ /$raw_image/) {
            my $shown = defined($raw_image) ? $raw_image : '';
            Pearle::myLog("Parse error on image [[:$image]] ($shown)\n");
            userwarnlog("*Parse error on image [[:$image]] ($shown)\n");
            next IMAGE;
        }

        if ($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i) {
            # Files without a common image extension may also be linked via
            # the Media: prefix.
            # NOTE(review): "(:?" is a capturing group with an optional
            # leading colon; it may be a typo for "(?:" -- left unchanged.
            $image_regex = "[ _]*(:?[Ii]mage|[Mm]edia)[ _]*:[ _]*${raw_image}[ _]*";
        }
        else {
            $image_regex = "[ _]*[Ii]mage[ _]*:[ _]*${raw_image}[ _]*";
        }

        Pearle::myLog("Image regex: $image_regex\n");
        notelog("Image regex: $image_regex\n");

        ($day, $month, $year) = getDate($image_text);

        # Notify the user
        my $uploader = getUploader($image_text);
        my $is_notified = 0;

        if (defined($uploader_warning) and defined($uploader)) {
            $is_notified = isNotified($image_text, $uploader, $image_regex, $image, \%notifications, \%dont_notify);
        }

        if (defined($uploader_warning) and 1 != $is_notified) {
            if (defined($uploader)) {
                if (!($users_notified{$uploader})) {
                    Pearle::myLog("Warning user $uploader\n");
                    userwarnlog("${uploader_warning}${image}}}" . sig() . "\n", $uploader, $uploader_warning_summary, $is_notified);
                    $notifications{"$uploader,$image"} = 1;
                    $users_notified{$uploader} = 1;
                }
                else {
                    Pearle::myLog("User $uploader has already been warned repeatedly\n");
                    $users_notified{$uploader} += 1;
                }
            }
            else {
                Pearle::myLog("Could not determine uploader for [[:$image]]\n");
            }
        }

        # Validate the tagging date once; all three parts must be present
        # before they are handed to Date::Calc.
        my $date_ok = 0;
        if (defined($year) and defined($month) and defined($day)) {
            $date_ok = Date::Calc::check_date($year, Decode_Month($month), $day) ? 1 : 0;
        }

        if (!$date_ok) {
            Pearle::myLog("Date error for image [[:$image]]\n");
            userwarnlog("*Date error for image [[:$image]]\n");
        }

        # Remove the image only if it was tagged at least 4 days ago, or if
        # the task does not limit by date at all.
        if (($date_ok and (Delta_Days($year, Decode_Month($month), $day, Today()) >= 4))
            or !($limit_by_date))
        {
            # Ignore any old removal logs
            $image_text =~ s/<ol>.*?<\/ol>//gs;

            if ($nolimits) {
                @pages = GetFullPageList($image, $image_text);
            }
            else {
                @pages = GetPageList($image, $image_text);
            }

            if (scalar(@pages) == 0) {
                notelog("Image $image may already be orphaned\n");
                Pearle::myLog("Image $image may already be orphaned\n");
            }
            if (scalar(@pages) > 3) {
                my $warningtext;
                $warningtext = "*Found image [[:$image]] on " . scalar(@pages) . " content pages\n";
                userwarnlog($warningtext);
            }
            if (scalar(@pages) > 0) {
                $images_removed += 1;
            }

            foreach $page (@pages) {
                print "Page for removal: $page\n";

                # Link the first occurrence of "image" in the edit summary
                # to the image page.
                my $parsed_removal_comment = $removal_comment;
                $parsed_removal_comment =~ s/image/[[:$image|image]]/;

                if (defined($remove_type) and $remove_type eq 'special') {
                    RemoveImageSpecial($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment);
                    Pearle::limit();
                }
                else {
                    if (my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment))   # Don't limit if we just touched the article
                    {
                        $page_remove_log .= "#[[$page]]\n";
                        notelog("Removed image: $hits hits.\n");
                        Pearle::myLog("Removed image $image from article $page\n");
                        Pearle::limit();
                    }
                }
                $edited = 1;
            }
        }
        else {
            Pearle::myLog("Recent image: notification only\n");
            notelog("Recent image: notification only\n");
        }

        # Update image description page
        if ($write_remove_log) {
            my $edited_idp = 0;

            # Log all removals on the image description page
            my ($text, $editTime, $startTime, $token);
            print "Will write\n";
            ($text, $editTime, $startTime, $token) = Pearle::getPage($image);

            if ($task eq "source") {
                if (!isDated($image_text)) {
                    my ($cur_y, $cur_m, $cur_d) = Today();
                    $cur_m = Month_to_Text($cur_m);
                    print "Changing date\n";
                    my $new_template = "{{no source|month=$cur_m|day=$cur_d|year=$cur_y}}";

                    # One pattern for both detecting and replacing the tag,
                    # so every template name that is recognised is also
                    # actually replaced (including "Di-no source").
                    my $nosource_tag = qr/\{\{(?:[Nn]o source|[Nn]sn|[Nn]osource|[Uu]nverified|Di-no source).*?\}\}/;

                    if ($text =~ s/$nosource_tag/$new_template/) {
                        $full_comment .= "Changing nosource template format; ";
                    }
                    else {
                        userwarnlog("*Template in [[:$image]] was probably subst'd\n");
                        Pearle::myLog("Template was probably subst'd\n");
                        $text .= "\n\n$new_template\n";
                        $full_comment .= "Adding nosource template. This may add a second template: the original was probably subst'd. ";
                    }
                    $edited_idp = 1;
                }
            }
            elsif ($task eq "copyright") {
                if (!isDated($image_text)) {
                    my ($cur_y, $cur_m, $cur_d) = Today();
                    $cur_m = Month_to_Text($cur_m);
                    print "Changing date\n";
                    my $new_template = "{{no license|month=$cur_m|day=$cur_d|year=$cur_y}}";

                    my $nolicense_tag = qr/\{\{(?:[Nn]o licen[cs]e|[Uu]nknown|[Nn]olicen[cs]e).*?\}\}/;

                    if ($text =~ s/$nolicense_tag/$new_template/) {
                        $full_comment .= "Changing nolicense template format; ";
                    }
                    else {
                        userwarnlog("*Template in [[:$image]] was probably subst'd\n");
                        Pearle::myLog("Template was probably subst'd\n");
                        $text .= "\n\n$new_template\n";
                        $full_comment .= "Adding nolicense template. This may add a second template: the original was probably subst'd. ";
                    }
                    $edited_idp = 1;
                }
            }

            if ($page_remove_log ne "") {
                $text .= "\n\nRemoved from the following pages:\n";
                $text .= FixupLinks($page_remove_log);
                $text .= "--~~~~\n";
                $full_comment .= "Listing pages that the image has been removed from";
                $edited_idp = 1;
                print "Remove log\n";
            }

            if ($edited_idp) {
                if ($test_only) {
                    notelog("Edited image description page\n");
                }
                else {
                    Pearle::postPage($image, $editTime, $startTime, $token, $text, $full_comment, "no");
                }
            }
        }

        # Pause longer after an image for which article pages were processed.
        if ($edited) {
            print "Sleeping for 10 seconds\n";
            sleep(10);
        }
        else {
            print "Sleeping for two seconds\n";
            sleep(2);
        }
        $edited = 0;
    }

    notelog("Saving notification list\n");
    saveNotificationList("./orphanbot.note", %notifications);
    Pearle::myLog("Finished with category.\n");
    notelog("Finished with category.\n");
}