User:Polbot/source/Make rds from altnames

From Wikipedia, the free encyclopedia

use strict;
use Perlwikipedia;
use URI::Escape;

# Earliest epoch second at which the next redirect may be written; the
# redirect-creation code pushes this forward after every edit it makes.
my $soonest_next_op = time;

# Create the wiki client and identify the bot through its user agent.
my $pw = Perlwikipedia->new;
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "\nStarting polbot, logging in.\n";
my $login_status = $pw->login('bot name', 'bot password');

# Any status other than 0 is treated as a failed login.
if ($login_status ne 0) {
        die "I can't log in.";
}
# Scratch list of page lines; reused for each list page that is read.
my @lines;

print "Reading needingdab file\n";

# Existing needingdab entries, keyed by the article name in the leading link.
my %needingdab_names = ();
my $needingdab_list = $pw->get_text('User:Polbot/altnames/needingdab');
@lines = split(/\n/, $needingdab_list);
for my $entry (@lines) {
        # Only bullet lines that start with a wikilink are entries; the whole
        # line is stored verbatim so it can be written back unchanged.
        next unless $entry =~ m/^\* \[\[([^]]*)\]\].*$/;
        $needingdab_names{$1} = $entry;
}

print "Reading in-process file\n";

# Entries that still need human disambiguation, keyed by main article name.
my %inprocess_names = ();
my $inprocess_list = $pw->get_text('User:Polbot/altnames/inprocess');
@lines = split(/\n/, $inprocess_list);

foreach my $line (@lines) {
        # Expected line shape:  * [[Main name]]|alt1|alt2|...*jobtitle
        next unless ($line =~ m/^\* \[\[([^]]*)\]\]\|([^*]*)\*(.*)$/);
        my ($main_name, $altnames, $jobtitle) = ($1, $2, $3);

        # Build a regex fragment that matches the main (article) name literally.
        # quotemeta escapes every regex metacharacter (not just parens and dots),
        # after which spaces and underscores are made interchangeable, as they
        # are in wiki titles.
        my $enc_main_name = quotemeta($main_name);
        $enc_main_name =~ s/\\ |_/[ _]/g;

        # True when a page links to the main article ([[Name]] or [[Name|...)
        # or passes it as the last parameter of a template (|Name}}).
        my $mentions_main = qr/\[\[\s*$enc_main_name\s*(?:\||\]\])|\|\s*$enc_main_name\s*\}\}/;

        print "\nReading $main_name ($jobtitle)\n";

        # Alternate names that could not be resolved automatically, each
        # annotated with what was found (r = redirect followed, f = dab link
        # followed, d = final page is a disambiguation page).
        my @unresolved;

        foreach my $altname (split(/\|/, $altnames)) {
                my $link_descr = $altname;
                $altname =~ s/\[\[(.*)\]\]/$1/;
                my $final_article_name = $altname;

                # Read the altname article.
                my $wiki = wikiread($altname, $pw);

                if ($wiki !~ /\S/) {
                        # No article exists. Make a rd!
                        # NOTE(review): wikiread also returns '' when every fetch
                        # attempt fails, so a persistent read error is
                        # indistinguishable from a missing page here -- confirm
                        # that overwriting is acceptable in that case.
                        $| = 1;
                        my $wait = $soonest_next_op - time;
                        $wait = 0 if $wait < 0;
                        print "Waiting $wait secs... ";

                        # Throttle edits: sleep (rather than spin) until the
                        # earliest permitted time; loop in case sleep returns early.
                        while ((my $remaining = $soonest_next_op - time) > 0) {
                                sleep $remaining;
                        }
                        $soonest_next_op = time + 9;

                        print "rd [[$altname]] to [[$main_name]]\n";
                        $pw->edit($altname, "#Redirect [[$main_name]]", "Redirecting to [[$main_name]], auto-generated by [[User:polbot]]");
                        next;
                }

                # The article exists.

                # Okay, here are The Rules:
                # 1. If it mentions $main_name, then drop the altname. End.
                # 2. If it's a redirect, then follow it, and check #1 again.
                # 3. If there's a link to [[anything (disambiguation)]], then follow it, and check #1 again.
                # 4. We won't be dropping it at this point. But if it's a dab, then note that.

                # These rules will fail in the following situations:
                # A. $altname is an article that dabs to [[x]], a dab page that mentions $main_name, but that isn't [[x (disambiguation)]]
                # B. $altname is a dab that links to a rd to $main_name.

                # 1. Does it mention the main article (perhaps even rd-ing there)?
                my $mentioned = ($wiki =~ $mentions_main);

                # 2. Is it a rd? If so, follow. The target stops at the first
                #    "]", "|" or "#" so that later links on the same line, piped
                #    labels and section anchors are not swallowed into the title.
                if (!$mentioned and $wiki =~ /\#\s*redirect\s*\[\[([^\]|#]+)[^\]]*\]\]/i) {
                        my $target = $1;
                        $target =~ s/^\s+|\s+$//g;

                        $final_article_name = $target;
                        $link_descr .= " r [[$final_article_name]]";
                        $wiki = wikiread($final_article_name, $pw);

                        # 1. again.
                        $mentioned = ($wiki =~ $mentions_main);
                }

                # 3. Does it link to a dab page? If so, follow.
                if (!$mentioned) {
                        my $dab_title;
                        if (($wiki =~ m/\[\[([^]]* \(disambiguation\))\]\]/)
                        or ($wiki =~ m/\{\{\s*(?:otherpersons2|otherpeople2)\s*\|\s*(.*?)\s*\}\}/)) {
                                # Explicit dab link, or a hatnote naming the dab page in full.
                                $dab_title = $1;
                        } elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\}\}/) {
                                # Parameterless hatnote: dab page is named after the current page.
                                $dab_title = "$final_article_name (disambiguation)";
                        } elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\|\s*(.*?)\s*\}\}/) {
                                # Hatnote with a parameter: dab page is named after the parameter.
                                $dab_title = "$1 (disambiguation)";
                        }

                        if (defined $dab_title) {
                                $final_article_name = $dab_title;
                                $link_descr .= " f [[$final_article_name]]";
                                $wiki = wikiread($final_article_name, $pw);

                                # 1. again.
                                $mentioned = ($wiki =~ $mentions_main);
                        }
                }

                # 4. Note when the page finally reached is itself a dab page.
                if ($wiki =~ m/\{\{(?:dab|disambig|disamb|disambiguation|hndis|namedab|bio-dab|hndisambig)(?:\}\}|\|)/i) {
                        $link_descr .= " d";
                }

                # Keep the annotated altname only if nothing reached mentions the main article.
                push @unresolved, $link_descr unless $mentioned;
        }

        # If no altnames are left there is nothing to record; otherwise put
        # the entry in inprocess.
        if (@unresolved) {
                $inprocess_names{$main_name} = "* [[$main_name]]|" . join('|', @unresolved) . "*$jobtitle";
        }
}

print "Merging old and new needingdab lists\n";

# An entry already on the needingdab list wins; in-process entries only fill gaps.
for my $name (keys %inprocess_names) {
        $needingdab_names{$name} ||= $inprocess_names{$name};
}

print "Writing needingdab list\n";

# One entry per line, ordered by article name.
my $wiki_code = join('', map { $needingdab_names{$_} . "\n" } sort keys %needingdab_names);
$pw->edit('User:Polbot/altnames/needingdab', $wiki_code, "Auto-updating based on input at inprocess list");

print "Finis!";

# Fetch the wikitext of a page, retrying when the fetch reports "0".
#
# Arguments:
#   $article    - title of the page to read
#   $connection - object providing get_text($title)
#
# Returns whatever get_text returned as soon as it is not the string "0".
# While it is "0" the fetch is repeated up to five more times, sleeping
# 1, 2, ... 5 seconds before each retry; if it is still "0" after that,
# the empty string is returned.
# NOTE(review): this code treats "0" as a failed fetch -- confirm against the
# client library's documentation.
sub wikiread {
        my ($article, $connection) = @_;

        my $text = $connection->get_text($article);

        for my $attempt (1 .. 5) {
                return $text if $text ne "0";

                # Back off a little longer before each successive retry.
                sleep $attempt;
                print "   retry. . .\n";
                $text = $connection->get_text($article);
        }

        # Out of retries: give up with an empty result if the last fetch failed too.
        return $text eq "0" ? '' : $text;
}