User:Bikabot/Source/pass1.pl

From Wikipedia, the free encyclopedia

#!/usr/bin/perl -w -C
use strict;
use encoding "utf8";
use Perlwikipedia;
use Encode;

my $user = 'Bikabot';
my $pass = '***';
my $editor = Perlwikipedia->new($user);
$editor->{debug} = 1;
$editor->login($user,$pass);

my $template = "Template:Infobox animanga/Header";
my @articles = $editor->what_links_here($template);

my $count;
my $countmax;
my $counttotal = scalar(@articles);

$| = 1;
my $test = 0; # whether the current run is a dry run
$countmax = $test? 75 : $counttotal;

my $trial = 1; # whether the current run is a trial run
my $edits;
my $editsmax;
$editsmax = $trial? 100 : $counttotal;

my $waittime = 10;
my $logpage = "User:Bikabot/Pass1" .
   ($test? "Test" : ($trial? "Trial" : "")) . "Log";

my %success;
my %notag;
my $report_several_headers;
my $report_several_tags;
my $report_unparsable_tag;
my $report_unparsable_mangabox;
my $report_correct;
my $report_footer;

sub commit_delay {
    my $wait = $waittime;
    print "Waiting $wait seconds";
    while( $wait-- ) { sleep 1; print "."; }
    print "\n";
}

sub commit_text {
    my $title = shift;
    my $text = encode_utf8( shift );

    if( $test ) {
        return if rand(100) > 3;

        $title = "User:Bikabot/$title";
        $report_footer = "== Results committed to userspace ==\n"
            unless $report_footer;
        $report_footer .= "* [[$title]].\n";
    }
    
    #$title = "User:Bikabot/$title" if $trial; #FIXME

    my $r = $editor->edit( $title, $text,
        "Bikabot Pass 1: fixing demographic tags. " .
        "See [[User:Bikabot]] for details." );
    print "Committing to $title. ";
    print $r->status_line;
    print "\n\n";

    $edits++;
    commit_delay();
} 

sub commit_log {
    my @keys;
    my $report_success;
    my $report_notag;
    
    @keys = sort {$a <=> $b} keys %success;
    foreach my $c ( @keys ) {
        $report_success .=
            "\n=== Demographic tag copied to $c manga box(es) ===\n";
        $report_success .= join '', @{ $success{$c} };
    }
    
    @keys = sort {$a <=> $b} keys %notag;
    foreach my $c ( @keys ) {
        $report_notag .=
            "\n=== Empty demographic tag added to $c manga box(es) ===\n";
        $report_notag .= join '', @{ $notag{$c} };
    }

    my $dry = $test? ' (dry run)' : ($trial? ' (trial run)' : '');
    my $report = <<"EOF";
'''Bikabot pass 1 report'''$dry.

Total number of relevant pages: $counttotal pages.<br />
Done: $count/$countmax pages and $edits edits ($editsmax allowed).

== Success ==
$report_success
== No action required ==
$report_notag
=== Already in correct format ===
$report_correct
== Attention required ==
=== Pages with more than one animanga header  ===
$report_several_headers
=== Pages with more than one demographic tags ===
$report_several_tags
=== Pages with possibly unparsable demographic tag ===
$report_unparsable_tag
=== Pages with unparsable mangabox ===
$report_unparsable_mangabox
$report_footer
EOF

    $report = encode_utf8( $report );
    my $r = $editor->edit( $logpage, $report,
        "[[User:Bikabot|Bikabot]] pass 1 log$dry." );
    print "Logging. ";
    print $r->status_line;
    print "\n\n";
}

sub change_mangaboxes {
    my $textref = shift;
    my $line = shift;
    my $logentries = shift;
    my $logline = shift;

    if( $$textref =~ s/
        ^({{Infobox\ animanga\/Manga.*?)
        (\|\s*serialized)
        /<!--mangaboxdone-->$1$line$2/msxg
    ) {
        my $mangaboxes;
        while( $$textref =~ m/<!--mangaboxdone-->/g ) { $mangaboxes++ }
        push @{ $logentries->{$mangaboxes} }, $logline;
        $$textref =~ s/<!--mangaboxdone-->//sg;
        return $mangaboxes;
    }
    else {
        if( $$textref =~ m/{{Infobox animanga\/Manga/ ) {
            $report_unparsable_mangabox .= $logline;
            return -1;
        }
        else {
            push @{ $logentries->{'no'} }, $logline;
            return 0;
        }
    }
}

for my $hash (@articles) {
    last if ++$count > $countmax;
    last if $edits >= $editsmax;

    my %hash = %{$hash};
    next unless ( $hash{'type'} eq 'transclusion' );

    my $commit = 0;
    my $title = $hash{'title'};
    my $text = $editor->get_text($title);

    my $reportline = "* [[$title]].\n";

    my $infoboxes = 0;
    while( $text =~ m/{{Infobox animanga\/Header/g ) { $infoboxes++; }
    if( $infoboxes > 1 ) {
        $report_several_headers .= $reportline;
    }
    elsif( $text =~ s/
        ({{Infobox\ animanga\/Header.*?)
        (\|\s*demographic\s*=\s*([^\n]+)\n\s*)
        (?=\|)/$1/sx
    ) {
        my $demographicline = $2;
        my $demographic = $3;
        
        $reportline .= ":''Demographics:'' $demographic.\n";

        if( ($demographic =~ tr/[//) > 2 ) {
            $report_several_tags .= $reportline;
        }
        elsif( $1 =~ m/{{Infobox animanga\/Manga/s ) {
            $report_correct .= $reportline;
        }
        else {
            $commit = change_mangaboxes( \$text, $demographicline,
                \%success, $reportline) >= 0;
        }
    }
    elsif( $text =~ m/demographic\s*=/ ) {
        $report_unparsable_tag .= $reportline;
    }
    else {
        $commit = change_mangaboxes( \$text, "| demographic = \n",
            \%notag, $reportline) > 0;
    }

    print $reportline;
    commit_text( $title, $text ) if $commit;
    commit_log() unless ( $count%50 );
}

$count--;
commit_log();