Wikipedia:WikiProject Missing encyclopedic articles/Mw links

From Wikipedia, the free encyclopedia

#!/usr/bin/env perl
# Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
# Copyright: 2006, Ævar Arnfjörð Bjarmason
# License: The DWTFYWWI LICENSE, see http://tools.wikimedia.de/~avar/COPYING

use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;

# The single command-line argument is the URL of the page to scan.
my $arg = shift;

# Called with parentheses so that help() receives an empty argument list
# instead of implicitly sharing the current @_ (which is what &help does).
help() unless defined $arg;

# An explicitly empty agent string is passed instead of letting LWP use
# its default.
# NOTE(review): presumably the target site rejected the default agent -- confirm.
my $ua = LWP::UserAgent->new(
        agent => ''
);

my $res = $ua->get($arg);

# Stop right away on a failed request: otherwise the error response would
# be handed to the HTML parser and the real cause of the failure would be
# hidden behind a later, unrelated error or an empty report.
die "Could not fetch $arg: " . $res->status_line . "\n"
        unless $res->is_success;

# %links has two keys, 'red' and 'blue', each holding an array reference
# of link titles.
my %links = getlinks($res->content);

# Emit both lists as numbered wikitext list items of [[...]] links.
print "==Nonexisting==\n";
for my $i (@{$links{red}}) {
        print "# [[$i]]\n"
}

print "==Existing==\n";
for my $i (@{$links{blue}}) {
        print "# [[$i]]\n"
}

# getlinks($html)
#
# Parses the HTML document passed as the first argument and collects the
# 'title' attributes of the <a> elements found inside <div id="bodyContent">.
#
# Returns a hash (as a flat list) with two keys:
#   red  => array ref of titles of anchors whose class is exactly 'new'
#   blue => array ref of titles of anchors whose class is not 'new' and
#           does not contain 'external' or 'extiw'
#
# Both lists are empty (and a warning is printed) when the document has
# no bodyContent div.
sub getlinks
{
        my $tree = HTML::TreeBuilder->new_from_content(shift);

        my %links = (
                blue => [],
                red => []
        );

        my $yummie = $tree->look_down(
                '_tag' => 'div',
                id => 'bodyContent'
        );

        # look_down returns undef in scalar context when nothing matches;
        # calling a method on that would abort the whole script.
        if (defined $yummie) {
                my @blue = $yummie->look_down(
                        '_tag' => 'a',
                        sub {
                                # Anchors without a class attribute yield undef;
                                # treat that as an empty class so the comparisons
                                # below need no warning suppression.
                                my $class = $_[0]->attr('class');
                                $class = '' unless defined $class;

                                $class ne 'new'
                                        && $class !~ / (?: external | extiw ) /x
                        }
                );

                my @red = $yummie->look_down(
                        '_tag' => 'a',
                        class => 'new'
                );

                # booya() copies the title strings out of the elements, so the
                # results stay valid after the tree is released below.
                @{$links{blue}} = booya(\@blue);
                @{$links{red}} = booya(\@red);
        }
        else {
                warn "no <div id=\"bodyContent\"> found in document\n";
        }

        # Explicitly release the parse tree once the titles are extracted.
        $tree->delete;

        return %links;
}

# booya(\@elements)
#
# Takes a reference to an array of objects that respond to attr() and
# returns the list of their 'title' attribute values, in input order.
# Elements without a title are skipped; a trailing newline is chomped off
# each title, and titles that are empty after chomping are dropped.
sub booya
{
        my ($elements) = @_;

        # scalar() keeps attr() in scalar context, one value per element.
        my @titles = grep { defined $_ }
                     map  { scalar $_->attr('title') } @$elements;

        chomp @titles;

        my @kept = grep { $_ ne '' } @titles;

        @kept
}

# help()
#
# Prints a one-line usage summary (naming the script via $0) to standard
# output and terminates the program with exit status 1.
sub help
{
        print "usage: $0 url\n";
        exit 1;
}