# From Wikipedia, the free encyclopedia
#!/usr/bin/env perl
# Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
# Copyright: 2006, Ævar Arnfjörð Bjarmason
# License: The DWTFYWWI LICENSE, see http://tools.wikimedia.de/~avar/COPYING
use strict;
use warnings;
use LWP::UserAgent;
use HTML::TreeBuilder;
# Entry point: fetch the URL given as the first command-line argument and
# print the titles of the links found on that page as two numbered wikitext
# lists, "Nonexisting" (red links) first and "Existing" (blue links) second.
my $arg = shift;

# Call help with explicit parentheses; the bare "&help" form implicitly
# forwards the caller's current @_ to the sub.
help() unless defined $arg;

my $ua = LWP::UserAgent->new(
    # NOTE(review): the empty User-Agent presumably replaces the default
    # libwww-perl identification on purpose -- confirm it is still wanted.
    agent => ''
);
my $res = $ua->get($arg);

# Stop on a failed request instead of parsing an error body as if it were
# the requested page and silently printing two empty lists.
die "Could not fetch $arg: " . $res->status_line . "\n"
    unless $res->is_success;

my %links = getlinks($res->content);

print "==Nonexisting==\n";
for my $title (@{ $links{red} }) {
    print "# [[$title]]\n";
}

print "==Existing==\n";
for my $title (@{ $links{blue} }) {
    print "# [[$title]]\n";
}
# Parse an HTML page and collect the titles of the links in its content area.
#
# Takes the page HTML as a single string argument. Returns a hash (as a flat
# key/value list) with two keys, each holding an array reference of titles:
#   red  => <a> elements whose class attribute is exactly "new"
#   blue => all other <a> elements, except those whose class attribute
#           contains "external" or "extiw"
# Only anchors inside the <div id="bodyContent"> element are examined; when
# the page has no such element both lists are returned empty.
sub getlinks
{
    my $tree = HTML::TreeBuilder->new_from_content(shift);
    my %links = (
        blue => [],
        red  => []
    );

    my $body = $tree->look_down(
        '_tag' => 'div',
        id     => 'bodyContent'
    );

    # Guard against a missing content div: calling look_down on an undefined
    # value would otherwise abort the script with a method-call error.
    if (defined $body) {
        my @blue = $body->look_down(
            '_tag' => 'a',
            sub {
                # An anchor without a class attribute is treated as having
                # an empty class, so it is compared without undef warnings.
                my $class = $_[0]->attr('class');
                $class = '' unless defined $class;
                $class ne 'new' && $class !~ / (?: external | extiw ) /x
            }
        );
        my @red = $body->look_down(
            '_tag' => 'a',
            class  => 'new'
        );

        @{ $links{blue} } = booya(\@blue);
        @{ $links{red} }  = booya(\@red);
    }

    # Only plain title strings are kept from here on, so the parse tree can
    # be released explicitly.
    $tree->delete;

    return %links;
}
# Extract the usable title attributes from a list of elements.
#
# Takes a reference to an array of objects that respond to attr('title').
# Returns the list of titles in input order, with one trailing newline
# removed from each; elements whose title is undefined or empty (after that
# removal) are left out.
sub booya
{
    my ($elements) = @_;
    my @titles;

    foreach my $element (@{$elements}) {
        my $title = $element->attr('title');
        if (defined $title) {
            chomp $title;
            push @titles, $title if length $title;
        }
    }

    return @titles;
}
# Print the one-line usage message (with the script name filled in) on
# standard output, then end the script with exit status 1.
sub help
{
    print "usage: $0 url\n";
    exit 1;
}