From Wikipedia, the free encyclopedia
#!/usr/local/bin/perl -w
use strict;
use LWP;
if (@ARGV < 2) {
print "Usage:\n";
print "$0 <wiki source file> <url>\n";
exit(1);
}
my $ua = LWP::UserAgent->new(
agent => ''
);
# First get the wiki source
my $srcfile = shift;
# Then get the rendered page to establish whether a link is blue or red
my $url = shift;
my $resp = $ua->get($url) or die "Can't get page '$url'";
my $html = $resp->content;
# Extract the relevant links, taking title and text
my %links = ();
while ($html =~ m!<a href="[^"]+" (class="new" )?title="([^"]+)">(.*?)</a>!gsi) { #"
unless ($1) { $links{$2}= $3 }
}
# Now check the source against the text
# Only pull out the first link on a line beginning with ":"
# Any subsequent lines which appear "under" this line are included with it
# This means everything up to either a blank line or a line beginning with a lone '#'
my $good = "";
my $bad = "";
my $lst = 0;
open IN, "$srcfile" or die "Can't open $srcfile";
while (<IN>) {
if (/^#[^:#*;]/) {
my ($lt) = /\[\[(.*?)\]\]/;
unless ($lt) { $lst = 0; next }
my ($link, $text);
if ($lt =~ /\|/) { ($link, $text) = $lt =~ /([^|]+)\|(.+)/ }
else { $link = $text = $lt }
$lst = $links{$link} ? \$good : \$bad;
$$lst.=$_;
} elsif (/^\s*$/) {
$lst = 0;
} elsif ($lst) {
$$lst.=$_;
}
}
close IN;
print "Red links:\n";
print "$bad\n";
print "Blue links:\n";
print "$good\n";