User:XLinkBot/Code/Diffparser.pm

From Wikipedia, the free encyclopedia

package DiffParser;

use LWP::UserAgent;

# Shared HTTP client that parse() uses to download diff pages. It identifies
# itself to the server with the User-Agent string "LinkWatcher/2.0".
my $diffFetcher = LWP::UserAgent->new(agent => "LinkWatcher/2.0");

# parse($diffUrl)
#
# Downloads the diff page at $diffUrl and returns the text fragments that the
# page marks as added.
#
# Parameters:
#   $diffUrl - URL of the diff page. "&diffonly=1&action=render" is appended
#              verbatim, so the URL must already carry a query string.
#
# Returns:
#   A list of HTML fragments in page order (its element count in scalar
#   context):
#     * for an added-line cell that contains <span class="diffchange">
#       markers, the content of every marked span;
#     * for a cell without markers, the whole cell content, unless that exact
#       text also occurs among the removed lines (i.e. it is not reported
#       as an addition).
#   The list is empty when the HTTP request fails or nothing was added.
sub parse {
    my $diffUrl = shift;
    $diffUrl .= "&diffonly=1&action=render";

    my @added;

    # An unsuccessful response carries no diff table; stop here rather than
    # scanning the body of an error page.
    my $response = $diffFetcher->get($diffUrl);
    return @added unless $response->is_success;
    my $diffContent = $response->content;

    # /s lets a cell span several lines of the page source.
    my @addedCells
        = $diffContent =~ m{<td class="diff-addedline"><div>(.*?)</div></td>}sg;
    my @removedCells
        = $diffContent =~ m{<td class="diff-deletedline"><div>(.*?)</div></td>}sg;

    # All removed text in one string, with the in-line change markers (and
    # their content) stripped, so that whole added cells can be looked up in
    # it below.
    my $removedTotal = join(' ', @removedCells);
    $removedTotal =~ s{<span class="diffchange">.+?</span>}{}g;

    foreach my $cell (@addedCells) {
        my @changes = $cell =~ m{<span class="diffchange">(.*?)</span>}g;

        if (@changes) {
            # The cell is a modified line: report only the marked parts.
            push @added, @changes;
        }
        elsif (index($removedTotal, $cell) < 0) {
            # Literal substring test. index() is used instead of
            # m/\Q$cell\E/ because an empty cell would yield an empty
            # pattern, which Perl replaces with the last successfully
            # matched regex.
            push @added, $cell;
        }
    }

    return @added;
}