Wikipedia:WikiProject Chemistry/Structure drawing workgroup/Mysid's script

From Wikipedia, the free encyclopedia

#!/usr/bin/perl
#Modify svg file from BKchem to work with librsvg, for use on Wikimedia.
use Image::Magick;
 
# Exactly one argument is required: the BKchem SVG file to convert.
@ARGV == 1 or die "Usage: $0 FILENAME.svg\n";
 
#Open files
$infile = $ARGV[0];
open INPUT, '<', $infile or die "Could not read $infile: $!\n";
# The first pass writes to an intermediate file named after the input:
# a trailing ".svg" (any letter case) is replaced by "1.svg", and if the name
# has no such extension "1.svg" is simply appended.  The pattern is anchored at
# the end of the name and the dot is escaped, so (a) only the real extension is
# touched and (b) the result can never equal $infile -- opening it for writing
# must not truncate the file we are about to read.
$outfile1 = $infile;
$outfile1 =~ s/(?:\.svg)?\z/1.svg/i;
open OUTPUT, '>', $outfile1 or die "Could not write $outfile1: $!\n";
 
# First pass: copy INPUT to OUTPUT one line at a time, rewriting the parts of
# the BKchem output that librsvg cannot handle.
while (<INPUT>) {
    #I must skip over "defs" blocks, because they have objects with relative
    #dimensions, so they are copied through untouched.
    #TODO: find a cleaner way to detect these blocks.
    if (/defs/) {
        print OUTPUT $_;
        my $line = $_;
        # Copy lines verbatim up to and including the one containing "/defs".
        # <INPUT> returns undef at end of file; without the defined() check an
        # unterminated block would keep this loop spinning forever.
        while ($line !~ /\/defs/) {
            $line = <INPUT>;
            last unless defined $line;
            print OUTPUT $line;
        }
        next;
    }
    #Sans is the most general font definition we can use, and librsvg chokes
    #on Helvetica.
    s/helvetica/Sans/gi;
    if (!/viewBox/ && /\d/) {
        #Round every decimal number to one place, except on the line that
        #carries the "version" attribute.
        s/(\d+\.\d+)/sprintf("%.1f", $1)/ge unless /version/;
        #and fix font sizes for super- and subscripts
        s/75\%/9pt/g;
    }
    # Replace baseline-shift="super" with an explicit y position 4 units above
    # the y of the enclosing text, then wrap the text following the closing
    # </tspan> in a tspan that restores the original y.
    if (/y="([\d\.]+)">.*<tspan baseline-shift="super"/) {
        my $vy = $1;
        my $oy = $vy - 4;
        s/baseline-shift="super"/y="$oy"/g;
        s/<\/tspan>([^<]+)</<\/tspan><tspan y="$vy">$1<\/tspan></g;
    }
    # Same treatment for baseline-shift="sub", shifted 3.25 units the other way.
    if (/y="([\d\.]+)">.*<tspan baseline-shift="sub"/) {
        my $vy = $1;
        my $oy = $vy + 3.25;
        s/baseline-shift="sub"/y="$oy"/g;
        s/<\/tspan>([^<]+)</<\/tspan><tspan y="$vy">$1<\/tspan></g;
    }
    #write each line out after mangling it.
    print OUTPUT $_;
}
#close the files (done with the original input file; the output file becomes
#the input for the next pass).  Buffered write errors only surface at close,
#so the write handle is checked.
close OUTPUT or die "Could not write $outfile1: $!\n";
close INPUT;
 
 
# Second pass: read the intermediate file back and drop redundant groups, i.e.
# every "</g>" line that is immediately followed by a line re-opening a
# <g stroke="#000000" stroke-width="..."> group.  Both lines are removed.
open INPUT, '<', $outfile1 or die "Could not read $outfile1: $!\n";
# Final output name: a trailing ".svg" (any letter case) becomes "2.svg"; if
# the input name has no such extension, "2.svg" is appended.  Anchoring at the
# end of the name guarantees the result never equals $infile.
$outfile2 = $infile;
$outfile2 =~ s/(?:\.svg)?\z/2.svg/i;
open OUTPUT, '>', $outfile2 or die "Could not write $outfile2: $!\n";
# A "</g>" line is held back until the following line is known.  Every line is
# examined exactly once, so runs of consecutive "</g>" lines are handled the
# same way regardless of how many there are, and a "</g>" on the last line of
# the file is still written out.
my $pending;
while (<INPUT>) {
    if (defined $pending) {
        if (/<g stroke="\#000000" stroke-width="[\d.]+">\s/) {
            #Redundant close/open pair: discard both lines.
            undef $pending;
            next;
        }
        print OUTPUT $pending;
        undef $pending;
    }
    if (/<\/g>/) {
        $pending = $_;
    } else {
        print OUTPUT $_;
    }
}
print OUTPUT $pending if defined $pending;
#clean up
close OUTPUT or die "Could not write $outfile2: $!\n";
close INPUT;
# The intermediate file is no longer needed; failing to remove it is not fatal.
unlink $outfile1 or warn "Could not remove $outfile1: $!\n";
print "$outfile2 written\n";