#!/usr/bin/perl
# striprtf.pl, v1.0
# strips unneeded RTF tags (Winword 7+8) from RTF file to minimize filesize
# apparently hcrtf already strips unneeded fonts (according helpdeco)
# but you can easily figure out other optimizations, 
# styles /s[0-9]+ are ignored
#
# written and (c) 1999 by Reini Urban <rurban@xarch.tu-graz.ac.at>
# free to use, copy and modify. no warranties

sub strcase {
    return uc $_[0];
}

# strip all unneeded fonts from the header, in two passes
sub strip_fonts {
    undef $/;
    open (IN,  "< $base") or return;
    print "\nprocessing $base...";
    $_ = <IN>;
    print "\nfilesize: ", length($_);
    $_ or die "\nfile not found or empty";

    # search highest font id, only in header
    m/\}\{\\colortbl/;
    $fonttbl = $`; $rest = '}{\colortbl' . $';

    print "\nfonttbl size: ", length($fonttbl);
    print "\nrest size: ", length($rest);

    @fonts = ($fonttbl =~ m/\{\\f[0-9]{1,4}?[\\ ]/mg);
    @fonts = map { substr ($_,2,length($_)-3); } @fonts;

    #print "\nfonts: @fonts";
    print "\nfonts: @fonts[0]-@fonts[$#fonts]";
    # %used_fonts = map { $_, 1} @fonts;

    # first pass: => %used_fonts, list of used fonts
    print "\nfirst pass...";
    study $rest;
    foreach $f (@fonts) {
      if ($rest =~ m/\\$f[\\ ]/) {
        print "\n  found ", $f unless $used_fonts{$f};
        $used_fonts{$f}++;
        $fonts++;
      }
    }
    #print "\n  control list: ";
    #foreach $f (keys %used_fonts) {
    #    print $f," " if $used_fonts{$f};
    #}

    # second pass:
    # remove all fonts not in %used_fonts
    print "\nsecond pass: remove ",
          $#fonts-$fonts,
          " unused fonts from the header...";
    $fonttbl =~ s/\n//g;
    foreach $f (@fonts) {
        unless ($used_fonts{$f}) {
            # take care of {\falt }
            $fonttbl =~ s/\{\\$f\\.*?\{.*?\};\}//g;
            $fonttbl =~ s/\{\\$f\\.*?\}//g;
            $changed++;
        }
    }
    # also renumber it
    $i=0;
    foreach $f (@fonts) {
        if ($used_fonts{$f}) {
            print "\n  renumber $f to f$i";
            $fonttbl =~ s/\\$f([\\ ])/\\f$i$1/g;
            $rest    =~ s/\\$f([\\ ])/\\f$i$1/g;
            $changed++;
            $i++;
        }
    }

    # this didn't work sometimes, so it's omitted
    if ($winw8) {
      print "\n  remove some unsupport winhelp tags (from Winword8)...";
      $changed =+ $rest =~ s/\\nowidctlpar//g;
      $changed =+ $rest =~ s/\\widctlpar//g;
      # I needed this, because of lazy winword formatting.
      $changed =+ $rest =~ s/\{\\caps\\uldb (.*?)\}/"{\\uldb " .&strcase($1)."}"/eg;
      $changed =+ $rest =~ s/\{\\caps (.*?)\}/"{".&strcase($1)."}"/eg;
      $changed =+ $rest =~ s/\{([^\\]*?)\}/$1/g;
    }

    close IN;
    if ($changed) {
        open (OUT, "> $base.new") || return;
        print OUT $fonttbl;
        print OUT $rest;
        close OUT;
    }
}


while ($base=shift) {

    strip_fonts();

    if (!$changed) {
        print "\nnothing changed";
        next;
    } else {
        print "\n",$#fonts-$fonts, " fonts stripped";
        print "\n$changed fixes at all";
    }
    unlink "$base.bak" if -f "$base.bak";
    rename ("$base", "$base.bak");
    rename ("$base.new", "$base");
}
