#!/usr/bin/perl
#
# Copyright 1994 Werner Almesberger.
# All rights reserved.
#
# See file COPYING for details.
#
#-----------------------------------------------------------------------------
#
# Known bugs:
#
#   Usually doesn't check for prepended backslashes, e.g. things like
#   \\begin{verbatim} would be processed incorrectly.
#
#   Tokenization should be done once at the beginning, not on the fly
#   with cleanup and check procedures at the end of each step.
#
#-----------------------------------------------------------------------------
#
$w = 75;
#
# default macros
#
$m{"\\\\ldots"} = "...";
#
# read the file
#
print STDERR "[".length($t)."] Reading the file\n";
$/ = "\000";
$t = "\n".<>."\n";
#
# universal markers
#
$N = "\000";	# non-character
$X = "\007";	# generic marker
$Y = "\010";	# another generic marker
$Z = "\011";	# yet another generic marker
$B = "\001";	# begin
$E = "\002";	# end
$BS = "\013";	# second begin
$ES = "\014";	# second end
$CO = "\003";	# curly open
$CC = "\004";	# curly close
#
# commands to the output formatter
#
$SI = "\020";	# increase indentation by one
$SO = "\021";	# decrease indentation by one
$B1 = "\022";	# one blank line
$B2 = "\023";	# two blank lines

sub xlat
{
    local ($l) = @_;

    $l =~ tr/~/ /;
    $l =~ s/\\([_~&%^\$\#\[\]|\-])/\1/g;# unescape special characters
    $l =~ s/\\,//g;			# remove small spaces
    $l =~ s/\\backslash\s*/$X/g;	# \backslash ->\
    if ($l =~ /\\([A-Za-z]+|.)/) {
	warn "unrecognized command $& ($l)";
	$l = $`."\n!!! UNRECOGNIZED COMMAND: $&\n$'";
    }
    $l =~ s/$X/\\/g;
    $l =~ tr/{}//d;			# delete stray curly braces
    $l =~ s/$CO/{/g;			# put escaped braces back
    $l =~ s/$CC/}/g;
    return $l;
}


#
# load macros
#
print STDERR "[".length($t)."] Loading macros\n";
while ($t =~ /\n%%(def|cmd)([^\n]*)\n/) {
    $t = $`."\n".$';
    $a = $1;
    $2 =~ /([^\\])=/ || die "= missing in $2";
    if ($a eq "def") {
	$m{$`.$1} = $';
	$c{$`.$1} = "";
    }
    else {
	$m{$`.$1} = "";
	$c{$`.$1} = $';
    }
}
#
# remove %%beginskip ... %%endskip pairs
#
print STDERR "[".length($t)."] Removing %%beginskip ... %%endskip pairs\n";
while ($t =~ /\n%%beginskip\s*\n/) { $t = $`.$B.$'; }
while ($t =~ /\n%%endskip\s*\n/) { $t = $`.$E.$'; }
while ($t =~ /$B[^$B$E]*$E/) { $t = $`."\n".$'; }
$t !~ /[$B$E]/ || die "%%beginskip/%%endskip mismatch";
#
#  process macros
#
print STDERR "[".length($t)."] Processing macros (may take a while)\n";
while (1) {
    $none = 1;
    for (keys %m) {
	while ($t =~ /$_/) {
	    $none = 0;
	    if ($c{$_} eq "") {
		eval "\$t = \$`.\"$m{$_}\".\$';";
	    }
	    else {
		eval "\$t = \$`.$c{$_}.\$';";
	    }
	    die "syntax error: $@" if $@;
	}
    }
    last if $none;
    print STDERR "[".length($t)."] "."  next pass\n";
# perfectionist's approach:
#    $l = 0;
#    for (keys %m) {
#	if ($t =~ /$_/) {
#	    if (length($&) > $l) {
#		$i = $_;
#		$l = length($&);
#	    }
#	}
#    }
#    last if !$l;
#    $t =~ /$i/ || die "internal error";
#    eval "\$t = \$`.\"$m{$i}\".\$'";
#    die "syntax error: $@" if $@;
#    print STDERR "[".length($t)."] "."$i\n";
}
#
# handle verbatim sections (we're not trying to be perfect here)
#
print STDERR "[".length($t)."] Handling verbatim sections\n";
while ($t =~ /\\begin{verbatim}([ \t]*\n)?/) { $t = $`."\n\n".$B.$'; }
while ($t =~ /\\end{verbatim}([ \t]*\n)?/) { $t = $`.$E."\n\n".$'; }
while ($t =~ /\\verb([^a-zA-Z \t\n])/ && $t =~ /\\verb$1([^$1]*)$1/) {
    $t = $`.$B.$1.$E.$';
}
while ($t =~ /$B([^$B$E]*)$E/) {
    ($a,$b,$c) = ($`,$1,$');
    die "no support for \\t yet, sorry" if $b =~ /\t/;
    $b =~ s/\\/\\backslash /g;
    $b =~ s/[~^_%#&{}\$\-]/\\$&/g;
    $b =~ s/[`']/\\$&~/g;
    $b =~ s/ /~/g;
    $b =~ s/\n\n\n/$B2/g;
    $b =~ s/\n\n/$B1/g;
    $b =~ s/\n/\\\\/g;
    $t = $a.$b.$c;
}
if ($t =~ /[$B$E]/) {
    if ($t =~ /..........[$B$E]........../) { print STDERR "$&\n"; }
    die "verbatim conflict";
}
#
# hide escaped curly braces
#
print STDERR "[".length($t)."] Hiding escaped curly braces\n";
$t =~ s/\\{/$CO/g;
$t =~ s/\\}/$CC/g;
#
# discard comments and italic corrections
#
print STDERR "[".length($t)."] Discarding comments and italic corrections\n";
while ($t =~ s/([^\\])%[^\n]*\n/$1/g) {};
$t =~ s|\\/||g;
#
# no math mode
#
print STDERR "[".length($t)."] No math mode\n";
while ($t =~ s/([^\\])\$/$1/g) {};
#
# remove tabs and massage blanks
#
print STDERR "[".length($t)."] Removing tabs and massaging blanks\n";
$t =~ s/\\ / /g;	# \cmd\ blah
$t =~ tr/ \t/ /s;
#
# various minor issues
#
print STDERR "[".length($t)."] Dealing with various minor issues\n";
$t =~ s/\\rightarrow\s*/->/g;
$t =~ s/\\quad\s*/~/g;
$t =~ s/\\qquad\s*/~~/g;
$t =~ s/\\vert/|/g;
$t =~ s/\\TeX/TeX/g;
$t =~ s/\\LaTeX/LaTeX/g;
$t =~ s/\\rm\s*//g;
$t =~ s/\\hbox{/{/g;
$t =~ s/\\protect//g;
$t =~ s/\\newpage\s*//g;
$t =~ tr/-/-/s;
$t =~ s/\n\n+/$B1/g;
#
# handle footnotes
#
print STDERR "[".length($t)."] Handling footnotes\n";
$t =~ s/\\footnote{/\\footnotemark\\footnotetext{/g;
$t =~ s/\\footnotemark/$X/g;
$t =~ s/\\footnotetext{/$Y/g;
while ($t =~ /$X([^$Y]*)$Y/) {
    ($a,$b,$c) = ($`,$',$1);
    $t =~ /^[^$Y]*$Y$B1/;
    $d = $';
    for ($s = "*"; $d =~ /$Z/; $d = $`.$Y.$') { $s .= "*"; }
    $a = $a.$s.$c;
    while ($b =~ /^([^}]*){([^{}]*)}/) { $b = $`.$1.$B.$2.$E.$'; }
    $b =~ /^([^{}]*)}/ || die "{ } confusion";
    ($b,$t) = ($1,$');
    $b =~ s/$B/{/g;
    $b =~ s/$E/}/g;
    $d = "$B1$Z\\begin{description}\\item[$s] $b\\end{description}$B1";
    if ($t =~ /$B1([^$Z][^$N]*)$/) { $t = $`.$d.$1; }
    else { $t = $t.$d; }
    $t = $a.$t;
}
$t =~ s/$Z//g;
if ($t =~ /[$X$Y$Z$B$E]/) {
    if ($t =~ /..............[$X$Y$Z$B$E]/) { print STDERR "HEY $&\n"; }
    die "footnote confusion";
}
#
# process simple tables ...
#
print STDERR "[".length($t)."] Processing simple tables\n";
while ($t =~ /\\begin{tabular}/) { $t = $`.$B.$'; }
while ($t =~ /\\end{tabular}/) { $t = $`.$E.$'; }
while ($t =~ /$B{([rlc|]+)}([^$B$E]*)$E/) {
    ($a,$b,$c,$d) = ($`,$',$2,$1);
    $c =~ s/\\\\/&/g;
    $c =~ s/[\s\n]*\\hline[\s\n]*/$X&/g;
    ($e = $d) =~ tr/|//cd;
    @d = ();
    while ($d =~ /^(\|*)[a-z](\|*)/) {
	push(@d,$&);
	$d = $';
    }
    @f = ();
    while ($c =~ /([^\\])&/) {
	push(@f,$`.$1);
	$c = $';
    }
    @w = ();
    $d =~ tr/|//d;
    $i = 0;
    for (@f) {
	next if $_ eq $X;
	$f = $i % @d;
	$_ = &xlat($_);
	$_ =~ s/^[\s\n]*//g;
	$_ =~ s/[\s\n]*$//g;
	if ($w[$f] < length($_)) { $w[$f] = length($_); }
	$i++;
    }
    $l = @d+2*length($e)-1;
    for (@w) { $l += $_; }
    $a .= "$B1";
    $i = 0;
    for (@f) {
	if ($_ eq $X) { $a .= ("-" x $l)."\\\\"; }
	else {
	    $f = $i % @d;
	    if ($d[$f] =~ /^\|/) { $a .= "| "; }
	    $g = $w[$f]-length($_);
	    if ($d[$f] =~ /l/) { $a .= $_.("~" x $g); }
	    if ($d[$f] =~ /c/) {
		$a .= ("~" x int($g/2)).$_.("~" x ($g-int($g/2)));
	    }
	    if ($d[$f] =~ /r/) { $a .= ("~" x $g).$_; }
	    $a .= " ";
	    if ($d[$f] =~ /\|$/) { $a .= "| "; }
	    if ($f == $#d) { $a .= "\\\\"; }
	    $i++;
	}
    }
    $t = $a.$b.$B1;
}
if ($t =~ /[$B$E$X]/) {
    if ($t =~ /(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)[$B$E$X](.|\n)(.|\n)(.|\n)(.|\n)(.|\n)(.|\n)/) { print STDERR "$&\n"; }
    die "\\begin/end{tabular} mismatch";
}
#
# process lists
#
print STDERR "[".length($t)."] Formatting lists\n";
while ($t =~ /\\begin{itemize}\s*/) { $t = $`.$B.$'; }
while ($t =~ /\\end{itemize}\s*/) { $t = $`.$E.$'; }
while ($t =~ /$B[^$B$E]*$E/) {
    ($a,$b,$c) = ($`,$&,$');
    while ($b =~ /\\item\s*/) { $b = $`.$X.$'; }
    while ($b =~ /$X([^$X]*)([$X$E])/) {
	$b = $`."- ".$SI.$SI.$1.$SO.$SO."\\\\"."$2".$';
    }
    $b =~ /$B([^$B$E]*)$E/;
    $t = $a.$SI.$SI.$B1.$1.$SO.$SO."$B1".$c;
}
$t !~ /[$B$E]/ || die "\\begin/\\end{itemize} mismatch";
while ($t =~ /\\begin{description}\s*/) { $t = $`.$B.$'; }
while ($t =~ /\\end{description}\s*/) { $t = $`.$E.$'; }
while ($t =~ /$B[^$B$E]*$E/) {
    ($a,$b,$c) = ($`,$&,$');
    while ($b =~ /\\item\[/) { $b = $`.$X."[".$'; }
    while ($b =~ /$X\[/) {
	($d,$e) = ($`,$');
	while ($e =~ s/\[([^\[\]]*)\]/$BS$1$ES/g) {};
	$e =~ /^([^\[\]]*)]\s*([^$X]*)([$X$E])/ || die "\item problem (1)";
	$b = $d.$1."~~".$SI.$SI.$2.$SO.$SO."\\\\".$3.$';
	$b =~ s/$BS/[/g;
	$b =~ s/$ES/]/g;
    }
    $b =~ /$B([^$B$E]*)$E/;
    $t = $a.$SI.$SI.$B1.$1.$SO.$SO.$B1.$c;
}
$t !~ /[$X]/ || die "\item problem (2)";
$t !~ /[$B$E]/ || die "\\begin/\\end{description} mismatch";
#
# process sections and labels
#
print STDERR "[".length($t)."] Processing sections and labels\n";
$LB = "\005";	# they don't necessarily have to be unique
$SC = "\006";
while ($t =~ /\\label{/) { $t = $`.$LB."{".$'; }
while ($t =~ /\\((sub)*)section{/) { $t = $`.$SC.$1."{".$'; }
$l = "";
while (1) {
    if ($t =~ /^([^$LB$SC]*)$LB{([^{}]*)}/) {
	$l{$2} = '"'.$l.'"';
	$t = $1.$';
    }
    if ($t =~ /$SC((sub)*){/) {
	($a,$b,$c) = ($`,$',$1);
	while ($b =~ /^([^}]*){([^{}]*)}/) { $b = $`.$1.$B.$2.$E.$'; }
	$b =~ /^([^{}]*)}\s*/ || die "{ } confusion";
	($b,$d) = ($1,$');
	$b =~ s/$B/{/g;
	$b =~ s/$E/}/g;
	$l = $b;
	$b = &xlat($b);
	if (($u = ("=","-","- ","")[length($c)/3]) ne "") {
	    $u = "\\\\".substr($u x length($b),0,length($b));
	}
        $t = $a.$B2.$b.$u.$B1.$d;
    }
    else {
	last;
    }
}
#
# handle references
#
print STDERR "[".length($t)."] Handling references\n";
$t =~ s/[Pp]age \\pageref({[^{}]*})/\\ref$1/g;
$t =~ s/\\pageref{[^{}]*}/???/g;
while ($t =~ /\\ref{([^{}]*)}/) {
    $t = $`.(defined($l{$1}) ? $l{$1} : "???").$';
}
#
# collapse whitespace
#
print STDERR "[".length($t)."] Collapsing whitespace\n";
$t =~ s/\\par\s*/\n\n/g;
$t =~ s/ *(\n+) */$1/g;
$t =~ tr/\n/ /;
$t =~ tr/ \t/ /s;	# again
#
# handle line breaks
#
print STDERR "[".length($t)."] Handling line breaks\n";
$t =~ tr/\n//d;
$t =~ s/\\\\/\n/g;
$t =~ s/\\par\s*/$B1/g;
#
# handle accents, umlauts, and double quotes
#
print STDERR "[".length($t)."] Handling accents, umlauts, double quotes ".
  "and hyphens\n";
$t =~ s/\\[`']([AEOUaeou])/$1/g;
$t =~ s/\\([`'])~/$1/g;
$t =~ s/\\"([AOUaou])/$1e/g;
$t =~ s/``/"/g;
$t =~ s/''/"/g;
#
# apply ultimate set of fixes to newlines
#
print STDERR "[".length($t)."] Applying ultimate set of fixes to newlines\n";
while ($t =~ s/([\n$B1$B2]+)([$SI$SO])/$2$1/g) {};
$t =~ s/([\n$B1$B2]*)\s+([\n$B1$B2]+)/$1$2/g;
$t =~ s/\n+/\n/g;
$t =~ s/\n?$B1[\n$B1]*/\n\n/g;
$t =~ s/\n*$B2[\n$B2]*/\n\n\n/g;
#
# translate what's left
#
print STDERR "[".length($t)."] Final translation\n";
$t = &xlat($t);
$t =~ s/^\s*//;
$t =~ s/\s*$//;
$t .= "\n";
#
# okay, now format and print it
#
print STDERR "[".length($t)."] "."Formatting (may take a while)\n";
$l = "";
$m = 0;
while ($t =~ /([$SI$SO\n]| +)/) {
    if ($` ne "" || substr($1,0,1) eq " ") {
	if (length($l)+length($`) > $w && $l ne "") {
	    print $l."\n";
	    $l = "";
	}
	if ($l eq "") { $l = " " x $m; }
	$l = $l.$`.(substr($1,0,1) eq " " ? $1 : "");
    }
    $t = $';
    if ($1 eq $SI) { $m++; }
    if ($1 eq $SO) { $m--; }
    if ($1 eq "\n") {
	print $l."\n";
	$l = "";
#	$t = s/^ *(\S.*)/\1/;
    }
}
print "$l\n" if $l ne "";
print STDERR "Done\n";
