#!/usr/bin/perl -w
#
# Script to convert GAP manual TeX files to HTML
# Usage:
#  convert.pl [-csti] [-f <frontpage>] [-n <sharepkg>] <doc-directory> [<html-directory>]
#
# Requirements: Perl (might need to edit the first line of this file)
#               TtH is not strictly necessary but very desirable to treat
#               formulas.
#
# Caveats:
#
#  1. This script assumes that the .toc, .lab and .bbl files are up-to-date
#     with the .tex files and will almost certainly fail horribly if they
#     are not.
#
#  2. The output files are CxxxSxxx.htm, (not .html) plus chapters.htm,
#     theindex.htm and biblio.htm, except when called with the -c option
#     (in which case, there are CHAPxxx.htm files instead of CxxxSxxx.htm).
#     A (front page) file index.htm is assumed, but not created.
#     Not all servers will serve .htm files as HTML without adjustments.
#
#  3. The script assumes that the .tex files comply with GAP conventions,
#     including unwritten ones. It tries to follow the behaviour of TeX
#     assuming those conventions. The on-line browser attempts to provide
#     an ASCII equivalent. See BUGS.
#
#  4. The hierarchy of the HTML manuals assumed is of the following form:
#
#          <GAPDIR>/
#                   doc/
#                       htm/
#                           <main>
#                   pkg/
#                       <pkg>/
#                             htm
#
#     for each main manual <main> (in: ref, ext, tut, prg, new) and each
#     share package <pkg>. To make inter-linking between manuals work,
#     one should generally use the -c option for everything, (or not use
#     it for everything). Linking to share package manuals from the main
#     manual can only be expected to work if the share package manuals
#     are created using this converter.
#
#  5. Only the manual.lab files for books that are referenced via the
#     \UseReferences and \UseGapDocReferences commands in the manual.tex
#     file of the book being converted (and the book's own manual.lab
#     file, of course) are read. Make sure all the \UseReferences and
#     \UseGapDocReferences commands needed are present! (The TeX-produced
#     manuals will be missing lots of cross-references also, if some are
#     missing.) You will get `Bad link' messages if you have some missing.
#
# Options:
#
#  -c  file-per-chapter mode: Generates one HTML file CHAPxxx.htm
#      for each chapter; sections are level 2 headings and anchors
#      CHAPxxx.htm#SECTxxx.
#      This is intended for local browsing, especially under MS-DOS.
#      It may be used with the -n (share package) option.
#
#  -f <frontpage>
#      Adds a "Top" link to link <frontpage> to each manual page,
#      only available if -n option is also used.
#
#  -s  silent running: Conversational messages are suppressed.
#
#  -n <sharepkg>
#      We are not building the main manual but the one for the share
#      package <sharepkg>. To get cross references to the main library
#      right, it assumes that the share package is in the right place.
#      The -c option may be used with this option.
#
#  -i  index: Only one index file is produced.
#
#  -t  tex-math: Runs `tth' (which must be installed on the local system)
#      to produce better HTML code for formulae. (It would be possible to
#      replace tth by another conversion, for example TeXexplorer, but
#      (at least) the line calling `tth' would need to be modified.)
#  -u  Like -t, but uses `tth -u1' to produce unicode.
#
#  <doc-directory>
#      The directory where all the needed .tex, .toc, .lab and .bbl
#      files are located.
#
#  <html-directory>
#      The directory (which should already exist) in which to put
#      the generated .htm files. Defaults to the current directory,
#      if omitted.
#
# Example usage:
#      convert.pl -n mypkg doc htm         # in directory .../pkg/mypkg
#      convert.pl -t -n mypkg doc htm      # ditto previous + use tth for maths
#      convert.pl -t -n mypkg -c doc htm   # ditto previous + 1 file per chapter
#      convert.pl -t -c ../ref ref         # (for Ref manual) in dir .../doc/htm
#
# FEATURES (and intended departures from the TeX behaviour)
#    . Now interprets 2nd argument of an \atindex command if it is
#      of form @... and ignores the first argument, or otherwise it
#      interprets the first argument. Interprets ! as a comma and
#      indices output have no sub-headers.
#    . The @... component of \> commands is ignored. The assumption
#      is that for: \>`...'{...}@{...} the @{...} component is just
#      the {...} with font changes.
#    . In a \beginitems ... \enditems environment everything is indented
#      except for the item headers, rather than just the paragraph
#      following the item header.
#    . By default, the \beginlist ... \endlist environment is interpreted
#      as a compact description list. By adding %unordered or %ordered...
#      markup it will be interpreted as either an unordered or ordered
#      list respectively (see the ext manual for details).
#    . There are spacing differences e.g. \begintt ... \endtt etc.
#      environments are not indented.
#    . Supports all accents of TeX, in probably the best way currently
#      possible with HTML.
#    . Treats PseudoInput chapters in the `same' way as Input chapters.
#    . With -t switch announces the version of TtH used.
#    . Now supports %display{nontex}, %display{nontext} and
#      %display{nonhtml} variants of %display environment.
#    . References to subsections are now interpreted as one would expect.
#
# BUGS (and known departures from the TeX behaviour)
#    . $a.b$ is only interpreted correctly in -t mode.
#    .
#      The citation keys that appear are the .bib file keys rather
#      than the keys BibTeX constructs with the `alpha' bib-style.
#
# TODO
#    . Refine macro_replace subroutine so it can also be used to purge
#      2nd arg of \atindex macros.
#    . For -t mode, scan for \def commands in manual.tex and write
#      to TTHIN (tthmacros.tex). Should we only look for a block
#      demarcated by %mathsmacros ... %endmathsmacros ?
#      These \def commands are only intended for such font
#      changing commands as: \def\B{{\cal B}} (`tth' provides a
#      script-type font).
#    . Provide a table environment, if/when a \begintable ...
#      \endtable environment is added to gapmacro.tex.
#
#############################################################################

# Check PERL version
#
$] > 5 or die "Needs perl 5";

use Getopt::Std;

#
# Global variables
#
#  $dir         -- the full pathname of the input directory, including a trailing /
#  $odir        -- the full pathname of the output directory, including a trailing /
#  $opt_c and $opt_s set by getopts()
#  @chapters    -- the chapters data structure
#  IN           -- the current input file (outputfiles are handled by select)
#  $footer      -- the trailer put on every page
#  $indexcount  -- used within chapters to number the index anchors
#  $lastnumchap -- number of last numerically numbered chapter
#

# These match chapter and section lines in a .toc file
# ($chapexp captures: chapter number or appendix letter, chapter name;
#  $secexp captures: chapter number or letter, section number, section name)
#
$chapexp = '\\\\chapcontents\s+\{((?:\d+|[A-Z]))\}\s*\{(.+)\}\s*\{\d+\}';
$secexp = '\\\\seccontents\s+\{((?:\d+|[A-Z]))\.(\d+)\}\s*\{(.+)\}\s*\{\d+\}';
#$ignoreexp = '\\\\tocstrut|\\\\appno|\\\\seccontents\s+\{\d+\}';
$lastnumchap = 0;

# Variable that is set to 2 inside a nest of \itemitem s of a
# \beginlist ... \endlist environment
#
$listdepth = 0;

# This is augmented each time a line: \Package{...} is read in a manual.tex
# file, so that macro_replace knows to set a {\...} macro in sans-serif.
#
$sharepkg = "";

# The books converted to HTML with this converter
# The values set are: 0 or 1 according to whether or not -c was used.
#
%convertbooks = ();

# This is added to when scanning for \UseGapDocReferences.
#
%gapdocbooks = ();

# Types of href label are:
# 0 (non -c books) : C<MMM>S<NNN>.htm
# 1 (-c books)     : CHAP<MMM>.htm#SECT<NNN>
# 2 (== $gapdoc)   : chap<M>.html#s<N>ss0
#
# It would be nice to support subsections properly like GapDoc,
# but this involves creating a subsection data-structure modelled
# on section, which is a mite non-trivial (maybe ... if I find time).
# For now in-text references go to the beginning of the chapter.
#
$gapdoc = 2;

# sansserif:
#
# Used mainly to set GAP in sans serif font. Inside <title> ... </title>
# there should *not* be any tags, since they are not translated there by
# web browsers, and hence sansserif should *not* be applied to anything
# that ends up in the <title> ... </title> field, but *is* quite appropriate
# for the header in the <h1> ... </h1>
# field at the top of the body of an
# HTML file and anywhere else within the body of an HTML file.
#
# Arguments: $name -- the text (plain or already-HTML) to be set in
#                     sans serif.
# Returns:   $name wrapped in a <font face=...> element.  Because the
#            result contains a tag it must not be used for text that ends
#            up in the document title.
#
sub sansserif {
    my ($name) = @_;
    # The font element had been lost from this string, which turned the
    # routine into a no-op; it is restored here.
    # NOTE(review): the face list is a reconstruction -- confirm against
    # the project's reference copy of this script.
    return "<font face=\"Gill Sans,Helvetica,Arial\">$name</font>";
}

# booktitle_body:
#
# This is for generating the title of a document that goes in the
# <h1> ... </h1>
# field at the top of the body, as opposed to the title
# that goes in the <title> ... </title> field which should be unembellished.
#
# Arguments: $bktitle     -- the plain book title
#            @prog_or_pkg -- names (program, packages) occurring in the title
# Returns:   the title in which the first occurrence of each name has been
#            replaced by sansserif(name).
#
sub booktitle_body {
    my ($bktitle, @prog_or_pkg) = @_;
    foreach my $name (@prog_or_pkg) {
        my $newstring = sansserif($name);
        # \Q...\E: the name is literal text, not a pattern
        $bktitle =~ s/\Q$name\E/$newstring/;
    }
    return $bktitle;
}

#
# used to standardize section names for use as hash indices:
# lower-cases A-Z and removes all whitespace and backslashes.
#
sub canonize {
    my ($key) = @_;
    $key =~ tr/A-Z/a-z/;
    $key =~ s/\s//g;
    $key =~ s/\\//g;
    return $key;
}

#
# Replaces each "\ " (backslash-space) and each "!" of $key by a space.
#
sub kanonize {
    my ($key) = @_;
    $key =~ s/\\ / /g;
    $key =~ s/!/ /g;
    return $key;
}

#
# Records in %sections_by_name that label $sec lives at
# $chapno.$secno.$ssecno.  The hash key is canonize($sec).
# A label that is defined again with a different position is reported on
# STDERR, flagged in %redefined_secname and then overwritten; an identical
# redefinition is silently ignored.
#
sub def_section_by_name {
    my ($sec, $chapno, $secno, $ssecno) = @_;
    # Use the argument, not $1: when called from getchaps the caller's $1
    # is the "Pseudo" of \PseudoInput and not the label.
    my $secname = canonize($sec);
    my $old = $sections_by_name{$secname};
    if (defined $old) {
        if (($old->{chapnum} eq $chapno) &&
            ($old->{secnum}  eq $secno)  &&
            ($old->{ssecnum} eq $ssecno)) {
            return;                      # same place again: nothing to do
        }
        print STDERR "Section: \"$secname\" already defined as: ",
                     "$old->{chapnum}.",
                     "$old->{secnum}.",
                     "$old->{ssecnum}\n";
        print STDERR "Now being redefined as: $chapno.$secno.$ssecno\n";
        $redefined_secname{$secname} = 1;
    }
    $sections_by_name{$secname} = {chapnum => $chapno,
                                   secnum  => $secno,
                                   ssecnum => $ssecno};
    # print STDERR "Defined section \"$secname\": $chapno.$secno.$ssecno\n";
}

#
# Index into @chapters for a chapter "number".
# Needed since chanu may be A,B,... for appendices: those are placed after
# the last numerically numbered chapter ($lastnumchap).
#
sub tonum {
    my ($chanu) = @_;
    return $chanu if ($chanu =~ /\d+/);
    return $lastnumchap + ord($chanu) - ord('A') + 1;
}

# getchaps:
#
# Scan the .tex and .toc files to get chapter names and numbers,
# section names and numbers and associated filenames.
# Loads up chapters and sections_by_name.
#
# Reads ${dir}manual.toc and ${dir}manual.tex; dies if either cannot be
# opened, if a chapter or section number is repeated, or if a section
# belongs to an unknown chapter.
#
sub getchaps {
    open( TOC, '<', "${dir}manual.toc" )
        || die "Can't open ${dir}manual.toc.\n You can " .
               "create the .toc file by doing: tex manual (at least once).\n";
    my ($chap,$sec,$chapno,$chap_as_sec,$chapnam,$chanu);
    # The <TOC> read had been lost, leaving "while ()", which never ends.
    while (<TOC>) {
        if ( /$chapexp/o ) {
            ($chanu, $chapnam) = ($1, $2);
            $lastnumchap = $chanu if ( $chanu =~ /\d+/ );
            # remove `(preliminary)' part that messes everything up
            $chapnam =~ s/ \(preliminary\)//g;
            $chap = {name => $chapnam,
                     number => $chanu};
            $chap_as_sec = {name => $chapnam,
                            chapnum => $chanu,
                            secnum => 0,
                            chapter => $chap};
            $chap->{sections}[0] = $chap_as_sec;
            defined ($chapters[tonum($chanu)]) && die "chapter number repeated";
            $chapters[tonum($chanu)] = $chap;
        } elsif ( /$secexp/o ) {
            # copy the captures before any further pattern match is run
            my ($cnum, $snum, $sname) = ($1, $2, $3);
            my $cidx = tonum($cnum);
            defined ($chapters[$cidx])
                || die "section $snum:$sname in unknown chapter $cnum";
            defined ($chapters[$cidx]{sections}[$snum])
                && die "section number repeated";
            $sec = {name => $sname,
                    secnum => $snum,
                    chapnum => $cnum,
                    chapter => $chapters[$cidx]};
            $chapters[$cidx]{sections}[$snum] = $sec;
            # this would produce warnings from empty chapters. Thus ignore.
            # } elsif ( $_ !~ /$ignoreexp/o ) {
            #     print STDERR "Bad line: $_";
        }
    }
    close TOC;

    open (TEX, '<', "${dir}manual.tex") || die "Can't open ${dir}manual.tex";
    $chapno = 0;
    while (<TEX>) {
        next unless
          /^[^%]*\\(|Pseudo)Input\{([^}]+)\}(\{([^}]+)\}\{([^}]+)\})?/;
        # $pseudo is "" for \Input and "Pseudo" for \PseudoInput;
        # $idxname/$title are the two optional arguments
        my ($pseudo, $file, $idxname, $title) = ($1, $2, $4, $5);
        if (not -f "$dir$file.tex" or not -r "$dir$file.tex") {
            print STDERR "Chapter file $file.tex does not exist in $dir\n";
        }
        if ($pseudo eq "") {
            $chapters[++$chapno]{file} = $file;
        } else {
            $chapnam = $title;
            $chanu = ++$chapno;
            $lastnumchap = $chanu;
            $chap = {name => $chapnam,
                     number => $chanu};
            $chap_as_sec = {name => $chapnam,
                            chapnum => $chanu,
                            secnum => 0,
                            ssecnum => 0,
                            chapter => $chap};
            if (defined $idxname and defined $title and $idxname ne $title) {
                def_section_by_name("$book:$chapnam", $chanu, 0, 0);
                add_to_index(htm_fname($opt_c,$chanu,0),
                             $idxname, $chap_as_sec, 0);
            }
            $chap->{sections}[0] = $chap_as_sec;
            defined($chapters[$chanu]) && die "chapter number repeated";
            $chapters[$chanu] = $chap;
            $chapters[$chanu]{file} = $file;
        }
    }
    close TEX;
}

#
# Reads ${bkdir}manual.lab and registers every \makelabel line with
# def_section_by_name.  \setcitlab lines are skipped, any other line is
# reported on STDERR.  A missing file is reported and otherwise ignored.
#
sub getlabs {
    my ($bkdir) = @_;
    unless (open (LAB, '<', "${bkdir}manual.lab")) {
        # nothing to read: say so (with the other diagnostics) and stop,
        # instead of reading from an unopened handle
        print STDERR "Can't open ${bkdir}manual.lab\n";
        return;
    }
    while (<LAB>) {
        if ( /\\setcitlab/ ) {
            next; # We don't get the bibliography labels from here
        } elsif (/\\makelabel\s*\{([^}]+)\}\s*\{(\w+)(\.(\d+))?(\.(\d+))?\}/) {
            def_section_by_name($1, $2,
                                (defined($3) ? $4 : 0),
                                (defined($5) ? $6 : 0));
        } else {
            chomp;
            print STDERR "Ignored line: $_\n... in ${bkdir}manual.lab\n";
        }
    }
    close LAB;
}

#
# Mainly diagnostic, prints the chapters data structure. Also
# checks that each section has the correct back reference to its
# chapter
#
sub printchaps {
    my @chapters = @_;
    foreach my $chapter (@chapters) {
        next unless (defined ($chapter));
        print "Chapter $chapter->{number} $chapter->{name} $chapter->{file}\n";
        foreach my $section (@{$chapter->{sections}}) {
            next unless defined ($section);
            print "    Section $section->{chapnum}.$section->{secnum} $section->{name}\n";
            if ($section->{chapter} ne $chapter ) {
                print "      loop problem\n";
            }
        }
    }
}

# Printed at the bottom of every page.
# NOTE(review): the <p> and <br> below are restored from extraction damage
# (the same damage pattern occurs elsewhere in this file).  Any closing
# markup that originally followed in the final string could not be
# recovered -- compare with the reference copy.
$footer = "<p>\n" .
          sansserif( "GAP 4 manual<br>" . `date +"%B %Y"` ) . "";

# Section label ... this is the bit that goes after a # in an HREF link
# or is assigned to the value of NAME in an anchor.
#
# $c_s_gapdoc is the href type (0, 1 or $gapdoc); $cnum is not used here.
#
sub sec_label {
    my ($c_s_gapdoc,$cnum,$snum,$ssnum) = @_;

    return "s${snum}ss${ssnum}" if ($c_s_gapdoc == $gapdoc);

    # left-pad the section number with zeros to three characters
    $snum = ("0" x (3 - length($snum))) . $snum;
    unless ($c_s_gapdoc) {
        return ($ssnum) ? "SSEC$ssnum" : "";
    }
    return ""                    if ($snum eq "000");
    return "SSEC${snum}.$ssnum"  if ($ssnum);
    return "SECT${snum}";
}

# The HREFs of subsections, sections and chapter files are determined by
# this routine directly if the chapter, section, subsection numbers are known.
sub htm_fname {
    my ($c_s_gapdoc,$cnum,$snum,$ssnum) = @_;

    my $seclabel = sec_label($c_s_gapdoc,$cnum,$snum,$ssnum);
    $seclabel = "#$seclabel" if ($seclabel ne "");

    return "chap${cnum}.html$seclabel" if ($c_s_gapdoc == $gapdoc);

    $cnum = ("0" x (3 - length($cnum))) . $cnum;
    $snum = ("0" x (3 - length($snum))) . $snum;
    if ($c_s_gapdoc) {
        return "CHAP${cnum}.htm$seclabel";
    }
    return "C${cnum}S$snum.htm$seclabel";
}

# Returns the value that $opt_c must have had when the book $book
# was compiled with this converter.
# The answer is derived from the file names found in ${odir}$bdir and is
# cached in %convertbooks.
sub hreftype {
    my ($book, $bdir) = @_;
    unless (exists $convertbooks{$book}) {
        # read the directory ourselves; no shell, no dependence on `ls'
        my @ls = ();
        if (opendir(my $dh, "${odir}$bdir")) {
            @ls = readdir($dh);
            closedir($dh);
        }
        $convertbooks{$book}
            = (grep { m/^CHAP...[.]htm$/ } @ls) ?
                  1 :     # .htm files have shape CHAP<MMM>.htm
              (grep { m/^C...S...[.]htm$/ } @ls) ?
                  0 :     # .htm files have shape C<MMM>S<NNN>.htm
                  $opt_c; # can't determine the shape ... don't exist
                          # yet ... we assume the shape of the current
                          # manual being compiled.
    }
    return $convertbooks{$book};
}

# The names of the section and chapter files are determined by this routine
# when one has to determine the chapter and section number indirectly.
#
# Arguments: $name   -- a label, optionally prefixed by "<book>:" for a
#                       cross-volume reference
#            $ischap -- 1 if the link is to go to the start of the chapter
# Returns:   the (relative) href for the label, or "badlink:$name" after a
#            message on STDERR if the label is not in %sections_by_name.
#
sub name2fn {
    my ($name,$ischap) = @_;
    my $bdir = "";
    my $c_s_gapdoc = $opt_c;

    # : indicates a cross-volume reference
    my $canon_name = canonize($name);
    #print STDERR "canon_name = $canon_name\n";
    if ( $canon_name =~ /^(ref|tut|ext|prg|new):/ ) {
        my $manual = $1;
        $bdir = ($mainman==1) ? "../$manual/" : "../../../doc/htm/$manual/";
        $c_s_gapdoc = hreftype($manual, $bdir);
    } elsif ( $canon_name =~ /^([a-zA-Z_0-9]*):/ ) {
        # presumably a package name
        my $pkg = $1;
        #print STDERR "package name = $pkg\n";
        # a main manual reaches packages via ../../../pkg/,
        # a package manual reaches its siblings via ../../
        my $pkgroot = ($mainman==1) ? "../../../pkg/" : "../../";
        if (exists $gapdocbooks{$pkg}) {   # a GapDoc-produced manual
            $bdir = "$pkgroot$pkg/doc/";
            $c_s_gapdoc = $gapdoc;
        } else {
            $bdir = "$pkgroot$pkg/htm/";
            $c_s_gapdoc = hreftype($pkg, $bdir);
        }
    } elsif ( $canon_name !~ /^(\Q$book\E):/ ) {
        # \Q...\E: the book name is literal text, not a pattern
        $name = "$book:$name";
        $canon_name = canonize($name);
    }
    $name =~ s/\s+/ /g;

    if (exists $redefined_secname{$canon_name}) {
        print STDERR "Ref to multiply defined label: ",
                     "\"$name\" at line $. of $chap->{file}.tex\n";
    }
    my $sec = $sections_by_name{$canon_name};

    unless (defined ( $sec)) {
        print STDERR "Bad link: \"$name\" at line $. of $chap->{file}.tex\n";
        return "badlink:$name";
    }
    return $bdir . htm_fname($c_s_gapdoc,
                             $sec->{chapnum},
                             ($ischap == 1) ? 0 : $sec->{secnum},
                             ($ischap == 1) ? 0 : $sec->{ssecnum});
}

# strip out the tag from cross book references for the body of links
# (only the main manual tags ref, tut, ext, prg and new are stripped)
sub name2linktext {
    my ($name) = @_;
    $name =~ s/^(ref|tut|ext|prg|new)://;
    return $name;
}

#
# Add an index entry to the index.
# ($hname = $fname or $fname#..., where $fname is a filename)
# Appends [ $hname, "chap.sec" or "chap.sec.ssec" ] to the list kept in
# %index under $key.
sub add_to_index {
    my ($hname, $key, $sec) = @_;
    my $secno = "$sec->{chapnum}.$sec->{secnum}";
    if (defined $sec->{ssecnum} and $sec->{ssecnum}) {
        $secno .= ".$sec->{ssecnum}";
    }
    push @{$index{$key}}, [ $hname, $secno ];
    # print STDERR "hname = $hname, key = $key, ";
    # print STDERR "sec = $secno\n";
}

#
# Create a label for an index entry, add it to the index if new,
# and return the label (which is an empty string if not new).
# An entry is new if $key has no entry for the current section yet.
# Uses and increments the global $indexcount.
sub inxentry {
    my ($fname,$key,$sec) = @_;
    my $curs = "$sec->{chapnum}.$sec->{secnum}";
    # print STDERR "curs = $curs\n";
    # print STDERR "fname = $fname, key = $key, ";
    # print STDERR "sec = $sec->{chapnum}.$sec->{secnum}\n";
    if (defined $index{$key}) {
        foreach my $ar (@{$index{$key}}) {
            return "" if ( ($ar->[1]) eq $curs );  # index entry is not new
        }
    } else {
        $index{$key} = [];
    }
    # The anchor markup had been lost from this string (only "\n" was
    # left), so the #I<n> target registered below did not exist in the page.
    my $label = "<a name = \"I$indexcount\"></a>\n";
    add_to_index("$fname#I$indexcount", $key, $sec);
    # print STDERR "$fname#I$indexcount\n";
    $indexcount++;
    return $label;
}

#
# Return a NAME anchor for a subsection
# (an empty string, after a message on STDERR, if the label of the
# subsection is unusable; an empty string also if the subsection has
# already been entered in the index).
#
sub subsec_name {
    my ($fname,$key,$sec) = @_;
    # print STDERR "sec = $sec->{chapnum}.$sec->{secnum}.$sec->{ssecnum}\n";
    $key =~ s/!\{(.*)\}$/!$1/;
    $key =~ s/\s+/ /g;
    my $canon_name = canonize("$book:$key");
    my $sec_of_key = $sections_by_name{$canon_name};

    # %redefined_secname is keyed by canonical names, so look up that
    # (the raw key never matched)
    if (exists $redefined_secname{$canon_name}) {
        print STDERR "Multiply defined label: ",
                     "\"$key\" at line $. of $chap->{file}.tex\n",
                     "... subsection will be unreachable\n";
        return "";
    }
    unless (defined $sec_of_key) {
        print STDERR "No label known for \"$key\" ",
                     "at line $. of $chap->{file}.tex\n",
                     "... subsection will be unreachable\n";
        return "";
    }
    if ($sec_of_key->{chapnum} ne $sec->{chapnum} ||
        $sec_of_key->{secnum} ne $sec->{secnum}) {
        print STDERR "Section of \"$key\" (",
                     "$sec_of_key->{chapnum}.$sec_of_key->{secnum}) ",
                     "doesn't agree with the current section (",
                     "$sec->{chapnum}.$sec->{secnum}) ",
                     "at line $. of $chap->{file}.tex\n",
                     "... subsection will be unreachable\n";
        return "";
    }

    my $curs = "$sec_of_key->{chapnum}.$sec_of_key->{secnum}" .
               ".$sec_of_key->{ssecnum}";
    my $label = sec_label($opt_c, $sec_of_key->{chapnum},
                                  $sec_of_key->{secnum},
                                  $sec_of_key->{ssecnum});
    if (defined $index{$key}) {
        foreach my $ar (@{$index{$key}}) {
            return "" if ( ($ar->[1]) eq $curs );  # index entry is not new
        }
    } else {
        $index{$key} = [];
    }
    # print STDERR "Subsection key: \"$key\"\n";
    add_to_index("$fname#$label", $key, $sec_of_key);
    # anchor markup restored (only "\n" was left of this string)
    return "<a name = \"$label\"></a>\n";
}

# Some characters must be represented differently in HTML.
# Returns the entity for <, > and &, and any other argument unchanged.
# (The entities had been decoded into the characters themselves, which
# made this routine an identity map.)
sub html_literal {
    my ($lit) = @_;
    return "&lt;"  if ($lit eq "<");
    return "&gt;"  if ($lit eq ">");
    return "&amp;" if ($lit eq "&");
    return $lit;
}

# Gather lines ending in % together.
# While $line ends in % (which is removed) the next line is read from IN;
# unless that line is a comment line its leading whitespace is dropped and
# it is appended.  With $nontex set, one leading % of each line read is
# removed first.
sub gather {
    my ($line, $nontex) = @_;
    my $nextline;
    # the read from IN had been lost from this condition
    while ($line =~ s/%+\s*$// and defined($nextline = <IN>)) {
        $nextline =~ s/^%// if $nontex;
        unless ($nextline =~ /^%/) {
            $nextline =~ s/^\s*//;
            $line .= $nextline;
            chomp $line;
        }
    }
    return $line;
}

# This routine is called to process the text of the section
# the output file is assumed to be pre-selected. The input filehandle
# is simply IN
#
# As we process, we can be in "normal" mode (text), "maths" mode
# inside $ ... $, or "verbatim" mode inside a multi-line example
#
# We separately track whether we are in bold or tt,
# whether we are in a xxx: .... paragraph and whether we are reading
# a cross-reference that is split across multiple lines
#
# Finally, we track whether we have already
# emitted a <p>
# for this group of blank lines
#
$boldcommands = 'CAS|[A-Z]|danger|exercise';
$TeXbinops = "in|wedge|vee|cup|cap|otimes|oplus|le|ge|rightarrow";
$EndTeXMacro = "(?![A-Za-z])";
$TeXaccents = "\'`~=^"; # ^ must come last, this is also used as regexp
# From these and the argument following the HTML symbol is built
# e.g. `a -> &agrave;
%accents = ( "\'" => "acute", "19" => "acute",
             "`" => "grave", "18" => "grave",
             "~" => "tilde", "126" => "tilde",
             "^" => "circ", "94" => "circ",
             "c" => "cedil", "48" => "cedil",
             "H" => "uml", "125" => "uml", "127" => "uml" );
# These are the replacements for accents that have an empty argument
# or for which there is no single HTML symbol (so that the accent must
# precede the argument)
%acc_0arg = ( "\'" => "\'", "19" => "\'",
              "`" => "`", "18" => "`",
              "~" => "~", "126" => "~",
              "=" => "macr", "22" => "macr",
              "^" => "^", "94" => "^",
              "c" => "", "48" => "", # too hard ... just omit
              "d" => "", # too hard ... just omit
              "b" => "", # too hard ... just omit
              "t" => "", # too hard ... just omit
              "u" => "\\u", "21" => "\\u", # too hard ... put back
              "v" => "\\v", "20" => "\\v", # too hard ... put back
              "H" => "uml", "125" => "uml", "127" => "uml" );

# Calls tth to find out its version number
# Returns the "Version ..." text of the first line of `tth -H' output that
# contains one, or nothing if there is no such line.  Uses the scratch
# file tthout in the current directory.
sub tth_version {
    `tth -H >tthout 2> tthout`;
    # NOTE(review): the open mode, the failure message and the <TTHOUT>
    # read are reconstructions -- the text between the opening quote and
    # the loop body had been lost.
    open (TTHOUT, "<tthout") || die "Can't read tthout";
    my $version;
    while (<TTHOUT>) {
        if (s/.*(Version [^ ]*).*/$1/) {
            chomp;
            $version = $_;
            last;
        }
    }
    # always release the handle and the scratch file, also when no
    # version line was found
    close TTHOUT;
    unlink "tthout";
    return $version if (defined $version);
    return;
}

# We use this routine when using -t option to do any maths translation
#
# Argument: $tth -- a formula in the manual's TeX dialect
# Returns:  the HTML tth produced for it, or "" (after a message on
#           STDERR) if the translation could not be found in tth's output.
# Uses the scratch files tthin and tthout and the globals $tthbin, $opt_t.
sub tth_math_replace {
    my ($tth) = @_;
    open (TTHIN, ">tthin") || die "Can't create tthin";
    #print STDERR "in: ${tth}\n";
    my $tthorig = $tth;
    # replace <...> by proper TeX
    while ($tth =~ /(.*[^\\])<(.*[^\\])>(.*)/) {
        $tth= $1."{\\it ".$2."\\/}".$3;
    }
    # replace `...' by proper TeX
    while ($tth =~ /(.*[^\\])`(.*[^\\])\'(.*)/) {
        $tth= $1."{\\tt ".$2."}".$3;
    }
    # replace \< by proper TeX
    while ($tth =~ /(.*[^\\])\\<(.*)/) {
        $tth= $1."<".$2;
    }
    #while ($tth =~ /(.*[^\\])\\>(.*)/) {
    #  $tth= $1.">".$2;
    #}
    $tth =~ s/([^\\]|^)([.])/$1\\cdot /g; # . not preceded by \ becomes \cdot
    $tth =~ s/\\[.]/./g;                  # \. becomes .
    $tth =~ s/(\\right)\\cdot/$1./g;      # ... except for \right. (leave as is)
    $tth =~ s/(\\not)\s*/$1/g;
    $tth =~ s/\\\*/*/g;
    if ($opt_t < 2.52) {
        $tth =~ s/\\not\\in(?![a-zA-Z])/\\notin/g;
        $tth =~ s/\\not\\subset/ not subset/g;
    }
    # Ensure display mode used for \buildrel and \choose constructions
    $tth =~ s/\$/\$\$/g if ($tth =~ /\\buildrel|\\choose/ and $tth !~ /\$\$/);
    if ($tth =~ /\\[A-Za-z]/) {
        # there might be macros: Load our macros
        #print STDERR "tth: ${tth}\n";
        print TTHIN "\\input tthmacros.tex\n";
    }
    # we put in TTHBEGIN .. TTHEND
    # so we can strip out the superfluous <p>s
    # tth 2.78+ puts in, later.
    print TTHIN "TTHBEGIN${tth}TTHEND\n";
    close TTHIN;

    # tth works as a filter: feed it tthin, collect tthout
    # (the two redirections, the open mode, the failure message and the
    # <TTHOUT> read are reconstructions of lost text)
    `$tthbin -r -i <tthin >tthout 2>/dev/null`;
    open (TTHOUT, "<tthout") || die "Can't read tthout";
    $tth = "";
    my $tthin;
    while ( $tthin = <TTHOUT> ) {
        chomp($tthin);
        $tth .= $tthin;
    }
    close TTHOUT;
    #print STDERR "out: ${tth}\n";

    # only the stuff between TTHBEGIN and TTHEND
    # actually belongs to the formula translated
    $tth =~ s/.*TTHBEGIN(.*)TTHEND.*/$1/
        || do {print STDERR "!tth failed with input:\n $tthorig\n",
                            "!Null formula written to HTML file\n";
               $tth = "";};
    # tth leaves \mathbin etc. in ... get rid of them if present
    $tth =~ s/\\math(bin|rel|op)//g;
    # TtH up to version 2.86 doesn't know the following
    $tth =~ s/\\wr(?![a-zA-Z])/ wr /g;
    $tth =~ s/\\vdash(?![a-zA-Z])/ |- /g;
    $tth =~ s/\\tilde(?![a-zA-Z])/~/g; # needed for in-line maths
    #print STDERR "stripped: ${tth}\n";

    # replace italic typewriter (happens because we force
    # italic letters) by roman typewriter style
    # (the opening <tt><i> of the pattern and the tt tags of the
    # replacement are reconstructions of lost text)
    while ($tth =~ /(.*)<tt><i>(.*)<\/i><\/tt>(.*)/) {
        $tth= $1."<tt>".$2."</tt>".$3;
    }

    # increasing the font size doesn't affect maths displays
    # ... and `...' markup doesn't get increased in font size
    # So let's get rid of it.
    #$tth = "$tth";
    #print STDERR "enlarged: ${tth}\n";
    return $tth;
}

#
# Takes a line of form: "<head>{<arg>}<rest>"
# and returns an array with: <rest>, <arg>, <head>
# i.e. it finds the matching } for {.
# Braces nested inside <arg> are kept.  If the braces do not balance,
# the missing } are appended to <arg> and as many { are put in front of
# <rest>.  Dies if the line contains no {.
sub get_arg {
    my ($line) = @_;
    if ($line =~ /\s*\{([^{}]*)/) {
        $line = $`;
        my $arg = $1;
        my $rest = $';
        my $nbraces = 1;         # number of currently open braces
        while ($nbraces) {
            if ($rest =~ s/^(\{[^{}]*)//) {
                $arg .= $1;
                $nbraces++;
            } elsif ($nbraces == 1 and $rest =~ s/^\}//) {
                # the } matching the first { is not part of the argument
                $nbraces--;
            } elsif ($rest =~ s/^(\}[^{}]*)//) {
                $arg .= $1;
                $nbraces--;
            } else {
                # abort ... but make sure braces match
                $rest = "{" x $nbraces . $rest;
                $arg .= "}" x ($nbraces - 1);
                $nbraces = 0;
            }
        }
        return ($rest, $arg, $line);
    } else {
        print STDERR "line:$line\n";
        die "Expected argument: at line $. of file";
    }
}

#
# Given an accent macro with the \ or \accent stripped and the rest
# of a line with the macro's argument at it beginning return the
# HTML version of the accented argument and rest after the macro's
# argument has been stripped from it.
# The argument is a single word character, bare or in braces; an empty
# {} selects the replacement from %acc_0arg.
sub do_accent {
    my ($rest, $macro) = @_;
    $rest =~ /^(\w)|\{(\w?)\}/;
    $rest = $';
    my $arg = (defined $1) ? $1 : $2;
    $macro = ($arg eq "") ? $acc_0arg{$macro} : "&$arg$accents{$macro};";
    return ($rest, $macro);
}

#
# Takes rest which has a TeX macro without its \ at its beginning and
# returns the HTML version of the TeX macro and rest with the TeX macro
# stripped from it.
# NOTE(review): several literal strings in this routine look as if they
# have lost HTML markup or entities; they are reproduced unchanged here.
sub macro_replace {
    my ($rest) = @_;
    if ($rest =~ /^([$TeXaccents])\s*/) {
        return do_accent($', $1);
    }
    if ($rest =~ /^([a-zA-Z]+)\s*/) {
        $rest = $';
        my $macro = $1;
        if ($macro eq "accent") {
            $rest =~ /^(\d+)\s*/;
            $rest = $';
            $macro = $1;
            $macro = "" unless (defined $acc_0arg{$macro});
        }
        if (defined $accents{$macro}) { return do_accent($rest, $macro); }
        elsif (defined $acc_0arg{$macro}) { return ($rest, $acc_0arg{$macro}); }
        elsif ($macro eq "copyright") { return ($rest, "©"); }
        elsif ($macro eq "aa") { return ($rest, "å"); }
        elsif ($macro eq "AA") { return ($rest, "Å"); }
        elsif ($macro eq "lq") { return ($rest, "`"); }
        elsif ($macro =~ /^(rq|pif)$/) { return ($rest, "'"); }
        elsif ($macro =~ /^($boldcommands)$/) { return ($rest,"".uc($&).""); }
        elsif ($macro =~ /^(GAP|ATLAS|MOC$sharepkg)$/) { return ($rest, sansserif $macro); }
        elsif ($macro eq "package") {
            my ($last, $arg, $first) = get_arg("$rest"); # $first = ""
            return ($last, sansserif $arg);}
        elsif ($macro eq "sf") {
            my ($last, $arg, $first) = get_arg("{$rest"); # $first = ""
            return ($last, sansserif $arg);}
        elsif ($macro =~ /^([hv]box|rm|kernttindent|math(bin|rel|op))$/) { return ($rest, "");}
        elsif ($macro =~ /^(obeylines|(begin|end)group)$/) { return ($rest, "");}
        elsif ($macro =~ /^hfil(|l)$/) { return ($rest, " ");}
        elsif ($macro =~ /^break$/) { return ($rest, "
");} elsif ($macro =~ /^(it|sl)$/) { my ($last, $arg, $first) = get_arg("{$rest"); # $first = "" return ("$arg}\\emphend $last", "");} # pseudo ``emph'' end token elsif ($macro eq "emphend") { return ($rest, ""); } elsif ($macro eq "hrule") { return ($rest, "


"); } elsif ($macro eq "enspace") { return ($rest, " "); } elsif ($macro eq "quad") { return ($rest, " "); } elsif ($macro eq "qquad") { return ($rest, "  "); } elsif ($macro eq "ss") { return ($rest, "ß"); } elsif ($macro eq "o") { return ($rest, "ø"); } elsif ($macro eq "O") { return ($rest, "Ø"); } elsif ($macro =~ /^l?dots$/) { return ($rest, "..."); } elsif ($macro =~ /^bs?f|stars$/) { return ($rest, "
"); } elsif ($macro eq "cr") { return ($rest, "
"); } #
  • in the next line would be invalid HTML elsif ($macro eq "fmark") { return ($rest, " "); } elsif ($macro eq "item") { ($rest, $itemarg, $first) = get_arg("$rest"); # $first = "" if ($listdepth == 2) { $listdepth = 1; if ($listtype eq "d") { return ("$itemarg\\itmnd $rest", "\n\n
    "); } else { #ignore bit in braces (ordered and unordered lists) return ($rest, "\n\n
  • "); } } else { if ($listtype eq "d") { return ("$itemarg\\itmnd $rest", "
    "); } else { #ignore bit in braces (ordered and unordered lists) return ($rest, "
  • "); } } } elsif ($macro eq "itemitem") { ($rest, $itemarg, $first) = get_arg("$rest"); # $first = "" $rest =~ /^(%(un|)ordered)? #defines $sublisttype (\{([1aAiI])\})? #defines TYPE of ordered sublist (\{(\d+)\})? #defines START of ordered sublist /x; if ($listdepth == 1) { $sublisttype = list_type($1, $2); $sublistentry = begin_list($sublisttype, $3, $4, $5, $6) . "\n"; $listdepth = 2; } else { $sublistentry = ""; } if ($sublisttype eq "d") { return ("$itemarg\\itmnd $rest", "$sublistentry
    "); } else { #ignore bit in braces (ordered and unordered lists) return ($rest, "$sublistentry
  • "); } } # pseudo ``itemend'' character elsif ($macro eq "itmnd") { return ($rest, "
    "); } elsif ($macro eq "cite" and $rest =~ /^\{\s*(\w+)\s*\}/) { return ($', "$1"); } elsif ($macro eq "URL" and $rest =~ /^\{([^\}]*)\}/) { return ($', "$1"); } elsif ($macro eq "Mailto" and $rest =~ /^\{([^\}]*)\}/) { return ($', "$1"); } else { return ($rest, $macro); } } elsif ($rest =~ /^-/) { return ($', ""); # hyphenation help -- ignore } elsif ($rest =~ /^"; return $beginlist; } # # This could probably be done more cleverly -- this routine is too long # sub convert_text { my $fname = $_[0]; my $refchars = '[\-\\w\\s`\',./:!()?$]'; # these make up cross references my $ref = ""; my $endline = ""; # used for at the end of line my $mode = "normal"; # $mode can be: # "normal" : TeX macros need to be interpreted # "verbatim" : No interpretation done, except that # || is converted to |. # "html" : No interpretation done, except that # initial % is removed. # "maths" : A variant of "normal" where inside # $...$ or $$...$$ (TeX's math mode) my $ttenv = 0; # $ttenv is set to 1 in \begintt .. \endtt "verbatim" mode my $nontex = 0; # $nontex is set to 1 in %display{nontex} and # %display{nontext} env'ts, for which $mode is "normal" # but initial % of each line is removed. my $skip_lines = 0; # $skip_lines is set non-zero in %display{tex}, # %display{text}, %display{jpeg}, %display{nonhtml} # and \answer env'ts my ($bold,$tt,$it,$sub,$sup,$inlist,$inref,$donepar) = (0,0,0,0,0,0,0); my ($indexarg,$indexarg2,$zwei,$drei,$vier,$macro,$endmath,$endmathstring); # # Now we loop over lines. a line with 16 initial % signs marks # end of section # LINE: while (defined($_ = ) and not /^\%{16,}/) { chomp; # drop the trailing newline my $rest = $_; # rest of the line to scan my $outline = ""; # build the output in here # First we deal with various special whole lines. # \beginexample, \begintt, %display (this may end a $skip_lines) if ($mode eq "normal" and /^\\begin(example|tt)/) { if ($_ =~ /^\\begintt/) { # This is to catch a \begintt .. 
\endtt $ttenv = 1; # environment enclosing \beginexample .. } # \endexample $mode = "verbatim"; $skip_lines = 0; print "
    \n";
              next LINE;
          } elsif ($mode eq "normal" and /^%display\{nontex(|t)\}/) {
    	  $nontex = 1;
              $skip_lines = 0;
              next LINE;
          } elsif ($mode eq "normal" and /^%display\{(text?|jpeg|nonhtml)\}/) {
          	  # Paragraphs to be skipped by HTML.
    	  $mode = "normal";
              $nontex = 0;
          	  $skip_lines = 2;
          	  next LINE;
          } elsif ($mode eq "normal" and /^%display\{html\}/) {
    	  $mode = "html";
              $skip_lines = 0;
          } elsif ($mode eq "html" and /^%display\{text\}/) {
    	  $mode = "normal";
              $nontex = 0;
              $skip_lines = 2;
              next LINE;
          } elsif (/^%enddisplay/ and !$ttenv) {
    	  if ($mode eq "verbatim") {
    	      print "
    \n"; } $mode = "normal"; $nontex = 0; $skip_lines = 0; next LINE; } elsif ($mode eq "verbatim") { # \endexample, \endtt if (/^\\endtt/ or (/^\\endexample/ and !$ttenv)) { $mode = "normal"; $ttenv = 0; print "\n"; next LINE; } # |_ if (/^\|_/) { next LINE; } } elsif ($mode eq "html") { if (/^%/) { print "$'\n"; } else { print STDERR "Line $. ignored in \%display{html} mode, " . "because it didn't start with \%\n"; } next LINE; } elsif ((!$nontex and /^%/) || (!/\\(at|)index/ and /^([{}]|\s*\{?\\[a-zA-Z].*)%$/)) { # Ignore lines starting with a % except if in html or verbatim # modes (dealt with above) or if in nontex mode which we deal # with below. # Also ignore specific lines ending in a % (we have to be careful # here -- % also indicates a continuation). The lines we ignore are # those that match: "{%", "}%", "{\\X..%", "\\X..%" where X denotes # any letter and .. any sequence of chars. This is meant to exclude # lines like "{\obeylines ... %", "\begingroup ... %". If this proves # problematic the .tex files will need to use the %display{tex} env't # to exclude such lines. next LINE; # All that's left are whole lines that occur in "normal" mode } else { # Line skipping. if ($skip_lines) { if ($skip_lines == 1 and $_ =~ /^\s*$/) { $skip_lines = 0; } next LINE; } # Remove initial % if there is one when in %display{nontex} or # %display{nontext} environment if ($nontex) { s/^%//; $rest = $_; } # a '%' at end-of-line indicates a continuation $_ = gather($_, $nontex); # Paragraphs are ended by blank lines. if (/^\s*$/) { unless ($donepar) { $outline .= "

    \n"; $donepar = 1; } # If we get to the end of a paragraph we assume that we have # lost track of what is going on, warn and try to resume. if ($mode eq "maths" or $inref) { print STDERR "Paragraph ended in $mode mode at $.\n" . "reverting to normal\n"; $outline .= "" if ($mode eq "maths"); $mode = "normal"; } print $outline; next LINE; } # Vertical skips. if (/^\\(med|big)skip/) { $outline .= "

    "; print "$outline\n"; next LINE; } # Index entries -- emit an anchor and remember the index # keys for later there may be several on one line and # several references to one key if (/^\\(at|)index/) { # $_ = gather($_, $nontex); # already done above while (/\\((at|)index(tt|))\{/g) { ($rest, $indexarg) = (get_arg("{".$'))[0,1]; if ($1 eq "atindex") { ($indexarg2) = (get_arg($rest))[1]; if ($indexarg2 =~ /^@/) { $indexarg = $'; $indexarg =~ s/\\noexpand\s*`([^']*)'/$1/g; $indexarg =~ s/\\noexpand\s*<([^>]*)>/$1/g; $indexarg =~ s/\\noexpand//g; $indexarg =~ s/\|.*//; # remove "|indexit" if present # $indexarg might still have macros ... # we should do something about these too } } # Just the crudest form of macro removal - probably enough $indexarg =~ s/\\(.)/$1/g; $indexarg =~ s/\$//g; #assume $s match in pairs!! $bla = inxentry($fname,$indexarg,$sec); $outline .= $bla; print "$outline\n"; } next LINE; } # \> and \) lines (joined with next line if ending in %) if (/^\\[>)]/) { # $_ = gather($_, $nontex); # already done above # if \> with ` or ( without a matching ' or ) gather lines if ( /^\\> *\`/ ) { # line should have ended in a % while ( !/\'/ ) { $_ = gather("$_%", $nontex); } } elsif ( /^\\>.*\(/ ) { # line should have ended in a % while ( !/\)/ ) { $_ = gather("$_%", $nontex); } } # get rid of @{...} or @`...' if present. if (/@/) { # print STDERR "before:$_\n"; if (s/@\s*(\{[^{}]*\}|\`[^\']*\')\s*/ /) { # easy } elsif (/@\s*/) { # nested braces ... need to find matching brace $_ = $`; ($rest) = get_arg($'); $_ .= " $rest"; $rest =""; } # print STDERR "after:$_\n"; print STDERR "@ still present at $_" if (/@/); } } # if there is a comment in square brackets we extract it now # ... 
this way if this feature is undesirable we can easily get # rid of it my $comment = ""; # These cases [] is not a comment: # \>; # [] here is treated as an optional arg # \>`'{![gdfile]} # possibility from # # buildman.pe \Declaration if (/^\\>(.*;|`[^\']+\'{[^}!]*!\[[^\]]*\]})/) { ; } elsif (/^\\>.*\(/) { if (s/^(\\>[^(]*\([^)]*\)[^\[]*)(\[[^\]]*\])/$1/) { $comment = " $2"; } } elsif (s/^(\\>[^\[]*)(\[[^\]]*\])/$1/) { $comment = " $2"; } # \>`' V if (/^\\> *`([^\']+)\'\s*(\[[^\]]*\])?\s*V?\s*$/) { $endline = ""; $outline .= subsec_name($fname,$1,$sec); # $1 = $outline .= "

    " if $inlist; $outline .= "
  • "; $tt = 1; $rest = $1.$comment." V"; } # \>`'{"; $drei = defined($3) ? $3 : ""; $vier = defined($4) ? " $4" : ""; # $2$drei =