Annotation of doc/help/texxml2latex.pl, revision 1.10
1.1 bowersj2 1: #!/usr/bin/perl
2:
1.2 bowersj2 3: # The LearningOnline Network with CAPA
4: # Converts a texxml file into a single tex file
5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: # 7-16-2002 Jeremy Bowers
29:
1.1 bowersj2 30: use strict;
31: use HTML::TokeParser;
32: use GDBM_File;
1.5 bowersj2 33: use File::Temp;
1.1 bowersj2 34:
# Script setup: build the texxml parser, pick up the optional flag, create a
# scratch directory for pod2latex, and emit the LaTeX header to standard output.
35: # Parse the texxml document named by the first command-line argument
36: my $p = HTML::TokeParser->new( $ARGV[0] );
# Directory (relative to the current working directory) that holds the LaTeX
# fragments as well as Latex_Header.tex and Latex_Footer.tex.
1.4 albertel 37: my $dirprefix = "../../loncom/html/adm/help/tex/";
1.1 bowersj2 38:
# True only when the second argument is exactly '--with-filenames'; when set,
# each included fragment is preceded by a "File: <name>" line in the output.
1.10 ! albertel 39: my $include_filenames = ($ARGV[1] eq '--with-filenames');
1.5 bowersj2 40: # Make myself a temp dir for processing POD
41: my $tmpdir = File::Temp::tempdir('loncapahelpgenXXXXXXX', TMPDIR => 1);
42:
1.1 bowersj2 43: # Print the header
# NOTE(review): the result of this open is not checked, unlike the opens for
# <file> and <pod> elements further down, which die on failure.
44: open (LATEX_FILE, $dirprefix . "Latex_Header.tex");
45: print <LATEX_FILE>;
46:
# Main loop: walk the texxml token stream and write the corresponding LaTeX to
# standard output. Only tokens whose type is 'S' are acted upon; for those,
# element [1] is used as the tag name and element [2] as the attribute hash.
# Tags other than section, subsection, subsubsection, file, tex and pod
# produce no output.
47: while (my $token = $p->get_token())
48: {
49: my $type = $token->[0];
1.5 bowersj2 50: if ($type eq 'S') {
1.1 bowersj2 51: my $tag = $token->[1];
52: my $attr = $token->[2];
# Sectioning tags: the 'name' attribute becomes the heading text verbatim
# (it is not escaped for LaTeX).
1.5 bowersj2 53: if ($tag eq 'section') {
1.1 bowersj2 54: my $title = $attr->{'name'};
55: print "\\section{$title}\n\n";
56: }
57:
1.5 bowersj2 58: if ($tag eq 'subsection') {
1.1 bowersj2 59: my $title = $attr->{'name'};
60: print "\\subsection{$title}\n\n";
61: }
62:
1.5 bowersj2 63: if ($tag eq 'subsubsection') {
1.1 bowersj2 64: my $title = $attr->{'name'};
65: print "\\subsubsection{$title}\n\n";
66: }
67:
# <file name="...">: copy the named LaTeX fragment from $dirprefix into the
# output, followed by a paragraph break. Failure to open the fragment is
# fatal; $! is set to 1 first so that die exits with status 1.
# NOTE(review): the die message joins $dirprefix and $file with an extra '/',
# although $dirprefix already ends in '/'; this affects the message only.
1.5 bowersj2 68: if ($tag eq 'file') {
1.1 bowersj2 69: my $file = $attr->{'name'};
1.9 bowersj2 70: open (LATEX_FILE, $dirprefix . $file) or
71: ($! = 1, die "Can't find LaTeX file $dirprefix/$file; terminating build.");
# Underscores are escaped so the file name can be typeset by LaTeX.
1.10 ! albertel 72: my $esc_file=$file;
! 73: $esc_file=~s/_/\\_/g;
! 74: if ($include_filenames) {
! 75: print "\\textrm{File: \\bf $esc_file}\\\\\n";
! 76: }
1.1 bowersj2 77: print <LATEX_FILE>;
1.3 bowersj2 78: print "\n\n";
1.1 bowersj2 79: }
80:
# <tex content="...">: emit the attribute value as-is, with a blank line
# before and after it.
1.5 bowersj2 81: if ($tag eq 'tex') {
1.3 bowersj2 82: print "\n\n";
1.1 bowersj2 83: print $attr->{'content'};
1.3 bowersj2 84: print "\n\n";
1.1 bowersj2 85: }
1.5 bowersj2 86:
# <pod file="..." section="..." h1level="...">: copy the source file into
# $tmpdir, run pod2latex on it there, and print the resulting .tex file with
# the \label/\index decorations stripped from headings.
# NOTE(review): $file, $section, $filename and $tmpdir are interpolated into
# shell command strings without quoting (only $section is wrapped in single
# quotes); presumably the texxml input is trusted - confirm. The exit status
# of each system() call is not checked.
87: if ($tag eq 'pod') {
88: my $file = $attr->{'file'};
1.8 bowersj2 89: my $section = $attr->{'section'};
1.5 bowersj2 90: if (!defined($section)) { $section = ''; }
1.6 bowersj2 91: else {
1.8 bowersj2 92: $section = "-section '$section'";
1.6 bowersj2 93: }
# Heading depth passed to pod2latex; defaults to 2 when not given.
1.8 bowersj2 94: my $h1level = $attr->{'h1level'};
95: if (!defined($h1level)) { $h1level = '2'; }
1.5 bowersj2 96: $file = '../../loncom/' . $file;
# Base name: everything after the last '/'.
1.8 bowersj2 97: my $filename = substr($file, rindex($file, '/') + 1);
98: system ("cp $file $tmpdir\n");
1.9 bowersj2 99: my $latexFile;
100: if (index($filename, '.') == -1) {
101: # pod2latex *insists* that either the extension of the
102: # file be .pl|.pm|.pod or that it be executable. Some
103: # extension-less files like "lonsql" are none-of-the-above.
104: system ("cd $tmpdir; mv $filename $filename.pm");
105: $filename .= ".pm";
# The renamed file name is reported on standard error.
106: print STDERR $filename . "\n";
107: }
1.8 bowersj2 108: system ("cd $tmpdir; pod2latex -h1level $h1level $section $filename\n");
# Expected output name: the input name with its last extension replaced by .tex.
1.9 bowersj2 109: $latexFile = substr($filename, 0, rindex($filename, '.')) . '.tex';
110: open LATEX_FILE, $tmpdir . '/' . $latexFile or
111: ($! = 1, die "Latex file $latexFile not found while trying to use pod2latex, ".
112: "terminating build");
1.7 bowersj2 113: # pod2latex inserts \labels and \indexs for every section,
114: # which is horrible because the section names tend to get
115: # reused a lot. This filters those out, so we need to
116: # create our own indexes.
# The substitution matches a (sub)section or paragraph command whose braces
# contain heading text followed by \label{...}\index{...}, and rewrites it to
# the same command with only the heading text.
# NOTE(review): for (<LATEX_FILE>) reads every line into a list before the
# loop starts, and the replacement uses \1..\4 rather than $1..$4.
117: for (<LATEX_FILE>) {
1.8 bowersj2 118: $_ =~ s/\\([^{]*)(section|paragraph)(\*?)\{([^\\]+)\\label\{[^\\]+\}\\index\{([^\\]+)\}\}/\\\1\2\3\{\4\}/g;
1.7 bowersj2 119: print $_;
120: }
1.5 bowersj2 121: print "\n\n";
122: }
1.1 bowersj2 123: }
124: }
125:
# Finish the document and clean up the scratch directory used for pod2latex.
126: # Print out the footer.
# NOTE(review): the result of this open is not checked; the <file> and <pod>
# opens earlier in the script die on failure, this one does not.
127: open (LATEX_FILE, $dirprefix . "Latex_Footer.tex");
128: print <LATEX_FILE>;
1.5 bowersj2 129:
130: # Remove the temp directory
# NOTE(review): $tmpdir is interpolated into a shell command string.
131: system ("rm -rf $tmpdir");
1.8 bowersj2 132:
133: __END__
134:
135: =pod
136:
137: =head1 NAME
138:
139: texxml2latex.pl - core script that drives the help file assembly
140: applications
141:
142: =head1 SYNOPSIS
143:
144: LON-CAPA's help system is based on assembling various pieces into
145: LaTeX files for conversion into printed documents. The various pieces
146: can also be used as online help.
147:
148: =head1 OVERVIEW
149:
150: X<help system, overview>LON-CAPA's help system is based on the idea of
151: assembling various pieces as needed to create documents for printing,
152: and using these various pieces for online help. LaTeX is the primary
153: language of the help system, because we can easily convert it to HTML,
154: and it makes the nicest printed documents.
155:
156: The scripts for the help system are stored in /doc/help in the CVS
157: repository.
158:
159: =head2 Data Sources
160:
161: The help system can draw from the following sources to create help
162: documents:
163:
164: =over 4
165:
166: =item * B<LaTeX fragments>: LaTeX fragments stored in
167: C</loncom/html/adm/help/tex> in the CVS repository (which end up in
168: C</home/httpd/html/adm/help/tex>). A "LaTeX fragment" is a file that
169: contains LaTeX-style markup, but is not a complete LaTeX file with
170: header and footer.
171:
172: =item * B<perl POD documentation>: POD documentation may be extracted
173: from perl modules used in LON-CAPA, using the syntax described in
174: podselect's man page.
175:
176: =back
177:
178: =head2 Online Help
179:
180: The online aspect of the help system is covered in the documentation
181: for loncommon.pm; see L<Apache::loncommon>, look for
182: C<help_open_topic>.
183:
184: Online help can only come from LaTeX fragments.
185:
186: Access to the printed documents is partially provided online by
187: rendering the help files structure in a way that allows the user to
188: click through to the underlying help files; see
189: L<http://msu.loncapa.org/adm/help/author.manual.access.hlp> for an
190: example. It's not very good, but it's marginally better than nothing.
191:
192: =head2 Offline Documents
193:
194: Offline documents are generated from XML documents which tell a
195: rendering script how to assemble the various LaTeX fragments into a
196: single LaTeX file, which is then rendered into PostScript and PDF
197: files, suitable for download and printing.
198:
199: =head1 texxml And Rendering texxml
200:
201: =head2 texxml
202:
203: X<texxml>
204: texxml is a little XML file format used to specify to the texxml2*.pl
205: scripts how to assemble the input sources into LaTeX documents. texxml
206: files end in the .texxml extension, and there is one texxml file per
207: final rendered document.
208:
209: The texxml format is as follows: There is a root <texxml> element,
210: with no attributes and the following children:
211:
212: =over 4
213:
214: =item * B<title>: The B<name> attribute of this tag is used as the
215: title of the document in texxml2index.pl; it is ignored in
216: texxml2latex.pl. If you don't intend to offer online-access
217: to the rendered documents this may be skipped.
218:
219: =item * B<section>, B<subsection>, and B<subsubsection>: These create
220: the corresponding environments in the output file. The B<name>
221: attribute is used to determine the name of the section.
222:
223: =item * B<file>: The C<name> attribute specifies a LaTeX fragment by
224: filename. The file is assumed to be located in the
225: C<loncom/html/adm/help/tex/> directory in the CVS repository. The
226: C<.tex> is required.
227:
228: =item * B<tex>: The contents of the B<content> attribute are directly
229: inserted into the rendered LaTeX file, followed by a paragraph
230: break. This is generally used for little connective paragraphs in
231: the documentation that don't make sense in the online help. See
232: C<author.manual.texxml> for several example usages.
233:
234: =item * B<pod>: The B<file> attribute specifies a file to draw the POD
235: documentation out of. The B<section> attribute is a section
236: specification matching the format specified in the man page of
237: podselect. By default, all POD will be included. The file is
238: assumed to be relative to the C<loncom> directory in the CVS
239: repository; you are allowed to escape from that with .. if
240: necessary. The B<h1level> attribute can be used to change
241: the default depth of the headings; by default, this is set to 2,
242: which makes =head1 a "subsection". Setting this higher can allow
243: you to bundle several related pod files together; see
244: developer.manual.texxml for examples.
245:
246: =back
247:
248: texxml2latex.pl will automatically include C<Latex_Header.tex> at the
249: beginning and C<Latex_Footer.tex> at the end, to make a complete
250: LaTeX document.
251:
1.9 bowersj2 252: =head2 Rendering texxml
1.8 bowersj2 253:
1.9 bowersj2 254: =head3 render.texxml.pl
1.8 bowersj2 255:
1.9 bowersj2 256: X<texxml, rendering>X<render.texxml.pl>The C<render.texxml.pl> script
257: takes a .texxml file, and produces PostScript and PDF files. The LaTeX
258: files will be given access to .eps files in the
259: C</loncom/html/adm/help/eps/> directory while rendering. Call it as
260: follows, from the C<doc/help> directory:
1.8 bowersj2 261:
262: perl render.texxml.pl -- author.manual.texxml
263:
264: substituting the appropriate texxml file.
265:
1.9 bowersj2 266: =head3 texxml2latex.pl
1.8 bowersj2 267:
1.9 bowersj2 268: X<texxml2latex.pl>texxml2latex.pl is a perl script that takes texxml in and assembles
1.8 bowersj2 269: the final LaTeX file, outputting it on stdout. Invoke it as follows:
270:
271: perl texxml2latex.pl author.manual.texxml
272:
273: Note that error handling is minimal; if the script can not open a
274: .tex file named by a file element, it terminates the build. If that
275: happens, the file either could not be found, or you do not have
276: sufficient permissions with the current user to read it.
277:
1.9 bowersj2 278: =head3 texxml2index.pl
1.8 bowersj2 279:
1.9 bowersj2 280: X<texxml2index.pl>texxml2index.pl is a perl script that takes texxml in and assembles a
1.8 bowersj2 281: file that can be used online to access all the .tex files that are
282: specified in the .texxml file. For an example of how this looks
283: online, see
284: C<http://msu.loncapa.org/adm/help/author.manual.access.hlp>.
285:
286: =head2 texxml support
287:
288: There are a couple of scripts that you may find useful for creating
289: texxml-based help:
290:
1.9 bowersj2 291: =head3 latexSplitter.py
1.8 bowersj2 292:
1.9 bowersj2 293: X<latexSplitter.py>latexSplitter.py is a Python script that helps you separate a
1.8 bowersj2 294: monolithic .tex file into the small pieces LON-CAPA's help system
295: expects. Invoke it like this:
296:
297: python latexSplitter.py monolithic.tex
298:
299: where C<monolithic.tex> is the .tex file you want to split into
300: pieces. This requires Python 2.1 or greater (2.0 may work); on many
301: modern RedHat installs this is installed by default under the
302: executable name C<python2>.
303:
304: Use the program by highlighting the desired section, give it a file
305: name in the textbox near the bottom, and hit the bottom button. The
306: program will remove that text from the textbox, and create a file in
307: the C<loncom/html/adm/help/tex/> directory containing that LaTeX. For
308: consistency, you should use underscores rather than spaces in the
309: filename, and note there are a few naming conventions for the .tex
310: files, which you can see just by listing the
311: C<loncom/html/adm/help/tex/> directory.
312:
313: The idea behind this program is that if you are writing a big document
314: from scratch, you can use a "real" program like LyX to create the .tex
315: file, then easily split it with this program.
316:
1.9 bowersj2 317: =head3 simpleEdit.py
1.8 bowersj2 318:
1.9 bowersj2 319: X<simpleEdit.py>simpleEdit.py is a python script that takes a .texxml file and shows
1.8 bowersj2 320: all the tex files that went into it in sequence, allowing you to "edit"
321: the entire document as one entity. Note this is intended for simple
322: typo corrections and such in context, not major modification of the
323: document. Invoke it with
324:
325: python simpleEdit.py author.manual.texxml
326:
327: Make your changes, and hit the "Save" button to save them.
328:
329: =head2 texxml LaTeX Feature Support
330:
331: =head3 Cross-referencing
332:
333: LaTeX has a cross-referencing system built around labeling points in
334: the document with \label, and referencing those labels with \ref. In a
335: complete LaTeX document, there's no problem because all \refs and
336: \labels are present. However, for the online help, \ref'ing something
337: that is not in the current LaTeX fragment causes a TTH error when it
338: can't find the crossreference.
339:
340: The solution is to do the cross-references for TTH. When LON-CAPA is
341: installed, the C<rebuildLabelHash.pl>X<rebuildLabelHash.pl> script
342: is executed, which extracts all the labels from the LaTeX fragments
343: and stores them in the C<fragmentLabels.gdbm>X<fragmentLabels.gdbm> hash.
344: The C<lonhelp.pm> handler then replaces \refs with appropriate
345: HTML to provide a link to the referenced help file while online. Thus,
346: you can freely use references, even in online help.
347:
348: =head3 Indexing
349:
350: LaTeX has a popular index making package called MakeIndex. LON-CAPA's
351: help system supports this, so you can create indices using the \index
352: LaTeX command. In perl POD files, use the X command. Note that in both
1.9 bowersj2 353: cases the index text is not included in the render, so the index must
354: be included in addition to the indexed text, and need not match the
355: indexed text precisely.
1.8 bowersj2 356:
357: =head1 Writing POD: Style
358:
359: Adopting a little bit from everybody who has included POD in their
360: documents to date, the help system is going to expect the following
361: format for POD documentation.
362:
363: The POD should start with a C<=head1> with the title C<NAME> (in caps
364: as shown). The following paragraph should extremely briefly describe
365: what the module does and contains. Example:
366:
367: =head1 NAME
368:
369: Apache::lonflunkstudent - provides interface to set all
370: student assessments point score to 0
371:
372: Next should be a C<=head1> titled C<SYNOPSIS> which contains a
373: paragraph or two description of the module.
374:
375: =head1 SYNOPSIS
376:
377: lonflunkstudent provides a handler to select a student and set all
378: assignment values to zero, thereby flunking the student.
379:
380: Routines for setting all assessments to some value are provided by
381: this module, as well as some useful student taunting routines.
382:
383: Optionally, an C<OVERVIEW> section can be included. This can then be
384: extracted by the help system for the LON-CAPA subsystems overview
385: chapter. The overview should be a relatively high-level, but still
386: technical, overview of the module, sufficient to give the reader
387: enough context to understand what the module does, what it might be
388: useful for in other contexts, and what is going on in the code when it
389: is read.
390:
391: The remainder should be formatted as appropriate for the file, such
392: that discarding the NAME, SYNOPSIS, and OVERVIEW sections provides a
1.9 bowersj2 393: useful API overview of the module. This may be anything from an
394: elaborate discussion of the data structures, algorithms, and design
395: principles that went into the module, to a simple listing of
396: what functions exist, how to call them, and what they return, as
397: appropriate.
1.8 bowersj2 398:
399: Routines that are private to the module should B<not> be documented;
400: document them in perl comments, or, as is the style of the time, not
401: at all, as is appropriate.
402:
403: Method and function names should be bolded when being
1.9 bowersj2 404: documented.
405:
406: Literal strings such as filenames should be enclosed in
1.8 bowersj2 407: the C command, like this: C</home/httpd/lonTabs/>.
1.9 bowersj2 408:
409: Indexing can be done with the X command in perldoc, and should be used
410: as appropriate. Do not include X commands in the headings; the output
411: from pod2latex screws up some regexes in texxml2latex.pl.
1.8 bowersj2 412:
413: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>