Annotation of loncom/interface/lonhtmlgateway.pm, revision 1.2
1.1 faziophi 1: # The LearningOnline Network with CAPA
2: # gateway for html input/output to be properly parsed and handled
3: #
1.2 ! faziophi 4: # $Id: lonhtmlgateway.pm,v 1.1 2010/04/26 20:45:28 faziophi Exp $
1.1 faziophi 5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: ######################################################################
29: ######################################################################
30:
31: =pod
32:
33: =head1 NAME
34:
35: Apache::lonhtmlgateway - properly parse and handle HTML input and output
36:
37: =head1 SYNOPSIS
38:
39: This is part of the LearningOnline Network with CAPA project
40: described at http://www.lon-capa.org.
41:
42: =head1 INTRODUCTION
43:
44: lonhtmlgateway is an object-oriented module used to parse and correct
45: malformed HTML input from the client, as well as to perform processing
46: of custom LON-CAPA HTML output before it is sent along to the end-user.
47: It replaces a number of subroutines in various modules, and adds new
48: code to tidy and process malformed HTML using XML::LibXML.
49:
50: This module is intended to be used for all non-authoring perspectives
51: in the system.
52:
53: New to LON-CAPA version 3.0.
54:
55: =head2 Example Usage
56:
57: Below is intended code to be invoked and called for use outside
58: of this module:
59:
60: $gateway = Apache::lonhtmlgateway->new();
61: $gateway = Apache::lonhtmlgateway->new($target);
62:
63: $xhtml = $gateway->process_incoming_html($html);
64: $xhtml = $gateway->process_incoming_html($html, $legacy);
65:
66: $xml = $gateway->process_html_to_xml($html);
67: $xhtml = $gateway->process_xml_to_html($xml);
68:
69: $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
70:
71: =head1 GLOBAL VARIABLES
72:
73: =over 4
74:
75: =cut
76:
77: ######################################################################
78: ######################################################################
79:
80: package Apache::lonhtmlgateway;
81:
82: use strict;
83: use utf8;
84: use Time::Local;
85: use Time::HiRes;
86: use Apache::lonlocal;
87: use Apache::lonnet;
88: use Apache::lonhtmlcommon;
89: use Apache::lonxml;
90: use Apache::lontexconvert;
91: use lib '/home/httpd/lib/perl/';
92: use LONCAPA;
93: use XML::LibXML;
94: use Encode;
95: use HTML::Entities;
96: use HTML::LCParser();
97: use Safe();
98:
# Serialization-related package variables of XML::LibXML.
# NOTE(review): these are `local` assignments made at file scope. Perl
# restores a localized value when the enclosing scope (here: the file being
# loaded) is left, so these settings presumably are no longer in effect when
# the methods below run later -- confirm whether a plain assignment was
# intended before changing anything, since that would alter serialized output.
99: local $XML::LibXML::skipXMLDeclaration = 1;
100: local $XML::LibXML::skipDTD = 1;
101: local $XML::LibXML::setTagCompression = 1;
102:
103: ##############################################
104: ##############################################
105:
106: =item %LONCAPA_ALLOWED_STANDARD_TAGS
107:
108: This is a hash of all tags, both HTML and custom LON-CAPA tags that
109: are allowed in non-authoring spaces. Examples of this include
110: course documents, bulletin boards, discussion posts, templated pages,
111: etc. In addition, in the event of rich text editing, the WYSIWYG
112: editor needs to know how to display LON-CAPA custom tags as either
113: inline-level (<span>) or block-level (<div>). Therefore, the hash is
114: set up with uppercase tag names as keys ("H1"), and the corresponding
115: entry an integer constant indicating that tag's role or purpose:
116:
117: =over 4
118:
119: =item 0 =
120:
121: Tag is explicitly not allowed. Currently not used anywhere in this
122: module, but reserved for the future in case certain tags would like
123: to be explicitly blacklisted.
124:
125: =item 1 =
126:
127: Tag is allowed, and in cases where it is unclear, is rendered as an
128: inline-level element. Example: <algebra> should be rendered as an
129: inline element.
130:
131: =item 2 =
132:
133: Tag is allowed, and in cases where it is unclear, is rendered as a
134: block-level element. Example: <md> should be rendered as a block
135: element.
136:
137: =back
138:
139: =back
140:
141: =cut
142:
143: ##############################################
144: ##############################################
145:
# Whitelist of tags permitted in non-authoring spaces.
# Keys are uppercase tag names; the value is the rendering role:
#   1 = allowed, treated as inline-level when ambiguous
#   2 = allowed, treated as block-level when ambiguous
our %LONCAPA_ALLOWED_STANDARD_TAGS = (
    # inline-level: basic formatting and phrases, linking and embedding,
    # and the inline LON-CAPA custom tags
    (map { $_ => 1 } qw(
        B I U STRONG EM STRIKE
        BIG SMALL INS DEL S
        Q DFN CODE SAMP KBD VAR
        SUB SUP
        A IMG
        M MI ALGEBRA CHEM
    )),
    # block-level: headers, generic block tags, table-related tags,
    # and the display-math LON-CAPA custom tag
    (map { $_ => 2 } qw(
        H1 H2 H3 H4 H5 H6
        P DIV OL UL LI ADDRESS
        BR HR BLOCKQUOTE PRE
        TABLE CAPTION TBODY TR TD TH
        MD
    )),
);
166:
167: ##############################################
168: ##############################################
169:
170: =head1 PARSING LON-CAPA CUSTOM TAGS
171:
172: This module maintains a hash %custom_tag_parsers, containing
173: lowercase tag names as keys and function references as entries.
174: Convention used here names the actual parsing function whose
175: reference is stored here to be of the name &parse_tagname_tag().
176: These functions are called during the processing of outgoing
177: HTML output in the &process_outgoing_html() function.
178:
179: Each of these functions is passed the following arguments:
180:
181: =over 4
182:
183: =item self
184:
185: Reference to Apache::lonhtmlgateway object calling the function.
186:
187: =item input
188:
189: Textual context extracted between the <tag> and </tag> tags.
190: Note that this text I<could> contain HTML entities. Thus, for
191: functions that cannot handle entitized input,
192: &HTML::Entities::decode_entities() should be called on this data
193: before further handing it off.
194:
195: =back
196:
197: Example hash entry:
198:
199: mi => \&parse_mi_tag,
200:
201: =head2 Currently Supported Custom Tags
202:
203: =over 4
204:
205: =item <algebra>
206:
207: Intended to convert and simplify simple algebraic functions into
208: readable output. Corrects cases such as double negatives or
209: eliminates coefficients of 1 where appropriate. The actual
210: handling of content contained in this tag takes place inside
211: L<Apache::lontexconvert>, which in turn uses the AlgParser
212: module to actually process the input.
213:
214: Usage:
215: <algebra>2*x+(-5)</algebra>
216:
217: =item <chem>
218:
219: Formatter for chemical equations, adding superscripts, subscripts,
220: and appropriate arrow characters as appropriate. This parser is
221: wholly contained inside this module, but is a copy of a routine
222: found in homework/default_homework.lcpm.
223:
224: Usage:
225: <chem>CH3CO2H + H2O <=> CH3CO2- + H3O+</chem>
226:
227: =back
228:
229: =head3 Math Mode Tags
230:
231: These tags are intended for LaTeX math mode input, in order to
232: produce complex mathematical and scientific constructs, which
233: normal HTML cannot produce. The output is later rendered by
234: a user-defined TeX engine in web target, or handled directly
235: in the case of tex target. The only difference between the tags
236: below is determining the author's intent on how to appropriately
237: render the contents within the tag - this intent is
238: important in preserving the What You See Is What You Get philosophy
239: of the rich text editor.
240:
241: =over 4
242:
243: =item <mi>
244:
245: Inline math mode tag. Content is wrapped in "\ensuremath{...}" and
246: passed to the parser for the <m> tag.
247:
248: I<New for LON-CAPA 3.0>.
249:
250: =item <md>
251:
252: Display block math mode tag. Content is surrounded by "\[" and
253: "\]" characters and passed to the parser for the <m> tag.
254:
255: I<New for LON-CAPA 3.0>.
256:
257: =item <m>
258:
259: Math mode tag. Allows author to fully specify the display of their
260: TeX input, and contain mixed inline-and-block content within a single
261: tag.
262:
263: Due to tools such as the rich text editor needing to know whether a
264: custom tag is block-level or inline-level on render, the use of this
265: tag is discouraged starting with LON-CAPA 3.0 although it will continue
266: to function. Fully compatible with legacy LON-CAPA 2.x content.
267:
268: =back
269:
270: =cut
271:
272: ##############################################
273: ##############################################
274:
# Dispatch table for LON-CAPA custom tags: lowercase tag name => handler.
# process_outgoing_html() invokes each handler with the gateway object,
# the text following the opening tag, and the output target.
my %custom_tag_parsers = (
    algebra => \&parse_algebra_tag,
    chem    => \&parse_chem_tag,
    m       => \&parse_m_tag,
    md      => \&parse_md_tag,
    mi      => \&parse_mi_tag,
);
282:
283: ##############################################
284: ##############################################
285:
286: =head1 CLASS OBJECT CONSTRUCTOR
287:
288: =over 4
289:
290: =item new
291:
292: $gateway = Apache::lonhtmlgateway->new();
293: $gateway = Apache::lonhtmlgateway->new($target);
294:
295: Constructs and returns a new gateway object. An optional argument
296: specifies the target of the output; it defaults to 'web'.
297: Behind the scenes, a single XML::LibXML parser object is created
298: and stored in the gateway object. On destroy, this parser object is
299: released as well.
300:
301: =back
302:
303: =cut
304:
305: ##############################################
306: ##############################################
307:
# Constructor.
#   $invocant - class name or an existing object (its class is reused)
#   $target   - optional output target; any false value selects 'web'
# Returns a blessed hash holding the XML::LibXML parser and the target.
sub new {
    my ($invocant, $target) = @_;
    my $class = ref($invocant) || $invocant;

    # one libxml parser per gateway, configured to recover from
    # malformed input without reporting it
    my $libxml = XML::LibXML->new();
    $libxml->recover(1);
    $libxml->recover_silently(1);

    my %fields = (
        parser => $libxml,
        target => ($target ? $target : 'web'),
    );
    return bless(\%fields, $class);
}
323:
# Destructor: drop this object's reference to the libxml parser.
# The previous implementation copied the reference into a lexical and
# undef'ed only that copy, which left $self->{parser} untouched.
sub DESTROY {
    my $self = shift;
    delete($self->{parser});
    return;
}
329:
330: ##############################################
331: ##############################################
332:
333: =head1 PUBLIC OBJECT METHODS
334:
335: =over 4
336:
337: =item process_html_to_xml
338:
339: $xml = $gateway->process_html_to_xml($html);
340:
341: Takes presumably-malformed HTML, encodes ampersand characters
342: and passes the result to the XML::LibXML parser, which creates
343: a DOM tree in memory of the content. This parse is as error-tolerant
344: as can be set, and libxml attempts to recover from any errors as much
345: as possible. This DOM tree is then taken and serialized,
346: eliminating unbalanced and malformed tags along the way. This
347: XML code (without any header tags) is then returned to the caller.
348:
349: =cut
350:
351: ##############################################
352: ##############################################
353:
# Tidy (possibly malformed) HTML into balanced XML markup.
#   $input - HTML fragment
# Returns the serialized children of the parsed <body>, or "" when the
# input is empty/undefined or libxml produced no <body> element.
# Ampersands in $input are encoded before parsing, so entities present in
# the input come back double-encoded (e.g. "&amp;lt;").
sub process_html_to_xml {
    my $self   = shift;
    my $input  = shift;
    my $parser = $self->{parser};

    if (!defined($input) || length($input) < 1) { return ""; }

    # only encode ampersands -- brackets may be valid tags
    my $encoded = &HTML::Entities::encode_entities($input, '&');

    # inside <chem>...</chem> the arrows "<=>", "->" and "<-" must be
    # entitized, otherwise the HTML parser treats them as broken markup.
    # Every <chem> element is handled separately (non-greedy, may span
    # lines) and every arrow inside it is replaced.
    $encoded =~ s{(<\s*chem\s*>)(.*?)(<\s*/chem\s*>)}{
        my ($open, $body, $close) = ($1, $2, $3);
        $body =~ s/<=>/&lt;&#61;&gt;/g;
        $body =~ s/->/-&gt;/g;
        $body =~ s/<-/&lt;-/g;
        $open.$body.$close;
    }gise;

    # parse into libXML to tidy tags, we suppress any errors
    # because otherwise the parser complains about non-HTML
    # tags to STDERR and the Apache error logs
    my $dom = $parser->parse_html_string($encoded,
        {
            suppress_errors   => 1,
            suppress_warnings => 1,
            recover           => 2
        }
    );
    if (!defined($dom)) { return ""; }

    # the dom returns a full <html> structure, so just get
    # all the child nodes of the <body> tag and put them together
    my @body_nodes = $dom->findnodes('/html/body');
    if (!@body_nodes) { return ""; }

    my $xml = "";
    foreach my $child ($body_nodes[0]->childNodes) {
        $xml .= $child->toString();
    }
    return $xml;
}
392:
393: ##############################################
394: ##############################################
395:
396: =item process_xml_to_html
397:
398: $xhtml = $gateway->process_xml_to_html($xml);
399:
400: Takes XML input, decodes one level of HTML entities
401: and returns the result to the caller.
402:
403: =cut
404:
405: ##############################################
406: ##############################################
407:
# Undo one level of entity encoding on XML produced by
# process_html_to_xml(), which encodes ampersands before parsing.
#   $input - XML string
# Returns the decoded string, suitable for storing as XHTML.
sub process_xml_to_html {
    my ($self, $input) = @_;
    # e.g. "&amp;lt;" in the XML becomes "&lt;" again
    return scalar(&HTML::Entities::decode_entities($input));
}
418:
419: ##############################################
420: ##############################################
421:
422: =item process_incoming_html
423:
424: $xhtml = $gateway->process_incoming_html($html);
425: $xhtml = $gateway->process_incoming_html($html, $legacy);
426:
427: Designed to be called for all raw HTML inputs from the client
428: side before storing or rendering data. Decodes UTF-8 data,
429: trims leading and trailing whitespace and a trailing "<br />" tag. Processes
430: the result through the XML parser, converts this back to
431: balanced well-formed XHTML, re-encodes the result as UTF-8,
432: and returns the result to the caller.
433:
434: =over 4
435:
436: =item legacy
437:
438: $legacy = 0;
439: $legacy = 1;
440:
441: I<(optional)> If true, adds additional processing intended
442: to emulate LON-CAPA 2.x parsing of the content.
443:
444: =back
445:
446: =cut
447:
448: ##############################################
449: ##############################################
450:
# Normalize raw HTML received from the client before it is stored.
#   $input  - UTF-8 encoded HTML
#   $legacy - optional; when true and the input has no block-level tags,
#             a <p>...</p> wrapper introduced by the parser is removed
#             again (only if the input itself had no <p> tag)
# Returns balanced XHTML, encoded as UTF-8.
# For consistency, call process_html_to_xml() on the result afterwards if
# it has to be embedded in XML later on.
sub process_incoming_html {
    my $self   = shift;
    my $input  = shift;
    my $legacy = shift;

    # decode to characters so unicode input survives the parser
    $input = &Encode::decode_utf8($input);

    # trim leading and trailing whitespace and one trailing HTML break
    # (the \s+ trim also covers a trailing newline)
    $input =~ s/\s+$//s;
    $input =~ s/^\s+//s;
    $input =~ s/\<br\s*\/*\>$//s;

    my $no_p_input = (length($input) > 0 && $input !~ m/.*\<[\s]*p[\s]*\>.*/is);
    my $xml = $self->process_html_to_xml($input);

    # contains_block_level_tags() expects the class/object as its first
    # argument; calling it with $input alone made the check always false
    if ($legacy && !$self->contains_block_level_tags($input)) {
        # the xml returns content inside a <p> tag
        # if there are no block tags... thus to preserve
        # old behavior, we strip out that <p></p>
        if ($no_p_input) {
            $xml =~ s/^\<p\>(.*)\<\/p\>/$1/si;
        }
    }

    my $xhtml = $self->process_xml_to_html($xml);
    # back to UTF-8 octets for the caller
    $xhtml = &Encode::encode_utf8($xhtml);
    return $xhtml;
}
483:
484: ##############################################
485: ##############################################
486:
487: =item process_outgoing_html
488:
489: $html = $gateway->process_outgoing_html($xhtml);
490: $html = $gateway->process_outgoing_html($xhtml, $legacy);
491:
492: Designed to be called for all HTML outputs to the client
493: side before rendering data. This entitizes all non-allowed
494: tags, as was previously done in Apache::lonfeedback, and
495: processes and converts all LON-CAPA supported custom tags (see
496: above) to their respective output HTML.
497:
498: =over 4
499:
500: =item legacy
501:
502: $legacy = 0;
503: $legacy = 1;
504:
505: I<(optional)> If true, adds additional processing intended
506: to emulate LON-CAPA 2.x parsing of the content. This includes
507: behavior to convert "\n" to "<br />" if there are no block-level
508: tags detected in the input. In addition, raw URLs are converted
509: automatically to <a> links.
510:
511: =back
512:
513: =back
514:
515: =cut
516:
517: ##############################################
518: ##############################################
519:
# Prepare stored XHTML for display.
#   $input  - XHTML as stored
#   $legacy - optional; when true, newlines become <br /> if no
#             block-level tag is present, and raw URLs become links
# Tags that are not whitelisted in %LONCAPA_ALLOWED_STANDARD_TAGS (or whose
# tag text is 1000 characters or longer) have their angle brackets
# entitized; LON-CAPA custom tags are then handed to their parsers.
# Tags are not filtered at input stage for greater backwards
# compatibility. Note that this disregards course preference.
# Returns the HTML to send to the client ('' for undefined input).
sub process_outgoing_html {
    my $self   = shift;
    my $input  = shift;
    my $legacy = shift;

    if (!defined($input)) { return ''; }

    my $allowed = \%Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;
    my $is_allowed = sub {
        my ($name, $fragment) = @_;
        return ($allowed->{uc($name)} && (length($fragment) < 1000));
    };

    # entitize all tags that are not explicitly allowed: first the
    # opening bracket, then the closing bracket. Previously both ternary
    # branches were identical, so nothing was ever entitized.
    $input =~ s/\<(\/?\s*(\w+)[^\>\<]*)/$is_allowed->($2, $1) ? "<$1" : "&lt;$1"/ge;
    $input =~ s/(\<?\s*(\w+)[^\<\>]*)\>/$is_allowed->($2, $1) ? "$1>" : "$1&gt;"/ge;

    if ($legacy) {
        unless ($self->contains_block_level_tags($input)) {
            $input = $self->legacy_newline_to_br($input);
        }
        $input = $self->legacy_raw_href_to_link($input);
    }

    # convert our own custom tags into the appropriate output;
    # every other token is copied through unchanged
    my $output = "";
    my $parser = HTML::LCParser->new(\$input);
    while (my $token = $parser->get_token()) {
        my $type = $token->[0];
        if ($type eq 'T' || $type eq 'D' || $type eq 'C') {
            $output .= $token->[1];
        } elsif ($type eq 'PI' || $type eq 'E') {
            $output .= $token->[2];
        } elsif ($type eq 'S') {
            my $tag = lc($token->[1]);
            if (exists($custom_tag_parsers{$tag})) {
                # NOTE(review): only the text after the start tag is
                # consumed here; the matching end tag is presumably
                # copied to the output by the 'E' branch -- confirm.
                my $text = $parser->get_text();
                $output .= $custom_tag_parsers{$tag}->(
                    $self, $text, $self->{target});
            } else {
                $output .= $token->[4];
            }
        }
    }

    return $output;
}
567:
568: ##############################################
569: ##############################################
570:
571: =head1 STATIC CLASS METHODS
572:
573: The following are static class methods that can be called
574: by any object.
575:
576: =over 4
577:
578: =item contains_block_level_tags
579:
580: $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
581:
582: Uses a regular expression to find, in the input data, any tags
583: described in %LONCAPA_ALLOWED_STANDARD_TAGS as block-level.
584: Returns 1 if true, 0 if false.
585:
586: =cut
587:
588: ##############################################
589: ##############################################
590:
# Report whether $input contains an opening or closing tag that
# %LONCAPA_ALLOWED_STANDARD_TAGS marks as block-level.
#   Class->contains_block_level_tags($input)   (or called on an object)
#   contains_block_level_tags($input)          (single argument tolerated)
# Returns 1 if such a tag is found, 0 otherwise (also for empty input).
sub contains_block_level_tags {
    # with a single argument there is no class/object, so that argument
    # is the input rather than the invocant
    my ($class, $input) = (@_ > 1) ? @_ : (__PACKAGE__, $_[0]);

    if (!defined($input) || $input eq '') { return 0; }

    # ignore anything that is not a plain tag name
    my @block_level_tags = grep { defined($_) && !ref($_) }
                           @{&get_block_level_tags($class)};
    if (!@block_level_tags) { return 0; }

    # one case-insensitive alternation over all block-level tag names;
    # \b keeps "P" from matching e.g. "<param", and quotemeta keeps the
    # names literal
    my $names = join('|', map { quotemeta($_) } @block_level_tags);
    return ($input =~ m/\<\/?\s*(?:$names)\b/i) ? 1 : 0;
}
605:
606: ##############################################
607: ##############################################
608:
609: =item get_block_level_tags
610:
611: $tags = Apache::lonhtmlgateway->get_block_level_tags();
612:
613: Returns a reference to an array containing the tags described in
614: %LONCAPA_ALLOWED_STANDARD_TAGS as block-level. Note that these
615: tags are returned in no particular order, and the tag names
616: are returned in uppercase.
617:
618:
619: =cut
620:
621: ##############################################
622: ##############################################
623:
# Collect the block-level tag names (value 2) from
# %LONCAPA_ALLOWED_STANDARD_TAGS.
#   $class - invocant; not used
# Returns a reference to an array of uppercase tag names, in no
# particular order. The list used to start with a stray empty array
# reference because it was initialized with [] instead of ().
sub get_block_level_tags {
    my $class = shift;
    my $allowed = \%Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;
    my @block = grep { $allowed->{$_} == 2 } keys(%{$allowed});
    return \@block;
}
635:
636: ##############################################
637: ##############################################
638:
639: =head2 Legacy Functions
640:
641: These functions are intended to process input in the same or
642: a similar way to how it was processed in LON-CAPA 2.x.
643:
644: =item legacy_newline_to_br
645:
646: I<(formerly Apache::lonfeedback::newline_to_br)>
647:
648: $converted = Apache::lonhtmlgateway->legacy_newline_to_br($input);
649:
650: Parse the input using HTML::LCParser, and in any text nodes
651: which contain "\n" characters, replace those characters with
652: an HTML "<br />" tag.
653:
654: =cut
655:
656: ##############################################
657: ##############################################
658:
# Replace "\n" with "<br />" inside text tokens only; markup tokens are
# copied through unchanged.
#   $class - invocant; not used
#   $input - HTML string
# Returns the converted string; '' (rather than undef) when there is
# nothing to convert.
sub legacy_newline_to_br {
    my $class = shift;
    my $input = shift;
    my $output = '';
    if (!defined($input)) { return $output; }

    my $parser = HTML::LCParser->new(\$input);
    while (my $token = $parser->get_token()) {
        my $type = $token->[0];
        if ($type eq 'T') {
            # text: the only place where newlines are converted
            my $text = $token->[1];
            $text =~ s/\n/\<br \/\>/g;
            $output .= $text;
        } elsif ($type eq 'D' || $type eq 'C') {
            $output .= $token->[1];
        } elsif ($type eq 'PI' || $type eq 'E') {
            $output .= $token->[2];
        } elsif ($type eq 'S') {
            $output .= $token->[4];
        }
    }
    return $output;
}
679:
680: ##############################################
681: ##############################################
682:
683: =item legacy_raw_href_to_link
684:
685: I<(formerly Apache::lonhtmlcommon::raw_href_to_link)>
686:
687: $converted = Apache::lonhtmlgateway->legacy_raw_href_to_link($input);
688:
689: Search for any links/URLs within the input text, and convert them
690: to <a> tags whose content is embedded inside a <tt> tag.
691:
692: =back
693:
694: =cut
695:
696: ##############################################
697: ##############################################
698:
# Wrap raw http/https URLs in <a href="..."><tt>...</tt></a>.
#   $class - invocant; not used
#   $input - text to scan
# A URL is only converted when it is followed by whitespace, a "<", or the
# end of the string, so URLs sitting inside quoted attributes are skipped.
# Returns the converted text.
sub legacy_raw_href_to_link {
    my ($class, $input) = @_;
    my $linked = $input;
    $linked =~ s/(https?\:\/\/[^\s\'\"\<]+)([\s\<]|$)/<a href="$1"><tt>$1<\/tt><\/a>$2/gi;
    return $linked;
}
705:
# Handler for the <algebra> custom tag.
#   $self  - gateway object
#   $input - text found inside the tag (may contain HTML entities)
# The algebra converter does not handle entities, so the text is decoded
# first; the TeX it produces is then rendered through parse_m_tag().
sub parse_algebra_tag {
    my ($self, $input) = @_;
    my $decoded = &HTML::Entities::decode($input);
    my $tex = &Apache::lontexconvert::algebra($decoded, 'tex');
    return &parse_m_tag($self, $tex);
}
718:
# Handler for the <mi> (inline math) custom tag: wraps the content in
# \ensuremath{...} and renders it through parse_m_tag().
#   $self  - gateway object
#   $input - TeX found inside the tag
sub parse_mi_tag {
    my ($self, $input) = @_;
    my $tex = '\ensuremath{' . $input . '}';
    return &parse_m_tag($self, $tex);
}
724:
# Handler for the <md> (display math) custom tag: wraps the content in
# \[ ... \] and renders it through parse_m_tag().
#   $self  - gateway object
#   $input - TeX found inside the tag
sub parse_md_tag {
    my ($self, $input) = @_;
    my $tex = '\[' . $input . '\]';
    return &parse_m_tag($self, $tex);
}
730:
# Handler for the <m> custom tag: passes the TeX and this gateway's output
# target to Apache::lontexconvert::to_convert() and returns its result.
#   $self  - gateway object (its 'target' field is read)
#   $input - TeX found inside the tag
sub parse_m_tag {
    my ($self, $input) = @_;
    my $target = $self->{target};
    return &Apache::lontexconvert::to_convert($input, $target);
}
736:
# Handler for the <chem> custom tag: formats a chemical equation.
#   $self  - gateway object (its 'target' field selects the arrow markup)
#   $input - text found inside the tag (may contain HTML entities)
# The text is split on " +", "->", "<=>", "<-" and "."; arrows are
# replaced by arrow markup, "." by a centered dot, and in every other
# piece a leading coefficient is kept (unless it is exactly "1") while the
# rest gets subscripts, superscripts and a minus sign.
# Returns the formatted formula without a trailing space.
sub parse_chem_tag {
    my $self   = shift;
    my $input  = shift;
    my $target = $self->{target};

    # arrow token => [ markup for the 'web' target, markup otherwise ]
    my %arrow_for = (
        '->'  => [ '→ ', '<m>\ensuremath{\rightarrow}</m> ' ],
        '<-'  => [ '← ', '<m>\ensuremath{\leftarrow}</m> ' ],
        '<=>' => [ '⇌ ', '<m>\ensuremath{\rightleftharpoons}</m> ' ],
    );

    # as with the <algebra> tag, some portions of the <chem> input may be
    # coming in encoded, especially arrows -- so decode them first
    $input = &HTML::Entities::decode_entities($input);

    my @tokens = split(/(\s\+|\->|<=>|<\-|\.)/, $input);
    my $formula = '';
    foreach my $token (@tokens) {
        if (exists($arrow_for{$token})) {
            $formula .= $arrow_for{$token}->[ ($target eq 'web') ? 0 : 1 ];
            next;
        }
        if ($token eq '.') {
            # the dot attaches directly to the preceding molecule
            $formula =~ s/(\ \;| )$//;
            $formula .= '·';
            next;
        }
        # leading stoichiometric coefficient (digits and "/"), then the
        # molecule; /s keeps text that follows an embedded newline, and
        # the character class no longer contains a stray "|"
        my ($coefficient, $molecule) =
            ($token =~ /^\s*([\d\/]*(?:&frac\d\d)?)(.*)/s);
        $formula .= $coefficient if ($coefficient ne '1');
        # subscripts
        $molecule =~ s|(?<=[a-zA-Z\)\]\s])(\d+)|<sub>$1</sub>|g;
        # superscripts
        $molecule =~ s|\^(\d*[+\-]*)|<sup>$1</sup>|g;
        # strip whitespace
        $molecule =~ s/\s*//g;
        # forced space
        $molecule =~ s/_/ /g;
        $molecule =~ s/-/−/g;
        $formula .= $molecule.' ';
    }
    # get rid of trailing space
    $formula =~ s/(\ \;| )$//;
    return $formula;
}
795:
796: ##############################################
797: ##############################################
798:
799: =head1 AUTHORS
800:
801: Phil Fazio
802:
803: =head1 VERSION
804:
1.2 ! faziophi 805: $Id: lonhtmlgateway.pm,v 1.1 2010/04/26 20:45:28 faziophi Exp $
1.1 faziophi 806:
807: =cut
808:
809: ##############################################
810: ##############################################
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>