Annotation of loncom/interface/lonhtmlgateway.pm, revision 1.1
1.1 ! faziophi 1: # The LearningOnline Network with CAPA
! 2: # gateway for html input/output to be properly parsed and handled
! 3: #
! 4: # $Id:$
! 5: #
! 6: # Copyright Michigan State University Board of Trustees
! 7: #
! 8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 9: #
! 10: # LON-CAPA is free software; you can redistribute it and/or modify
! 11: # it under the terms of the GNU General Public License as published by
! 12: # the Free Software Foundation; either version 2 of the License, or
! 13: # (at your option) any later version.
! 14: #
! 15: # LON-CAPA is distributed in the hope that it will be useful,
! 16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 18: # GNU General Public License for more details.
! 19: #
! 20: # You should have received a copy of the GNU General Public License
! 21: # along with LON-CAPA; if not, write to the Free Software
! 22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 23: #
! 24: # /home/httpd/html/adm/gpl.txt
! 25: #
! 26: # http://www.lon-capa.org/
! 27: #
! 28: ######################################################################
! 29: ######################################################################
! 30:
! 31: =pod
! 32:
! 33: =head1 NAME
! 34:
! 35: Apache::lonhtmlgateway - properly parse and handle HTML input and output
! 36:
! 37: =head1 SYNOPSIS
! 38:
! 39: This is part of the LearningOnline Network with CAPA project
! 40: described at http://www.lon-capa.org.
! 41:
! 42: =head1 INTRODUCTION
! 43:
! 44: lonhtmlgateway is an object-oriented module used to parse and correct
! 45: malformed HTML input from the client, as well as to perform processing
! 46: of custom LON-CAPA HTML output before it is sent along to the end-user.
! 47: It replaces a number of subroutines in various modules, and adds new
! 48: code to tidy and process malformed HTML using XML::LibXML.
! 49:
! 50: This module is intended to be used for all non-authoring perspectives
! 51: in the system.
! 52:
! 53: New to LON-CAPA version 3.0.
! 54:
! 55: =head2 Example Usage
! 56:
! 57: Below is intended code to be invoked and called for use outside
! 58: of this module:
! 59:
! 60: $gateway = Apache::lonhtmlgateway->new();
! 61: $gateway = Apache::lonhtmlgateway->new($target);
! 62:
! 63: $xhtml = $gateway->process_incoming_html($html);
! 64: $xhtml = $gateway->process_incoming_html($html, $legacy);
! 65:
! 66: $xml = $gateway->process_html_to_xml($html);
! 67: $xhtml = $gateway->process_xml_to_html($xml);
! 68:
! 69: $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
! 70:
! 71: =head1 GLOBAL VARIABLES
! 72:
! 73: =over 4
! 74:
! 75: =cut
! 76:
! 77: ######################################################################
! 78: ######################################################################
! 79:
! 80: package Apache::lonhtmlgateway;
! 81:
! 82: use strict;
! 83: use utf8;
! 84: use Time::Local;
! 85: use Time::HiRes;
! 86: use Apache::lonlocal;
! 87: use Apache::lonnet;
! 88: use Apache::lonhtmlcommon;
! 89: use Apache::lonxml;
! 90: use Apache::lontexconvert;
! 91: use lib '/home/httpd/lib/perl/';
! 92: use LONCAPA;
! 93: use XML::LibXML;
! 94: use Encode;
! 95: use HTML::Entities;
! 96: use HTML::LCParser();
! 97: use Safe();
! 98:
# Serialization flags for XML::LibXML.
# NOTE(review): "local" at file scope only holds these values for the
# dynamic scope of loading this file; Perl restores the previous values
# once loading finishes, so these settings are presumably NOT in effect
# when the methods below run later -- confirm. Do not simply drop "local"
# without checking the output: that would change XML::LibXML
# serialization for every caller in the process.
! 99: local $XML::LibXML::skipXMLDeclaration = 1;
! 100: local $XML::LibXML::skipDTD = 1;
! 101: local $XML::LibXML::setTagCompression = 1;
! 102:
! 103: ##############################################
! 104: ##############################################
! 105:
! 106: =item %LONCAPA_ALLOWED_STANDARD_TAGS
! 107:
! 108: This is a hash of all tags, both HTML and custom LON-CAPA tags that
! 109: are allowed in non-authoring spaces. Examples of this include
! 110: course documents, bulletin boards, discussion posts, templated pages,
! 111: etc. In addition, in the event of rich text editing, the WYSIWYG
! 112: editor needs to know how to display LON-CAPA custom tags as either
! 113: inline-level (<span>) or block-level (<div>). Therefore, the hash is
! 114: set up with uppercase tag names as keys ("H1"), and the corresponding
! 115: entry an integer constant indicating that tag's role or purpose:
! 116:
! 117: =over 4
! 118:
! 119: =item 0 =
! 120:
! 121: Tag is explicitly not allowed. Currently not used anywhere in this
! 122: module, but reserved for the future in case certain tags would like
! 123: to be explicitly blacklisted.
! 124:
! 125: =item 1 =
! 126:
! 127: Tag is allowed, and in cases where it is unclear, is rendered as an
! 128: inline-level element. Example: <algebra> should be rendered as an
! 129: inline element.
! 130:
! 131: =item 2 =
! 132:
! 133: Tag is allowed, and in cases where it is unclear, is rendered as a
! 134: block-level element. Example: <md> should be rendered as a block
! 135: element.
! 136:
! 137: =back
! 138:
! 139: =back
! 140:
! 141: =cut
! 142:
! 143: ##############################################
! 144: ##############################################
! 145:
# Tags permitted in non-authoring content, keyed by UPPERCASE tag name.
# Value 1: allowed, treated as inline-level when the role is ambiguous.
# Value 2: allowed, treated as block-level when the role is ambiguous.
our %LONCAPA_ALLOWED_STANDARD_TAGS = (
    # inline-level: basic formatting and phrases, linking and embedding,
    # and the inline LON-CAPA custom tags
    (map { $_ => 1 } qw(
        B I U STRONG EM STRIKE
        BIG SMALL INS DEL S
        Q DFN CODE SAMP KBD VAR
        SUB SUP
        A IMG
        M MI ALGEBRA CHEM
    )),
    # block-level: headers, general block tags, table-related tags,
    # and the display-math LON-CAPA custom tag
    (map { $_ => 2 } qw(
        H1 H2 H3 H4 H5 H6
        P DIV OL UL LI ADDRESS
        BR HR BLOCKQUOTE PRE
        TABLE CAPTION TBODY TR TD TH
        MD
    )),
);
! 166:
! 167: ##############################################
! 168: ##############################################
! 169:
! 170: =head1 PARSING LON-CAPA CUSTOM TAGS
! 171:
! 172: This module maintains a hash %custom_tag_parsers, containing
! 173: lowercase tag names as keys and function references as entries.
! 174: Convention used here names the actual parsing function whose
! 175: reference is stored here to be of the name &parse_tagname_tag().
! 176: These functions are called during the processing of outgoing
! 177: HTML output in the &process_outgoing_html() function.
! 178:
! 179: Each of these functions is passed the following arguments:
! 180:
! 181: =over 4
! 182:
! 183: =item self
! 184:
! 185: Reference to Apache::lonhtmlgateway object calling the function.
! 186:
! 187: =item input
! 188:
! 189: Textual context extracted between the <tag> and </tag> tags.
! 190: Note that this text I<could> contain HTML entities. Thus, for
! 191: functions that cannot handle entitized input,
! 192: &HTML::Entities::decode_entities() should be called on this data
! 193: before further handing it off.
! 194:
! 195: =back
! 196:
! 197: Example hash entry:
! 198:
! 199: mi => \&parse_mi_tag,
! 200:
! 201: =head2 Currently Supported Custom Tags
! 202:
! 203: =over 4
! 204:
! 205: =item <algebra>
! 206:
! 207: Intended to convert and simplify simple algebraic functions into
! 208: readable output. Corrects cases such as double negatives or
! 209: eliminates coefficients of 1 where appropriate. The actual
! 210: handling of content contained in this tag takes place inside
! 211: L<Apache::lontexconvert>, which in turn uses the AlgParser
! 212: module to actually process the input.
! 213:
! 214: Usage:
! 215: <algebra>2*x+(-5)</algebra>
! 216:
! 217: =item <chem>
! 218:
! 219: Formatter for chemical equations, adding superscripts, subscripts,
! 220: and appropriate arrow characters as appropriate. This parser is
! 221: wholly contained inside this module, but is a copy of a routine
! 222: found in homework/default_homework.lcpm.
! 223:
! 224: Usage:
! 225: <chem>CH3CO2H + H2O <=> CH3CO2- + H3O+</chem>
! 226:
! 227: =back
! 228:
! 229: =head3 Math Mode Tags
! 230:
! 231: These tags are intended for LaTeX math mode input, in order to
! 232: produce complex mathematical and scientific constructs, which
! 233: normal HTML cannot produce. The output is later rendered by
! 234: a user-defined TeX engine in web target, or handled directly
! 235: in the case of tex target. The only difference between the tags
! 236: below is determining the author's intent on how to appropriately
! 237: render the contents within the tag - this intent is
! 238: important in preserving the What You See Is What You Get philosophy
! 239: of the rich text editor.
! 240:
! 241: =over 4
! 242:
! 243: =item <mi>
! 244:
! 245: Inline math mode tag. Content is surrounded by "$" characters and
! 246: passed to the parser for the <m> tag.
! 247:
! 248: I<New for LON-CAPA 3.0>.
! 249:
! 250: =item <md>
! 251:
! 252: Display block math mode tag. Content is surrounded by "\[" and
! 253: "\]" characters and passed to the parser for the <m> tag.
! 254:
! 255: I<New for LON-CAPA 3.0>.
! 256:
! 257: =item <m>
! 258:
! 259: Math mode tag. Allows author to fully specify the display of their
! 260: TeX input, and contain mixed inline-and-block content within a single
! 261: tag.
! 262:
! 263: Due to tools such as the rich text editor needing to know whether a
! 264: custom tag is block-level or inline-level on render, the use of this
! 265: tag is discouraged starting with LON-CAPA 3.0 although it will continue
! 266: to function. Fully compatible with legacy LON-CAPA 2.x content.
! 267:
! 268: =back
! 269:
! 270: =cut
! 271:
! 272: ##############################################
! 273: ##############################################
! 274:
# Dispatch table used by process_outgoing_html():
# lowercase custom tag name => handler for the text inside that tag.
# Keys are quoted explicitly so that names such as "m" can never be
# read as the start of a quote-like operator.
my %custom_tag_parsers = (
    'algebra' => \&parse_algebra_tag,
    'chem'    => \&parse_chem_tag,
    'm'       => \&parse_m_tag,
    'md'      => \&parse_md_tag,
    'mi'      => \&parse_mi_tag,
);
! 282:
! 283: ##############################################
! 284: ##############################################
! 285:
! 286: =head1 CLASS OBJECT CONSTRUCTOR
! 287:
! 288: =over 4
! 289:
! 290: =item new
! 291:
! 292: $gateway = Apache::lonhtmlgateway->new();
! 293: $gateway = Apache::lonhtmlgateway->new($target);
! 294:
! 295: Constructs and returns a new gateway object. An optional argument
! 296: allows one to specify the target of the output, defaults to 'web'.
! 297: A single XML::LibXML parser object is created behind the scenes
! 298: and stored in the gateway. When the gateway is destroyed, this
! 299: parser object is released as well.
! 300:
! 301: =back
! 302:
! 303: =cut
! 304:
! 305: ##############################################
! 306: ##############################################
! 307:
# Constructor.
#   $invocant - class name, or an existing object whose class is reused
#   $target   - (optional) output target; any false value means 'web'
# Returns a blessed hash holding the libxml parser ('parser') and the
# output target ('target').
sub new {
    my ($invocant, $target) = @_;
    my $class = ref($invocant) || $invocant;

    # one libxml parser per gateway, with both of its recovery
    # options switched on
    my $libxml = XML::LibXML->new();
    $libxml->recover(1);
    $libxml->recover_silently(1);

    my $self = bless({}, $class);
    $self->{parser} = $libxml;
    $self->{target} = $target ? $target : 'web';
    return $self;
}
! 323:
# Destructor: releases the gateway's reference to its libxml parser.
# The slot itself is deleted; undef-ing a lexical copy of the value
# (as was done previously) leaves the object's own reference in place.
sub DESTROY {
    my $self = shift;
    delete $self->{parser};
    return;
}
! 329:
! 330: ##############################################
! 331: ##############################################
! 332:
! 333: =head1 PUBLIC OBJECT METHODS
! 334:
! 335: =over 4
! 336:
! 337: =item process_html_to_xml
! 338:
! 339: $xml = $gateway->process_html_to_xml($html);
! 340:
! 341: Takes presumably-malformed HTML, encodes ampersand characters
! 342: and passes the result to the XML::LibXML parser, which creates
! 343: a DOM tree in memory of the content. This parse is as error-tolerant
! 344: as can be set, and libxml attempts to recover from any errors as much
! 345: as possible. This DOM tree is then taken and serialized,
! 346: eliminating unbalanced and malformed tags along the way. This
! 347: XML code (without any header tags) is then returned to the caller.
! 348:
! 349: =cut
! 350:
! 351: ##############################################
! 352: ##############################################
! 353:
# Private helper: entitize the arrow operators ("<=>", "->", "<-") in
# the text of one <chem> element so the HTML parser does not mistake
# their angle brackets for markup. Returns the protected text.
sub _entitize_chem_arrows {
    my ($formula) = @_;
    $formula =~ s/<=>/&lt;&#61;&gt;/g;
    $formula =~ s/->/-&gt;/g;
    $formula =~ s/<-/&lt;-/g;
    return $formula;
}

# Tidy (possibly malformed) HTML by round-tripping it through libxml.
#   $input - HTML fragment
# Returns the serialized children of the parsed <body>, or "" when the
# input is empty or the parser produced no <body>.
sub process_html_to_xml {
    my ($self, $input) = @_;
    my $parser = $self->{parser};

    if (!defined($input) || length($input) < 1) { return ""; }

    # only encode ampersands -- brackets may be valid tags
    my $encoded = &HTML::Entities::encode_entities($input, '&');

    # for the <chem> tag, we want the strings "<=>", "<-", "->" to be
    # properly entitized so the parser doesn't destroy them. Each
    # <chem>...</chem> element is handled on its own (non-greedy, and
    # allowed to span lines) so that every arrow inside it is protected
    # and text between two separate <chem> elements is left alone.
    $encoded =~ s{(<\s*chem\s*>)(.*?)(<\s*/chem\s*>)}{
        my ($open, $formula, $close) = ($1, $2, $3);
        $open . _entitize_chem_arrows($formula) . $close;
    }gise;

    # parse into libXML to tidy tags, we suppress any errors
    # because otherwise the parser complains about non-HTML
    # tags to STDERR and the Apache error logs
    my $dom = $parser->parse_html_string($encoded,
        {
            suppress_errors   => 1,
            suppress_warnings => 1,
            recover           => 2
        }
    );
    return "" unless (defined($dom));

    # the dom returns a full <html> structure, so just get
    # all the child nodes of the <body> tag and put them together;
    # a document without a <body> yields an empty result instead of
    # a fatal method call on an undefined value
    my ($body) = $dom->findnodes('/html/body');
    return "" unless (defined($body));

    my $xml = join('', map { $_->toString() } $body->childNodes);
    # entities passed into $input are in the form of '&amp;lt;'
    # they are double entities
    return $xml;
}
! 392:
! 393: ##############################################
! 394: ##############################################
! 395:
! 396: =item process_xml_to_html
! 397:
! 398: $xhtml = $gateway->process_xml_to_html($xml);
! 399:
! 400: Takes XML input, decodes one level of HTML entities,
! 401: and returns the result to the caller.
! 402:
! 403: =cut
! 404:
! 405: ##############################################
! 406: ##############################################
! 407:
# Undo one level of entity encoding on serialized XML.
#   $xml - text as produced by process_html_to_xml()
# Returns the decoded text; e.g. "&lt;" in $xml comes back as "<".
sub process_xml_to_html {
    my ($self, $xml) = @_;
    # scalar assignment keeps the decoder working on a copy
    my $decoded = &HTML::Entities::decode_entities($xml);
    return $decoded;
}
! 418:
! 419: ##############################################
! 420: ##############################################
! 421:
! 422: =item process_incoming_html
! 423:
! 424: $xhtml = $gateway->process_incoming_html($html);
! 425: $xhtml = $gateway->process_incoming_html($html, $legacy);
! 426:
! 427: Designed to be called for all raw HTML inputs from the client
! 428: side before storing or rendering data. Decodes UTF-8 data,
! 429: trims leading and trailing "\n" and "<br />" tags. Processes
! 430: the result through the XML parser, converts this back to
! 431: balanced well-formed XHTML, re-encodes the result as UTF-8,
! 432: and returns the result to the caller.
! 433:
! 434: =over 4
! 435:
! 436: =item legacy
! 437:
! 438: $legacy = 0;
! 439: $legacy = 1;
! 440:
! 441: I<(optional)> If true, adds additional processing intended
! 442: to emulate LON-CAPA 2.x parsing of the content.
! 443:
! 444: =back
! 445:
! 446: =cut
! 447:
! 448: ##############################################
! 449: ##############################################
! 450:
# Normalize raw HTML received from the client before it is stored.
#   $input  - UTF-8 encoded HTML as received
#   $legacy - (optional) true to emulate LON-CAPA 2.x handling
# Returns UTF-8 encoded, tidied XHTML.
#
# For consistency's sake, call process_html_to_xml afterwards if the
# result has to be embedded in XML later on.
sub process_incoming_html {
    my ($self, $input, $legacy) = @_;

    # decode to characters so unicode input survives the processing
    # below; the result is re-encoded before returning
    $input = &Encode::decode_utf8($input);

    # trim leading and trailing whitespace and one trailing HTML break
    chomp($input);
    $input =~ s/\s+$//s;
    $input =~ s/^\s+//s;
    $input =~ s/\<br\s*\/*\>$//s;

    # remember whether the author wrote a bare <p> tag
    my $no_p_input = (length($input) > 0
                      && $input !~ m/.*\<[\s]*p[\s]*\>.*/is);

    my $xml = $self->process_html_to_xml($input);

    # the xml returns content inside a <p> tag if there are no block
    # tags... thus to preserve old behavior, we strip out that <p></p>.
    # The block-level check is made as a method call: calling it as a
    # plain function with only $input passed the text in the invocant
    # slot, so the check never saw the input at all.
    if ($legacy && $no_p_input
        && !$self->contains_block_level_tags($input)) {
        $xml =~ s/^\<p\>(.*)\<\/p\>/$1/si;
    }

    my $xhtml = $self->process_xml_to_html($xml);
    # back to UTF-8 bytes, mirroring the decode at the top
    $xhtml = &Encode::encode_utf8($xhtml);
    return $xhtml;
}
! 483:
! 484: ##############################################
! 485: ##############################################
! 486:
! 487: =item process_outgoing_html
! 488:
! 489: $html = $gateway->process_outgoing_html($xhtml);
! 490: $html = $gateway->process_outgoing_html($xhtml, $legacy);
! 491:
! 492: Designed to be called for all HTML outputs to the client
! 493: side before rendering data. This entitizes all non-allowed
! 494: tags, as was previously done in Apache::lonfeedback, and
! 495: processes and converts all LON-CAPA supported custom tags (see
! 496: above) to their respective output HTML.
! 497:
! 498: =over 4
! 499:
! 500: =item legacy
! 501:
! 502: $legacy = 0;
! 503: $legacy = 1;
! 504:
! 505: I<(optional)> If true, adds additional processing intended
! 506: to emulate LON-CAPA 2.x parsing of the content. This includes
! 507: behavior to convert "\n" to "<br />" if there are no block-level
! 508: tags detected in the input. In addition, raw URLs are converted
! 509: automatically to <a> links.
! 510:
! 511: =back
! 512:
! 513: =back
! 514:
! 515: =cut
! 516:
! 517: ##############################################
! 518: ##############################################
! 519:
# Prepare stored XHTML for display.
#   $input  - XHTML to render
#   $legacy - (optional) true to emulate LON-CAPA 2.x handling
#             (newline to <br /> when no block-level tags are present,
#             and raw URLs turned into links)
# Returns the output text with non-allowed tags entitized and the
# supported custom tags replaced by the output of their handlers.
#
# Tags are not filtered at input stage for greater backwards
# compatibility. Note that this disregards course preference.
sub process_outgoing_html {
    my ($self, $input, $legacy) = @_;

    my %html = %Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;

    # entitize all tags that are not explicitly allowed (or that are
    # 1000 characters or longer): first the opening bracket, then the
    # closing bracket. The rejected case must produce the entity --
    # with identical branches nothing would ever be filtered.
    $input =~ s{<(/?\s*(\w+)[^><]*)}
               {($html{uc($2)} && (length($1) < 1000)) ? "<$1" : "&lt;$1"}ge;
    $input =~ s{(<?\s*(\w+)[^<>]*)>}
               {($html{uc($2)} && (length($1) < 1000)) ? "$1>" : "$1&gt;"}ge;

    if ($legacy) {
        unless (&contains_block_level_tags($self, $input)) {
            $input = $self->legacy_newline_to_br($input);
        }
        $input = $self->legacy_raw_href_to_link($input);
    }

    # at this point, we need to convert our own custom tags
    # into the appropriate output
    my $output = "";
    my $parser = HTML::LCParser->new(\$input);
    while (my $token = $parser->get_token()) {
        my $type = $token->[0];
        if ($type eq 'T' || $type eq 'D' || $type eq 'C') {
            # text, declarations and comments pass through unchanged
            $output .= $token->[1];
        } elsif ($type eq 'PI' || $type eq 'E') {
            # NOTE(review): the end tag of a custom tag presumably also
            # arrives here and is copied to the output -- confirm
            # whether a stray "</m>" etc. is acceptable downstream.
            $output .= $token->[2];
        } elsif ($type eq 'S') {
            my $tag = lc($token->[1]);
            if (exists($custom_tag_parsers{$tag})) {
                # hand the text following the start tag to the handler
                my $text = $parser->get_text();
                $output .= $custom_tag_parsers{$tag}->(
                    $self, $text, $self->{target});
            } else {
                $output .= $token->[4];
            }
        }
    }

    return $output;
}
! 567:
! 568: ##############################################
! 569: ##############################################
! 570:
! 571: =head1 STATIC CLASS METHODS
! 572:
! 573: The following are static class methods that can be called
! 574: by any object.
! 575:
! 576: =over 4
! 577:
! 578: =item contains_block_level_tags
! 579:
! 580: $bool = Apache::lonhtmlgateway->contains_block_level_tags($input);
! 581:
! 582: Uses a regular expression to find, in the input data, any tags
! 583: described in %LONCAPA_ALLOWED_STANDARD_TAGS as block-level.
! 584: Returns 1 if true, 0 if false.
! 585:
! 586: =cut
! 587:
! 588: ##############################################
! 589: ##############################################
! 590:
# Report whether the input contains an opening or closing tag that
# get_block_level_tags() lists as block-level.
#   $class - invocant (class name or object); may be omitted, in which
#            case the single argument is taken to be the input
#   $input - text to inspect
# Returns 1 if such a tag is found, 0 otherwise (including for
# undefined, empty, or non-string input).
sub contains_block_level_tags {
    my $class = shift;
    # accept both Class->method($input) and plain function($input)
    my $input = @_ ? shift : $class;
    return 0 unless (defined($input) && !ref($input) && length($input));

    # keep only real tag names; anything else would be interpolated
    # into the pattern below as garbage
    my @names = grep { defined($_) && !ref($_) && length($_) }
                @{ get_block_level_tags($class) };
    return 0 unless (@names);

    # one case-insensitive pass over the input; \b makes sure the whole
    # tag name is matched, so "<param>" is not mistaken for "<p>"
    my $alternation = join('|', map { quotemeta($_) } @names);
    return ($input =~ m/<\/?\s*(?:$alternation)\b/i) ? 1 : 0;
}
! 605:
! 606: ##############################################
! 607: ##############################################
! 608:
! 609: =item get_block_level_tags
! 610:
! 611: $tags = Apache::lonhtmlgateway->get_block_level_tags();
! 612:
! 613: Return a reference to an array of the tags described in
! 614: %LONCAPA_ALLOWED_STANDARD_TAGS as block-level. Note that these
! 615: tags are returned in no particular order, and the tag names
! 616: are returned in uppercase.
! 617:
! 618:
! 619: =cut
! 620:
! 621: ##############################################
! 622: ##############################################
! 623:
# List the tags marked as block-level (value 2) in
# %LONCAPA_ALLOWED_STANDARD_TAGS.
#   $class - invocant; not used
# Returns a reference to an array of uppercase tag names, in no
# particular order. The list starts out empty: initializing it with
# "[]" would put an array reference into the result as a bogus entry.
sub get_block_level_tags {
    my ($class) = @_;
    my %html = %Apache::lonhtmlgateway::LONCAPA_ALLOWED_STANDARD_TAGS;
    my @block = grep { $html{$_} == 2 } keys(%html);
    return \@block;
}
! 635:
! 636: ##############################################
! 637: ##############################################
! 638:
! 639: =head2 Legacy Functions
! 640:
! 641: These functions are intended to process input in the same or
! 642: a similar way to how it was processed in LON-CAPA 2.x.
! 643:
! 644: =item legacy_newline_to_br
! 645:
! 646: I<(formerly Apache::lonfeedback::newline_to_br)>
! 647:
! 648: $converted = Apache::lonhtmlgateway->legacy_newline_to_br($input);
! 649:
! 650: Parse the input using HTML::LCParser, and in any text nodes
! 651: which contain "\n" characters, replace those characters with
! 652: an HTML "<br />" tag.
! 653:
! 654: =cut
! 655:
! 656: ##############################################
! 657: ##############################################
! 658:
# Replace "\n" with "<br />" in the text nodes of the input, leaving
# tags, comments, declarations and processing instructions as they are.
#   $class - invocant; not used
#   $input - HTML text
# Returns the converted text. The result starts as an empty string so
# that input without any tokens yields "" rather than undef, matching
# process_outgoing_html().
sub legacy_newline_to_br {
    my ($class, $input) = @_;
    my $output = '';
    my $parser = HTML::LCParser->new(\$input);
    while (my $token = $parser->get_token()) {
        my $type = $token->[0];
        if ($type eq 'T') {
            # only text nodes get their newlines converted
            my $text = $token->[1];
            $text =~ s/\n/\<br \/\>/g;
            $output .= $text;
        } elsif ($type eq 'D' || $type eq 'C') {
            $output .= $token->[1];
        } elsif ($type eq 'PI' || $type eq 'E') {
            $output .= $token->[2];
        } elsif ($type eq 'S') {
            $output .= $token->[4];
        }
    }
    return $output;
}
! 679:
! 680: ##############################################
! 681: ##############################################
! 682:
! 683: =item legacy_raw_href_to_link
! 684:
! 685: I<(formerly Apache::lonhtmlcommon::raw_href_to_link)>
! 686:
! 687: $converted = Apache::lonhtmlgateway->legacy_raw_href_to_link($input);
! 688:
! 689: Search for any links/URLs within the input text, and convert them
! 690: to <a> tags whose content is embedded inside a <tt> tag.
! 691:
! 692: =back
! 693:
! 694: =cut
! 695:
! 696: ##############################################
! 697: ##############################################
! 698:
# Wrap every raw http:// or https:// URL in an <a> link whose visible
# text is the URL inside a <tt> element.
#   $class - invocant; not used
#   $input - text to scan
# A URL runs up to the next whitespace, quote or "<", and is only
# converted when it is followed by whitespace, "<", or the end of the
# text. Returns the converted text.
sub legacy_raw_href_to_link {
    my ($class, $input) = @_;
    $input =~ s{(https?://[^\s'"<]+)([\s<]|$)}{<a href="$1"><tt>$1</tt></a>$2}gi;
    return $input;
}
! 705:
# Handler for <algebra>: decodes entities in the tag content and hands
# the result, with the gateway's target, to
# Apache::lontexconvert::algebra().
#   $input - text found inside the tag
# Returns whatever that conversion routine returns.
sub parse_algebra_tag {
    my ($self, $input) = @_;
    # the <algebra> parser does NOT handle entities, unlike the general
    # <m> parser, so the content is decoded before it is passed on
    my $expression = &HTML::Entities::decode_entities($input);
    return &Apache::lontexconvert::algebra($expression, $self->{target});
}
! 716:
# Handler for <mi> (inline math): wraps the content in "$" characters
# and delegates to parse_m_tag().
sub parse_mi_tag {
    my ($self, $input) = @_;
    my $inline_math = '$' . $input . '$';
    return parse_m_tag($self, $inline_math);
}
! 722:
# Handler for <md> (display math): wraps the content in "\[" and "\]"
# and delegates to parse_m_tag().
sub parse_md_tag {
    my ($self, $input) = @_;
    my $display_math = '\[' . $input . '\]';
    return parse_m_tag($self, $display_math);
}
! 728:
# Handler for <m>: passes the TeX content and the gateway's target to
# Apache::lontexconvert::to_convert() and returns its result.
sub parse_m_tag {
    my ($self, $tex) = @_;
    my $target = $self->{target};
    return &Apache::lontexconvert::to_convert($tex, $target);
}
! 734:
# Handler for <chem>: formats a chemical equation.
#   $input - text found inside the tag; may contain entities
# Arrows ("->", "<-", "<=>") become arrow symbols, "." becomes a middle
# dot, a leading stoichiometric coefficient of 1 is dropped, digits
# after a letter, ")" or "]" become subscripts, "^charge" becomes a
# superscript, "_" forces a space and "-" becomes a minus sign.
# Returns the formatted formula as a string.
#
# All symbols are written as ASCII entities so the output does not
# depend on the encoding layer used to deliver it.
# NOTE(review): for non-web targets the arrows are returned as literal
# <m>...</m> markup; nothing in this sub converts them -- confirm that
# the caller of the non-web output handles it.
sub parse_chem_tag {
    my ($self, $input) = @_;
    my $target = $self->{target};
    my $is_web = (defined($target) && $target eq 'web');

    # arrow token => [ web rendering, non-web rendering ]
    my %arrow_for = (
        '->'  => [ '&#8594; ', '<m>\ensuremath{\rightarrow}</m> ' ],
        '<-'  => [ '&#8592; ', '<m>\ensuremath{\leftarrow}</m> ' ],
        '<=>' => [ '&#8652; ', '<m>\ensuremath{\rightleftharpoons}</m> ' ],
    );

    # as with the <algebra> tag, some portions of the <chem> input may
    # be coming in encoded, especially arrows -- so decode it first
    $input = &HTML::Entities::decode($input);

    my $formula = '';
    foreach my $token (split(/(\s\+|\->|<=>|<\-|\.)/, $input)) {
        if (exists($arrow_for{$token})) {
            $formula .= $arrow_for{$token}[$is_web ? 0 : 1];
            next;
        }
        if ($token eq '.') {
            # the dot attaches directly to the preceding molecule
            $formula =~ s/(?:&nbsp;| )$//;
            $formula .= '&middot;';
            next;
        }

        # leading coefficient (digits, "/", "|", optional &fracNN), then
        # the molecule; /s keeps text after an embedded newline. Every
        # part of the pattern is optional, so it always matches.
        my ($coefficient, $molecule) =
            ($token =~ /^\s*([\d|\/]*(?:&frac\d\d)?)(.*)/s);
        $formula .= $coefficient if ($coefficient ne '1');

        if ($is_web) {
            # the text was entity-decoded above, so it may now contain
            # raw "<", ">" and "&" supplied by the author; escape them
            # before markup is added so they cannot become live HTML
            $molecule =~ s/&/&amp;/g;
            $molecule =~ s/</&lt;/g;
            $molecule =~ s/>/&gt;/g;
        }
        # subscripts
        $molecule =~ s|(?<=[a-zA-Z\)\]\s])(\d+)|<sub>$1</sub>|g;
        # superscripts
        $molecule =~ s|\^(\d*[+\-]*)|<sup>$1</sup>|g;
        # strip whitespace
        $molecule =~ s/\s*//g;
        # forced space
        $molecule =~ s/_/ /g;
        $molecule =~ s/-/&minus;/g;
        $formula .= $molecule . '&nbsp;';
    }
    # get rid of trailing space
    $formula =~ s/(?:&nbsp;| )$//;
    return $formula;
}
! 793:
! 794: ##############################################
! 795: ##############################################
! 796:
! 797: =head1 AUTHORS
! 798:
! 799: Phil Fazio
! 800:
! 801: =head1 VERSION
! 802:
! 803: $Id:$
! 804:
! 805: =cut
! 806:
! 807: ##############################################
! 808: ##############################################
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>