Annotation of loncom/homework/cleanxml/post_xml.pm, revision 1.1
1.1 ! damieng 1: # The LearningOnline Network
! 2: # Third step to clean a file.
! 3: #
! 4: # $Id$
! 5: #
! 6: # Copyright Michigan State University Board of Trustees
! 7: #
! 8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 9: #
! 10: # LON-CAPA is free software; you can redistribute it and/or modify
! 11: # it under the terms of the GNU General Public License as published by
! 12: # the Free Software Foundation; either version 2 of the License, or
! 13: # (at your option) any later version.
! 14: #
! 15: # LON-CAPA is distributed in the hope that it will be useful,
! 16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 18: # GNU General Public License for more details.
! 19: #
! 20: # You should have received a copy of the GNU General Public License
! 21: # along with LON-CAPA; if not, write to the Free Software
! 22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 23: #
! 24: # /home/httpd/html/adm/gpl.txt
! 25: #
! 26: # http://www.lon-capa.org/
! 27: #
! 28: ###
! 29:
! 30: #!/usr/bin/perl
! 31:
! 32: package Apache::post_xml;
! 33:
! 34: use strict;
! 35: use utf8;
! 36: use warnings;
! 37:
! 38: use File::Basename;
! 39: use File::Temp qw/ tempfile /;
! 40: use Cwd 'abs_path';
! 41: use XML::LibXML;
! 42: use HTML::TokeParser; # used to parse sty files
! 43: use Tie::IxHash; # for ordered hashes
! 44:
! 45: use Env qw(RES_DIR); # path of res directory parent (without the / at the end)
! 46:
! 47: no warnings 'recursion'; # yes, fix_paragraph is using heavy recursion, I know
! 48:
! 49: # these are constants
! 50: my @block_elements = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm');
! 51: my @inline_like_block = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup'); # inline elements treated like blocks for pretty print and some other things
! 52: my @responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse','essayresponse','radiobuttonresponse','matchresponse','rankresponse','imageresponse','functionplotresponse');
! 53: my @block_html = ('html','head','body','section','h1','h2','h3','h4','h5','h6','div','p','ul','ol','li','table','tbody','tr','td','th','dl','dt','dd','pre','noscript','hr','address','blockquote','object','applet','embed','map','form','fieldset','iframe','center','frameset');
! 54: my @no_newline_inside = ('import','parserlib','scriptlib','data','function','label','xlabel','ylabel','tic','text','rectangle','image','title','h1','h2','h3','h4','h5','h6','li','td','p');
! 55: my @preserve_elements = ('script','answer','pre');
! 56: my @accepting_style = ('section','h1','h2','h3','h4','h5','h6','div','p','li','td','th','dt','dd','pre','blockquote');
! 57: my @latex_math = ('\alpha', '\theta', '\omicron', '\tau', '\beta', '\vartheta', '\pi', '\upsilon', '\gamma', '\gamma', '\varpi', '\phi', '\delta', '\kappa', '\rho', '\varphi', '\epsilon', '\lambda', '\varrho', '\chi', '\varepsilon', '\mu', '\sigma', '\psi', '\zeta', '\nu', '\varsigma', '\omega', '\eta', '\xi',
! 58: '\Gamma', '\Lambda', '\Sigma', '\Psi', '\Delta', '\Xi', '\Upsilon', '\Omega', '\Theta', '\Pi', '\Phi',
! 59: '\pm', '\cap', '\diamond', '\oplus', '\mp', '\cup', '\bigtriangleup', '\ominus', '\times', '\uplus', '\bigtriangledown', '\otimes', '\div', '\sqcap', '\triangleleft', '\oslash', '\ast', '\sqcup', '\triangleright', '\odot', '\star', '\vee', '\lhd$', '\bigcirc', '\circ', '\wedge', '\rhd$', '\dagger', '\bullet', '\setminus', '\unlhd$', '\ddagger', '\cdot', '\wr', '\unrhd$', '\amalg', '+', '-',
! 60: '\leq', '\geq', '\equiv', '\models', '\prec', '\succ', '\sim', '\perp', '\preceq', '\succeq', '\simeq', '\mid', '\ll', '\gg', '\asymp', '\parallel', '\subset', '\supset', '\approx', '\bowtie', '\subseteq', '\supseteq', '\cong', '\Join$', '\sqsubset$', '\sqsupset$', '\neq', '\smile', '\sqsubseteq', '\sqsupseteq', '\doteq', '\frown', '\in', '\ni', '\propto', '\vdash', '\dashv',
! 61: '\colon', '\ldotp', '\cdotp',
! 62: '\leftarrow', '\longleftarrow', '\uparrow', '\Leftarrow', '\Longleftarrow', '\Uparrow', '\rightarrow', '\longrightarrow', '\downarrow', '\Rightarrow', '\Longrightarrow', '\Downarrow', '\leftrightarrow', '\longleftrightarrow', '\updownarrow', '\Leftrightarrow', '\Longleftrightarrow', '\Updownarrow', '\mapsto', '\longmapsto', '\nearrow', '\hookleftarrow', '\hookrightarrow', '\searrow', '\leftharpoonup', '\rightharpoonup', '\swarrow', '\leftharpoondown', '\rightharpoondown', '\nwarrow', '\rightleftharpoons', '\leadsto$',
! 63: '\ldots', '\cdots', '\vdots', '\ddots', '\aleph', '\prime', '\forall', '\infty', '\hbar', '\emptyset', '\exists', '\Box$', '\imath', '\nabla', '\neg', '\Diamond$', '\jmath', '\surd', '\flat', '\triangle', '\ell', '\top', '\natural', '\clubsuit', '\wp', '\bot', '\sharp', '\diamondsuit', '\Re', '\|', '\backslash', '\heartsuit', '\Im', '\angle', '\partial', '\spadesuit', '\mho$',
! 64: '\sum', '\bigcap', '\bigodot', '\prod', '\bigcup', '\bigotimes', '\coprod', '\bigsqcup', '\bigoplus', '\int', '\bigvee', '\biguplus', '\oint', '\bigwedge',
! 65: '\arccos', '\cos', '\csc', '\exp', '\ker', '\limsup', '\min', '\sinh', '\arcsin', '\cosh', '\deg', '\gcd', '\lg', '\ln', '\Pr', '\sup', '\arctan', '\cot', '\det', '\hom', '\lim', '\log', '\sec', '\tan', '\arg', '\coth', '\dim', '\inf', '\liminf', '\max', '\sin', '\tanh',
! 66: '\uparrow', '\Uparrow', '\downarrow', '\Downarrow', '\updownarrow', '\Updownarrow', '\lfloor', '\rfloor', '\lceil', '\rceil', '\langle', '\rangle', '\backslash',
! 67: '\rmoustache', '\lmoustache', '\rgroup', '\lgroup', '\arrowvert', '\Arrowvert', '\bracevert',
! 68: '\hat{', '\acute{', '\bar{', '\dot{', '\breve{', '\check{', '\grave{', '\vec{', '\ddot{', '\tilde{',
! 69: '\widetilde{', '\widehat{', '\overleftarrow{', '\overrightarrow{', '\overline{', '\underline{', '\overbrace{', '\underbrace{', '\sqrt{', '\sqrt[', '\frac{'
! 70: );
! 71: # list of elements that can contain style elements:
! 72: my @containing_styles = ('library','problem',@responses,'foil','item','text','hintgroup','hintpart','label','part','preduedate','postanswerdate','solved','notsolved','block','while','web','standalone','problemtype','languageblock','translated','lang','window','windowlink','togglebox','instructorcomment','body','section','div','p','li','dd','td','th','blockquote','object','applet','video','audio','canvas','fieldset','button',
! 73: 'span','strong','em','b','i','sup','sub','code','kbd','samp','tt','ins','del','var','small','big','u','font');
! 74: my @html_styles = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'u');
! 75:
! 76: my $warnings; # 1 = print warnings
! 77:
! 78:
! 79: # Parses the XML document and fixes many things to turn it into a document matching the schema.
! 80: # Returns the text of the document as a byte string.
! 81: sub post_xml {
! 82: my ($textref, $file_path, $warn) = @_;
! 83: $warnings = $warn;
! 84:
! 85: my $dom_doc = XML::LibXML->load_xml(string => $textref);
! 86:
! 87: my $root = fix_structure($dom_doc);
! 88:
! 89: remove_elements($root, ['startouttext','startoutext','startottext','startouttex','startouttect','atartouttext','starouttext','starttextout','starttext','starttextarea','endouttext','endoutext','endoutttext','endouttxt','endouutext','ednouttext','endouttex','endoouttext','endouttest','endtextout','endtextarea','startpartmarker','endpartmarker','basefont','x-claris-tagview','x-claris-window','x-sas-window']);
! 90:
! 91: remove_empty_attributes($root);
! 92:
! 93: fix_attribute_case($root);
! 94:
! 95: my $fix_by_hand = replace_m($root);
! 96:
! 97: my @all_block = (@block_elements, @block_html);
! 98: add_sty_blocks($file_path, $root, \@all_block); # must come before the subs using @all_block
! 99:
! 100: fix_block_styles($root, \@all_block);
! 101: $root->normalize();
! 102:
! 103: fix_fonts($root, \@all_block);
! 104:
! 105: replace_u($root);
! 106:
! 107: remove_bad_cdata_sections($root);
! 108:
! 109: add_cdata_sections($root);
! 110:
! 111: fix_style_element($root);
! 112:
! 113: fix_tables($root);
! 114:
! 115: fix_lists($root);
! 116:
! 117: fix_wrong_name_for_img($root); # should be before replace_deprecated_attributes_by_css
! 118:
! 119: replace_deprecated_attributes_by_css($root);
! 120:
! 121: replace_center($root, \@all_block); # must come after replace_deprecated_attributes_by_css
! 122:
! 123: replace_nobr($root);
! 124:
! 125: remove_useless_notsolved($root);
! 126:
! 127: fix_paragraphs_inside($root, \@all_block);
! 128:
! 129: remove_empty_style($root);
! 130:
! 131: fix_empty_lc_elements($root);
! 132:
! 133: lowercase_attribute_values($root);
! 134:
! 135: replace_numericalresponse_unit_attribute($root);
! 136:
! 137: replace_functions_by_elements($root);
! 138:
! 139: pretty($root, \@all_block);
! 140:
! 141: replace_tm_dtm($root);
! 142:
! 143: if ($fix_by_hand) {
! 144: die "The file has been converted but it should be fixed by hand.";
! 145: }
! 146: return $dom_doc->toString(); # byte string !
! 147: }
! 148:
! 149: sub fix_structure {
! 150: my ($doc) = @_;
! 151: # the root element has already been added in pre_xml
! 152: my $root = $doc->documentElement;
! 153: # inside the root, replace html, problem and library elements by their content
! 154: my @toreplace = ('html','problem','library');
! 155: foreach my $name (@toreplace) {
! 156: my @elements = $root->getElementsByTagName($name);
! 157: foreach my $element (@elements) {
! 158: replace_by_children($element);
! 159: }
! 160: }
! 161: # insert all link and style elements inside a new head element
! 162: my $current_node = undef;
! 163: my @heads = $doc->getElementsByTagName('head');
! 164: my @links = $doc->getElementsByTagName('link');
! 165: my @styles = $doc->getElementsByTagName('style');
! 166: my @titles = $doc->getElementsByTagName('title');
! 167: if (scalar(@titles) > 0) {
! 168: # NOTE: there is a title element in gnuplot, not to be confused with the one inside HTML head
! 169: for (my $i=0; $i<scalar(@titles); $i++) {
! 170: my $title = $titles[$i];
! 171: my $found_gnuplot = 0;
! 172: my $ancestor = $title->parentNode;
! 173: while (defined $ancestor) {
! 174: if ($ancestor->nodeName eq 'gnuplot') {
! 175: $found_gnuplot = 1;
! 176: last;
! 177: }
! 178: $ancestor = $ancestor->parentNode;
! 179: }
! 180: if ($found_gnuplot) {
! 181: splice(@titles, $i, 1);
! 182: $i--;
! 183: }
! 184: }
! 185: }
! 186: if (scalar(@heads) > 0 || scalar(@titles) > 0 || scalar(@links) > 0 || scalar(@styles) > 0) {
! 187: my $htmlhead = $doc->createElement('head');
! 188: foreach my $head (@heads) {
! 189: my $next;
! 190: for (my $child=$head->firstChild; defined $child; $child=$next) {
! 191: $next = $child->nextSibling;
! 192: $head->removeChild($child);
! 193: if ($child->nodeType != XML_ELEMENT_NODE ||
! 194: string_in_array(['title','script','style','meta','link','import','base'], $child->nodeName)) {
! 195: $htmlhead->appendChild($child);
! 196: } else {
! 197: # this should not be in head
! 198: insert_after_or_first($root, $child, $current_node);
! 199: }
! 200: }
! 201: $head->parentNode->removeChild($head);
! 202: }
! 203: foreach my $child (@titles, @links, @styles) {
! 204: $child->parentNode->removeChild($child);
! 205: $htmlhead->appendChild($child);
! 206: }
! 207: insert_after_or_first($root, $htmlhead, $current_node);
! 208: $current_node = $htmlhead;
! 209: }
! 210: # body
! 211: my $htmlbody = undef;
! 212: my @bodies = $doc->getElementsByTagName('body');
! 213: if (scalar(@bodies) > 0) {
! 214: # TODO: fix content and position of body elements
! 215: if ($root->nodeName eq 'problem') {
! 216: foreach my $body (@bodies) {
! 217: replace_by_children($body);
! 218: }
! 219: }
! 220: }
! 221: # add all the meta elements afterwards when they are LON-CAPA meta. Remove all HTML meta.
! 222: my @meta_names = ('abstract','author','authorspace','avetries','avetries_list','clear','comefrom','comefrom_list','copyright','correct','count','course','course_list','courserestricted','creationdate','dependencies','depth','difficulty','difficulty_list','disc','disc_list','domain','end','field','firstname','generation','goto','goto_list','groupname','helpful','highestgradelevel','hostname','id','keynum','keywords','language','lastname','lastrevisiondate','lowestgradelevel','middlename','mime','modifyinguser','notes','owner','permanentemail','scope','sequsage','sequsage_list','standards','start','stdno','stdno_list','subject','technical','title','url','username','value','version');
! 223: my @metas = $doc->getElementsByTagName('meta');
! 224: foreach my $meta (@metas) {
! 225: $meta->parentNode->removeChild($meta);
! 226: my $name = $meta->getAttribute('name');
! 227: my $content = $meta->getAttribute('content');
! 228: if (defined $name && defined $content && string_in_array(\@meta_names, lc($name))) {
! 229: my $lcmeta = $doc->createElement('meta');
! 230: $lcmeta->setAttribute('name', lc($name));
! 231: $lcmeta->setAttribute('content', $content);
! 232: insert_after_or_first($root, $lcmeta, $current_node);
! 233: $current_node = $lcmeta;
! 234: }
! 235: }
! 236: return($root);
! 237: }
! 238:
! 239: # insert the new child under parent after the reference child, or as the first child if the reference child is not defined
! 240: sub insert_after_or_first {
! 241: my ($parent, $newchild, $refchild) = @_;
! 242: if (defined $refchild) {
! 243: $parent->insertAfter($newchild, $refchild);
! 244: } elsif (defined $parent->firstChild) {
! 245: $parent->insertBefore($newchild, $parent->firstChild);
! 246: } else {
! 247: $parent->appendChild($newchild);
! 248: }
! 249: }
! 250:
! 251: # removes all elements with given names inside the node, but keep the content
! 252: sub remove_elements {
! 253: my ($node, $to_remove) = @_;
! 254: my $nextChild;
! 255: for (my $child=$node->firstChild; defined $child; $child=$nextChild) {
! 256: $nextChild = $child->nextSibling;
! 257: my $type = $node->nodeType;
! 258: if ($type == XML_ELEMENT_NODE) {
! 259: if (string_in_array($to_remove, $child->nodeName)) {
! 260: my $first_non_white = $child->firstChild;
! 261: if (defined $first_non_white && $first_non_white->nodeType == XML_TEXT_NODE &&
! 262: $first_non_white->nodeValue =~ /^\s*$/) {
! 263: $first_non_white = $first_non_white->nextSibling;
! 264: }
! 265: if (defined $first_non_white) {
! 266: $nextChild = $first_non_white;
! 267: replace_by_children($child);
! 268: } else {
! 269: $node->removeChild($child);
! 270: }
! 271: } else {
! 272: remove_elements($child, $to_remove);
! 273: }
! 274: }
! 275: }
! 276: }
! 277:
! 278: # removes some attributes that have an invalid empty value
! 279: sub remove_empty_attributes {
! 280: my ($root) = @_;
! 281: my $doc = $root->ownerDocument;
! 282: # this list is based on validation errors in the MSU subset (it could be more complete if it was based on the schema)
! 283: my @attributes = (
! 284: ['curve', ['pointsize']],
! 285: ['foil', ['location']],
! 286: ['foilgroup', ['checkboxoptions', 'options', 'texoptions']],
! 287: ['gnuplot', ['pattern', 'texwidth']],
! 288: ['img', ['height', 'texheight', 'texwidth', 'texwrap', 'width']],
! 289: ['import', ['importmode']],
! 290: ['optionresponse', ['max']],
! 291: ['organicstructure', ['options']],
! 292: ['radiobuttonresponse', ['max']],
! 293: ['randomlabel', ['height', 'texwidth', 'width']],
! 294: ['stringresponse', ['type']],
! 295: ['textline', ['size']],
! 296: );
! 297: foreach my $element_attributes (@attributes) {
! 298: my $element_name = $element_attributes->[0];
! 299: my $attribute_names = $element_attributes->[1];
! 300: my @elements = $doc->getElementsByTagName($element_name);
! 301: foreach my $element (@elements) {
! 302: foreach my $attribute_name (@$attribute_names) {
! 303: my $value = $element->getAttribute($attribute_name);
! 304: if (defined $value && $value =~ /^\s*$/) {
! 305: $element->removeAttribute($attribute_name);
! 306: }
! 307: }
! 308: }
! 309: }
! 310: }
! 311:
! 312: # fixes the case for a few attributes that are not all lowercase
! 313: # (the HTML parser used in html_to_xml turns everything lowercase, which is a good thing in general)
! 314: sub fix_attribute_case {
! 315: my ($root) = @_;
! 316: my $doc = $root->ownerDocument;
! 317: my @attributes = (
! 318: ['labelgroup', ['TeXsize']],
! 319: ['h1', ['TeXsize']],
! 320: ['h2', ['TeXsize']],
! 321: ['h3', ['TeXsize']],
! 322: ['h4', ['TeXsize']],
! 323: ['h5', ['TeXsize']],
! 324: ['h6', ['TeXsize']],
! 325: # font and basefont have a TeXsize but will be removed
! 326: ['optionresponse', ['TeXlayout']],
! 327: ['itemgroup', ['TeXitemgroupwidth']],
! 328: ['Task', ['OptionalRequired']],
! 329: ['Question', ['OptionalRequired','Mandatory']],
! 330: ['Instance', ['OptionalRequired','Disabled']],
! 331: ['Criteria', ['Mandatory']],
! 332: ['table', ['TeXwidth','TeXtheme']],
! 333: ['td', ['TeXwidth']],
! 334: ['th', ['TeXwidth']],
! 335: ['img', ['TeXwidth','TeXheight','TeXwrap']],
! 336: );
! 337: foreach my $element_attributes (@attributes) {
! 338: my $element_name = $element_attributes->[0];
! 339: my $attribute_names = $element_attributes->[1];
! 340: my @elements = $doc->getElementsByTagName($element_name);
! 341: foreach my $element (@elements) {
! 342: foreach my $attribute_name (@$attribute_names) {
! 343: my $value = $element->getAttribute(lc($attribute_name));
! 344: if (defined $value) {
! 345: $element->removeAttribute(lc($attribute_name));
! 346: $element->setAttribute($attribute_name, $value);
! 347: }
! 348: }
! 349: }
! 350: }
! 351: }
! 352:
! 353: # Replaces m by HTML, tm and/or dtm (which will be replaced by <m> later, but they are useful
! 354: # to know if the element is a block element or not).
! 355: # m might contain non-math LaTeX, while tm and dtm may only contain math.
! 356: # Returns 1 if the file should be fixed by hand, 0 otherwise.
! 357: sub replace_m {
! 358: my ($root) = @_;
! 359: my $doc = $root->ownerDocument;
! 360: my $fix_by_hand = 0;
! 361: # search for variable declarations
! 362: my @variables = ();
! 363: my @scripts = $root->getElementsByTagName('script');
! 364: foreach my $script (@scripts) {
! 365: my $type = $script->getAttribute('type');
! 366: if (defined $type && $type eq 'loncapa/perl') {
! 367: if (defined $script->firstChild && $script->firstChild->nodeType == XML_TEXT_NODE) {
! 368: my $text = $script->firstChild->nodeValue;
! 369: # NOTE: we are not interested in replacing "@value", only "$value"
! 370: # this regexp is for " $a = ..." and " $a[...] = ..."
! 371: while ($text =~ /^[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
! 372: if (!string_in_array(\@variables, $1)) {
! 373: push(@variables, $1);
! 374: }
! 375: }
! 376: # this regexp is for "...; $a = ..." and "...; $a[...] = ..."
! 377: while ($text =~ /^[^'"\/;]+;[ \t]*\$([a-zA-Z_0-9]+)(?:\[[^\]]+\])?[ \t]*=/gm) {
! 378: if (!string_in_array(\@variables, $1)) {
! 379: push(@variables, $1);
! 380: }
! 381: }
! 382: # this regexp is for " @a = ..."
! 383: while ($text =~ /^[ \t]*\@([a-zA-Z_0-9]+)[ \t]*=/gm) {
! 384: if (!string_in_array(\@variables, $1)) {
! 385: push(@variables, $1);
! 386: }
! 387: }
! 388: # this regexp is for " ($a, $b, $c) = ..."
! 389: my @matches = ($text =~ /^[ \t]*\([ \t]*\$([a-zA-Z_0-9]+)(?:[ \t]*,[ \t]*\$([a-zA-Z_0-9]+))*[ \t]*\)[ \t]*=/gm);
! 390: foreach my $match (@matches) {
! 391: if (!defined $match) {
! 392: next; # not sure why it happens, but it does
! 393: }
! 394: if (!string_in_array(\@variables, $match)) {
! 395: push(@variables, $match);
! 396: }
! 397: }
! 398: # and this one is for "push @a"
! 399: while ($text =~ /^[ \t]*push @([a-zA-Z_0-9]+)[ \t,]*/gm) {
! 400: if (!string_in_array(\@variables, $1)) {
! 401: push(@variables, $1);
! 402: }
! 403: }
! 404: # use the opportunity to report usage of <m> in Perl scripts
! 405: if ($text =~ /^[^#].*<m[ >]/m) {
! 406: if ($warnings) {
! 407: print "WARNING: <m> is used in a script, it should be converted by hand\n";
! 408: }
! 409: $fix_by_hand = 1;
! 410: }
! 411: }
! 412: }
! 413: }
! 414: my @ms = $root->getElementsByTagName('m');
! 415: foreach my $m (@ms) {
! 416: if (!defined $m->firstChild) {
! 417: $m->parentNode->removeChild($m);
! 418: next;
! 419: }
! 420: if (defined $m->firstChild->nextSibling || $m->firstChild->nodeType != XML_TEXT_NODE) {
! 421: if ($warnings) {
! 422: print "WARNING: m value is not simple text\n";
! 423: }
! 424: $fix_by_hand = 1;
! 425: next;
! 426: }
! 427: my $text = $m->firstChild->nodeValue;
! 428: my $text_before_variable_replacement = $text;
! 429: my $var_key1 = 'dfhg3df54hg65hg4';
! 430: my $var_key2 = 'dfhg654d6f5g4h5f';
! 431: my $eval = defined $m->getAttribute('eval') && $m->getAttribute('eval') eq 'on';
! 432: if ($eval) {
! 433: # replace variables
! 434: foreach my $variable (@variables) {
! 435: my $replacement = $var_key1.$variable.$var_key2;
! 436: $text =~ s/\$$variable(?![a-zA-Z])/$replacement/ge;
! 437: $text =~ s/\$\{$variable\}/$replacement/ge;
! 438: }
! 439: }
! 440: # check if the expression is enclosed in math separators: $ $$ \( \) \[ \]
! 441: # if so, replace the whole node by dtm or tm
! 442: my $new_text;
! 443: my $new_node_name;
! 444: if ($text =~ /^\s*\$\$([^\$]*)\$\$\s*$/) {
! 445: $new_node_name = 'dtm';
! 446: $new_text = $1;
! 447: } elsif ($text =~ /^\s*\\\[(.*)\\\]\s*$/s) {
! 448: $new_node_name = 'dtm';
! 449: $new_text = $1;
! 450: } elsif ($text =~ /^\s*\$([^\$]*)\$\s*$/) {
! 451: $new_node_name = 'tm';
! 452: $new_text = $1;
! 453: } elsif ($text =~ /^\s*\\\((.*)\\\)\s*$/s) {
! 454: $new_node_name = 'tm';
! 455: $new_text = $1;
! 456: }
! 457: if (defined $new_node_name) {
! 458: if ($eval) {
! 459: foreach my $variable (@variables) {
! 460: my $replacement = $var_key1.$variable.$var_key2;
! 461: $new_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
! 462: $new_text =~ s/$replacement/\$$variable/g;
! 463: }
! 464: }
! 465: my $new_node = $doc->createElement($new_node_name);
! 466: if ($eval) {
! 467: $new_node->setAttribute('eval', 'on');
! 468: }
! 469: $new_node->appendChild($doc->createTextNode($new_text));
! 470: $m->parentNode->replaceChild($new_node, $m);
! 471: next;
! 472: }
! 473: if ($text !~ /\$|\\\(|\\\)|\\\[|\\\]/) {
! 474: # there are no math separators inside
! 475: # try to guess if this is meant as math
! 476: my $found_math = 0;
! 477: foreach my $symbol (@latex_math) {
! 478: if (index($text, $symbol) != -1) {
! 479: $found_math = 1;
! 480: last;
! 481: }
! 482: }
! 483: if ($found_math) {
! 484: # interpret the whole text as LaTeX inline math
! 485: my $new_node = $doc->createElement('tm');
! 486: if ($eval) {
! 487: $new_node->setAttribute('eval', 'on');
! 488: }
! 489: $new_node->appendChild($doc->createTextNode($text_before_variable_replacement));
! 490: $m->parentNode->replaceChild($new_node, $m);
! 491: next;
! 492: }
! 493: # no math symbol found, we will convert the text with tth
! 494: }
! 495:
! 496: # there are math separators inside, even after hiding variables, or there was no math symbol
! 497:
! 498: # hide math parts inside before running tth
! 499: my $math_key1 = '#ghjgdh5hg45gf';
! 500: my $math_key2 = '#';
! 501: my @maths = ();
! 502: my @separators = (['$$','$$'], ['\\(','\\)'], ['\\[','\\]'], ['$','$']);
! 503: foreach my $seps (@separators) {
! 504: my $sep1 = $seps->[0];
! 505: my $sep2 = $seps->[1];
! 506: my $pos1 = index($text, $sep1);
! 507: if ($pos1 == -1) {
! 508: next;
! 509: }
! 510: my $pos2 = index($text, $sep2, $pos1+length($sep1));
! 511: while ($pos1 != -1 && $pos2 != -1) {
! 512: my $replace = substr($text, $pos1, $pos2+length($sep2)-$pos1);
! 513: push(@maths, $replace);
! 514: my $by = $math_key1.scalar(@maths).$math_key2;
! 515: $text = substr($text, 0, $pos1).$by.substr($text, $pos2+length($sep2));
! 516: $pos1 = index($text, $sep1);
! 517: if ($pos1 != -1) {
! 518: $pos2 = index($text, $sep2, $pos1+length($sep1));
! 519: }
! 520: }
! 521: }
! 522: # get HTML as text from tth
! 523: my $html_text = tth($text);
! 524: # replace math by replacements
! 525: for (my $i=0; $i < scalar(@maths); $i++) {
! 526: my $math = $maths[$i];
! 527: $math =~ s/&/&/g;
! 528: $math =~ s/</</g;
! 529: $math =~ s/>/>/g;
! 530: if ($math =~ /^\$\$(.*)\$\$$/s) {
! 531: $math = '<dtm>'.$1.'</dtm>';
! 532: } elsif ($math =~ /^\\\[(.*)\\\]$/s) {
! 533: $math = '<dtm>'.$1.'</dtm>';
! 534: } elsif ($math =~ /^\\\((.*)\\\)$/s) {
! 535: $math = '<tm>'.$1.'</tm>';
! 536: } elsif ($math =~ /^\$(.*)\$$/s) {
! 537: $math = '<tm>'.$1.'</tm>';
! 538: }
! 539: my $replace = $math_key1.($i+1).$math_key2;
! 540: $html_text =~ s/$replace/$math/;
! 541: }
! 542: # replace variables if necessary
! 543: if ($eval) {
! 544: foreach my $variable (@variables) {
! 545: my $replacement = $var_key1.$variable.$var_key2;
! 546: $html_text =~ s/$replacement([a-zA-Z])/\${$variable}$1/g;
! 547: $html_text =~ s/$replacement/\$$variable/g;
! 548: }
! 549: }
! 550: my $fragment = html_to_dom($html_text);
! 551: $doc->adoptNode($fragment);
! 552: $m->parentNode->replaceChild($fragment, $m);
! 553:
! 554: }
! 555: return $fix_by_hand;
! 556: }
! 557:
! 558: # Returns the HTML equivalent of LaTeX input, using tth
! 559: sub tth {
! 560: my ($text) = @_;
! 561: my ($fh, $tmp_path) = tempfile();
! 562: binmode($fh, ':utf8');
! 563: print $fh $text;
! 564: close $fh;
! 565: my $output = `tth -r -w2 -u -y0 < $tmp_path 2>/dev/null`;
! 566: # hopefully the temp file will not be removed before this point (otherwise we should use unlink_on_destroy 0)
! 567: $output =~ s/^\s*|\s*$//;
! 568: $output =~ s/<div class="p"><!----><\/div>/<br\/>/; # why is tth using such ugly markup for \newline ?
! 569: return $output;
! 570: }
! 571:
! 572: # transform simple HTML into a DOM fragment (which will need to be adopted by the document)
! 573: sub html_to_dom {
! 574: my ($text) = @_;
! 575: $text = '<root>'.$text.'</root>';
! 576: my $textref = html_to_xml::html_to_xml(\$text);
! 577: utf8::upgrade($$textref); # otherwise the XML parser fails when the HTML parser turns into a character
! 578: my $dom_doc = XML::LibXML->load_xml(string => $textref);
! 579: my $root = $dom_doc->documentElement;
! 580: remove_empty_style($root);
! 581: my $fragment = $dom_doc->createDocumentFragment();
! 582: my $next;
! 583: for (my $n=$root->firstChild; defined $n; $n=$next) {
! 584: $next = $n->nextSibling;
! 585: $root->removeChild($n);
! 586: $fragment->appendChild($n);
! 587: }
! 588: return($fragment);
! 589: }
! 590:
! 591: # Use the linked sty files to guess which newly defined elements should be considered blocks.
! 592: # Also adds to @containing_styles the sty elements that contain styles.
! 593: # @param {string} fn - the file path (we only extract the directory path from it)
! 594: sub add_sty_blocks {
! 595: my ($fn, $root, $all_block) = @_;
! 596: my $doc = $root->ownerDocument;
! 597: my @parserlibs = $doc->getElementsByTagName('parserlib');
! 598: my @libs = ();
! 599: foreach my $parserlib (@parserlibs) {
! 600: if (defined $parserlib->firstChild && $parserlib->firstChild->nodeType == XML_TEXT_NODE) {
! 601: my $value = $parserlib->firstChild->nodeValue;
! 602: $value =~ s/^\s+|\s+$//g;
! 603: if ($value ne '') {
! 604: push(@libs, $value);
! 605: }
! 606: }
! 607: }
! 608: my ($name, $path, $suffix) = fileparse($fn);
! 609: foreach my $sty (@libs) {
! 610: if (substr($sty, 0, 1) eq '/') {
! 611: $sty = $RES_DIR.$sty;
! 612: } else {
! 613: $sty = $path.$sty;
! 614: }
! 615: my $new_elements = parse_sty($sty, $all_block);
! 616: better_guess($root, $new_elements, $all_block);
! 617: my $new_blocks = $new_elements->{'block'};
! 618: my $new_inlines = $new_elements->{'inline'};
! 619: push(@$all_block, @{$new_blocks});
! 620: #push(@inlines, @{$new_inlines}); # we are not using a list of inline elements at this point
! 621: }
! 622: }
! 623:
! 624: ##
! 625: # Parses a sty file and returns lists of block and inline elements.
! 626: # @param {string} fn - the file path
! 627: ##
! 628: sub parse_sty {
! 629: my ($fn, $all_block) = @_;
! 630: my @blocks = ();
! 631: my @inlines = ();
! 632: my $p = HTML::TokeParser->new($fn);
! 633: if (! $p) {
! 634: die "post_xml.pl: parse_sty: Error reading $fn\n";
! 635: }
! 636: $p->empty_element_tags(1);
! 637: my $in_definetag = 0;
! 638: my $in_render = 0;
! 639: my %newtags = ();
! 640: my $newtag = '';
! 641: my $is_block = 0;
! 642: while (my $token = $p->get_token) {
! 643: if ($token->[0] eq 'S') {
! 644: my $tag = lc($token->[1]);
! 645: if ($tag eq 'definetag') {
! 646: $in_definetag = 1;
! 647: $is_block = 0;
! 648: my $attributes = $token->[2];
! 649: $newtag = $attributes->{'name'};
! 650: if (substr($newtag, 0, 1) eq '/') {
! 651: $newtag = substr($newtag, 1);
! 652: }
! 653: } elsif ($in_definetag && $tag eq 'render') {
! 654: $in_render = 1;
! 655: $is_block = 0;
! 656: } elsif ($in_render) {
! 657: if (string_in_array($all_block, $tag)) {
! 658: $is_block = 1;
! 659: }
! 660: }
! 661: } elsif ($token->[0] eq 'E') {
! 662: my $tag = lc($token->[1]);
! 663: if ($tag eq 'definetag') {
! 664: $in_definetag = 0;
! 665: if (defined $newtags{$newtag}) {
! 666: $newtags{$newtag} = $newtags{$newtag} || $is_block;
! 667: } else {
! 668: $newtags{$newtag} = $is_block;
! 669: }
! 670: } elsif ($in_definetag && $tag eq 'render') {
! 671: $in_render = 0;
! 672: }
! 673: }
! 674: }
! 675: foreach $newtag (keys(%newtags)) {
! 676: if ($newtags{$newtag} == 1) {
! 677: push(@blocks, $newtag);
! 678: } else {
! 679: push(@inlines, $newtag);
! 680: }
! 681: }
! 682: return {'block'=>\@blocks, 'inline'=>\@inlines};
! 683: }
! 684:
! 685: ##
! 686: # Marks as block the elements that contain block elements in the input file.
! 687: # Also adds to @containing_styles the sty elements that contain styles.
! 688: # @param {string} fn - the file path
! 689: # @param {Hash<string,Array>} new_elements - contains arrays in 'block' and 'inline'
! 690: ##
! 691: sub better_guess {
! 692: my ($root, $new_elements, $all_block) = @_;
! 693: my $new_blocks = $new_elements->{'block'};
! 694: my $new_inlines = $new_elements->{'inline'};
! 695:
! 696: my @change = (); # change these elements from inline to block
! 697: foreach my $tag (@{$new_inlines}) {
! 698: my @nodes = $root->getElementsByTagName($tag);
! 699: NODE_LOOP: foreach my $node (@nodes) {
! 700: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 701: if ($child->nodeType == XML_ELEMENT_NODE) {
! 702: if (string_in_array($all_block, $child->nodeName) || string_in_array($new_blocks, $child->nodeName)) {
! 703: push(@change, $tag);
! 704: last NODE_LOOP;
! 705: }
! 706: }
! 707: }
! 708: }
! 709: }
! 710: foreach my $inline (@change) {
! 711: my $index = 0;
! 712: $index++ until $new_inlines->[$index] eq $inline;
! 713: splice(@{$new_inlines}, $index, 1);
! 714: push(@{$new_blocks}, $inline);
! 715: }
! 716: # add to @containing_styles when a style is used inside
! 717: # NOTE: some sty elements will be added even though they should not, but if we don't do that
! 718: # all style will be removed in the sty elements.
! 719: foreach my $tag ((@{$new_blocks}, @{$new_inlines})) {
! 720: my @nodes = $root->getElementsByTagName($tag);
! 721: NODE_LOOP: foreach my $node (@nodes) {
! 722: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 723: if ($child->nodeType == XML_ELEMENT_NODE) {
! 724: if (string_in_array(\@html_styles, $child->nodeName)) {
! 725: push(@containing_styles, $tag);
! 726: last NODE_LOOP;
! 727: }
! 728: }
! 729: }
! 730: }
! 731: }
! 732: }
! 733:
! 734: # When a style element contains a block, move the style inside the block where it is allowed.
! 735: # style/block/other -> block/style/other
! 736: # When a style is used where it is not allowed, move it inside its children or remove it (unless it contains only text)
! 737: # element_not_containing_styles/style/other -> element_not_containing_styles/other/style (except if other is a style)
! 738: # The fix is not perfect in the case of element_not_containing_styles/style1/style2/block/text (style1 will be lost):
! 739: # element_not_containing_styles/style1/style2/block/text -> element_not_containing_styles/block/style2/text
! 740: # (a solution to this problem would be to merge the styles in a span)
! 741: # NOTE: .sty defined elements might have been added to @containing_styles by better_guess().
! 742: sub fix_block_styles {
! 743: my ($element, $all_block) = @_;
! 744: my $doc = $element->ownerDocument;
! 745: if (string_in_array(\@html_styles, $element->nodeName)) {
! 746: # move spaces out of the style element
! 747: if (defined $element->firstChild && $element->firstChild->nodeType == XML_TEXT_NODE) {
! 748: my $child = $element->firstChild;
! 749: if ($child->nodeValue =~ /^(\s+)(\S.*)$/s) {
! 750: $element->parentNode->insertBefore($doc->createTextNode($1), $element);
! 751: $child->setData($2);
! 752: }
! 753: }
! 754: if (defined $element->lastChild && $element->lastChild->nodeType == XML_TEXT_NODE) {
! 755: my $child = $element->lastChild;
! 756: if ($child->nodeValue =~ /^(.*\S)(\s+)$/s) {
! 757: $element->parentNode->insertAfter($doc->createTextNode($2), $element);
! 758: $child->setData($1);
! 759: }
! 760: }
! 761:
! 762: my $found_block = 0;
! 763: for (my $child=$element->firstChild; defined $child; $child=$child->nextSibling) {
! 764: if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
! 765: $found_block = 1;
! 766: last;
! 767: }
! 768: }
! 769: my $no_style_here = !string_in_array(\@containing_styles, $element->parentNode->nodeName);
! 770: if ($found_block || $no_style_here) {
! 771: # there is a block or the style is not allowed here,
! 772: # the style element has to be replaced by its modified children
! 773: my $s; # a clone of the style
! 774: my $next;
! 775: for (my $child=$element->firstChild; defined $child; $child=$next) {
! 776: $next = $child->nextSibling;
! 777: if ($child->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $child->nodeName) ||
! 778: $child->nodeName eq 'br' || $no_style_here)) {
! 779: # avoid inverting a style with a style with $no_style_here (that would cause endless recursion)
! 780: if (!$no_style_here || (!string_in_array(\@html_styles, $child->nodeName) &&
! 781: string_in_array(\@containing_styles, $child->nodeName))) {
! 782: # block node or inline node when the style is not allowed:
! 783: # move all children inside the style, and make the style the only child
! 784: $s = $element->cloneNode();
! 785: my $next2;
! 786: for (my $child2=$child->firstChild; defined $child2; $child2=$next2) {
! 787: $next2 = $child2->nextSibling;
! 788: $child->removeChild($child2);
! 789: $s->appendChild($child2);
! 790: }
! 791: $child->appendChild($s);
! 792: }
! 793: $s = undef;
! 794: } elsif (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
! 795: $child->nodeType == XML_ELEMENT_NODE) {
! 796: # if the style is allowed, move text and inline nodes inside the style
! 797: if (!$no_style_here) {
! 798: if (!defined $s) {
! 799: $s = $element->cloneNode();
! 800: $element->insertBefore($s, $child);
! 801: }
! 802: $element->removeChild($child);
! 803: $s->appendChild($child);
! 804: }
! 805: } else {
! 806: # do not put other nodes inside the style
! 807: $s = undef;
! 808: }
! 809: }
! 810: # now replace by children and fix them
! 811: my $parent = $element->parentNode;
! 812: for (my $child=$element->firstChild; defined $child; $child=$next) {
! 813: $next = $child->nextSibling;
! 814: $element->removeChild($child);
! 815: $parent->insertBefore($child, $element);
! 816: if ($child->nodeType == XML_ELEMENT_NODE) {
! 817: fix_block_styles($child, $all_block);
! 818: }
! 819: }
! 820: $parent->removeChild($element);
! 821: return;
! 822: }
! 823: }
! 824: # otherwise fix all children
! 825: my $next;
! 826: for (my $child=$element->firstChild; defined $child; $child=$next) {
! 827: $next = $child->nextSibling;
! 828: if ($child->nodeType == XML_ELEMENT_NODE) {
! 829: fix_block_styles($child, $all_block);
! 830: }
! 831: }
! 832: }
! 833:
! 834: # removes empty font elements and font elements that contain at least one block element
! 835: # replaces other font elements by equivalent span
! 836: sub fix_fonts {
! 837: my ($root, $all_block) = @_;
! 838: my $doc = $root->ownerDocument;
! 839: my @fonts = $root->getElementsByTagName('font');
! 840: @fonts = reverse(@fonts); # to deal with the ancestor last in the case of font/font
! 841: foreach my $font (@fonts) {
! 842: my $block = 0;
! 843: for (my $child=$font->firstChild; defined $child; $child=$child->nextSibling) {
! 844: if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
! 845: $block = 1;
! 846: last;
! 847: }
! 848: }
! 849: if (!defined $font->firstChild || $block) {
! 850: # empty font or font containing block elements
! 851: # replace this node by its content
! 852: replace_by_children($font);
! 853: } else {
! 854: # replace by equivalent span
! 855: my $color = get_non_empty_attribute($font, 'color');
! 856: my $size = get_non_empty_attribute($font, 'size');
! 857: my $face = get_non_empty_attribute($font, 'face');
! 858: if (defined $face) {
! 859: $face =~ s/^,|,$//;
! 860: }
! 861: if (!defined $color && !defined $size && !defined $face) {
! 862: # useless font element: replace this node by its content
! 863: replace_by_children($font);
! 864: next;
! 865: }
! 866: my $replacement;
! 867: tie (my %properties, 'Tie::IxHash', ());
! 868: if (!defined $color && !defined $size && defined $face && lc($face) eq 'symbol') {
! 869: $replacement = $doc->createDocumentFragment();
! 870: } else {
! 871: $replacement = $doc->createElement('span');
! 872: my $css = '';
! 873: if (defined $color) {
! 874: $color =~ s/^x/#/;
! 875: $properties{'color'} = $color;
! 876: }
! 877: if (defined $size) {
! 878: my %hash = (
! 879: '1' => 'x-small',
! 880: '2' => 'small',
! 881: '3' => 'medium',
! 882: '4' => 'large',
! 883: '5' => 'x-large',
! 884: '6' => 'xx-large',
! 885: '7' => '300%',
! 886: '-1' => 'small',
! 887: '-2' => 'x-small',
! 888: '+1' => 'large',
! 889: '+2' => 'x-large',
! 890: '+3' => 'xx-large',
! 891: '+4' => '300%',
! 892: );
! 893: my $value = $hash{$size};
! 894: if (!defined $value) {
! 895: $value = 'medium';
! 896: }
! 897: $properties{'font-size'} = $value;
! 898: }
! 899: if (defined $face) {
! 900: if (lc($face) ne 'symbol' && lc($face) ne 'bold') {
! 901: $properties{'font-family'} = $face;
! 902: }
! 903: }
! 904: set_css_properties($replacement, \%properties);
! 905: }
! 906: if (defined $face && lc($face) eq 'symbol') {
! 907: # convert all content to unicode
! 908: my $next;
! 909: for (my $child=$font->firstChild; defined $child; $child=$next) {
! 910: $next = $child->nextSibling;
! 911: if ($child->nodeType == XML_TEXT_NODE) {
! 912: my $value = $child->nodeValue;
! 913: $value =~ tr/ABGDEZHQIKLMNXOPRSTUFCYWabgdezhqiklmnxoprVstufcywJjv¡«¬®/ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϕϖϒ↔←→/;
! 914: $child->setData($value);
! 915: }
! 916: }
! 917: }
! 918: # replace the font node
! 919: if ($replacement->nodeType == XML_ELEMENT_NODE && !defined $font->previousSibling &&
! 920: !defined $font->nextSibling && string_in_array(\@accepting_style, $font->parentNode->nodeName)) {
! 921: # use CSS on the parent block and replace font by its children instead of using a new element
! 922: set_css_properties($font->parentNode, \%properties);
! 923: replace_by_children($font);
! 924: } else {
! 925: # move all font children inside the replacement (span or fragment)
! 926: my $next;
! 927: for (my $child=$font->firstChild; defined $child; $child=$next) {
! 928: $next = $child->nextSibling;
! 929: $font->removeChild($child);
! 930: $replacement->appendChild($child);
! 931: }
! 932: # replace font
! 933: $font->parentNode->replaceChild($replacement, $font);
! 934: }
! 935: }
! 936: }
! 937: $root->normalize();
! 938: }
! 939:
! 940: # replaces u by <span style="text-decoration: underline">
! 941: sub replace_u {
! 942: my ($root) = @_;
! 943: my $doc = $root->ownerDocument;
! 944: my @us = $root->getElementsByTagName('u');
! 945: foreach my $u (@us) {
! 946: my $span = $doc->createElement('span');
! 947: $span->setAttribute('style', 'text-decoration: underline');
! 948: my $next;
! 949: for (my $child=$u->firstChild; defined $child; $child=$next) {
! 950: $next = $child->nextSibling;
! 951: $u->removeChild($child);
! 952: $span->appendChild($child);
! 953: }
! 954: $u->parentNode->replaceChild($span, $u);
! 955: }
! 956: }
! 957:
! 958: # removes CDATA sections tags that have not been parsed correcty by the HTML parser
! 959: # also removes bad comments in script elements
! 960: sub remove_bad_cdata_sections {
! 961: my ($root) = @_;
! 962: my $doc = $root->ownerDocument;
! 963: foreach my $name (@preserve_elements) {
! 964: my @nodes = $root->getElementsByTagName($name);
! 965: foreach my $node (@nodes) {
! 966: if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
! 967: my $value = $node->firstChild->nodeValue;
! 968: if ($name eq 'script' && (!defined $node->getAttribute('type') || $node->getAttribute('type') ne 'loncapa/perl') &&
! 969: !defined $node->firstChild->nextSibling && $value =~ /^(\s*)<!--(.*)-->(\s*)$/) {
! 970: # web browsers interpret that as a real comment when it is on 1 line, but the Perl HTML parser thinks it is the script
! 971: # -> turning it back into a comment
! 972: # (this is only true for Javascript script elements, since LON-CAPA does not parse loncapa/perl scripts in the same way)
! 973: $node->removeChild($node->firstChild);
! 974: $node->appendChild($doc->createComment($2));
! 975: next;
! 976: }
! 977: # at the beginning:
! 978: $value =~ s/^(\s*)<!\[CDATA\[/$1/; # <![CDATA[
! 979: $value =~ s/^(\s*)\/\*\s*<!\[CDATA\[\s*\*\//$1/; # /* <![CDATA[ */
! 980: $value =~ s/^(\s*)\/\/\s*<!\[CDATA\[/$1/; # // <![CDATA[
! 981: $value =~ s/^(\s*)(\/\/)?\s*<!--/$1/; # // <!--
! 982: # at the end:
! 983: $value =~ s/\/\/\s*\]\]>(\s*)$/$1/; # // ]]>
! 984: $value =~ s/\]\]>(\s*)$/$1/; # ]]>
! 985: $value =~ s/(\/\/)?\s*-->(\s*)$/$2/; # // -->
! 986: $value =~ s/\/\*\s*\]\]>\s*\*\/(\s*)$/$1/; # /* ]]> */
! 987:
! 988: $value = "\n".$value."\n";
! 989: $value =~ s/\s*(\n[ \t]*)/$1/;
! 990: $value =~ s/\s+$/\n/;
! 991: $node->firstChild->setData($value);
! 992: }
! 993: }
! 994: }
! 995: }
! 996:
! 997: # adds CDATA sections to scripts
! 998: sub add_cdata_sections {
! 999: my ($root) = @_;
! 1000: my $doc = $root->ownerDocument;
! 1001: my @scripts = $root->getElementsByTagName('script');
! 1002: my @answers = $root->getElementsByTagName('answer');
! 1003: foreach my $answer (@answers) {
! 1004: my $ancestor = $answer->parentNode;
! 1005: my $found_capa_response = 0;
! 1006: while (defined $ancestor) {
! 1007: if ($ancestor->nodeName eq 'numericalresponse' || $ancestor->nodeName eq 'formularesponse') {
! 1008: $found_capa_response = 1;
! 1009: last;
! 1010: }
! 1011: $ancestor = $ancestor->parentNode;
! 1012: }
! 1013: if (!$found_capa_response) {
! 1014: push(@scripts, $answer);
! 1015: }
! 1016: }
! 1017: foreach my $script (@scripts) {
! 1018: # use a CDATA section in the normal situation, for any script
! 1019: my $first = $script->firstChild;
! 1020: if (defined $first && $first->nodeType == XML_TEXT_NODE && !defined $first->nextSibling) {
! 1021: my $cdata = $doc->createCDATASection($first->nodeValue);
! 1022: $script->replaceChild($cdata, $first);
! 1023: }
! 1024: }
! 1025: }
! 1026:
! 1027: # removes "<!--" and "-->" at the beginning and end of style elements
! 1028: sub fix_style_element {
! 1029: my ($root) = @_;
! 1030: my @styles = $root->getElementsByTagName('style');
! 1031: foreach my $style (@styles) {
! 1032: if (defined $style->firstChild && $style->firstChild->nodeType == XML_TEXT_NODE &&
! 1033: !defined $style->firstChild->nextSibling) {
! 1034: my $text = $style->firstChild->nodeValue;
! 1035: if ($text =~ /^\s*<!--(.*)-->\s*$/s) {
! 1036: $style->firstChild->setData($1);
! 1037: }
! 1038: }
! 1039: }
! 1040: }
! 1041:
! 1042: # create missing cells at the end of table rows
! 1043: sub fix_tables {
! 1044: my ($root) = @_;
! 1045: my @tables = $root->getElementsByTagName('table');
! 1046: foreach my $table (@tables) {
! 1047: fix_cells($table);
! 1048: foreach my $tbody ($table->getChildrenByTagName('tbody')) {
! 1049: fix_cells($tbody);
! 1050: }
! 1051: foreach my $thead ($table->getChildrenByTagName('thead')) {
! 1052: fix_cells($thead);
! 1053: }
! 1054: foreach my $tfoot ($table->getChildrenByTagName('tfoot')) {
! 1055: fix_cells($tfoot);
! 1056: }
! 1057: }
! 1058: }
! 1059:
! 1060: # create missing cells at the end of table rows
! 1061: sub fix_cells {
! 1062: my ($table) = @_; # could actually be table, tbody, thead or tfoot
! 1063: my $doc = $table->ownerDocument;
! 1064: my @nb_cells = ();
! 1065: my $max_nb_cells = 0;
! 1066: my @rowspans = ();
! 1067: my @trs = $table->getChildrenByTagName('tr');
! 1068: foreach my $tr (@trs) {
! 1069: my $nb_cells;
! 1070: if (defined $rowspans[0]) {
! 1071: $nb_cells = shift(@rowspans);
! 1072: } else {
! 1073: $nb_cells = 0;
! 1074: }
! 1075: for (my $cell=$tr->firstChild; defined $cell; $cell=$cell->nextSibling) {
! 1076: if ($cell->nodeName eq 'td' || $cell->nodeName eq 'th') {
! 1077: my $colspan = $cell->getAttribute('colspan');
! 1078: if (defined $colspan && $colspan =~ /^\s*[0-9]+\s*$/) {
! 1079: $nb_cells += $colspan;
! 1080: } else {
! 1081: $nb_cells++;
! 1082: }
! 1083: my $rowspan = $cell->getAttribute('rowspan');
! 1084: if (defined $rowspan && $rowspan =~ /^\s*[0-9]+\s*$/) {
! 1085: for (my $i=0; $i < $rowspan-1; $i++) {
! 1086: if (!defined $rowspans[$i]) {
! 1087: $rowspans[$i] = 1;
! 1088: } else {
! 1089: $rowspans[$i]++;
! 1090: }
! 1091: }
! 1092: }
! 1093: }
! 1094: }
! 1095: push(@nb_cells, $nb_cells);
! 1096: if ($nb_cells > $max_nb_cells) {
! 1097: $max_nb_cells = $nb_cells;
! 1098: }
! 1099: }
! 1100: foreach my $tr (@trs) {
! 1101: my $nb_cells = shift(@nb_cells);
! 1102: if ($nb_cells < $max_nb_cells) {
! 1103: for (1..($max_nb_cells - $nb_cells)) {
! 1104: $tr->appendChild($doc->createElement('td'));
! 1105: }
! 1106: }
! 1107: }
! 1108: }
! 1109:
! 1110: # replaces ul/ul by ul/li/ul and the same for ol (using the previous li if possible)
! 1111: # also adds a ul element when a li has no ul/ol ancestor
! 1112: sub fix_lists {
! 1113: my ($root) = @_;
! 1114: my $doc = $root->ownerDocument;
! 1115: my @uls = $root->getElementsByTagName('ul');
! 1116: my @ols = $root->getElementsByTagName('ol');
! 1117: my @lists = (@uls, @ols);
! 1118: foreach my $list (@lists) {
! 1119: my $next;
! 1120: for (my $child=$list->firstChild; defined $child; $child=$next) {
! 1121: $next = $child->nextSibling;
! 1122: if ($child->nodeType == XML_ELEMENT_NODE && string_in_array(['ul','ol'], $child->nodeName)) {
! 1123: my $previous = $child->previousNonBlankSibling(); # note: non-DOM method
! 1124: $list->removeChild($child);
! 1125: if (defined $previous && $previous->nodeType == XML_ELEMENT_NODE && $previous->nodeName eq 'li') {
! 1126: $previous->appendChild($child);
! 1127: } else {
! 1128: my $li = $doc->createElement('li');
! 1129: $li->appendChild($child);
! 1130: if (!defined $next) {
! 1131: $list->appendChild($li);
! 1132: } else {
! 1133: $list->insertBefore($li, $next);
! 1134: }
! 1135: }
! 1136: }
! 1137: }
! 1138: }
! 1139: my @lis = $root->getElementsByTagName('li');
! 1140: foreach my $li (@lis) {
! 1141: my $found_list_ancestor = 0;
! 1142: my $ancestor = $li->parentNode;
! 1143: while (defined $ancestor) {
! 1144: if ($ancestor->nodeName eq 'ul' || $ancestor->nodeName eq 'ol') {
! 1145: $found_list_ancestor = 1;
! 1146: last;
! 1147: }
! 1148: $ancestor = $ancestor->parentNode;
! 1149: }
! 1150: if (!$found_list_ancestor) {
! 1151: # replace li by ul and add li under ul
! 1152: my $ul = $doc->createElement('ul');
! 1153: $li->parentNode->insertBefore($ul, $li);
! 1154: $li->parentNode->removeChild($li);
! 1155: $ul->appendChild($li);
! 1156: # add all other li afterwards inside ul (there might be text nodes in-between)
! 1157: my $next = $ul->nextSibling;
! 1158: while (defined $next) {
! 1159: my $next_next = $next->nextSibling;
! 1160: if ($next->nodeType == XML_TEXT_NODE && $next->nodeValue =~ /^\s*$/ &&
! 1161: defined $next_next && $next_next->nodeType == XML_ELEMENT_NODE && $next_next->nodeName eq 'li') {
! 1162: $next->parentNode->removeChild($next);
! 1163: $ul->appendChild($next);
! 1164: $next = $next_next;
! 1165: $next_next = $next_next->nextSibling;
! 1166: }
! 1167: if ($next->nodeType == XML_ELEMENT_NODE && $next->nodeName eq 'li') {
! 1168: $next->parentNode->removeChild($next);
! 1169: $ul->appendChild($next);
! 1170: } else {
! 1171: last;
! 1172: }
! 1173: $next = $next_next;
! 1174: }
! 1175: }
! 1176: }
! 1177: }
! 1178:
! 1179: # Some "image" elements are actually img element with a wrong name. This renames them.
! 1180: # Amazingly enough, "<image src=..." displays an image in some browsers
! 1181: # ("image" has existed at some point as an experimental HTML element).
! 1182: sub fix_wrong_name_for_img {
! 1183: my ($root) = @_;
! 1184: my @images = $root->getElementsByTagName('image');
! 1185: foreach my $image (@images) {
! 1186: if (!defined $image->getAttribute('src')) {
! 1187: next;
! 1188: }
! 1189: my $found_correct_ancestor = 0;
! 1190: my $ancestor = $image->parentNode;
! 1191: while (defined $ancestor) {
! 1192: if ($ancestor->nodeName eq 'drawimage' || $ancestor->nodeName eq 'imageresponse') {
! 1193: $found_correct_ancestor = 1;
! 1194: last;
! 1195: }
! 1196: $ancestor = $ancestor->parentNode;
! 1197: }
! 1198: if ($found_correct_ancestor) {
! 1199: next;
! 1200: }
! 1201: # this really has to be renamed "img"
! 1202: $image->setNodeName('img');
! 1203: }
! 1204: }
! 1205:
! 1206: # Replaces many deprecated attributes and replaces them by equivalent CSS when possible
! 1207: sub replace_deprecated_attributes_by_css {
! 1208: my ($root) = @_;
! 1209:
! 1210: fix_deprecated_in_tables($root);
! 1211:
! 1212: fix_deprecated_in_table_rows($root);
! 1213:
! 1214: fix_deprecated_in_table_cells($root);
! 1215:
! 1216: fix_deprecated_in_lists($root);
! 1217:
! 1218: fix_deprecated_in_list_items($root);
! 1219:
! 1220: fix_deprecated_in_hr($root);
! 1221:
! 1222: fix_deprecated_in_img($root);
! 1223:
! 1224: fix_deprecated_in_body($root);
! 1225:
! 1226: fix_align_attribute($root);
! 1227: }
! 1228:
! 1229: # Replaces deprecated attributes in tables
! 1230: sub fix_deprecated_in_tables {
! 1231: my ($root) = @_;
! 1232: my @tables = $root->getElementsByTagName('table');
! 1233: foreach my $table (@tables) {
! 1234: tie (my %new_properties, 'Tie::IxHash', ());
! 1235: my $align = $table->getAttribute('align');
! 1236: if (defined $align) {
! 1237: $table->removeAttribute('align');
! 1238: $align = lc(trim($align));
! 1239: }
! 1240: if ($table->parentNode->nodeName eq 'center' || (defined $align && $align eq 'center') ||
! 1241: (defined $table->parentNode->getAttribute('align') && $table->parentNode->getAttribute('align') eq 'center')) {
! 1242: $new_properties{'margin-left'} = 'auto';
! 1243: $new_properties{'margin-right'} = 'auto';
! 1244: }
! 1245: if (defined $align && ($align eq 'left' || $align eq 'right')) {
! 1246: $new_properties{'float'} = $align;
! 1247: }
! 1248: my $width = $table->getAttribute('width');
! 1249: if (defined $width) {
! 1250: $table->removeAttribute('width');
! 1251: $width = trim($width);
! 1252: if ($width =~ /^[0-9]+$/) {
! 1253: $width .= 'px';
! 1254: }
! 1255: if ($width ne '') {
! 1256: $new_properties{'width'} = $width;
! 1257: }
! 1258: }
! 1259: my $height = $table->getAttribute('height');
! 1260: if (defined $height) {
! 1261: $table->removeAttribute('height');
! 1262: # no replacement for table height
! 1263: }
! 1264: my $bgcolor = $table->getAttribute('bgcolor');
! 1265: if (defined $bgcolor) {
! 1266: $table->removeAttribute('bgcolor');
! 1267: $bgcolor = trim($bgcolor);
! 1268: $bgcolor =~ s/^x\s*//;
! 1269: if ($bgcolor ne '') {
! 1270: $new_properties{'background-color'} = $bgcolor;
! 1271: }
! 1272: }
! 1273: my $frame = $table->getAttribute('frame');
! 1274: if (defined $frame) {
! 1275: $table->removeAttribute('frame');
! 1276: $frame = lc(trim($frame));
! 1277: if ($frame eq 'void') {
! 1278: $new_properties{'border'} = 'none';
! 1279: } elsif ($frame eq 'above') {
! 1280: $new_properties{'border-top'} = '1px solid black';
! 1281: } elsif ($frame eq 'below') {
! 1282: $new_properties{'border-bottom'} = '1px solid black';
! 1283: } elsif ($frame eq 'hsides') {
! 1284: $new_properties{'border-top'} = '1px solid black';
! 1285: $new_properties{'border-bottom'} = '1px solid black';
! 1286: } elsif ($frame eq 'vsides') {
! 1287: $new_properties{'border-left'} = '1px solid black';
! 1288: $new_properties{'border-right'} = '1px solid black';
! 1289: } elsif ($frame eq 'lhs') {
! 1290: $new_properties{'border-left'} = '1px solid black';
! 1291: } elsif ($frame eq 'rhs') {
! 1292: $new_properties{'border-right'} = '1px solid black';
! 1293: } elsif ($frame eq 'box') {
! 1294: $new_properties{'border'} = '1px solid black';
! 1295: } elsif ($frame eq 'border') {
! 1296: $new_properties{'border'} = '1px solid black';
! 1297: }
! 1298: }
! 1299: if (scalar(keys %new_properties) > 0) {
! 1300: set_css_properties($table, \%new_properties);
! 1301: }
! 1302: # we can't replace the border attribute without creating a style block, but we can improve things like border="BORDER"
! 1303: my $border = $table->getAttribute('border');
! 1304: if (defined $border) {
! 1305: $border = trim($border);
! 1306: if ($border !~ /^\s*[0-9]+\s*(px)?\s*$/) {
! 1307: $table->setAttribute('border', '1');
! 1308: }
! 1309: }
! 1310: }
! 1311:
! 1312: }
! 1313:
! 1314: # Replaces deprecated attributes in tr elements
! 1315: sub fix_deprecated_in_table_rows {
! 1316: my ($root) = @_;
! 1317: my @trs = $root->getElementsByTagName('tr');
! 1318: foreach my $tr (@trs) {
! 1319: my $old_properties = get_css_properties($tr);
! 1320: tie (my %new_properties, 'Tie::IxHash', ());
! 1321: my $bgcolor = $tr->getAttribute('bgcolor');
! 1322: if (defined $bgcolor) {
! 1323: $tr->removeAttribute('bgcolor');
! 1324: if (!defined $old_properties->{'background-color'}) {
! 1325: $bgcolor = trim($bgcolor);
! 1326: $bgcolor =~ s/^x\s*//;
! 1327: if ($bgcolor ne '') {
! 1328: $new_properties{'background-color'} = $bgcolor;
! 1329: }
! 1330: }
! 1331: }
! 1332: my $align = $tr->getAttribute('align');
! 1333: if (defined $align && $align !~ /\s*char\s*/i) {
! 1334: $tr->removeAttribute('align');
! 1335: if (!defined $old_properties->{'text-align'}) {
! 1336: $align = lc(trim($align));
! 1337: if ($align ne '') {
! 1338: $new_properties{'text-align'} = $align;
! 1339: }
! 1340: }
! 1341: }
! 1342: my $valign = $tr->getAttribute('valign');
! 1343: if (defined $valign) {
! 1344: $tr->removeAttribute('valign');
! 1345: if (!defined $old_properties->{'vertical-align'}) {
! 1346: $valign = lc(trim($valign));
! 1347: if ($valign ne '') {
! 1348: $new_properties{'vertical-align'} = $valign;
! 1349: }
! 1350: }
! 1351: }
! 1352: if (scalar(keys %new_properties) > 0) {
! 1353: set_css_properties($tr, \%new_properties);
! 1354: }
! 1355: }
! 1356: }
! 1357:
! 1358: # Replaces deprecated attributes in table cells (td and th)
! 1359: sub fix_deprecated_in_table_cells {
! 1360: my ($root) = @_;
! 1361: my @tds = $root->getElementsByTagName('td');
! 1362: my @ths = $root->getElementsByTagName('th');
! 1363: my @cells = (@tds, @ths);
! 1364: foreach my $cell (@cells) {
! 1365: my $old_properties = get_css_properties($cell);
! 1366: tie (my %new_properties, 'Tie::IxHash', ());
! 1367: my $width = $cell->getAttribute('width');
! 1368: if (defined $width) {
! 1369: $cell->removeAttribute('width');
! 1370: if (!defined $old_properties->{'width'}) {
! 1371: $width = trim($width);
! 1372: if ($width =~ /^[0-9]+$/) {
! 1373: $width .= 'px';
! 1374: }
! 1375: if ($width ne '') {
! 1376: $new_properties{'width'} = $width;
! 1377: }
! 1378: }
! 1379: }
! 1380: my $height = $cell->getAttribute('height');
! 1381: if (defined $height) {
! 1382: $cell->removeAttribute('height');
! 1383: if (!defined $old_properties->{'height'}) {
! 1384: $height = trim($height);
! 1385: if ($height =~ /^[0-9]+$/) {
! 1386: $height .= 'px';
! 1387: }
! 1388: if ($height ne '') {
! 1389: $new_properties{'height'} = $height;
! 1390: }
! 1391: }
! 1392: }
! 1393: my $bgcolor = $cell->getAttribute('bgcolor');
! 1394: if (defined $bgcolor) {
! 1395: $cell->removeAttribute('bgcolor');
! 1396: if (!defined $old_properties->{'background-color'}) {
! 1397: $bgcolor = trim($bgcolor);
! 1398: $bgcolor =~ s/^x\s*//;
! 1399: if ($bgcolor ne '') {
! 1400: $new_properties{'background-color'} = $bgcolor;
! 1401: }
! 1402: }
! 1403: }
! 1404: my $align = $cell->getAttribute('align');
! 1405: if (defined $align && $align !~ /\s*char\s*/i) {
! 1406: $cell->removeAttribute('align');
! 1407: if (!defined $old_properties->{'text-align'}) {
! 1408: $align = lc(trim($align));
! 1409: if ($align ne '') {
! 1410: $new_properties{'text-align'} = $align;
! 1411: }
! 1412: }
! 1413: }
! 1414: my $valign = $cell->getAttribute('valign');
! 1415: if (defined $valign) {
! 1416: $cell->removeAttribute('valign');
! 1417: if (!defined $old_properties->{'vertical-align'}) {
! 1418: $valign = lc(trim($valign));
! 1419: if ($valign ne '') {
! 1420: $new_properties{'vertical-align'} = $valign;
! 1421: }
! 1422: }
! 1423: }
! 1424: if (scalar(keys %new_properties) > 0) {
! 1425: set_css_properties($cell, \%new_properties);
! 1426: }
! 1427: }
! 1428: }
! 1429:
! 1430: # Replaces deprecated attributes in lists (ul and ol)
! 1431: sub fix_deprecated_in_lists {
! 1432: my ($root) = @_;
! 1433: my @uls = $root->getElementsByTagName('ul');
! 1434: my @ols = $root->getElementsByTagName('ol');
! 1435: my @lists = (@uls, @ols);
! 1436: foreach my $list (@lists) {
! 1437: my $type = $list->getAttribute('type');
! 1438: if (defined $type) {
! 1439: my $lst = list_style_type($type);
! 1440: if (defined $lst) {
! 1441: $list->removeAttribute('type');
! 1442: if (!defined get_css_property($list, 'list-style-type')) {
! 1443: set_css_property($list, 'list-style-type', $lst);
! 1444: }
! 1445: }
! 1446: }
! 1447: }
! 1448: }
! 1449:
! 1450: # Replaces deprecated attributes in list items (li)
! 1451: sub fix_deprecated_in_list_items {
! 1452: my ($root) = @_;
! 1453: my @lis = $root->getElementsByTagName('li');
! 1454: foreach my $li (@lis) {
! 1455: my $type = $li->getAttribute('type');
! 1456: if (defined $type) {
! 1457: my $lst = list_style_type($type);
! 1458: if (defined $lst) {
! 1459: $li->removeAttribute('type');
! 1460: if (!defined get_css_property($li, 'list-style-type')) {
! 1461: set_css_property($li, 'list-style-type', $lst);
! 1462: }
! 1463: }
! 1464: }
! 1465: }
! 1466: }
! 1467:
! 1468: # returns the CSS list-style-type value equivalent to the given type attribute for a list or list item
! 1469: sub list_style_type {
! 1470: my ($type) = @_;
! 1471: my $value;
! 1472: $type = trim($type);
! 1473: if (lc($type) eq 'circle') {
! 1474: $value = 'circle';
! 1475: } elsif (lc($type) eq 'disc') {
! 1476: $value = 'disc';
! 1477: } elsif (lc($type) eq 'square') {
! 1478: $value = 'square';
! 1479: } elsif ($type eq 'a') {
! 1480: $value = 'lower-latin';
! 1481: } elsif ($type eq 'A') {
! 1482: $value = 'upper-latin';
! 1483: } elsif ($type eq 'i') {
! 1484: $value = 'lower-roman';
! 1485: } elsif ($type eq 'I') {
! 1486: $value = 'upper-roman';
! 1487: } elsif ($type eq '1') {
! 1488: $value = 'decimal';
! 1489: }
! 1490: return $value;
! 1491: }
! 1492:
! 1493: # Replaces deprecated attributes in hr
! 1494: sub fix_deprecated_in_hr {
! 1495: my ($root) = @_;
! 1496: my @hrs = $root->getElementsByTagName('hr');
! 1497: foreach my $hr (@hrs) {
! 1498: tie (my %new_properties, 'Tie::IxHash', ());
! 1499: my $align = $hr->getAttribute('align');
! 1500: if (defined $align) {
! 1501: $align = lc(trim($align));
! 1502: if ($align eq 'left') {
! 1503: $new_properties{'text-align'} = 'left';
! 1504: $new_properties{'margin-left'} = '0';
! 1505: } elsif ($align eq 'right') {
! 1506: $new_properties{'text-align'} = 'right';
! 1507: $new_properties{'margin-right'} = '0';
! 1508: }
! 1509: $hr->removeAttribute('align');
! 1510: }
! 1511: my $color = $hr->getAttribute('color');
! 1512: if (defined $color) {
! 1513: $color = trim($color);
! 1514: $color =~ s/^x\s*//;
! 1515: if ($color ne '') {
! 1516: $new_properties{'color'} = $color;
! 1517: $new_properties{'background-color'} = $color;
! 1518: }
! 1519: $hr->removeAttribute('color');
! 1520: }
! 1521: my $noshade = $hr->getAttribute('noshade');
! 1522: my $size = $hr->getAttribute('size');
! 1523: if (defined $noshade) {
! 1524: $new_properties{'border-width'} = '0';
! 1525: if (!defined $color) {
! 1526: $new_properties{'color'} = 'gray';
! 1527: $new_properties{'background-color'} = 'gray';
! 1528: }
! 1529: if (!defined $size) {
! 1530: $size = '2';
! 1531: }
! 1532: $hr->removeAttribute('noshade');
! 1533: }
! 1534: if (defined $size) {
! 1535: $size = trim($size);
! 1536: if ($size ne '') {
! 1537: $new_properties{'height'} = $size.'px';
! 1538: }
! 1539: if (defined $hr->getAttribute('size')) {
! 1540: $hr->removeAttribute('size');
! 1541: }
! 1542: }
! 1543: my $width = $hr->getAttribute('width');
! 1544: if (defined $width) {
! 1545: $width = trim($width);
! 1546: if ($width ne '') {
! 1547: if ($width !~ /\%$/) {
! 1548: $width .= 'px';
! 1549: }
! 1550: $new_properties{'width'} = $width;
! 1551: }
! 1552: $hr->removeAttribute('width');
! 1553: }
! 1554: if (scalar(keys %new_properties) > 0) {
! 1555: set_css_properties($hr, \%new_properties);
! 1556: }
! 1557: }
! 1558: }
! 1559:
! 1560: # Replaces deprecated attributes in img
! 1561: sub fix_deprecated_in_img {
! 1562: my ($root) = @_;
! 1563: my @imgs = $root->getElementsByTagName('img');
! 1564: foreach my $img (@imgs) {
! 1565: my $old_properties = get_css_properties($img);
! 1566: tie (my %new_properties, 'Tie::IxHash', ());
! 1567: my $align = $img->getAttribute('align');
! 1568: if (defined $align) {
! 1569: $align = lc(trim($align));
! 1570: if ($align eq 'middle' || $align eq 'top' || $align eq 'bottom') {
! 1571: $img->removeAttribute('align');
! 1572: if (!defined $old_properties->{'vertical-align'}) {
! 1573: $new_properties{'vertical-align'} = $align;
! 1574: }
! 1575: } elsif ($align eq 'left' || $align eq 'right') {
! 1576: $img->removeAttribute('align');
! 1577: if (!defined $old_properties->{'float'}) {
! 1578: $new_properties{'float'} = $align;
! 1579: }
! 1580: } elsif ($align eq 'center' || $align eq '') {
! 1581: $img->removeAttribute('align');
! 1582: }
! 1583: }
! 1584: my $border = $img->getAttribute('border');
! 1585: if (defined $border) {
! 1586: $border = lc(trim($border));
! 1587: if ($border =~ /^[0-9]+\s*(px)?$/) {
! 1588: $img->removeAttribute('border');
! 1589: if (!defined $old_properties->{'border'}) {
! 1590: if ($border !~ /px$/) {
! 1591: $border .= 'px';
! 1592: }
! 1593: $new_properties{'border'} = $border.' solid black';
! 1594: }
! 1595: }
! 1596: }
! 1597: my $hspace = $img->getAttribute('hspace');
! 1598: if (defined $hspace) {
! 1599: $hspace = lc(trim($hspace));
! 1600: if ($hspace =~ /^[0-9]+\s*(px)?$/) {
! 1601: $img->removeAttribute('hspace');
! 1602: if (!defined $old_properties->{'margin-left'} || !defined $old_properties->{'margin-right'}) {
! 1603: if ($hspace !~ /px$/) {
! 1604: $hspace .= 'px';
! 1605: }
! 1606: $new_properties{'margin-left'} = $hspace;
! 1607: $new_properties{'margin-right'} = $hspace;
! 1608: }
! 1609: }
! 1610: }
! 1611: if (scalar(keys %new_properties) > 0) {
! 1612: set_css_properties($img, \%new_properties);
! 1613: }
! 1614: }
! 1615: }
! 1616:
! 1617: # Replaces deprecated attributes in htmlbody (the style attribute could be used in a div for output)
! 1618: sub fix_deprecated_in_body {
! 1619: my ($root) = @_;
! 1620: my $doc = $root->ownerDocument;
! 1621: my @bodies = $root->getElementsByTagName('htmlbody');
! 1622: foreach my $body (@bodies) {
! 1623: my $old_properties = get_css_properties($body);
! 1624: tie (my %new_properties, 'Tie::IxHash', ());
! 1625: my $bgcolor = $body->getAttribute('bgcolor');
! 1626: if (defined $bgcolor) {
! 1627: $body->removeAttribute('bgcolor');
! 1628: if (!defined $old_properties->{'background-color'}) {
! 1629: $bgcolor = trim($bgcolor);
! 1630: $bgcolor =~ s/^x\s*//;
! 1631: if ($bgcolor ne '') {
! 1632: $new_properties{'background-color'} = $bgcolor;
! 1633: }
! 1634: }
! 1635: }
! 1636: my $color = $body->getAttribute('text');
! 1637: if (defined $color) {
! 1638: $body->removeAttribute('text');
! 1639: if (!defined $old_properties->{'color'}) {
! 1640: $color = trim($color);
! 1641: $color =~ s/^x\s*//;
! 1642: if ($color ne '') {
! 1643: $new_properties{'color'} = $color;
! 1644: }
! 1645: }
! 1646: }
! 1647: my $background = $body->getAttribute('background');
! 1648: if (defined $background && ($background =~ /\.jpe?g$|\.gif|\.png/i)) {
! 1649: $body->removeAttribute('background');
! 1650: if (!defined $old_properties->{'background-image'}) {
! 1651: $background = trim($background);
! 1652: if ($background ne '') {
! 1653: $new_properties{'background-image'} = 'url('.$background.')';
! 1654: }
! 1655: }
! 1656: }
! 1657: # NOTE: these attributes have never been standard and are better removed with no replacement
! 1658: foreach my $bad ('bottommargin', 'leftmargin', 'rightmargin', 'topmargin', 'marginheight', 'marginwidth') {
! 1659: if ($body->hasAttribute($bad)) {
! 1660: $body->removeAttribute($bad);
! 1661: }
! 1662: }
! 1663: # NOTE: link alink and vlink require a <style> block to be converted
! 1664: my $link = $body->getAttribute('link');
! 1665: my $alink = $body->getAttribute('alink');
! 1666: my $vlink = $body->getAttribute('vlink');
! 1667: if (defined $link || defined $alink || defined $vlink) {
! 1668: my $head;
! 1669: my @heads = $root->getElementsByTagName('htmlhead');
! 1670: if (scalar(@heads) > 0) {
! 1671: $head = $heads[0];
! 1672: } else {
! 1673: $head = $doc->createElement('htmlhead');
! 1674: $root->insertBefore($head, $root->firstChild);
! 1675: }
! 1676: my $style = $doc->createElement('style');
! 1677: $head->appendChild($style);
! 1678: my $css = "\n";
! 1679: if (defined $link) {
! 1680: $body->removeAttribute('link');
! 1681: $link = trim($link);
! 1682: $link =~ s/^x\s*//;
! 1683: $css .= ' a:link { color:'.$link.' }';
! 1684: $css .= "\n";
! 1685: }
! 1686: if (defined $alink) {
! 1687: $body->removeAttribute('alink');
! 1688: $alink = trim($alink);
! 1689: $alink =~ s/^x\s*//;
! 1690: $css .= ' a:active { color:'.$alink.' }';
! 1691: $css .= "\n";
! 1692: }
! 1693: if (defined $vlink) {
! 1694: $body->removeAttribute('vlink');
! 1695: $vlink = trim($vlink);
! 1696: $vlink =~ s/^x\s*//;
! 1697: $css .= ' a:visited { color:'.$vlink.' }';
! 1698: $css .= "\n";
! 1699: }
! 1700: $css .= ' ';
! 1701: $style->appendChild($doc->createTextNode($css));
! 1702: }
! 1703: if (scalar(keys %new_properties) > 0) {
! 1704: set_css_properties($body, \%new_properties);
! 1705: } elsif (!$body->hasAttributes) {
! 1706: $body->parentNode->removeChild($body);
! 1707: }
! 1708: }
! 1709: }
! 1710:
! 1711: # replaces <div align="center"> by <div style="text-align:center;">
! 1712: # also for p and h1..h6
! 1713: sub fix_align_attribute {
! 1714: my ($root) = @_;
! 1715: my @nodes = $root->getElementsByTagName('div');
! 1716: push(@nodes, $root->getElementsByTagName('p'));
! 1717: for (my $i=1; $i<=6; $i++) {
! 1718: push(@nodes, $root->getElementsByTagName('h'.$i));
! 1719: }
! 1720: foreach my $node (@nodes) {
! 1721: my $align = $node->getAttribute('align');
! 1722: if (defined $align) {
! 1723: $node->removeAttribute('align');
! 1724: $align = trim($align);
! 1725: if ($align ne '' && !defined get_css_property($node, 'text-align')) {
! 1726: set_css_property($node, 'text-align', lc($align));
! 1727: }
! 1728: }
! 1729: }
! 1730: }
! 1731:
! 1732: # replace center by a div or remove it if there is a table inside
! 1733: sub replace_center {
! 1734: my ($root, $all_block) = @_;
! 1735: my $doc = $root->ownerDocument;
! 1736: my @centers = $root->getElementsByTagName('center');
! 1737: foreach my $center (@centers) {
! 1738: if ($center->getChildrenByTagName('table')->size() > 0) { # note: getChildrenByTagName is not DOM (LibXML specific)
! 1739: replace_by_children($center);
! 1740: } else {
! 1741: if ((!defined $center->previousSibling ||
! 1742: ($center->previousSibling->nodeType == XML_TEXT_NODE && $center->previousSibling->nodeValue =~ /^\s*$/ && !defined $center->previousSibling->previousSibling)) &&
! 1743: (!defined $center->nextSibling ||
! 1744: ($center->nextSibling->nodeType == XML_TEXT_NODE && $center->nextSibling->nodeValue =~ /^\s*$/ && !defined $center->nextSibling->nextSibling)) &&
! 1745: string_in_array(\@accepting_style, $center->parentNode->nodeName)) {
! 1746: # use CSS on the parent block and replace center by its children
! 1747: set_css_property($center->parentNode, 'text-align', 'center');
! 1748: replace_by_children($center);
! 1749: } else {
! 1750: # use p or div ? check if there is a block inside
! 1751: my $found_block = 0;
! 1752: for (my $child=$center->firstChild; defined $child; $child=$child->nextSibling) {
! 1753: if ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
! 1754: $found_block = 1;
! 1755: last;
! 1756: }
! 1757: }
! 1758: my $new_node;
! 1759: if ($found_block) {
! 1760: $new_node = $doc->createElement('div');
! 1761: $new_node->setAttribute('style', 'text-align: center; margin: 0 auto');
! 1762: } else {
! 1763: $new_node = $doc->createElement('p');
! 1764: $new_node->setAttribute('style', 'text-align: center');
! 1765: }
! 1766: my $next;
! 1767: for (my $child=$center->firstChild; defined $child; $child=$next) {
! 1768: $next = $child->nextSibling;
! 1769: $center->removeChild($child);
! 1770: $new_node->appendChild($child);
! 1771: }
! 1772: $center->parentNode->replaceChild($new_node, $center);
! 1773: }
! 1774: }
! 1775: }
! 1776: }
! 1777:
! 1778: # replaces <nobr> by <span style="white-space:nowrap">
! 1779: sub replace_nobr {
! 1780: my ($root) = @_;
! 1781: my @nobrs = $root->getElementsByTagName('nobr');
! 1782: foreach my $nobr (@nobrs) {
! 1783: if (!defined $nobr->previousSibling && !defined $nobr->nextSibling &&
! 1784: string_in_array(\@accepting_style, $nobr->parentNode->nodeName)) {
! 1785: # use CSS on the parent block
! 1786: set_css_property($nobr->parentNode, 'white-space', 'nowrap');
! 1787: replace_by_children($nobr);
! 1788: } else {
! 1789: $nobr->setNodeName('span');
! 1790: $nobr->setAttribute('style', 'white-space:nowrap');
! 1791: }
! 1792: }
! 1793: }
! 1794:
! 1795: # removes notsolved tags in the case <hintgroup showoncorrect="no"><notsolved>...</notsolved></hintgroup>
! 1796: # and in the case <notsolved><hintgroup showoncorrect="no">...</hintgroup></notsolved>
! 1797: sub remove_useless_notsolved {
! 1798: my ($root) = @_;
! 1799: my @hintgroups = $root->getElementsByTagName('hintgroup');
! 1800: foreach my $hintgroup (@hintgroups) {
! 1801: my $showoncorrect = get_non_empty_attribute($hintgroup, 'showoncorrect');
! 1802: if (!defined $showoncorrect || $showoncorrect eq 'no') {
! 1803: my @notsolveds = $hintgroup->getElementsByTagName('notsolved');
! 1804: foreach my $notsolved (@notsolveds) {
! 1805: replace_by_children($notsolved);
! 1806: }
! 1807: }
! 1808: my $parent = $hintgroup->parentNode;
! 1809: if ($parent->nodeName eq 'notsolved' && scalar(@{$parent->nonBlankChildNodes()}) == 1) {
! 1810: replace_by_children($parent);
! 1811: }
! 1812: }
! 1813: }
! 1814:
! 1815: # adds a paragraph inside if needed and calls fix_paragraph for all paragraphs (including new ones)
! 1816: sub fix_paragraphs_inside {
! 1817: my ($node, $all_block) = @_;
! 1818: # blocks in which paragrahs will be added:
! 1819: my @blocks_with_p = ('loncapa','library','problem','part','problemtype','window','block','while','postanswerdate','preduedate','solved','notsolved','languageblock','translated','lang','instructorcomment','togglebox','standalone','form');
! 1820: my @fix_p_if_br_or_p = (@responses,'foil','item','text','label','hintgroup','hintpart','hint','web','windowlink','div','li','dd','td','th','blockquote');
! 1821: if ((string_in_array(\@blocks_with_p, $node->nodeName) && paragraph_needed($node)) ||
! 1822: (string_in_array(\@fix_p_if_br_or_p, $node->nodeName) && paragraph_inside($node))) {
! 1823: # if non-empty, add paragraphs where needed between all br and remove br
! 1824: # (it would be easier to just put everything in a p and fix it afterwards, but there are performance issues
! 1825: # when a paragraph has many blocks directly inside)
! 1826: my $doc = $node->ownerDocument;
! 1827: my $p = undef;
! 1828: my @new_children = ();
! 1829: my $next;
! 1830: for (my $child=$node->firstChild; defined $child; $child=$next) {
! 1831: $next = $child->nextSibling;
! 1832: $node->removeChild($child);
! 1833: if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'br') {
! 1834: if (defined $p) {
! 1835: push(@new_children, $p);
! 1836: } else {
! 1837: push(@new_children, $doc->createElement('p'));
! 1838: }
! 1839: $p = undef;
! 1840: } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array(\@inline_like_block, $child->nodeName)) {
! 1841: # inline_like_block: use the paragraph if there is one, otherwise do not create one
! 1842: if (defined $p) {
! 1843: $p->appendChild($child);
! 1844: } else {
! 1845: push(@new_children, $child);
! 1846: }
! 1847: } elsif ($child->nodeType == XML_ELEMENT_NODE && string_in_array($all_block, $child->nodeName)) {
! 1848: # these children are blocks and should not be in a paragraph
! 1849: if (defined $p) {
! 1850: push(@new_children, $p);
! 1851: $p = undef;
! 1852: }
! 1853: push(@new_children, $child);
! 1854: } elsif ($child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^[ \t\f\n\r]*$/) {
! 1855: # blank text: add to paragraph if there is one and there is a next node, otherwise keep out of the paragraph
! 1856: if (defined $p) {
! 1857: if (defined $next) {
! 1858: $p->appendChild($child);
! 1859: } else {
! 1860: push(@new_children, $p);
! 1861: $p = undef;
! 1862: push(@new_children, $child);
! 1863: }
! 1864: } else {
! 1865: push(@new_children, $child);
! 1866: }
! 1867: } elsif ($child->nodeType == XML_TEXT_NODE ||
! 1868: $child->nodeType == XML_ELEMENT_NODE || $child->nodeType == XML_CDATA_SECTION_NODE ||
! 1869: $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
! 1870: # these children require a paragraph
! 1871: if (!defined $p) {
! 1872: $p = $doc->createElement('p');
! 1873: }
! 1874: $p->appendChild($child);
! 1875: } else {
! 1876: # these children do not require a paragraph (XML comments, PI)
! 1877: # -> do not move them in a new paragraph
! 1878: if (defined $p) {
! 1879: push(@new_children, $p);
! 1880: $p = undef;
! 1881: }
! 1882: push(@new_children, $child);
! 1883: }
! 1884: }
! 1885: if (defined $p) {
! 1886: push(@new_children, $p);
! 1887: }
! 1888: foreach my $child (@new_children) {
! 1889: $node->appendChild($child);
! 1890: }
! 1891: }
! 1892: # now fix the paragraphs everywhere, so that all inline nodes are inside a paragraph, and block nodes are outside
! 1893: my $next;
! 1894: for (my $child=$node->firstChild; defined $child; $child=$next) {
! 1895: $next = $child->nextSibling;
! 1896: if ($child->nodeType == XML_ELEMENT_NODE && defined $child->firstChild) {
! 1897: if ($child->nodeName eq 'p') {
! 1898: fix_paragraph($child, $all_block);
! 1899: } else {
! 1900: fix_paragraphs_inside($child, $all_block);
! 1901: }
! 1902: }
! 1903: }
! 1904: }
! 1905:
! 1906: # returns 1 if a paragraph is needed inside this node (assuming the parent can have paragraphs)
! 1907: sub paragraph_needed {
! 1908: my ($node) = @_;
! 1909: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 1910: if (($child->nodeType == XML_TEXT_NODE && $child->nodeValue !~ /^\s*$/) ||
! 1911: ($child->nodeType == XML_ELEMENT_NODE && !string_in_array(\@inline_like_block, $child->nodeName)) ||
! 1912: $child->nodeType == XML_CDATA_SECTION_NODE ||
! 1913: $child->nodeType == XML_ENTITY_NODE || $child->nodeType == XML_ENTITY_REF_NODE) {
! 1914: return(1);
! 1915: }
! 1916: }
! 1917: return(0);
! 1918: }
! 1919:
! 1920: # returns 1 if there is a paragraph or br in a child of this node, or inside an inline child
! 1921: sub paragraph_inside {
! 1922: my ($node) = @_;
! 1923: # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
! 1924: # (also used in first_block below)
! 1925: my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
! 1926: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 1927: if ($child->nodeType == XML_ELEMENT_NODE) {
! 1928: my $name = $child->nodeName;
! 1929: if ($name eq 'p' || $name eq 'br') {
! 1930: return(1);
! 1931: } elsif (string_in_array(\@splitable_inline, $name)) {
! 1932: if (paragraph_inside($child)) {
! 1933: return(1);
! 1934: }
! 1935: }
! 1936: }
! 1937: }
! 1938: return(0);
! 1939: }
! 1940:
! 1941: # fixes paragraphs inside paragraphs (without a block in-between)
! 1942: sub fix_paragraph {
! 1943: my ($p, $all_block) = @_;
! 1944: my $loop_right = 1; # this loops is to avoid out of memory errors with recurse, see below
! 1945: while ($loop_right) {
! 1946: $loop_right = 0;
! 1947: my $block = find_first_block($p, $all_block);
! 1948: if (defined $block) {
! 1949: my $trees = clone_ancestor_around_node($p, $block);
! 1950: my $doc = $p->ownerDocument;
! 1951: my $replacement = $doc->createDocumentFragment();
! 1952: my $left = $trees->{'left'};
! 1953: my $middle = $trees->{'middle'};
! 1954: my $right = $trees->{'right'};
! 1955: my $left_needs_p = 0; # 1 if it needs a paragraph (used to replace br later)
! 1956:
! 1957: if (defined $left) {
! 1958: # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
! 1959: for (my $child=$left->firstChild; defined $child; $child=$child->nextSibling) {
! 1960: if ($child->nodeType == XML_ELEMENT_NODE) {
! 1961: fix_paragraphs_inside($child, $all_block);
! 1962: }
! 1963: }
! 1964: if (!paragraph_needed($left)) {
! 1965: # this was just blank text, comments or inline responses, it should not create a new paragraph
! 1966: my $next;
! 1967: for (my $child=$left->firstChild; defined $child; $child=$next) {
! 1968: $next = $child->nextSibling;
! 1969: $left->removeChild($child);
! 1970: $replacement->appendChild($child);
! 1971: }
! 1972: } else {
! 1973: $left_needs_p = 1;
! 1974: $replacement->appendChild($left);
! 1975: }
! 1976: }
! 1977:
! 1978: my $n = $middle->firstChild;
! 1979: while (defined $n) {
! 1980: if ($n->nodeType == XML_ELEMENT_NODE && (string_in_array($all_block, $n->nodeName) || $n->nodeName eq 'br')) {
! 1981: if ($n->nodeName eq 'p') {
! 1982: my $parent = $n->parentNode;
! 1983: # first apply recursion
! 1984: fix_paragraph($n, $all_block);
! 1985: # now the p might have been replaced by several nodes, which should replace the initial p
! 1986: my $next_block;
! 1987: for (my $block=$parent->firstChild; defined $block; $block=$next_block) {
! 1988: $next_block = $block->nextSibling;
! 1989: if ($block->nodeName eq 'p') {
! 1990: $parent->removeChild($block);
! 1991: # for each parent before $middle, clone in-between the p and its children (to preserve the styles)
! 1992: if (defined $block->firstChild) {
! 1993: for (my $p=$parent; $p!=$middle; $p=$p->parentNode) {
! 1994: my $newp = $p->cloneNode(0);
! 1995: my $next;
! 1996: for (my $child=$block->firstChild; defined $child; $child=$next) {
! 1997: $next = $child->nextSibling;
! 1998: $block->removeChild($child);
! 1999: $newp->appendChild($child);
! 2000: }
! 2001: $block->appendChild($newp);
! 2002: }
! 2003: }
! 2004: }
! 2005: $replacement->appendChild($block);
! 2006: }
! 2007: } else {
! 2008: # replace the whole p by this block, forgetting about intermediate inline elements
! 2009: $n->parentNode->removeChild($n);
! 2010: if ($n->nodeName eq 'br') {
! 2011: # replace a br by a paragraph if there was nothing before in the paragraph,
! 2012: # otherwise remove it because it already broke the paragraph in half
! 2013: if (!defined $left || !$left_needs_p) {
! 2014: $replacement->appendChild($middle);
! 2015: }
! 2016: } else {
! 2017: fix_paragraphs_inside($n, $all_block);
! 2018: $replacement->appendChild($n);
! 2019: }
! 2020: }
! 2021: last;
! 2022: }
! 2023: $n = $n->firstChild;
! 2024: if (defined $n && defined $n->nextSibling) {
! 2025: die "Error in post_xml.fix_paragraph: block not found";
! 2026: }
! 2027: }
! 2028:
! 2029: if (defined $right) {
! 2030: if ($block->nodeName eq 'p') {
! 2031: # remove attributes on the right paragraph
! 2032: my @attributelist = $right->attributes();
! 2033: foreach my $att (@attributelist) {
! 2034: $right->removeAttribute($att->nodeName);
! 2035: }
! 2036: }
! 2037: if ($right->firstChild->nodeType == XML_TEXT_NODE && $right->firstChild->nodeValue =~ /^[ \t\f\n\r]*$/) {
! 2038: # remove the first text node with whitespace only from the p, it should not trigger the creation of a p
! 2039: # (but take nbsp into account, so we should not use \s here)
! 2040: my $first = $right->firstChild;
! 2041: $right->removeChild($first);
! 2042: $replacement->appendChild($first);
! 2043: }
! 2044: if (defined $right->firstChild) {
! 2045: if (paragraph_needed($right)) {
! 2046: $replacement->appendChild($right);
! 2047: #fix_paragraph($right, $all_block); This is taking way too much memory for blocks with many children
! 2048: # -> loop instead of recurse
! 2049: $loop_right = 1;
! 2050: } else {
! 2051: # this was just blank text, comments or inline responses, it should not create a new paragraph
! 2052: my $next;
! 2053: for (my $child=$right->firstChild; defined $child; $child=$next) {
! 2054: $next = $child->nextSibling;
! 2055: $right->removeChild($child);
! 2056: $replacement->appendChild($child);
! 2057: # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
! 2058: if ($child->nodeType == XML_ELEMENT_NODE) {
! 2059: fix_paragraphs_inside($child, $all_block);
! 2060: }
! 2061: }
! 2062: }
! 2063: }
! 2064: }
! 2065:
! 2066: $p->parentNode->replaceChild($replacement, $p);
! 2067:
! 2068: if ($loop_right) {
! 2069: $p = $right;
! 2070: }
! 2071:
! 2072: } else {
! 2073: # fix paragraphs inside, in case one of the descendants can have paragraphs inside (like numericalresponse/hintgroup):
! 2074: my $next;
! 2075: for (my $child=$p->firstChild; defined $child; $child=$next) {
! 2076: $next = $child->nextSibling;
! 2077: if ($child->nodeType == XML_ELEMENT_NODE) {
! 2078: fix_paragraphs_inside($child, $all_block);
! 2079: }
! 2080: }
! 2081: }
! 2082: }
! 2083: }
! 2084:
! 2085: sub find_first_block {
! 2086: my ($node, $all_block) = @_;
! 2087: # inline elements that can be split in half if there is a paragraph inside (currently all HTML):
! 2088: my @splitable_inline = ('span', 'a', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u');
! 2089: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 2090: if ($child->nodeType == XML_ELEMENT_NODE) {
! 2091: if (string_in_array($all_block, $child->nodeName) || $child->nodeName eq 'br') {
! 2092: return($child);
! 2093: }
! 2094: if (string_in_array(\@splitable_inline, $child->nodeName)) {
! 2095: my $block = find_first_block($child, $all_block);
! 2096: if (defined $block) {
! 2097: return($block);
! 2098: }
! 2099: }
! 2100: }
! 2101: }
! 2102: return(undef);
! 2103: }
! 2104:
! 2105: # Creates clones of the ancestor containing the descendants before the node, at the node, and after the node.
! 2106: # returns a hash with: left, middle, right (left and right can be undef)
! 2107: sub clone_ancestor_around_node {
! 2108: my ($ancestor, $node) = @_;
! 2109: my $middle_node;
! 2110: my ($left, $middle, $right);
! 2111: for (my $child=$ancestor->firstChild; defined $child; $child=$child->nextSibling) {
! 2112: if ($child == $node || is_ancestor_of($child, $node)) {
! 2113: $middle_node = $child;
! 2114: last;
! 2115: }
! 2116: }
! 2117: if (!defined $middle_node) {
! 2118: die "error in split_ancestor_around_node: middle not found";
! 2119: }
! 2120: if (defined $middle_node->previousSibling) {
! 2121: $left = $ancestor->cloneNode(0);
! 2122: for (my $child=$ancestor->firstChild; $child != $middle_node; $child=$child->nextSibling) {
! 2123: $left->appendChild($child->cloneNode(1));
! 2124: }
! 2125: }
! 2126: $middle = $ancestor->cloneNode(0);
! 2127: if ($middle_node == $node) {
! 2128: $middle->appendChild($middle_node->cloneNode(1));
! 2129: } else {
! 2130: my $subres = clone_ancestor_around_node($middle_node, $node);
! 2131: my $subleft = $subres->{'left'};
! 2132: if (defined $subleft) {
! 2133: if (!defined $left) {
! 2134: $left = $ancestor->cloneNode(0);
! 2135: }
! 2136: $left->appendChild($subleft);
! 2137: }
! 2138: $middle->appendChild($subres->{'middle'});
! 2139: my $subright = $subres->{'right'};
! 2140: if (defined $subright) {
! 2141: $right = $ancestor->cloneNode(0);
! 2142: $right->appendChild($subright);
! 2143: }
! 2144: }
! 2145: if (defined $middle_node->nextSibling) {
! 2146: if (!defined $right) {
! 2147: $right = $ancestor->cloneNode(0);
! 2148: }
! 2149: for (my $child=$middle_node->nextSibling; defined $child; $child=$child->nextSibling) {
! 2150: $right->appendChild($child->cloneNode(1));
! 2151: }
! 2152: }
! 2153: my %result = ();
! 2154: $result{'left'} = $left;
! 2155: $result{'middle'} = $middle;
! 2156: $result{'right'} = $right;
! 2157: return(\%result);
! 2158: }
! 2159:
! 2160: sub is_ancestor_of {
! 2161: my ($n1, $n2) = @_;
! 2162: my $n = $n2->parentNode;
! 2163: while (defined $n) {
! 2164: if ($n == $n1) {
! 2165: return(1);
! 2166: }
! 2167: $n = $n->parentNode;
! 2168: }
! 2169: return(0);
! 2170: }
! 2171:
! 2172: # removes empty style elements and replaces the ones with only whitespaces inside by their content
! 2173: # also remove hints that have become empty after empty style removal.
! 2174: sub remove_empty_style {
! 2175: my ($root) = @_;
! 2176: # actually, preserve some elements like ins when they have whitespace, only remove if they are empty
! 2177: my @remove_if_empty = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'code', 'kbd', 'samp', 'tt', 'ins', 'del', 'var', 'small', 'big', 'font', 'u', 'hint');
! 2178: my @remove_if_blank = ('span', 'strong', 'em' , 'b', 'i', 'sup', 'sub', 'tt', 'var', 'small', 'big', 'font', 'u', 'hint');
! 2179: foreach my $name (@remove_if_empty) {
! 2180: my @nodes = $root->getElementsByTagName($name);
! 2181: while (scalar(@nodes) > 0) {
! 2182: my $node = pop(@nodes);
! 2183: if (!defined $node->firstChild) {
! 2184: my $parent = $node->parentNode;
! 2185: if (defined $node->previousSibling && $node->previousSibling->nodeType == XML_TEXT_NODE &&
! 2186: $node->previousSibling->nodeValue =~ /\$\S*$/) {
! 2187: # case $a<sup></sup>x
! 2188: my $value = $node->previousSibling->nodeValue;
! 2189: $value =~ s/\$(\S*)$/\$\{$1\}/;
! 2190: $node->previousSibling->setData($value);
! 2191: }
! 2192: $parent->removeChild($node);
! 2193: $parent->normalize();
! 2194: # now that we removed the node, check if the parent has become an empty style, and so on
! 2195: while (defined $parent && string_in_array(\@remove_if_empty, $parent->nodeName) && !defined $parent->firstChild) {
! 2196: my $grandparent = $parent->parentNode;
! 2197: $grandparent->removeChild($parent);
! 2198: remove_reference_from_array(\@nodes, $parent);
! 2199: $parent = $grandparent;
! 2200: }
! 2201: }
! 2202: }
! 2203: }
! 2204: foreach my $name (@remove_if_blank) {
! 2205: my @nodes = $root->getElementsByTagName($name);
! 2206: while (scalar(@nodes) > 0) {
! 2207: my $node = pop(@nodes);
! 2208: if (defined $node->firstChild && !defined $node->firstChild->nextSibling && $node->firstChild->nodeType == XML_TEXT_NODE) {
! 2209: # NOTE: careful, with UTF-8, \s matches non-breaking spaces and we want to preserve these
! 2210: if ($node->firstChild->nodeValue =~ /^[\t\n\f\r ]*$/) {
! 2211: my $parent = $node->parentNode;
! 2212: replace_by_children($node);
! 2213: $parent->normalize();
! 2214: # now that we removed the node, check if the parent has become a style with only whitespace, and so on
! 2215: while (defined $parent && string_in_array(\@remove_if_blank, $parent->nodeName) &&
! 2216: (!defined $parent->firstChild ||
! 2217: (!defined $parent->firstChild->nextSibling && $parent->firstChild->nodeType == XML_TEXT_NODE &&
! 2218: $parent->firstChild->nodeValue =~ /^^[\t\n\f\r ]*$/))) {
! 2219: my $grandparent = $parent->parentNode;
! 2220: replace_by_children($parent);
! 2221: remove_reference_from_array(\@nodes, $parent);
! 2222: $parent = $grandparent;
! 2223: }
! 2224: }
! 2225: }
! 2226: }
! 2227: }
! 2228: }
! 2229:
! 2230: # remove whitespace inside LON-CAPA elements that have an empty content-model (HTML ones are handled by html_to_xml)
! 2231: sub fix_empty_lc_elements {
! 2232: my ($node) = @_;
! 2233: my @lcempty = ('arc','axis','backgroundplot','drawoptionlist','drawvectorsum','fill','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','hiddenline','hiddensubmission','key','line','location','organicstructure','parameter','plotobject','plotvector','responseparam','spline','textline');
! 2234: if (string_in_array(\@lcempty, $node->nodeName)) {
! 2235: if (defined $node->firstChild && !defined $node->firstChild->nextSibling &&
! 2236: $node->firstChild->nodeType == XML_TEXT_NODE && $node->firstChild->nodeValue =~ /^\s*$/) {
! 2237: $node->removeChild($node->firstChild);
! 2238: }
! 2239: if (defined $node->firstChild) {
! 2240: if ($warnings) {
! 2241: print "Warning: a ".$node->nodeName." has something inside\n";
! 2242: }
! 2243: }
! 2244: return;
! 2245: }
! 2246: for (my $child=$node->firstChild; defined $child; $child=$child->nextSibling) {
! 2247: if ($child->nodeType == XML_ELEMENT_NODE) {
! 2248: fix_empty_lc_elements($child);
! 2249: }
! 2250: }
! 2251: }
! 2252:
! 2253: # turn some attribute values into lowercase when they should be
! 2254: sub lowercase_attribute_values {
! 2255: my ($root) = @_;
! 2256: my @with_yesno = (['radiobuttonresponse', ['randomize']],
! 2257: ['optionresponse', ['randomize']],
! 2258: ['matchresponse', ['randomize']],
! 2259: ['itemgroup', ['randomize']],
! 2260: ['rankresponse', ['randomize']],
! 2261: ['functionplotresponse', ['xaxisvisible', 'yaxisvisible', 'gridvisible']],
! 2262: ['backgroundplot', ['fixed']],
! 2263: ['drawvectorsum', ['showvalue']],
! 2264: ['textline', ['readonly']],
! 2265: ['hint', ['showoncorrect']],
! 2266: ['body', ['dir']],
! 2267: ['img', ['encrypturl']],
! 2268: ['form', ['method']],
! 2269: ['input', ['type']]
! 2270: );
! 2271: foreach my $el_attributes (@with_yesno) {
! 2272: my $el_name = $el_attributes->[0];
! 2273: my @elements = $root->getElementsByTagName($el_name);
! 2274: foreach my $element (@elements) {
! 2275: my $att_list = $el_attributes->[1];
! 2276: foreach my $att_name (@$att_list) {
! 2277: my $att_value = $element->getAttribute($att_name);
! 2278: if (!defined $att_value) {
! 2279: next;
! 2280: }
! 2281: if ($att_value eq 'yes' || $att_value eq 'no') {
! 2282: next;
! 2283: }
! 2284: if ($att_value =~ /\s*yes\s*/i) {
! 2285: $element->setAttribute($att_name, 'yes');
! 2286: } elsif ($att_value =~ /\s*no\s*/i) {
! 2287: $element->setAttribute($att_name, 'no');
! 2288: }
! 2289: }
! 2290: }
! 2291: }
! 2292: }
! 2293:
! 2294: # fixes spelling mistakes for numericalresponse/@unit
! 2295: sub replace_numericalresponse_unit_attribute {
! 2296: my ($root) = @_;
! 2297: my @numericalresponses = $root->getElementsByTagName('numericalresponse');
! 2298: foreach my $numericalresponse (@numericalresponses) {
! 2299: if (defined $numericalresponse->getAttribute('units') && !defined $numericalresponse->getAttribute('unit')) {
! 2300: $numericalresponse->setAttribute('unit', $numericalresponse->getAttribute('units'));
! 2301: $numericalresponse->removeAttribute('units');
! 2302: }
! 2303: }
! 2304:
! 2305: }
! 2306:
! 2307: # Replaces &format and &prettyprint by <num> whenever possible.
! 2308: # Also replaces &chemparse by <chem>.
! 2309: # If the function call is enclosed in <display>, the <display> element is removed.
! 2310: sub replace_functions_by_elements {
! 2311: my ($root) = @_;
! 2312: my $doc = $root->ownerDocument;
! 2313: my @preserve = ('script','answer','parse','m','tm','dtm','numericalhintscript'); # display is handled later
! 2314: my @all = $root->getElementsByTagName('*');
! 2315: foreach my $element (@all) {
! 2316: if (string_in_array(\@preserve, $element->nodeName)) {
! 2317: next;
! 2318: }
! 2319: my $changed = 0;
! 2320: my $next;
! 2321: for (my $child=$element->firstChild; defined $child; $child=$next) {
! 2322: $next = $child->nextSibling;
! 2323: if ($child->nodeType == XML_TEXT_NODE) {
! 2324: my $value = $child->nodeValue;
! 2325: if ($value =~ /^(.*)&(?:format|prettyprint)\((\$\{?[a-zA-Z0-9]*\}?(?:\[[^\]]*\])?|[0-9.]+)\s?,\s?(["'][,.\$]?[0-9][eEfFgGsS]["']|\$[a-zA-Z0-9]*)\)(.*)$/s) {
! 2326: # NOTE: we don't check for &prettyprint's 3rd argument (target), but it has not been seen outside of script elements.
! 2327: # NOTE: the format options ',' and '$' are not supported by &format in current LON-CAPA since rev 1.81 of default_homework.lcpm,
! 2328: # but are supported by &prettyprint;
! 2329: # if we use (like current LON-CAPA) &prettyprint for <num> implementation, it will change a few resulting documents
! 2330: # (by making them display something they were probably intended to display, but which did not).
! 2331: # Usage of <num> with &prettyprint instead of &format might also change the display when there is an exponent.
! 2332: my $before = $1;
! 2333: my $number = $2;
! 2334: my $format = $3;
! 2335: my $after = $4;
! 2336: $format =~ s/^['"]|['"]$//g;
! 2337: # do not change this if the parent is <display> and there are other things before or after &format
! 2338: if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
! 2339: $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
! 2340: last;
! 2341: }
! 2342: my $replacement = $doc->createDocumentFragment();
! 2343: my $num = $doc->createElement('num');
! 2344: $num->setAttribute('format', $format);
! 2345: $num->appendChild($doc->createTextNode($number));
! 2346: if (length($before) > 0) {
! 2347: $replacement->appendChild($doc->createTextNode($before));
! 2348: }
! 2349: $replacement->appendChild($num);
! 2350: if (length($after) > 0) {
! 2351: $replacement->appendChild($doc->createTextNode($after));
! 2352: }
! 2353: $element->replaceChild($replacement, $child);
! 2354: $changed = 1;
! 2355: $next = $element->firstChild; # start over, there might be another &format in the same text node
! 2356: } elsif ($value =~ /^(.*)&chemparse\(([^'"()]*|'[^']*'|"[^"]*")\)(.*)$/s) {
! 2357: my $before = $1;
! 2358: my $reaction = $2;
! 2359: my $after = $3;
! 2360: $reaction =~ s/^'(.*)'$/$1/;
! 2361: $reaction =~ s/^"(.*)"$/$1/;
! 2362: if ($element->nodeName eq 'display' && (defined $child->previousSibling || defined $next ||
! 2363: $before !~ /^\s*$/ || $after !~ /^\s*$/)) {
! 2364: last;
! 2365: }
! 2366: my $replacement = $doc->createDocumentFragment();
! 2367: my $chem = $doc->createElement('chem');
! 2368: $chem->appendChild($doc->createTextNode($reaction));
! 2369: if (length($before) > 0) {
! 2370: $replacement->appendChild($doc->createTextNode($before));
! 2371: }
! 2372: $replacement->appendChild($chem);
! 2373: if (length($after) > 0) {
! 2374: $replacement->appendChild($doc->createTextNode($after));
! 2375: }
! 2376: $element->replaceChild($replacement, $child);
! 2377: $changed = 1;
! 2378: $next = $element->firstChild;
! 2379: }
! 2380: }
! 2381: }
! 2382: if ($changed && $element->nodeName eq 'display') {
! 2383: my $first = $element->firstChild;
! 2384: if ($first->nodeType == XML_ELEMENT_NODE && string_in_array(['num','chem'], $first->nodeName) &&
! 2385: !defined $first->nextSibling) {
! 2386: # remove useless display element
! 2387: replace_by_children($element);
! 2388: }
! 2389: }
! 2390: }
! 2391: }
! 2392:
! 2393: # pretty-print using im-memory DOM tree
! 2394: sub pretty {
! 2395: my ($node, $all_block, $indent_level) = @_;
! 2396: my $doc = $node->ownerDocument;
! 2397: $indent_level ||= 0;
! 2398: my $type = $node->nodeType;
! 2399: if ($type == XML_ELEMENT_NODE) {
! 2400: my $name = $node->nodeName;
! 2401: if ((string_in_array($all_block, $name) || string_in_array(\@inline_like_block, $name)) &&
! 2402: !string_in_array(\@preserve_elements, $name)) {
! 2403: # make sure there is a newline at the beginning and at the end if there is anything inside
! 2404: if (defined $node->firstChild && !string_in_array(\@no_newline_inside, $name)) {
! 2405: my $first = $node->firstChild;
! 2406: if ($first->nodeType == XML_TEXT_NODE) {
! 2407: my $text = $first->nodeValue;
! 2408: if ($text !~ /^ *\n/) {
! 2409: $first->setData("\n" . $text);
! 2410: }
! 2411: } else {
! 2412: $node->insertBefore($doc->createTextNode("\n"), $first);
! 2413: }
! 2414: my $last = $node->lastChild;
! 2415: if ($last->nodeType == XML_TEXT_NODE) {
! 2416: my $text = $last->nodeValue;
! 2417: if ($text !~ /\n *$/) {
! 2418: $last->setData($text . "\n");
! 2419: }
! 2420: } else {
! 2421: $node->appendChild($doc->createTextNode("\n"));
! 2422: }
! 2423: }
! 2424:
! 2425: # indent and make sure there is a newline before and after a block element
! 2426: my $newline_indent = "\n".(' ' x (2*($indent_level + 1)));
! 2427: my $newline_indent_last = "\n".(' ' x (2*$indent_level));
! 2428: my $next;
! 2429: for (my $child=$node->firstChild; defined $child; $child=$next) {
! 2430: $next = $child->nextSibling;
! 2431: if ($child->nodeType == XML_ELEMENT_NODE) {
! 2432: if (string_in_array($all_block, $child->nodeName) || string_in_array(\@inline_like_block, $child->nodeName)) {
! 2433: # make sure there is a newline before and after a block element
! 2434: if (defined $child->previousSibling && $child->previousSibling->nodeType == XML_TEXT_NODE) {
! 2435: my $prev = $child->previousSibling;
! 2436: my $text = $prev->nodeValue;
! 2437: if ($text !~ /\n *$/) {
! 2438: $prev->setData($text . $newline_indent);
! 2439: }
! 2440: } else {
! 2441: $node->insertBefore($doc->createTextNode($newline_indent), $child);
! 2442: }
! 2443: if (defined $next && $next->nodeType == XML_TEXT_NODE) {
! 2444: my $text = $next->nodeValue;
! 2445: if ($text !~ /^ *\n/) {
! 2446: $next->setData($newline_indent . $text);
! 2447: }
! 2448: } else {
! 2449: $node->insertAfter($doc->createTextNode($newline_indent), $child);
! 2450: }
! 2451: }
! 2452: pretty($child, $all_block, $indent_level+1);
! 2453: } elsif ($child->nodeType == XML_TEXT_NODE) {
! 2454: my $text = $child->nodeValue;
! 2455: # collapse newlines
! 2456: $text =~ s/\n([\t ]*\n)+/\n/g;
! 2457: # indent and remove spaces and tabs before newlines
! 2458: if (defined $next) {
! 2459: $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
! 2460: } else {
! 2461: $text =~ s/[\t ]*\n[\t ]*/$newline_indent/ge;
! 2462: $text =~ s/[\t ]*\n[\t ]*$/$newline_indent_last/e;
! 2463: }
! 2464: $child->setData($text);
! 2465: }
! 2466: }
! 2467:
! 2468: # removes whitespace at the beginning and end of p td, th and li (except for nbsp at the beginning)
! 2469: my @to_trim = ('p','td','th','li');
! 2470: if (string_in_array(\@to_trim, $name) && defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
! 2471: my $text = $node->firstChild->nodeValue;
! 2472: $text =~ s/^[ \t\f\n\r]*//;
! 2473: if ($text eq '') {
! 2474: $node->removeChild($node->firstChild);
! 2475: } else {
! 2476: $node->firstChild->setData($text);
! 2477: }
! 2478: }
! 2479: if (string_in_array(\@to_trim, $name) && defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
! 2480: my $text = $node->lastChild->nodeValue;
! 2481: $text =~ s/\s*$//;
! 2482: if ($text eq '') {
! 2483: $node->removeChild($node->lastChild);
! 2484: } else {
! 2485: $node->lastChild->setData($text);
! 2486: }
! 2487: }
! 2488: } elsif (string_in_array(\@preserve_elements, $name)) {
! 2489: # collapse newlines at the beginning and the end of scripts
! 2490: if (defined $node->firstChild && $node->firstChild->nodeType == XML_TEXT_NODE) {
! 2491: my $text = $node->firstChild->nodeValue;
! 2492: $text =~ s/^\n( *\n)+/\n/;
! 2493: if ($text eq '') {
! 2494: $node->removeChild($node->firstChild);
! 2495: } else {
! 2496: $node->firstChild->setData($text);
! 2497: }
! 2498: }
! 2499: if (defined $node->lastChild && $node->lastChild->nodeType == XML_TEXT_NODE) {
! 2500: my $text = $node->lastChild->nodeValue;
! 2501: $text =~ s/\n( *\n)+$/\n/;
! 2502: if ($text eq '') {
! 2503: $node->removeChild($node->lastChild);
! 2504: } else {
! 2505: $node->lastChild->setData($text);
! 2506: }
! 2507: }
! 2508: }
! 2509: }
! 2510: }
! 2511:
! 2512: sub replace_tm_dtm {
! 2513: my ($root) = @_;
! 2514: my $doc = $root->ownerDocument;
! 2515: my @elements = $root->getElementsByTagName('tm');
! 2516: push(@elements, $root->getElementsByTagName('dtm'));
! 2517: foreach my $element (@elements) {
! 2518: my $first = $element->firstChild;
! 2519: if (defined $first && $first->nodeType == XML_TEXT_NODE) {
! 2520: my $text = $first->nodeValue;
! 2521: if ($element->nodeName eq 'tm') {
! 2522: $first->setData('$'.$text.'$');
! 2523: } else {
! 2524: $first->setData('$$'.$text.'$$');
! 2525: }
! 2526: }
! 2527: $element->setNodeName('m');
! 2528: }
! 2529: }
! 2530:
! 2531:
! 2532: ######## utilities ########
! 2533:
! 2534: ##
! 2535: # Trims a string (really, this should be built-in in Perl, this is ridiculous, ugly and slow)
! 2536: # @param {string} s - the string to trim
! 2537: # @returns the trimmed string
! 2538: ##
! 2539: sub trim {
! 2540: my ($s) = @_;
! 2541: $s =~ s/^\s+//;
! 2542: $s =~ s/\s+$//;
! 2543: return($s);
! 2544: }
! 2545:
! 2546: ##
! 2547: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
! 2548: # @param {Array<string>} array - reference to the array of strings
! 2549: # @param {string} value - the string to look for
! 2550: # @returns 1 if found, 0 otherwise
! 2551: ##
! 2552: sub string_in_array {
! 2553: my ($array, $value) = @_;
! 2554: # NOTE: would it be faster to use "any { $value eq $_ } @{$array}" from List::Util ?
! 2555: foreach my $v (@{$array}) {
! 2556: if ($v eq $value) {
! 2557: return 1;
! 2558: }
! 2559: }
! 2560: return 0;
! 2561: }
! 2562:
! 2563: ##
! 2564: # Tests if an object is in an array (using ==)
! 2565: # @param {Array<Object>} array - reference to the array of references
! 2566: # @param {Object} ref - the reference to look for
! 2567: # @returns 1 if found, 0 otherwise
! 2568: ##
! 2569: sub reference_in_array {
! 2570: my ($array, $ref) = @_;
! 2571: foreach my $v (@{$array}) {
! 2572: if ($v == $ref) {
! 2573: return 1;
! 2574: }
! 2575: }
! 2576: return 0;
! 2577: }
! 2578:
! 2579: ##
! 2580: # returns the index of a string in an array
! 2581: # @param {Array<Object>} array - reference to the array of strings
! 2582: # @param {string} s - the string to look for (using eq)
! 2583: # @returns the index if found, -1 otherwise
! 2584: ##
! 2585: sub index_of_string {
! 2586: my ($array, $s) = @_;
! 2587: for (my $i=0; $i<scalar(@{$array}); $i++) {
! 2588: if ($array->[$i] eq $s) {
! 2589: return $i;
! 2590: }
! 2591: }
! 2592: return -1;
! 2593: }
! 2594:
! 2595: ##
! 2596: # returns the index of a reference in an array
! 2597: # @param {Array<Object>} array - reference to the array of references
! 2598: # @param {Object} ref - the reference to look for
! 2599: # @returns the index if found, -1 otherwise
! 2600: ##
! 2601: sub index_of_reference {
! 2602: my ($array, $ref) = @_;
! 2603: for (my $i=0; $i<scalar(@{$array}); $i++) {
! 2604: if ($array->[$i] == $ref) {
! 2605: return $i;
! 2606: }
! 2607: }
! 2608: return -1;
! 2609: }
! 2610:
! 2611: ##
! 2612: # if found, removes a string from an array, otherwise do nothing
! 2613: # @param {Array<string>} array - reference to the array of string
! 2614: # @param {string} s - the string to look for (using eq)
! 2615: ##
! 2616: sub remove_string_from_array {
! 2617: my ($array, $s) = @_;
! 2618: my $index = index_of_string($array, $s);
! 2619: if ($index != -1) {
! 2620: splice(@$array, $index, 1);
! 2621: }
! 2622: }
! 2623:
! 2624: ##
! 2625: # if found, removes a reference from an array, otherwise do nothing
! 2626: # @param {Array<Object>} array - reference to the array of references
! 2627: # @param {Object} ref - the reference to look for
! 2628: ##
! 2629: sub remove_reference_from_array {
! 2630: my ($array, $ref) = @_;
! 2631: my $index = index_of_reference($array, $ref);
! 2632: if ($index != -1) {
! 2633: splice(@$array, $index, 1);
! 2634: }
! 2635: }
! 2636:
! 2637: ##
! 2638: # replaces a node by its children
! 2639: # @param {Node} node - the DOM node
! 2640: ##
! 2641: sub replace_by_children {
! 2642: my ($node) = @_;
! 2643: my $parent = $node->parentNode;
! 2644: my $next;
! 2645: my $previous;
! 2646: for (my $child=$node->firstChild; defined $child; $child=$next) {
! 2647: $next = $child->nextSibling;
! 2648: if ((!defined $previous || !defined $next) &&
! 2649: $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
! 2650: next; # do not keep first and last whitespace nodes
! 2651: } else {
! 2652: if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
! 2653: # remove whitespace at the beginning
! 2654: my $value = $child->nodeValue;
! 2655: $value =~ s/^\s+//;
! 2656: $child->setData($value);
! 2657: }
! 2658: if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
! 2659: # and at the end
! 2660: my $value = $child->nodeValue;
! 2661: $value =~ s/\s+$//;
! 2662: $child->setData($value);
! 2663: }
! 2664: }
! 2665: $node->removeChild($child);
! 2666: $parent->insertBefore($child, $node);
! 2667: $previous = $child;
! 2668: }
! 2669: $parent->removeChild($node);
! 2670: }
! 2671:
! 2672: ##
! 2673: # returns the trimmed attribute value if the attribute exists and is not blank, undef otherwise
! 2674: # @param {Node} node - the DOM node
! 2675: # @param {string} attribute_name - the attribute name
! 2676: ##
! 2677: sub get_non_empty_attribute {
! 2678: my ($node, $attribute_name) = @_;
! 2679: my $value = $node->getAttribute($attribute_name);
! 2680: if (defined $value && $value !~ /^\s*$/) {
! 2681: $value = trim($value);
! 2682: return($value);
! 2683: }
! 2684: return(undef);
! 2685: }
! 2686:
! 2687: ##
! 2688: # Returns a CSS property value from the style attribute of the element, or undef if not defined
! 2689: # @param {Element} el - the DOM element
! 2690: # @param {string} property_name - the CSS property name
! 2691: ##
! 2692: sub get_css_property {
! 2693: my ($el, $property_name) = @_;
! 2694: my $style = $el->getAttribute('style');
! 2695: if (defined $style) {
! 2696: $style =~ s/^\s*;\s*//;
! 2697: $style =~ s/\s*;\s*$//;
! 2698: } else {
! 2699: $style = '';
! 2700: }
! 2701: my @pairs = split(';', $style);
! 2702: foreach my $pair (@pairs) {
! 2703: my @name_value = split(':', $pair);
! 2704: if (scalar(@name_value) != 2) {
! 2705: next;
! 2706: }
! 2707: my $name = trim($name_value[0]);
! 2708: my $value = trim($name_value[1]);
! 2709: if (lc($name) eq $property_name) {
! 2710: return($value); # return the first one found
! 2711: }
! 2712: }
! 2713: return(undef);
! 2714: }
! 2715:
! 2716: ##
! 2717: # Returns the reference to a hash CSS property name => value from the style attribute of the element.
! 2718: # Returns an empty list if the style attribute is not defined,
! 2719: # @param {Element} el - the DOM element
! 2720: # @return {Hash<string, string>} reference to the hash property name => property value
! 2721: ##
! 2722: sub get_css_properties {
! 2723: my ($el) = @_;
! 2724: my $style = $el->getAttribute('style');
! 2725: if (defined $style) {
! 2726: $style =~ s/^\s*;\s*//;
! 2727: $style =~ s/\s*;\s*$//;
! 2728: } else {
! 2729: $style = '';
! 2730: }
! 2731: my @pairs = split(';', $style);
! 2732: tie (my %hash, 'Tie::IxHash', ());
! 2733: foreach my $pair (@pairs) {
! 2734: my @name_value = split(':', $pair);
! 2735: if (scalar(@name_value) != 2) {
! 2736: next;
! 2737: }
! 2738: my $name = trim($name_value[0]);
! 2739: my $value = trim($name_value[1]);
! 2740: if (defined $hash{$name}) {
! 2741: # duplicate property in the style attribute: keep only the last one
! 2742: delete $hash{$name};
! 2743: }
! 2744: $hash{$name} = $value;
! 2745: }
! 2746: return(\%hash);
! 2747: }
! 2748:
! 2749: ##
! 2750: # Sets a CSS property in the style attribute of an element
! 2751: # @param {Element} el - the DOM element
! 2752: # @param {string} property_name - the CSS property name
! 2753: # @param {string} property_value - the CSS property value
! 2754: ##
! 2755: sub set_css_property {
! 2756: my ($el, $property_name, $property_value) = @_;
! 2757: my $hash_ref = { $property_name => $property_value };
! 2758: set_css_properties($el, $hash_ref);
! 2759: }
! 2760:
! 2761: ##
! 2762: # Sets several CSS properties in the style attribute of an element
! 2763: # @param {Element} el - the DOM element
! 2764: # @param {Hash<string, string>} properties - reference to the hash property name => property value
! 2765: ##
! 2766: sub set_css_properties {
! 2767: my ($el, $properties) = @_;
! 2768: my $hash = get_css_properties($el);
! 2769: foreach my $property_name (keys %$properties) {
! 2770: my $property_value = $properties->{$property_name};
! 2771: if (defined $hash->{$property_name}) {
! 2772: delete $hash->{$property_name}; # to add the new one at the end
! 2773: }
! 2774: $hash->{$property_name} = $property_value;
! 2775: }
! 2776: my $style = '';
! 2777: foreach my $key (keys %$hash) {
! 2778: $style .= $key.':'.$hash->{$key}.'; ';
! 2779: }
! 2780: $style =~ s/; $//;
! 2781: $el->setAttribute('style', $style);
! 2782: }
! 2783:
! 2784: 1;
! 2785: __END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>