Annotation of loncom/homework/cleanxml/xml_to_loncapa.pm, revision 1.1
1.1 ! damieng 1: # The LearningOnline Network
! 2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
! 3: #
! 4: # $Id$
! 5: #
! 6: # Copyright Michigan State University Board of Trustees
! 7: #
! 8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 9: #
! 10: # LON-CAPA is free software; you can redistribute it and/or modify
! 11: # it under the terms of the GNU General Public License as published by
! 12: # the Free Software Foundation; either version 2 of the License, or
! 13: # (at your option) any later version.
! 14: #
! 15: # LON-CAPA is distributed in the hope that it will be useful,
! 16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 18: # GNU General Public License for more details.
! 19: #
! 20: # You should have received a copy of the GNU General Public License
! 21: # along with LON-CAPA; if not, write to the Free Software
! 22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 23: #
! 24: # /home/httpd/html/adm/gpl.txt
! 25: #
! 26: # http://www.lon-capa.org/
! 27: #
! 28: ###
! 29:
! 30: #!/usr/bin/perl
! 31:
! 32: package Apache::xml_to_loncapa;
! 33:
! 34: use strict;
! 35: use utf8;
! 36: use warnings;
! 37:
! 38: use XML::LibXML;
! 39:
! 40:
# LON-CAPA block-level element names. contains_loncapa_block() looks for these
# among the descendants of an HTML element.
my @loncapa_block = qw(
    parameter location answer foil image polygon rectangle text conceptgroup
    itemgroup item label data function array unit answergroup
    functionplotresponse functionplotruleset functionplotelements
    functionplotcustomrule essayresponse hintpart formulahint numericalhint
    reactionhint organichint optionhint radiobuttonhint stringhint customhint
    mathhint formulahintcondition numericalhintcondition reactionhintcondition
    organichintcondition optionhintcondition radiobuttonhintcondition
    stringhintcondition customhintcondition mathhintcondition imageresponse
    foilgroup datasubmission textfield hiddensubmission radiobuttonresponse
    rankresponse matchresponse import style script window block library
    notsolved part postanswerdate preduedate problem problemtype randomlabel
    bgimg labelgroup randomlist solved while tex print web gnuplot curve Task
    IntroParagraph ClosingParagraph Question QuestionText Setup Instance
    InstanceText Criteria CriteriaText GraderNote languageblock translated
    lang instructorcomment dataresponse togglebox standalone comment
    drawimage allow displayduedate displaytitle responseparam
    organicstructure scriptlib parserlib drawoptionlist spline backgroundplot
    plotobject plotvector drawvectorsum functionplotrule
    functionplotvectorrule functionplotvectorsumrule axis key xtics ytics
    title xlabel ylabel hiddenline dtm stringresponse optionresponse
    numericalresponse formularesponse mathresponse organicresponse
    reactionresponse customresponse externalresponse hint hintgroup
);

# LON-CAPA inline element names; inside_outtext() treats them as outtext
# content. Note: textline is deliberately not in this list.
my @loncapa_inline = qw(
    display m lm chem num parse algebra displayweight displaystudentphoto
);

# HTML elements that trigger the addition of startouttext/endouttext
my @html_trigger = qw(
    header footer aside h1 h2 h3 h4 h5 h6 li dd dt tbody tr caption thead
    tfoot td th span a em strong b i sup sub pre code kbd samp cite q tt ins
    del var small big br hr address blockquote img figure figcaption object
    param embed applet video source audio map area canvas form input select
    optgroup option textarea fieldset legend button iframe section div p ul
    ol dl table
);

# Elements add_outtext() returns from immediately, without processing their
# children ('title' appears twice, as in the original list).
my @simple_data = qw(
    polygon rectangle vector value answer title data function xlabel ylabel
    tic parserlib scriptlib import tex text image display m lm num algebra
    chem parse title style script ins del label option textarea legend
);

# Response elements that convert_paragraphs() looks for directly inside <p>.
my @inline_responses = qw(
    stringresponse optionresponse numericalresponse formularesponse
    mathresponse organicresponse reactionresponse customresponse
    externalresponse
);
! 51:
! 52:
# Converts the content of an XML file and returns the modified content.
# The string is parsed with XML::LibXML, startouttext/endouttext markers are
# added to the resulting DOM, and the DOM is serialized back to a string.
# @param {string} contents - the XML document as a string
# @returns {string} the converted document
sub convert_file {
    my ($contents) = @_;

    my $document = XML::LibXML->load_xml(string => $contents);
    add_outtext($document);
    my $converted = node_to_string($document);
    return $converted;
}
! 61:
! 62:
# Serializes a DOM node and its subtree to a string.
#  - Document node: serializes the document element.
#  - Text and CDATA nodes: returned as raw nodeValue when the parent is
#    m, script, display or parse, or when the parent is answer and the
#    grandparent exists and is neither numericalresponse nor formularesponse.
#    In every other case the node's own toString() is used.
#  - Element nodes: written as <tag attr="value">children</tag>, or as
#    <tag attr="value"/> when the element has no child node.
#  - Any other node type: toString().
# @param {Node} node - the DOM node to serialize
# @returns {string} the serialized node
sub node_to_string {
    my ($node) = @_;

    if ($node->nodeType == XML_DOCUMENT_NODE) {
        my $root = $node->documentElement();
        return node_to_string($root);
    } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
        my $parent = $node->parentNode;
        my $parent_name = $parent->nodeName;
        my $grandparent_name;
        if (defined $parent->parentNode) {
            $grandparent_name = $parent->parentNode->nodeName;
        }
        my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
        if (string_in_array(\@no_escape, $parent_name) &&
                ($parent_name ne 'answer' ||
                (defined $grandparent_name &&
                $grandparent_name ne 'numericalresponse' &&
                $grandparent_name ne 'formularesponse'))) {
            return $node->nodeValue;
        } else {
            return $node->toString();
        }
    } elsif ($node->nodeType == XML_ELEMENT_NODE) {
        my $s = '';
        my $tag = $node->nodeName;
        $s .= "<$tag";
        foreach my $attribute ($node->attributes()) {
            my $value = escape($attribute->nodeValue);
            # The value is always written between double quotes, so a literal
            # double quote has to be escaped here; otherwise it would end the
            # attribute early and produce malformed output.
            $value =~ s/"/&quot;/sg;
            $s .= ' ';
            $s .= $attribute->nodeName;
            $s .= '="';
            $s .= $value;
            $s .= '"';
        }
        if ($node->hasChildNodes()) {
            $s .= '>';
            foreach my $child ($node->childNodes) {
                $s .= node_to_string($child);
            }
            $s .= "</$tag>";
        } else {
            $s .= '/>';
        }
        return $s;
    } else {
        return $node->toString();
    }
}
! 112:
# Escapes the XML markup characters &, < and > in a string for LON-CAPA output.
# node_to_string() applies it to attribute values. Quotes and apostrophes are
# left untouched, so a caller writing a quoted attribute value must deal with
# the quote character itself.
# @param {string} s - the string to escape
# @returns {string} the escaped copy
sub escape {
    my ($s) = @_;
    # & must be handled first, otherwise the & of the entities added below
    # would be escaped a second time.
    $s =~ s/&/&amp;/sg;
    $s =~ s/</&lt;/sg;
    $s =~ s/>/&gt;/sg;
    return $s;
}
! 122:
# Adds startouttext and endouttext where useful for the colorful editor.
# For a document node, the document element is processed. Non-element nodes
# and elements listed in @simple_data are left alone. For any other element,
# paragraphs are converted first (convert_paragraphs), then the children are
# walked once: a startouttext is inserted before the first child accepted by
# inside_outtext(), and an endouttext before the first following child that
# continue_outtext() rejects. Children outside an outtext run are processed
# recursively.
# @param {Node} node - the DOM node to process (modified in place)
sub add_outtext {
    my ($node) = @_;

    my $type = $node->nodeType;
    if ($type == XML_DOCUMENT_NODE) {
        my $root = $node->documentElement();
        add_outtext($root);
        return;
    }
    return if $type != XML_ELEMENT_NODE;
    return if string_in_array(\@simple_data, $node->nodeName);

    convert_paragraphs($node);

    my $outtext_open = 0;
    my $current = $node->firstChild;
    while (defined $current) {
        # remember the sibling now: the helpers below insert nodes around $current
        my $following = $current->nextSibling;
        if (!$outtext_open) {
            if (inside_outtext($current)) {
                add_startouttext($node, $current);
                $outtext_open = 1;
            }
        } elsif (!continue_outtext($current)) {
            add_endouttext($node, $current);
            $outtext_open = 0;
        }
        # children that are part of an outtext run are not descended into
        add_outtext($current) if !$outtext_open;
        $current = $following;
    }
    # close a run that reaches the end of the element
    add_endouttext($node) if $outtext_open;
}
! 158:
# Returns 1 if this node should trigger the addition of startouttext before it:
#  - a text node that is not only whitespace,
#  - an element listed in @html_trigger that contains no LON-CAPA block,
#  - an element listed in @loncapa_inline.
# Returns 0 for everything else.
sub inside_outtext {
    my ($node) = @_;

    my $type = $node->nodeType;
    if ($type == XML_TEXT_NODE) {
        return ($node->nodeValue !~ /^\s*$/) ? 1 : 0;
    }
    return 0 if $type != XML_ELEMENT_NODE;

    my $name = $node->nodeName;
    if (string_in_array(\@html_trigger, $name)) {
        return contains_loncapa_block($node) ? 0 : 1;
    }
    return string_in_array(\@loncapa_inline, $name) ? 1 : 0;
}
! 177:
# Returns 1 if the outtext environment can continue with this node: either the
# node would start an outtext by itself (inside_outtext), or it is a text node
# of any content, including whitespace only. Returns 0 otherwise.
sub continue_outtext {
    my ($node) = @_;

    return 1 if inside_outtext($node);
    return ($node->nodeType == XML_TEXT_NODE) ? 1 : 0;
}
! 189:
# Returns 1 if the node has a descendant element, at any depth, whose name is
# listed in @loncapa_block, and 0 otherwise. The node itself is not tested.
sub contains_loncapa_block {
    my ($node) = @_;

    foreach my $descendant ($node->childNodes) {
        next if $descendant->nodeType != XML_ELEMENT_NODE;
        return 1 if string_in_array(\@loncapa_block, $descendant->nodeName);
        return 1 if contains_loncapa_block($descendant);
    }
    return 0;
}
! 205:
# Inserts a <startouttext/> element into $parent, right before $before_node.
# When $before_node is a text node starting with whitespace, that whitespace is
# moved into a separate text node placed before the new element.
# @param {Node} parent - the element receiving the new child
# @param {Node} before_node - the child before which startouttext is inserted
sub add_startouttext {
    my ($parent, $before_node) = @_;
    my $doc = $parent->ownerDocument;
    if ($before_node->nodeType == XML_TEXT_NODE) {
        # split space at the beginning of the node
        # \z (not $) is required: $ also matches just before a final newline,
        # which made the lazy match stop early and dropped that newline from
        # the text.
        if ($before_node->nodeValue =~ /\A(\s+)(.*)\z/s) {
            my ($leading_space, $remainder) = ($1, $2);
            my $space_node = $doc->createTextNode($leading_space);
            $before_node->setData($remainder);
            $parent->insertBefore($space_node, $before_node);
        }
    }
    my $startouttext = $doc->createElement('startouttext');
    $parent->insertBefore($startouttext, $before_node);
}
! 220:
# Inserts an <endouttext/> element into $parent: before $before_node when it is
# given, at the end of $parent otherwise. When the node preceding the insertion
# point is a text node ending with whitespace, that whitespace is moved into a
# separate text node placed after the new element.
# @param {Node} parent - the element receiving the new child
# @param {Node} [before_node] - the child before which endouttext is inserted
sub add_endouttext {
    my ($parent, $before_node) = @_;
    my $doc = $parent->ownerDocument;
    my $closing = $doc->createElement('endouttext');

    # inserts a node at the current insertion point ($before_node may be
    # replaced below, the closure always sees its current value)
    my $place = sub {
        my ($new_node) = @_;
        if (defined $before_node) {
            $parent->insertBefore($new_node, $before_node);
        } else {
            $parent->appendChild($new_node);
        }
    };

    my $last_inside = defined $before_node
        ? $before_node->previousSibling
        : $parent->lastChild;

    # split space at the end of the last node of the outtext
    if (defined $last_inside
            && $last_inside->nodeType == XML_TEXT_NODE
            && $last_inside->nodeValue =~ /^(.*?)(\s+)$/s) {
        my ($kept, $trailing_space) = ($1, $2);
        $last_inside->setData($kept);
        my $space_node = $doc->createTextNode($trailing_space);
        $place->($space_node);
        # endouttext has to go before the whitespace that was split off
        $before_node = $space_node;
    }

    $place->($closing);
}
! 250:
# Converts the paragraph children of $parent into content + <br> when at least
# one of them directly contains an inline response
# (the colorful editor does not support paragraphs containing inline responses).
# When such a paragraph exists, every <p> child of $parent is replaced by its
# own children, and a <br> is inserted after it unless nothing follows or only
# a final whitespace text node does.
# @param {Node} parent - the element whose <p> children are checked
sub convert_paragraphs {
    my ($parent) = @_;

    # first pass: is there a <p> child with an inline response as direct child ?
    my $found = 0;
    PARAGRAPH:
    foreach my $candidate ($parent->childNodes) {
        next PARAGRAPH if $candidate->nodeType != XML_ELEMENT_NODE;
        next PARAGRAPH if $candidate->nodeName ne 'p';
        foreach my $inner ($candidate->childNodes) {
            if ($inner->nodeType == XML_ELEMENT_NODE
                    && string_in_array(\@inline_responses, $inner->nodeName)) {
                $found = 1;
                last PARAGRAPH;
            }
        }
    }

    # second pass: unwrap all the paragraphs
    if ($found) {
        my $doc = $parent->ownerDocument;
        my $child = $parent->firstChild;
        while (defined $child) {
            # taken before the replacement, which removes $child from the tree
            my $following = $child->nextSibling;
            if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
                replace_by_children($child);
                # we only add a br if there is something after
                my $something_follows = defined $following
                    && (defined $following->nextSibling
                        || $following->nodeType != XML_TEXT_NODE
                        || $following->nodeValue !~ /^\s*$/);
                if ($something_follows) {
                    my $br = $doc->createElement('br');
                    $parent->insertBefore($br, $following);
                }
            }
            $child = $following;
        }
    }
}
! 288:
##
# Tells whether a string is one of the elements of an array, comparing with eq
# (used instead of $value ~~ @array to avoid Smartmatch warnings).
# @param {Array<string>} array - reference to the array of strings
# @param {string} value - the string to look for
# @returns 1 if found, 0 otherwise
##
sub string_in_array {
    my ($array, $value) = @_;
    my $found = 0;
    foreach my $candidate (@{$array}) {
        next if $candidate ne $value;
        $found = 1;
        last;
    }
    return $found;
}
! 304:
##
# Replaces a node by its children: each kept child is moved in front of the
# node inside the node's parent, then the node itself is removed.
# Whitespace-only text nodes met before any child has been moved, or at the
# very end, are not kept; a text node kept in first position loses its leading
# whitespace and a text node in last position loses its trailing whitespace.
# @param {Node} node - the DOM node
##
sub replace_by_children {
    my ($node) = @_;
    my $parent = $node->parentNode;

    my $moved_any = 0;
    my $child = $node->firstChild;
    while (defined $child) {
        my $following = $child->nextSibling;
        my $is_text = ($child->nodeType == XML_TEXT_NODE);
        my $is_first = !$moved_any;
        my $is_last = !defined $following;

        if (($is_first || $is_last) && $is_text && $child->nodeValue =~ /^\s*$/) {
            # do not keep first and last whitespace nodes:
            # the child stays in $node and disappears with it
            $child = $following;
            next;
        }

        if ($is_text && ($is_first || $is_last)) {
            my $value = $child->nodeValue;
            $value =~ s/^\s+// if $is_first;   # remove whitespace at the beginning
            $value =~ s/\s+$// if $is_last;    # and at the end
            $child->setData($value);
        }

        $node->removeChild($child);
        $parent->insertBefore($child, $node);
        $moved_any = 1;
        $child = $following;
    }
    $parent->removeChild($node);
}
! 339:
! 340: 1;
! 341: __END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>