Annotation of loncom/homework/cleanxml/xml_to_loncapa.pm, revision 1.3
1.1 damieng 1: # The LearningOnline Network
2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
3: #
1.3 ! damieng 4: # $Id: xml_to_loncapa.pm,v 1.2 2015/12/18 22:08:51 damieng Exp $
1.1 damieng 5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: ###
29:
30: #!/usr/bin/perl
31:
32: package Apache::xml_to_loncapa;
33:
34: use strict;
35: use utf8;
36: use warnings;
37:
38: use XML::LibXML;
39:
40:
41: my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
42:
43: my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
44:
45: # HTML elements that trigger the addition of startouttext/endouttext
46: my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
47:
48: my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
49:
50: my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
51:
52:
53: # Converts a file and return the modified contents
54: sub convert_file {
55: my ($contents) = @_;
56:
57: my $dom_doc = XML::LibXML->load_xml(string => $contents);
58: add_outtext($dom_doc);
59: return node_to_string($dom_doc);
60: }
61:
62:
63: sub node_to_string {
64: my ($node) = @_;
65:
66: if ($node->nodeType == XML_DOCUMENT_NODE) {
67: my $root = $node->documentElement();
68: return node_to_string($root);
69: } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
70: my $parent = $node->parentNode;
71: my $parent_name = $parent->nodeName;
72: my $grandparent_name;
73: if (defined $parent->parentNode) {
74: $grandparent_name = $parent->parentNode->nodeName;
75: }
76: my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
77: if (string_in_array(\@no_escape, $parent_name) &&
78: ($parent_name ne 'answer' ||
79: (defined $grandparent_name &&
80: $grandparent_name ne 'numericalresponse' &&
81: $grandparent_name ne 'formularesponse'))) {
82: return $node->nodeValue;
83: } else {
84: return $node->toString();
85: }
86: } elsif ($node->nodeType == XML_ELEMENT_NODE) {
87: my $s = '';
88: my $tag = $node->nodeName;
89: $s .= "<$tag";
90: my @attributes = $node->attributes();
91: foreach my $attribute (@attributes) {
92: $s .= ' ';
93: $s .= $attribute->nodeName;
94: $s .= '="';
1.2 damieng 95: $s .= escape_attribute($attribute->nodeValue);
1.1 damieng 96: $s .= '"';
97: }
1.3 ! damieng 98: if ($node->hasChildNodes() || $tag eq 'script') {
1.1 damieng 99: $s .= '>';
100: foreach my $child ($node->childNodes) {
101: $s .= node_to_string($child);
102: }
103: $s .= "</$tag>";
104: } else {
105: $s .= '/>';
106: }
107: return $s;
108: } else {
109: return $node->toString();
110: }
111: }
112:
1.2 damieng 113: # Escapes an attribute value
114: sub escape_attribute {
1.1 damieng 115: my ($s) = @_;
1.2 damieng 116: # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
117: #$s =~ s/&/&/sg;
118: #$s =~ s/</</sg;
119: #$s =~ s/>/>/sg;
120: $s =~ s/"/"/sg;
1.1 damieng 121: return $s;
122: }
123:
124: # Adds startouttext and endouttext where useful for the colorful editor
125: sub add_outtext {
126: my ($node) = @_;
127:
128: if ($node->nodeType == XML_DOCUMENT_NODE) {
129: my $root = $node->documentElement();
130: add_outtext($root);
131: return;
132: }
133: if ($node->nodeType != XML_ELEMENT_NODE) {
134: return;
135: }
136: if (string_in_array(\@simple_data, $node->nodeName)) {
137: return;
138: }
139: convert_paragraphs($node);
140: my $next;
141: my $in_outtext = 0;
142: for (my $child=$node->firstChild; defined $child; $child=$next) {
143: $next = $child->nextSibling;
144: if (!$in_outtext && inside_outtext($child)) {
145: add_startouttext($node, $child);
146: $in_outtext = 1;
147: } elsif ($in_outtext && !continue_outtext($child)) {
148: add_endouttext($node, $child);
149: $in_outtext = 0;
150: }
151: if (!$in_outtext) {
152: add_outtext($child);
153: }
154: }
155: if ($in_outtext) {
156: add_endouttext($node);
157: }
158: }
159:
160: # Returns 1 if this node should trigger the addition of startouttext before it
161: sub inside_outtext {
162: my ($node) = @_;
163: if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
164: return 1;
165: }
166: if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) {
167: if (contains_loncapa_block($node)) {
168: return 0;
169: } else {
170: return 1;
171: }
172: }
173: if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {
174: return 1;
175: }
176: return 0;
177: }
178:
179: # Returns 1 if the outtext environment can continue with this node
180: sub continue_outtext {
181: my ($node) = @_;
182: if (inside_outtext($node)) {
183: return 1;
184: }
185: if ($node->nodeType == XML_TEXT_NODE) {
186: return 1; # continue even if this is just spaces
187: }
188: return 0;
189: }
190:
191: # Returns 1 if the node contains a LON-CAPA block in a descendant.
192: sub contains_loncapa_block {
193: my ($node) = @_;
194: foreach my $child ($node->childNodes) {
195: if ($child->nodeType == XML_ELEMENT_NODE) {
196: if (string_in_array(\@loncapa_block, $child->nodeName)) {
197: return 1;
198: }
199: if (contains_loncapa_block($child)) {
200: return 1;
201: }
202: }
203: }
204: return 0;
205: }
206:
207: sub add_startouttext {
208: my ($parent, $before_node) = @_;
209: my $doc = $parent->ownerDocument;
210: if ($before_node->nodeType == XML_TEXT_NODE) {
211: # split space at the beginning of the node
212: if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) {
213: my $space_node = $doc->createTextNode($1);
214: $before_node->setData($2);
215: $parent->insertBefore($space_node, $before_node);
216: }
217: }
218: my $startouttext = $doc->createElement('startouttext');
219: $parent->insertBefore($startouttext, $before_node);
220: }
221:
222: sub add_endouttext {
223: my ($parent, $before_node) = @_;
224: my $doc = $parent->ownerDocument;
225: my $endouttext = $doc->createElement('endouttext');
226: my $before_before;
227: if (defined $before_node) {
228: $before_before = $before_node->previousSibling;
229: } else {
230: $before_before = $parent->lastChild;
231: }
232: if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
233: # split space at the end of the node
234: if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
235: $before_before->setData($1);
236: my $space_node = $doc->createTextNode($2);
237: if (defined $before_node) {
238: $parent->insertBefore($space_node, $before_node);
239: } else {
240: $parent->appendChild($space_node);
241: }
242: $before_node = $space_node;
243: }
244: }
245: if (defined $before_node) {
246: $parent->insertBefore($endouttext, $before_node);
247: } else {
248: $parent->appendChild($endouttext);
249: }
250: }
251:
252: # Convert paragraph children when one contains an inline response into content + <br>
253: # (the colorful editor does not support paragraphs containing inline responses)
254: sub convert_paragraphs {
255: my ($parent) = @_;
256: my $p_child_with_inline_response = 0;
257: foreach my $child ($parent->childNodes) {
258: if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
259: foreach my $child2 ($child->childNodes) {
260: if ($child2->nodeType == XML_ELEMENT_NODE) {
261: if (string_in_array(\@inline_responses, $child2->nodeName)) {
262: $p_child_with_inline_response = 1;
263: last;
264: }
265: }
266: }
267: }
268: if ($p_child_with_inline_response) {
269: last;
270: }
271: }
272: if ($p_child_with_inline_response) {
273: my $doc = $parent->ownerDocument;
274: my $next;
275: for (my $child=$parent->firstChild; defined $child; $child=$next) {
276: $next = $child->nextSibling;
277: if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
278: replace_by_children($child);
279: if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE ||
280: $next->nodeValue !~ /^\s*$/)) {
281: # we only add a br if there is something after
282: my $br = $doc->createElement('br');
283: $parent->insertBefore($br, $next);
284: }
285: }
286: }
287: }
288: }
289:
290: ##
291: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
292: # @param {Array<string>} array - reference to the array of strings
293: # @param {string} value - the string to look for
294: # @returns 1 if found, 0 otherwise
295: ##
296: sub string_in_array {
297: my ($array, $value) = @_;
298: foreach my $v (@{$array}) {
299: if ($v eq $value) {
300: return 1;
301: }
302: }
303: return 0;
304: }
305:
306: ##
307: # replaces a node by its children
308: # @param {Node} node - the DOM node
309: ##
310: sub replace_by_children {
311: my ($node) = @_;
312: my $parent = $node->parentNode;
313: my $next;
314: my $previous;
315: for (my $child=$node->firstChild; defined $child; $child=$next) {
316: $next = $child->nextSibling;
317: if ((!defined $previous || !defined $next) &&
318: $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
319: next; # do not keep first and last whitespace nodes
320: } else {
321: if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
322: # remove whitespace at the beginning
323: my $value = $child->nodeValue;
324: $value =~ s/^\s+//;
325: $child->setData($value);
326: }
327: if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
328: # and at the end
329: my $value = $child->nodeValue;
330: $value =~ s/\s+$//;
331: $child->setData($value);
332: }
333: }
334: $node->removeChild($child);
335: $parent->insertBefore($child, $node);
336: $previous = $child;
337: }
338: $parent->removeChild($node);
339: }
340:
341: 1;
342: __END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>