Annotation of modules/damieng/clean_xml/xml_to_loncapa.pl, revision 1.1
1.1 ! damieng 1: #!/usr/bin/perl
! 2:
! 3: # This takes a well-formed XML file as input, and converts it to LON-CAPA syntax.
! 4:
! 5: use strict;
! 6: use utf8;
! 7: use warnings;
! 8:
! 9: use XML::LibXML;
! 10:
! 11: binmode(STDOUT, ':encoding(UTF-8)');
! 12:
! 13: if (scalar(@ARGV) != 1) {
! 14: print STDERR "Usage: perl xml_to_loncapa.pl file.xml\n";
! 15: exit(1);
! 16: }
! 17:
! 18: # find the command-line argument encoding
! 19: use I18N::Langinfo qw(langinfo CODESET);
! 20: my $codeset = langinfo(CODESET);
! 21: use Encode qw(decode);
! 22: @ARGV = map { decode $codeset, $_ } @ARGV;
! 23:
! 24: my $pathname = "$ARGV[0]";
! 25: if (-f $pathname) {
! 26: convert_file($pathname);
! 27: }
! 28:
! 29: # Converts a file, creating a .loncapa file in the same directory.
! 30: # TODO: use the right extension based on content (or just ouput content)
! 31: sub convert_file {
! 32: my ($pathname) = @_;
! 33:
! 34: # create a name for the new file
! 35: my $newpath = $pathname.'.loncapa';
! 36:
! 37: print "converting $pathname...\n";
! 38:
! 39: my $dom_doc = XML::LibXML->load_xml(location => $pathname);
! 40:
! 41: open my $out, '>:encoding(UTF-8)', $newpath;
! 42: print $out node_to_string($dom_doc);
! 43: close $out;
! 44: }
! 45:
! 46: sub node_to_string {
! 47: my ($node) = @_;
! 48:
! 49: if ($node->nodeType == XML_DOCUMENT_NODE) {
! 50: my $root = $node->documentElement();
! 51: return node_to_string($root);
! 52: } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
! 53: my $parent = $node->parentNode;
! 54: my $parent_name = $parent->nodeName;
! 55: my $grandparent_name;
! 56: if (defined $parent->parentNode) {
! 57: $grandparent_name = $parent->parentNode->nodeName;
! 58: }
! 59: my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
! 60: if (string_in_array(\@no_escape, $parent_name) &&
! 61: ($parent_name ne 'answer' ||
! 62: (defined $grandparent_name &&
! 63: $grandparent_name ne 'numericalresponse' &&
! 64: $grandparent_name ne 'formularesponse'))) {
! 65: return $node->nodeValue;
! 66: } else {
! 67: return $node->toString();
! 68: }
! 69: } elsif ($node->nodeType == XML_ELEMENT_NODE) {
! 70: my $s = '';
! 71: my $tag = $node->nodeName;
! 72: $s .= "<$tag";
! 73: my @attributes = $node->attributes();
! 74: foreach my $attribute (@attributes) {
! 75: $s .= ' ';
! 76: $s .= $attribute->nodeName;
! 77: $s .= '="';
! 78: $s .= escape($attribute->nodeValue);
! 79: $s .= '"';
! 80: }
! 81: if ($node->hasChildNodes()) {
! 82: $s .= '>';
! 83: foreach my $child ($node->childNodes) {
! 84: $s .= node_to_string($child);
! 85: }
! 86: $s .= "</$tag>";
! 87: } else {
! 88: $s .= '/>';
! 89: }
! 90: return $s;
! 91: } else {
! 92: return $node->toString();
! 93: }
! 94: }
! 95:
! 96: # Escapes a string for LON-CAPA output (used for text nodes, not attribute values)
! 97: sub escape {
! 98: my ($s) = @_;
! 99: $s =~ s/&/&/sg;
! 100: $s =~ s/</</sg;
! 101: $s =~ s/>/>/sg;
! 102: # quot and apos do not need to be escaped outside attribute values
! 103: return $s;
! 104: }
! 105:
! 106: ##
! 107: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
! 108: # @param {Array<string>} array - reference to the array of strings
! 109: # @param {string} value - the string to look for
! 110: # @returns 1 if found, 0 otherwise
! 111: ##
! 112: sub string_in_array {
! 113: my ($array, $value) = @_;
! 114: foreach my $v (@{$array}) {
! 115: if ($v eq $value) {
! 116: return 1;
! 117: }
! 118: }
! 119: return 0;
! 120: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>