File:  [LON-CAPA] / modules / damieng / clean_xml / clean_xml.pl
Revision 1.1: download - view: text, annotated - select for diffs
Fri Apr 17 15:35:01 2015 UTC (9 years, 5 months ago) by damieng
Branches: MAIN
CVS tags: HEAD
added clean_xml and graphical_editor

    1: #!/usr/bin/perl
    2: 
    3: use strict;
    4: use utf8;
    5: use warnings;
    6: 
    7: use File::Basename;
    8: use Try::Tiny;
    9: 
   10: use lib dirname(__FILE__);
   11: 
   12: use pre_xml;
   13: use html_to_xml;
   14: use post_xml;
   15: 
   16: 
   # Script entry point.
   # Takes exactly one argument: a file to convert, or a directory to convert
   # recursively. Exits with status 1 on a usage error or when the argument is
   # neither a regular file nor a directory.

   # Path names are decoded to character strings below, and they are echoed both
   # in progress output (STDOUT) and in error messages (STDERR), so both streams
   # need an encoding layer.
   binmode(STDOUT, ':encoding(UTF-8)');
   binmode(STDERR, ':encoding(UTF-8)');

   if (scalar(@ARGV) != 1) {
     print STDERR "Usage: perl clean_xml.pl file|directory\n";
     exit(1);
   }

   # find the command-line argument encoding
   use I18N::Langinfo qw(langinfo CODESET);
   my $codeset = langinfo(CODESET);
   use Encode qw(decode);
   @ARGV = map { decode $codeset, $_ } @ARGV;

   my $pathname = "$ARGV[0]";
   if (-d $pathname) {
     # Strip trailing slashes so that joined paths do not contain "//",
     # but leave a lone "/" (the root directory) untouched.
     $pathname =~ s{(?<=[^/])/+$}{};
     my $start = time();
     my ($converted, $failures) = convert_dir($pathname);
     my $end = time();
     my $elapsed = $end - $start;
     my $minutes = int($elapsed / 60);
     my $seconds = $elapsed - ($minutes*60);
     print "\n".scalar(@$converted)." files were converted in $minutes minutes $seconds seconds\n";
     if (scalar(@$failures) > 0) {
       print "\n".scalar(@$failures)." files need a manual fix:\n";
       foreach my $failure (@$failures) {
         print "  $failure\n";
       }
     }
   } elsif (-f $pathname) {
     convert_file($pathname);
   } else {
     # A missing path or a special file used to be ignored silently with a
     # successful exit status; report it instead.
     print STDERR "$pathname is not a file or a directory\n";
     exit(1);
   }
   49: 
   # Converts a directory recursively, selecting only non-version
   # .problem/.exam/.survey/.html/.htm/.library files.
   # Entries whose name starts with a period are skipped.
   #
   # Parameter: $dirpath - path of the directory to scan.
   # Returns a list of two array references: the files that were converted,
   # and the files that could not be converted.
   # Dies if the directory (or one of its subdirectories) cannot be opened.
   #
   # NOTE(review): $entry comes straight from readdir while $dirpath was decoded
   # from the command line; names with non-ASCII characters may need the same
   # decoding - to be confirmed.
   sub convert_dir {
     my ($dirpath) = @_;

     # Read the whole listing and release the handle before recursing, so that
     # deep trees do not keep one directory handle open per nesting level.
     # Sorting makes the processing order and the final report reproducible.
     opendir(my $dh, $dirpath) or die "cannot open directory $dirpath: $!";
     my @entries = sort grep { !m/^\./ } readdir($dh); # ignore entries starting with a period
     closedir($dh);

     my @converted = ();
     my @failures = ();
     foreach my $entry (@entries) {
       my $pathname = $dirpath.'/'.$entry;
       if (-d $pathname) {
         my ($new_converted, $new_failures) = convert_dir($pathname);
         push(@converted, @$new_converted);
         push(@failures, @$new_failures);
       } elsif (-f $pathname) {
         # check that the file ends in .problem, .exam, .survey, .html, .htm or
         # .library, but not in .number.* (a stored version); both tests ignore
         # case so that e.g. foo.1.HTML is recognized as a version too
         if ($pathname =~ /\.(?:problem|exam|survey|html?|library)$/i &&
             $pathname !~ /\.[0-9]+\.[a-z]+$/i) {
           try {
             convert_file($pathname);
             push(@converted, $pathname);
           } catch {
             print "$_\n"; # continue processing even if a file cannot be converted
             push(@failures, $pathname);
           };
         }
       }
     }
     return (\@converted, \@failures);
   }
   83: 
   # Converts one file, creating a .xml file in the same directory
   # (the original path with ".xml" appended is handed to post_xml).
   #
   # Parameter: $pathname - path of the file to convert.
   # The three stages pre_xml, html_to_xml and post_xml are run in that order,
   # each one receiving the result of the previous one.
   # Dies with "<stage> error for <path>: <cause>" when a stage throws.
   sub convert_file {
     my ($pathname) = @_;

     print "converting $pathname...\n";

     # runs a single stage and tags any exception with the stage label
     my $run_stage = sub {
       my ($label, $stage) = @_;
       return try {
         $stage->();
       } catch {
         die "$label error for $pathname: $_";
       };
     };

     my $source = $run_stage->('pre_xml', sub { pre_xml::pre_xml($pathname) });
     my $xml = $run_stage->('html_to_xml', sub { html_to_xml::html_to_xml($source) });

     # name of the new file
     my $target = $pathname.'.xml';
     return $run_stage->('post_xml', sub { post_xml::post_xml($xml, $target) });
   }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>