Annotation of modules/damieng/clean_xml/clean_xml.pl, revision 1.1
1.1 ! damieng 1: #!/usr/bin/perl
! 2:
! 3: use strict;
! 4: use utf8;
! 5: use warnings;
! 6:
! 7: use File::Basename;
! 8: use Try::Tiny;
! 9:
! 10: use lib dirname(__FILE__);
! 11:
! 12: use pre_xml;
! 13: use html_to_xml;
! 14: use post_xml;
! 15:
! 16:
# Progress messages and the final summary are written to STDOUT as UTF-8.
binmode(STDOUT, ':encoding(UTF-8)');

if (scalar(@ARGV) != 1) {
    print STDERR "Usage: perl clean_xml.pl file|directory\n";
    exit(1);
}

# find the command-line argument encoding
use I18N::Langinfo qw(langinfo CODESET);
my $codeset = langinfo(CODESET);
use Encode qw(decode);
# decode the arguments from the locale's codeset into Perl character strings
@ARGV = map { decode $codeset, $_ } @ARGV;

my $pathname = "$ARGV[0]";
if (-d $pathname) {
    # Strip trailing slashes so that paths built by convert_dir do not contain
    # doubled separators; a path made only of slashes (the root) is left alone
    # so that it never becomes the empty string.
    $pathname =~ s{(?<=[^/])/+\z}{};
    my $start = time();
    my ($converted, $failures) = convert_dir($pathname);
    my $end = time();
    my $elapsed = $end - $start;
    my $minutes = int($elapsed / 60);
    my $seconds = $elapsed - ($minutes*60);
    print "\n".scalar(@$converted)." files were converted in $minutes minutes $seconds seconds\n";
    if (scalar(@$failures) > 0) {
        print "\n".scalar(@$failures)." files need a manual fix:\n";
        foreach my $failure (@$failures) {
            print "  $failure\n";
        }
    }
} elsif (-f $pathname) {
    # a single file: any conversion error is fatal and reported by die
    convert_file($pathname);
} else {
    # Neither a directory nor a regular file (e.g. a mistyped path): report it
    # instead of silently exiting with a success status.
    print STDERR "Error: $pathname is not a file or directory\n";
    exit(1);
}
! 49:
# Converts a directory recursively, selecting only non-version
# .problem/.exam/.survey/.html/.htm/.library files (extensions are matched
# case-insensitively). Entries whose name starts with a period are ignored.
# Parameter: $dirpath - path of the directory to scan; entry names are
#            appended to it with a '/' separator.
# Returns a reference to the list of files that were converted, and a
# reference to the list of files that could not be converted.
# Dies if a directory cannot be opened.
sub convert_dir {
    my ($dirpath) = @_;

    my @converted = ();
    my @failures = ();

    # Read the whole listing first and close the handle right away: no handle
    # stays open during recursion, and the .xml files created by convert_file
    # cannot interfere with the iteration.
    opendir(my $dh, $dirpath) or die "cannot open directory $dirpath: $!";
    my @names = grep { !m/^\./ } readdir($dh); # ignore entries starting with a period
    closedir($dh);

    # sorted so that processing order and the reported lists are deterministic
    foreach my $entry (sort @names) {
        my $pathname = $dirpath.'/'.$entry;
        if (-d $pathname) {
            my ($new_converted, $new_failures) = convert_dir($pathname);
            push(@converted, @$new_converted);
            push(@failures, @$new_failures);
        } elsif (-f $pathname) {
            # check that the file ends in .problem, .exam, .survey, .html, .htm
            # or .library but not .number.* (a versioned copy); both tests
            # ignore case so that e.g. foo.3.PROBLEM is still skipped
            if ($pathname =~ /\.(?:problem|exam|survey|html?|library)$/i &&
                    $pathname !~ /\.[0-9]+\.[a-z]+$/i) {
                try {
                    convert_file($pathname);
                    push(@converted, $pathname);
                } catch {
                    print "$_\n"; # continue processing even if a file cannot be converted
                    push(@failures, $pathname);
                };
            }
        }
    }
    return((\@converted, \@failures));
}
! 83:
# Converts a single file, writing the result next to it as "<pathname>.xml".
# The conversion runs three stages in order: pre_xml, html_to_xml, post_xml.
# Parameter: $pathname - path of the file to convert.
# Dies with "<stage> error for <pathname>: <cause>" if a stage fails.
sub convert_file {
    my ($pathname) = @_;

    # the output file sits beside the source, with .xml appended
    my $newpath = $pathname.'.xml';

    print "converting $pathname...\n";

    # Runs one stage and rethrows any failure prefixed with the stage name
    # and the file being converted.
    my $run_stage = sub {
        my ($stage, $action) = @_;
        return try {
            $action->();
        } catch {
            die "$stage error for $pathname: $_";
        };
    };

    my $textref = $run_stage->('pre_xml', sub { pre_xml::pre_xml($pathname) });
    $textref = $run_stage->('html_to_xml', sub { html_to_xml::html_to_xml($textref) });
    return $run_stage->('post_xml', sub { post_xml::post_xml($textref, $newpath) });
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>