#!/usr/bin/perl
use strict;
use utf8;
use warnings;
use File::Basename;
use Try::Tiny;
use lib dirname(__FILE__);
use pre_xml;
use html_to_xml;
use post_xml;
# Standard output is written as UTF-8.
binmode(STDOUT, ':encoding(UTF-8)');
# Exactly one command-line argument (a file or a directory) is expected;
# otherwise print the usage message on STDERR and exit with status 1.
unless (scalar(@ARGV) == 1) {
    print STDERR "Usage: perl clean_xml.pl file|directory\n";
    exit(1);
}
# Find the command-line argument encoding (taken from the current locale).
use I18N::Langinfo qw(langinfo CODESET);
my $codeset = langinfo(CODESET);
use Encode qw(decode);
# Decode the raw arguments into Perl character strings.
@ARGV = map { decode $codeset, $_ } @ARGV;
my $pathname = $ARGV[0];
if (-d $pathname) {
    # Directory mode: convert recursively, then report the totals.
    # Remove a trailing slash, but keep the root directory '/' intact
    # (stripping it would leave an empty path that cannot be opened).
    $pathname =~ s/\/$// unless ($pathname eq '/');
    my $start = time();
    my ($converted, $failures) = convert_dir($pathname);
    my $end = time();
    my $elapsed = $end - $start;
    my $minutes = int($elapsed / 60);
    my $seconds = $elapsed - ($minutes*60);
    print "\n".scalar(@$converted)." files were converted in $minutes minutes $seconds seconds\n";
    if (scalar(@$failures) > 0) {
        print "\n".scalar(@$failures)." files need a manual fix:\n";
        foreach my $failure (@$failures) {
            print "  $failure\n";
        }
    }
} elsif (-f $pathname) {
    # Single-file mode: an exception from convert_file is not caught here,
    # so it ends the script.
    convert_file($pathname);
} else {
    # Neither a regular file nor a directory (for instance a path that does not
    # exist): report it instead of silently exiting with a success status.
    # STDERR has no encoding layer, so the path is encoded back to the locale encoding.
    print STDERR "Error: ".Encode::encode($codeset, $pathname)." is not a file or a directory\n";
    exit(1);
}
# Converts a directory recursively, selecting only non-version
# .problem/.exam/.survey/.html/.htm/.library files.
#
# Parameters:
#   $dirpath - path of the directory to scan; entries are appended after a '/'
# Returns:
#   a list of two array references: the paths of the files that were converted,
#   and the paths of the files that could not be converted.
# Dies if the directory (or one of its subdirectories) cannot be opened.
#
# NOTE(review): readdir returns undecoded names while the top-level script
# passes a decoded path; confirm that entries with non-ASCII names are found.
sub convert_dir {
    my ($dirpath) = @_;
    my @converted = ();
    my @failures = ();
    opendir(my $dh, $dirpath) or die "Cannot open directory $dirpath: $!";
    # Explicit defined() test: the loop must end only when readdir runs out of
    # entries, without relying on the implicit definedness check of while().
    while (defined(my $entry = readdir($dh))) {
        next if ($entry =~ m/^\./); # ignore entries starting with a period
        my $pathname = $dirpath.'/'.$entry;
        if (-d $pathname) {
            # subdirectory: recurse and merge its results into ours
            my ($new_converted, $new_failures) = convert_dir($pathname);
            push(@converted, @$new_converted);
            push(@failures, @$new_failures);
        } elsif (-f $pathname) {
            # check that the file ends in .problem, .exam, .survey, .html, .htm or .library
            next unless ($pathname =~ /\.(?:problem|exam|survey|html?|library)$/i);
            # skip version files (.number.extension); case-insensitive like the
            # extension check above, so that foo.1.PROBLEM is skipped as well
            next if ($pathname =~ /\.[0-9]+\.[a-z]+$/i);
            try {
                convert_file($pathname);
                push(@converted, $pathname);
            } catch {
                print "$_\n"; # continue processing even if a file cannot be converted
                push(@failures, $pathname);
            };
        }
    }
    closedir($dh);
    return((\@converted, \@failures));
}
# Converts a single file by running it through three stages in order:
# pre_xml::pre_xml on the path, html_to_xml::html_to_xml on the previous
# result, and post_xml::post_xml with that result and the target path
# (the original path with '.xml' appended, i.e. in the same directory).
#
# Parameters:
#   $pathname - path of the file to convert
# Dies with a message naming the failing stage and the file as soon as one
# stage throws; the remaining stages are then not run.
sub convert_file {
    my ($pathname) = @_;
    print "converting $pathname...\n";
    # name of the new file
    my $newpath = "$pathname.xml";
    # each stage hands its result over to the next one
    my $textref = try {
        pre_xml::pre_xml($pathname);
    } catch {
        die "pre_xml error for $pathname: $_";
    };
    $textref = try {
        html_to_xml::html_to_xml($textref);
    } catch {
        die "html_to_xml error for $pathname: $_";
    };
    try {
        post_xml::post_xml($textref, $newpath);
    } catch {
        die "post_xml error for $pathname: $_";
    };
}
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>