#!/usr/bin/perl
# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
#
###
# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered
###############################################################################
## ##
## ORGANIZATION OF THIS PERL SCRIPT ##
## 1. Notes ##
## 2. Get command line arguments ##
## 3. First pass through (grab distribution-specific information) ##
## 4. Second pass through (parse out what is not necessary) ##
## 5. Third pass through (translate markup according to specified mode) ##
## 6. Functions (most all just format contents of different markup tags) ##
## 7. POD (plain old documentation, CPAN style) ##
## ##
###############################################################################
# ----------------------------------------------------------------------- Notes
#
# I am using a multiple pass-through approach to parsing
# the xfml file. This saves memory and makes sure the server
# will never be overloaded.
#
# This is meant to parse files meeting the piml document type.
# See xfml.dtd. XFML=XML Filtering Markup Language.
use HTML::TokeParser;
use strict;
unless (@ARGV) {
print <<END;
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}
my %eh;
my %ih;
my $tofilter=shift @ARGV;
open IN,"<$tofilter";
my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
# Define handling methods for mode-dependent text rendering
my %conditions; &cc;
$parser->{textify}={
xfml => \&format_xfml,
'when:name' => \&format_when_name,
'when:attribute' => \&format_when_attribute,
'when:cdata' => \&format_when_cdata,
'choice:include' => \&format_choice_include,
'choice:exclude' => \&format_choice_exclude,
};
my $text;
my $xfml;
my $wloc=0;
my %eha;
while (my $token = $parser->get_tag('xfml')) {
&format_xfml(@{$token});
$text = $parser->get_text('/xfml');
$token = $parser->get_tag('/xfml');
}
#open IN,"<$tofilter";
my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
$parser = HTML::TokeParser->new(\$parsestring2) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
my $token;
my $hloc=0;
my %ts;
my $tr;
my $echild=0;
my $exclude=0;
my $excluden=0;
my $excludea=0;
my $et=0;
my $cdata='';
my $excludenold=0;
my $ign=0;
while ($token = $parser->get_token()) {
if ($token->[0] eq 'D') {
print $token->[1];
}
elsif ($token->[0] eq 'C') {
print $token->[1];
}
elsif ($token->[0] eq 'S') {
$cdata='';
$hloc++;
# if token can be excluded, then pretend it is until all conditions are
# run (eha); then output during end tag processing
# else, output
# a token can be excluded when it is an eh key, or a child node of
# an eh key
if ($eh{$token->[1]}) {
$echild=$token->[1];
}
if ($echild) {
# run through names for echild
# then attributes and/or values and/or cdata
my $name=$token->[1];
my @attributes=@{$token->[3]};
my %atthash=%{$token->[2]};
foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
foreach my $namematch (@{$namemlist}) {
my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
if ($name=~/$nm/) {
$excludenold=$excluden;
$excluden++;
foreach my $attributemlist
(@{$eha{$echild}->{'attribute'}}) {
foreach my $attributematch
(@{$attributemlist}) {
my ($an,$am)=
split(/\=/,$attributematch,2);
$am=~s/^.//;
$am=~s/.$//;
if ($atthash{$an}) {
if ($atthash{$an}=~/$am/) {
$excludea++;
}
}
}
}
}
}
}
$tr.=$token->[4];
}
else {
print $token->[4];
}
}
elsif ($token->[0] eq 'E') {
if ($echild) {
$tr.=$token->[2];
if ($excluden) {
my $i=0;
CDATALOOP:
foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
$i++;
my $j;
foreach my $cdatamatch (@{$cdatamlist}) {
$j++;
# print "CDATA: $cdatamatch, $cdata\n";
my $cm=$cdatamatch;
my $not=0;
if ($cm=~/\!/) {
$not=1;
$cm=~s/^.//;
}
$cm=~s/^.//; $cm=~s/.$//;
if ($not and $cdata=~/$cm/) {
$ign=1; $exclude=0;
}
if ((!$not and $cdata!~/$cm/)
or ($not and $cdata=~/$cm/)) {
# nothing happens
# $exclude=0;
}
elsif (($not and $cdata!~/$cm/)
or (!$not and $cdata=~/$cm/)) {
$exclude++ unless $ign;
}
}
}
}
}
if ($eh{$token->[1]}) {
$ign=0;
$echild=0;
if (!$exclude and !$excludea) {
print $tr;
# print $token->[2];
$tr='';
}
elsif ($exclude>0 or $excludea>0) {
# print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
$exclude=0; $excluden=0; $excludea=0;
$tr='';
}
$exclude=0; $excluden=0; $excludea=0;
}
else {
if ($echild) {
# $tr.=$token->[2];
}
else {
print $token->[2];
$tr='';
}
}
$hloc--;
}
elsif ($token->[0] eq 'T') {
if ($echild) {
$tr.=$token->[1];
$cdata=$token->[1];
}
else {
print $token->[1];
$tr='';
}
}
}
# ------------------------------------------------------------ clear conditions
sub cc {
@{$conditions{'name'}}=(); pop @{$conditions{'name'}};
@{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
@{$conditions{'value'}}=(); pop @{$conditions{'value'}};
@{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}
# --------------------------------------------------------- Format xfml section
sub format_xfml {
my (@tokeninfo)=@_;
return '';
}
# ---------------------------------------------------- Format when:name section
sub format_when_name {
my (@tokeninfo)=@_;
$wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'name'}},$att_match;
my $text=&trim($parser->get_text('/when:name'));
$parser->get_tag('/when:name');
$wloc--;
&cc unless $wloc;
return '';
}
# ----------------------------------------------- Format when:attribute section
sub format_when_attribute {
my (@tokeninfo)=@_;
$wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'attribute'}},$att_match;
my $text=&trim($parser->get_text('/when:attribute'));
$parser->get_tag('/when:attribute');
$wloc--;
&cc unless $wloc;
return '';
}
# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
my (@tokeninfo)=@_;
$wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'cdata'}},$att_match;
my $text=&trim($parser->get_text('/when:cdata'));
$parser->get_tag('/when:cdata');
$wloc--;
&cc unless $wloc;
return '';
}
# ----------------------------------------------- Format choice:include section
sub format_choice_include {
my (@tokeninfo)=@_;
my $text=&trim($parser->get_text('/choice:include'));
$parser->get_tag('/choice:include');
$ih{$tokeninfo[2]->{'match'}}++;
return '';
}
# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
my (@tokeninfo)=@_;
my $text=&trim($parser->get_text('/choice:exclude'));
$parser->get_tag('/choice:exclude');
$eh{$tokeninfo[2]->{'nodename'}}++;
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
[@{$conditions{'name'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
[@{$conditions{'attribute'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
[@{$conditions{'value'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
[@{$conditions{'cdata'}}];
return '';
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>