version 1.1, 2002/01/29 10:42:42
|
version 1.4, 2002/04/08 10:52:24
|
Line 1
|
Line 1
|
#!/usr/bin/perl |
#!/usr/bin/perl |
|
|
|
# -------------------------------------------------------- Documentation notice |
|
# Run "perldoc ./lpml_parse.pl" in order to best view the software |
|
# documentation internalized in this program. |
|
|
|
# --------------------------------------------------------- License Information |
|
# The LearningOnline Network with CAPA |
|
# piml_parse.pl - Linux Packaging Markup Language parser |
|
# |
|
# $Id$ |
|
# |
|
# Written by Scott Harrison, codeharrison@yahoo.com |
|
# |
|
# Copyright Michigan State University Board of Trustees |
|
# |
|
# This file is part of the LearningOnline Network with CAPA (LON-CAPA). |
|
# |
|
# LON-CAPA is free software; you can redistribute it and/or modify |
|
# it under the terms of the GNU General Public License as published by |
|
# the Free Software Foundation; either version 2 of the License, or |
|
# (at your option) any later version. |
|
# |
|
# LON-CAPA is distributed in the hope that it will be useful, |
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
# GNU General Public License for more details. |
|
# |
|
# You should have received a copy of the GNU General Public License |
|
# along with LON-CAPA; if not, write to the Free Software |
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
# |
|
# /home/httpd/html/adm/gpl.txt |
|
# |
|
# http://www.lon-capa.org/ |
|
# |
# YEAR=2002 |
# YEAR=2002 |
# 1/26,1/27,1/28 - Scott Harrison |
# 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison |
|
# |
|
### |
|
|
# Read in 2 XML file; first is the filter specification, the second |
# Read in 2 XML file; first is the filter specification, the second |
# is the XML file to be filtered |
# is the XML file to be filtered |
|
|
|
############################################################################### |
|
## ## |
|
## ORGANIZATION OF THIS PERL SCRIPT ## |
|
## 1. Notes ## |
|
## 2. Read in filter file ## |
|
## 3. Initialize and clear conditions ## |
|
## 4. Run through and apply clauses ## |
|
## ## |
|
############################################################################### |
|
|
|
# ----------------------------------------------------------------------- Notes |
|
# |
|
# This is meant to parse files meeting the xfml document type. |
|
# See xfml.dtd. XFML=XML Filtering Markup Language. |
|
|
use HTML::TokeParser; |
use HTML::TokeParser; |
use strict; |
use strict; |
|
|
unless (@ARGV) { |
unless (@ARGV) { |
print <<END; |
print(<<END); |
Incorrect invocation. |
Incorrect invocation. |
Example usages: |
Example usages: |
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml |
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml |
Line 19 END
|
Line 70 END
|
} |
} |
|
|
my %eh; |
my %eh; |
my %ih; |
|
|
# ---------------------------------------------- Read in filter file from @ARGV |
my $tofilter=shift @ARGV; |
my $tofilter=shift @ARGV; |
my @lines=<>; my $parsestring=join('',@lines); undef @lines; |
open(IN,"<$tofilter"); my @lines=<IN>; |
|
my $parsestring=join('',@lines); undef @lines; close IN; |
my $parser = HTML::TokeParser->new(\$parsestring) or |
my $parser = HTML::TokeParser->new(\$parsestring) or |
die('can\'t create TokeParser object'); |
die('can\'t create TokeParser object'); |
$parser->xml_mode('1'); |
$parser->xml_mode('1'); |
|
|
# Define handling methods for mode-dependent text rendering |
# --------------------------------------------- initialize and clear conditions |
|
|
my %conditions; &cc; |
my %conditions; &cc; |
|
|
|
# Define handling methods for mode-dependent text rendering |
$parser->{textify}={ |
$parser->{textify}={ |
xfml => \&format_xfml, |
'xfml' => \&format_xfml, |
'when:name' => \&format_when_name, |
'when:name' => \&format_when_name, |
'when:attribute' => \&format_when_attribute, |
'when:attribute' => \&format_when_attribute, |
'when:cdata' => \&format_when_cdata, |
'when:cdata' => \&format_when_cdata, |
'choice:include' => \&format_choice_include, |
|
'choice:exclude' => \&format_choice_exclude, |
'choice:exclude' => \&format_choice_exclude, |
|
'clause' => \&format_clause, |
}; |
}; |
|
|
my $text; |
my $text; |
Line 44 my $xfml;
|
Line 97 my $xfml;
|
my $wloc=0; |
my $wloc=0; |
my %eha; |
my %eha; |
|
|
while (my $token = $parser->get_tag('xfml')) { |
# ----------------------------------------------- Run through and apply clauses |
&format_xfml(@{$token}); |
my @lines2=<>; my $output=join('',@lines2); undef @lines2; |
$text = $parser->get_text('/xfml'); |
my $lparser = HTML::TokeParser->new(\$output) or |
# print $xfml; |
|
# print $text; |
|
$token = $parser->get_tag('/xfml'); |
|
} |
|
|
|
open IN,"<$tofilter"; |
|
my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2; |
|
$parser = HTML::TokeParser->new(\$parsestring2) or |
|
die('can\'t create TokeParser object'); |
die('can\'t create TokeParser object'); |
$parser->xml_mode('1'); |
$lparser->xml_mode('1'); |
|
my $parsestring2; |
my $token; |
while (my $token = $parser->get_tag('clause')) { |
my $hloc=0; |
$parsestring2=$output; |
my %ts; |
$lparser = HTML::TokeParser->new(\$parsestring2); |
my $tr; |
$lparser->xml_mode('1'); |
my $echild=0; |
$output=''; |
my $exclude=0; |
&format_clause(@{$token}); |
my $excluden=0; |
$text = $parser->get_text('/clause'); |
my $excludea=0; |
$token = $parser->get_tag('/clause'); |
my $et=0; |
|
my $cdata=''; |
my $token=''; |
while ($token = $parser->get_token()) { |
my $ttype=''; |
# from HTML::TokeParser documentation: |
my $excludeflag=0; |
# ["S", $tag, %$attr, @$attrseq, $text] |
my $outcache=''; |
# ["E", $tag, $text] |
while ($token = $lparser->get_token()) { |
# ["T", $text, $is_data] |
if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } |
# ["C", $text] |
elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } |
# ["D", $text] |
elsif ($token->[0] eq 'T') { |
# ["PI", $token0, $text] |
if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' |
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, |
or $ttype eq 'E') { |
# @{$conditions{'name'}}; |
$output.=$token->[1]; |
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, |
} |
# @{$conditions{'attribute'}}; |
else { |
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, |
$outcache.=$token->[1]; |
# @{$conditions{'value'}}; |
} |
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, |
|
# @{$conditions{'cdata'}}; |
|
if ($token->[0] eq 'D') { |
|
print $token->[1]; |
|
} |
|
elsif ($token->[0] eq 'C') { |
|
print $token->[1]; |
|
} |
|
elsif ($token->[0] eq 'S') { |
|
$cdata=''; |
|
$hloc++; |
|
# if token can be excluded, then pretend it is until all conditions are |
|
# run (eha); then output during end tag processing |
|
# else, output |
|
|
|
# a token can be excluded when it is an eh key, or a child node of |
|
# an eh key |
|
|
|
if ($eh{$token->[1]}) { |
|
$echild=$token->[1]; |
|
# print "ECHILD=$echild\n"; |
|
} |
} |
if ($echild) { |
elsif ($token->[0] eq 'S') { |
# run through names for echild |
if ($eh{$token->[1]} or $excludeflag==1) { |
# then attributes and/or values and/or cdata |
$ttype=''; |
my $name=$token->[1]; |
$excludeflag=1; |
my @attributes=@{$token->[3]}; |
$outcache.=$token->[4]; |
my %atthash=%{$token->[2]}; |
} |
foreach my $namemlist (@{$eha{$echild}->{'name'}}) { |
else { |
foreach my $namematch (@{$namemlist}) { |
$ttype='S'; |
my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//; |
$output.=$token->[4]; |
if ($name=~/$nm/) { |
} |
# print "NMATCH: $nm ($name)\n"; |
if ($excludeflag==1) { |
$excluden++; |
|
foreach my $attributemlist |
|
(@{$eha{$echild}->{'attribute'}}) { |
|
foreach my $attributematch |
|
(@{$attributemlist}) { |
|
my ($an,$am)= |
|
split(/\=/,$attributematch,2); |
|
$am=~s/^.//; |
|
$am=~s/.$//; |
|
# print 'AM:'."($an,$am)\t"; |
|
# print 'ATT:'.join(',',%atthash)."\n"; |
|
if ($atthash{$an}) { |
|
if ($atthash{$an}=~/$am/) { |
|
$excludea++; |
|
# print "AMATCH: $am (". |
|
# join(',', |
|
# @attributes) |
|
# ."\n"; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
} |
$tr.=$token->[4]; |
|
} |
} |
else { |
elsif ($token->[0] eq 'E') { |
print $token->[4]; |
if ($eh{$token->[1]} and $excludeflag==1) { |
|
$ttype='E'; |
|
$excludeflag=0; |
|
$outcache.=$token->[2]; |
|
my $retval=&evalconditions($outcache); |
|
if (&evalconditions($outcache)) { |
|
$output.=$outcache; |
|
} |
|
else { |
|
$output.='<!-- FILTERED OUT -->'; |
|
} |
|
$outcache=''; |
|
} |
|
elsif ($excludeflag==1) { |
|
$ttype=''; |
|
$outcache.=$token->[2]; |
|
} |
|
else { |
|
$output.=$token->[2]; |
|
$ttype='E'; |
|
} |
} |
} |
} |
} |
elsif ($token->[0] eq 'E') { |
&cc; |
if ($echild) { |
} |
$tr.=$token->[2]; |
print $output; |
if ($excluden) { |
|
foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) { |
# -------------------------------------------------------------- evalconditions |
foreach my $cdatamatch (@{$cdatamlist}) { |
sub evalconditions { |
# print "CDATA: $cdatamatch, $cdata\n"; |
my ($parsetext)=@_; |
my $cm=$cdatamatch; |
my $eparser = HTML::TokeParser->new(\$parsetext); |
my $not=0; |
unless (@{$conditions{'name'}} or |
if ($cm=~/\!/) { |
@{$conditions{'attribute'}}) { |
$not=1; |
return 0; |
$cm=~s/^.//; |
} |
|
my $nameflag=0; |
|
my $cdataflag=0; |
|
my $matchflag=0; |
|
my $Ttoken=''; |
|
while (my $token = $eparser->get_token()) { |
|
if ($token->[0] eq 'S') { |
|
foreach my $name (@{$conditions{'name'}}) { |
|
my $flag=0; |
|
my $match=$name; |
|
if ($match=~/^\!/) { |
|
$match=~s/^\!//g; |
|
$flag=1; |
|
} |
|
$match=~s/^\///g; |
|
$match=~s/\/$//g; |
|
if ((!$flag and $token->[1]=~/$match/) or |
|
($flag and $token->[1]!~/$match/)) { |
|
$nameflag=1; |
|
} |
|
} |
|
$Ttoken=''; |
|
} |
|
elsif ($token->[0] eq 'E') { |
|
foreach my $name (@{$conditions{'name'}}) { |
|
my $flag=0; |
|
my $match=$name; |
|
if ($match=~/^\!/) { |
|
$match=~s/^\!//g; |
|
$flag=1; |
|
} |
|
$match=~s/^\///g; |
|
$match=~s/\/$//g; |
|
if ((!$flag and $token->[1]=~/$match/) or |
|
($flag and $token->[1]!~/$match/)) { |
|
foreach my $cdata (@{$conditions{'cdata'}}) { |
|
my $flag=0; |
|
my $match=$cdata; |
|
if ($match=~/^\!/) { |
|
$match=~s/^\!//g; |
|
$flag=1; |
} |
} |
$cm=~s/^.//; $cm=~s/.$//; |
$match=~s/^\///g; |
if ((!$not and $cdata!~/$cm/) |
$match=~s/\/$//g; |
or ($not and $cdata=~/$cm/)) { |
if ((!$flag and $Ttoken=~/$match/) or |
# print "CMISMATCH: $cm ($cdata)\n"; |
($flag and $Ttoken!~/$match/)) { |
|
$cdataflag=1; |
} |
} |
elsif (($not and $cdata!~/$cm/) |
} |
or (!$not and $cdata=~/$cm/)) { |
if (@{$conditions{'cdata'}}) { |
$exclude++; |
if ($cdataflag) { |
|
return 0; |
|
} |
|
} |
|
else { |
|
if ($nameflag) { |
|
return 0; |
} |
} |
} |
} |
|
$nameflag=0; |
} |
} |
} |
} |
} |
} |
if ($eh{$token->[1]}) { |
elsif ($token->[0] eq 'T') { |
$echild=0; |
if ($nameflag) { |
if (!$exclude and !$excludea) { |
$Ttoken.=$token->[1]; |
print $tr; |
|
# print $token->[2]; |
|
$tr=''; |
|
} |
|
elsif ($exclude>0 or $excludea>0) { |
|
# print "EXCLUDING $token->[1] $excludea $excluden\n"; |
|
$exclude=0; $excluden=0; $excludea=0; |
|
$tr=''; |
|
} |
} |
$exclude=0; $excluden=0; $excludea=0; |
|
} |
|
else { |
|
if ($echild) { |
|
# $tr.=$token->[2]; |
|
} |
|
else { |
|
print $token->[2]; |
|
$tr=''; |
|
} |
|
} |
|
$hloc--; |
|
} |
|
elsif ($token->[0] eq 'T') { |
|
if ($echild) { |
|
$tr.=$token->[1]; |
|
$cdata=$token->[1]; |
|
} |
|
else { |
|
print $token->[1]; |
|
$tr=''; |
|
} |
} |
} |
} |
|
return 1; |
} |
} |
|
|
# ------------------------------------------------------------ clear conditions |
# ------------------------------------------------------------ clear conditions |
Line 215 sub cc {
|
Line 255 sub cc {
|
@{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; |
@{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; |
@{$conditions{'value'}}=(); pop @{$conditions{'value'}}; |
@{$conditions{'value'}}=(); pop @{$conditions{'value'}}; |
@{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; |
@{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; |
|
%eh=(1,1); delete $eh{1}; |
} |
} |
|
|
# --------------------------------------- remove starting and ending whitespace |
# --------------------------------------- remove starting and ending whitespace |
Line 222 sub trim {
|
Line 263 sub trim {
|
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; |
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; |
} |
} |
|
|
|
|
|
|
|
|
# --------------------------------------------------------- Format xfml section |
# --------------------------------------------------------- Format xfml section |
sub format_xfml { |
sub format_xfml { |
my (@tokeninfo)=@_; |
my (@tokeninfo)=@_; |
return ''; |
return ''; |
} |
} |
|
|
|
# ------------------------------------------------------- Format clause section |
|
sub format_clause { |
|
my (@tokeninfo)=@_; |
|
return ''; |
|
} |
|
|
# ---------------------------------------------------- Format when:name section |
# ---------------------------------------------------- Format when:name section |
sub format_when_name { |
sub format_when_name { |
my (@tokeninfo)=@_; |
my (@tokeninfo)=@_; |
$wloc++; |
# $wloc++; |
my $att_match=$tokeninfo[2]->{'match'}; |
my $att_match=$tokeninfo[2]->{'match'}; |
push @{$conditions{'name'}},$att_match; |
push @{$conditions{'name'}},$att_match; |
my $text=&trim($parser->get_text('/when:name')); |
my $text=&trim($parser->get_text('/when:name')); |
$parser->get_tag('/when:name'); |
$parser->get_tag('/when:name'); |
# print 'Name Matching...'.$att_match; |
# $wloc--; |
$wloc--; |
# &cc unless $wloc; |
&cc unless $wloc; |
|
return ''; |
|
} |
|
|
|
# ----------------------------------------------- Format when:attribute section |
|
sub format_when_attribute { |
|
my (@tokeninfo)=@_; |
|
$wloc++; |
|
my $att_match=$tokeninfo[2]->{'match'}; |
|
push @{$conditions{'attribute'}},$att_match; |
|
my $text=&trim($parser->get_text('/when:attribute')); |
|
$parser->get_tag('/when:attribute'); |
|
# print 'Attribute Matching...'.$att_match; |
|
$wloc--; |
|
&cc unless $wloc; |
|
return ''; |
return ''; |
} |
} |
|
|
Line 261 sub format_when_cdata {
|
Line 296 sub format_when_cdata {
|
my (@tokeninfo)=@_; |
my (@tokeninfo)=@_; |
$wloc++; |
$wloc++; |
my $att_match=$tokeninfo[2]->{'match'}; |
my $att_match=$tokeninfo[2]->{'match'}; |
# print 'Cdata Matching...'.$att_match; |
|
push @{$conditions{'cdata'}},$att_match; |
push @{$conditions{'cdata'}},$att_match; |
my $text=&trim($parser->get_text('/when:cdata')); |
my $text=&trim($parser->get_text('/when:cdata')); |
$parser->get_tag('/when:cdata'); |
$parser->get_tag('/when:cdata'); |
$wloc--; |
$wloc--; |
&cc unless $wloc; |
# &cc unless $wloc; |
return ''; |
|
} |
|
|
|
# ----------------------------------------------- Format choice:include section |
|
sub format_choice_include { |
|
my (@tokeninfo)=@_; |
|
my $text=&trim($parser->get_text('/choice:include')); |
|
$parser->get_tag('/choice:include'); |
|
$ih{$tokeninfo[2]->{'match'}}++; |
|
return ''; |
return ''; |
} |
} |
|
|
Line 295 sub format_choice_exclude {
|
Line 320 sub format_choice_exclude {
|
[@{$conditions{'cdata'}}]; |
[@{$conditions{'cdata'}}]; |
return ''; |
return ''; |
} |
} |
|
|
|
# ----------------------------------- POD (plain old documentation, CPAN style) |
|
|
|
=pod |
|
|
|
=head1 NAME |
|
|
|
xfml_parse.pl - This is meant to parse files meeting the xfml document type. |
|
See xfml.dtd. XFML=XML Filtering Markup Language. |
|
|
|
=head1 SYNOPSIS |
|
|
|
Usage is for lpml file to come in through standard input. |
|
|
|
=over 4 |
|
|
|
=item * |
|
|
|
1st argument is name of xfml file. |
|
|
|
=back |
|
|
|
Example: |
|
|
|
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml |
|
|
|
or |
|
|
|
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml |
|
|
|
=head1 DESCRIPTION |
|
|
|
I am using a multiple pass-through approach to parsing |
|
the xfml file. This saves memory and makes sure the server |
|
will never be overloaded. |
|
|
|
=head1 README |
|
|
|
I am using a multiple pass-through approach to parsing |
|
the xfml file. This saves memory and makes sure the server |
|
will never be overloaded. |
|
|
|
=head1 PREREQUISITES |
|
|
|
HTML::TokeParser |
|
|
|
=head1 COREQUISITES |
|
|
|
=head1 OSNAMES |
|
|
|
linux |
|
|
|
=head1 SCRIPT CATEGORIES |
|
|
|
Packaging/Administrative |
|
|
|
=head1 AUTHOR |
|
|
|
Scott Harrison |
|
codeharrison@yahoo.com |
|
|
|
Please let me know how/if you are finding this script useful and |
|
any/all suggestions. -Scott |
|
|
|
=cut |
|
|