File:
[LON-CAPA] /
loncom /
build /
xfml_parse.pl
Revision
1.7:
download - view:
text,
annotated -
select for diffs
Wed May 22 17:07:50 2002 UTC (22 years, 5 months ago) by
harris41
Branches:
MAIN
CVS tags:
version_2_9_X,
version_2_9_99_0,
version_2_9_1,
version_2_9_0,
version_2_8_X,
version_2_8_99_1,
version_2_8_99_0,
version_2_8_2,
version_2_8_1,
version_2_8_0,
version_2_7_X,
version_2_7_99_1,
version_2_7_99_0,
version_2_7_1,
version_2_7_0,
version_2_6_X,
version_2_6_99_1,
version_2_6_99_0,
version_2_6_3,
version_2_6_2,
version_2_6_1,
version_2_6_0,
version_2_5_X,
version_2_5_99_1,
version_2_5_99_0,
version_2_5_2,
version_2_5_1,
version_2_5_0,
version_2_4_X,
version_2_4_99_0,
version_2_4_2,
version_2_4_1,
version_2_4_0,
version_2_3_X,
version_2_3_99_0,
version_2_3_2,
version_2_3_1,
version_2_3_0,
version_2_2_X,
version_2_2_99_1,
version_2_2_99_0,
version_2_2_2,
version_2_2_1,
version_2_2_0,
version_2_1_X,
version_2_1_99_3,
version_2_1_99_2,
version_2_1_99_1,
version_2_1_99_0,
version_2_1_3,
version_2_1_2,
version_2_1_1,
version_2_1_0,
version_2_12_X,
version_2_11_X,
version_2_11_5_msu,
version_2_11_5,
version_2_11_4_uiuc,
version_2_11_4_msu,
version_2_11_4,
version_2_11_3_uiuc,
version_2_11_3_msu,
version_2_11_3,
version_2_11_2_uiuc,
version_2_11_2_msu,
version_2_11_2_educog,
version_2_11_2,
version_2_11_1,
version_2_11_0_RC3,
version_2_11_0_RC2,
version_2_11_0_RC1,
version_2_11_0,
version_2_10_X,
version_2_10_1,
version_2_10_0_RC2,
version_2_10_0_RC1,
version_2_10_0,
version_2_0_X,
version_2_0_99_1,
version_2_0_2,
version_2_0_1,
version_2_0_0,
version_1_99_3,
version_1_99_2,
version_1_99_1_tmcc,
version_1_99_1,
version_1_99_0_tmcc,
version_1_99_0,
version_1_3_X,
version_1_3_3,
version_1_3_2,
version_1_3_1,
version_1_3_0,
version_1_2_X,
version_1_2_99_1,
version_1_2_99_0,
version_1_2_1,
version_1_2_0,
version_1_1_X,
version_1_1_99_5,
version_1_1_99_4,
version_1_1_99_3,
version_1_1_99_2,
version_1_1_99_1,
version_1_1_99_0,
version_1_1_3,
version_1_1_2,
version_1_1_1,
version_1_1_0,
version_1_0_99_3,
version_1_0_99_2,
version_1_0_99_1,
version_1_0_99,
version_1_0_3,
version_1_0_2,
version_1_0_1,
version_1_0_0,
version_0_99_5,
version_0_99_4,
version_0_99_3,
version_0_99_2,
version_0_99_1,
version_0_99_0,
version_0_6_2,
version_0_6,
version_0_5_1,
version_0_5,
version_0_4,
stable_2002_july,
loncapaMITrelate_1,
language_hyphenation_merge,
language_hyphenation,
conference_2003,
bz6209-base,
bz6209,
bz5969,
bz5610,
bz2851,
STABLE,
PRINT_INCOMPLETE_base,
PRINT_INCOMPLETE,
HEAD,
GCI_3,
GCI_2,
GCI_1,
BZ5971-printing-apage,
BZ5434-fox,
BZ4492-merge,
BZ4492-feature_horizontal_radioresponse,
BZ4492-feature_Support_horizontal_radioresponse,
BZ4492-Support_horizontal_radioresponse
xfml_parse.pl already has been well-tested. the changes in 1.6 are incorrect
and are now reversed.
#!/usr/bin/perl
# -------------------------------------------------------- Documentation notice
# Run "perldoc ./lpml_parse.pl" in order to best view the software
# documentation internalized in this program.
# --------------------------------------------------------- License Information
# The LearningOnline Network with CAPA
# piml_parse.pl - Linux Packaging Markup Language parser
#
# $Id: xfml_parse.pl,v 1.7 2002/05/22 17:07:50 harris41 Exp $
#
# Written by Scott Harrison, codeharrison@yahoo.com
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# YEAR=2002
# 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
#
###
# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered
###############################################################################
## ##
## ORGANIZATION OF THIS PERL SCRIPT ##
## 1. Notes ##
## 2. Read in filter file ##
## 3. Initialize and clear conditions ##
## 4. Run through and apply clauses ##
## ##
###############################################################################
# ----------------------------------------------------------------------- Notes
#
# This is meant to parse files meeting the xfml document type.
# See xfml.dtd. XFML=XML Filtering Markup Language.
use HTML::TokeParser;
use strict;
unless (@ARGV) {
print(<<END);
Incorrect invocation.
Example usages:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
END
}
my %eh;
# ---------------------------------------------- Read in filter file from @ARGV
my $tofilter=shift @ARGV;
open(IN,"<$tofilter"); my @lines=<IN>;
my $parsestring=join('',@lines); undef @lines; close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
# --------------------------------------------- initialize and clear conditions
my %conditions; &cc;
# Define handling methods for mode-dependent text rendering
$parser->{textify}={
'xfml' => \&format_xfml,
'when:name' => \&format_when_name,
'when:attribute' => \&format_when_attribute,
'when:cdata' => \&format_when_cdata,
'choice:exclude' => \&format_choice_exclude,
'clause' => \&format_clause,
};
my $text;
my $xfml;
my $wloc=0;
my %eha;
# ----------------------------------------------- Run through and apply clauses
my @lines2=<>; my $output=join('',@lines2); undef @lines2;
my $lparser = HTML::TokeParser->new(\$output) or
die('can\'t create TokeParser object');
$lparser->xml_mode('1');
my $parsestring2;
while (my $token = $parser->get_tag('clause')) {
$parsestring2=$output;
$lparser = HTML::TokeParser->new(\$parsestring2);
$lparser->xml_mode('1');
$output='';
&format_clause(@{$token});
$text = $parser->get_text('/clause');
$token = $parser->get_tag('/clause');
my $token='';
my $ttype='';
my $excludeflag=0;
my $outcache='';
while ($token = $lparser->get_token()) {
if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
elsif ($token->[0] eq 'T') {
if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
or $ttype eq 'E') {
$output.=$token->[1];
}
else {
$outcache.=$token->[1];
}
}
elsif ($token->[0] eq 'S') {
if ($eh{$token->[1]} or $excludeflag==1) {
$ttype='';
$excludeflag=1;
$outcache.=$token->[4];
}
else {
$ttype='S';
$output.=$token->[4];
}
if ($excludeflag==1) {
}
}
elsif ($token->[0] eq 'E') {
if ($eh{$token->[1]} and $excludeflag==1) {
$ttype='E';
$excludeflag=0;
$outcache.=$token->[2];
my $retval=&evalconditions($outcache);
if (&evalconditions($outcache)) {
$output.=$outcache;
}
else {
$output.='<!-- FILTERED OUT -->';
}
$outcache='';
}
elsif ($excludeflag==1) {
$ttype='';
$outcache.=$token->[2];
}
else {
$output.=$token->[2];
$ttype='E';
}
}
}
&cc;
}
print $output;
# -------------------------------------------------------------- evalconditions
sub evalconditions {
my ($parsetext)=@_;
my $eparser = HTML::TokeParser->new(\$parsetext);
unless (@{$conditions{'name'}} or
@{$conditions{'attribute'}}) {
return 0;
}
my $nameflag=0;
my $cdataflag=0;
my $matchflag=0;
my $Ttoken='';
while (my $token = $eparser->get_token()) {
if ($token->[0] eq 'S') {
foreach my $name (@{$conditions{'name'}}) {
my $flag=0;
my $match=$name;
if ($match=~/^\!/) {
$match=~s/^\!//g;
$flag=1;
}
$match=~s/^\///g;
$match=~s/\/$//g;
if ((!$flag and $token->[1]=~/$match/) or
($flag and $token->[1]!~/$match/)) {
$nameflag=1;
}
}
$Ttoken='';
}
elsif ($token->[0] eq 'E') {
foreach my $name (@{$conditions{'name'}}) {
my $flag=0;
my $match=$name;
if ($match=~/^\!/) {
$match=~s/^\!//g;
$flag=1;
}
$match=~s/^\///g;
$match=~s/\/$//g;
if ((!$flag and $token->[1]=~/$match/) or
($flag and $token->[1]!~/$match/)) {
foreach my $cdata (@{$conditions{'cdata'}}) {
my $flag=0;
my $match=$cdata;
if ($match=~/^\!/) {
$match=~s/^\!//g;
$flag=1;
}
$match=~s/^\///g;
$match=~s/\/$//g;
if ((!$flag and $Ttoken=~/$match/) or
($flag and $Ttoken!~/$match/)) {
$cdataflag=1;
}
}
if (@{$conditions{'cdata'}}) {
if ($cdataflag) {
return 0;
}
}
else {
if ($nameflag) {
return 0;
}
}
$nameflag=0;
}
}
}
elsif ($token->[0] eq 'T') {
if ($nameflag) {
$Ttoken.=$token->[1];
}
}
}
return 1;
}
# ------------------------------------------------------------ clear conditions
sub cc {
@{$conditions{'name'}}=(); pop @{$conditions{'name'}};
@{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
@{$conditions{'value'}}=(); pop @{$conditions{'value'}};
@{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
%eh=(1,1); delete $eh{1};
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}
# --------------------------------------------------------- Format xfml section
sub format_xfml {
my (@tokeninfo)=@_;
return '';
}
# ------------------------------------------------------- Format clause section
sub format_clause {
my (@tokeninfo)=@_;
return '';
}
# ---------------------------------------------------- Format when:name section
sub format_when_name {
my (@tokeninfo)=@_;
# $wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'name'}},$att_match;
my $text=&trim($parser->get_text('/when:name'));
$parser->get_tag('/when:name');
# $wloc--;
# &cc unless $wloc;
return '';
}
# --------------------------------------------------- Format when:cdata section
sub format_when_cdata {
my (@tokeninfo)=@_;
$wloc++;
my $att_match=$tokeninfo[2]->{'match'};
push @{$conditions{'cdata'}},$att_match;
my $text=&trim($parser->get_text('/when:cdata'));
$parser->get_tag('/when:cdata');
$wloc--;
# &cc unless $wloc;
return '';
}
# ----------------------------------------------- Format choice:exclude section
sub format_choice_exclude {
my (@tokeninfo)=@_;
my $text=&trim($parser->get_text('/choice:exclude'));
$parser->get_tag('/choice:exclude');
$eh{$tokeninfo[2]->{'nodename'}}++;
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
[@{$conditions{'name'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
[@{$conditions{'attribute'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
[@{$conditions{'value'}}];
push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
[@{$conditions{'cdata'}}];
return '';
}
# ----------------------------------- POD (plain old documentation, CPAN style)
=pod
=head1 NAME
xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
=head1 SYNOPSIS
Usage is for lpml file to come in through standard input.
=over 4
=item *
1st argument is name of xfml file.
=back
Example:
cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
or
perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
=head1 DESCRIPTION
I am using a multiple pass-through approach to parsing
the xfml file. This saves memory and makes sure the server
will never be overloaded.
=head1 README
I am using a multiple pass-through approach to parsing
the xfml file. This saves memory and makes sure the server
will never be overloaded.
=head1 PREREQUISITES
HTML::TokeParser
=head1 COREQUISITES
=head1 OSNAMES
linux
=head1 SCRIPT CATEGORIES
Packaging/Administrative
=head1 AUTHOR
Scott Harrison
codeharrison@yahoo.com
Please let me know how/if you are finding this script useful and
any/all suggestions. -Scott
=cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>