loncom/build/xfml_parse.pl - diff

Return to xfml_parse.pl CVS log

Up to [LON-CAPA] / loncom / build

Diff for /loncom/build/xfml_parse.pl between versions 1.1 and 1.4

-version 1.1, 2002/01/29 10:42:42
+version 1.4, 2002/04/08 10:52:24
  Line 1
  #!/usr/bin/perl
+ # -------------------------------------------------------- Documentation notice
+ # Run "perldoc ./lpml_parse.pl" in order to best view the software
+ # documentation internalized in this program.
+ # --------------------------------------------------------- License Information
+ # The LearningOnline Network with CAPA
+ # piml_parse.pl - Linux Packaging Markup Language parser
+ #
+ # $Id$
+ #
+ # Written by Scott Harrison, codeharrison@yahoo.com
+ #
+ # Copyright Michigan State University Board of Trustees
+ #
+ # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+ #
+ # LON-CAPA is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ #
+ # LON-CAPA is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with LON-CAPA; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ #
+ # /home/httpd/html/adm/gpl.txt
+ #
+ # http://www.lon-capa.org/
+ #
  # YEAR=2002
- # 1/26,1/27,1/28 - Scott Harrison
+ # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
+ #
+ ###
  # Read in 2 XML file; first is the filter specification, the second
  # is the XML file to be filtered
+ ###############################################################################
+ ##                                                                           ##
+ ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
+ ## 1. Notes                                                                  ##
+ ## 2. Read in filter file                                                    ##
+ ## 3. Initialize and clear conditions                                        ##
+ ## 4. Run through and apply clauses                                          ##
+ ##                                                                           ##
+ ###############################################################################
+ # ----------------------------------------------------------------------- Notes
+ #
+ # This is meant to parse files meeting the xfml document type.
+ # See xfml.dtd.  XFML=XML Filtering Markup Language.
  use HTML::TokeParser;
  use strict;
  unless (@ARGV) {
-     print <<END;
+     print(<<END);
  Incorrect invocation.
  Example usages:
  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
- Line 19  END
+ Line 70  END
  }
  my %eh;
- my %ih;
+ # ---------------------------------------------- Read in filter file from @ARGV
  my $tofilter=shift @ARGV;
- my @lines=<>; my $parsestring=join('',@lines); undef @lines;
+ open(IN,"<$tofilter"); my @lines=<IN>;
+ my $parsestring=join('',@lines); undef @lines; close IN;
  my $parser = HTML::TokeParser->new(\$parsestring) or
      die('can\'t create TokeParser object');
  $parser->xml_mode('1');
- # Define handling methods for mode-dependent text rendering
+ # --------------------------------------------- initialize and clear conditions
  my %conditions; &cc;
+ # Define handling methods for mode-dependent text rendering
  $parser->{textify}={
-     xfml => \&format_xfml,
+     'xfml' => \&format_xfml,
      'when:name' => \&format_when_name,
      'when:attribute' => \&format_when_attribute,
      'when:cdata' => \&format_when_cdata,
-     'choice:include' => \&format_choice_include,
      'choice:exclude' => \&format_choice_exclude,
+     'clause' => \&format_clause,
      };
  my $text;
- Line 44  my $xfml;
+ Line 97  my $xfml;
  my $wloc=0;
  my %eha;
- while (my $token = $parser->get_tag('xfml')) {
+ # ----------------------------------------------- Run through and apply clauses
-     &format_xfml(@{$token});
+ my @lines2=<>; my $output=join('',@lines2); undef @lines2;
-     $text = $parser->get_text('/xfml');
+ my $lparser = HTML::TokeParser->new(\$output) or
- #    print $xfml;
- #    print $text;
-     $token = $parser->get_tag('/xfml');
- }
- open IN,"<$tofilter";
- my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
- $parser = HTML::TokeParser->new(\$parsestring2) or
      die('can\'t create TokeParser object');
- $parser->xml_mode('1');
+ $lparser->xml_mode('1');
+ my $parsestring2;
- my $token;
+ while (my $token = $parser->get_tag('clause')) {
- my $hloc=0;
+     $parsestring2=$output;
- my %ts;
+     $lparser = HTML::TokeParser->new(\$parsestring2);
- my $tr;
+     $lparser->xml_mode('1');
- my $echild=0;
+     $output='';
- my $exclude=0;
+     &format_clause(@{$token});
- my $excluden=0;
+     $text = $parser->get_text('/clause');
- my $excludea=0;
+     $token = $parser->get_tag('/clause');
- my $et=0;
- my $cdata='';
+     my $token='';
- while ($token = $parser->get_token()) {
+     my $ttype='';
- # from HTML::TokeParser documentation:
+     my $excludeflag=0;
- #             ["S",  $tag, %$attr, @$attrseq, $text]
+     my $outcache='';
- #             ["E",  $tag, $text]
+     while ($token = $lparser->get_token()) {
- #             ["T",  $text, $is_data]
+ 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
- #             ["C",  $text]
+ 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
- #             ["D",  $text]
+ 	elsif ($token->[0] eq 'T') {
- #             ["PI", $token0, $text]
+ 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
- #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
+ 		or $ttype eq 'E') {
- #         @{$conditions{'name'}};
+ 		$output.=$token->[1];
- #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
+ 	    }
- #         @{$conditions{'attribute'}};
+ 	    else {
- #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
+ 		$outcache.=$token->[1];
- #         @{$conditions{'value'}};
+ 	    }
- #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
- #         @{$conditions{'cdata'}};
-     if ($token->[0] eq 'D') {
- 	print $token->[1];
-     }
-     elsif ($token->[0] eq 'C') {
- 	print $token->[1];
-     }
-     elsif ($token->[0] eq 'S') {
- 	$cdata='';
- 	$hloc++;
- # if token can be excluded, then pretend it is until all conditions are
- # run (eha); then output during end tag processing
- # else, output
- # a token can be excluded when it is an eh key, or a child node of
- # an eh key
- 	if ($eh{$token->[1]}) {
- 	    $echild=$token->[1];
- #	    print "ECHILD=$echild\n";
  	}
- 	if ($echild) {
+ 	elsif ($token->[0] eq 'S') {
- 	    # run through names for echild
+ 	    if ($eh{$token->[1]} or $excludeflag==1) {
- 	    # then attributes and/or values and/or cdata
+ 		$ttype='';
- 	    my $name=$token->[1];
+ 		$excludeflag=1;
- 	    my @attributes=@{$token->[3]};
+ 		$outcache.=$token->[4];
- 	    my %atthash=%{$token->[2]};
+ 	    }
- 	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
+ 	    else {
- 		foreach my $namematch (@{$namemlist}) {
+ 		$ttype='S';
- 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
+ 		$output.=$token->[4];
- 		    if ($name=~/$nm/) {
+ 	    }
- #			print "NMATCH: $nm ($name)\n";
+ 	    if ($excludeflag==1) {
- 			$excluden++;
- 			foreach my $attributemlist
- 			    (@{$eha{$echild}->{'attribute'}}) {
- 				foreach my $attributematch
- 				    (@{$attributemlist}) {
- 					my ($an,$am)=
- 					    split(/\=/,$attributematch,2);
- 					$am=~s/^.//;
- 					$am=~s/.$//;
- #					print 'AM:'."($an,$am)\t";
- #					print 'ATT:'.join(',',%atthash)."\n";
- 					if ($atthash{$an}) {
- 					    if ($atthash{$an}=~/$am/) {
- 						$excludea++;
- #						print "AMATCH: $am (".
- #						    join(',',
- #							 @attributes)
- #							."\n";
- 					    }
- 					}
- 				    }
- 			    }
- 		    }
- 		}
  	    }
- 	    $tr.=$token->[4];
  	}
- 	else {
+ 	elsif ($token->[0] eq 'E') {
- 	    print $token->[4];
+ 	    if ($eh{$token->[1]} and $excludeflag==1) {
+ 		$ttype='E';
+ 		$excludeflag=0;
+ 		$outcache.=$token->[2];
+ 		my $retval=&evalconditions($outcache);
+ 		if (&evalconditions($outcache)) {
+ 		    $output.=$outcache;
+ 		}
+ 		else {
+ 		    $output.='<!-- FILTERED OUT -->';
+ 		}
+ 		$outcache='';
+ 	    }
+ 	    elsif ($excludeflag==1) {
+ 		$ttype='';
+ 		$outcache.=$token->[2];
+ 	    }
+ 	    else {
+ 		$output.=$token->[2];
+ 		$ttype='E';
+ 	    }
  	}
      }
-     elsif ($token->[0] eq 'E') {
+     &cc;
- 	if ($echild) {
+ }
- 	    $tr.=$token->[2];
+ print $output;
- 	    if ($excluden) {
- 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
+ # -------------------------------------------------------------- evalconditions
- 		    foreach my $cdatamatch (@{$cdatamlist}) {
+ sub evalconditions {
- #				print "CDATA: $cdatamatch, $cdata\n";
+     my ($parsetext)=@_;
- 			my $cm=$cdatamatch;
+     my $eparser = HTML::TokeParser->new(\$parsetext);
- 			my $not=0;
+     unless (@{$conditions{'name'}} or
- 			if ($cm=~/\!/) {
+ 	    @{$conditions{'attribute'}}) {
- 			    $not=1;
+ 	return 0;
- 			    $cm=~s/^.//;
+     }
+     my $nameflag=0;
+     my $cdataflag=0;
+     my $matchflag=0;
+     my $Ttoken='';
+     while (my $token = $eparser->get_token()) {
+ 	if ($token->[0] eq 'S') {
+ 	    foreach my $name (@{$conditions{'name'}}) {
+ 		my $flag=0;
+ 		my $match=$name;
+ 		if ($match=~/^\!/) {
+ 		    $match=~s/^\!//g;
+ 		    $flag=1;
+ 		}
+ 		$match=~s/^\///g;
+ 		$match=~s/\/$//g;
+ 		if ((!$flag and $token->[1]=~/$match/) or
+ 		    ($flag and $token->[1]!~/$match/)) {
+ 		    $nameflag=1;
+ 		}
+ 	    }
+ 	    $Ttoken='';
+ 	}
+ 	elsif ($token->[0] eq 'E') {
+ 	    foreach my $name (@{$conditions{'name'}}) {
+ 		my $flag=0;
+ 		my $match=$name;
+ 		if ($match=~/^\!/) {
+ 		    $match=~s/^\!//g;
+ 		    $flag=1;
+ 		}
+ 		$match=~s/^\///g;
+ 		$match=~s/\/$//g;
+ 		if ((!$flag and $token->[1]=~/$match/) or
+ 		    ($flag and $token->[1]!~/$match/)) {
+ 		    foreach my $cdata (@{$conditions{'cdata'}}) {
+ 			my $flag=0;
+ 			my $match=$cdata;
+ 			if ($match=~/^\!/) {
+ 			    $match=~s/^\!//g;
+ 			    $flag=1;
  			}
- 			$cm=~s/^.//; $cm=~s/.$//;
+ 			$match=~s/^\///g;
- 			if ((!$not and $cdata!~/$cm/)
+ 			$match=~s/\/$//g;
- 			    or ($not and $cdata=~/$cm/)) {
+ 			if ((!$flag and $Ttoken=~/$match/) or
- #				    print "CMISMATCH: $cm ($cdata)\n";
+ 			    ($flag and $Ttoken!~/$match/)) {
+ 			    $cdataflag=1;
  			}
- 			elsif (($not and $cdata!~/$cm/)
+ 		    }
- 			       or (!$not and $cdata=~/$cm/)) {
+ 		    if (@{$conditions{'cdata'}}) {
- 			    $exclude++;
+ 			if ($cdataflag) {
+ 			    return 0;
+ 			}
+ 		    }
+ 		    else {
+ 			if ($nameflag) {
+ 			    return 0;
  			}
  		    }
+ 		    $nameflag=0;
  		}
  	    }
  	}
- 	if ($eh{$token->[1]}) {
+ 	elsif ($token->[0] eq 'T') {
- 	    $echild=0;
+ 	    if ($nameflag) {
- 	    if (!$exclude and !$excludea) {
+ 		$Ttoken.=$token->[1];
- 		print $tr;
- #		print $token->[2];
- 		$tr='';
- 	    }
- 	    elsif ($exclude>0 or $excludea>0) {
- #		print "EXCLUDING $token->[1] $excludea $excluden\n";
- 		$exclude=0; $excluden=0; $excludea=0;
- 		$tr='';
  	    }
- 	    $exclude=0; $excluden=0; $excludea=0;
- 	}
- 	else {
- 	    if ($echild) {
- #		$tr.=$token->[2];
- 	    }
- 	    else {
- 		print $token->[2];
- 		$tr='';
- 	    }
- 	}
- 	$hloc--;
-     }
-     elsif ($token->[0] eq 'T') {
- 	if ($echild) {
- 	    $tr.=$token->[1];
- 	    $cdata=$token->[1];
- 	}
- 	else {
- 	    print $token->[1];
- 	    $tr='';
  	}
      }
+     return 1;
  }
  # ------------------------------------------------------------ clear conditions
- Line 215  sub cc {
+ Line 255  sub cc {
      @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
      @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
      @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
+     %eh=(1,1); delete $eh{1};
  }
  # --------------------------------------- remove starting and ending whitespace
- Line 222  sub trim {
+ Line 263  sub trim {
      my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  }
  # --------------------------------------------------------- Format xfml section
  sub format_xfml {
      my (@tokeninfo)=@_;
      return '';
  }
+ # ------------------------------------------------------- Format clause section
+ sub format_clause {
+     my (@tokeninfo)=@_;
+     return '';
+ }
  # ---------------------------------------------------- Format when:name section
  sub format_when_name {
      my (@tokeninfo)=@_;
-     $wloc++;
+ #    $wloc++;
      my $att_match=$tokeninfo[2]->{'match'};
      push @{$conditions{'name'}},$att_match;
      my $text=&trim($parser->get_text('/when:name'));
      $parser->get_tag('/when:name');
- #    print 'Name Matching...'.$att_match;
+ #    $wloc--;
-     $wloc--;
+ #    &cc unless $wloc;
-     &cc unless $wloc;
-     return '';
- }
- # ----------------------------------------------- Format when:attribute section
- sub format_when_attribute {
-     my (@tokeninfo)=@_;
-     $wloc++;
-     my $att_match=$tokeninfo[2]->{'match'};
-     push @{$conditions{'attribute'}},$att_match;
-     my $text=&trim($parser->get_text('/when:attribute'));
-     $parser->get_tag('/when:attribute');
- #    print 'Attribute Matching...'.$att_match;
-     $wloc--;
-     &cc unless $wloc;
      return '';
  }
- Line 261  sub format_when_cdata {
+ Line 296  sub format_when_cdata {
      my (@tokeninfo)=@_;
      $wloc++;
      my $att_match=$tokeninfo[2]->{'match'};
- #    print 'Cdata Matching...'.$att_match;
      push @{$conditions{'cdata'}},$att_match;
      my $text=&trim($parser->get_text('/when:cdata'));
      $parser->get_tag('/when:cdata');
      $wloc--;
-     &cc unless $wloc;
+ #    &cc unless $wloc;
-     return '';
- }
- # ----------------------------------------------- Format choice:include section
- sub format_choice_include {
-     my (@tokeninfo)=@_;
-     my $text=&trim($parser->get_text('/choice:include'));
-     $parser->get_tag('/choice:include');
-     $ih{$tokeninfo[2]->{'match'}}++;
      return '';
  }
- Line 295  sub format_choice_exclude {
+ Line 320  sub format_choice_exclude {
           [@{$conditions{'cdata'}}];
      return '';
  }
+ # ----------------------------------- POD (plain old documentation, CPAN style)
+ =pod
+ =head1 NAME
+ xfml_parse.pl - This is meant to parse files meeting the xfml document type.
+ See xfml.dtd.  XFML=XML Filtering Markup Language.
+ =head1 SYNOPSIS
+ Usage is for lpml file to come in through standard input.
+ =over 4
+ =item *
+st argument is name of xfml file.
+ =back
+ Example:
+  cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
+ or
+  perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
+ =head1 DESCRIPTION
+ I am using a multiple pass-through approach to parsing
+ the xfml file.  This saves memory and makes sure the server
+ will never be overloaded.
+ =head1 README
+ I am using a multiple pass-through approach to parsing
+ the xfml file.  This saves memory and makes sure the server
+ will never be overloaded.
+ =head1 PREREQUISITES
+ HTML::TokeParser
+ =head1 COREQUISITES
+ =head1 OSNAMES
+ linux
+ =head1 SCRIPT CATEGORIES
+ Packaging/Administrative
+ =head1 AUTHOR
+  Scott Harrison
+  codeharrison@yahoo.com
+ Please let me know how/if you are finding this script useful and
+ any/all suggestions.  -Scott
+ =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1
changed lines
	Added in v.1.4