Annotation of nsdl/build/xfml_parse.pl, revision 1.1
1.1 ! harris41 1: #!/usr/bin/perl
! 2:
! 3: # -------------------------------------------------------- Documentation notice
! 4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
! 5: # documentation internalized in this program.
! 6:
! 7: # --------------------------------------------------------- License Information
! 8: # The LearningOnline Network with CAPA
! 9: # piml_parse.pl - Linux Packaging Markup Language parser
! 10: #
! 11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
! 12: #
! 13: # Written by Scott Harrison, codeharrison@yahoo.com
! 14: #
! 15: # Copyright Michigan State University Board of Trustees
! 16: #
! 17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 18: #
! 19: # LON-CAPA is free software; you can redistribute it and/or modify
! 20: # it under the terms of the GNU General Public License as published by
! 21: # the Free Software Foundation; either version 2 of the License, or
! 22: # (at your option) any later version.
! 23: #
! 24: # LON-CAPA is distributed in the hope that it will be useful,
! 25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 27: # GNU General Public License for more details.
! 28: #
! 29: # You should have received a copy of the GNU General Public License
! 30: # along with LON-CAPA; if not, write to the Free Software
! 31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 32: #
! 33: # /home/httpd/html/adm/gpl.txt
! 34: #
! 35: # http://www.lon-capa.org/
! 36: #
! 37: # YEAR=2002
! 38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
! 39: #
! 40: ###
! 41:
! 42: # Read in 2 XML file; first is the filter specification, the second
! 43: # is the XML file to be filtered
! 44:
! 45: ###############################################################################
! 46: ## ##
! 47: ## ORGANIZATION OF THIS PERL SCRIPT ##
! 48: ## 1. Notes ##
! 49: ## 2. Read in filter file ##
! 50: ## 3. Initialize and clear conditions ##
! 51: ## 4. Run through and apply clauses ##
! 52: ## ##
! 53: ###############################################################################
! 54:
! 55: # ----------------------------------------------------------------------- Notes
! 56: #
! 57: # This is meant to parse files meeting the xfml document type.
! 58: # See xfml.dtd. XFML=XML Filtering Markup Language.
! 59:
! 60: use HTML::TokeParser;
! 61: use strict;
! 62:
! 63: unless (@ARGV) {
! 64: print(<<END);
! 65: Incorrect invocation.
! 66: Example usages:
! 67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
! 68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
! 69: END
! 70: }
! 71:
! 72: my %eh;
! 73:
! 74: # ---------------------------------------------- Read in filter file from @ARGV
! 75: my $tofilter=shift @ARGV;
! 76: open(IN,"<$tofilter"); my @lines=<IN>;
! 77: my $parsestring=join('',@lines); undef @lines; close IN;
! 78: my $parser = HTML::TokeParser->new(\$parsestring) or
! 79: die('can\'t create TokeParser object');
! 80: $parser->xml_mode('1');
! 81:
! 82: # --------------------------------------------- initialize and clear conditions
! 83: my %conditions; &cc;
! 84:
! 85: # Define handling methods for mode-dependent text rendering
! 86: $parser->{textify}={
! 87: 'xfml' => \&format_xfml,
! 88: 'when:name' => \&format_when_name,
! 89: 'when:attribute' => \&format_when_attribute,
! 90: 'when:cdata' => \&format_when_cdata,
! 91: 'choice:exclude' => \&format_choice_exclude,
! 92: 'clause' => \&format_clause,
! 93: };
! 94:
! 95: my $text;
! 96: my $xfml;
! 97: my $wloc=0;
! 98: my %eha;
! 99:
! 100: # ----------------------------------------------- Run through and apply clauses
! 101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
! 102: my $lparser = HTML::TokeParser->new(\$output) or
! 103: die('can\'t create TokeParser object');
! 104: $lparser->xml_mode('1');
! 105: my $parsestring2;
! 106: while (my $token = $parser->get_tag('clause')) {
! 107: $parsestring2=$output;
! 108: $lparser = HTML::TokeParser->new(\$parsestring2);
! 109: $lparser->xml_mode('1');
! 110: $output='';
! 111: &format_clause(@{$token});
! 112: $text = $parser->get_text('/clause');
! 113: $token = $parser->get_tag('/clause');
! 114:
! 115: my $token='';
! 116: my $ttype='';
! 117: my $excludeflag=0;
! 118: my $outcache='';
! 119: while ($token = $lparser->get_token()) {
! 120: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
! 121: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
! 122: elsif ($token->[0] eq 'T') {
! 123: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
! 124: or $ttype eq 'E') {
! 125: $output.=$token->[1];
! 126: }
! 127: else {
! 128: $outcache.=$token->[1];
! 129: }
! 130: }
! 131: elsif ($token->[0] eq 'S') {
! 132: if ($eh{$token->[1]} or $excludeflag==1) {
! 133: $ttype='';
! 134: $excludeflag=1;
! 135: $outcache.=$token->[4];
! 136: }
! 137: else {
! 138: $ttype='S';
! 139: $output.=$token->[4];
! 140: }
! 141: if ($excludeflag==1) {
! 142:
! 143: }
! 144: }
! 145: elsif ($token->[0] eq 'E') {
! 146: if ($eh{$token->[1]} and $excludeflag==1) {
! 147: $ttype='E';
! 148: $excludeflag=0;
! 149: $outcache.=$token->[2];
! 150: my $retval=&evalconditions($outcache);
! 151: if (&evalconditions($outcache)) {
! 152: $output.=$outcache;
! 153: }
! 154: else {
! 155: $output.='<!-- FILTERED OUT -->';
! 156: }
! 157: $outcache='';
! 158: }
! 159: elsif ($excludeflag==1) {
! 160: $ttype='';
! 161: $outcache.=$token->[2];
! 162: }
! 163: else {
! 164: $output.=$token->[2];
! 165: $ttype='E';
! 166: }
! 167: }
! 168: }
! 169: &cc;
! 170: }
! 171: print $output;
! 172:
! 173: # -------------------------------------------------------------- evalconditions
! 174: sub evalconditions {
! 175: my ($parsetext)=@_;
! 176: my $eparser = HTML::TokeParser->new(\$parsetext);
! 177: unless (@{$conditions{'name'}} or
! 178: @{$conditions{'attribute'}}) {
! 179: return 0;
! 180: }
! 181: my $nameflag=0;
! 182: my $cdataflag=0;
! 183: my $matchflag=0;
! 184: my $Ttoken='';
! 185: while (my $token = $eparser->get_token()) {
! 186: if ($token->[0] eq 'S') {
! 187: foreach my $name (@{$conditions{'name'}}) {
! 188: my $flag=0;
! 189: my $match=$name;
! 190: if ($match=~/^\!/) {
! 191: $match=~s/^\!//g;
! 192: $flag=1;
! 193: }
! 194: $match=~s/^\///g;
! 195: $match=~s/\/$//g;
! 196: if ((!$flag and $token->[1]=~/$match/) or
! 197: ($flag and $token->[1]!~/$match/)) {
! 198: $nameflag=1;
! 199: }
! 200: }
! 201: $Ttoken='';
! 202: }
! 203: elsif ($token->[0] eq 'E') {
! 204: foreach my $name (@{$conditions{'name'}}) {
! 205: my $flag=0;
! 206: my $match=$name;
! 207: if ($match=~/^\!/) {
! 208: $match=~s/^\!//g;
! 209: $flag=1;
! 210: }
! 211: $match=~s/^\///g;
! 212: $match=~s/\/$//g;
! 213: if ((!$flag and $token->[1]=~/$match/) or
! 214: ($flag and $token->[1]!~/$match/)) {
! 215: foreach my $cdata (@{$conditions{'cdata'}}) {
! 216: my $flag=0;
! 217: my $match=$cdata;
! 218: if ($match=~/^\!/) {
! 219: $match=~s/^\!//g;
! 220: $flag=1;
! 221: }
! 222: $match=~s/^\///g;
! 223: $match=~s/\/$//g;
! 224: if ((!$flag and $Ttoken=~/$match/) or
! 225: ($flag and $Ttoken!~/$match/)) {
! 226: $cdataflag=1;
! 227: }
! 228: }
! 229: if (@{$conditions{'cdata'}}) {
! 230: if ($cdataflag) {
! 231: return 0;
! 232: }
! 233: }
! 234: else {
! 235: if ($nameflag) {
! 236: return 0;
! 237: }
! 238: }
! 239: $nameflag=0;
! 240: }
! 241: }
! 242: }
! 243: elsif ($token->[0] eq 'T') {
! 244: if ($nameflag) {
! 245: $Ttoken.=$token->[1];
! 246: }
! 247: }
! 248: }
! 249: return 1;
! 250: }
! 251:
! 252: # ------------------------------------------------------------ clear conditions
! 253: sub cc {
! 254: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
! 255: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
! 256: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
! 257: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
! 258: %eh=(1,1); delete $eh{1};
! 259: }
! 260:
! 261: # --------------------------------------- remove starting and ending whitespace
! 262: sub trim {
! 263: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
! 264: }
! 265:
! 266:
! 267:
! 268:
! 269: # --------------------------------------------------------- Format xfml section
! 270: sub format_xfml {
! 271: my (@tokeninfo)=@_;
! 272: return '';
! 273: }
! 274:
! 275: # ------------------------------------------------------- Format clause section
! 276: sub format_clause {
! 277: my (@tokeninfo)=@_;
! 278: return '';
! 279: }
! 280:
! 281: # ---------------------------------------------------- Format when:name section
! 282: sub format_when_name {
! 283: my (@tokeninfo)=@_;
! 284: # $wloc++;
! 285: my $att_match=$tokeninfo[2]->{'match'};
! 286: push @{$conditions{'name'}},$att_match;
! 287: my $text=&trim($parser->get_text('/when:name'));
! 288: $parser->get_tag('/when:name');
! 289: # $wloc--;
! 290: # &cc unless $wloc;
! 291: return '';
! 292: }
! 293:
! 294: # --------------------------------------------------- Format when:cdata section
! 295: sub format_when_cdata {
! 296: my (@tokeninfo)=@_;
! 297: $wloc++;
! 298: my $att_match=$tokeninfo[2]->{'match'};
! 299: push @{$conditions{'cdata'}},$att_match;
! 300: my $text=&trim($parser->get_text('/when:cdata'));
! 301: $parser->get_tag('/when:cdata');
! 302: $wloc--;
! 303: # &cc unless $wloc;
! 304: return '';
! 305: }
! 306:
! 307: # ----------------------------------------------- Format choice:exclude section
! 308: sub format_choice_exclude {
! 309: my (@tokeninfo)=@_;
! 310: my $text=&trim($parser->get_text('/choice:exclude'));
! 311: $parser->get_tag('/choice:exclude');
! 312: $eh{$tokeninfo[2]->{'nodename'}}++;
! 313: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
! 314: [@{$conditions{'name'}}];
! 315: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
! 316: [@{$conditions{'attribute'}}];
! 317: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
! 318: [@{$conditions{'value'}}];
! 319: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
! 320: [@{$conditions{'cdata'}}];
! 321: return '';
! 322: }
! 323:
! 324: # ----------------------------------- POD (plain old documentation, CPAN style)
! 325:
! 326: =pod
! 327:
! 328: =head1 NAME
! 329:
! 330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
! 331:
! 332: =head1 SYNOPSIS
! 333:
! 334: Usage is for lpml file to come in through standard input.
! 335:
! 336: =over 4
! 337:
! 338: =item *
! 339:
! 340: 1st argument is name of xfml file.
! 341:
! 342: =back
! 343:
! 344: Example:
! 345:
! 346: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
! 347:
! 348: or
! 349:
! 350: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
! 351:
! 352: =head1 DESCRIPTION
! 353:
! 354: I am using a multiple pass-through approach to parsing
! 355: the xfml file. This saves memory and makes sure the server
! 356: will never be overloaded.
! 357:
! 358: =head1 README
! 359:
! 360: I am using a multiple pass-through approach to parsing
! 361: the xfml file. This saves memory and makes sure the server
! 362: will never be overloaded.
! 363:
! 364: =head1 PREREQUISITES
! 365:
! 366: HTML::TokeParser
! 367:
! 368: =head1 COREQUISITES
! 369:
! 370: =head1 OSNAMES
! 371:
! 372: linux
! 373:
! 374: =head1 SCRIPT CATEGORIES
! 375:
! 376: Packaging/Administrative
! 377:
! 378: =head1 AUTHOR
! 379:
! 380: Scott Harrison
! 381: codeharrison@yahoo.com
! 382:
! 383: Please let me know how/if you are finding this script useful and
! 384: any/all suggestions. -Scott
! 385:
! 386: =cut
! 387:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>