File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.3: download - view: text, annotated - select for diffs
Wed Feb 20 00:21:42 2002 UTC (22 years, 4 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
a cleaner leaner script (major rewrite)

    1: #!/usr/bin/perl
    2: 
    3: # YEAR=2002
    4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
    5: #
    6: ###
    7: 
    8: # Read in 2 XML file; first is the filter specification, the second
    9: # is the XML file to be filtered
   10: 
   11: ###############################################################################
   12: ##                                                                           ##
   13: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   14: ## 1. Notes                                                                  ##
   15: ## 2. Read in filter file                                                    ##
   16: ## 3. Initialize and clear conditions                                        ##
   17: ## 4. Run through and apply clauses                                          ##
   18: ##                                                                           ##
   19: ###############################################################################
   20: 
   21: # ----------------------------------------------------------------------- Notes
   22: #
   23: # This is meant to parse files meeting the xfml document type.
   24: # See xfml.dtd.  XFML=XML Filtering Markup Language.
   25: 
   26: use HTML::TokeParser;
   27: use strict;
   28: 
   29: unless (@ARGV) {
   30:     print <<END;
   31: Incorrect invocation.
   32: Example usages:
   33: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   34: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   35: END
   36: }
   37: 
   38: my %eh;
   39: 
   40: # ---------------------------------------------- Read in filter file from @ARGV
   41: my $tofilter=shift @ARGV;
   42: open IN,"<$tofilter"; my @lines=<IN>;
   43: my $parsestring=join('',@lines); undef @lines; close IN;
   44: my $parser = HTML::TokeParser->new(\$parsestring) or
   45:     die('can\'t create TokeParser object');
   46: $parser->xml_mode('1');
   47: 
   48: # --------------------------------------------- initialize and clear conditions
   49: my %conditions; &cc;
   50: 
   51: # Define handling methods for mode-dependent text rendering
   52: $parser->{textify}={
   53:     'xfml' => \&format_xfml,
   54:     'when:name' => \&format_when_name,
   55:     'when:attribute' => \&format_when_attribute,
   56:     'when:cdata' => \&format_when_cdata,
   57:     'choice:exclude' => \&format_choice_exclude,
   58:     'clause' => \&format_clause,
   59:     };
   60: 
   61: my $text;
   62: my $xfml;
   63: my $wloc=0;
   64: my %eha;
   65: 
   66: # ----------------------------------------------- Run through and apply clauses
   67: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
   68: my $lparser = HTML::TokeParser->new(\$output) or
   69:     die('can\'t create TokeParser object');
   70: $lparser->xml_mode('1');
   71: my $parsestring2;
   72: while (my $token = $parser->get_tag('clause')) {
   73:     $parsestring2=$output;
   74:     $lparser = HTML::TokeParser->new(\$parsestring2);
   75:     $lparser->xml_mode('1');
   76:     $output='';
   77:     &format_clause(@{$token});
   78:     $text = $parser->get_text('/clause');
   79:     $token = $parser->get_tag('/clause');
   80: 
   81:     my $token='';
   82:     my $ttype='';
   83:     my $excludeflag=0;
   84:     my $outcache='';
   85:     while ($token = $lparser->get_token()) {
   86: 	if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
   87: 	elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1];	}
   88: 	elsif ($token->[0] eq 'T') {
   89: 	    if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
   90: 		or $ttype eq 'E') {
   91: 		$output.=$token->[1];
   92: 	    }
   93: 	    else {
   94: 		$outcache.=$token->[1];
   95: 	    }
   96: 	}
   97: 	elsif ($token->[0] eq 'S') {
   98: 	    if ($eh{$token->[1]} or $excludeflag==1) {
   99: 		$ttype='';
  100: 		$excludeflag=1;
  101: 		$outcache.=$token->[4];
  102: 	    }
  103: 	    else {
  104: 		$ttype='S';
  105: 		$output.=$token->[4];
  106: 	    }
  107: 	    if ($excludeflag==1) {
  108: 		
  109: 	    }
  110: 	}
  111: 	elsif ($token->[0] eq 'E') {
  112: 	    if ($eh{$token->[1]} and $excludeflag==1) {
  113: 		$ttype='E';
  114: 		$excludeflag=0;
  115: 		$outcache.=$token->[2];
  116: 		my $retval=&evalconditions($outcache);
  117: 		if (&evalconditions($outcache)) {
  118: 		    $output.=$outcache;
  119: 		}
  120: 		else {
  121: 		    $output.='<!-- FILTERED OUT -->';
  122: 		}
  123: 		$outcache='';
  124: 	    }
  125: 	    elsif ($excludeflag==1) {
  126: 		$ttype='';
  127: 		$outcache.=$token->[2];
  128: 	    }
  129: 	    else {
  130: 		$output.=$token->[2];
  131: 		$ttype='E';
  132: 	    }
  133: 	}
  134:     }
  135:     &cc;
  136: }
  137: print $output;
  138: 
  139: # -------------------------------------------------------------- evalconditions
  140: sub evalconditions {
  141:     my ($parsetext)=@_;
  142:     my $eparser = HTML::TokeParser->new(\$parsetext);
  143:     unless (@{$conditions{'name'}} or
  144: 	    @{$conditions{'attribute'}}) {
  145: 	return 0;
  146:     }
  147:     my $nameflag=0;
  148:     my $cdataflag=0;
  149:     my $matchflag=0;
  150:     my $Ttoken='';
  151:     while (my $token = $eparser->get_token()) {
  152: 	if ($token->[0] eq 'S') {
  153: 	    foreach my $name (@{$conditions{'name'}}) {
  154: 		my $flag=0;
  155: 		my $match=$name;
  156: 		if ($match=~/^\!/) {
  157: 		    $match=~s/^\!//g;
  158: 		    $flag=1;
  159: 		}
  160: 		$match=~s/^\///g;
  161: 		$match=~s/\/$//g;
  162: 		if ((!$flag and $token->[1]=~/$match/) or
  163: 		    ($flag and $token->[1]!~/$match/)) {
  164: 		    $nameflag=1;
  165: 		}
  166: 	    }
  167: 	    $Ttoken='';
  168: 	}
  169: 	elsif ($token->[0] eq 'E') {
  170: 	    foreach my $name (@{$conditions{'name'}}) {
  171: 		my $flag=0;
  172: 		my $match=$name;
  173: 		if ($match=~/^\!/) {
  174: 		    $match=~s/^\!//g;
  175: 		    $flag=1;
  176: 		}
  177: 		$match=~s/^\///g;
  178: 		$match=~s/\/$//g;
  179: 		if ((!$flag and $token->[1]=~/$match/) or
  180: 		    ($flag and $token->[1]!~/$match/)) {
  181: 		    foreach my $cdata (@{$conditions{'cdata'}}) {
  182: 			my $flag=0;
  183: 			my $match=$cdata;
  184: 			if ($match=~/^\!/) {
  185: 			    $match=~s/^\!//g;
  186: 			    $flag=1;
  187: 			}
  188: 			$match=~s/^\///g;
  189: 			$match=~s/\/$//g;
  190: 			if ((!$flag and $Ttoken=~/$match/) or
  191: 			    ($flag and $Ttoken!~/$match/)) {
  192: 			    $cdataflag=1;
  193: 			}
  194: 		    }
  195: 		    if (@{$conditions{'cdata'}}) {
  196: 			if ($cdataflag) {
  197: 			    return 0;
  198: 			}
  199: 		    }
  200: 		    else {
  201: 			if ($nameflag) {
  202: 			    return 0;
  203: 			}
  204: 		    }
  205: 		    $nameflag=0;
  206: 		}
  207: 	    }
  208: 	}
  209: 	elsif ($token->[0] eq 'T') {
  210: 	    if ($nameflag) {
  211: 		$Ttoken.=$token->[1];
  212: 	    }
  213: 	}
  214:     }
  215:     return 1;
  216: }
  217: 
  218: # ------------------------------------------------------------ clear conditions
  219: sub cc {
  220:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
  221:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
  222:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
  223:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
  224:     %eh=(1,1); delete $eh{1};
  225: }
  226: 
  227: # --------------------------------------- remove starting and ending whitespace
  228: sub trim {
  229:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  230: }
  231: 
  232: 
  233: 
  234: 
  235: # --------------------------------------------------------- Format xfml section
  236: sub format_xfml {
  237:     my (@tokeninfo)=@_;
  238:     return '';
  239: }
  240: 
  241: # ------------------------------------------------------- Format clause section
  242: sub format_clause {
  243:     my (@tokeninfo)=@_;
  244:     return '';
  245: }
  246: 
  247: # ---------------------------------------------------- Format when:name section
  248: sub format_when_name {
  249:     my (@tokeninfo)=@_;
  250: #    $wloc++;
  251:     my $att_match=$tokeninfo[2]->{'match'};
  252:     push @{$conditions{'name'}},$att_match;
  253:     my $text=&trim($parser->get_text('/when:name'));
  254:     $parser->get_tag('/when:name');
  255: #    $wloc--;
  256: #    &cc unless $wloc;
  257:     return '';
  258: }
  259: 
  260: # --------------------------------------------------- Format when:cdata section
  261: sub format_when_cdata {
  262:     my (@tokeninfo)=@_;
  263:     $wloc++;
  264:     my $att_match=$tokeninfo[2]->{'match'};
  265:     push @{$conditions{'cdata'}},$att_match;
  266:     my $text=&trim($parser->get_text('/when:cdata'));
  267:     $parser->get_tag('/when:cdata');
  268:     $wloc--;
  269: #    &cc unless $wloc;
  270:     return '';
  271: }
  272: 
  273: # ----------------------------------------------- Format choice:exclude section
  274: sub format_choice_exclude {
  275:     my (@tokeninfo)=@_;
  276:     my $text=&trim($parser->get_text('/choice:exclude'));
  277:     $parser->get_tag('/choice:exclude');
  278:     $eh{$tokeninfo[2]->{'nodename'}}++;
  279:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
  280:          [@{$conditions{'name'}}];
  281:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
  282:          [@{$conditions{'attribute'}}];
  283:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
  284:          [@{$conditions{'value'}}];
  285:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
  286:          [@{$conditions{'cdata'}}];
  287:     return '';
  288: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>