Annotation of loncom/build/xfml_parse.pl, revision 1.1
1.1 ! harris41 1: #!/usr/bin/perl
! 2:
! 3: # YEAR=2002
! 4: # 1/26,1/27,1/28 - Scott Harrison
! 5:
! 6: # Read in 2 XML file; first is the filter specification, the second
! 7: # is the XML file to be filtered
! 8:
! 9: use HTML::TokeParser;
! 10: use strict;
! 11:
! 12: unless (@ARGV) {
! 13: print <<END;
! 14: Incorrect invocation.
! 15: Example usages:
! 16: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
! 17: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
! 18: END
! 19: }
! 20:
! 21: my %eh;
! 22: my %ih;
! 23: my $tofilter=shift @ARGV;
! 24: my @lines=<>; my $parsestring=join('',@lines); undef @lines;
! 25: my $parser = HTML::TokeParser->new(\$parsestring) or
! 26: die('can\'t create TokeParser object');
! 27: $parser->xml_mode('1');
! 28:
! 29: # Define handling methods for mode-dependent text rendering
! 30:
! 31: my %conditions; &cc;
! 32:
! 33: $parser->{textify}={
! 34: xfml => \&format_xfml,
! 35: 'when:name' => \&format_when_name,
! 36: 'when:attribute' => \&format_when_attribute,
! 37: 'when:cdata' => \&format_when_cdata,
! 38: 'choice:include' => \&format_choice_include,
! 39: 'choice:exclude' => \&format_choice_exclude,
! 40: };
! 41:
! 42: my $text;
! 43: my $xfml;
! 44: my $wloc=0;
! 45: my %eha;
! 46:
! 47: while (my $token = $parser->get_tag('xfml')) {
! 48: &format_xfml(@{$token});
! 49: $text = $parser->get_text('/xfml');
! 50: # print $xfml;
! 51: # print $text;
! 52: $token = $parser->get_tag('/xfml');
! 53: }
! 54:
! 55: open IN,"<$tofilter";
! 56: my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
! 57: $parser = HTML::TokeParser->new(\$parsestring2) or
! 58: die('can\'t create TokeParser object');
! 59: $parser->xml_mode('1');
! 60:
! 61: my $token;
! 62: my $hloc=0;
! 63: my %ts;
! 64: my $tr;
! 65: my $echild=0;
! 66: my $exclude=0;
! 67: my $excluden=0;
! 68: my $excludea=0;
! 69: my $et=0;
! 70: my $cdata='';
! 71: while ($token = $parser->get_token()) {
! 72: # from HTML::TokeParser documentation:
! 73: # ["S", $tag, %$attr, @$attrseq, $text]
! 74: # ["E", $tag, $text]
! 75: # ["T", $text, $is_data]
! 76: # ["C", $text]
! 77: # ["D", $text]
! 78: # ["PI", $token0, $text]
! 79: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
! 80: # @{$conditions{'name'}};
! 81: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
! 82: # @{$conditions{'attribute'}};
! 83: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
! 84: # @{$conditions{'value'}};
! 85: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
! 86: # @{$conditions{'cdata'}};
! 87: if ($token->[0] eq 'D') {
! 88: print $token->[1];
! 89: }
! 90: elsif ($token->[0] eq 'C') {
! 91: print $token->[1];
! 92: }
! 93: elsif ($token->[0] eq 'S') {
! 94: $cdata='';
! 95: $hloc++;
! 96: # if token can be excluded, then pretend it is until all conditions are
! 97: # run (eha); then output during end tag processing
! 98: # else, output
! 99:
! 100: # a token can be excluded when it is an eh key, or a child node of
! 101: # an eh key
! 102:
! 103: if ($eh{$token->[1]}) {
! 104: $echild=$token->[1];
! 105: # print "ECHILD=$echild\n";
! 106: }
! 107: if ($echild) {
! 108: # run through names for echild
! 109: # then attributes and/or values and/or cdata
! 110: my $name=$token->[1];
! 111: my @attributes=@{$token->[3]};
! 112: my %atthash=%{$token->[2]};
! 113: foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
! 114: foreach my $namematch (@{$namemlist}) {
! 115: my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
! 116: if ($name=~/$nm/) {
! 117: # print "NMATCH: $nm ($name)\n";
! 118: $excluden++;
! 119: foreach my $attributemlist
! 120: (@{$eha{$echild}->{'attribute'}}) {
! 121: foreach my $attributematch
! 122: (@{$attributemlist}) {
! 123: my ($an,$am)=
! 124: split(/\=/,$attributematch,2);
! 125: $am=~s/^.//;
! 126: $am=~s/.$//;
! 127: # print 'AM:'."($an,$am)\t";
! 128: # print 'ATT:'.join(',',%atthash)."\n";
! 129: if ($atthash{$an}) {
! 130: if ($atthash{$an}=~/$am/) {
! 131: $excludea++;
! 132: # print "AMATCH: $am (".
! 133: # join(',',
! 134: # @attributes)
! 135: # ."\n";
! 136: }
! 137: }
! 138: }
! 139: }
! 140: }
! 141: }
! 142: }
! 143: $tr.=$token->[4];
! 144: }
! 145: else {
! 146: print $token->[4];
! 147: }
! 148: }
! 149: elsif ($token->[0] eq 'E') {
! 150: if ($echild) {
! 151: $tr.=$token->[2];
! 152: if ($excluden) {
! 153: foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
! 154: foreach my $cdatamatch (@{$cdatamlist}) {
! 155: # print "CDATA: $cdatamatch, $cdata\n";
! 156: my $cm=$cdatamatch;
! 157: my $not=0;
! 158: if ($cm=~/\!/) {
! 159: $not=1;
! 160: $cm=~s/^.//;
! 161: }
! 162: $cm=~s/^.//; $cm=~s/.$//;
! 163: if ((!$not and $cdata!~/$cm/)
! 164: or ($not and $cdata=~/$cm/)) {
! 165: # print "CMISMATCH: $cm ($cdata)\n";
! 166: }
! 167: elsif (($not and $cdata!~/$cm/)
! 168: or (!$not and $cdata=~/$cm/)) {
! 169: $exclude++;
! 170: }
! 171: }
! 172: }
! 173: }
! 174: }
! 175: if ($eh{$token->[1]}) {
! 176: $echild=0;
! 177: if (!$exclude and !$excludea) {
! 178: print $tr;
! 179: # print $token->[2];
! 180: $tr='';
! 181: }
! 182: elsif ($exclude>0 or $excludea>0) {
! 183: # print "EXCLUDING $token->[1] $excludea $excluden\n";
! 184: $exclude=0; $excluden=0; $excludea=0;
! 185: $tr='';
! 186: }
! 187: $exclude=0; $excluden=0; $excludea=0;
! 188: }
! 189: else {
! 190: if ($echild) {
! 191: # $tr.=$token->[2];
! 192: }
! 193: else {
! 194: print $token->[2];
! 195: $tr='';
! 196: }
! 197: }
! 198: $hloc--;
! 199: }
! 200: elsif ($token->[0] eq 'T') {
! 201: if ($echild) {
! 202: $tr.=$token->[1];
! 203: $cdata=$token->[1];
! 204: }
! 205: else {
! 206: print $token->[1];
! 207: $tr='';
! 208: }
! 209: }
! 210: }
! 211:
! 212: # ------------------------------------------------------------ clear conditions
! 213: sub cc {
! 214: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
! 215: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
! 216: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
! 217: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
! 218: }
! 219:
! 220: # --------------------------------------- remove starting and ending whitespace
! 221: sub trim {
! 222: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
! 223: }
! 224:
! 225: # --------------------------------------------------------- Format xfml section
! 226: sub format_xfml {
! 227: my (@tokeninfo)=@_;
! 228: return '';
! 229: }
! 230:
! 231: # ---------------------------------------------------- Format when:name section
! 232: sub format_when_name {
! 233: my (@tokeninfo)=@_;
! 234: $wloc++;
! 235: my $att_match=$tokeninfo[2]->{'match'};
! 236: push @{$conditions{'name'}},$att_match;
! 237: my $text=&trim($parser->get_text('/when:name'));
! 238: $parser->get_tag('/when:name');
! 239: # print 'Name Matching...'.$att_match;
! 240: $wloc--;
! 241: &cc unless $wloc;
! 242: return '';
! 243: }
! 244:
! 245: # ----------------------------------------------- Format when:attribute section
! 246: sub format_when_attribute {
! 247: my (@tokeninfo)=@_;
! 248: $wloc++;
! 249: my $att_match=$tokeninfo[2]->{'match'};
! 250: push @{$conditions{'attribute'}},$att_match;
! 251: my $text=&trim($parser->get_text('/when:attribute'));
! 252: $parser->get_tag('/when:attribute');
! 253: # print 'Attribute Matching...'.$att_match;
! 254: $wloc--;
! 255: &cc unless $wloc;
! 256: return '';
! 257: }
! 258:
! 259: # --------------------------------------------------- Format when:cdata section
! 260: sub format_when_cdata {
! 261: my (@tokeninfo)=@_;
! 262: $wloc++;
! 263: my $att_match=$tokeninfo[2]->{'match'};
! 264: # print 'Cdata Matching...'.$att_match;
! 265: push @{$conditions{'cdata'}},$att_match;
! 266: my $text=&trim($parser->get_text('/when:cdata'));
! 267: $parser->get_tag('/when:cdata');
! 268: $wloc--;
! 269: &cc unless $wloc;
! 270: return '';
! 271: }
! 272:
! 273: # ----------------------------------------------- Format choice:include section
! 274: sub format_choice_include {
! 275: my (@tokeninfo)=@_;
! 276: my $text=&trim($parser->get_text('/choice:include'));
! 277: $parser->get_tag('/choice:include');
! 278: $ih{$tokeninfo[2]->{'match'}}++;
! 279: return '';
! 280: }
! 281:
! 282: # ----------------------------------------------- Format choice:exclude section
! 283: sub format_choice_exclude {
! 284: my (@tokeninfo)=@_;
! 285: my $text=&trim($parser->get_text('/choice:exclude'));
! 286: $parser->get_tag('/choice:exclude');
! 287: $eh{$tokeninfo[2]->{'nodename'}}++;
! 288: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
! 289: [@{$conditions{'name'}}];
! 290: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
! 291: [@{$conditions{'attribute'}}];
! 292: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
! 293: [@{$conditions{'value'}}];
! 294: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
! 295: [@{$conditions{'cdata'}}];
! 296: return '';
! 297: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>