Annotation of loncom/build/xfml_parse.pl, revision 1.3
1.1 harris41 1: #!/usr/bin/perl
2:
3: # YEAR=2002
1.2 harris41 4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
5: #
6: ###
1.1 harris41 7:
8: # Read in 2 XML file; first is the filter specification, the second
9: # is the XML file to be filtered
10:
1.2 harris41 11: ###############################################################################
12: ## ##
13: ## ORGANIZATION OF THIS PERL SCRIPT ##
14: ## 1. Notes ##
1.3 ! harris41 15: ## 2. Read in filter file ##
! 16: ## 3. Initialize and clear conditions ##
! 17: ## 4. Run through and apply clauses ##
1.2 harris41 18: ## ##
19: ###############################################################################
20:
21: # ----------------------------------------------------------------------- Notes
22: #
1.3 ! harris41 23: # This is meant to parse files meeting the xfml document type.
1.2 harris41 24: # See xfml.dtd. XFML=XML Filtering Markup Language.
25:
1.1 harris41 26: use HTML::TokeParser;
27: use strict;
28:
29: unless (@ARGV) {
30: print <<END;
31: Incorrect invocation.
32: Example usages:
33: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
34: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
35: END
36: }
37:
38: my %eh;
1.3 ! harris41 39:
! 40: # ---------------------------------------------- Read in filter file from @ARGV
1.1 harris41 41: my $tofilter=shift @ARGV;
1.3 ! harris41 42: open IN,"<$tofilter"; my @lines=<IN>;
! 43: my $parsestring=join('',@lines); undef @lines; close IN;
1.1 harris41 44: my $parser = HTML::TokeParser->new(\$parsestring) or
45: die('can\'t create TokeParser object');
46: $parser->xml_mode('1');
47:
1.3 ! harris41 48: # --------------------------------------------- initialize and clear conditions
1.1 harris41 49: my %conditions; &cc;
50:
1.3 ! harris41 51: # Define handling methods for mode-dependent text rendering
1.1 harris41 52: $parser->{textify}={
1.3 ! harris41 53: 'xfml' => \&format_xfml,
1.1 harris41 54: 'when:name' => \&format_when_name,
55: 'when:attribute' => \&format_when_attribute,
56: 'when:cdata' => \&format_when_cdata,
57: 'choice:exclude' => \&format_choice_exclude,
1.3 ! harris41 58: 'clause' => \&format_clause,
1.1 harris41 59: };
60:
61: my $text;
62: my $xfml;
63: my $wloc=0;
64: my %eha;
65:
1.3 ! harris41 66: # ----------------------------------------------- Run through and apply clauses
! 67: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
! 68: my $lparser = HTML::TokeParser->new(\$output) or
! 69: die('can\'t create TokeParser object');
! 70: $lparser->xml_mode('1');
! 71: my $parsestring2;
! 72: while (my $token = $parser->get_tag('clause')) {
! 73: $parsestring2=$output;
! 74: $lparser = HTML::TokeParser->new(\$parsestring2);
! 75: $lparser->xml_mode('1');
! 76: $output='';
! 77: &format_clause(@{$token});
! 78: $text = $parser->get_text('/clause');
! 79: $token = $parser->get_tag('/clause');
! 80:
! 81: my $token='';
! 82: my $ttype='';
! 83: my $excludeflag=0;
! 84: my $outcache='';
! 85: while ($token = $lparser->get_token()) {
! 86: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
! 87: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
! 88: elsif ($token->[0] eq 'T') {
! 89: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
! 90: or $ttype eq 'E') {
! 91: $output.=$token->[1];
! 92: }
! 93: else {
! 94: $outcache.=$token->[1];
! 95: }
! 96: }
! 97: elsif ($token->[0] eq 'S') {
! 98: if ($eh{$token->[1]} or $excludeflag==1) {
! 99: $ttype='';
! 100: $excludeflag=1;
! 101: $outcache.=$token->[4];
! 102: }
! 103: else {
! 104: $ttype='S';
! 105: $output.=$token->[4];
! 106: }
! 107: if ($excludeflag==1) {
! 108:
! 109: }
! 110: }
! 111: elsif ($token->[0] eq 'E') {
! 112: if ($eh{$token->[1]} and $excludeflag==1) {
! 113: $ttype='E';
! 114: $excludeflag=0;
! 115: $outcache.=$token->[2];
! 116: my $retval=&evalconditions($outcache);
! 117: if (&evalconditions($outcache)) {
! 118: $output.=$outcache;
! 119: }
! 120: else {
! 121: $output.='<!-- FILTERED OUT -->';
! 122: }
! 123: $outcache='';
! 124: }
! 125: elsif ($excludeflag==1) {
! 126: $ttype='';
! 127: $outcache.=$token->[2];
! 128: }
! 129: else {
! 130: $output.=$token->[2];
! 131: $ttype='E';
! 132: }
! 133: }
! 134: }
! 135: &cc;
1.1 harris41 136: }
1.3 ! harris41 137: print $output;
1.1 harris41 138:
1.3 ! harris41 139: # -------------------------------------------------------------- evalconditions
! 140: sub evalconditions {
! 141: my ($parsetext)=@_;
! 142: my $eparser = HTML::TokeParser->new(\$parsetext);
! 143: unless (@{$conditions{'name'}} or
! 144: @{$conditions{'attribute'}}) {
! 145: return 0;
1.1 harris41 146: }
1.3 ! harris41 147: my $nameflag=0;
! 148: my $cdataflag=0;
! 149: my $matchflag=0;
! 150: my $Ttoken='';
! 151: while (my $token = $eparser->get_token()) {
! 152: if ($token->[0] eq 'S') {
! 153: foreach my $name (@{$conditions{'name'}}) {
! 154: my $flag=0;
! 155: my $match=$name;
! 156: if ($match=~/^\!/) {
! 157: $match=~s/^\!//g;
! 158: $flag=1;
! 159: }
! 160: $match=~s/^\///g;
! 161: $match=~s/\/$//g;
! 162: if ((!$flag and $token->[1]=~/$match/) or
! 163: ($flag and $token->[1]!~/$match/)) {
! 164: $nameflag=1;
1.1 harris41 165: }
166: }
1.3 ! harris41 167: $Ttoken='';
1.1 harris41 168: }
1.3 ! harris41 169: elsif ($token->[0] eq 'E') {
! 170: foreach my $name (@{$conditions{'name'}}) {
! 171: my $flag=0;
! 172: my $match=$name;
! 173: if ($match=~/^\!/) {
! 174: $match=~s/^\!//g;
! 175: $flag=1;
! 176: }
! 177: $match=~s/^\///g;
! 178: $match=~s/\/$//g;
! 179: if ((!$flag and $token->[1]=~/$match/) or
! 180: ($flag and $token->[1]!~/$match/)) {
! 181: foreach my $cdata (@{$conditions{'cdata'}}) {
! 182: my $flag=0;
! 183: my $match=$cdata;
! 184: if ($match=~/^\!/) {
! 185: $match=~s/^\!//g;
! 186: $flag=1;
1.1 harris41 187: }
1.3 ! harris41 188: $match=~s/^\///g;
! 189: $match=~s/\/$//g;
! 190: if ((!$flag and $Ttoken=~/$match/) or
! 191: ($flag and $Ttoken!~/$match/)) {
! 192: $cdataflag=1;
1.2 harris41 193: }
1.3 ! harris41 194: }
! 195: if (@{$conditions{'cdata'}}) {
! 196: if ($cdataflag) {
! 197: return 0;
1.1 harris41 198: }
1.3 ! harris41 199: }
! 200: else {
! 201: if ($nameflag) {
! 202: return 0;
1.1 harris41 203: }
204: }
1.3 ! harris41 205: $nameflag=0;
1.1 harris41 206: }
207: }
208: }
1.3 ! harris41 209: elsif ($token->[0] eq 'T') {
! 210: if ($nameflag) {
! 211: $Ttoken.=$token->[1];
1.1 harris41 212: }
213: }
214: }
1.3 ! harris41 215: return 1;
1.1 harris41 216: }
217:
218: # ------------------------------------------------------------ clear conditions
219: sub cc {
220: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
221: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
222: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
223: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
1.3 ! harris41 224: %eh=(1,1); delete $eh{1};
1.1 harris41 225: }
226:
227: # --------------------------------------- remove starting and ending whitespace
228: sub trim {
229: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
230: }
231:
1.2 harris41 232:
233:
1.3 ! harris41 234:
1.1 harris41 235: # --------------------------------------------------------- Format xfml section
236: sub format_xfml {
237: my (@tokeninfo)=@_;
238: return '';
239: }
240:
1.3 ! harris41 241: # ------------------------------------------------------- Format clause section
! 242: sub format_clause {
! 243: my (@tokeninfo)=@_;
! 244: return '';
! 245: }
! 246:
1.1 harris41 247: # ---------------------------------------------------- Format when:name section
248: sub format_when_name {
249: my (@tokeninfo)=@_;
1.3 ! harris41 250: # $wloc++;
1.1 harris41 251: my $att_match=$tokeninfo[2]->{'match'};
252: push @{$conditions{'name'}},$att_match;
253: my $text=&trim($parser->get_text('/when:name'));
254: $parser->get_tag('/when:name');
1.3 ! harris41 255: # $wloc--;
! 256: # &cc unless $wloc;
1.1 harris41 257: return '';
258: }
259:
260: # --------------------------------------------------- Format when:cdata section
261: sub format_when_cdata {
262: my (@tokeninfo)=@_;
263: $wloc++;
264: my $att_match=$tokeninfo[2]->{'match'};
265: push @{$conditions{'cdata'}},$att_match;
266: my $text=&trim($parser->get_text('/when:cdata'));
267: $parser->get_tag('/when:cdata');
268: $wloc--;
1.3 ! harris41 269: # &cc unless $wloc;
1.1 harris41 270: return '';
271: }
272:
273: # ----------------------------------------------- Format choice:exclude section
274: sub format_choice_exclude {
275: my (@tokeninfo)=@_;
276: my $text=&trim($parser->get_text('/choice:exclude'));
277: $parser->get_tag('/choice:exclude');
278: $eh{$tokeninfo[2]->{'nodename'}}++;
279: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
280: [@{$conditions{'name'}}];
281: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
282: [@{$conditions{'attribute'}}];
283: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
284: [@{$conditions{'value'}}];
285: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
286: [@{$conditions{'cdata'}}];
287: return '';
288: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>