1: #!/usr/bin/perl
2:
3: # YEAR=2002
4: # 1/26,1/27,1/28 - Scott Harrison
5:
6: # Read in 2 XML file; first is the filter specification, the second
7: # is the XML file to be filtered
8:
9: use HTML::TokeParser;
10: use strict;
11:
12: unless (@ARGV) {
13: print <<END;
14: Incorrect invocation.
15: Example usages:
16: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
17: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
18: END
19: }
20:
21: my %eh;
22: my %ih;
23: my $tofilter=shift @ARGV;
24: my @lines=<>; my $parsestring=join('',@lines); undef @lines;
25: my $parser = HTML::TokeParser->new(\$parsestring) or
26: die('can\'t create TokeParser object');
27: $parser->xml_mode('1');
28:
29: # Define handling methods for mode-dependent text rendering
30:
31: my %conditions; &cc;
32:
33: $parser->{textify}={
34: xfml => \&format_xfml,
35: 'when:name' => \&format_when_name,
36: 'when:attribute' => \&format_when_attribute,
37: 'when:cdata' => \&format_when_cdata,
38: 'choice:include' => \&format_choice_include,
39: 'choice:exclude' => \&format_choice_exclude,
40: };
41:
42: my $text;
43: my $xfml;
44: my $wloc=0;
45: my %eha;
46:
47: while (my $token = $parser->get_tag('xfml')) {
48: &format_xfml(@{$token});
49: $text = $parser->get_text('/xfml');
50: # print $xfml;
51: # print $text;
52: $token = $parser->get_tag('/xfml');
53: }
54:
55: open IN,"<$tofilter";
56: my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
57: $parser = HTML::TokeParser->new(\$parsestring2) or
58: die('can\'t create TokeParser object');
59: $parser->xml_mode('1');
60:
61: my $token;
62: my $hloc=0;
63: my %ts;
64: my $tr;
65: my $echild=0;
66: my $exclude=0;
67: my $excluden=0;
68: my $excludea=0;
69: my $et=0;
70: my $cdata='';
71: while ($token = $parser->get_token()) {
72: # from HTML::TokeParser documentation:
73: # ["S", $tag, %$attr, @$attrseq, $text]
74: # ["E", $tag, $text]
75: # ["T", $text, $is_data]
76: # ["C", $text]
77: # ["D", $text]
78: # ["PI", $token0, $text]
79: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
80: # @{$conditions{'name'}};
81: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
82: # @{$conditions{'attribute'}};
83: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
84: # @{$conditions{'value'}};
85: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
86: # @{$conditions{'cdata'}};
87: if ($token->[0] eq 'D') {
88: print $token->[1];
89: }
90: elsif ($token->[0] eq 'C') {
91: print $token->[1];
92: }
93: elsif ($token->[0] eq 'S') {
94: $cdata='';
95: $hloc++;
96: # if token can be excluded, then pretend it is until all conditions are
97: # run (eha); then output during end tag processing
98: # else, output
99:
100: # a token can be excluded when it is an eh key, or a child node of
101: # an eh key
102:
103: if ($eh{$token->[1]}) {
104: $echild=$token->[1];
105: # print "ECHILD=$echild\n";
106: }
107: if ($echild) {
108: # run through names for echild
109: # then attributes and/or values and/or cdata
110: my $name=$token->[1];
111: my @attributes=@{$token->[3]};
112: my %atthash=%{$token->[2]};
113: foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
114: foreach my $namematch (@{$namemlist}) {
115: my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
116: if ($name=~/$nm/) {
117: # print "NMATCH: $nm ($name)\n";
118: $excluden++;
119: foreach my $attributemlist
120: (@{$eha{$echild}->{'attribute'}}) {
121: foreach my $attributematch
122: (@{$attributemlist}) {
123: my ($an,$am)=
124: split(/\=/,$attributematch,2);
125: $am=~s/^.//;
126: $am=~s/.$//;
127: # print 'AM:'."($an,$am)\t";
128: # print 'ATT:'.join(',',%atthash)."\n";
129: if ($atthash{$an}) {
130: if ($atthash{$an}=~/$am/) {
131: $excludea++;
132: # print "AMATCH: $am (".
133: # join(',',
134: # @attributes)
135: # ."\n";
136: }
137: }
138: }
139: }
140: }
141: }
142: }
143: $tr.=$token->[4];
144: }
145: else {
146: print $token->[4];
147: }
148: }
149: elsif ($token->[0] eq 'E') {
150: if ($echild) {
151: $tr.=$token->[2];
152: if ($excluden) {
153: foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
154: foreach my $cdatamatch (@{$cdatamlist}) {
155: # print "CDATA: $cdatamatch, $cdata\n";
156: my $cm=$cdatamatch;
157: my $not=0;
158: if ($cm=~/\!/) {
159: $not=1;
160: $cm=~s/^.//;
161: }
162: $cm=~s/^.//; $cm=~s/.$//;
163: if ((!$not and $cdata!~/$cm/)
164: or ($not and $cdata=~/$cm/)) {
165: # print "CMISMATCH: $cm ($cdata)\n";
166: }
167: elsif (($not and $cdata!~/$cm/)
168: or (!$not and $cdata=~/$cm/)) {
169: $exclude++;
170: }
171: }
172: }
173: }
174: }
175: if ($eh{$token->[1]}) {
176: $echild=0;
177: if (!$exclude and !$excludea) {
178: print $tr;
179: # print $token->[2];
180: $tr='';
181: }
182: elsif ($exclude>0 or $excludea>0) {
183: # print "EXCLUDING $token->[1] $excludea $excluden\n";
184: $exclude=0; $excluden=0; $excludea=0;
185: $tr='';
186: }
187: $exclude=0; $excluden=0; $excludea=0;
188: }
189: else {
190: if ($echild) {
191: # $tr.=$token->[2];
192: }
193: else {
194: print $token->[2];
195: $tr='';
196: }
197: }
198: $hloc--;
199: }
200: elsif ($token->[0] eq 'T') {
201: if ($echild) {
202: $tr.=$token->[1];
203: $cdata=$token->[1];
204: }
205: else {
206: print $token->[1];
207: $tr='';
208: }
209: }
210: }
211:
212: # ------------------------------------------------------------ clear conditions
213: sub cc {
214: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
215: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
216: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
217: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
218: }
219:
220: # --------------------------------------- remove starting and ending whitespace
221: sub trim {
222: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
223: }
224:
225: # --------------------------------------------------------- Format xfml section
226: sub format_xfml {
227: my (@tokeninfo)=@_;
228: return '';
229: }
230:
231: # ---------------------------------------------------- Format when:name section
232: sub format_when_name {
233: my (@tokeninfo)=@_;
234: $wloc++;
235: my $att_match=$tokeninfo[2]->{'match'};
236: push @{$conditions{'name'}},$att_match;
237: my $text=&trim($parser->get_text('/when:name'));
238: $parser->get_tag('/when:name');
239: # print 'Name Matching...'.$att_match;
240: $wloc--;
241: &cc unless $wloc;
242: return '';
243: }
244:
245: # ----------------------------------------------- Format when:attribute section
246: sub format_when_attribute {
247: my (@tokeninfo)=@_;
248: $wloc++;
249: my $att_match=$tokeninfo[2]->{'match'};
250: push @{$conditions{'attribute'}},$att_match;
251: my $text=&trim($parser->get_text('/when:attribute'));
252: $parser->get_tag('/when:attribute');
253: # print 'Attribute Matching...'.$att_match;
254: $wloc--;
255: &cc unless $wloc;
256: return '';
257: }
258:
259: # --------------------------------------------------- Format when:cdata section
260: sub format_when_cdata {
261: my (@tokeninfo)=@_;
262: $wloc++;
263: my $att_match=$tokeninfo[2]->{'match'};
264: # print 'Cdata Matching...'.$att_match;
265: push @{$conditions{'cdata'}},$att_match;
266: my $text=&trim($parser->get_text('/when:cdata'));
267: $parser->get_tag('/when:cdata');
268: $wloc--;
269: &cc unless $wloc;
270: return '';
271: }
272:
273: # ----------------------------------------------- Format choice:include section
274: sub format_choice_include {
275: my (@tokeninfo)=@_;
276: my $text=&trim($parser->get_text('/choice:include'));
277: $parser->get_tag('/choice:include');
278: $ih{$tokeninfo[2]->{'match'}}++;
279: return '';
280: }
281:
282: # ----------------------------------------------- Format choice:exclude section
283: sub format_choice_exclude {
284: my (@tokeninfo)=@_;
285: my $text=&trim($parser->get_text('/choice:exclude'));
286: $parser->get_tag('/choice:exclude');
287: $eh{$tokeninfo[2]->{'nodename'}}++;
288: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
289: [@{$conditions{'name'}}];
290: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
291: [@{$conditions{'attribute'}}];
292: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
293: [@{$conditions{'value'}}];
294: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
295: [@{$conditions{'cdata'}}];
296: return '';
297: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>