1: #!/usr/bin/perl
2:
3: # YEAR=2002
4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
5: #
6: ###
7:
8: # Read in 2 XML file; first is the filter specification, the second
9: # is the XML file to be filtered
10:
11: ###############################################################################
12: ## ##
13: ## ORGANIZATION OF THIS PERL SCRIPT ##
14: ## 1. Notes ##
15: ## 2. Read in filter file ##
16: ## 3. Initialize and clear conditions ##
17: ## 4. Run through and apply clauses ##
18: ## ##
19: ###############################################################################
20:
21: # ----------------------------------------------------------------------- Notes
22: #
23: # This is meant to parse files meeting the xfml document type.
24: # See xfml.dtd. XFML=XML Filtering Markup Language.
25:
26: use HTML::TokeParser;
27: use strict;
28:
29: unless (@ARGV) {
30: print <<END;
31: Incorrect invocation.
32: Example usages:
33: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
34: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
35: END
36: }
37:
38: my %eh;
39:
40: # ---------------------------------------------- Read in filter file from @ARGV
41: my $tofilter=shift @ARGV;
42: open IN,"<$tofilter"; my @lines=<IN>;
43: my $parsestring=join('',@lines); undef @lines; close IN;
44: my $parser = HTML::TokeParser->new(\$parsestring) or
45: die('can\'t create TokeParser object');
46: $parser->xml_mode('1');
47:
48: # --------------------------------------------- initialize and clear conditions
49: my %conditions; &cc;
50:
51: # Define handling methods for mode-dependent text rendering
52: $parser->{textify}={
53: 'xfml' => \&format_xfml,
54: 'when:name' => \&format_when_name,
55: 'when:attribute' => \&format_when_attribute,
56: 'when:cdata' => \&format_when_cdata,
57: 'choice:exclude' => \&format_choice_exclude,
58: 'clause' => \&format_clause,
59: };
60:
61: my $text;
62: my $xfml;
63: my $wloc=0;
64: my %eha;
65:
66: # ----------------------------------------------- Run through and apply clauses
67: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
68: my $lparser = HTML::TokeParser->new(\$output) or
69: die('can\'t create TokeParser object');
70: $lparser->xml_mode('1');
71: my $parsestring2;
72: while (my $token = $parser->get_tag('clause')) {
73: $parsestring2=$output;
74: $lparser = HTML::TokeParser->new(\$parsestring2);
75: $lparser->xml_mode('1');
76: $output='';
77: &format_clause(@{$token});
78: $text = $parser->get_text('/clause');
79: $token = $parser->get_tag('/clause');
80:
81: my $token='';
82: my $ttype='';
83: my $excludeflag=0;
84: my $outcache='';
85: while ($token = $lparser->get_token()) {
86: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
87: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
88: elsif ($token->[0] eq 'T') {
89: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
90: or $ttype eq 'E') {
91: $output.=$token->[1];
92: }
93: else {
94: $outcache.=$token->[1];
95: }
96: }
97: elsif ($token->[0] eq 'S') {
98: if ($eh{$token->[1]} or $excludeflag==1) {
99: $ttype='';
100: $excludeflag=1;
101: $outcache.=$token->[4];
102: }
103: else {
104: $ttype='S';
105: $output.=$token->[4];
106: }
107: if ($excludeflag==1) {
108:
109: }
110: }
111: elsif ($token->[0] eq 'E') {
112: if ($eh{$token->[1]} and $excludeflag==1) {
113: $ttype='E';
114: $excludeflag=0;
115: $outcache.=$token->[2];
116: my $retval=&evalconditions($outcache);
117: if (&evalconditions($outcache)) {
118: $output.=$outcache;
119: }
120: else {
121: $output.='<!-- FILTERED OUT -->';
122: }
123: $outcache='';
124: }
125: elsif ($excludeflag==1) {
126: $ttype='';
127: $outcache.=$token->[2];
128: }
129: else {
130: $output.=$token->[2];
131: $ttype='E';
132: }
133: }
134: }
135: &cc;
136: }
137: print $output;
138:
139: # -------------------------------------------------------------- evalconditions
140: sub evalconditions {
141: my ($parsetext)=@_;
142: my $eparser = HTML::TokeParser->new(\$parsetext);
143: unless (@{$conditions{'name'}} or
144: @{$conditions{'attribute'}}) {
145: return 0;
146: }
147: my $nameflag=0;
148: my $cdataflag=0;
149: my $matchflag=0;
150: my $Ttoken='';
151: while (my $token = $eparser->get_token()) {
152: if ($token->[0] eq 'S') {
153: foreach my $name (@{$conditions{'name'}}) {
154: my $flag=0;
155: my $match=$name;
156: if ($match=~/^\!/) {
157: $match=~s/^\!//g;
158: $flag=1;
159: }
160: $match=~s/^\///g;
161: $match=~s/\/$//g;
162: if ((!$flag and $token->[1]=~/$match/) or
163: ($flag and $token->[1]!~/$match/)) {
164: $nameflag=1;
165: }
166: }
167: $Ttoken='';
168: }
169: elsif ($token->[0] eq 'E') {
170: foreach my $name (@{$conditions{'name'}}) {
171: my $flag=0;
172: my $match=$name;
173: if ($match=~/^\!/) {
174: $match=~s/^\!//g;
175: $flag=1;
176: }
177: $match=~s/^\///g;
178: $match=~s/\/$//g;
179: if ((!$flag and $token->[1]=~/$match/) or
180: ($flag and $token->[1]!~/$match/)) {
181: foreach my $cdata (@{$conditions{'cdata'}}) {
182: my $flag=0;
183: my $match=$cdata;
184: if ($match=~/^\!/) {
185: $match=~s/^\!//g;
186: $flag=1;
187: }
188: $match=~s/^\///g;
189: $match=~s/\/$//g;
190: if ((!$flag and $Ttoken=~/$match/) or
191: ($flag and $Ttoken!~/$match/)) {
192: $cdataflag=1;
193: }
194: }
195: if (@{$conditions{'cdata'}}) {
196: if ($cdataflag) {
197: return 0;
198: }
199: }
200: else {
201: if ($nameflag) {
202: return 0;
203: }
204: }
205: $nameflag=0;
206: }
207: }
208: }
209: elsif ($token->[0] eq 'T') {
210: if ($nameflag) {
211: $Ttoken.=$token->[1];
212: }
213: }
214: }
215: return 1;
216: }
217:
218: # ------------------------------------------------------------ clear conditions
219: sub cc {
220: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
221: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
222: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
223: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
224: %eh=(1,1); delete $eh{1};
225: }
226:
227: # --------------------------------------- remove starting and ending whitespace
228: sub trim {
229: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
230: }
231:
232:
233:
234:
235: # --------------------------------------------------------- Format xfml section
236: sub format_xfml {
237: my (@tokeninfo)=@_;
238: return '';
239: }
240:
241: # ------------------------------------------------------- Format clause section
242: sub format_clause {
243: my (@tokeninfo)=@_;
244: return '';
245: }
246:
247: # ---------------------------------------------------- Format when:name section
248: sub format_when_name {
249: my (@tokeninfo)=@_;
250: # $wloc++;
251: my $att_match=$tokeninfo[2]->{'match'};
252: push @{$conditions{'name'}},$att_match;
253: my $text=&trim($parser->get_text('/when:name'));
254: $parser->get_tag('/when:name');
255: # $wloc--;
256: # &cc unless $wloc;
257: return '';
258: }
259:
260: # --------------------------------------------------- Format when:cdata section
261: sub format_when_cdata {
262: my (@tokeninfo)=@_;
263: $wloc++;
264: my $att_match=$tokeninfo[2]->{'match'};
265: push @{$conditions{'cdata'}},$att_match;
266: my $text=&trim($parser->get_text('/when:cdata'));
267: $parser->get_tag('/when:cdata');
268: $wloc--;
269: # &cc unless $wloc;
270: return '';
271: }
272:
273: # ----------------------------------------------- Format choice:exclude section
274: sub format_choice_exclude {
275: my (@tokeninfo)=@_;
276: my $text=&trim($parser->get_text('/choice:exclude'));
277: $parser->get_tag('/choice:exclude');
278: $eh{$tokeninfo[2]->{'nodename'}}++;
279: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
280: [@{$conditions{'name'}}];
281: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
282: [@{$conditions{'attribute'}}];
283: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
284: [@{$conditions{'value'}}];
285: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
286: [@{$conditions{'cdata'}}];
287: return '';
288: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>