Annotation of loncom/build/xfml_parse.pl, revision 1.6
1.1 harris41 1: #!/usr/bin/perl
2:
1.4 harris41 3: # -------------------------------------------------------- Documentation notice
4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
5: # documentation internalized in this program.
6:
7: # --------------------------------------------------------- License Information
8: # The LearningOnline Network with CAPA
9: # piml_parse.pl - Linux Packaging Markup Language parser
10: #
1.6 ! matthew 11: # $Id: xfml_parse.pl,v 1.5 2002/04/08 12:51:03 harris41 Exp $
1.4 harris41 12: #
13: # Written by Scott Harrison, codeharrison@yahoo.com
14: #
15: # Copyright Michigan State University Board of Trustees
16: #
17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
18: #
19: # LON-CAPA is free software; you can redistribute it and/or modify
20: # it under the terms of the GNU General Public License as published by
21: # the Free Software Foundation; either version 2 of the License, or
22: # (at your option) any later version.
23: #
24: # LON-CAPA is distributed in the hope that it will be useful,
25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27: # GNU General Public License for more details.
28: #
29: # You should have received a copy of the GNU General Public License
30: # along with LON-CAPA; if not, write to the Free Software
31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32: #
33: # /home/httpd/html/adm/gpl.txt
34: #
35: # http://www.lon-capa.org/
36: #
1.1 harris41 37: # YEAR=2002
1.4 harris41 38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
1.2 harris41 39: #
40: ###
1.1 harris41 41:
42: # Read in 2 XML file; first is the filter specification, the second
43: # is the XML file to be filtered
44:
1.2 harris41 45: ###############################################################################
46: ## ##
47: ## ORGANIZATION OF THIS PERL SCRIPT ##
48: ## 1. Notes ##
1.3 harris41 49: ## 2. Read in filter file ##
50: ## 3. Initialize and clear conditions ##
51: ## 4. Run through and apply clauses ##
1.2 harris41 52: ## ##
53: ###############################################################################
54:
55: # ----------------------------------------------------------------------- Notes
56: #
1.3 harris41 57: # This is meant to parse files meeting the xfml document type.
1.2 harris41 58: # See xfml.dtd. XFML=XML Filtering Markup Language.
59:
1.1 harris41 60: use HTML::TokeParser;
61: use strict;
62:
63: unless (@ARGV) {
1.4 harris41 64: print(<<END);
1.1 harris41 65: Incorrect invocation.
66: Example usages:
67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
69: END
70: }
71:
72: my %eh;
1.3 harris41 73:
74: # ---------------------------------------------- Read in filter file from @ARGV
1.1 harris41 75: my $tofilter=shift @ARGV;
1.4 harris41 76: open(IN,"<$tofilter"); my @lines=<IN>;
1.3 harris41 77: my $parsestring=join('',@lines); undef @lines; close IN;
1.1 harris41 78: my $parser = HTML::TokeParser->new(\$parsestring) or
79: die('can\'t create TokeParser object');
80: $parser->xml_mode('1');
81:
1.3 harris41 82: # --------------------------------------------- initialize and clear conditions
1.1 harris41 83: my %conditions; &cc;
84:
1.3 harris41 85: # Define handling methods for mode-dependent text rendering
1.1 harris41 86: $parser->{textify}={
1.3 harris41 87: 'xfml' => \&format_xfml,
1.1 harris41 88: 'when:name' => \&format_when_name,
89: 'when:attribute' => \&format_when_attribute,
90: 'when:cdata' => \&format_when_cdata,
91: 'choice:exclude' => \&format_choice_exclude,
1.3 harris41 92: 'clause' => \&format_clause,
1.1 harris41 93: };
94:
95: my $text;
96: my $xfml;
97: my $wloc=0;
98: my %eha;
99:
1.3 harris41 100: # ----------------------------------------------- Run through and apply clauses
101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
102: my $lparser = HTML::TokeParser->new(\$output) or
103: die('can\'t create TokeParser object');
104: $lparser->xml_mode('1');
105: my $parsestring2;
106: while (my $token = $parser->get_tag('clause')) {
107: $parsestring2=$output;
108: $lparser = HTML::TokeParser->new(\$parsestring2);
109: $lparser->xml_mode('1');
110: $output='';
111: &format_clause(@{$token});
112: $text = $parser->get_text('/clause');
113: $token = $parser->get_tag('/clause');
114:
115: my $token='';
116: my $ttype='';
117: my $excludeflag=0;
118: my $outcache='';
119: while ($token = $lparser->get_token()) {
120: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
121: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
122: elsif ($token->[0] eq 'T') {
123: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
124: or $ttype eq 'E') {
125: $output.=$token->[1];
126: }
127: else {
128: $outcache.=$token->[1];
129: }
130: }
131: elsif ($token->[0] eq 'S') {
132: if ($eh{$token->[1]} or $excludeflag==1) {
133: $ttype='';
134: $excludeflag=1;
135: $outcache.=$token->[4];
136: }
137: else {
138: $ttype='S';
139: $output.=$token->[4];
140: }
141: if ($excludeflag==1) {
142:
143: }
144: }
145: elsif ($token->[0] eq 'E') {
146: if ($eh{$token->[1]} and $excludeflag==1) {
147: $ttype='E';
148: $excludeflag=0;
149: $outcache.=$token->[2];
150: if (&evalconditions($outcache)) {
1.6 ! matthew 151: $output.='<!-- FILTERED OUT -->';
1.3 harris41 152: }
153: else {
1.6 ! matthew 154: $output.=$outcache;
1.3 harris41 155: }
156: $outcache='';
157: }
158: elsif ($excludeflag==1) {
159: $ttype='';
160: $outcache.=$token->[2];
161: }
162: else {
163: $output.=$token->[2];
164: $ttype='E';
165: }
166: }
167: }
168: &cc;
1.1 harris41 169: }
1.3 harris41 170: print $output;
1.1 harris41 171:
1.3 harris41 172: # -------------------------------------------------------------- evalconditions
173: sub evalconditions {
174: my ($parsetext)=@_;
175: my $eparser = HTML::TokeParser->new(\$parsetext);
176: unless (@{$conditions{'name'}} or
177: @{$conditions{'attribute'}}) {
1.6 ! matthew 178: return 1;
1.1 harris41 179: }
1.3 harris41 180: my $nameflag=0;
181: my $cdataflag=0;
182: my $matchflag=0;
183: my $Ttoken='';
184: while (my $token = $eparser->get_token()) {
185: if ($token->[0] eq 'S') {
186: foreach my $name (@{$conditions{'name'}}) {
187: my $flag=0;
188: my $match=$name;
189: if ($match=~/^\!/) {
190: $match=~s/^\!//g;
191: $flag=1;
192: }
193: $match=~s/^\///g;
194: $match=~s/\/$//g;
195: if ((!$flag and $token->[1]=~/$match/) or
196: ($flag and $token->[1]!~/$match/)) {
197: $nameflag=1;
1.1 harris41 198: }
199: }
1.3 harris41 200: $Ttoken='';
1.1 harris41 201: }
1.3 harris41 202: elsif ($token->[0] eq 'E') {
203: foreach my $name (@{$conditions{'name'}}) {
204: my $flag=0;
205: my $match=$name;
206: if ($match=~/^\!/) {
207: $match=~s/^\!//g;
208: $flag=1;
209: }
210: $match=~s/^\///g;
211: $match=~s/\/$//g;
212: if ((!$flag and $token->[1]=~/$match/) or
213: ($flag and $token->[1]!~/$match/)) {
214: foreach my $cdata (@{$conditions{'cdata'}}) {
215: my $flag=0;
216: my $match=$cdata;
217: if ($match=~/^\!/) {
218: $match=~s/^\!//g;
219: $flag=1;
1.1 harris41 220: }
1.3 harris41 221: $match=~s/^\///g;
222: $match=~s/\/$//g;
223: if ((!$flag and $Ttoken=~/$match/) or
224: ($flag and $Ttoken!~/$match/)) {
225: $cdataflag=1;
1.2 harris41 226: }
1.3 harris41 227: }
228: if (@{$conditions{'cdata'}}) {
229: if ($cdataflag) {
230: return 0;
1.1 harris41 231: }
1.3 harris41 232: }
233: else {
234: if ($nameflag) {
235: return 0;
1.1 harris41 236: }
237: }
1.3 harris41 238: $nameflag=0;
1.1 harris41 239: }
240: }
241: }
1.3 harris41 242: elsif ($token->[0] eq 'T') {
243: if ($nameflag) {
244: $Ttoken.=$token->[1];
1.1 harris41 245: }
246: }
247: }
1.3 harris41 248: return 1;
1.1 harris41 249: }
250:
251: # ------------------------------------------------------------ clear conditions
252: sub cc {
253: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
254: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
255: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
256: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
1.3 harris41 257: %eh=(1,1); delete $eh{1};
1.1 harris41 258: }
259:
260: # --------------------------------------- remove starting and ending whitespace
261: sub trim {
262: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
263: }
264:
1.2 harris41 265:
266:
1.3 harris41 267:
1.1 harris41 268: # --------------------------------------------------------- Format xfml section
269: sub format_xfml {
270: my (@tokeninfo)=@_;
271: return '';
272: }
273:
1.3 harris41 274: # ------------------------------------------------------- Format clause section
275: sub format_clause {
276: my (@tokeninfo)=@_;
277: return '';
278: }
279:
1.1 harris41 280: # ---------------------------------------------------- Format when:name section
281: sub format_when_name {
282: my (@tokeninfo)=@_;
1.3 harris41 283: # $wloc++;
1.1 harris41 284: my $att_match=$tokeninfo[2]->{'match'};
285: push @{$conditions{'name'}},$att_match;
286: my $text=&trim($parser->get_text('/when:name'));
287: $parser->get_tag('/when:name');
1.3 harris41 288: # $wloc--;
289: # &cc unless $wloc;
1.1 harris41 290: return '';
291: }
292:
293: # --------------------------------------------------- Format when:cdata section
294: sub format_when_cdata {
295: my (@tokeninfo)=@_;
296: $wloc++;
297: my $att_match=$tokeninfo[2]->{'match'};
298: push @{$conditions{'cdata'}},$att_match;
299: my $text=&trim($parser->get_text('/when:cdata'));
300: $parser->get_tag('/when:cdata');
301: $wloc--;
1.3 harris41 302: # &cc unless $wloc;
1.1 harris41 303: return '';
304: }
305:
306: # ----------------------------------------------- Format choice:exclude section
307: sub format_choice_exclude {
308: my (@tokeninfo)=@_;
309: my $text=&trim($parser->get_text('/choice:exclude'));
310: $parser->get_tag('/choice:exclude');
311: $eh{$tokeninfo[2]->{'nodename'}}++;
312: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
313: [@{$conditions{'name'}}];
314: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
315: [@{$conditions{'attribute'}}];
316: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
317: [@{$conditions{'value'}}];
318: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
319: [@{$conditions{'cdata'}}];
320: return '';
321: }
1.4 harris41 322:
323: # ----------------------------------- POD (plain old documentation, CPAN style)
324:
325: =pod
326:
327: =head1 NAME
328:
1.5 harris41 329: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
1.4 harris41 330:
331: =head1 SYNOPSIS
332:
333: Usage is for lpml file to come in through standard input.
334:
335: =over 4
336:
337: =item *
338:
339: 1st argument is name of xfml file.
340:
341: =back
342:
343: Example:
344:
345: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
346:
347: or
348:
349: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
350:
351: =head1 DESCRIPTION
352:
353: I am using a multiple pass-through approach to parsing
354: the xfml file. This saves memory and makes sure the server
355: will never be overloaded.
356:
357: =head1 README
358:
359: I am using a multiple pass-through approach to parsing
360: the xfml file. This saves memory and makes sure the server
361: will never be overloaded.
362:
363: =head1 PREREQUISITES
364:
365: HTML::TokeParser
366:
367: =head1 COREQUISITES
368:
369: =head1 OSNAMES
370:
371: linux
372:
373: =head1 SCRIPT CATEGORIES
374:
375: Packaging/Administrative
376:
377: =head1 AUTHOR
378:
379: Scott Harrison
380: codeharrison@yahoo.com
381:
382: Please let me know how/if you are finding this script useful and
383: any/all suggestions. -Scott
384:
385: =cut
386:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>