File:
[LON-CAPA] /
loncom /
build /
xfml_parse.pl
Revision
1.7:
download - view:
text,
annotated -
select for diffs
Wed May 22 17:07:50 2002 UTC (22 years, 9 months ago) by
harris41
Branches:
MAIN
CVS tags:
version_2_9_X,
version_2_9_99_0,
version_2_9_1,
version_2_9_0,
version_2_8_X,
version_2_8_99_1,
version_2_8_99_0,
version_2_8_2,
version_2_8_1,
version_2_8_0,
version_2_7_X,
version_2_7_99_1,
version_2_7_99_0,
version_2_7_1,
version_2_7_0,
version_2_6_X,
version_2_6_99_1,
version_2_6_99_0,
version_2_6_3,
version_2_6_2,
version_2_6_1,
version_2_6_0,
version_2_5_X,
version_2_5_99_1,
version_2_5_99_0,
version_2_5_2,
version_2_5_1,
version_2_5_0,
version_2_4_X,
version_2_4_99_0,
version_2_4_2,
version_2_4_1,
version_2_4_0,
version_2_3_X,
version_2_3_99_0,
version_2_3_2,
version_2_3_1,
version_2_3_0,
version_2_2_X,
version_2_2_99_1,
version_2_2_99_0,
version_2_2_2,
version_2_2_1,
version_2_2_0,
version_2_1_X,
version_2_1_99_3,
version_2_1_99_2,
version_2_1_99_1,
version_2_1_99_0,
version_2_1_3,
version_2_1_2,
version_2_1_1,
version_2_1_0,
version_2_12_X,
version_2_11_X,
version_2_11_6_msu,
version_2_11_6,
version_2_11_5_msu,
version_2_11_5,
version_2_11_4_uiuc,
version_2_11_4_msu,
version_2_11_4,
version_2_11_3_uiuc,
version_2_11_3_msu,
version_2_11_3,
version_2_11_2_uiuc,
version_2_11_2_msu,
version_2_11_2_educog,
version_2_11_2,
version_2_11_1,
version_2_11_0_RC3,
version_2_11_0_RC2,
version_2_11_0_RC1,
version_2_11_0,
version_2_10_X,
version_2_10_1,
version_2_10_0_RC2,
version_2_10_0_RC1,
version_2_10_0,
version_2_0_X,
version_2_0_99_1,
version_2_0_2,
version_2_0_1,
version_2_0_0,
version_1_99_3,
version_1_99_2,
version_1_99_1_tmcc,
version_1_99_1,
version_1_99_0_tmcc,
version_1_99_0,
version_1_3_X,
version_1_3_3,
version_1_3_2,
version_1_3_1,
version_1_3_0,
version_1_2_X,
version_1_2_99_1,
version_1_2_99_0,
version_1_2_1,
version_1_2_0,
version_1_1_X,
version_1_1_99_5,
version_1_1_99_4,
version_1_1_99_3,
version_1_1_99_2,
version_1_1_99_1,
version_1_1_99_0,
version_1_1_3,
version_1_1_2,
version_1_1_1,
version_1_1_0,
version_1_0_99_3,
version_1_0_99_2,
version_1_0_99_1,
version_1_0_99,
version_1_0_3,
version_1_0_2,
version_1_0_1,
version_1_0_0,
version_0_99_5,
version_0_99_4,
version_0_99_3,
version_0_99_2,
version_0_99_1,
version_0_99_0,
version_0_6_2,
version_0_6,
version_0_5_1,
version_0_5,
version_0_4,
stable_2002_july,
loncapaMITrelate_1,
language_hyphenation_merge,
language_hyphenation,
conference_2003,
bz6209-base,
bz6209,
bz5969,
bz5610,
bz2851,
STABLE,
PRINT_INCOMPLETE_base,
PRINT_INCOMPLETE,
HEAD,
GCI_3,
GCI_2,
GCI_1,
BZ5971-printing-apage,
BZ5434-fox,
BZ4492-merge,
BZ4492-feature_horizontal_radioresponse,
BZ4492-feature_Support_horizontal_radioresponse,
BZ4492-Support_horizontal_radioresponse
xfml_parse.pl already has been well-tested. the changes in 1.6 are incorrect
and are now reversed.
1: #!/usr/bin/perl
2:
3: # -------------------------------------------------------- Documentation notice
4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
5: # documentation internalized in this program.
6:
7: # --------------------------------------------------------- License Information
8: # The LearningOnline Network with CAPA
9: # piml_parse.pl - Linux Packaging Markup Language parser
10: #
11: # $Id: xfml_parse.pl,v 1.7 2002/05/22 17:07:50 harris41 Exp $
12: #
13: # Written by Scott Harrison, codeharrison@yahoo.com
14: #
15: # Copyright Michigan State University Board of Trustees
16: #
17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
18: #
19: # LON-CAPA is free software; you can redistribute it and/or modify
20: # it under the terms of the GNU General Public License as published by
21: # the Free Software Foundation; either version 2 of the License, or
22: # (at your option) any later version.
23: #
24: # LON-CAPA is distributed in the hope that it will be useful,
25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27: # GNU General Public License for more details.
28: #
29: # You should have received a copy of the GNU General Public License
30: # along with LON-CAPA; if not, write to the Free Software
31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32: #
33: # /home/httpd/html/adm/gpl.txt
34: #
35: # http://www.lon-capa.org/
36: #
37: # YEAR=2002
38: # 1/26,1/27,1/28,1/29,1/30,1/31,2/20,4/8 - Scott Harrison
39: #
40: ###
41:
42: # Read in 2 XML file; first is the filter specification, the second
43: # is the XML file to be filtered
44:
45: ###############################################################################
46: ## ##
47: ## ORGANIZATION OF THIS PERL SCRIPT ##
48: ## 1. Notes ##
49: ## 2. Read in filter file ##
50: ## 3. Initialize and clear conditions ##
51: ## 4. Run through and apply clauses ##
52: ## ##
53: ###############################################################################
54:
55: # ----------------------------------------------------------------------- Notes
56: #
57: # This is meant to parse files meeting the xfml document type.
58: # See xfml.dtd. XFML=XML Filtering Markup Language.
59:
60: use HTML::TokeParser;
61: use strict;
62:
63: unless (@ARGV) {
64: print(<<END);
65: Incorrect invocation.
66: Example usages:
67: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
68: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
69: END
70: }
71:
72: my %eh;
73:
74: # ---------------------------------------------- Read in filter file from @ARGV
75: my $tofilter=shift @ARGV;
76: open(IN,"<$tofilter"); my @lines=<IN>;
77: my $parsestring=join('',@lines); undef @lines; close IN;
78: my $parser = HTML::TokeParser->new(\$parsestring) or
79: die('can\'t create TokeParser object');
80: $parser->xml_mode('1');
81:
82: # --------------------------------------------- initialize and clear conditions
83: my %conditions; &cc;
84:
85: # Define handling methods for mode-dependent text rendering
86: $parser->{textify}={
87: 'xfml' => \&format_xfml,
88: 'when:name' => \&format_when_name,
89: 'when:attribute' => \&format_when_attribute,
90: 'when:cdata' => \&format_when_cdata,
91: 'choice:exclude' => \&format_choice_exclude,
92: 'clause' => \&format_clause,
93: };
94:
95: my $text;
96: my $xfml;
97: my $wloc=0;
98: my %eha;
99:
100: # ----------------------------------------------- Run through and apply clauses
101: my @lines2=<>; my $output=join('',@lines2); undef @lines2;
102: my $lparser = HTML::TokeParser->new(\$output) or
103: die('can\'t create TokeParser object');
104: $lparser->xml_mode('1');
105: my $parsestring2;
106: while (my $token = $parser->get_tag('clause')) {
107: $parsestring2=$output;
108: $lparser = HTML::TokeParser->new(\$parsestring2);
109: $lparser->xml_mode('1');
110: $output='';
111: &format_clause(@{$token});
112: $text = $parser->get_text('/clause');
113: $token = $parser->get_tag('/clause');
114:
115: my $token='';
116: my $ttype='';
117: my $excludeflag=0;
118: my $outcache='';
119: while ($token = $lparser->get_token()) {
120: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; }
121: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; }
122: elsif ($token->[0] eq 'T') {
123: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S'
124: or $ttype eq 'E') {
125: $output.=$token->[1];
126: }
127: else {
128: $outcache.=$token->[1];
129: }
130: }
131: elsif ($token->[0] eq 'S') {
132: if ($eh{$token->[1]} or $excludeflag==1) {
133: $ttype='';
134: $excludeflag=1;
135: $outcache.=$token->[4];
136: }
137: else {
138: $ttype='S';
139: $output.=$token->[4];
140: }
141: if ($excludeflag==1) {
142:
143: }
144: }
145: elsif ($token->[0] eq 'E') {
146: if ($eh{$token->[1]} and $excludeflag==1) {
147: $ttype='E';
148: $excludeflag=0;
149: $outcache.=$token->[2];
150: my $retval=&evalconditions($outcache);
151: if (&evalconditions($outcache)) {
152: $output.=$outcache;
153: }
154: else {
155: $output.='<!-- FILTERED OUT -->';
156: }
157: $outcache='';
158: }
159: elsif ($excludeflag==1) {
160: $ttype='';
161: $outcache.=$token->[2];
162: }
163: else {
164: $output.=$token->[2];
165: $ttype='E';
166: }
167: }
168: }
169: &cc;
170: }
171: print $output;
172:
173: # -------------------------------------------------------------- evalconditions
174: sub evalconditions {
175: my ($parsetext)=@_;
176: my $eparser = HTML::TokeParser->new(\$parsetext);
177: unless (@{$conditions{'name'}} or
178: @{$conditions{'attribute'}}) {
179: return 0;
180: }
181: my $nameflag=0;
182: my $cdataflag=0;
183: my $matchflag=0;
184: my $Ttoken='';
185: while (my $token = $eparser->get_token()) {
186: if ($token->[0] eq 'S') {
187: foreach my $name (@{$conditions{'name'}}) {
188: my $flag=0;
189: my $match=$name;
190: if ($match=~/^\!/) {
191: $match=~s/^\!//g;
192: $flag=1;
193: }
194: $match=~s/^\///g;
195: $match=~s/\/$//g;
196: if ((!$flag and $token->[1]=~/$match/) or
197: ($flag and $token->[1]!~/$match/)) {
198: $nameflag=1;
199: }
200: }
201: $Ttoken='';
202: }
203: elsif ($token->[0] eq 'E') {
204: foreach my $name (@{$conditions{'name'}}) {
205: my $flag=0;
206: my $match=$name;
207: if ($match=~/^\!/) {
208: $match=~s/^\!//g;
209: $flag=1;
210: }
211: $match=~s/^\///g;
212: $match=~s/\/$//g;
213: if ((!$flag and $token->[1]=~/$match/) or
214: ($flag and $token->[1]!~/$match/)) {
215: foreach my $cdata (@{$conditions{'cdata'}}) {
216: my $flag=0;
217: my $match=$cdata;
218: if ($match=~/^\!/) {
219: $match=~s/^\!//g;
220: $flag=1;
221: }
222: $match=~s/^\///g;
223: $match=~s/\/$//g;
224: if ((!$flag and $Ttoken=~/$match/) or
225: ($flag and $Ttoken!~/$match/)) {
226: $cdataflag=1;
227: }
228: }
229: if (@{$conditions{'cdata'}}) {
230: if ($cdataflag) {
231: return 0;
232: }
233: }
234: else {
235: if ($nameflag) {
236: return 0;
237: }
238: }
239: $nameflag=0;
240: }
241: }
242: }
243: elsif ($token->[0] eq 'T') {
244: if ($nameflag) {
245: $Ttoken.=$token->[1];
246: }
247: }
248: }
249: return 1;
250: }
251:
252: # ------------------------------------------------------------ clear conditions
253: sub cc {
254: @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
255: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
256: @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
257: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
258: %eh=(1,1); delete $eh{1};
259: }
260:
261: # --------------------------------------- remove starting and ending whitespace
262: sub trim {
263: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
264: }
265:
266:
267:
268:
269: # --------------------------------------------------------- Format xfml section
270: sub format_xfml {
271: my (@tokeninfo)=@_;
272: return '';
273: }
274:
275: # ------------------------------------------------------- Format clause section
276: sub format_clause {
277: my (@tokeninfo)=@_;
278: return '';
279: }
280:
281: # ---------------------------------------------------- Format when:name section
282: sub format_when_name {
283: my (@tokeninfo)=@_;
284: # $wloc++;
285: my $att_match=$tokeninfo[2]->{'match'};
286: push @{$conditions{'name'}},$att_match;
287: my $text=&trim($parser->get_text('/when:name'));
288: $parser->get_tag('/when:name');
289: # $wloc--;
290: # &cc unless $wloc;
291: return '';
292: }
293:
294: # --------------------------------------------------- Format when:cdata section
295: sub format_when_cdata {
296: my (@tokeninfo)=@_;
297: $wloc++;
298: my $att_match=$tokeninfo[2]->{'match'};
299: push @{$conditions{'cdata'}},$att_match;
300: my $text=&trim($parser->get_text('/when:cdata'));
301: $parser->get_tag('/when:cdata');
302: $wloc--;
303: # &cc unless $wloc;
304: return '';
305: }
306:
307: # ----------------------------------------------- Format choice:exclude section
308: sub format_choice_exclude {
309: my (@tokeninfo)=@_;
310: my $text=&trim($parser->get_text('/choice:exclude'));
311: $parser->get_tag('/choice:exclude');
312: $eh{$tokeninfo[2]->{'nodename'}}++;
313: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
314: [@{$conditions{'name'}}];
315: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
316: [@{$conditions{'attribute'}}];
317: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
318: [@{$conditions{'value'}}];
319: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
320: [@{$conditions{'cdata'}}];
321: return '';
322: }
323:
324: # ----------------------------------- POD (plain old documentation, CPAN style)
325:
326: =pod
327:
328: =head1 NAME
329:
330: xfml_parse.pl - This is meant to parse XFML files (XML Filtering Markup Language.)
331:
332: =head1 SYNOPSIS
333:
334: Usage is for lpml file to come in through standard input.
335:
336: =over 4
337:
338: =item *
339:
340: 1st argument is name of xfml file.
341:
342: =back
343:
344: Example:
345:
346: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
347:
348: or
349:
350: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
351:
352: =head1 DESCRIPTION
353:
354: I am using a multiple pass-through approach to parsing
355: the xfml file. This saves memory and makes sure the server
356: will never be overloaded.
357:
358: =head1 README
359:
360: I am using a multiple pass-through approach to parsing
361: the xfml file. This saves memory and makes sure the server
362: will never be overloaded.
363:
364: =head1 PREREQUISITES
365:
366: HTML::TokeParser
367:
368: =head1 COREQUISITES
369:
370: =head1 OSNAMES
371:
372: linux
373:
374: =head1 SCRIPT CATEGORIES
375:
376: Packaging/Administrative
377:
378: =head1 AUTHOR
379:
380: Scott Harrison
381: codeharrison@yahoo.com
382:
383: Please let me know how/if you are finding this script useful and
384: any/all suggestions. -Scott
385:
386: =cut
387:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>