loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.3: download - view: text, annotated - select for diffs
Wed Feb 20 00:21:42 2002 UTC (23 years, 4 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

a cleaner leaner script (major rewrite)

1: #!/usr/bin/perl 2: 3: # YEAR=2002 4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison 5: # 6: ### 7: 8: # Read in 2 XML file; first is the filter specification, the second 9: # is the XML file to be filtered 10: 11: ############################################################################### 12: ## ## 13: ## ORGANIZATION OF THIS PERL SCRIPT ## 14: ## 1. Notes ## 15: ## 2. Read in filter file ## 16: ## 3. Initialize and clear conditions ## 17: ## 4. Run through and apply clauses ## 18: ## ## 19: ############################################################################### 20: 21: # ----------------------------------------------------------------------- Notes 22: # 23: # This is meant to parse files meeting the xfml document type. 24: # See xfml.dtd. XFML=XML Filtering Markup Language. 25: 26: use HTML::TokeParser; 27: use strict; 28: 29: unless (@ARGV) { 30: print <<END; 31: Incorrect invocation. 32: Example usages: 33: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml 34: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml 35: END 36: } 37: 38: my %eh; 39: 40: # ---------------------------------------------- Read in filter file from @ARGV 41: my $tofilter=shift @ARGV; 42: open IN,"<$tofilter"; my @lines=<IN>; 43: my $parsestring=join('',@lines); undef @lines; close IN; 44: my $parser = HTML::TokeParser->new(\$parsestring) or 45: die('can\'t create TokeParser object'); 46: $parser->xml_mode('1'); 47: 48: # --------------------------------------------- initialize and clear conditions 49: my %conditions; &cc; 50: 51: # Define handling methods for mode-dependent text rendering 52: $parser->{textify}={ 53: 'xfml' => \&format_xfml, 54: 'when:name' => \&format_when_name, 55: 'when:attribute' => \&format_when_attribute, 56: 'when:cdata' => \&format_when_cdata, 57: 'choice:exclude' => \&format_choice_exclude, 58: 'clause' => \&format_clause, 59: }; 60: 61: my $text; 62: my $xfml; 63: my $wloc=0; 64: my %eha; 65: 66: # ----------------------------------------------- Run through and apply clauses 67: my @lines2=<>; my $output=join('',@lines2); undef @lines2; 68: my $lparser = HTML::TokeParser->new(\$output) or 69: die('can\'t create TokeParser object'); 70: $lparser->xml_mode('1'); 71: my $parsestring2; 72: while (my $token = $parser->get_tag('clause')) { 73: $parsestring2=$output; 74: $lparser = HTML::TokeParser->new(\$parsestring2); 75: $lparser->xml_mode('1'); 76: $output=''; 77: &format_clause(@{$token}); 78: $text = $parser->get_text('/clause'); 79: $token = $parser->get_tag('/clause'); 80: 81: my $token=''; 82: my $ttype=''; 83: my $excludeflag=0; 84: my $outcache=''; 85: while ($token = $lparser->get_token()) { 86: if ($token->[0] eq 'D') { $ttype='D'; $output.=$token->[1]; } 87: elsif ($token->[0] eq 'C') { $ttype='C'; $output.=$token->[1]; } 88: elsif ($token->[0] eq 'T') { 89: if ($ttype eq 'D' or $ttype eq 'C' or $ttype eq 'S' 90: or $ttype eq 'E') { 91: $output.=$token->[1]; 92: } 93: else { 94: $outcache.=$token->[1]; 95: } 96: } 97: elsif ($token->[0] eq 'S') { 98: if ($eh{$token->[1]} or $excludeflag==1) { 99: $ttype=''; 100: $excludeflag=1; 101: $outcache.=$token->[4]; 102: } 103: else { 104: $ttype='S'; 105: $output.=$token->[4]; 106: } 107: if ($excludeflag==1) { 108: 109: } 110: } 111: elsif ($token->[0] eq 'E') { 112: if ($eh{$token->[1]} and $excludeflag==1) { 113: $ttype='E'; 114: $excludeflag=0; 115: $outcache.=$token->[2]; 116: my $retval=&evalconditions($outcache); 117: if (&evalconditions($outcache)) { 118: $output.=$outcache; 119: } 120: else { 121: $output.=''; 122: } 123: $outcache=''; 124: } 125: elsif ($excludeflag==1) { 126: $ttype=''; 127: $outcache.=$token->[2]; 128: } 129: else { 130: $output.=$token->[2]; 131: $ttype='E'; 132: } 133: } 134: } 135: &cc; 136: } 137: print $output; 138: 139: # -------------------------------------------------------------- evalconditions 140: sub evalconditions { 141: my ($parsetext)=@_; 142: my $eparser = HTML::TokeParser->new(\$parsetext); 143: unless (@{$conditions{'name'}} or 144: @{$conditions{'attribute'}}) { 145: return 0; 146: } 147: my $nameflag=0; 148: my $cdataflag=0; 149: my $matchflag=0; 150: my $Ttoken=''; 151: while (my $token = $eparser->get_token()) { 152: if ($token->[0] eq 'S') { 153: foreach my $name (@{$conditions{'name'}}) { 154: my $flag=0; 155: my $match=$name; 156: if ($match=~/^\!/) { 157: $match=~s/^\!//g; 158: $flag=1; 159: } 160: $match=~s/^\///g; 161: $match=~s/\/$//g; 162: if ((!$flag and $token->[1]=~/$match/) or 163: ($flag and $token->[1]!~/$match/)) { 164: $nameflag=1; 165: } 166: } 167: $Ttoken=''; 168: } 169: elsif ($token->[0] eq 'E') { 170: foreach my $name (@{$conditions{'name'}}) { 171: my $flag=0; 172: my $match=$name; 173: if ($match=~/^\!/) { 174: $match=~s/^\!//g; 175: $flag=1; 176: } 177: $match=~s/^\///g; 178: $match=~s/\/$//g; 179: if ((!$flag and $token->[1]=~/$match/) or 180: ($flag and $token->[1]!~/$match/)) { 181: foreach my $cdata (@{$conditions{'cdata'}}) { 182: my $flag=0; 183: my $match=$cdata; 184: if ($match=~/^\!/) { 185: $match=~s/^\!//g; 186: $flag=1; 187: } 188: $match=~s/^\///g; 189: $match=~s/\/$//g; 190: if ((!$flag and $Ttoken=~/$match/) or 191: ($flag and $Ttoken!~/$match/)) { 192: $cdataflag=1; 193: } 194: } 195: if (@{$conditions{'cdata'}}) { 196: if ($cdataflag) { 197: return 0; 198: } 199: } 200: else { 201: if ($nameflag) { 202: return 0; 203: } 204: } 205: $nameflag=0; 206: } 207: } 208: } 209: elsif ($token->[0] eq 'T') { 210: if ($nameflag) { 211: $Ttoken.=$token->[1]; 212: } 213: } 214: } 215: return 1; 216: } 217: 218: # ------------------------------------------------------------ clear conditions 219: sub cc { 220: @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; 221: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; 222: @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; 223: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; 224: %eh=(1,1); delete $eh{1}; 225: } 226: 227: # --------------------------------------- remove starting and ending whitespace 228: sub trim { 229: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; 230: } 231: 232: 233: 234: 235: # --------------------------------------------------------- Format xfml section 236: sub format_xfml { 237: my (@tokeninfo)=@_; 238: return ''; 239: } 240: 241: # ------------------------------------------------------- Format clause section 242: sub format_clause { 243: my (@tokeninfo)=@_; 244: return ''; 245: } 246: 247: # ---------------------------------------------------- Format when:name section 248: sub format_when_name { 249: my (@tokeninfo)=@_; 250: # $wloc++; 251: my $att_match=$tokeninfo[2]->{'match'}; 252: push @{$conditions{'name'}},$att_match; 253: my $text=&trim($parser->get_text('/when:name')); 254: $parser->get_tag('/when:name'); 255: # $wloc--; 256: # &cc unless $wloc; 257: return ''; 258: } 259: 260: # --------------------------------------------------- Format when:cdata section 261: sub format_when_cdata { 262: my (@tokeninfo)=@_; 263: $wloc++; 264: my $att_match=$tokeninfo[2]->{'match'}; 265: push @{$conditions{'cdata'}},$att_match; 266: my $text=&trim($parser->get_text('/when:cdata')); 267: $parser->get_tag('/when:cdata'); 268: $wloc--; 269: # &cc unless $wloc; 270: return ''; 271: } 272: 273: # ----------------------------------------------- Format choice:exclude section 274: sub format_choice_exclude { 275: my (@tokeninfo)=@_; 276: my $text=&trim($parser->get_text('/choice:exclude')); 277: $parser->get_tag('/choice:exclude'); 278: $eh{$tokeninfo[2]->{'nodename'}}++; 279: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, 280: [@{$conditions{'name'}}]; 281: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, 282: [@{$conditions{'attribute'}}]; 283: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, 284: [@{$conditions{'value'}}]; 285: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, 286: [@{$conditions{'cdata'}}]; 287: return ''; 288: }