loncom/build/xfml_parse.pl - diff

Return to xfml_parse.pl CVS log

Up to [LON-CAPA] / loncom / build

Diff for /loncom/build/xfml_parse.pl between versions 1.1 and 1.2

version 1.1, 2002/01/29 10:42:42	version 1.2, 2002/02/01 10:56:41
Line 1	Line 1
#!/usr/bin/perl	#!/usr/bin/perl

# YEAR=2002	# YEAR=2002
# 1/26,1/27,1/28 - Scott Harrison	# 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
	#
	###

# Read in 2 XML file; first is the filter specification, the second	# Read in 2 XML file; first is the filter specification, the second
# is the XML file to be filtered	# is the XML file to be filtered

	###############################################################################
	## ##
	## ORGANIZATION OF THIS PERL SCRIPT ##
	## 1. Notes ##
	## 2. Get command line arguments ##
	## 3. First pass through (grab distribution-specific information) ##
	## 4. Second pass through (parse out what is not necessary) ##
	## 5. Third pass through (translate markup according to specified mode) ##
	## 6. Functions (most all just format contents of different markup tags) ##
	## 7. POD (plain old documentation, CPAN style) ##
	## ##
	###############################################################################

	# ----------------------------------------------------------------------- Notes
	#
	# I am using a multiple pass-through approach to parsing
	# the xfml file. This saves memory and makes sure the server
	# will never be overloaded.
	#
	# This is meant to parse files meeting the piml document type.
	# See xfml.dtd. XFML=XML Filtering Markup Language.

use HTML::TokeParser;	use HTML::TokeParser;
use strict;	use strict;

Line 21 END	Line 45 END
my %eh;	my %eh;
my %ih;	my %ih;
my $tofilter=shift @ARGV;	my $tofilter=shift @ARGV;
my @lines=<>; my $parsestring=join('',@lines); undef @lines;	open IN,"<$tofilter";
	my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
	close IN;
my $parser = HTML::TokeParser->new(\$parsestring) or	my $parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');	die('can\'t create TokeParser object');
$parser->xml_mode('1');	$parser->xml_mode('1');
Line 47 my %eha;	Line 73 my %eha;
while (my $token = $parser->get_tag('xfml')) {	while (my $token = $parser->get_tag('xfml')) {
&format_xfml(@{$token});	&format_xfml(@{$token});
$text = $parser->get_text('/xfml');	$text = $parser->get_text('/xfml');
# print $xfml;
# print $text;
$token = $parser->get_tag('/xfml');	$token = $parser->get_tag('/xfml');
}	}

open IN,"<$tofilter";	#open IN,"<$tofilter";
my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;	my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
$parser = HTML::TokeParser->new(\$parsestring2) or	$parser = HTML::TokeParser->new(\$parsestring2) or
die('can\'t create TokeParser object');	die('can\'t create TokeParser object');
$parser->xml_mode('1');	$parser->xml_mode('1');
Line 68 my $excluden=0;	Line 92 my $excluden=0;
my $excludea=0;	my $excludea=0;
my $et=0;	my $et=0;
my $cdata='';	my $cdata='';
	my $excludenold=0;
	my $ign=0;

while ($token = $parser->get_token()) {	while ($token = $parser->get_token()) {
# from HTML::TokeParser documentation:
# ["S", $tag, %$attr, @$attrseq, $text]
# ["E", $tag, $text]
# ["T", $text, $is_data]
# ["C", $text]
# ["D", $text]
# ["PI", $token0, $text]
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
# @{$conditions{'name'}};
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
# @{$conditions{'attribute'}};
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
# @{$conditions{'value'}};
# push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
# @{$conditions{'cdata'}};
if ($token->[0] eq 'D') {	if ($token->[0] eq 'D') {
print $token->[1];	print $token->[1];
}	}
Line 102 while ($token = $parser->get_token()) {	Line 114 while ($token = $parser->get_token()) {

if ($eh{$token->[1]}) {	if ($eh{$token->[1]}) {
$echild=$token->[1];	$echild=$token->[1];
# print "ECHILD=$echild\n";
}	}
if ($echild) {	if ($echild) {
# run through names for echild	# run through names for echild
Line 114 while ($token = $parser->get_token()) {	Line 125 while ($token = $parser->get_token()) {
foreach my $namematch (@{$namemlist}) {	foreach my $namematch (@{$namemlist}) {
my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;	my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
if ($name=~/$nm/) {	if ($name=~/$nm/) {
# print "NMATCH: $nm ($name)\n";	$excludenold=$excluden;
$excluden++;	$excluden++;
foreach my $attributemlist	foreach my $attributemlist
(@{$eha{$echild}->{'attribute'}}) {	(@{$eha{$echild}->{'attribute'}}) {
Line 124 while ($token = $parser->get_token()) {	Line 135 while ($token = $parser->get_token()) {
split(/\=/,$attributematch,2);	split(/\=/,$attributematch,2);
$am=~s/^.//;	$am=~s/^.//;
$am=~s/.$//;	$am=~s/.$//;
# print 'AM:'."($an,$am)\t";
# print 'ATT:'.join(',',%atthash)."\n";
if ($atthash{$an}) {	if ($atthash{$an}) {
if ($atthash{$an}=~/$am/) {	if ($atthash{$an}=~/$am/) {
$excludea++;	$excludea++;
# print "AMATCH: $am (".
# join(',',
# @attributes)
# ."\n";
}	}
}	}
}	}
Line 150 while ($token = $parser->get_token()) {	Line 155 while ($token = $parser->get_token()) {
if ($echild) {	if ($echild) {
$tr.=$token->[2];	$tr.=$token->[2];
if ($excluden) {	if ($excluden) {
	my $i=0;
	CDATALOOP:
foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {	foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
	$i++;
	my $j;
foreach my $cdatamatch (@{$cdatamlist}) {	foreach my $cdatamatch (@{$cdatamlist}) {
	$j++;
# print "CDATA: $cdatamatch, $cdata\n";	# print "CDATA: $cdatamatch, $cdata\n";
my $cm=$cdatamatch;	my $cm=$cdatamatch;
my $not=0;	my $not=0;
Line 160 while ($token = $parser->get_token()) {	Line 170 while ($token = $parser->get_token()) {
$cm=~s/^.//;	$cm=~s/^.//;
}	}
$cm=~s/^.//; $cm=~s/.$//;	$cm=~s/^.//; $cm=~s/.$//;
	if ($not and $cdata=~/$cm/) {
	$ign=1; $exclude=0;
	}
if ((!$not and $cdata!~/$cm/)	if ((!$not and $cdata!~/$cm/)
or ($not and $cdata=~/$cm/)) {	or ($not and $cdata=~/$cm/)) {
# print "CMISMATCH: $cm ($cdata)\n";	# nothing happens
	# $exclude=0;
}	}
elsif (($not and $cdata!~/$cm/)	elsif (($not and $cdata!~/$cm/)
or (!$not and $cdata=~/$cm/)) {	or (!$not and $cdata=~/$cm/)) {
$exclude++;	$exclude++ unless $ign;
}	}
}	}
}	}
}	}
}	}
if ($eh{$token->[1]}) {	if ($eh{$token->[1]}) {
	$ign=0;
$echild=0;	$echild=0;
if (!$exclude and !$excludea) {	if (!$exclude and !$excludea) {
print $tr;	print $tr;
Line 180 while ($token = $parser->get_token()) {	Line 195 while ($token = $parser->get_token()) {
$tr='';	$tr='';
}	}
elsif ($exclude>0 or $excludea>0) {	elsif ($exclude>0 or $excludea>0) {
# print "EXCLUDING $token->[1] $excludea $excluden\n";	# print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
$exclude=0; $excluden=0; $excludea=0;	$exclude=0; $excluden=0; $excludea=0;
$tr='';	$tr='';
}	}
Line 222 sub trim {	Line 237 sub trim {
my ($s)=@_; $s=~s/^\s//; $s=~s/\s$//; return $s;	my ($s)=@_; $s=~s/^\s//; $s=~s/\s$//; return $s;
}	}



# --------------------------------------------------------- Format xfml section	# --------------------------------------------------------- Format xfml section
sub format_xfml {	sub format_xfml {
my (@tokeninfo)=@_;	my (@tokeninfo)=@_;
Line 236 sub format_when_name {	Line 253 sub format_when_name {
push @{$conditions{'name'}},$att_match;	push @{$conditions{'name'}},$att_match;
my $text=&trim($parser->get_text('/when:name'));	my $text=&trim($parser->get_text('/when:name'));
$parser->get_tag('/when:name');	$parser->get_tag('/when:name');
# print 'Name Matching...'.$att_match;
$wloc--;	$wloc--;
&cc unless $wloc;	&cc unless $wloc;
return '';	return '';
Line 250 sub format_when_attribute {	Line 266 sub format_when_attribute {
push @{$conditions{'attribute'}},$att_match;	push @{$conditions{'attribute'}},$att_match;
my $text=&trim($parser->get_text('/when:attribute'));	my $text=&trim($parser->get_text('/when:attribute'));
$parser->get_tag('/when:attribute');	$parser->get_tag('/when:attribute');
# print 'Attribute Matching...'.$att_match;
$wloc--;	$wloc--;
&cc unless $wloc;	&cc unless $wloc;
return '';	return '';
Line 261 sub format_when_cdata {	Line 276 sub format_when_cdata {
my (@tokeninfo)=@_;	my (@tokeninfo)=@_;
$wloc++;	$wloc++;
my $att_match=$tokeninfo[2]->{'match'};	my $att_match=$tokeninfo[2]->{'match'};
# print 'Cdata Matching...'.$att_match;
push @{$conditions{'cdata'}},$att_match;	push @{$conditions{'cdata'}},$att_match;
my $text=&trim($parser->get_text('/when:cdata'));	my $text=&trim($parser->get_text('/when:cdata'));
$parser->get_tag('/when:cdata');	$parser->get_tag('/when:cdata');

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1
changed lines
	Added in v.1.2