--- loncom/build/lpml_parse.pl 2001/05/06 22:53:30 1.1 +++ loncom/build/lpml_parse.pl 2001/09/17 18:25:15 1.10 @@ -1,12 +1,865 @@ #!/usr/bin/perl + +# Scott Harrison +# YEAR=2001 +# May 2001 +# 06/19/2001,06/20,06/24 - Scott Harrison +# 9/5/2001,9/6,9/7,9/8 - Scott Harrison + +############################################################################### +## ## +## ORGANIZATION OF THIS PERL SCRIPT ## +## 1. Notes ## +## 2. Get command line arguments ## +## 3. First pass through (grab distribution-specific information) ## +## 4. Second pass through (parse out what is not necessary) ## +## 5. Third pass through (translate markup according to specified mode) ## +## ## +############################################################################### + +# ----------------------------------------------------------------------- Notes +# +# I am using a multiple pass-through approach to parsing +# the lpml file. This saves memory and makes sure the server +# will never be overloaded. +# +# This is meant to parse files meeting the lpml document type. +# See lpml.dtd. LPML=Linux Packaging Markup Language. + use HTML::TokeParser; -$p = HTML::TokeParser->new(shift||"test.lpml"); -while (my $token = $p->get_tag("category")) { - my $url = $token->[1]{name} . $token->[1]{type}; - my $chmodtoken=$p->get_tag("chmod"); - my $text = $p->get_trimmed_text("/chmod"); - print "CHMOD: $text\n"; - my $text = $p->get_trimmed_text("/category"); - print "$url\t$text\t".join(" ",@{$token->[2]})."\n"; +my $usage=<){} # throw away the input to avoid broken pipes + print $usage; + exit -1; # exit with error status +} + +my $categorytype; +if (@ARGV) { + $categorytype = shift @ARGV; +} + +my $dist; +if (@ARGV) { + $dist = shift @ARGV; +} + +my $targetroot; +my $sourceroot; +if (@ARGV) { + $sourceroot = shift @ARGV; +} +if (@ARGV) { + $targetroot = shift @ARGV; +} +$sourceroot=~s/\/$//; +$targetroot=~s/\/$//; + +my $invocation; +# --------------------------------------------------- Record program invocation +if ($mode eq 'install') { + $invocation=(<; +my $parsestring = join('',@parsecontents); +my $outstring; + +# Need to make a pass through and figure out what defaults are +# overrided. Top-down overriding strategy (leaves don't know +# about distant leaves). + +my @hierarchy; +$hierarchy[0]=0; +my $hloc=0; +my $token; +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my %hash; +my $key; +while ($token = $parser->get_token()) { + if ($token->[0] eq 'S') { + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + my $thisdist=' '.$token->[2]{'dist'}.' '; + if ($thisdist eq ' default ') { + $hash{$key}=1; # there is a default setting for this key + } + elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { + $hash{$key}=2; # disregard default setting for this key if + # there is a directly requested distribution match + } + } + if ($token->[0] eq 'E') { + $hloc--; + } +} + +# --------------------------------------------------- Start second pass through +undef $hloc; +undef @hierarchy; +undef $parser; +$hierarchy[0]=0; +$parser = HTML::TokeParser->new(\$parsestring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); +my $cleanstring; +while ($token = $parser->get_token()) { + if ($token->[0] eq 'S') { + $hloc++; + $hierarchy[$hloc]++; + $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); + my $thisdist=' '.$token->[2]{'dist'}.' '; + # This conditional clause is set up to ignore two sets + # of invalid conditions before accepting entry into + # the cleanstring. + if ($hash{$key}==2 and + !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and + !($thisdist eq ' default ' and $hash{$key}!=2)) { + if ($token->[4]!~/\/>$/) { + $parser->get_tag('/'.$token->[1]); + $hloc--; + } + } + else { + $cleanstring.=$token->[4]; + } + if ($token->[4]=~/\/>$/) { + $hloc--; + } + } + if ($token->[0] eq 'E') { + $cleanstring.=$token->[2]; + $hloc--; + } + if ($token->[0] eq 'T') { + $cleanstring.=$token->[1]; + } +} +$cleanstring=&trim($cleanstring); +$cleanstring=~s/\>\s*\n\s*\\new(\$cleanstring) or + die('can\'t create TokeParser object'); +$parser->xml_mode('1'); + +# Define handling methods for mode-dependent text rendering +$parser->{textify}={ + targetroot => \&format_targetroot, + sourceroot => \&format_sourceroot, + categories => \&format_categories, + category => \&format_category, + targetdir => \&format_targetdir, + chown => \&format_chown, + chmod => \&format_chmod, + rpm => \&format_rpm, + rpmSummary => \&format_rpmSummary, + rpmName => \&format_rpmName, + rpmVersion => \&format_rpmVersion, + rpmRelease => \&format_rpmRelease, + rpmVendor => \&format_rpmVendor, + rpmBuildRoot => \&format_rpmBuildRoot, + rpmCopyright => \&format_rpmCopyright, + rpmGroup => \&format_rpmGroup, + rpmSource => \&format_rpmSource, + rpmAutoReqProv => \&format_rpmAutoReqProv, + rpmdescription => \&format_rpmdescription, + rpmpre => \&format_rpmpre, + directories => \&format_directories, + directory => \&format_directory, + categoryname => \&format_categoryname, + description => \&format_description, + files => \&format_files, + file => \&format_file, + fileglob => \&format_fileglob, + links => \&format_links, + link => \&format_link, + linkto => \&format_linkto, + source => \&format_source, + target => \&format_target, + note => \&format_note, + build => \&format_build, + status => \&format_status, + dependencies => \&format_dependencies, + glob => \&format_glob, + sourcedir => \&format_sourcedir, + filenames => \&format_filenames, + }; + +my $text; +my $token; +undef $hloc; +undef @hierarchy; +my $hloc; +my @hierarchy2; +while ($token = $parser->get_tag('lpml')) { + &format_lpml(@{$token}); + $text = &trim($parser->get_text('/lpml')); + $token = $parser->get_tag('/lpml'); + print $lpml; + print "\n"; +# $text=~s/\s*\n\s*\n\s*/\n/g; + print $text; + print "\n"; + print &end(); +} +exit; + +sub end { + if ($mode eq 'html') { + return "
THE END\n"; + } + if ($mode eq 'install') { + return ''; + } +} + +# ----------------------- Take in string to parse and the separation expression +sub extract_array { + my ($stringtoparse,$sepexp) = @_; + my @a=split(/$sepexp/,$stringtoparse); + return \@a; +} + +# --------------------------------------------------------- Format lpml section +sub format_lpml { + my (@tokeninfo)=@_; + my $date=`date`; chop $date; + if ($mode eq 'html') { + $lpml = "
LPML BEGINNING: $date"; + } + elsif ($mode eq 'install') { + print '# LPML install targets. Linux Packaging Markup Language,'; + print ' by Scott Harrison 2001'."\n"; + print '# This file was automatically generated on '.`date`; + print "\n".$invocation; + } + else { + return ''; + } +} +# --------------------------------------------------- Format targetroot section +sub format_targetroot { + my $text=&trim($parser->get_text('/targetroot')); + $text=$targetroot if $targetroot; + $parser->get_tag('/targetroot'); + if ($mode eq 'html') { + return $targetroot="\n
TARGETROOT: $text"; + } + elsif ($mode eq 'install') { + return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format sourceroot section +sub format_sourceroot { + my $text=&trim($parser->get_text('/sourceroot')); + $text=$sourceroot if $sourceroot; + $parser->get_tag('/sourceroot'); + if ($mode eq 'html') { + return $sourceroot="\n
SOURCEROOT: $text"; + } + elsif ($mode eq 'install') { + return '# SOURCE CODE LOCATION IS "'.$sourceroot."\"\n";; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_categories { + my $text=&trim($parser->get_text('/categories')); + $parser->get_tag('/categories'); + if ($mode eq 'html') { + return $categories="\n
BEGIN CATEGORIES\n$text\n". + "
END CATEGORIES\n"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format categories section +sub format_category { + my (@tokeninfo)=@_; + $category_att_name=$tokeninfo[2]->{'name'}; + $category_att_type=$tokeninfo[2]->{'type'}; + $chmod='';$chown=''; + $parser->get_text('/category'); + $parser->get_tag('/category'); + if ($mode eq 'html') { + return $category="\n
CATEGORY $category_att_name ". + "$category_att_type $chmod $chown"; + } + else { + if ($category_att_type eq $categorytype) { + my ($user,$group)=split(/\:/,$chown); + $categoryhash{$category_att_name}='-o '.$user.' -g '.$group. + ' -m '.$chmod; + } + return ''; + } +} +# -------------------------------------------------------- Format chown section +sub format_chown { + my @tokeninfo=@_; + $chown=''; + my $text=&trim($parser->get_text('/chown')); + if ($text) { + $parser->get_tag('/chown'); + $chown=$text; + } + return ''; +} +# -------------------------------------------------------- Format chmod section +sub format_chmod { + my @tokeninfo=@_; + $chmod=''; + my $text=&trim($parser->get_text('/chmod')); + if ($text) { + $parser->get_tag('/chmod'); + $chmod=$text; + } + return ''; +} +# ---------------------------------------------------------- Format rpm section +sub format_rpm { + my $text=&trim($parser->get_text('/rpm')); + $parser->get_tag('/rpm'); + if ($mode eq 'html') { + return $rpm="\n
BEGIN RPM\n$text\n
END RPM"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmSummary section +sub format_rpmSummary { + my $text=&trim($parser->get_text('/rpmSummary')); + $parser->get_tag('/rpmSummary'); + if ($mode eq 'html') { + return $rpmSummary="\n
RPMSUMMARY $text"; + } + else { + return ''; + } +} +# ------------------------------------------------------ Format rpmName section +sub format_rpmName { + my $text=&trim($parser->get_text('/rpmName')); + $parser->get_tag('/rpmName'); + if ($mode eq 'html') { + return $rpmName="\n
RPMNAME $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmVersion section +sub format_rpmVersion { + my $text=$parser->get_text('/rpmVersion'); + $parser->get_tag('/rpmVersion'); + if ($mode eq 'html') { + return $rpmVersion="\n
RPMVERSION $text"; + } + else { + return ''; + } +} +# --------------------------------------------------- Format rpmRelease section +sub format_rpmRelease { + my $text=$parser->get_text('/rpmRelease'); + $parser->get_tag('/rpmRelease'); + if ($mode eq 'html') { + return $rpmRelease="\n
RPMRELEASE $text"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmVendor section +sub format_rpmVendor { + my $text=$parser->get_text('/rpmVendor'); + $parser->get_tag('/rpmVendor'); + if ($mode eq 'html') { + return $rpmVendor="\n
RPMVENDOR $text"; + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmBuildRoot section +sub format_rpmBuildRoot { + my $text=$parser->get_text('/rpmBuildRoot'); + $parser->get_tag('/rpmBuildRoot'); + if ($mode eq 'html') { + return $rpmBuildRoot="\n
RPMBUILDROOT $text"; + } + else { + return ''; + } +} +# ------------------------------------------------- Format rpmCopyright section +sub format_rpmCopyright { + my $text=$parser->get_text('/rpmCopyright'); + $parser->get_tag('/rpmCopyright'); + if ($mode eq 'html') { + return $rpmCopyright="\n
RPMCOPYRIGHT $text"; + } + else { + return ''; + } +} +# ----------------------------------------------------- Format rpmGroup section +sub format_rpmGroup { + my $text=$parser->get_text('/rpmGroup'); + $parser->get_tag('/rpmGroup'); + if ($mode eq 'html') { + return $rpmGroup="\n
RPMGROUP $text"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format rpmSource section +sub format_rpmSource { + my $text=$parser->get_text('/rpmSource'); + $parser->get_tag('/rpmSource'); + if ($mode eq 'html') { + return $rpmSource="\n
RPMSOURCE $text"; + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmAutoReqProv section +sub format_rpmAutoReqProv { + my $text=$parser->get_text('/rpmAutoReqProv'); + $parser->get_tag('/rpmAutoReqProv'); + if ($mode eq 'html') { + return $rpmAutoReqProv="\n
RPMAUTOREQPROV $text"; + } + else { + return ''; + } +} +# ----------------------------------------------- Format rpmdescription section +sub format_rpmdescription { + my $text=$parser->get_text('/rpmdescription'); + $parser->get_tag('/rpmdescription'); + if ($mode eq 'html') { + return $rpmdescription="\n
RPMDESCRIPTION $text"; + } + else { + return ''; + } +} +# ------------------------------------------------------- Format rpmpre section +sub format_rpmpre { + my $text=$parser->get_text('/rpmpre'); + $parser->get_tag('/rpmpre'); + if ($mode eq 'html') { + return $rpmpre="\n
RPMPRE $text"; + } + else { + return ''; + } +} +# -------------------------------------------------- Format directories section +sub format_directories { + my $text=$parser->get_text('/directories'); + $parser->get_tag('/directories'); + if ($mode eq 'html') { + return $directories="\n
BEGIN DIRECTORIES\n$text\n
". + "END DIRECTORIES\n"; + } + elsif ($mode eq 'install') { + return "\n".'directories:'."\n".$text; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format directory section +sub format_directory { + my (@tokeninfo)=@_; + $targetdir='';$categoryname='';$description=''; + $parser->get_text('/directory'); + $parser->get_tag('/directory'); + if ($mode eq 'html') { + return $directory="\n
DIRECTORY $targetdir $categoryname ". + "$description"; + } + elsif ($mode eq 'install') { + return "\t".'install '.$categoryhash{$categoryname}.' -d '. + $targetroot.'/'.$targetdir."\n"; + } + else { + return ''; + } +} +# ---------------------------------------------------- Format targetdir section +sub format_targetdir { + my @tokeninfo=@_; + $targetdir=''; + my $text=&trim($parser->get_text('/targetdir')); + if ($text) { + $parser->get_tag('/targetdir'); + $targetdir=$text; + } + return ''; +} +# ------------------------------------------------- Format categoryname section +sub format_categoryname { + my @tokeninfo=@_; + $categoryname=''; + my $text=&trim($parser->get_text('/categoryname')); + if ($text) { + $parser->get_tag('/categoryname'); + $categoryname=$text; + } + return ''; +} +# -------------------------------------------------- Format description section +sub format_description { + my @tokeninfo=@_; + $description=''; + my $text=&htmlsafe(&trim($parser->get_text('/description'))); + if ($text) { + $parser->get_tag('/description'); + $description=$text; + } + return ''; +} +# -------------------------------------------------------- Format files section +sub format_files { + my $text=$parser->get_text('/files'); + $parser->get_tag('/files'); + if ($mode eq 'html') { + return $directories="\n
BEGIN FILES\n$text\n
END FILES\n"; + } + elsif ($mode eq 'install') { + return "\n".'files:'."\n".$text. + "\n".'links:'."\n".join('',@links); + } + else { + return ''; + } +} +# ---------------------------------------------------- Format fileglobs section +sub format_fileglobs { + +} +# -------------------------------------------------------- Format links section +# deprecated.. currently 's are included in +sub format_links { + my $text=$parser->get_text('/links'); + $parser->get_tag('/links'); + if ($mode eq 'html') { + return $links="\n
BEGIN LINKS\n$text\n
END LINKS\n"; + } + elsif ($mode eq 'install') { + return "\n".'links:'."\n\t".$text; + } + else { + return ''; + } +} +# --------------------------------------------------------- Format file section +sub format_file { + my @tokeninfo=@_; + $file=''; $source=''; $target=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + my $text=&trim($parser->get_text('/file')); + if ($source) { + $parser->get_tag('/file'); + if ($mode eq 'html') { + return ($file="\n
BEGIN FILE\n". + "$source $target $categoryname $description $note " . + "$build $status $dependencies" . + "\nEND FILE"); + } + elsif ($mode eq 'install' && $categoryname ne 'conf') { + return "\t".'@test -e '.$sourceroot.'/'.$source. + ' && install '. + $categoryhash{$categoryname}.' '. + $sourceroot.'/'.$source.' '. + $targetroot.'/'.$target. + ' || echo "**** LON-CAPA WARNING '. + '**** CVS source file does not exist: '.$sourceroot.'/'. + $source.'"'."\n"; + } + else { + return ''; + } + } + return ''; +} +# --------------------------------------------------------- Format link section +sub format_link { + my @tokeninfo=@_; + $link=''; $linkto=''; $target=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + my $text=&trim($parser->get_text('/link')); + if ($linkto) { + $parser->get_tag('/link'); + if ($mode eq 'html') { + return $link="\n
BEGIN LINK\n". + "$linkto $target $categoryname $description $note " . + "$build $status $dependencies" . + "\nEND LINK"; + } + elsif ($mode eq 'install') { + my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target); + foreach my $tgt (@targets) { + push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt. + "\n"; + } + return ''; + } + else { + return ''; + } + } + return ''; +} +# ----------------------------------------------------- Format fileglob section +sub format_fileglob { + my @tokeninfo=@_; + $fileglob=''; $glob=''; $sourcedir=''; + $targetdir=''; $categoryname=''; $description=''; + $note=''; $build=''; $status=''; $dependencies=''; + $filenames=''; + my $text=&trim($parser->get_text('/fileglob')); + if ($sourcedir) { + $parser->get_tag('/fileglob'); + if ($mode eq 'html') { + return $fileglob="\n
BEGIN FILEGLOB\n". + "$glob sourcedir $targetdir $categoryname $description $note ". + "$build $status $dependencies $filenames" . + "\nEND FILEGLOB"; + } + elsif ($mode eq 'install') { + return "\t".'install '. + $categoryhash{$categoryname}.' '. + $sourceroot.'/'.$sourcedir.'[^CVS]'.$glob.' '. + $targetroot.'/'.$targetdir.'.'."\n"; + } + else { + return ''; + } + } + return ''; +} +# ---------------------------------------------------- Format sourcedir section +sub format_sourcedir { + my @tokeninfo=@_; + $sourcedir=''; + my $text=&trim($parser->get_text('/sourcedir')); + if ($text) { + $parser->get_tag('/sourcedir'); + $sourcedir=$text; + } + return ''; +} +# ------------------------------------------------------- Format target section +sub format_target { + my @tokeninfo=@_; + $target=''; + my $text=&trim($parser->get_text('/target')); + if ($text) { + $parser->get_tag('/target'); + $target=$text; + } + return ''; +} +# ------------------------------------------------------- Format source section +sub format_source { + my @tokeninfo=@_; + $source=''; + my $text=&trim($parser->get_text('/source')); + if ($text) { + $parser->get_tag('/source'); + $source=$text; + } + return ''; +} +# --------------------------------------------------------- Format note section +sub format_note { + my @tokeninfo=@_; + $note=''; + my $text=&trim($parser->get_text('/note')); + if ($text) { + $parser->get_tag('/note'); + $note=$text; + } + return ''; + +} +# -------------------------------------------------------- Format build section +sub format_build { + my @tokeninfo=@_; + $build=''; + my $text=&trim($parser->get_text('/build')); + if ($text) { + $parser->get_tag('/build'); + $build=$text; + } + return ''; +} +# ------------------------------------------------------- Format status section +sub format_status { + my @tokeninfo=@_; + $status=''; + my $text=&trim($parser->get_text('/status')); + if ($text) { + $parser->get_tag('/status'); + $status=$text; + } + return ''; +} +# ------------------------------------------------- Format dependencies section +sub format_dependencies { + my @tokeninfo=@_; + $dependencies=''; + my $text=&trim($parser->get_text('/dependencies')); + if ($text) { + $parser->get_tag('/dependencies'); + $dependencies=$text; + } + return ''; +} +# --------------------------------------------------------- Format glob section +sub format_glob { + my @tokeninfo=@_; + $glob=''; + my $text=&trim($parser->get_text('/glob')); + if ($text) { + $parser->get_tag('/glob'); + $glob=$text; + } + return ''; +} +# ---------------------------------------------------- Format filenames section +sub format_filenames { + my @tokeninfo=@_; + my $text=&trim($parser->get_text('/filenames')); + if ($text) { + $parser->get_tag('/filenames'); + $filenames=$text; + } + return ''; +} +# ------------------------------------------------------- Format linkto section +sub format_linkto { + my @tokeninfo=@_; + my $text=&trim($parser->get_text('/linkto')); + if ($text) { + $parser->get_tag('/linkto'); + $linkto=$text; + } + return ''; +} +# ------------------------------------- Render less-than and greater-than signs +sub htmlsafe { + my $text=@_[0]; + $text =~ s//>/g; + return $text; } +# --------------------------------------- remove starting and ending whitespace +sub trim { + my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; +}