version 1.44, 2002/04/08 12:51:03
|
version 1.47, 2002/04/26 15:53:00
|
Line 4
|
Line 4
|
# Run "perldoc ./lpml_parse.pl" in order to best view the software |
# Run "perldoc ./lpml_parse.pl" in order to best view the software |
# documentation internalized in this program. |
# documentation internalized in this program. |
|
|
|
# --------------------------------------------------------- Distribution notice |
|
# This script is distributed with the LPML software project available at |
|
# http://lpml.sourceforge.net |
|
|
# --------------------------------------------------------- License Information |
# --------------------------------------------------------- License Information |
# The LearningOnline Network with CAPA |
# The LearningOnline Network with CAPA |
# lpml_parse.pl - Linux Packaging Markup Language parser |
# lpml_parse.pl - Linux Packaging Markup Language parser |
Line 42
|
Line 46
|
# 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison |
# 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison |
# 12/2,12/3,12/4,12/5,12/6,12/13,12/19,12/29 - Scott Harrison |
# 12/2,12/3,12/4,12/5,12/6,12/13,12/19,12/29 - Scott Harrison |
# YEAR=2002 |
# YEAR=2002 |
# 1/8,1/9,1/29,1/31,2/5,3/21,4/8 - Scott Harrison |
# 1/8,1/9,1/29,1/31,2/5,3/21,4/8,4/12 - Scott Harrison |
# |
# |
### |
### |
|
|
Line 63
|
Line 67
|
# |
# |
# I am using a multiple pass-through approach to parsing |
# I am using a multiple pass-through approach to parsing |
# the lpml file. This saves memory and makes sure the server |
# the lpml file. This saves memory and makes sure the server |
# will never be overloaded. At some point, I expect the |
# will never be overloaded. |
# first two steps will be implemented with my XFML |
|
# |
# |
# This is meant to parse files meeting the lpml document type. |
# This is meant to parse files meeting the lpml document type. |
# See lpml.dtd. LPML=Linux Packaging Markup Language. |
# See lpml.dtd. LPML=Linux Packaging Markup Language. |
Line 74 use HTML::TokeParser;
|
Line 77 use HTML::TokeParser;
|
my $usage=<<END; |
my $usage=<<END; |
**** ERROR ERROR ERROR ERROR **** |
**** ERROR ERROR ERROR ERROR **** |
Usage is for lpml file to come in through standard input. |
Usage is for lpml file to come in through standard input. |
1st argument is the mode of parsing. |
1st argument is the mode of parsing: |
2nd argument is the category permissions to use (runtime or development) |
install,configinstall,build,rpm,dpkg,htmldoc,textdoc,status |
3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc). |
2nd argument is the category permissions to use: |
|
typical choices: runtime,development |
|
3rd argument is the distribution: |
|
typical choices: default,redhat6.2,debian2.2,redhat7 |
4th argument is to manually specify a sourceroot. |
4th argument is to manually specify a sourceroot. |
5th argument is to manually specify a targetroot. |
5th argument is to manually specify a targetroot. |
|
|
Line 86 Example:
|
Line 92 Example:
|
|
|
cat ../../doc/loncapafiles.lpml |\\ |
cat ../../doc/loncapafiles.lpml |\\ |
perl lpml_parse.pl html development default /home/sherbert/loncapa /tmp/install |
perl lpml_parse.pl html development default /home/sherbert/loncapa /tmp/install |
|
|
|
For more information, type "perldoc lpml_parse.pl". |
END |
END |
|
|
# ------------------------------------------------- Grab command line arguments |
# ------------------------------------------------- Grab command line arguments |
Line 121 if (@ARGV) {
|
Line 129 if (@ARGV) {
|
if (@ARGV) { |
if (@ARGV) { |
$targetroot = shift @ARGV; |
$targetroot = shift @ARGV; |
} |
} |
$sourceroot=~s/\/$//; |
$sourceroot=~s/\/$//; # remove trailing directory slash |
$targetroot=~s/\/$//; |
$targetroot=~s/\/$//; # remove trailing directory slash |
$sourcerootarg=$sourceroot; |
$sourcerootarg=$sourceroot; |
$targetrootarg=$targetroot; |
$targetrootarg=$targetroot; |
|
|
my $logcmd='| tee -a WARNINGS'; |
my $logcmd='| tee -a WARNINGS'; |
|
|
my $invocation; |
my $invocation; # Record how the program was invoked |
# --------------------------------------------------- Record program invocation |
# --------------------------------------------------- Record program invocation |
if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { |
if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { |
$invocation=(<<END); |
$invocation=(<<END); |
Line 141 if ($mode eq 'install' or $mode eq 'conf
|
Line 149 if ($mode eq 'install' or $mode eq 'conf
|
END |
END |
} |
} |
|
|
# ---------------------------------------------------- Start first pass through |
# -------------------------- Start first pass through (just gather information) |
my @parsecontents = <>; |
my @parsecontents=<>; |
my $parsestring = join('',@parsecontents); |
my $parsestring=join('',@parsecontents); |
my $outstring=''; |
|
|
|
# Need to make a pass through and figure out what defaults are |
# Need to make a pass through and figure out what defaults are |
# overridden. Top-down overriding strategy (leaves don't know |
# overridden. Top-down overriding strategy (tree leaves don't know |
# about distant leaves). |
# about distant tree leaves). |
|
|
my @hierarchy; |
my @hierarchy; |
$hierarchy[0]=0; |
$hierarchy[0]=0; |
Line 157 my $token;
|
Line 164 my $token;
|
$parser = HTML::TokeParser->new(\$parsestring) or |
$parser = HTML::TokeParser->new(\$parsestring) or |
die('can\'t create TokeParser object'); |
die('can\'t create TokeParser object'); |
$parser->xml_mode('1'); |
$parser->xml_mode('1'); |
my %hash; |
my %setting; |
my $key=''; |
|
while ($token = $parser->get_token()) { |
# Values for the %setting hash |
|
my $defaultset=1; # a default setting exists for a key |
|
my $distset=2; # a distribution setting exists for a key |
|
# (overrides default setting) |
|
|
|
my $key=''; # this is a unique key identifier (the token name with its |
|
# coordinates inside the hierarchy) |
|
while ($token = $parser->get_token()) { # navigate through $parsestring |
if ($token->[0] eq 'S') { |
if ($token->[0] eq 'S') { |
$hloc++; |
$hloc++; |
$hierarchy[$hloc]++; |
$hierarchy[$hloc]++; |
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); |
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); |
my $thisdist=' '.$token->[2]{'dist'}.' '; |
my $thisdist=' '.$token->[2]{'dist'}.' '; |
if ($thisdist eq ' default ') { |
if ($thisdist eq ' default ') { |
$hash{$key}=1; # there is a default setting for this key |
$setting{$key}=$defaultset; |
} |
} |
elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { |
elsif (length($dist)>0 && |
$hash{$key}=2; # disregard default setting for this key if |
$setting{$key}==$defaultset && |
# there is a directly requested distribution match |
$thisdist=~/\s$dist\s/) { |
|
$setting{$key}=$distset; |
|
# disregard default setting for this key if |
|
# there is a directly requested distribution match |
|
# (in other words, there must first be a default |
|
# setting for a key in order for it to be overridden) |
} |
} |
} |
} |
if ($token->[0] eq 'E') { |
if ($token->[0] eq 'E') { |
Line 178 while ($token = $parser->get_token()) {
|
Line 197 while ($token = $parser->get_token()) {
|
} |
} |
} |
} |
|
|
# --------------------------------------------------- Start second pass through |
# - Start second pass through (clean up the string to allow for easy rendering) |
|
|
|
# The string is cleaned up so that there is no white-space surrounding any |
|
# XML tag. White-space inside text 'T' elements is preserved. |
|
|
|
# Clear up memory |
undef($hloc); |
undef($hloc); |
undef(@hierarchy); |
undef(@hierarchy); |
undef($parser); |
undef($parser); |
$hierarchy[0]=0; |
$hierarchy[0]=0; # initialize hierarchy |
$parser = HTML::TokeParser->new(\$parsestring) or |
$parser = HTML::TokeParser->new(\$parsestring) or |
die('can\'t create TokeParser object'); |
die('can\'t create TokeParser object'); |
$parser->xml_mode('1'); |
$parser->xml_mode('1'); |
my $cleanstring; |
my $cleanstring; # contains the output of the second step |
while ($token = $parser->get_token()) { |
while ($token = $parser->get_token()) { # navigate through $parsestring |
if ($token->[0] eq 'S') { |
if ($token->[0] eq 'S') { # a start tag |
$hloc++; |
$hloc++; |
$hierarchy[$hloc]++; |
$hierarchy[$hloc]++; |
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); |
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); |
my $thisdist=' '.$token->[2]{'dist'}.' '; |
|
|
# Surround tagdist (the dist attribute of an XML tag) |
|
# with white-space to allow for uniform searching a few |
|
# lines below here. |
|
my $tagdist=' '.$token->[2]{'dist'}.' '; |
|
|
# This conditional clause is set up to ignore two sets |
# This conditional clause is set up to ignore two sets |
# of invalid conditions before accepting entry into |
# of invalid conditions before accepting entry into |
# the cleanstring. |
# $cleanstring. |
if ($hash{$key}==2 and |
|
!($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { |
# Condition #1: Ignore this part of the string if the tag |
|
# has a superior distribution-specific setting and the tag |
|
# being evaluated has a dist setting that is something other than |
|
# blank or $dist. |
|
if ($setting{$key}==$distset and |
|
!($tagdist eq ' ' or $tagdist =~/\s$dist\s/)) { |
if ($token->[4]!~/\/>$/) { |
if ($token->[4]!~/\/>$/) { |
$parser->get_tag('/'.$token->[1]); |
$parser->get_tag('/'.$token->[1]); |
$hloc--; |
$hloc--; |
} |
} |
} |
} |
elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and |
# Condition #2: Ignore this part of the string if the tag has |
!($thisdist eq ' default ' and $hash{$key}!=2)) { |
# a dist attribute that is not blank and does not match $dist and |
|
# either does not equal default or it has a prior $dist-specific |
|
# setting. |
|
elsif ($tagdist ne ' ' and $tagdist!~/\s$dist\s/ and |
|
!($tagdist eq ' default ' and $setting{$key}!=$distset)) { |
if ($token->[4]!~/\/>$/) { |
if ($token->[4]!~/\/>$/) { |
$parser->get_tag('/'.$token->[1]); |
$parser->get_tag('/'.$token->[1]); |
$hloc--; |
$hloc--; |
} |
} |
} |
} |
|
# In other words, output to $cleanstring if the tag is dist=default |
|
# or if the tag is set to dist=$dist for the first time. And, always |
|
# output when the dist attribute is absent or empty.
else { |
else { |
$cleanstring.=$token->[4]; |
$cleanstring.=$token->[4]; |
} |
} |
if ($token->[4]=~/\/>$/) { |
|
# $hloc--; |
|
} |
|
} |
} |
if ($token->[0] eq 'E') { |
# Note: this loop DOES work with <tag /> style markup as well as |
|
# <tag></tag> style markup since I always check for $token->[4] ending |
|
# with "/>". |
|
if ($token->[0] eq 'E') { # an end tag |
$cleanstring.=$token->[2]; |
$cleanstring.=$token->[2]; |
$hloc--; |
$hloc--; |
} |
} |
if ($token->[0] eq 'T') { |
if ($token->[0] eq 'T') { # text contents inside tags |
$cleanstring.=$token->[1]; |
$cleanstring.=$token->[1]; |
} |
} |
} |
} |
$cleanstring=&trim($cleanstring); |
$cleanstring=&trim($cleanstring); |
$cleanstring=~s/\>\s*\n\s*\</\>\</g; |
$cleanstring=~s/\>\s*\n\s*\</\>\</g; |
|
|
# ---------------------------------------------------- Start final pass through |
# -------------------------------------------- Start final (third) pass through |
|
|
# storage variables |
# storage variables |
my $lpml; |
my $lpml; |
Line 1015 sub format_description {
|
Line 1056 sub format_description {
|
sub format_files { |
sub format_files { |
my $text=$parser->get_text('/files'); |
my $text=$parser->get_text('/files'); |
$parser->get_tag('/files'); |
$parser->get_tag('/files'); |
if ($mode eq 'html') { |
if ($mode eq 'MANIFEST') { |
|
return $text; |
|
} |
|
elsif ($mode eq 'html') { |
return $directories="\n<br /> <br />". |
return $directories="\n<br /> <br />". |
"<a name='files' />". |
"<a name='files' />". |
"<font size='+2'>Files</font><br /> <br />". |
"<font size='+2'>Files</font><br /> <br />". |
Line 1133 sub format_file {
|
Line 1177 sub format_file {
|
$categorycount{$categoryname}++; |
$categorycount{$categoryname}++; |
if ($source) { |
if ($source) { |
$parser->get_tag('/file'); |
$parser->get_tag('/file'); |
if ($mode eq 'html') { |
if ($mode eq 'MANIFEST') { |
|
my $command=$build; |
|
if ($command!~/\s/) { |
|
$command=~s/\/([^\/]*)$//; |
|
} |
|
else { |
|
$command=~s/(.*?\/)([^\/]+\s+.*)$/$1/; |
|
} |
|
$command=~s/^$sourceroot\///; |
|
my (@deps)=split(/\;/,$dependencies); |
|
my $retval=join("\n",($source, |
|
(map {"$command$_"} @deps))); |
|
return $retval."\n"; |
|
} |
|
elsif ($mode eq 'html') { |
return ($file="\n<!-- FILESORT:$target -->". |
return ($file="\n<!-- FILESORT:$target -->". |
"<tr>". |
"<tr>". |
"<td><!-- POSTEVAL [$categoryname] verify.pl file '$sourcerootarg' ". |
"<td><!-- POSTEVAL [$categoryname] verify.pl file '$sourcerootarg' ". |
Line 1344 sub format_fileglob {
|
Line 1402 sub format_fileglob {
|
$categorycount{$categoryname}+=scalar(@semi)+1; |
$categorycount{$categoryname}+=scalar(@semi)+1; |
if ($sourcedir) { |
if ($sourcedir) { |
$parser->get_tag('/fileglob'); |
$parser->get_tag('/fileglob'); |
if ($mode eq 'html') { |
if ($mode eq 'MANIFEST') { |
|
return join("\n",(map {"$sourcedir$_"} split(/\;/,$filenames2)))."\n"; |
|
} |
|
elsif ($mode eq 'html') { |
return $fileglob="\n<tr>". |
return $fileglob="\n<tr>". |
"<td><!-- POSTEVAL [$categoryname] verify.pl fileglob '$sourcerootarg' ". |
"<td><!-- POSTEVAL [$categoryname] verify.pl fileglob '$sourcerootarg' ". |
"'$targetrootarg' ". |
"'$targetrootarg' ". |
Line 1559 sub trim {
|
Line 1620 sub trim {
|
|
|
=head1 NAME |
=head1 NAME |
|
|
lpml_parse.pl - This is meant to parse LPML files (Linux Packaging Markup Language) |
lpml_parse.pl - This is meant to parse files meeting the lpml document type. |
|
|
=head1 SYNOPSIS |
=head1 SYNOPSIS |
|
|
Usage is for lpml file to come in through standard input. |
<STDIN> | perl lpml_parse.pl <MODE> <CATEGORY> <DIST> <SOURCE> <TARGET> |
|
|
|
Usage is for the lpml file to come in through standard input. |
|
|
=over 4 |
=over 4 |
|
|
Line 1595 Only the 1st argument is mandatory for t
|
Line 1658 Only the 1st argument is mandatory for t
|
Example: |
Example: |
|
|
cat ../../doc/loncapafiles.lpml |\\ |
cat ../../doc/loncapafiles.lpml |\\ |
perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install |
perl lpml_parse.pl html runtime default /home/sherbert/loncapa /tmp/install |
|
|
=head1 DESCRIPTION |
=head1 DESCRIPTION |
|
|
I am using a multiple pass-through approach to parsing |
The general flow of the script is to get command line arguments, run through |
the lpml file. This saves memory and makes sure the server |
the XML document three times, and output according to any desired mode: |
will never be overloaded. |
install, configinstall, build, rpm, dpkg, htmldoc, textdoc, and status. |
|
|
|
A number of coding decisions are made according to the following principle: |
|
installation software must be stand-alone. Therefore, for instance, I try |
|
not to use the Getopt::Long module or any other perl modules. (I do however |
|
use HTML::TokeParser.) I also have tried to keep all the MODES of |
|
parsing inside this file. Therefore, format_TAG subroutines are fairly |
|
lengthy with their conditional logic. A more "elegant" solution might |
|
be to dynamically register the parsing mode and subroutines, or maybe even work |
|
with stylesheets. However, in order to make this the installation back-bone |
|
of choice, there are advantages for HAVING EVERYTHING IN ONE FILE. |
|
This way, the LPML installation software does not have to rely on OTHER |
|
installation software (a chicken-and-egg problem). Besides, I would |
|
suggest the modes of parsing are fairly constant: install, configinstall, |
|
build, rpm, dpkg, htmldoc, textdoc, and status. |
|
|
|
Another coding decision is about using a multiple pass-through approach to |
|
parsing the lpml file. This saves memory and makes sure the server will never |
|
be overloaded. During the first pass-through, the script gathers information |
|
needed to resolve which tags, with which 'dist=' attributes, are to be used. |
|
During the second pass-through, the script cleans up white-space surrounding |
|
the XML tags, and filters through the tags based on information regarding the |
|
'dist=' attributes (information gathered in the first pass-through). |
|
The third and final pass-through involves formatting and rendering the XML |
|
into whatever output mode is chosen: install, configinstall, build, rpm, dpkg, |
|
htmldoc, textdoc, and status. |
|
|
|
The hierarchy mandated by the DTD does not always correspond to the hierarchy |
|
that is sensible for a Makefile. For instance, in a Makefile it is sensible |
|
that soft-links are installed after files. However, in an LPML document, it |
|
is sensible that files and links be considered together and the writer of the |
|
LPML document should be free to place things in whatever order makes best |
|
sense in terms of LOOKING at the information. The complication that arises |
|
is that the parser needs to have a memory for passing values from |
|
leaves on the XML tree to higher-up branches. Currently, this memory is |
|
hard-coded (like with the @links array), but it may benefit from a more |
|
formal approach in the future. |
|
|
=head1 README |
=head1 README |
|
|
I am using a multiple pass-through approach to parsing |
This parses an LPML file to generate information useful for |
the lpml file. This saves memory and makes sure the server |
source to target installation, compilation, filesystem status |
will never be overloaded. |
checking, RPM and Debian software packaging, and documentation. |
|
|
|
More information on LPML is available at http://lpml.sourceforge.net. |
|
|
=head1 PREREQUISITES |
=head1 PREREQUISITES |
|
|
Line 1621 linux
|
Line 1722 linux
|
|
|
=head1 SCRIPT CATEGORIES |
=head1 SCRIPT CATEGORIES |
|
|
Packaging/Administrative |
UNIX/System_administration |
|
|
=head1 AUTHOR |
=head1 AUTHOR |
|
|