--- loncom/build/lpml_parse.pl	2001/12/06 23:14:33	1.28
+++ loncom/build/lpml_parse.pl	2003/09/11 22:01:48	1.52
@@ -1,11 +1,20 @@
 #!/usr/bin/perl
 
+# -------------------------------------------------------- Documentation notice
+# Run "perldoc ./lpml_parse.pl" in order to best view the software
+# documentation internalized in this program.
+
+# --------------------------------------------------------- Distribution notice
+# This script is distributed with the LPML software project available at
+# http://lpml.sourceforge.net
+
+# --------------------------------------------------------- License Information
 # The LearningOnline Network with CAPA
 # lpml_parse.pl - Linux Packaging Markup Language parser
 #
-# $Id: lpml_parse.pl,v 1.28 2001/12/06 23:14:33 harris41 Exp $
+# $Id: lpml_parse.pl,v 1.52 2003/09/11 22:01:48 albertel Exp $
 #
-# Written by Scott Harrison, harris41@msu.edu
+# Written by Scott Harrison, codeharrison@yahoo.com
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -35,7 +44,10 @@
 # 9/5/2001,9/6,9/7,9/8 - Scott Harrison
 # 9/17,9/18 - Scott Harrison
 # 11/4,11/5,11/6,11/7,11/16,11/17 - Scott Harrison
-# 12/2,12/3,12/4,12/5,12/6 - Scott Harrison
+# 12/2,12/3,12/4,12/5,12/6,12/13,12/19,12/29 - Scott Harrison
+# YEAR=2002
+# 1/8,1/9,1/29,1/31,2/5,3/21,4/8,4/12 - Scott Harrison
+# 4/21,4/26,5/19,5/23,10/13 - Scott Harrison
 #
 ###
 
@@ -66,9 +78,12 @@ use HTML::TokeParser;
 my $usage=<<END;
 **** ERROR ERROR ERROR ERROR ****
 Usage is for lpml file to come in through standard input.
-1st argument is the mode of parsing.
-2nd argument is the category permissions to use (runtime or development)
-3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
+1st argument is the mode of parsing:
+    install,configinstall,build,rpm,dpkg,htmldoc,textdoc,status
+2nd argument is the category permissions to use:
+    typical choices: runtime,development
+3rd argument is the distribution:
+    typical choices: default,redhat6.2,debian2.2,redhat7
 4th argument is to manually specify a sourceroot.
 5th argument is to manually specify a targetroot.
 
@@ -78,11 +93,13 @@ Example:
 
 cat ../../doc/loncapafiles.lpml |\\
 perl lpml_parse.pl html development default /home/sherbert/loncapa /tmp/install
+
+For more information, type "perldoc lpml_parse.pl".
 END
 
 # ------------------------------------------------- Grab command line arguments
 
-my $mode;
+my $mode='';
 if (@ARGV==5) {
     $mode = shift @ARGV;
 }
@@ -93,34 +110,34 @@ else {
     exit -1; # exit with error status
 }
 
-my $categorytype;
+my $categorytype='';
 if (@ARGV) {
     $categorytype = shift @ARGV;
 }
 
-my $dist;
+my $dist='';
 if (@ARGV) {
     $dist = shift @ARGV;
 }
 
-my $targetroot;
-my $sourceroot;
-my $targetrootarg;
-my $sourcerootarg;
+my $targetroot='';
+my $sourceroot='';
+my $targetrootarg='';
+my $sourcerootarg='';
 if (@ARGV) {
     $sourceroot = shift @ARGV;
 }
 if (@ARGV) {
     $targetroot = shift @ARGV;
 }
-$sourceroot=~s/\/$//;
-$targetroot=~s/\/$//;
+$sourceroot=~s/\/$//; # remove trailing directory slash
+$targetroot=~s/\/$//; # remove trailing directory slash
 $sourcerootarg=$sourceroot;
 $targetrootarg=$targetroot;
 
 my $logcmd='| tee -a WARNINGS';
 
-my $invocation;
+my $invocation; # Record how the program was invoked
 # --------------------------------------------------- Record program invocation
 if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
     $invocation=(<<END);
@@ -128,19 +145,18 @@ if ($mode eq 'install' or $mode eq 'conf
 #             1st argument (mode) is: $mode
 #             2nd argument (category type) is: $categorytype
 #             3rd argument (distribution) is: $dist
-#             4th argument (targetroot) is: described below
-#             5th argument (sourceroot) is: described below
+#             4th argument (sourceroot) is: described below
+#             5th argument (targetroot) is: described below
 END
 }
 
-# ---------------------------------------------------- Start first pass through
-my @parsecontents = <>;
-my $parsestring = join('',@parsecontents);
-my $outstring;
+# -------------------------- Start first pass through (just gather information)
+my @parsecontents=<>;
+my $parsestring=join('',@parsecontents);
 
 # Need to make a pass through and figure out what defaults are
-# overrided.  Top-down overriding strategy (leaves don't know
-# about distant leaves).
+# overridden.  Top-down overriding strategy (tree leaves don't know
+# about distant tree leaves).
 
 my @hierarchy;
 $hierarchy[0]=0;
@@ -149,20 +165,32 @@ my $token;
 $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
-my %hash;
-my $key;
-while ($token = $parser->get_token()) {
+my %setting;
+
+# Values for the %setting hash
+my $defaultset=1; # a default setting exists for a key
+my $distset=2; # a distribution setting exists for a key
+               # (overrides default setting)
+
+my $key=''; # this is a unique key identifier (the token name with its
+            # coordinates inside the hierarchy)
+while ($token = $parser->get_token()) { # navigate through $parsestring
     if ($token->[0] eq 'S') {
 	$hloc++;
 	$hierarchy[$hloc]++;
 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
 	my $thisdist=' '.$token->[2]{'dist'}.' ';
 	if ($thisdist eq ' default ') {
-	    $hash{$key}=1; # there is a default setting for this key
+	    $setting{$key}=$defaultset;
 	}
-	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
-	    $hash{$key}=2; # disregard default setting for this key if
-	                   # there is a directly requested distribution match
+	elsif (length($dist)>0 &&
+	       $setting{$key}==$defaultset &&
+	       $thisdist=~/\s$dist\s/) {
+	    $setting{$key}=$distset;
+                   # disregard default setting for this key if
+                   # there is a directly requested distribution match
+                   # (in other words, there must first be a default
+	           # setting for a key in order for it to be overridden)
 	}
     }
     if ($token->[0] eq 'E') {
@@ -170,61 +198,84 @@ while ($token = $parser->get_token()) {
     }
 }
 
-# --------------------------------------------------- Start second pass through
-undef $hloc;
-undef @hierarchy;
-undef $parser;
-$hierarchy[0]=0;
+# - Start second pass through (clean up the string to allow for easy rendering)
+
+# The string is cleaned up so that there is no white-space surrounding any
+# XML tag.  White-space inside text 'T' elements is preserved.
+
+# Clear up memory
+undef($hloc);
+undef(@hierarchy);
+undef($parser);
+$hierarchy[0]=0; # initialize hierarchy
 $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
-my $cleanstring;
-while ($token = $parser->get_token()) {
-    if ($token->[0] eq 'S') {
+my $cleanstring; # contains the output of the second step
+while ($token = $parser->get_token()) { # navigate through $parsestring
+    if ($token->[0] eq 'S') { # a start tag
 	$hloc++;
 	$hierarchy[$hloc]++;
 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
-	my $thisdist=' '.$token->[2]{'dist'}.' ';
+
+	# Surround tagdist (the dist attribute of an XML tag)
+	# with white-space to allow for uniform searching a few
+	# lines below here.
+	my $tagdist=' '.$token->[2]{'dist'}.' ';
+
 	# This conditional clause is set up to ignore two sets
 	# of invalid conditions before accepting entry into
-	# the cleanstring.
-	if ($hash{$key}==2 and
-	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
+	# $cleanstring.
+
+	# Condition #1: Ignore this part of the string if the tag 
+	# has a superior distribution-specific setting and the tag
+	# being evaluated has a dist setting something other than
+	# blank or $dist.
+	if ($setting{$key}==$distset and
+	    !($tagdist eq '  ' or $tagdist =~/\s$dist\s/)) {
 	    if ($token->[4]!~/\/>$/) {
 		$parser->get_tag('/'.$token->[1]);
 		$hloc--;
 	    }
 	}
-	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
-	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
+	# Condition #2: Ignore this part of the string if the tag's dist
+	# attribute is not blank and does not match $dist, and it either
+	# is not 'default' or the key already has a $dist-specific
+	# setting.
+	elsif ($tagdist ne '  ' and $tagdist!~/\s$dist\s/ and
+	       !($tagdist eq ' default ' and $setting{$key}!=$distset)) {
 	    if ($token->[4]!~/\/>$/) {
 		$parser->get_tag('/'.$token->[1]);
 		$hloc--;
 	    }
 	}
+	# In other words, output to $cleanstring if the tag is dist=default
+	# (and not overridden by a $dist-specific setting) or if the tag
+	# matches $dist.  And, always output when no dist attribute is present.
 	else {
 	    $cleanstring.=$token->[4];
 	}
-	if ($token->[4]=~/\/>$/) {
-	    $hloc--;
-	}
     }
-    if ($token->[0] eq 'E') {
+    # Note: this loop DOES work with <tag /> style markup as well as
+    # <tag></tag> style markup since I always check for $token->[4] ending
+    # with "/>".
+    if ($token->[0] eq 'E') { # an end tag
 	$cleanstring.=$token->[2];
 	$hloc--;
     }
-    if ($token->[0] eq 'T') {
+    if ($token->[0] eq 'T') { # text contents inside tags
 	$cleanstring.=$token->[1];
     }
 }
 $cleanstring=&trim($cleanstring);
 $cleanstring=~s/\>\s*\n\s*\</\>\</g;
 
-# ---------------------------------------------------- Start final pass through
+# -------------------------------------------- Start final (third) pass through
 
 # storage variables
 my $lpml;
 my $categories;
+my @categorynamelist;
 my $category;
 my $category_att_name;
 my $category_att_type;
@@ -248,6 +299,7 @@ my $directories;
 my $directory;
 my $targetdirs;
 my $targetdir;
+my $protectionlevel;
 my $categoryname;
 my $description;
 my $files;
@@ -272,6 +324,12 @@ my @links;
 my %categoryhash;
 my $dpathlength;
 my %fab; # file category abbreviation
+my $directory_count;
+my $file_count;
+my $link_count;
+my $fileglob_count;
+my $fileglobnames_count;
+my %categorycount;
 
 my @buildall;
 my @buildinfo;
@@ -287,12 +345,15 @@ $parser->xml_mode('1');
 # Define handling methods for mode-dependent text rendering
 
 $parser->{textify}={
+    specialnotices => \&format_specialnotices,
+    specialnotice => \&format_specialnotice,
     targetroot => \&format_targetroot,
     sourceroot => \&format_sourceroot,
     categories => \&format_categories,
     category => \&format_category,
     abbreviation => \&format_abbreviation,
     targetdir => \&format_targetdir,
+    protectionlevel => \&format_protectionlevel,
     chown => \&format_chown,
     chmod => \&format_chmod,
     rpm => \&format_rpm,
@@ -308,6 +369,7 @@ $parser->{textify}={
     rpmAutoReqProv => \&format_rpmAutoReqProv,
     rpmdescription => \&format_rpmdescription,
     rpmpre => \&format_rpmpre,
+    rpmRequires => \&format_rpmRequires,
     directories => \&format_directories,
     directory => \&format_directory,
     categoryname => \&format_categoryname,
@@ -324,6 +386,7 @@ $parser->{textify}={
     build => \&format_build,
     status => \&format_status,
     dependencies => \&format_dependencies,
+    privatedependencies => \&format_privatedependencies,
     buildlink => \&format_buildlink,
     glob => \&format_glob,
     sourcedir => \&format_sourcedir,
@@ -354,7 +417,43 @@ exit;
 # ------------------------ Final output at end of markup parsing and formatting
 sub end {
     if ($mode eq 'html') {
-	return "</body></html>\n";
+	return "<br />&nbsp;<br />".
+	    "<a name='summary' /><font size='+2'>Summary of Source Repository".
+	    "</font>".
+	    "<br />&nbsp;<br />".
+	    "<table border='1' cellpadding='5'>".
+	    "<caption>Files, Directories, and Symbolic Links</caption>".
+	    "<tr><td>Files (not referenced by globs)</td><td>$file_count</td>".
+	    "</tr>".
+	    "<tr><td>Files (referenced by globs)</td>".
+	    "<td>$fileglobnames_count</td>".
+	    "</tr>".
+	    "<tr><td>Total Files</td>".
+	    "<td>".($fileglobnames_count+$file_count)."</td>".
+	    "</tr>".
+	    "<tr><td>File globs</td>".
+	    "<td>".$fileglob_count."</td>".
+	    "</tr>".
+	    "<tr><td>Directories</td>".
+	    "<td>".$directory_count."</td>".
+	    "</tr>".
+	    "<tr><td>Symbolic links</td>".
+	    "<td>".$link_count."</td>".
+	    "</tr>".
+	    "</table>".
+	    "<table border='1' cellpadding='5'>".
+	    "<caption>File Category Count</caption>".
+	    "<tr><th>Icon</th><th>Name</th><th>Number of Occurrences</th>".
+	    "<th>Number of Incorrect Counts</th>".
+	    "</tr>".
+	    join("\n",(map {"<tr><td><img src='$fab{$_}.gif' ".
+		 "alt='$_ icon' /></td>".
+ 	         "<td>$_</td><td>$categorycount{$_}</td>".
+		 "<td><!-- POSTEVALINLINE $_ --></td></tr>"}
+		@categorynamelist)).
+	    "</table>".
+	    "</body></html>\n";
+
     }
     if ($mode eq 'install') {
 	return '';
@@ -392,6 +491,7 @@ Descriptions</a></li>
 <li><a href='#package'>Software Package Description</a></li>
 <li><a href='#directories'>Directory Structure</a></li>
 <li><a href='#files'>Files</a></li>
+<li><a href='#summary'>Summary of Source Repository</a></li>
 </ul>
 END
         $lpml .=<<END;
@@ -525,6 +625,7 @@ sub format_category {
     $fab{$category_att_name}=$abbreviation;
     if ($mode eq 'html') {
 	if ($category_att_type eq $categorytype) {
+	    push @categorynamelist,$category_att_name;
 	    $categoryhash{$category_att_name}="$chmod $chown";
 	    return $category="<tr>".
 		"<td><img src='$abbreviation.gif' ".
@@ -596,6 +697,9 @@ $text
 </table>
 END
     }
+    elsif ($mode eq 'make_rpm') {
+	return $text;
+    }
     elsif ($mode eq 'text') {
 	return $rpm=<<END;
 Software Package Description
@@ -617,6 +721,11 @@ sub format_rpmSummary {
     elsif ($mode eq 'text') {
 	return $rpmSummary="\nSummary     : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<summary>$text</summary>
+END
+    }
     else {
 	return '';
     }
@@ -631,6 +740,11 @@ sub format_rpmName {
     elsif ($mode eq 'text') {
 	return $rpmName="\nName        : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<name>$text</name>
+END
+    }
     else {
 	return '';
     }
@@ -673,6 +787,11 @@ sub format_rpmVendor {
     elsif ($mode eq 'text') {
 	return $rpmVendor="\nVendor      : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<vendor>$text</vendor>
+END
+    }
     else {
 	return '';
     }
@@ -701,6 +820,11 @@ sub format_rpmCopyright {
     elsif ($mode eq 'text') {
 	return $rpmCopyright="\nLicense     : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<copyright>$text</copyright>
+END
+    }
     else {
 	return '';
     }
@@ -715,6 +839,11 @@ sub format_rpmGroup {
     elsif ($mode eq 'text') {
 	return $rpmGroup="\nGroup       : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<group>Utilities/System</group>
+END
+    }
     else {
 	return '';
     }
@@ -740,9 +869,14 @@ sub format_rpmAutoReqProv {
     if ($mode eq 'html') {
 	return $rpmAutoReqProv="\nAutoReqProv : $text";
     }
-    if ($mode eq 'text') {
+    elsif ($mode eq 'text') {
 	return $rpmAutoReqProv="\nAutoReqProv : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<AutoReqProv>$text</AutoReqProv>
+END
+    }
     else {
 	return '';
     }
@@ -761,6 +895,13 @@ sub format_rpmdescription {
 	$text=~s/\\n/\n/g;
 	return $rpmdescription="\nDescription : $text";
     }
+    elsif ($mode eq 'make_rpm') {
+	$text=~s/\n//g;
+	$text=~s/\\n/\n/g;
+	return <<END;
+<description>$text</description>
+END
+    }
     else {
 	return '';
     }
@@ -773,10 +914,42 @@ sub format_rpmpre {
 #	return $rpmpre="\n<br />RPMPRE $text";
 	return '';
     }
+    elsif ($mode eq 'make_rpm') {
+	return <<END;
+<pre>$text</pre>
+END
+    }
     else {
 	return '';
     }
 }
+# -------------------------------------------------- Format requires section
+sub format_rpmRequires { # handler for <rpmRequires>; emits output only in make_rpm mode
+    my @tokeninfo=@_; # handler arguments (not used below)
+    my $aref; # current token pulled from $parser
+    my $text; # accumulates the raw markup found inside the element
+    if ($mode eq 'make_rpm') {
+	while ($aref=$parser->get_token()) { # consume tokens up to </rpmRequires>
+	    if ($aref->[0] eq 'E' && $aref->[1] eq 'rpmRequires') {
+		last; # closing tag reached; it is consumed but not appended
+	    }
+	    elsif ($aref->[0] eq 'S') {
+		$text.=$aref->[4]; # start tag: append its original text
+	    }
+	    elsif ($aref->[0] eq 'E') {
+		$text.=$aref->[2]; # nested end tag: append its original text
+	    }
+	    else {
+		$text.=$aref->[1]; # any other token type: append element [1]
+	    }
+	}
+    }
+    else {
+	$parser->get_tag('/rpmRequires'); # other modes: skip to the closing tag
+	return ''; # and render nothing
+    }
+    return '<rpmRequires>'.$text.'</rpmRequires>'; # re-wrap the collected markup
+}
 # -------------------------------------------------- Format directories section
 sub format_directories {
     my $text=$parser->get_text('/directories');
@@ -801,7 +974,13 @@ sub format_directories {
     }
     elsif ($mode eq 'install') {
 	return "\n".'directories:'."\n".$text;
-   }
+    }
+    elsif ($mode eq 'rpm_file_list') {
+	return $text;
+    }
+    elsif ($mode eq 'uninstall_shell_commands') {
+	return $text;
+    }
     else {
 	return '';
     }
@@ -809,9 +988,11 @@ sub format_directories {
 # ---------------------------------------------------- Format directory section
 sub format_directory {
     my (@tokeninfo)=@_;
-    $targetdir='';$categoryname='';$description='';
+    $targetdir='';$categoryname='';$description='';$protectionlevel='';
     $parser->get_text('/directory');
     $parser->get_tag('/directory');
+    $directory_count++;
+    $categorycount{$categoryname}++;
     if ($mode eq 'html') {
 	my @a;
 	@a=($targetdir=~/\//g);
@@ -822,7 +1003,9 @@ sub format_directory {
 	my ($chmod,$chown)=split(/\s/,$categoryhash{$categoryname});
 	return $directory="\n<tr><td rowspan='2' bgcolor='#ffffff'>".
 	    "$categoryname</td>".
-	    "<td rowspan='2' bgcolor='#ffffff'><!-- POSTEVAL2 verify.pl directory /$targetdir $categoryhash{$categoryname} -->&nbsp;</td>".
+	    "<td rowspan='2' bgcolor='#ffffff'><!-- POSTEVAL [$categoryname] ".
+	    "verify.pl directory /$targetdir $categoryhash{$categoryname} -->".
+	    "&nbsp;</td>".
 	    "<td rowspan='2' bgcolor='#ffffff'>$chmod<br />$chown</td>".
 	    "<td bgcolor='#ffffff'>$thtml</td></tr>".
 	    "<tr><td bgcolor='#ffffff' colspan='[{{{{{DPATHLENGTH}}}}}]'>".
@@ -836,6 +1019,39 @@ sub format_directory {
 	return "\t".'install '.$categoryhash{$categoryname}.' -d '.
 	    $targetroot.'/'.$targetdir."\n";
     }
+    elsif ($mode eq 'rpm_file_list') {
+	return $targetroot.'/'.$targetdir."\n";
+    }
+    elsif ($mode eq 'uninstall_shell_commands') {
+	if ($protectionlevel eq 'never_delete') {
+	    return 'echo "LEAVING BEHIND '.$targetroot.'/'.$targetdir.
+		' which may have important data worth saving"'."\n";
+	}
+	elsif ($protectionlevel eq 'weak_delete') {
+	    if ($targetdir!~/\w/) {
+		die("targetdir=\"$targetdir\"! NEVER EVER DELETE THE WHOLE ".
+		    "FILESYSTEM"."\n");
+	    }
+	    return 'rm -Rvf -i '.$targetroot.'/'.$targetdir."\n";
+	}
+	elsif ($protectionlevel =~ /never/) {
+	    die("CONFUSING PROTECTION LEVEL \"$protectionlevel\" FOUND ".
+		"FOR directory $targetdir"."\n");
+	}
+	elsif ($protectionlevel !~ # (?:...) makes ^ and $ bind every alternative
+    /^(?:never_delete|weak_delete|modest_delete|strong_delete|absolute_delete)$/) {
+	    die("CONFUSING OR MISSING PROTECTION LEVEL \"$protectionlevel\" ".
+		"FOUND FOR directory $targetdir\n");
+	}
+	else {
+	    if ($targetdir!~/\w/) {
+		die("targetdir=\"$targetdir\"! NEVER EVER DELETE THE WHOLE ".
+		    "FILESYSTEM"."\n");
+	    }
+	    return 'rm -Rvf '.$targetroot.'/'.$targetdir.
+		"| grep 'removed directory'"."\n";
+	}
+    }
     else {
 	return '';
     }
@@ -851,6 +1067,17 @@ sub format_targetdir {
     }
     return '';
 }
+# ---------------------------------------------- Format protectionlevel section
+sub format_protectionlevel { # handler for <protectionlevel>
+    my @tokeninfo=@_; # handler arguments (not used below)
+    $protectionlevel=''; # reset the file-level value before reading this element
+    my $text=&trim($parser->get_text('/protectionlevel')); # element text, trimmed
+    if ($text) { # only a true (non-empty, non-'0') value is recorded
+	$parser->get_tag('/protectionlevel'); # consume the closing tag
+	$protectionlevel=$text; # stored in the file-level $protectionlevel
+    }
+    return ''; # nothing is rendered in place of this element
+}
 # ------------------------------------------------- Format categoryname section
 sub format_categoryname {
     my @tokeninfo=@_;
@@ -877,7 +1104,10 @@ sub format_description {
 sub format_files {
     my $text=$parser->get_text('/files');
     $parser->get_tag('/files');
-    if ($mode eq 'html') {
+    if ($mode eq 'MANIFEST') {
+	return $text;
+    }
+    elsif ($mode eq 'html') {
 	return $directories="\n<br />&nbsp;<br />".
 	    "<a name='files' />".
 	    "<font size='+2'>Files</font><br />&nbsp;<br />".
@@ -915,8 +1145,14 @@ sub format_files {
 	foreach my $bi (@buildinfo) {
 	    my ($target,$source,$command,$trigger,@deps)=split(/\;/,$bi);
 	    $tword=''; $tword=' alwaysrun' if $trigger eq 'always run'; 
-	    $command=~s/\/([^\/]*)$//;
-	    $command2="cd $command; sh ./$1;\\";
+	    if ($command!~/\s/) {
+		$command=~s/\/([^\/]*)$//;
+		$command2="cd $command; sh ./$1;\\";
+	    }
+	    else {
+		$command=~s/(.*?\/)([^\/]+\s+.*)$/$1/;
+		$command2="cd $command; sh ./$2;\\";
+	    }
 	    my $depstring;
 	    my $depstring2="\t\t\@echo '';\\\n";
 	    my $olddep;
@@ -952,6 +1188,9 @@ sub format_files {
 		$binfo."\n".
 		"alwaysrun:\n\n";
     }
+    elsif ($mode eq 'rpm_file_list') {
+	return $text;
+    }
     else {
 	return '';
     }
@@ -982,12 +1221,31 @@ sub format_file {
     $note=''; $build=''; $status=''; $dependencies='';
     my $text=&trim($parser->get_text('/file'));
     my $buildtest;
+    $file_count++;
+    $categorycount{$categoryname}++;
     if ($source) {
 	$parser->get_tag('/file');
-	if ($mode eq 'html') {
+	if ($mode eq 'MANIFEST') {
+	    my $command=$build;
+	    if ($command!~/\s/) {
+		$command=~s/\/([^\/]*)$//;
+	    }
+	    else {
+		$command=~s/(.*?\/)([^\/]+\s+.*)$/$1/;
+	    }
+	    $command=~s/^$sourceroot\///;
+	    my (@deps)=split(/\;/,$dependencies);
+	    my $retval=join("\n",($source,
+		       (map {"$command$_"} @deps)));
+	    if ($tokeninfo[2]{type} eq 'private') {
+		return "\n";
+	    }
+	    return $retval."\n";
+	}
+	elsif ($mode eq 'html') {
 	    return ($file="\n<!-- FILESORT:$target -->".
 		    "<tr>".
-		    "<td><!-- POSTEVAL2 verify.pl file '$sourcerootarg' ".
+          "<td><!-- POSTEVAL [$categoryname] verify.pl file '$sourcerootarg' ".
 		    "'$targetrootarg' ".
 		    "'$source' '$target' ".
 		    "$categoryhash{$categoryname} -->&nbsp;</td><td>".
@@ -1016,10 +1274,10 @@ sub format_file {
 		foreach my $dep (@deps) {
 		    $depstring.=<<END;
 		ECODE=0; DEP=''; \\
-		test -e $command/$dep || (echo '**** WARNING **** cannot evaluate status of dependency $command/$dep (for building ${sourceroot}/${source} with)'$logcmd); DEP="1"; \\
-		[ -n DEP ] && { perl filecompare.pl -b2 $command/$dep ${targetroot}/${target} || ECODE=\$\$?; } || DEP="1"; \\
+		test -e $dep || (echo '**** WARNING **** cannot evaluate status of dependency $dep (for building ${sourceroot}/${source} with)'$logcmd); DEP="1"; \\
+		[ -n DEP ] && { perl filecompare.pl -b2 $dep ${targetroot}/${target} || ECODE=\$\$?; } || DEP="1"; \\
 		case "\$\$ECODE" in \\
-			2) echo "**** WARNING **** dependency $command/$dep is newer than target file ${targetroot}/${target}; you may want to run make build"$logcmd;; \\
+			2) echo "**** WARNING **** dependency $dep is newer than target file ${targetroot}/${target}; you may want to run make build"$logcmd;; \\
 		esac; \\
 END
 		}
@@ -1059,35 +1317,55 @@ END
 	elsif ($mode eq 'configinstall' && $categoryname eq 'conf') {
 	    push @configall,$targetroot.'/'.$target;
 	    return $targetroot.'/'.$target.': alwaysrun'."\n".
-		"\t".'@echo -n ""; ECODE=0 && { perl filecompare.pl -b4 '.
-		$sourceroot.'/'.$source.' '.$targetroot.'/'.$target.
-		' || ECODE=$$?; } && '.
-		'{ [ $$ECODE != "2" ] || (install '.
-                $categoryhash{$categoryname}.' '.
-		$sourceroot.'/'.$source.' '.
-		$targetroot.'/'.$target.'.lpmlnew'.
+		"\t".'@# Compare source with target and intelligently respond'.
+		"\n\t\n\t\n".
+
+
+		"\t".'@echo -n ""; ECODE=0 && { perl filecompare.pl -b4 \\'.
+		"\n\t".$sourceroot.'/'.$source." \\\n\t".
+		$targetroot.'/'.$target." \\\n\t".
+		' || ECODE=$$?; } && '."\\\n\t"."\\\n\t"."\\\n\t".
+
+
+		'{ [ $$ECODE != "2" ] || '." \\\n\t".'(install '.
+                $categoryhash{$categoryname}." \\\n\t\t".
+		$sourceroot.'/'.$source." \\\n\t\t".
+		$targetroot.'/'.$target.'.lpmlnew'." \\\n\t\t".
 		' && echo "**** NOTE: CONFIGURATION FILE CHANGE ****"'.
-		$logcmd.' && echo "'.
-		'You likely need to compare contents of '.
-		''.$targetroot.'/'.$target.' with the new '.
-                ''.$targetroot.'/'.$target.'.lpmlnew"'.
-		"$logcmd); } && ".
-		'{ [ $$ECODE != "3" ] || (install '.
-                $categoryhash{$categoryname}.' '.
-		$sourceroot.'/'.$source.' '.
-		$targetroot.'/'.$target.''.
+		" \\\n\t\t".$logcmd.' && '." \\\n\t\t"."echo -n \"".
+		'You likely need to compare contents of "'."\\\n\t\t\t".
+		'&& echo -n "'.$targetroot.'/'.$target.'"'."\\\n\t\t".
+		'&& echo -n " with the new "'."\\\n\t\t\t".
+                '&& echo "'.$targetroot.'/'.$target.'.lpmlnew"'."\\\n\t\t".
+		"$logcmd); } && "." \\\n\t"."\\\n\t"."\\\n\t".
+
+
+		'{ [ $$ECODE != "3" ] || '."\\\n\t".
+		'(install '.
+                $categoryhash{$categoryname}."\\\n\t\t".
+		$sourceroot.'/'.$source."\\\n\t\t".
+		$targetroot.'/'.$target."\\\n\t\t".
 		' && echo "**** WARNING: NEW CONFIGURATION FILE ADDED ****"'.
-		$logcmd.' && echo "'.
-		'You likely need to review the contents of '.
-		''.$targetroot.'/'.$target.' to make sure its '.
-                'settings are compatible with your overall system"'.
-		"$logcmd); } && ".
-		'{ [ $$ECODE != "1" ] || ('.
-		'echo "**** ERROR ****"'.
-		$logcmd.' && echo "'.
-		'Configuration source file does not exist '.
-		''.$sourceroot.'/'.$source.'"'.
-		"$logcmd); } && perl verifymodown.pl ${targetroot}/${target} \"$categoryhash{$categoryname}\"$logcmd;\n\n";
+		"\\\n\t\t".$logcmd.' && '."\\\n\t\t".
+		'echo -n "'.
+		'You likely need to review the contents of "'."\\\n\t\t\t".
+		'&& echo -n "'.
+		$targetroot.'/'.$target.'"'."\\\n\t\t\t".
+		'&& echo -n "'.
+		' to make sure its "'."\\\n\t\t".
+		'&& echo "'.
+                'settings are compatible with your overall system"'."\\\n\t\t".
+		"$logcmd); } && "."\\\n\t"."\\\n\t"."\\\n\t".
+
+
+		'{ [ $$ECODE != "1" ] || ('."\\\n\t\t".
+		'echo "**** ERROR ****"'.$logcmd.' && '."\\\n\t\t".'echo -n "'.
+		'Configuration source file does not exist "'."\\\n\t\t".
+		'&& echo -n "'.$sourceroot.'/'.$source.'"'."\\\n\t\t".
+		"$logcmd); } && "."\\\n\t\t".
+		"perl verifymodown.pl ${targetroot}/${target} "."\\\n\t\t\t".
+		"\"$categoryhash{$categoryname}\""."\\\n\t\t\t".
+		"$logcmd;\n\n";
 	}
 	elsif ($mode eq 'build' && $build) {
 	    push @buildall,$sourceroot.'/'.$source;
@@ -1096,6 +1374,17 @@ END
 		$dependencies;
 #	    return '# need to build '.$source.";
 	}
+        elsif ($mode eq 'rpm_file_list') {
+	    if ($categoryname eq 'doc') {
+		return $targetroot.'/'.$target.' # doc'."\n";
+	    }
+	    elsif ($categoryname eq 'conf') {
+		return $targetroot.'/'.$target.' # config'."\n";
+	    }
+	    else {
+		return $targetroot.'/'.$target."\n";
+	    }
+	}
 	else {
 	    return '';
 	}
@@ -1112,10 +1401,12 @@ sub format_link {
 	$parser->get_tag('/link');
 	if ($mode eq 'html') {
 	    my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
+	    $link_count+=scalar(@targets);
 	    foreach my $tgt (@targets) {
+		$categorycount{$categoryname}++;
 		push @links,("\n<!-- FILESORT:$tgt -->".
 		    "<tr>".
-		    "<td><!-- POSTEVAL2 verify.pl link ".
+		    "<td><!-- POSTEVAL [$categoryname] verify.pl link ".
 		    "'/$targetrootarg$linkto' '/$targetrootarg$tgt' ".
 		    "$categoryhash{$categoryname} -->&nbsp;</td><td>".
 		    "<img src='$fab{$categoryname}.gif' ".
@@ -1146,11 +1437,20 @@ sub format_link {
 	elsif ($mode eq 'install') {
 	    my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
 	    foreach my $tgt (@targets) {
-		push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt.
+		push @links,"\t".'ln -fs /'.$linkto.' '.$targetroot.'/'.$tgt.
 		    "\n";
 	    }
+#	    return join('',@links);
 	    return '';
 	}
+	elsif ($mode eq 'rpm_file_list') {
+	    my @linklocs;
+	    my @targets=map {s/^\s*//;s/\s$//;$_} split(/\;/,$target);
+	    foreach my $tgt (@targets) {
+		push @linklocs,''.$targetroot.'/'.$tgt."\n";
+	    }
+	    return join('',@linklocs);
+	}
 	else {
 	    return '';
 	}
@@ -1166,11 +1466,18 @@ sub format_fileglob {
     $filenames='';
     my $text=&trim($parser->get_text('/fileglob'));
     my $filenames2=$filenames;$filenames2=~s/\s//g;
+    $fileglob_count++;
+    my @semi=($filenames2=~/(\;)/g);
+    $fileglobnames_count+=scalar(@semi)+1;
+    $categorycount{$categoryname}+=scalar(@semi)+1;
     if ($sourcedir) {
 	$parser->get_tag('/fileglob');
-	if ($mode eq 'html') {
+	if ($mode eq 'MANIFEST') {
+         return join("\n",(map {"$sourcedir$_"} split(/\;/,$filenames2)))."\n";
+	}
+	elsif ($mode eq 'html') {
 	    return $fileglob="\n<tr>".
-		"<td><!-- POSTEVAL2 verify.pl fileglob '$sourcerootarg' ".
+      "<td><!-- POSTEVAL [$categoryname] verify.pl fileglob '$sourcerootarg' ".
 		"'$targetrootarg' ".
 		"'$glob' '$sourcedir' '$filenames2' '$targetdir' ".
 		"$categoryhash{$categoryname} -->&nbsp;</td>".
@@ -1192,11 +1499,25 @@ sub format_fileglob {
 #		"\nEND FILEGLOB</td></tr>";
 	}
 	elsif ($mode eq 'install') {
+	    my $eglob=$glob;
+	    if ($glob eq '*') {
+		$eglob='[^C][^V][^S]'.$glob;
+	    }
 	    return "\t".'install '.
 		$categoryhash{$categoryname}.' '.
-		$sourceroot.'/'.$sourcedir.'[^C][^V][^S]'.$glob.' '.
+		$sourceroot.'/'.$sourcedir.$eglob.' '.
 		$targetroot.'/'.$targetdir.'.'."\n";
 	}
+	elsif ($mode eq 'rpm_file_list') {
+	    my $eglob=$glob;
+	    if ($glob eq '*') {
+		$eglob='[^C][^V][^S]'.$glob;
+	    }
+	    my $targetdir2=$targetdir;$targetdir2=~s/\/$//;
+	    my @gfiles=map {s/^.*\///;"$targetroot/$targetdir2/$_\n"}
+	               glob("$sourceroot/$sourcedir/$eglob");
+	    return join('',@gfiles);
+	}
 	else {
 	    return '';
 	}
@@ -1272,6 +1593,7 @@ sub format_build {
     if ($text) {
 	$parser->get_tag('/build');
 	$build=$sourceroot.'/'.$text.';'.$tokeninfo[2]{'trigger'};
+	$build=~s/([^\\])\\\s+/$1/g; # allow for lines split onto new lines
     }
     return '';
 }
@@ -1300,12 +1622,24 @@ sub format_status {
 # ------------------------------------------------- Format dependencies section
 sub format_dependencies {
     my @tokeninfo=@_;
-    $dependencies='';
+    #$dependencies='';
     my $text=&trim($parser->get_text('/dependencies'));
     if ($text) {
 	$parser->get_tag('/dependencies');
-	$dependencies=join(';',
-			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
+	$dependencies=join(';',((map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)),$dependencies));
+	$dependencies=~s/;$//;
+    }
+    return '';
+}
+sub format_privatedependencies { # handler for <privatedependencies>
+    my @tokeninfo=@_; # handler arguments (not used below)
+    #$dependencies=''; (reset left disabled: new entries are joined onto $dependencies)
+    my $text=&trim($parser->get_text('/privatedependencies')); # element text, trimmed
+    if ($text) {
+	$parser->get_tag('/privatedependencies'); # consume the closing tag
+	if ($mode eq 'MANIFEST') { return '';	} # MANIFEST mode records no private dependencies
+	$dependencies=join(';',((map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)),$dependencies)); # NOTE(review): s/\s$// strips only one trailing whitespace character per entry
+	$dependencies=~s/;$//; # drop the trailing ';' left when $dependencies was empty
+    }
+    return ''; # nothing is rendered in place of this element
+}
@@ -1330,6 +1664,16 @@ sub format_filenames {
     }
     return '';
 }
+# ----------------------------------------------- Format specialnotices section
+sub format_specialnotices { # handler for <specialnotices>
+    $parser->get_tag('/specialnotices'); # skip everything up to the closing tag
+    return ''; # nothing is rendered for this element, whatever the mode
+}
+# ------------------------------------------------ Format specialnotice section
+sub format_specialnotice { # handler for <specialnotice>
+    $parser->get_tag('/specialnotice'); # skip everything up to the closing tag
+    return ''; # nothing is rendered for this element, whatever the mode
+}
 # ------------------------------------------------------- Format linkto section
 sub format_linkto {
     my @tokeninfo=@_;
@@ -1354,14 +1698,17 @@ sub trim {
 
 # ----------------------------------- POD (plain old documentation, CPAN style)
 
+=pod
+
 =head1 NAME
 
 lpml_parse.pl - This is meant to parse files meeting the lpml document type.
-See lpml.dtd.  LPML=Linux Packaging Markup Language.
 
 =head1 SYNOPSIS
 
-Usage is for lpml file to come in through standard input.
+<STDIN> | perl lpml_parse.pl <MODE> <CATEGORY> <DIST> <SOURCE> <TARGET>
+
+Usage is for the lpml file to come in through standard input.
 
 =over 4
 
@@ -1393,19 +1740,57 @@ Only the 1st argument is mandatory for t
 Example:
 
 cat ../../doc/loncapafiles.lpml |\\
-perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
+perl lpml_parse.pl html runtime default /home/sherbert/loncapa /tmp/install
 
 =head1 DESCRIPTION
 
-I am using a multiple pass-through approach to parsing
-the lpml file.  This saves memory and makes sure the server
-will never be overloaded.
+The general flow of the script is to get command line arguments, run through
+the XML document three times, and output according to any desired mode:
+install, configinstall, build, rpm, dpkg, htmldoc, textdoc, and status.
+
+A number of coding decisions are made according to the following principle:
+installation software must be stand-alone.  Therefore, for instance, I try
+not to use the Getopt::Long module or any other Perl modules.  (I do however
+use HTML::TokeParser.)  I also have tried to keep all the MODES of
+parsing inside this file.  Therefore, format_TAG subroutines are fairly
+lengthy with their conditional logic.  A more "elegant" solution might
+be to dynamically register the parsing mode and subroutines, or maybe even work
+with stylesheets.  However, in order to make this the installation back-bone
+of choice, there are advantages for HAVING EVERYTHING IN ONE FILE.
+This way, the LPML installation software does not have to rely on OTHER
+installation software (a chicken-and-egg problem).  Besides, I would
+suggest the modes of parsing are fairly constant: install, configinstall,
+build, rpm, dpkg, htmldoc, textdoc, and status.
+
+Another coding decision is about using a multiple pass-through approach to
+parsing the lpml file.  This saves memory and makes sure the server will never
+be overloaded.  During the first pass-through, the script gathers the
+information needed to resolve which tags (by 'dist=' attribute) are to be used.
+During the second pass-through, the script cleans up white-space surrounding
+the XML tags, and filters through the tags based on information regarding the
+'dist=' attributes (information gathered in the first pass-through).
+The third and final pass-through involves formatting and rendering the XML
+into whatever output mode is chosen: install, configinstall, build, rpm, dpkg,
+htmldoc, textdoc, and status.
+
+The hierarchy mandated by the DTD does not always correspond to the hierarchy
+that is sensible for a Makefile.  For instance, in a Makefile it is sensible
+that soft-links are installed after files.  However, in an LPML document, it
+is sensible that files and links be considered together and the writer of the
+LPML document should be free to place things in whatever order makes best
+sense in terms of LOOKING at the information.  The complication that arises
+is that the parser needs to have a memory for passing values from
+leaves on the XML tree to higher-up branches.  Currently, this memory is
+hard-coded (like with the @links array), but it may benefit from a more
+formal approach in the future.
 
 =head1 README
 
-I am using a multiple pass-through approach to parsing
-the lpml file.  This saves memory and makes sure the server
-will never be overloaded.
+This parses an LPML file to generate information useful for
+source to target installation, compilation, filesystem status
+checking, RPM and Debian software packaging, and documentation.
+
+More information on LPML is available at http://lpml.sourceforge.net.
 
 =head1 PREREQUISITES
 
@@ -1419,6 +1804,14 @@ linux
 
 =head1 SCRIPT CATEGORIES
 
-Packaging/Administrative
+UNIX/System_administration
+
+=head1 AUTHOR
+
+ Scott Harrison
+ codeharrison@yahoo.com
+
+Please let me know how/if you are finding this script useful and
+any/all suggestions.  -Scott
 
 =cut