#!/usr/bin/perl
# Scott Harrison
# YEAR=2001
# May 2001
# 06/19/2001,06/20,06/24 - Scott Harrison
# 9/5/2001,9/6,9/7,9/8 - Scott Harrison
###############################################################################
## ##
## ORGANIZATION OF THIS PERL SCRIPT ##
## 1. Notes ##
## 2. Get command line arguments ##
## 3. First pass through (grab distribution-specific information) ##
## 4. Second pass through (parse out what is not necessary) ##
## 5. Third pass through (translate markup according to specified mode) ##
## ##
###############################################################################
# ----------------------------------------------------------------------- Notes
#
# I am using a multiple pass-through approach to parsing
# the lpml file. This saves memory and makes sure the server
# will never be overloaded.
#
# This is meant to parse files meeting the lpml document type.
# See lpml.dtd. LPML=Linux Packaging Markup Language.
use HTML::TokeParser;
my $usage=<<END;
**** ERROR ERROR ERROR ERROR ****
Usage is for lpml file to come in through standard input.
1st argument is the mode of parsing.
2nd argument is the category permissions to use (runtime or development)
3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
4th argument is to manually specify a sourceroot.
5th argument is to manually specify a targetroot.
Only the 1st argument is mandatory for the program to run.
Example:
cat ../../doc/loncapafiles.lpml |\\
perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
END
# ------------------------------------------------- Grab command line arguments
my $mode;
if (@ARGV==5) {
$mode = shift @ARGV;
}
else {
@ARGV=();shift @ARGV;
while(<>){} # throw away the input to avoid broken pipes
print $usage;
exit -1; # exit with error status
}
my $categorytype;
if (@ARGV) {
$categorytype = shift @ARGV;
}
my $dist;
if (@ARGV) {
$dist = shift @ARGV;
}
my $targetroot;
my $sourceroot;
if (@ARGV) {
$sourceroot = shift @ARGV;
}
if (@ARGV) {
$targetroot = shift @ARGV;
}
$sourceroot=~s/\/$//;
$targetroot=~s/\/$//;
my $invocation;
# --------------------------------------------------- Record program invocation
if ($mode eq 'install') {
$invocation=(<<END);
# Invocation: STDINPUT | lpml_parse.pl
# 1st argument (mode) is: $mode
# 2nd argument (category type) is: $categorytype
# 3rd argument (distribution) is: $dist
# 4th argument (targetroot) is: described below
# 5th argument (sourceroot) is: described below
END
}
# ---------------------------------------------------- Start first pass through
my @parsecontents = <>;
my $parsestring = join('',@parsecontents);
my $outstring;
# Need to make a pass through and figure out what defaults are
# overrided. Top-down overriding strategy (leaves don't know
# about distant leaves).
my @hierarchy;
$hierarchy[0]=0;
my $hloc=0;
my $token;
$parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
my %hash;
my $key;
while ($token = $parser->get_token()) {
if ($token->[0] eq 'S') {
$hloc++;
$hierarchy[$hloc]++;
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
my $thisdist=' '.$token->[2]{'dist'}.' ';
if ($thisdist eq ' default ') {
$hash{$key}=1; # there is a default setting for this key
}
elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
$hash{$key}=2; # disregard default setting for this key if
# there is a directly requested distribution match
}
}
if ($token->[0] eq 'E') {
$hloc--;
}
}
# --------------------------------------------------- Start second pass through
undef $hloc;
undef @hierarchy;
undef $parser;
$hierarchy[0]=0;
$parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
my $cleanstring;
while ($token = $parser->get_token()) {
if ($token->[0] eq 'S') {
$hloc++;
$hierarchy[$hloc]++;
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
my $thisdist=' '.$token->[2]{'dist'}.' ';
# This conditional clause is set up to ignore two sets
# of invalid conditions before accepting entry into
# the cleanstring.
if ($hash{$key}==2 and
!($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) {
if ($token->[4]!~/\/>$/) {
$parser->get_tag('/'.$token->[1]);
$hloc--;
}
}
elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and
!($thisdist eq ' default ' and $hash{$key}!=2)) {
if ($token->[4]!~/\/>$/) {
$parser->get_tag('/'.$token->[1]);
$hloc--;
}
}
else {
$cleanstring.=$token->[4];
}
if ($token->[4]=~/\/>$/) {
$hloc--;
}
}
if ($token->[0] eq 'E') {
$cleanstring.=$token->[2];
$hloc--;
}
if ($token->[0] eq 'T') {
$cleanstring.=$token->[1];
}
}
$cleanstring=&trim($cleanstring);
$cleanstring=~s/\s*\n\s*//g;
# ---------------------------------------------------- Start final pass through
# storage variables
my $lpml;
my $categories;
my $category;
my $category_att_name;
my $category_att_type;
my $chown;
my $chmod;
my $rpm;
my $rpmSummary;
my $rpmName;
my $rpmVersion;
my $rpmRelease;
my $rpmVendor;
my $rpmBuildRoot;
my $rpmCopyright;
my $rpmGroup;
my $rpmSource;
my $rpmAutoReqProv;
my $rpmdescription;
my $rpmpre;
my $directories;
my $directory;
my $targetdirs;
my $targetdir;
my $categoryname;
my $description;
my $files;
my $fileglobs;
my $links;
my $file;
my $link;
my $fileglob;
my $sourcedir;
my $targets;
my $target;
my $source;
my $note;
my $build;
my $commands;
my $command;
my $status;
my $dependencies;
my $dependency;
my @links;
my %categoryhash;
# Make new parser with distribution specific input
undef $parser;
$parser = HTML::TokeParser->new(\$cleanstring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
# Define handling methods for mode-dependent text rendering
$parser->{textify}={
targetroot => \&format_targetroot,
sourceroot => \&format_sourceroot,
categories => \&format_categories,
category => \&format_category,
targetdir => \&format_targetdir,
chown => \&format_chown,
chmod => \&format_chmod,
rpm => \&format_rpm,
rpmSummary => \&format_rpmSummary,
rpmName => \&format_rpmName,
rpmVersion => \&format_rpmVersion,
rpmRelease => \&format_rpmRelease,
rpmVendor => \&format_rpmVendor,
rpmBuildRoot => \&format_rpmBuildRoot,
rpmCopyright => \&format_rpmCopyright,
rpmGroup => \&format_rpmGroup,
rpmSource => \&format_rpmSource,
rpmAutoReqProv => \&format_rpmAutoReqProv,
rpmdescription => \&format_rpmdescription,
rpmpre => \&format_rpmpre,
directories => \&format_directories,
directory => \&format_directory,
categoryname => \&format_categoryname,
description => \&format_description,
files => \&format_files,
file => \&format_file,
fileglob => \&format_fileglob,
links => \&format_links,
link => \&format_link,
linkto => \&format_linkto,
source => \&format_source,
target => \&format_target,
note => \&format_note,
build => \&format_build,
status => \&format_status,
dependencies => \&format_dependencies,
glob => \&format_glob,
sourcedir => \&format_sourcedir,
filenames => \&format_filenames,
};
my $text;
my $token;
undef $hloc;
undef @hierarchy;
my $hloc;
my @hierarchy2;
while ($token = $parser->get_tag('lpml')) {
&format_lpml(@{$token});
$text = &trim($parser->get_text('/lpml'));
$token = $parser->get_tag('/lpml');
print $lpml;
print "\n";
# $text=~s/\s*\n\s*\n\s*/\n/g;
print $text;
print "\n";
print &end();
}
exit;
sub end {
if ($mode eq 'html') {
return "THE END\n";
}
if ($mode eq 'install') {
return '';
}
}
# ----------------------- Take in string to parse and the separation expression
sub extract_array {
my ($stringtoparse,$sepexp) = @_;
my @a=split(/$sepexp/,$stringtoparse);
return \@a;
}
# --------------------------------------------------------- Format lpml section
sub format_lpml {
my (@tokeninfo)=@_;
my $date=`date`; chop $date;
if ($mode eq 'html') {
$lpml = "LPML BEGINNING: $date";
}
elsif ($mode eq 'install') {
print '# LPML install targets. Linux Packaging Markup Language,';
print ' by Scott Harrison 2001'."\n";
print '# This file was automatically generated on '.`date`;
print "\n".$invocation;
}
else {
return '';
}
}
# --------------------------------------------------- Format targetroot section
sub format_targetroot {
my $text=&trim($parser->get_text('/targetroot'));
$text=$targetroot if $targetroot;
$parser->get_tag('/targetroot');
if ($mode eq 'html') {
return $targetroot="\nTARGETROOT: $text";
}
elsif ($mode eq 'install') {
return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n";
}
else {
return '';
}
}
# --------------------------------------------------- Format sourceroot section
sub format_sourceroot {
my $text=&trim($parser->get_text('/sourceroot'));
$text=$sourceroot if $sourceroot;
$parser->get_tag('/sourceroot');
if ($mode eq 'html') {
return $sourceroot="\nSOURCEROOT: $text";
}
elsif ($mode eq 'install') {
return '# SOURCE CODE LOCATION IS "'.$sourceroot."\"\n";;
}
else {
return '';
}
}
# --------------------------------------------------- Format categories section
sub format_categories {
my $text=&trim($parser->get_text('/categories'));
$parser->get_tag('/categories');
if ($mode eq 'html') {
return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n";
}
else {
return '';
}
}
# --------------------------------------------------- Format categories section
sub format_category {
my (@tokeninfo)=@_;
$category_att_name=$tokeninfo[2]->{'name'};
$category_att_type=$tokeninfo[2]->{'type'};
$chmod='';$chown='';
$parser->get_text('/category');
$parser->get_tag('/category');
if ($mode eq 'html') {
return $category="\nCATEGORY $category_att_name $category_att_type ".
"$chmod $chown";
}
else {
if ($category_att_type eq $categorytype) {
my ($user,$group)=split(/\:/,$chown);
$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
' -m '.$chmod;
}
return '';
}
}
# -------------------------------------------------------- Format chown section
sub format_chown {
my @tokeninfo=@_;
$chown='';
my $text=&trim($parser->get_text('/chown'));
if ($text) {
$parser->get_tag('/chown');
$chown=$text;
}
return '';
}
# -------------------------------------------------------- Format chmod section
sub format_chmod {
my @tokeninfo=@_;
$chmod='';
my $text=&trim($parser->get_text('/chmod'));
if ($text) {
$parser->get_tag('/chmod');
$chmod=$text;
}
return '';
}
# ---------------------------------------------------------- Format rpm section
sub format_rpm {
my $text=&trim($parser->get_text('/rpm'));
$parser->get_tag('/rpm');
if ($mode eq 'html') {
return $rpm="\nBEGIN RPM\n$text\nEND RPM";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmSummary section
sub format_rpmSummary {
my $text=&trim($parser->get_text('/rpmSummary'));
$parser->get_tag('/rpmSummary');
if ($mode eq 'html') {
return $rpmSummary="\nRPMSUMMARY $text";
}
else {
return '';
}
}
# ------------------------------------------------------ Format rpmName section
sub format_rpmName {
my $text=&trim($parser->get_text('/rpmName'));
$parser->get_tag('/rpmName');
if ($mode eq 'html') {
return $rpmName="\nRPMNAME $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmVersion section
sub format_rpmVersion {
my $text=$parser->get_text('/rpmVersion');
$parser->get_tag('/rpmVersion');
if ($mode eq 'html') {
return $rpmVersion="\nRPMVERSION $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmRelease section
sub format_rpmRelease {
my $text=$parser->get_text('/rpmRelease');
$parser->get_tag('/rpmRelease');
if ($mode eq 'html') {
return $rpmRelease="\nRPMRELEASE $text";
}
else {
return '';
}
}
# ---------------------------------------------------- Format rpmVendor section
sub format_rpmVendor {
my $text=$parser->get_text('/rpmVendor');
$parser->get_tag('/rpmVendor');
if ($mode eq 'html') {
return $rpmVendor="\nRPMVENDOR $text";
}
else {
return '';
}
}
# ------------------------------------------------- Format rpmBuildRoot section
sub format_rpmBuildRoot {
my $text=$parser->get_text('/rpmBuildRoot');
$parser->get_tag('/rpmBuildRoot');
if ($mode eq 'html') {
return $rpmBuildRoot="\nRPMBUILDROOT $text";
}
else {
return '';
}
}
# ------------------------------------------------- Format rpmCopyright section
sub format_rpmCopyright {
my $text=$parser->get_text('/rpmCopyright');
$parser->get_tag('/rpmCopyright');
if ($mode eq 'html') {
return $rpmCopyright="\nRPMCOPYRIGHT $text";
}
else {
return '';
}
}
# ----------------------------------------------------- Format rpmGroup section
sub format_rpmGroup {
my $text=$parser->get_text('/rpmGroup');
$parser->get_tag('/rpmGroup');
if ($mode eq 'html') {
return $rpmGroup="\nRPMGROUP $text";
}
else {
return '';
}
}
# ---------------------------------------------------- Format rpmSource section
sub format_rpmSource {
my $text=$parser->get_text('/rpmSource');
$parser->get_tag('/rpmSource');
if ($mode eq 'html') {
return $rpmSource="\nRPMSOURCE $text";
}
else {
return '';
}
}
# ----------------------------------------------- Format rpmAutoReqProv section
sub format_rpmAutoReqProv {
my $text=$parser->get_text('/rpmAutoReqProv');
$parser->get_tag('/rpmAutoReqProv');
if ($mode eq 'html') {
return $rpmAutoReqProv="\nRPMAUTOREQPROV $text";
}
else {
return '';
}
}
# ----------------------------------------------- Format rpmdescription section
sub format_rpmdescription {
my $text=$parser->get_text('/rpmdescription');
$parser->get_tag('/rpmdescription');
if ($mode eq 'html') {
return $rpmdescription="\nRPMDESCRIPTION $text";
}
else {
return '';
}
}
# ------------------------------------------------------- Format rpmpre section
sub format_rpmpre {
my $text=$parser->get_text('/rpmpre');
$parser->get_tag('/rpmpre');
if ($mode eq 'html') {
return $rpmpre="\nRPMPRE $text";
}
else {
return '';
}
}
# -------------------------------------------------- Format directories section
sub format_directories {
my $text=$parser->get_text('/directories');
$parser->get_tag('/directories');
if ($mode eq 'html') {
return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n";
}
elsif ($mode eq 'install') {
return "\n".'directories:'."\n".$text;
}
else {
return '';
}
}
# ---------------------------------------------------- Format directory section
sub format_directory {
my (@tokeninfo)=@_;
$targetdir='';$categoryname='';$description='';
$parser->get_text('/directory');
$parser->get_tag('/directory');
if ($mode eq 'html') {
return $directory="\nDIRECTORY $targetdir $categoryname $description";
}
elsif ($mode eq 'install') {
return "\t".'install '.$categoryhash{$categoryname}.' -d '.
$targetroot.'/'.$targetdir."\n";
}
else {
return '';
}
}
# ---------------------------------------------------- Format targetdir section
sub format_targetdir {
my @tokeninfo=@_;
$targetdir='';
my $text=&trim($parser->get_text('/targetdir'));
if ($text) {
$parser->get_tag('/targetdir');
$targetdir=$text;
}
return '';
}
# ------------------------------------------------- Format categoryname section
sub format_categoryname {
my @tokeninfo=@_;
$categoryname='';
my $text=&trim($parser->get_text('/categoryname'));
if ($text) {
$parser->get_tag('/categoryname');
$categoryname=$text;
}
return '';
}
# -------------------------------------------------- Format description section
sub format_description {
my @tokeninfo=@_;
$description='';
my $text=&trim($parser->get_text('/description'));
if ($text) {
$parser->get_tag('/description');
$description=$text;
}
return '';
}
# -------------------------------------------------------- Format files section
sub format_files {
my $text=$parser->get_text('/files');
$parser->get_tag('/files');
if ($mode eq 'html') {
return $directories="\nBEGIN FILES\n$text\nEND FILES\n";
}
elsif ($mode eq 'install') {
return "\n".'files:'."\n".$text.
"\n".'links:'."\n".join('',@links);
}
else {
return '';
}
}
# ---------------------------------------------------- Format fileglobs section
sub format_fileglobs {
}
# -------------------------------------------------------- Format links section
# deprecated.. currently <link></link>'s are included in <files></files>
sub format_links {
my $text=$parser->get_text('/links');
$parser->get_tag('/links');
if ($mode eq 'html') {
return $links="\nBEGIN LINKS\n$text\nEND LINKS\n";
}
elsif ($mode eq 'install') {
return "\n".'links:'."\n\t".$text;
}
else {
return '';
}
}
# --------------------------------------------------------- Format file section
sub format_file {
my @tokeninfo=@_;
$file=''; $source=''; $target=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
my $text=&trim($parser->get_text('/file'));
if ($source) {
$parser->get_tag('/file');
if ($mode eq 'html') {
return ($file="\nBEGIN FILE\n".
"$source $target $categoryname $description $note " .
"$build $status $dependencies" .
"\nEND FILE");
}
elsif ($mode eq 'install' && $categoryname ne 'conf') {
return "\t".'@test -e '.$sourceroot.'/'.$source.
' && install '.
$categoryhash{$categoryname}.' '.
$sourceroot.'/'.$source.' '.
$targetroot.'/'.$target.
' || echo "**** LON-CAPA WARNING '.
'**** CVS source file does not exist: '.$sourceroot.'/'.
$source.'"'."\n";
}
else {
return '';
}
}
return '';
}
# --------------------------------------------------------- Format link section
sub format_link {
my @tokeninfo=@_;
$link=''; $linkto=''; $target=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
my $text=&trim($parser->get_text('/link'));
if ($linkto) {
$parser->get_tag('/link');
if ($mode eq 'html') {
return $link="\nBEGIN LINK\n".
"$linkto $target $categoryname $description $note " .
"$build $status $dependencies" .
"\nEND LINK";
}
elsif ($mode eq 'install') {
my @targets=split(/\;/,$target);
foreach my $tgt (@targets) {
push @links,"\t".'ln -fs /'.$linkto.' /'.$targetroot.$tgt.
"\n";
}
return '';
}
else {
return '';
}
}
return '';
}
# ----------------------------------------------------- Format fileglob section
sub format_fileglob {
my @tokeninfo=@_;
$fileglob=''; $glob=''; $sourcedir='';
$targetdir=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
$filenames='';
my $text=&trim($parser->get_text('/fileglob'));
if ($sourcedir) {
$parser->get_tag('/fileglob');
if ($mode eq 'html') {
return $fileglob="\nBEGIN FILEGLOB\n".
"$glob sourcedir $targetdir $categoryname $description $note ".
"$build $status $dependencies $filenames" .
"\nEND FILEGLOB";
}
elsif ($mode eq 'install') {
return "\t".'install '.
$categoryhash{$categoryname}.' '.
$sourceroot.'/'.$sourcedir.$glob.' '.
$targetroot.'/'.$targetdir.'.'."\n";
}
else {
return '';
}
}
return '';
}
# ---------------------------------------------------- Format sourcedir section
sub format_sourcedir {
my @tokeninfo=@_;
$sourcedir='';
my $text=&trim($parser->get_text('/sourcedir'));
if ($text) {
$parser->get_tag('/sourcedir');
$sourcedir=$text;
}
return '';
}
# ------------------------------------------------------- Format target section
sub format_target {
my @tokeninfo=@_;
$target='';
my $text=&trim($parser->get_text('/target'));
if ($text) {
$parser->get_tag('/target');
$target=$text;
}
return '';
}
# ------------------------------------------------------- Format source section
sub format_source {
my @tokeninfo=@_;
$source='';
my $text=&trim($parser->get_text('/source'));
if ($text) {
$parser->get_tag('/source');
$source=$text;
}
return '';
}
# --------------------------------------------------------- Format note section
sub format_note {
my @tokeninfo=@_;
$note='';
my $text=&trim($parser->get_text('/note'));
if ($text) {
$parser->get_tag('/note');
$note=$text;
}
return '';
}
# -------------------------------------------------------- Format build section
sub format_build {
my @tokeninfo=@_;
$build='';
my $text=&trim($parser->get_text('/build'));
if ($text) {
$parser->get_tag('/build');
$build=$text;
}
return '';
}
# ------------------------------------------------------- Format status section
sub format_status {
my @tokeninfo=@_;
$status='';
my $text=&trim($parser->get_text('/status'));
if ($text) {
$parser->get_tag('/status');
$status=$text;
}
return '';
}
# ------------------------------------------------- Format dependencies section
sub format_dependencies {
my @tokeninfo=@_;
$dependencies='';
my $text=&trim($parser->get_text('/dependencies'));
if ($text) {
$parser->get_tag('/dependencies');
$dependencies=$text;
}
return '';
}
# --------------------------------------------------------- Format glob section
sub format_glob {
my @tokeninfo=@_;
$glob='';
my $text=&trim($parser->get_text('/glob'));
if ($text) {
$parser->get_tag('/glob');
$glob=$text;
}
return '';
}
# ---------------------------------------------------- Format filenames section
sub format_filenames {
my @tokeninfo=@_;
my $text=&trim($parser->get_text('/filenames'));
if ($text) {
$parser->get_tag('/filenames');
$filenames=$text;
}
return '';
}
# ------------------------------------------------------- Format linkto section
sub format_linkto {
my @tokeninfo=@_;
my $text=&trim($parser->get_text('/linkto'));
if ($text) {
$parser->get_tag('/linkto');
$linkto=$text;
}
return '';
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>