loncom/build/piml_parse.pl - view

File: [LON-CAPA] / loncom / build / piml_parse.pl
Revision 1.2: download - view: text, annotated - select for diffs
Thu Jan 31 17:32:25 2002 UTC (22 years, 11 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

very operational now

#!/usr/bin/perl

# The LearningOnline Network with CAPA
# piml_parse.pl - Post Installation Markup Language parser
#
# $Id: piml_parse.pl,v 1.2 2002/01/31 17:32:25 harris41 Exp $
#
# Written by Scott Harrison, harris41@msu.edu
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# YEAR=2002
# 1/28 - Scott Harrison
#
###

###############################################################################
##                                                                           ##
## ORGANIZATION OF THIS PERL SCRIPT                                          ##
## 1. Notes                                                                  ##
## 2. Get command line arguments                                             ##
## 3. First pass through (grab distribution-specific information)            ##
## 4. Second pass through (parse out what is not necessary)                  ##
## 5. Third pass through (translate markup according to specified mode)      ##
## 6. Functions (most all just format contents of different markup tags)     ##
## 7. POD (plain old documentation, CPAN style)                              ##
##                                                                           ##
###############################################################################

# ----------------------------------------------------------------------- Notes
#
# I am using a multiple pass-through approach to parsing
# the piml file.  This saves memory and makes sure the server
# will never be overloaded.
#
# This is meant to parse files meeting the piml document type.
# See piml.dtd.  PIML=Post Installation Markup Language.
#
# The document is read from standard input and processed in three passes:
#   1. note which element positions carry a dist="default" setting and
#      which of those are overridden by the requested distribution;
#   2. copy the document, dropping elements that belong to other
#      distributions (or defaults that were overridden);
#   3. walk the cleaned document and render it through the format_*
#      handlers registered in the parser's textify table.

use strict;
use warnings;

use HTML::TokeParser;

my $usage=<<END;
**** ERROR ERROR ERROR ERROR ****
Usage is for piml file to come in through standard input.
1st argument is the mode (for example install, configinstall or build).
2nd argument is the category permissions to use (runtime or development)
3rd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
4th argument is to manually specify a targetroot

All four arguments are mandatory for the program to run.

Example:

cat ../../doc/loncapafiles.piml |\\
perl piml_parse.pl install development default /home/sherbert/loncapa
END

# ------------------------------------------------- Grab command line arguments

# Exactly four arguments are accepted: mode, category type, distribution
# and targetroot.
if (@ARGV != 4) {
    # Throw away the piped input so the upstream process does not die
    # from a broken pipe, then report how the script should be called.
    @ARGV = ();
    while (defined(my $discard = <STDIN>)) { }
    print $usage;
    exit -1; # exit with error status
}
my ($mode, $categorytype, $dist, $targetroot) = @ARGV;
@ARGV = ();

$targetroot =~ s{/$}{};           # drop a single trailing slash
my $targetrootarg = $targetroot;  # prefix applied to every <target>

# --------------------------------------------------- Record program invocation

# NOTE(review): $invocation is assembled here but nothing in this file
# prints it, and its labels do not count the mode argument; confirm
# whether it is still wanted.
my $invocation;
if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
    $invocation=(<<END);
# Invocation: STDINPUT | piml_parse.pl
#             1st argument (category type) is: $categorytype
#             2nd argument (distribution) is: $dist
#             3rd argument (targetroot) is: described below
END
}

# ------------------------------------------ State shared with format_* handlers

my $parser;              # the HTML::TokeParser of the pass currently running
my $piml = '';           # header text produced by format_piml
my $category_att_name;   # name= attribute of the <category> being read
my $category_att_type;   # type= attribute of the <category> being read
my $chown = '';
my $chmod = '';
my $abbreviation = '';   # space-free abbreviation; esp. for image names
my $categoryname = '';
my $description = '';
my $file = '';
my $source = '';
my $target = '';
my $note = '';
my $build = '';
my $status = '';
my $dependencies = '';
my %categoryhash;        # category name => "-o user -g group -m mode"
my %fab;                 # file category abbreviation
my $file_count = 0;
my %categorycount;       # category name => number of <file> elements seen

# ---------------------------------------------------- Start first pass through

my $parsestring = join('', <STDIN>);

# Need to make a pass through and figure out what defaults are
# overridden.  Top-down overriding strategy (leaves don't know
# about distant leaves).
#
# %hash is keyed by element name plus its position in the hierarchy:
#   1 = a dist="default" element exists at that position
#   2 = that default is overridden by an element matching $dist
my %hash;
{
    my @hierarchy = (0);
    my $hloc = 0;
    $parser = new_parser(\$parsestring);
    while (my $token = $parser->get_token()) {
        if ($token->[0] eq 'S') {
            $hloc++;
            $hierarchy[$hloc]++;
            my $key = hierarchy_key($token->[1], $hloc, \@hierarchy);
            my $thisdist = ' ' . dist_attribute($token) . ' ';
            if ($thisdist eq ' default ') {
                $hash{$key} = 1; # there is a default setting for this key
            }
            elsif ($dist && ($hash{$key} || 0) == 1
                   && $thisdist =~ /\s\Q$dist\E\s/) {
                $hash{$key} = 2; # disregard default setting for this key if
                                 # there is a directly requested distribution
                                 # match
            }
        }
        if ($token->[0] eq 'E') {
            $hloc--;
        }
    }
}

# --------------------------------------------------- Start second pass through

my $cleanstring = '';
{
    my @hierarchy = (0);
    my $hloc = 0;
    $parser = new_parser(\$parsestring);
    while (my $token = $parser->get_token()) {
        if ($token->[0] eq 'S') {
            $hloc++;
            $hierarchy[$hloc]++;
            my $key = hierarchy_key($token->[1], $hloc, \@hierarchy);
            my $distattr = dist_attribute($token);
            my $thisdist = ' ' . $distattr . ' ';

            my $no_dist    = ($distattr eq '');
            my $is_default = ($thisdist eq ' default ');
            my $matches    = ($thisdist =~ /\s\Q$dist\E\s/);
            my $overridden = (($hash{$key} || 0) == 2);
            my $selfclosed = ($token->[4] =~ m{/>$});

            # Two sets of invalid conditions are rejected before an
            # element is accepted into the cleanstring:
            #   - the default at this position was overridden and this
            #     element neither matches $dist nor is dist-neutral;
            #   - the element is for some other distribution (a default
            #     that has not been overridden is still accepted).
            my $skip = 0;
            if ($overridden and !($no_dist or $matches)) {
                $skip = 1;
            }
            elsif (!$no_dist and !$matches
                   and !($is_default and !$overridden)) {
                $skip = 1;
            }

            if ($skip) {
                # Discard everything up to the matching end tag.
                unless ($selfclosed) {
                    $parser->get_tag('/' . $token->[1]);
                    $hloc--;
                }
            }
            else {
                $cleanstring .= $token->[4];
            }
            # NOTE(review): the first pass does not apply this adjustment
            # for <tag/> elements; confirm the two passes are meant to
            # differ.
            if ($selfclosed) {
                $hloc--;
            }
        }
        if ($token->[0] eq 'E') {
            $cleanstring .= $token->[2];
            $hloc--;
        }
        if ($token->[0] eq 'T') {
            $cleanstring .= $token->[1];
        }
    }
}
$cleanstring = trim($cleanstring);
$cleanstring =~ s{>\s*\n\s*<}{><}g;

# ---------------------------------------------------- Start final pass through

# Make new parser with distribution specific input
$parser = new_parser(\$cleanstring);

# Define handling methods for mode-dependent text rendering.
# NOTE(review): format_build and format_filenames are not defined in this
# file; a <build> or <filenames> element would die with "Undefined
# subroutine" unless they are supplied elsewhere.
$parser->{textify} = {
    specialnotices => \&format_specialnotices,
    specialnotice  => \&format_specialnotice,
    targetroot     => \&format_targetroot,
    categories     => \&format_categories,
    category       => \&format_category,
    abbreviation   => \&format_abbreviation,
    chown          => \&format_chown,
    chmod          => \&format_chmod,
    categoryname   => \&format_categoryname,
    files          => \&format_files,
    file           => \&format_file,
    target         => \&format_target,
    note           => \&format_note,
    build          => \&format_build,
    dependencies   => \&format_dependencies,
    filenames      => \&format_filenames,
    perlscript     => \&format_perlscript,
    TARGET         => \&format_TARGET,
};

while (my $token = $parser->get_tag('piml')) {
    format_piml(@{$token});
    # get_text() invokes the textify handlers for every nested element.
    my $text = trim($parser->get_text('/piml'));
    $parser->get_tag('/piml');
    print $piml;
    print "\n";
    print $text;
    print "\n";
    my $trailer = end();
    print $trailer;
}
exit;

# ---------- Functions (most all just format contents of different markup tags)

# --------------------------- Build an xml-mode TokeParser over a string reference
sub new_parser {
    my ($docref) = @_;
    my $tokeparser = HTML::TokeParser->new($docref)
        or die('can\'t create TokeParser object');
    $tokeparser->xml_mode(1);
    return $tokeparser;
}

# ------------- Key for an element: its name plus the sibling counters above it
sub hierarchy_key {
    my ($tag, $depth, $counts) = @_;
    my @path = $depth > 0 ? @{$counts}[0 .. $depth - 1] : ();
    return $tag . join(',', map { defined $_ ? $_ : '' } @path);
}

# ------------------- Value of a start token's dist attribute ('' when absent)
sub dist_attribute {
    my ($token) = @_;
    my $value = $token->[2]{'dist'};
    return defined $value ? $value : '';
}

# ---- Trimmed text up to </$tag>; the end tag is consumed when text was found
sub element_text {
    my ($tag) = @_;
    my $text = trim($parser->get_text('/' . $tag));
    return '' unless $text;
    $parser->get_tag('/' . $tag);
    return $text;
}

# ------------------------ Final output at end of markup parsing and formatting
sub end {
    return '';
}

# ----------------------- Take in string to parse and the separation expression
# Returns a reference to the list of pieces.
sub extract_array {
    my ($stringtoparse,$sepexp) = @_;
    my @pieces = split(/$sepexp/, $stringtoparse);
    return \@pieces;
}

# --------------------------------------------------------- Format piml section
# Stores the header of the generated script in $piml.
sub format_piml {
    my (@tokeninfo) = @_;
    $piml=<<END;
#!/usr/bin/perl

# Generated from a PIML (Post Installation Markup Language) document

END
    return;
}

# --------------------------------------------------- Format targetroot section
# The command-line targetroot, when non-empty, wins over the document's.
sub format_targetroot {
    my $text = trim($parser->get_text('/targetroot'));
    $text = $targetroot if $targetroot;
    $parser->get_tag('/targetroot');
    return '# TARGET INSTALL LOCATION is "' . $text . "\"\n";
}

# -------------------------------------------------- Format perl script section
# The script body is passed through untrimmed.
sub format_perlscript {
    my (@tokeninfo) = @_;
    my $text = $parser->get_text('/perlscript');
    $parser->get_tag('/perlscript');
    return $text;
}

# --------------------------------------------------------------- Format TARGET
# Expands to the target path of the <file> currently being processed.
sub format_TARGET {
    my (@tokeninfo) = @_;
    $parser->get_tag('/TARGET');
    return $target;
}

# --------------------------------------------------- Format categories section
sub format_categories {
    my $text = trim($parser->get_text('/categories'));
    $parser->get_tag('/categories');
    return '# CATEGORIES' . "\n" . $text;
}

# ----------------------------------------------------- Format category section
# Records the abbreviation and, for categories of the requested type,
# the install options built from the nested <chown> and <chmod>.
sub format_category {
    my (@tokeninfo) = @_;
    $category_att_name = $tokeninfo[2]->{'name'};
    $category_att_type = $tokeninfo[2]->{'type'};
    $category_att_name = '' unless defined $category_att_name;
    $category_att_type = '' unless defined $category_att_type;
    $abbreviation = '';
    $chmod = '';
    $chown = '';
    # Reading the text runs the nested handlers, which fill in
    # $abbreviation, $chown and $chmod.
    $parser->get_text('/category');
    $parser->get_tag('/category');
    $fab{$category_att_name} = $abbreviation;
    if ($category_att_type eq $categorytype) {
        my ($user, $group) = split(/:/, $chown);
        $user  = '' unless defined $user;
        $group = '' unless defined $group;
        $categoryhash{$category_att_name} =
            '-o ' . $user . ' -g ' . $group . ' -m ' . $chmod;
    }
    return '';
}

# ------------------------------------------------- Format abbreviation section
sub format_abbreviation {
    my @tokeninfo = @_;
    $abbreviation = element_text('abbreviation');
    return '';
}

# -------------------------------------------------------- Format chown section
sub format_chown {
    my @tokeninfo = @_;
    $chown = element_text('chown');
    return '';
}

# -------------------------------------------------------- Format chmod section
sub format_chmod {
    my @tokeninfo = @_;
    $chmod = element_text('chmod');
    return '';
}

# ------------------------------------------------- Format categoryname section
sub format_categoryname {
    my @tokeninfo = @_;
    $categoryname = element_text('categoryname');
    return '';
}

# -------------------------------------------------------- Format files section
sub format_files {
    # $file_count is final only after the nested <file> handlers have run.
    my $text = $parser->get_text('/files');
    $parser->get_tag('/files');
    return "\n" . '# There are ' . $file_count .
        ' files this script works on' . "\n\n" . $text;
}

# --------------------------------------------------------- Format file section
sub format_file {
    my @tokeninfo = @_;
    # Reset the per-file state before the nested handlers fill it in.
    $file = '';
    $source = '';
    $target = '';
    $categoryname = '';
    $description = '';
    $note = '';
    $build = '';
    $status = '';
    $dependencies = '';
    my $text = trim($parser->get_text('/file'));
    $file_count++;
    $categorycount{$categoryname}++;
    $parser->get_tag('/file');
    return "# File: $target\n" . "$text\n";
}

# ------------------------------------------------------- Format target section
sub format_target {
    my @tokeninfo = @_;
    my $text = element_text('target');
    $target = $text ? $targetrootarg . $text : '';
    return '';
}

# --------------------------------------------------------- Format note section
# Keeps the note's inner markup verbatim instead of flattening it to text.
sub format_note {
    my @tokeninfo = @_;
    $note = '';
    my $text = '';
    while (my $aref = $parser->get_token()) {
        last if $aref->[0] eq 'E' && $aref->[1] eq 'note';
        if ($aref->[0] eq 'S') {
            $text .= $aref->[4];
        }
        elsif ($aref->[0] eq 'E') {
            $text .= $aref->[2];
        }
        else {
            $text .= $aref->[1];
        }
    }
    $note = $text if $text;
    return '';
}

# ------------------------------------------------- Format dependencies section
# Normalises a ';'-separated list by trimming each entry.
sub format_dependencies {
    my @tokeninfo = @_;
    $dependencies = '';
    my $text = element_text('dependencies');
    if ($text) {
        $dependencies = join(';', map { trim($_) } split(/;/, $text));
    }
    return '';
}

# ----------------------------------------------- Format specialnotices section
sub format_specialnotices {
    $parser->get_tag('/specialnotices');
    return '';
}

# ------------------------------------------------ Format specialnotice section
sub format_specialnotice {
    $parser->get_tag('/specialnotice');
    return '';
}

# ------------------------------------- Render less-than and greater-than signs
sub htmlsafe {
    my ($text) = @_;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# --------------------------------------- remove starting and ending whitespace
sub trim {
    my ($s) = @_;
    $s =~ s/^\s*//;
    $s =~ s/\s*$//;
    return $s;
}

# ----------------------------------- POD (plain old documentation, CPAN style)

=head1 NAME

piml_parse.pl - This is meant to parse files meeting the piml document type.
See piml.dtd.  PIML=Post Installation Markup Language.

=head1 SYNOPSIS

Usage is for piml file to come in through standard input.

=over 4

=item *

1st argument is the mode (for example install, configinstall or build).

=item *

2nd argument is the category permissions to use (runtime or development)

=item *

3rd argument is the distribution
(default,redhat6.2,debian2.2,redhat7.1,etc).

=item *

4th argument is to manually specify a targetroot.

=back

All four arguments are mandatory for the program to run.

Example:

 cat ../../doc/loncapafiles.piml |\
 perl piml_parse.pl install development default /home/sherbert/loncapa

=head1 DESCRIPTION

I am using a multiple pass-through approach to parsing
the piml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 README

I am using a multiple pass-through approach to parsing
the piml file.  This saves memory and makes sure the server
will never be overloaded.

=head1 PREREQUISITES

HTML::TokeParser

=head1 COREQUISITES

=head1 OSNAMES

linux

=head1 SCRIPT CATEGORIES

Packaging/Administrative

=cut