File:  [LON-CAPA] / loncom / build / piml_parse.pl
Revision 1.8: download - view: text, annotated - select for diffs
Tue Nov 26 15:25:21 2002 UTC (21 years, 8 months ago) by matthew
Branches: MAIN
CVS tags: HEAD
Documentation changes - it's always nice to see how things really work
instead of how they claim to work.  Fixed bug which caused abortion when
all four possible arguements were provided.

    1: #!/usr/bin/perl
    2: 
    3: # -------------------------------------------------------- Documentation notice
    4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
    5: # documentation internalized in this program.
    6: 
    7: # --------------------------------------------------------- License Information
    8: # The LearningOnline Network with CAPA
    9: # piml_parse.pl - Linux Packaging Markup Language parser
   10: #
   11: # $Id: piml_parse.pl,v 1.8 2002/11/26 15:25:21 matthew Exp $
   12: #
   13: # Written by Scott Harrison, codeharrison@yahoo.com
   14: #
   15: # Copyright Michigan State University Board of Trustees
   16: #
   17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   18: #
   19: # LON-CAPA is free software; you can redistribute it and/or modify
   20: # it under the terms of the GNU General Public License as published by
   21: # the Free Software Foundation; either version 2 of the License, or
   22: # (at your option) any later version.
   23: #
   24: # LON-CAPA is distributed in the hope that it will be useful,
   25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   27: # GNU General Public License for more details.
   28: #
   29: # You should have received a copy of the GNU General Public License
   30: # along with LON-CAPA; if not, write to the Free Software
   31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   32: #
   33: # /home/httpd/html/adm/gpl.txt
   34: #
   35: # http://www.lon-capa.org/
   36: #
   37: # YEAR=2002
   38: # 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison
   39: #
   40: ###
   41: 
   42: ###############################################################################
   43: ##                                                                           ##
   44: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   45: ## 1. Notes                                                                  ##
   46: ## 2. Get command line arguments                                             ##
   47: ## 3. First pass through (grab distribution-specific information)            ##
   48: ## 4. Second pass through (parse out what is not necessary)                  ##
   49: ## 5. Third pass through (translate markup according to specified mode)      ##
   50: ## 6. Functions (most all just format contents of different markup tags)     ##
   51: ## 7. POD (plain old documentation, CPAN style)                              ##
   52: ##                                                                           ##
   53: ###############################################################################
   54: 
   55: # ----------------------------------------------------------------------- Notes
   56: #
   57: # I am using a multiple pass-through approach to parsing
   58: # the piml file.  This saves memory and makes sure the server
   59: # will never be overloaded.
   60: #
   61: # This is meant to parse files meeting the piml document type.
   62: # See piml.dtd.  PIML=Post Installation Markup Language.
   63: 
   64: use HTML::TokeParser;
   65: 
   66: my $usage=(<<END);
   67: **** ERROR ERROR ERROR ERROR ****
   68: Usage is for piml file to come in through standard input.
   69: 1st is install, configinstall, or build
   70: 2st argument is the category permissions to use (runtime or development)
   71: 3nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
   72: 4rd argument is to manually specify a targetroot
   73: 
   74: The first 3 arguments are required for the program to run.
   75: 
   76: Example:
   77: 
   78: cat ../../doc/loncapafiles.piml |\\
   79: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
   80: END
   81: 
   82: # ------------------------------------------------- Grab command line arguments
   83: 
   84: my $mode;
   85: if (@ARGV>=3) {
   86:     $mode = shift @ARGV;
   87: }
   88: else {
   89:     @ARGV=();shift @ARGV;
   90:     while(<>){} # throw away the input to avoid broken pipes
   91:     print $usage;
   92:     exit -1; # exit with error status
   93: }
   94: 
   95: my $categorytype;
   96: if (@ARGV) {
   97:     $categorytype = shift @ARGV;
   98: }
   99: 
  100: my $dist;
  101: if (@ARGV) {
  102:     $dist = shift @ARGV;
  103: }
  104: 
  105: my $targetroot;
  106: my $targetrootarg;
  107: if (@ARGV) {
  108:     $targetroot = shift @ARGV;
  109: }
  110: $targetroot=~s/\/$//;
  111: $targetrootarg=$targetroot;
  112: 
  113: my $logcmd='| tee -a WARNINGS';
  114: 
  115: my $invocation;
  116: # --------------------------------------------------- Record program invocation
  117: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
  118:     $invocation=(<<END);
  119: # Invocation: STDINPUT | piml_parse.pl
  120: #             1st argument (category type) is: $categorytype
  121: #             2nd argument (distribution) is: $dist
  122: #             3rd argument (targetroot) is: described below
  123: END
  124: }
  125: 
  126: # ---------------------------------------------------- Start first pass through
  127: my @parsecontents = <>;
  128: my $parsestring = join('',@parsecontents);
  129: my $outstring='';
  130: 
  131: # Need to make a pass through and figure out what defaults are
  132: # overrided.  Top-down overriding strategy (leaves don't know
  133: # about distant leaves).
  134: 
  135: my @hierarchy;
  136: $hierarchy[0]=0;
  137: my $hloc=0;
  138: my $token='';
  139: $parser = HTML::TokeParser->new(\$parsestring) or
  140:     die('can\'t create TokeParser object');
  141: $parser->xml_mode('1');
  142: my %hash;
  143: my $key;
  144: while ($token = $parser->get_token()) {
  145:     if ($token->[0] eq 'S') {
  146: 	$hloc++;
  147: 	$hierarchy[$hloc]++;
  148: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
  149: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
  150: 	if ($thisdist eq ' default ') {
  151: 	    $hash{$key}=1; # there is a default setting for this key
  152: 	}
  153: 	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
  154: 	    $hash{$key}=2; # disregard default setting for this key if
  155: 	                   # there is a directly requested distribution match
  156: 	}
  157:     }
  158:     if ($token->[0] eq 'E') {
  159: 	$hloc--;
  160:     }
  161: }
  162: 
  163: # --------------------------------------------------- Start second pass through
  164: undef $hloc;
  165: undef @hierarchy;
  166: undef $parser;
  167: $hierarchy[0]=0;
  168: $parser = HTML::TokeParser->new(\$parsestring) or
  169:     die('can\'t create TokeParser object');
  170: $parser->xml_mode('1');
  171: my $cleanstring;
  172: while ($token = $parser->get_token()) {
  173:     if ($token->[0] eq 'S') {
  174: 	$hloc++;
  175: 	$hierarchy[$hloc]++;
  176: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
  177: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
  178: 	# This conditional clause is set up to ignore two sets
  179: 	# of invalid conditions before accepting entry into
  180: 	# the cleanstring.
  181: 	if ($hash{$key}==2 and
  182: 	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
  183: 	    if ($token->[4]!~/\/>$/) {
  184: 		$parser->get_tag('/'.$token->[1]);
  185: 		$hloc--;
  186: 	    }
  187: 	}
  188: 	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
  189: 	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
  190: 	    if ($token->[4]!~/\/>$/) {
  191: 		$parser->get_tag('/'.$token->[1]);
  192: 		$hloc--;
  193: 	    }
  194: 	}
  195: 	else {
  196: 	    $cleanstring.=$token->[4];
  197: 	}
  198: 	if ($token->[4]=~/\/>$/) {
  199: #	    $hloc--;
  200: 	}
  201:     }
  202:     if ($token->[0] eq 'E') {
  203: 	$cleanstring.=$token->[2];
  204: 	$hloc--;
  205:     }
  206:     if ($token->[0] eq 'T') {
  207: 	$cleanstring.=$token->[1];
  208:     }
  209: }
  210: $cleanstring=&trim($cleanstring);
  211: $cleanstring=~s/\>\s*\n\s*\</\>\</g;
  212: 
  213: # ---------------------------------------------------- Start final pass through
  214: 
  215: # storage variables
  216: my $piml;
  217: my $categories;
  218: my @categorynamelist;
  219: my $category;
  220: my $category_att_name;
  221: my $category_att_type;
  222: my $chown;
  223: my $chmod;
  224: my $abbreviation; # space-free abbreviation; esp. for image names
  225: my $categoryname;
  226: my $description;
  227: my $files;
  228: my $file;
  229: my $target;
  230: my $note;
  231: my $commands;
  232: my $command;
  233: my $dependencies;
  234: my @links;
  235: my %categoryhash;
  236: my $dpathlength;
  237: my %fab; # file category abbreviation
  238: my $directory_count;
  239: my $file_count;
  240: my $link_count;
  241: my $fileglob_count;
  242: my $fileglobnames_count;
  243: my %categorycount;
  244: 
  245: my $mode;
  246: 
  247: my @buildall;
  248: my @buildinfo;
  249: 
  250: my @configall;
  251: 
  252: # Make new parser with distribution specific input
  253: undef($parser);
  254: $parser = HTML::TokeParser->new(\$cleanstring) or
  255:     die('can\'t create TokeParser object');
  256: $parser->xml_mode('1');
  257: 
  258: # Define handling methods for mode-dependent text rendering
  259: 
  260: $parser->{textify}={
  261:     specialnotices => \&format_specialnotices,
  262:     specialnotice => \&format_specialnotice,
  263:     targetroot => \&format_targetroot,
  264:     categories => \&format_categories,
  265:     category => \&format_category,
  266:     abbreviation => \&format_abbreviation,
  267:     chown => \&format_chown,
  268:     chmod => \&format_chmod,
  269:     categoryname => \&format_categoryname,
  270:     files => \&format_files,
  271:     file => \&format_file,
  272:     target => \&format_target,
  273:     note => \&format_note,
  274:     build => \&format_build,
  275:     dependencies => \&format_dependencies,
  276:     filenames => \&format_filenames,
  277:     perlscript => \&format_perlscript,
  278:     TARGET => \&format_TARGET,
  279:     };
  280: 
  281: my $text;
  282: my $token;
  283: undef($hloc);
  284: undef(@hierarchy);
  285: my $hloc;
  286: my @hierarchy2;
  287: while ($token = $parser->get_tag('piml')) {
  288:     &format_piml(@{$token});
  289:     $text = &trim($parser->get_text('/piml'));
  290:     $token = $parser->get_tag('/piml');
  291:     print($piml); 
  292:     print("\n");
  293:     print($text);
  294:     print("\n");
  295:     print(&end());
  296: }
  297: exit(0);
  298: 
  299: # ---------- Functions (most all just format contents of different markup tags)
  300: 
  301: # ------------------------ Final output at end of markup parsing and formatting
  302: sub end {
  303: 
  304: }
  305: 
  306: # ----------------------- Take in string to parse and the separation expression
  307: sub extract_array {
  308:     my ($stringtoparse,$sepexp) = @_;
  309:     my @a=split(/$sepexp/,$stringtoparse);
  310:     return \@a;
  311: }
  312: 
  313: # --------------------------------------------------------- Format piml section
  314: sub format_piml {
  315:     my (@tokeninfo)=@_;
  316:     my $date=`date`; chop $date;
  317:     $piml=<<END;
  318: #!/usr/bin/perl
  319: 
  320: # Generated from a PIML (Post Installation Markup Language) document
  321: 
  322: END
  323: }
  324: # --------------------------------------------------- Format targetroot section
  325: sub format_targetroot {
  326:     my $text=&trim($parser->get_text('/targetroot'));
  327:     $text=$targetroot if $targetroot;
  328:     $parser->get_tag('/targetroot');
  329:     return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n");
  330: }
  331: # -------------------------------------------------- Format perl script section
  332: sub format_perlscript {
  333:     my (@tokeninfo)=@_;
  334:     $mode=$tokeninfo[2]->{'mode'};
  335:     my $text=$parser->get_text('/perlscript');
  336:     $parser->get_tag('/perlscript');
  337:     if ($mode eq 'bg') {
  338: 	open(OUT,">/tmp/piml$$.pl");
  339: 	print(OUT $text);
  340: 	close(OUT);
  341: 	return(<<END);
  342: 	# launch background process for $target
  343: 	system("perl /tmp/piml$$.pl &");
  344: END
  345:     }
  346:     else {
  347: 	return($text);
  348:     }
  349: }
  350: # --------------------------------------------------------------- Format TARGET
  351: sub format_TARGET {
  352:     my (@tokeninfo)=@_;
  353:     $parser->get_tag('/TARGET');
  354:     return($target);
  355: }
  356: # --------------------------------------------------- Format categories section
  357: sub format_categories {
  358:     my $text=&trim($parser->get_text('/categories'));
  359:     $parser->get_tag('/categories');
  360:     return('# CATEGORIES'."\n".$text);
  361: }
  362: # --------------------------------------------------- Format categories section
  363: sub format_category {
  364:     my (@tokeninfo)=@_;
  365:     $category_att_name=$tokeninfo[2]->{'name'};
  366:     $category_att_type=$tokeninfo[2]->{'type'};
  367:     $abbreviation=''; $chmod='';$chown='';
  368:     $parser->get_text('/category');
  369:     $parser->get_tag('/category');
  370:     $fab{$category_att_name}=$abbreviation;
  371:     if ($category_att_type eq $categorytype) {
  372: 	my ($user,$group)=split(/\:/,$chown);
  373: 	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
  374: 	    ' -m '.$chmod;
  375:     }
  376:     return('');
  377: }
  378: # --------------------------------------------------- Format categories section
  379: sub format_abbreviation {
  380:     my @tokeninfo=@_;
  381:     $abbreviation='';
  382:     my $text=&trim($parser->get_text('/abbreviation'));
  383:     if ($text) {
  384: 	$parser->get_tag('/abbreviation');
  385: 	$abbreviation=$text;
  386:     }
  387:     return('');
  388: }
  389: # -------------------------------------------------------- Format chown section
  390: sub format_chown {
  391:     my @tokeninfo=@_;
  392:     $chown='';
  393:     my $text=&trim($parser->get_text('/chown'));
  394:     if ($text) {
  395: 	$parser->get_tag('/chown');
  396: 	$chown=$text;
  397:     }
  398:     return('');
  399: }
  400: # -------------------------------------------------------- Format chmod section
  401: sub format_chmod {
  402:     my @tokeninfo=@_;
  403:     $chmod='';
  404:     my $text=&trim($parser->get_text('/chmod'));
  405:     if ($text) {
  406: 	$parser->get_tag('/chmod');
  407: 	$chmod=$text;
  408:     }
  409:     return('');
  410: }
  411: # ------------------------------------------------- Format categoryname section
  412: sub format_categoryname {
  413:     my @tokeninfo=@_;
  414:     $categoryname='';
  415:     my $text=&trim($parser->get_text('/categoryname'));
  416:     if ($text) {
  417: 	$parser->get_tag('/categoryname');
  418: 	$categoryname=$text;
  419:     }
  420:     return('');
  421: }
  422: # -------------------------------------------------------- Format files section
  423: sub format_files {
  424:     my $text=$parser->get_text('/files');
  425:     $parser->get_tag('/files');
  426:     return("\n".'# There are '.$file_count.' files this script works on'.
  427: 	"\n\n".$text);
  428: }
  429: # --------------------------------------------------------- Format file section
  430: sub format_file {
  431:     my @tokeninfo=@_;
  432:     $file=''; $source=''; $target=''; $categoryname=''; $description='';
  433:     $note=''; $build=''; $status=''; $dependencies='';
  434:     my $text=&trim($parser->get_text('/file'));
  435:     $file_count++;
  436:     $categorycount{$categoryname}++;
  437:     $parser->get_tag('/file');
  438:     return("# File: $target\n".
  439: 	"$text\n");
  440: }
  441: # ------------------------------------------------------- Format target section
  442: sub format_target {
  443:     my @tokeninfo=@_;
  444:     $target='';
  445:     my $text=&trim($parser->get_text('/target'));
  446:     if ($text) {
  447: 	$parser->get_tag('/target');
  448: 	$target=$targetrootarg.$text;
  449:     }
  450:     return('');
  451: }
  452: # --------------------------------------------------------- Format note section
  453: sub format_note {
  454:     my @tokeninfo=@_;
  455:     $note='';
  456:     my $aref;
  457:     my $text;
  458:     while ($aref=$parser->get_token()) {
  459: 	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
  460: 	    last;
  461: 	}
  462: 	elsif ($aref->[0] eq 'S') {
  463: 	    $text.=$aref->[4];
  464: 	}
  465: 	elsif ($aref->[0] eq 'E') {
  466: 	    $text.=$aref->[2];
  467: 	}
  468: 	else {
  469: 	    $text.=$aref->[1];
  470: 	}
  471:     }
  472:     if ($text) {
  473: 	$note=$text;
  474:     }
  475:     return('');
  476: }
  477: # ------------------------------------------------- Format dependencies section
  478: sub format_dependencies {
  479:     my @tokeninfo=@_;
  480:     $dependencies='';
  481:     my $text=&trim($parser->get_text('/dependencies'));
  482:     if ($text) {
  483: 	$parser->get_tag('/dependencies');
  484: 	$dependencies=join(';',
  485: 			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
  486:     }
  487:     return('');
  488: }
  489: # ------------------------------------------------ Format specialnotice section
  490: sub format_specialnotices {
  491:     $parser->get_tag('/specialnotices');
  492:     return('');
  493: }
  494: # ------------------------------------------------ Format specialnotice section
  495: sub format_specialnotice {
  496:     $parser->get_tag('/specialnotice');
  497:     return('');
  498: }
  499: # ------------------------------------- Render less-than and greater-than signs
  500: sub htmlsafe {
  501:     my $text=@_[0];
  502:     $text =~ s/</&lt;/g;
  503:     $text =~ s/>/&gt;/g;
  504:     return($text);
  505: }
  506: # --------------------------------------- remove starting and ending whitespace
  507: sub trim {
  508:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s);
  509: }
  510: 
  511: # ----------------------------------- POD (plain old documentation, CPAN style)
  512: 
  513: =pod
  514: 
  515: =head1 NAME
  516: 
  517: piml_parse.pl - This is meant to parse piml files (Post Installation Markup Language)
  518: 
  519: =head1 SYNOPSIS
  520: 
  521: Usage is for piml file to come in through standard input.
  522: 
  523: =over 4
  524: 
  525: =item * 
  526: 
  527: 1st argument is the category permissions to use (runtime or development)
  528: 
  529: =item *
  530: 
  531: 2nd argument is the distribution
  532: (default,redhat6.2,debian2.2,redhat7.1,etc).
  533: 
  534: =item *
  535: 
  536: 3rd argument is to manually specify a targetroot.
  537: 
  538: =back
  539: 
  540: Only the 1st argument is mandatory for the program to run.
  541: 
  542: Example:
  543: 
  544: cat ../../doc/loncapafiles.piml |\\
  545: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install
  546: 
  547: =head1 DESCRIPTION
  548: 
  549: I am using a multiple pass-through approach to parsing
  550: the piml file.  This saves memory and makes sure the server
  551: will never be overloaded.
  552: 
  553: =head1 README
  554: 
  555: I am using a multiple pass-through approach to parsing
  556: the piml file.  This saves memory and makes sure the server
  557: will never be overloaded.
  558: 
  559: =head1 PREREQUISITES
  560: 
  561: HTML::TokeParser
  562: 
  563: =head1 COREQUISITES
  564: 
  565: =head1 OSNAMES
  566: 
  567: linux
  568: 
  569: =head1 SCRIPT CATEGORIES
  570: 
  571: Packaging/Administrative
  572: 
  573: =head1 AUTHOR
  574: 
  575:  Scott Harrison
  576:  codeharrison@yahoo.com
  577: 
  578: Please let me know how/if you are finding this script useful and
  579: any/all suggestions.  -Scott
  580: 
  581: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>