Annotation of loncom/build/piml_parse.pl, revision 1.8

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.6       harris41    3: # -------------------------------------------------------- Documentation notice
                      4: # Run "perldoc ./lpml_parse.pl" in order to best view the software
                      5: # documentation internalized in this program.
                      6: 
                      7: # --------------------------------------------------------- License Information
1.1       harris41    8: # The LearningOnline Network with CAPA
                      9: # piml_parse.pl - Linux Packaging Markup Language parser
                     10: #
1.8     ! matthew    11: # $Id: piml_parse.pl,v 1.7 2002/04/08 12:51:03 harris41 Exp $
1.1       harris41   12: #
1.6       harris41   13: # Written by Scott Harrison, codeharrison@yahoo.com
1.1       harris41   14: #
                     15: # Copyright Michigan State University Board of Trustees
                     16: #
                     17: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     18: #
                     19: # LON-CAPA is free software; you can redistribute it and/or modify
                     20: # it under the terms of the GNU General Public License as published by
                     21: # the Free Software Foundation; either version 2 of the License, or
                     22: # (at your option) any later version.
                     23: #
                     24: # LON-CAPA is distributed in the hope that it will be useful,
                     25: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     26: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     27: # GNU General Public License for more details.
                     28: #
                     29: # You should have received a copy of the GNU General Public License
                     30: # along with LON-CAPA; if not, write to the Free Software
                     31: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     32: #
                     33: # /home/httpd/html/adm/gpl.txt
                     34: #
                     35: # http://www.lon-capa.org/
                     36: #
                     37: # YEAR=2002
1.6       harris41   38: # 1/28,1/29,1/30,1/31,2/5,4/8 - Scott Harrison
1.1       harris41   39: #
                     40: ###
                     41: 
                     42: ###############################################################################
                     43: ##                                                                           ##
                     44: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     45: ## 1. Notes                                                                  ##
                     46: ## 2. Get command line arguments                                             ##
                     47: ## 3. First pass through (grab distribution-specific information)            ##
                     48: ## 4. Second pass through (parse out what is not necessary)                  ##
                     49: ## 5. Third pass through (translate markup according to specified mode)      ##
                     50: ## 6. Functions (most all just format contents of different markup tags)     ##
                     51: ## 7. POD (plain old documentation, CPAN style)                              ##
                     52: ##                                                                           ##
                     53: ###############################################################################
                     54: 
                     55: # ----------------------------------------------------------------------- Notes
                     56: #
                     57: # I am using a multiple pass-through approach to parsing
                     58: # the piml file.  This saves memory and makes sure the server
                     59: # will never be overloaded.
                     60: #
                     61: # This is meant to parse files meeting the piml document type.
                     62: # See piml.dtd.  PIML=Post Installation Markup Language.
                     63: 
                     64: use HTML::TokeParser;
                     65: 
1.6       harris41   66: my $usage=(<<END);
1.1       harris41   67: **** ERROR ERROR ERROR ERROR ****
                     68: Usage is for piml file to come in through standard input.
1.8     ! matthew    69: 1st is install, configinstall, or build
        !            70: 2st argument is the category permissions to use (runtime or development)
        !            71: 3nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
        !            72: 4rd argument is to manually specify a targetroot
1.1       harris41   73: 
1.8     ! matthew    74: The first 3 arguments are required for the program to run.
1.1       harris41   75: 
                     76: Example:
                     77: 
                     78: cat ../../doc/loncapafiles.piml |\\
                     79: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install
                     80: END
                     81: 
                     82: # ------------------------------------------------- Grab command line arguments
                     83: 
                     84: my $mode;
1.8     ! matthew    85: if (@ARGV>=3) {
1.1       harris41   86:     $mode = shift @ARGV;
                     87: }
                     88: else {
                     89:     @ARGV=();shift @ARGV;
                     90:     while(<>){} # throw away the input to avoid broken pipes
                     91:     print $usage;
                     92:     exit -1; # exit with error status
                     93: }
                     94: 
                     95: my $categorytype;
                     96: if (@ARGV) {
                     97:     $categorytype = shift @ARGV;
                     98: }
                     99: 
                    100: my $dist;
                    101: if (@ARGV) {
                    102:     $dist = shift @ARGV;
                    103: }
                    104: 
                    105: my $targetroot;
                    106: my $targetrootarg;
                    107: if (@ARGV) {
                    108:     $targetroot = shift @ARGV;
                    109: }
                    110: $targetroot=~s/\/$//;
                    111: $targetrootarg=$targetroot;
                    112: 
                    113: my $logcmd='| tee -a WARNINGS';
                    114: 
                    115: my $invocation;
                    116: # --------------------------------------------------- Record program invocation
                    117: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') {
                    118:     $invocation=(<<END);
                    119: # Invocation: STDINPUT | piml_parse.pl
1.2       harris41  120: #             1st argument (category type) is: $categorytype
                    121: #             2nd argument (distribution) is: $dist
                    122: #             3rd argument (targetroot) is: described below
1.1       harris41  123: END
                    124: }
                    125: 
                    126: # ---------------------------------------------------- Start first pass through
                    127: my @parsecontents = <>;
                    128: my $parsestring = join('',@parsecontents);
1.6       harris41  129: my $outstring='';
1.1       harris41  130: 
                    131: # Need to make a pass through and figure out what defaults are
                    132: # overrided.  Top-down overriding strategy (leaves don't know
                    133: # about distant leaves).
                    134: 
                    135: my @hierarchy;
                    136: $hierarchy[0]=0;
                    137: my $hloc=0;
1.6       harris41  138: my $token='';
1.1       harris41  139: $parser = HTML::TokeParser->new(\$parsestring) or
                    140:     die('can\'t create TokeParser object');
                    141: $parser->xml_mode('1');
                    142: my %hash;
                    143: my $key;
                    144: while ($token = $parser->get_token()) {
                    145:     if ($token->[0] eq 'S') {
                    146: 	$hloc++;
                    147: 	$hierarchy[$hloc]++;
                    148: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    149: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    150: 	if ($thisdist eq ' default ') {
                    151: 	    $hash{$key}=1; # there is a default setting for this key
                    152: 	}
                    153: 	elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
                    154: 	    $hash{$key}=2; # disregard default setting for this key if
                    155: 	                   # there is a directly requested distribution match
                    156: 	}
                    157:     }
                    158:     if ($token->[0] eq 'E') {
                    159: 	$hloc--;
                    160:     }
                    161: }
                    162: 
                    163: # --------------------------------------------------- Start second pass through
                    164: undef $hloc;
                    165: undef @hierarchy;
                    166: undef $parser;
                    167: $hierarchy[0]=0;
                    168: $parser = HTML::TokeParser->new(\$parsestring) or
                    169:     die('can\'t create TokeParser object');
                    170: $parser->xml_mode('1');
                    171: my $cleanstring;
                    172: while ($token = $parser->get_token()) {
                    173:     if ($token->[0] eq 'S') {
                    174: 	$hloc++;
                    175: 	$hierarchy[$hloc]++;
                    176: 	$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
                    177: 	my $thisdist=' '.$token->[2]{'dist'}.' ';
                    178: 	# This conditional clause is set up to ignore two sets
                    179: 	# of invalid conditions before accepting entry into
                    180: 	# the cleanstring.
                    181: 	if ($hash{$key}==2 and
                    182: 	    !($thisdist eq '  ' or $thisdist =~/\s$dist\s/)) {
                    183: 	    if ($token->[4]!~/\/>$/) {
                    184: 		$parser->get_tag('/'.$token->[1]);
                    185: 		$hloc--;
                    186: 	    }
                    187: 	}
                    188: 	elsif ($thisdist ne '  ' and $thisdist!~/\s$dist\s/ and
                    189: 	       !($thisdist eq ' default ' and $hash{$key}!=2)) {
                    190: 	    if ($token->[4]!~/\/>$/) {
                    191: 		$parser->get_tag('/'.$token->[1]);
                    192: 		$hloc--;
                    193: 	    }
                    194: 	}
                    195: 	else {
                    196: 	    $cleanstring.=$token->[4];
                    197: 	}
                    198: 	if ($token->[4]=~/\/>$/) {
1.4       harris41  199: #	    $hloc--;
1.1       harris41  200: 	}
                    201:     }
                    202:     if ($token->[0] eq 'E') {
                    203: 	$cleanstring.=$token->[2];
                    204: 	$hloc--;
                    205:     }
                    206:     if ($token->[0] eq 'T') {
                    207: 	$cleanstring.=$token->[1];
                    208:     }
                    209: }
                    210: $cleanstring=&trim($cleanstring);
                    211: $cleanstring=~s/\>\s*\n\s*\</\>\</g;
                    212: 
                    213: # ---------------------------------------------------- Start final pass through
                    214: 
                    215: # storage variables
                    216: my $piml;
                    217: my $categories;
                    218: my @categorynamelist;
                    219: my $category;
                    220: my $category_att_name;
                    221: my $category_att_type;
                    222: my $chown;
                    223: my $chmod;
                    224: my $abbreviation; # space-free abbreviation; esp. for image names
                    225: my $categoryname;
                    226: my $description;
                    227: my $files;
                    228: my $file;
                    229: my $target;
                    230: my $note;
                    231: my $commands;
                    232: my $command;
                    233: my $dependencies;
                    234: my @links;
                    235: my %categoryhash;
                    236: my $dpathlength;
                    237: my %fab; # file category abbreviation
                    238: my $directory_count;
                    239: my $file_count;
                    240: my $link_count;
                    241: my $fileglob_count;
                    242: my $fileglobnames_count;
                    243: my %categorycount;
                    244: 
1.4       harris41  245: my $mode;
                    246: 
1.1       harris41  247: my @buildall;
                    248: my @buildinfo;
                    249: 
                    250: my @configall;
                    251: 
                    252: # Make new parser with distribution specific input
1.6       harris41  253: undef($parser);
1.1       harris41  254: $parser = HTML::TokeParser->new(\$cleanstring) or
                    255:     die('can\'t create TokeParser object');
                    256: $parser->xml_mode('1');
                    257: 
                    258: # Define handling methods for mode-dependent text rendering
                    259: 
                    260: $parser->{textify}={
                    261:     specialnotices => \&format_specialnotices,
                    262:     specialnotice => \&format_specialnotice,
                    263:     targetroot => \&format_targetroot,
                    264:     categories => \&format_categories,
                    265:     category => \&format_category,
                    266:     abbreviation => \&format_abbreviation,
                    267:     chown => \&format_chown,
                    268:     chmod => \&format_chmod,
                    269:     categoryname => \&format_categoryname,
                    270:     files => \&format_files,
                    271:     file => \&format_file,
                    272:     target => \&format_target,
                    273:     note => \&format_note,
                    274:     build => \&format_build,
                    275:     dependencies => \&format_dependencies,
                    276:     filenames => \&format_filenames,
1.2       harris41  277:     perlscript => \&format_perlscript,
                    278:     TARGET => \&format_TARGET,
1.1       harris41  279:     };
                    280: 
                    281: my $text;
                    282: my $token;
1.6       harris41  283: undef($hloc);
                    284: undef(@hierarchy);
1.1       harris41  285: my $hloc;
                    286: my @hierarchy2;
                    287: while ($token = $parser->get_tag('piml')) {
                    288:     &format_piml(@{$token});
                    289:     $text = &trim($parser->get_text('/piml'));
                    290:     $token = $parser->get_tag('/piml');
1.6       harris41  291:     print($piml); 
                    292:     print("\n");
                    293:     print($text);
                    294:     print("\n");
                    295:     print(&end());
1.1       harris41  296: }
1.6       harris41  297: exit(0);
1.1       harris41  298: 
                    299: # ---------- Functions (most all just format contents of different markup tags)
                    300: 
                    301: # ------------------------ Final output at end of markup parsing and formatting
                    302: sub end {
1.2       harris41  303: 
1.1       harris41  304: }
                    305: 
                    306: # ----------------------- Take in string to parse and the separation expression
                    307: sub extract_array {
                    308:     my ($stringtoparse,$sepexp) = @_;
                    309:     my @a=split(/$sepexp/,$stringtoparse);
                    310:     return \@a;
                    311: }
                    312: 
                    313: # --------------------------------------------------------- Format piml section
                    314: sub format_piml {
                    315:     my (@tokeninfo)=@_;
                    316:     my $date=`date`; chop $date;
1.2       harris41  317:     $piml=<<END;
                    318: #!/usr/bin/perl
1.1       harris41  319: 
1.2       harris41  320: # Generated from a PIML (Post Installation Markup Language) document
1.1       harris41  321: 
                    322: END
                    323: }
                    324: # --------------------------------------------------- Format targetroot section
                    325: sub format_targetroot {
                    326:     my $text=&trim($parser->get_text('/targetroot'));
                    327:     $text=$targetroot if $targetroot;
                    328:     $parser->get_tag('/targetroot');
1.6       harris41  329:     return('# TARGET INSTALL LOCATION is "'.$targetroot."\"\n");
1.2       harris41  330: }
                    331: # -------------------------------------------------- Format perl script section
                    332: sub format_perlscript {
                    333:     my (@tokeninfo)=@_;
1.5       harris41  334:     $mode=$tokeninfo[2]->{'mode'};
1.2       harris41  335:     my $text=$parser->get_text('/perlscript');
                    336:     $parser->get_tag('/perlscript');
1.5       harris41  337:     if ($mode eq 'bg') {
1.6       harris41  338: 	open(OUT,">/tmp/piml$$.pl");
                    339: 	print(OUT $text);
                    340: 	close(OUT);
                    341: 	return(<<END);
1.5       harris41  342: 	# launch background process for $target
                    343: 	system("perl /tmp/piml$$.pl &");
                    344: END
                    345:     }
                    346:     else {
1.6       harris41  347: 	return($text);
1.5       harris41  348:     }
1.1       harris41  349: }
1.2       harris41  350: # --------------------------------------------------------------- Format TARGET
                    351: sub format_TARGET {
                    352:     my (@tokeninfo)=@_;
                    353:     $parser->get_tag('/TARGET');
1.6       harris41  354:     return($target);
1.1       harris41  355: }
                    356: # --------------------------------------------------- Format categories section
                    357: sub format_categories {
                    358:     my $text=&trim($parser->get_text('/categories'));
                    359:     $parser->get_tag('/categories');
1.6       harris41  360:     return('# CATEGORIES'."\n".$text);
1.1       harris41  361: }
                    362: # --------------------------------------------------- Format categories section
                    363: sub format_category {
                    364:     my (@tokeninfo)=@_;
                    365:     $category_att_name=$tokeninfo[2]->{'name'};
                    366:     $category_att_type=$tokeninfo[2]->{'type'};
                    367:     $abbreviation=''; $chmod='';$chown='';
                    368:     $parser->get_text('/category');
                    369:     $parser->get_tag('/category');
                    370:     $fab{$category_att_name}=$abbreviation;
1.2       harris41  371:     if ($category_att_type eq $categorytype) {
                    372: 	my ($user,$group)=split(/\:/,$chown);
                    373: 	$categoryhash{$category_att_name}='-o '.$user.' -g '.$group.
                    374: 	    ' -m '.$chmod;
1.1       harris41  375:     }
1.6       harris41  376:     return('');
1.1       harris41  377: }
                    378: # --------------------------------------------------- Format categories section
                    379: sub format_abbreviation {
                    380:     my @tokeninfo=@_;
                    381:     $abbreviation='';
                    382:     my $text=&trim($parser->get_text('/abbreviation'));
                    383:     if ($text) {
                    384: 	$parser->get_tag('/abbreviation');
                    385: 	$abbreviation=$text;
                    386:     }
1.6       harris41  387:     return('');
1.1       harris41  388: }
                    389: # -------------------------------------------------------- Format chown section
                    390: sub format_chown {
                    391:     my @tokeninfo=@_;
                    392:     $chown='';
                    393:     my $text=&trim($parser->get_text('/chown'));
                    394:     if ($text) {
                    395: 	$parser->get_tag('/chown');
                    396: 	$chown=$text;
                    397:     }
1.6       harris41  398:     return('');
1.1       harris41  399: }
                    400: # -------------------------------------------------------- Format chmod section
                    401: sub format_chmod {
                    402:     my @tokeninfo=@_;
                    403:     $chmod='';
                    404:     my $text=&trim($parser->get_text('/chmod'));
                    405:     if ($text) {
                    406: 	$parser->get_tag('/chmod');
                    407: 	$chmod=$text;
                    408:     }
1.6       harris41  409:     return('');
1.1       harris41  410: }
                    411: # ------------------------------------------------- Format categoryname section
                    412: sub format_categoryname {
                    413:     my @tokeninfo=@_;
                    414:     $categoryname='';
                    415:     my $text=&trim($parser->get_text('/categoryname'));
                    416:     if ($text) {
                    417: 	$parser->get_tag('/categoryname');
                    418: 	$categoryname=$text;
                    419:     }
1.6       harris41  420:     return('');
1.1       harris41  421: }
                    422: # -------------------------------------------------------- Format files section
                    423: sub format_files {
                    424:     my $text=$parser->get_text('/files');
                    425:     $parser->get_tag('/files');
1.6       harris41  426:     return("\n".'# There are '.$file_count.' files this script works on'.
                    427: 	"\n\n".$text);
1.1       harris41  428: }
                    429: # --------------------------------------------------------- Format file section
                    430: sub format_file {
                    431:     my @tokeninfo=@_;
                    432:     $file=''; $source=''; $target=''; $categoryname=''; $description='';
                    433:     $note=''; $build=''; $status=''; $dependencies='';
                    434:     my $text=&trim($parser->get_text('/file'));
                    435:     $file_count++;
                    436:     $categorycount{$categoryname}++;
1.2       harris41  437:     $parser->get_tag('/file');
1.6       harris41  438:     return("# File: $target\n".
                    439: 	"$text\n");
1.1       harris41  440: }
                    441: # ------------------------------------------------------- Format target section
                    442: sub format_target {
                    443:     my @tokeninfo=@_;
                    444:     $target='';
                    445:     my $text=&trim($parser->get_text('/target'));
                    446:     if ($text) {
                    447: 	$parser->get_tag('/target');
1.2       harris41  448: 	$target=$targetrootarg.$text;
1.1       harris41  449:     }
1.6       harris41  450:     return('');
1.1       harris41  451: }
                    452: # --------------------------------------------------------- Format note section
                    453: sub format_note {
                    454:     my @tokeninfo=@_;
                    455:     $note='';
                    456:     my $aref;
                    457:     my $text;
                    458:     while ($aref=$parser->get_token()) {
                    459: 	if ($aref->[0] eq 'E' && $aref->[1] eq 'note') {
                    460: 	    last;
                    461: 	}
                    462: 	elsif ($aref->[0] eq 'S') {
                    463: 	    $text.=$aref->[4];
                    464: 	}
                    465: 	elsif ($aref->[0] eq 'E') {
                    466: 	    $text.=$aref->[2];
                    467: 	}
                    468: 	else {
                    469: 	    $text.=$aref->[1];
                    470: 	}
                    471:     }
                    472:     if ($text) {
                    473: 	$note=$text;
                    474:     }
1.6       harris41  475:     return('');
1.1       harris41  476: }
                    477: # ------------------------------------------------- Format dependencies section
                    478: sub format_dependencies {
                    479:     my @tokeninfo=@_;
                    480:     $dependencies='';
                    481:     my $text=&trim($parser->get_text('/dependencies'));
                    482:     if ($text) {
                    483: 	$parser->get_tag('/dependencies');
                    484: 	$dependencies=join(';',
                    485: 			      (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text)));
                    486:     }
1.6       harris41  487:     return('');
1.1       harris41  488: }
                    489: # ------------------------------------------------ Format specialnotice section
                    490: sub format_specialnotices {
                    491:     $parser->get_tag('/specialnotices');
1.6       harris41  492:     return('');
1.1       harris41  493: }
                    494: # ------------------------------------------------ Format specialnotice section
                    495: sub format_specialnotice {
                    496:     $parser->get_tag('/specialnotice');
1.6       harris41  497:     return('');
1.1       harris41  498: }
                    499: # ------------------------------------- Render less-than and greater-than signs
                    500: sub htmlsafe {
                    501:     my $text=@_[0];
                    502:     $text =~ s/</&lt;/g;
                    503:     $text =~ s/>/&gt;/g;
1.6       harris41  504:     return($text);
1.1       harris41  505: }
                    506: # --------------------------------------- remove starting and ending whitespace
                    507: sub trim {
1.6       harris41  508:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return($s);
                    509: }
1.1       harris41  510: 
                    511: # ----------------------------------- POD (plain old documentation, CPAN style)
                    512: 
1.6       harris41  513: =pod
                    514: 
1.1       harris41  515: =head1 NAME
                    516: 
1.7       harris41  517: piml_parse.pl - This is meant to parse piml files (Post Installation Markup Language)
1.1       harris41  518: 
                    519: =head1 SYNOPSIS
                    520: 
                    521: Usage is for piml file to come in through standard input.
                    522: 
                    523: =over 4
                    524: 
                    525: =item * 
                    526: 
1.2       harris41  527: 1st argument is the category permissions to use (runtime or development)
1.1       harris41  528: 
                    529: =item *
                    530: 
1.2       harris41  531: 2nd argument is the distribution
1.1       harris41  532: (default,redhat6.2,debian2.2,redhat7.1,etc).
                    533: 
                    534: =item *
                    535: 
1.2       harris41  536: 3rd argument is to manually specify a targetroot.
1.1       harris41  537: 
                    538: =back
                    539: 
                    540: Only the 1st argument is mandatory for the program to run.
                    541: 
                    542: Example:
                    543: 
                    544: cat ../../doc/loncapafiles.piml |\\
                    545: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install
                    546: 
                    547: =head1 DESCRIPTION
                    548: 
                    549: I am using a multiple pass-through approach to parsing
                    550: the piml file.  This saves memory and makes sure the server
                    551: will never be overloaded.
                    552: 
                    553: =head1 README
                    554: 
                    555: I am using a multiple pass-through approach to parsing
                    556: the piml file.  This saves memory and makes sure the server
                    557: will never be overloaded.
                    558: 
                    559: =head1 PREREQUISITES
                    560: 
                    561: HTML::TokeParser
                    562: 
                    563: =head1 COREQUISITES
                    564: 
                    565: =head1 OSNAMES
                    566: 
                    567: linux
                    568: 
                    569: =head1 SCRIPT CATEGORIES
                    570: 
                    571: Packaging/Administrative
1.6       harris41  572: 
                    573: =head1 AUTHOR
                    574: 
                    575:  Scott Harrison
                    576:  codeharrison@yahoo.com
                    577: 
                    578: Please let me know how/if you are finding this script useful and
                    579: any/all suggestions.  -Scott
1.1       harris41  580: 
                    581: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>