1: #!/usr/bin/perl
2:
3: # Scott Harrison
4: # May 2001
5: # 06/19/2001,06/20,06/24 - Scott Harrison
6:
7: # I am using a multiple pass-through approach to parsing
8: # the lpml file. This saves memory and makes sure the server
9: # will never be overloaded.
10:
11: use HTML::TokeParser;
12:
13: my $usage=<<END;
14: **** ERROR ERROR ERROR ERROR ****
15: Usage is for lpml file to come in through standard input.
16: 1st argument is the mode of parsing.
17: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
18: 3rd argument is to manually specify a sourceroot.
19: 4th argument is to manually specify a targetroot.
20:
21: Only the 1st argument is mandatory for the program to run.
22:
23: Example:
24:
25: cat ../../doc/loncapafiles.lpml |\\
26: perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
27: END
28:
29: # ------------------------------------------------- Grab command line arguments
30:
31: my $mode;
32: if (@ARGV) {
33: $mode = shift @ARGV;
34: }
35: else {
36: while(<>){} # throw away the input to avoid broken pipes
37: print $usage;
38: exit -1; # exit with error status
39: }
40:
41: my $dist;
42: if (@ARGV) {
43: $dist = shift @ARGV;
44: }
45:
46: my $targetroot;
47: my $sourceroot;
48: if (@ARGV) {
49: $targetroot = shift @ARGV;
50: }
51: if (@ARGV) {
52: $sourceroot = shift @ARGV;
53: }
54:
55: # ---------------------------------------------------- Start first pass through
56: my @parsecontents = <>;
57: my $parsestring = join('',@parsecontents);
58: my $outstring;
59:
60: # Need to make a pass through and figure out what defaults are
61: # overrided. Top-down overriding strategy (leaves don't know
62: # about distant leaves).
63:
64: my @hierarchy;
65: $hierarchy[0]=0;
66: my $hloc=0;
67: my $token;
68: $parser = HTML::TokeParser->new(\$parsestring) or
69: die('can\'t create TokeParser object');
70: $parser->xml_mode('1');
71: my %hash;
72: my $key;
73: while ($token = $parser->get_token()) {
74: if ($token->[0] eq 'S') {
75: $hloc++;
76: $hierarchy[$hloc]++;
77: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
78: my $thisdist=' '.$token->[2]{'dist'}.' ';
79: if ($thisdist eq ' default ') {
80: $hash{$key}=1; # there is a default setting for this key
81: }
82: elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
83: $hash{$key}=2; # disregard default setting for this key if
84: # there is a directly requested distribution match
85: }
86: }
87: if ($token->[0] eq 'E') {
88: $hloc--;
89: }
90: }
91:
92: # --------------------------------------------------- Start second pass through
93: undef $hloc;
94: undef @hierarchy;
95: undef $parser;
96: $hierarchy[0]=0;
97: $parser = HTML::TokeParser->new(\$parsestring) or
98: die('can\'t create TokeParser object');
99: $parser->xml_mode('1');
100: my $cleanstring;
101: while ($token = $parser->get_token()) {
102: if ($token->[0] eq 'S') {
103: $hloc++;
104: $hierarchy[$hloc]++;
105: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
106: my $thisdist=' '.$token->[2]{'dist'}.' ';
107: if ($hash{$key}==2 and
108: !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) {
109: if ($token->[4]!~/\/>$/) {
110: $parser->get_tag('/'.$token->[1]);
111: $hloc--;
112: }
113: }
114: elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and
115: !($thisdist eq ' default ' and $hash{$key}!=2)) {
116: if ($token->[4]!~/\/>$/) {
117: $parser->get_tag('/'.$token->[1]);
118: $hloc--;
119: }
120: }
121: else {
122: $cleanstring.=$token->[4];
123: }
124: if ($token->[4]=~/\/>$/) {
125: $hloc--;
126: }
127: }
128: if ($token->[0] eq 'E') {
129: $cleanstring.=$token->[2];
130: $hloc--;
131: }
132: if ($token->[0] eq 'T') {
133: $cleanstring.=$token->[1];
134: }
135: }
136: $cleanstring=&trim($cleanstring);
137:
138: # ---------------------------------------------------- Start final pass through
139:
140: # storage variables
141: my $lpml;
142: my $categories;
143: my $category;
144: my $category_att_name;
145: my $category_att_type;
146: my $chown;
147: my $chmod;
148: my $rpm;
149: my $rpmSummary;
150: my $rpmName;
151: my $rpmVersion;
152: my $rpmRelease;
153: my $rpmVendor;
154: my $rpmBuildRoot;
155: my $rpmCopyright;
156: my $rpmGroup;
157: my $rpmSource;
158: my $rpmAutoReqProv;
159: my $rpmdescription;
160: my $rpmpre;
161: my $directories;
162: my $directory;
163: my $targetdirs;
164: my $targetdir;
165: my $categoryname;
166: my $description;
167: my $files;
168: my $fileglobs;
169: my $links;
170: my $file;
171: my $link;
172: my $fileglob;
173: my $sourcedir;
174: my $targets;
175: my $target;
176: my $source;
177: my $note;
178: my $build;
179: my $commands;
180: my $command;
181: my $status;
182: my $dependencies;
183: my $dependency;
184:
185: # Make new parser with distribution specific input
186: undef $parser;
187: $parser = HTML::TokeParser->new(\$cleanstring) or
188: die('can\'t create TokeParser object');
189: $parser->xml_mode('1');
190:
191: # Define handling methods for mode-dependent text rendering
192: $parser->{textify}={
193: targetroot => \&format_targetroot,
194: sourceroot => \&format_sourceroot,
195: categories => \&format_categories,
196: category => \&format_category,
197: targetdir => \&format_targetdir,
198: chown => \&format_chown,
199: chmod => \&format_chmod,
200: rpm => \&format_rpm,
201: rpmSummary => \&format_rpmSummary,
202: rpmName => \&format_rpmName,
203: rpmVersion => \&format_rpmVersion,
204: rpmRelease => \&format_rpmRelease,
205: rpmVendor => \&format_rpmVendor,
206: rpmBuildRoot => \&format_rpmBuildRoot,
207: rpmCopyright => \&format_rpmCopyright,
208: rpmGroup => \&format_rpmGroup,
209: rpmSource => \&format_rpmSource,
210: rpmAutoReqProv => \&format_rpmAutoReqProv,
211: rpmdescription => \&format_rpmdescription,
212: rpmpre => \&format_rpmpre,
213: directories => \&format_directories,
214: directory => \&format_directory,
215: categoryname => \&format_categoryname,
216: description => \&format_description,
217: files => \&format_files,
218: file => \&format_file,
219: fileglob => \&format_fileglob,
220: link => \&format_link,
221: linkto => \&format_linkto,
222: source => \&format_source,
223: target => \&format_target,
224: note => \&format_note,
225: build => \&format_build,
226: status => \&format_status,
227: dependencies => \&format_dependencies,
228: glob => \&format_glob,
229: sourcedir => \&format_sourcedir,
230: filenames => \&format_filenames,
231: };
232:
233: my $text;
234: my $token;
235: undef $hloc;
236: undef @hierarchy;
237: my $hloc;
238: my @hierarchy2;
239: while ($token = $parser->get_tag('lpml')) {
240: &format_lpml(@{$token});
241: $text = &trim($parser->get_text('/lpml'));
242: $token = $parser->get_tag('/lpml');
243: print $lpml;
244: print "\n";
245: $text=~s/\s*\n\s*\n\s*/\n/g;
246: print $text;
247: print "\n";
248: print &end();
249: }
250: exit;
251:
252: sub end {
253: if ($mode eq 'html') {
254: return "THE END\n";
255: }
256: }
257:
258: # ----------------------- Take in string to parse and the separation expression
259: sub extract_array {
260: my ($stringtoparse,$sepexp) = @_;
261: my @a=split(/$sepexp/,$stringtoparse);
262: return \@a;
263: }
264:
265: # --------------------------------------------------------- Format lpml section
266: sub format_lpml {
267: my (@tokeninfo)=@_;
268: my $date=`date`; chop $date;
269: if ($mode eq 'html') {
270: $lpml = "LPML BEGINNING: $date";
271: }
272: }
273: # --------------------------------------------------- Format targetroot section
274: sub format_targetroot {
275: my $text=&trim($parser->get_text('/targetroot'));
276: $text=$targetroot if $targetroot;
277: $parser->get_tag('/targetroot');
278: if ($mode eq 'html') {
279: return $targetroot="\nTARGETROOT: $text";
280: }
281: else {
282: return '';
283: }
284: }
285: # --------------------------------------------------- Format sourceroot section
286: sub format_sourceroot {
287: my $text=&trim($parser->get_text('/sourceroot'));
288: $text=$sourceroot if $sourceroot;
289: $parser->get_tag('/sourceroot');
290: if ($mode eq 'html') {
291: return $sourceroot="\nSOURCEROOT: $text";
292: }
293: else {
294: return '';
295: }
296: }
297: # --------------------------------------------------- Format categories section
298: sub format_categories {
299: my $text=&trim($parser->get_text('/categories'));
300: $parser->get_tag('/categories');
301: if ($mode eq 'html') {
302: return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n";
303: }
304: else {
305: return '';
306: }
307: }
308: # --------------------------------------------------- Format categories section
309: sub format_category {
310: my (@tokeninfo)=@_;
311: $category_att_name=$tokeninfo[2]->{'name'};
312: $category_att_type=$tokeninfo[2]->{'type'};
313: $chmod='';$chown='';
314: $parser->get_text('/category');
315: $parser->get_tag('/category');
316: if ($mode eq 'html') {
317: return $category="\nCATEGORY $category_att_name $category_att_type ".
318: "$chmod $chown";
319: }
320: else {
321: return '';
322: }
323: }
324: # -------------------------------------------------------- Format chown section
325: sub format_chown {
326: my @tokeninfo=@_;
327: $chown='';
328: my $text=&trim($parser->get_text('/chown'));
329: if ($text) {
330: $parser->get_tag('/chown');
331: $chown=$text;
332: }
333: return '';
334: }
335: # -------------------------------------------------------- Format chmod section
336: sub format_chmod {
337: my @tokeninfo=@_;
338: $chmod='';
339: my $text=&trim($parser->get_text('/chmod'));
340: if ($text) {
341: $parser->get_tag('/chmod');
342: $chmod=$text;
343: }
344: return '';
345: }
346: # ---------------------------------------------------------- Format rpm section
347: sub format_rpm {
348: my $text=&trim($parser->get_text('/rpm'));
349: $parser->get_tag('/rpm');
350: if ($mode eq 'html') {
351: return $rpm="\nBEGIN RPM\n$text\nEND RPM";
352: }
353: else {
354: return '';
355: }
356: }
357: # --------------------------------------------------- Format rpmSummary section
358: sub format_rpmSummary {
359: my $text=&trim($parser->get_text('/rpmSummary'));
360: $parser->get_tag('/rpmSummary');
361: if ($mode eq 'html') {
362: return $rpmSummary="\nRPMSUMMARY $text";
363: }
364: else {
365: return '';
366: }
367: }
368: # ------------------------------------------------------ Format rpmName section
369: sub format_rpmName {
370: my $text=&trim($parser->get_text('/rpmName'));
371: $parser->get_tag('/rpmName');
372: if ($mode eq 'html') {
373: return $rpmName="\nRPMNAME $text";
374: }
375: else {
376: return '';
377: }
378: }
379: # --------------------------------------------------- Format rpmVersion section
380: sub format_rpmVersion {
381: my $text=$parser->get_text('/rpmVersion');
382: $parser->get_tag('/rpmVersion');
383: if ($mode eq 'html') {
384: return $rpmVersion="\nRPMVERSION $text";
385: }
386: else {
387: return '';
388: }
389: }
390: # --------------------------------------------------- Format rpmRelease section
391: sub format_rpmRelease {
392: my $text=$parser->get_text('/rpmRelease');
393: $parser->get_tag('/rpmRelease');
394: if ($mode eq 'html') {
395: return $rpmRelease="\nRPMRELEASE $text";
396: }
397: else {
398: return '';
399: }
400: }
401: # ---------------------------------------------------- Format rpmVendor section
402: sub format_rpmVendor {
403: my $text=$parser->get_text('/rpmVendor');
404: $parser->get_tag('/rpmVendor');
405: if ($mode eq 'html') {
406: return $rpmVendor="\nRPMVENDOR $text";
407: }
408: else {
409: return '';
410: }
411: }
412: # ------------------------------------------------- Format rpmBuildRoot section
413: sub format_rpmBuildRoot {
414: my $text=$parser->get_text('/rpmBuildRoot');
415: $parser->get_tag('/rpmBuildRoot');
416: if ($mode eq 'html') {
417: return $rpmBuildRoot="\nRPMBUILDROOT $text";
418: }
419: else {
420: return '';
421: }
422: }
423: # ------------------------------------------------- Format rpmCopyright section
424: sub format_rpmCopyright {
425: my $text=$parser->get_text('/rpmCopyright');
426: $parser->get_tag('/rpmCopyright');
427: if ($mode eq 'html') {
428: return $rpmCopyright="\nRPMCOPYRIGHT $text";
429: }
430: else {
431: return '';
432: }
433: }
434: # ----------------------------------------------------- Format rpmGroup section
435: sub format_rpmGroup {
436: my $text=$parser->get_text('/rpmGroup');
437: $parser->get_tag('/rpmGroup');
438: if ($mode eq 'html') {
439: return $rpmGroup="\nRPMGROUP $text";
440: }
441: else {
442: return '';
443: }
444: }
445: # ---------------------------------------------------- Format rpmSource section
446: sub format_rpmSource {
447: my $text=$parser->get_text('/rpmSource');
448: $parser->get_tag('/rpmSource');
449: if ($mode eq 'html') {
450: return $rpmSource="\nRPMSOURCE $text";
451: }
452: else {
453: return '';
454: }
455: }
456: # ----------------------------------------------- Format rpmAutoReqProv section
457: sub format_rpmAutoReqProv {
458: my $text=$parser->get_text('/rpmAutoReqProv');
459: $parser->get_tag('/rpmAutoReqProv');
460: if ($mode eq 'html') {
461: return $rpmAutoReqProv="\nRPMAUTOREQPROV $text";
462: }
463: else {
464: return '';
465: }
466: }
467: # ----------------------------------------------- Format rpmdescription section
468: sub format_rpmdescription {
469: my $text=$parser->get_text('/rpmdescription');
470: $parser->get_tag('/rpmdescription');
471: if ($mode eq 'html') {
472: return $rpmdescription="\nRPMDESCRIPTION $text";
473: }
474: else {
475: return '';
476: }
477: }
478: # ------------------------------------------------------- Format rpmpre section
479: sub format_rpmpre {
480: my $text=$parser->get_text('/rpmpre');
481: $parser->get_tag('/rpmpre');
482: if ($mode eq 'html') {
483: return $rpmpre="\nRPMPRE $text";
484: }
485: else {
486: return '';
487: }
488: }
489: # -------------------------------------------------- Format directories section
490: sub format_directories {
491: my $text=&trim($parser->get_text('/directories'));
492: $parser->get_tag('/directories');
493: if ($mode eq 'html') {
494: return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n";
495: }
496: else {
497: return '';
498: }
499: }
500: # ---------------------------------------------------- Format directory section
501: sub format_directory {
502: my (@tokeninfo)=@_;
503: $targetdir='';$categoryname='';$description='';
504: $parser->get_text('/directory');
505: $parser->get_tag('/directory');
506: if ($mode eq 'html') {
507: return $directory="\nDIRECTORY $targetdir $categoryname $description";
508: }
509: else {
510: return '';
511: }
512: }
513: # ---------------------------------------------------- Format targetdir section
514: sub format_targetdir {
515: my @tokeninfo=@_;
516: $targetdir='';
517: my $text=&trim($parser->get_text('/targetdir'));
518: if ($text) {
519: $parser->get_tag('/targetdir');
520: $targetdir=$text;
521: }
522: return '';
523: }
524: # ------------------------------------------------- Format categoryname section
525: sub format_categoryname {
526: my @tokeninfo=@_;
527: $categoryname='';
528: my $text=&trim($parser->get_text('/categoryname'));
529: if ($text) {
530: $parser->get_tag('/categoryname');
531: $categoryname=$text;
532: }
533: return '';
534: }
535: # -------------------------------------------------- Format description section
536: sub format_description {
537: my @tokeninfo=@_;
538: $description='';
539: my $text=&trim($parser->get_text('/description'));
540: if ($text) {
541: $parser->get_tag('/description');
542: $description=$text;
543: }
544: return '';
545: }
546: # -------------------------------------------------------- Format files section
547: sub format_files {
548: my $text=&trim($parser->get_text('/files'));
549: $parser->get_tag('/files');
550: if ($mode eq 'html') {
551: return $directories="\nBEGIN FILES\n$text\nEND FILES\n";
552: }
553: else {
554: return '';
555: }
556: }
557: # ---------------------------------------------------- Format fileglobs section
558: sub format_fileglobs {
559:
560: }
561: # -------------------------------------------------------- Format links section
562: sub format_links {
563:
564: }
565: # --------------------------------------------------------- Format file section
566: sub format_file {
567: my @tokeninfo=@_;
568: $file=''; $source=''; $target=''; $categoryname=''; $description='';
569: $note=''; $build=''; $status=''; $dependencies='';
570: my $text=&trim($parser->get_text('/file'));
571: if ($source) {
572: $parser->get_tag('/file');
573: if ($mode eq 'html') {
574: return ($file="\nBEGIN FILE\n".
575: "$source $target $categoryname $description $note " .
576: "$build $status $dependencies" .
577: "\nEND FILE");
578: }
579: else {
580: return '';
581: }
582: }
583: return '';
584: }
585: # --------------------------------------------------------- Format link section
586: sub format_link {
587: my @tokeninfo=@_;
588: $link=''; $linkto=''; $target=''; $categoryname=''; $description='';
589: $note=''; $build=''; $status=''; $dependencies='';
590: my $text=&trim($parser->get_text('/link'));
591: if ($linkto) {
592: $parser->get_tag('/link');
593: if ($mode eq 'html') {
594: return $link="\nBEGIN LINK\n".
595: "$linkto $target $categoryname $description $note " .
596: "$build $status $dependencies" .
597: "\nEND LINK";
598: }
599: else {
600: return '';
601: }
602: }
603: return '';
604: }
605: # ----------------------------------------------------- Format fileglob section
606: sub format_fileglob {
607: my @tokeninfo=@_;
608: $fileglob=''; $glob=''; $sourcedir='';
609: $targetdir=''; $categoryname=''; $description='';
610: $note=''; $build=''; $status=''; $dependencies='';
611: $filenames='';
612: my $text=&trim($parser->get_text('/fileglob'));
613: if ($sourcedir) {
614: $parser->get_tag('/fileglob');
615: if ($mode eq 'html') {
616: return $fileglob="\nBEGIN FILEGLOB\n".
617: "$glob sourcedir $targetdir $categoryname $description $note ".
618: "$build $status $dependencies $filenames" .
619: "\nEND FILEGLOB";
620: }
621: else {
622: return '';
623: }
624: }
625: return '';
626: }
627: # ---------------------------------------------------- Format sourcedir section
628: sub format_sourcedir {
629: my @tokeninfo=@_;
630: $sourcedir='';
631: my $text=&trim($parser->get_text('/sourcedir'));
632: if ($text) {
633: $parser->get_tag('/sourcedir');
634: $sourcedir=$text;
635: }
636: return '';
637: }
638: # ------------------------------------------------------- Format target section
639: sub format_target {
640: my @tokeninfo=@_;
641: $target='';
642: my $text=&trim($parser->get_text('/target'));
643: if ($text) {
644: $parser->get_tag('/target');
645: $target=$text;
646: }
647: return '';
648: }
649: # ------------------------------------------------------- Format source section
650: sub format_source {
651: my @tokeninfo=@_;
652: $source='';
653: my $text=&trim($parser->get_text('/source'));
654: if ($text) {
655: $parser->get_tag('/source');
656: $source=$text;
657: }
658: return '';
659: }
660: # --------------------------------------------------------- Format note section
661: sub format_note {
662: my @tokeninfo=@_;
663: $note='';
664: my $text=&trim($parser->get_text('/note'));
665: if ($text) {
666: $parser->get_tag('/note');
667: $note=$text;
668: }
669: return '';
670:
671: }
672: # -------------------------------------------------------- Format build section
673: sub format_build {
674: my @tokeninfo=@_;
675: $build='';
676: my $text=&trim($parser->get_text('/build'));
677: if ($text) {
678: $parser->get_tag('/build');
679: $build=$text;
680: }
681: return '';
682: }
683: # ------------------------------------------------------- Format status section
684: sub format_status {
685: my @tokeninfo=@_;
686: $status='';
687: my $text=&trim($parser->get_text('/status'));
688: if ($text) {
689: $parser->get_tag('/status');
690: $status=$text;
691: }
692: return '';
693: }
694: # ------------------------------------------------- Format dependencies section
695: sub format_dependencies {
696: my @tokeninfo=@_;
697: $dependencies='';
698: my $text=&trim($parser->get_text('/dependencies'));
699: if ($text) {
700: $parser->get_tag('/dependencies');
701: $dependencies=$text;
702: }
703: return '';
704: }
705: # --------------------------------------------------------- Format glob section
706: sub format_glob {
707: my @tokeninfo=@_;
708: $glob='';
709: my $text=&trim($parser->get_text('/glob'));
710: if ($text) {
711: $parser->get_tag('/glob');
712: $glob=$text;
713: }
714: return '';
715: }
716: # ---------------------------------------------------- Format filenames section
717: sub format_filenames {
718: my @tokeninfo=@_;
719: $glob='';
720: my $text=&trim($parser->get_text('/filenames'));
721: if ($text) {
722: $parser->get_tag('/filenames');
723: $filenames=$text;
724: }
725: return '';
726: }
727: # ------------------------------------------------------- Format linkto section
728: sub format_linkto {
729: my @tokeninfo=@_;
730: $glob='';
731: my $text=&trim($parser->get_text('/linkto'));
732: if ($text) {
733: $parser->get_tag('/linkto');
734: $linkto=$text;
735: }
736: return '';
737: }
738: # --------------------------------------- remove starting and ending whitespace
739: sub trim {
740: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
741: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>