Annotation of loncom/build/filecompare.pl, revision 1.14
1.1 harris41 1: #!/usr/bin/perl
2:
1.5 harris41 3: # The LearningOnline Network with CAPA
1.10 harris41 4: # filecompare.pl - script used to help probe and compare file statistics
5: #
1.14 ! albertel 6: # $Id: filecompare.pl,v 1.13 2002/05/16 00:23:04 harris41 Exp $
1.10 harris41 7: #
8: # Copyright Michigan State University Board of Trustees
9: #
10: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
11: #
12: # LON-CAPA is free software; you can redistribute it and/or modify
13: # it under the terms of the GNU General Public License as published by
14: # the Free Software Foundation; either version 2 of the License, or
15: # (at your option) any later version.
16: #
17: # LON-CAPA is distributed in the hope that it will be useful,
18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: # GNU General Public License for more details.
1.4 harris41 21: #
1.10 harris41 22: # You should have received a copy of the GNU General Public License
23: # along with LON-CAPA; if not, write to the Free Software
24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: #
26: # /home/httpd/html/adm/gpl.txt
27: #
28: # http://www.lon-capa.org/
1.4 harris41 29: #
1.1 harris41 30: # YEAR=2001
1.4 harris41 31: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
32: # 11/14 Guy Albertelli
1.8 harris41 33: # 11/16,11/17 Scott Harrison
1.9 harris41 34: # 12/3,12/5 Scott Harrison
1.4 harris41 35: #
36: ###
1.1 harris41 37:
1.5 harris41 38: ###############################################################################
39: ## ##
40: ## ORGANIZATION OF THIS PERL SCRIPT ##
41: ## ##
42: ## 1. Invocation ##
43: ## 2. Notes ##
44: ## 3. Dependencies ##
45: ## 4. Process command line arguments ##
46: ## 5. Process file/dir location arguments ##
47: ## 6. Process comparison restrictions ##
48: ## 7. Define output and measure subroutines ##
49: ## 8. Loop through files and calculate differences ##
50: ## 9. Subroutines ##
51: ## 10. POD (plain old documentation, CPAN style) ##
52: ## ##
53: ###############################################################################
54:
1.4 harris41 55: # ------------------------------------------------------------------ Invocation
# Usage text shown when the script is called without arguments or with
# arguments it cannot interpret.  The -s synopsis lists SOURCE= before
# TARGET= because the argument parser expects them in that order.
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
or
filecompare.pl [ options ... ] -s SOURCE=[source] TARGET=[target] MODE=[mode]
LOC1 LOC2

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls EXIT code of this script; 0 unless...)
N=1: md5sum=same --> 1; cvstime<0 --> 2
N=2: same as N=1 except without md5sum
N=3: md5sum=same --> 1; age<0 --> 2
N=4: cvstime>0 --> 2

The third way to pass arguments is set by the -s flag.
filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2

TARGET corresponds to the root path of LOC2. SOURCE corresponds to
the root path of LOC1. MODE can either be file, directory, link, or fileglob.

END

# With no arguments at all there is nothing to compare: show usage and fail.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
1.5 harris41 92:
1.1 harris41 93: # ----------------------------------------------------------------------- Notes
94: #
95: # What are all the different ways to compare two files and how to look
96: # at the differences?
97: #
98: # Ways of comparison:
99: # existence similarity
1.6 harris41 100: # cvs time similarity (1st arg treated as CVS source; only for buildmode)
1.1 harris41 101: # age similarity (modification time)
102: # md5sum similarity
103: # size similarity (bytes)
104: # line count difference
105: # number of different lines
106: #
107: # Quantities of comparison:
108: # existence (no,yes); other values become 'n/a'
1.2 harris41 109: # cvstime in seconds
1.1 harris41 110: # age in seconds
111: # md5sum ("same" or "different")
112: # size similarity (byte difference)
113: # line count difference (integer)
114: # number of different lines (integer)
115:
1.5 harris41 116: # ---------------------------------------------------------------- Dependencies
1.1 harris41 117: # implementing from unix command line (assuming bash)
118: # md5sum, diff, wc -l
119:
120: # ---------------------------------------------- Process command line arguments
121: # Flags (before file/dir names):
122: # -p show all files the same
123: # -n show all files different
124: # -a show all files (with comparisons)
125: # -q only show file names (based on first file/dir)
126: # -v verbose mode (default)
1.5 harris41 127: # -bN build/install mode (returns exitcode)
1.9 harris41 128: # -s status checking mode for lpml
129:
# Option state, filled in from the leading "-x" flags on the command line.
#   $verbose    - 1: print every measurement; 0: print file names only (-q)
#   $show       - 'all' (-a), 'same' (-p) or 'different' (-n)
#   $buildmode  - digit N taken from -bN; non-zero selects exit-code behaviour
#   $statusmode - 1 when -s (status checking mode for lpml) was given
my $verbose='1';
my $show='all';
my $buildmode=0;
my $statusmode=0;
ALOOP: while (@ARGV) {
    # Flags are only recognized before the first non-flag argument.
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    my $arg=shift @ARGV;

    if ($flag eq 'b') {
        # -bN must carry its digit.  Without this check a bare "-b" would
        # leave $1 holding the flag letter from the match above and
        # $buildmode would silently become the string 'b'.
        if ($arg=~/^\-\w(\d)/) {
            $buildmode=$1;
        }
        else {
            print($invocation);
            exit(1);
        }
        next ALOOP;
    }

    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    elsif ($flag eq 's') { $statusmode=1; }
    else {
        # Unknown flag: show usage and fail.
        print($invocation);
        exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1 harris41 161:
# Work out the two locations to compare and the list of files to visit.
#   @files   - entries to iterate over (relative names in directory mode)
#   $loc1    - first location (treated as the source side)
#   $loc2    - second location (treated as the target side)
#   $dirmode - 'directories' when @files holds names relative to both
#              locations, 'files' when $loc1/$loc2 are the files themselves
my @files;
my $loc1;
my $loc2;
my $dirmode='directories';
# ----------------------------------------- If status checking mode for lpml
my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob);
my ($source,$target);
if ($statusmode==1) {
    # Expected order: SOURCE=... TARGET=... MODE=... LOC1 LOC2
    ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5);
    $targetroot.='/' if $targetroot!~/\/$/;
    $sourceroot=~s/^SOURCE\=//;
    $targetroot=~s/^TARGET\=//;
    $source=$sourceroot.'/'.$sourceglob;
    $target=$targetroot.''.$targetglob;
    if ($mode eq 'MODE=fileglob') {
        # Compare by directory: strip the final path component from both
        # sides and visit every file the source glob expands to.
        $loc1=$source;
        $loc1=~s/\/[^\/]*$// if length($loc1)>2;
        $loc2=$target;
        $loc2=~s/\/[^\/]*$// if length($loc2)>2;
        @files=glob($source);
        # \Q...\E: the directory is literal text, not a pattern.
        s/^\Q$loc1\E\/// for @files;
        $dirmode='directories';
    }
    elsif ($mode eq 'MODE=file') {
        $loc1=$source;
        $loc2=$target;
        $dirmode='files';
        @files=($loc1);
    }
    # Any other MODE leaves @files empty, so nothing is compared.
}
else {

# ----------------------------------------- Process file/dir location arguments
# FILE1 FILE2 or DIR1 DIR2
    $loc1=shift @ARGV;
    $loc2=shift @ARGV;
    unless ($loc1 and $loc2) {
        print "LOC1: $loc1\nLOC2: $loc2\n";
        print($invocation), exit(1);
    }
    if (-f $loc1) {
        $dirmode='files';
        @files=($loc1);
    }
    else {
        if (-e $loc1) {
            # List-form pipe: the path is handed to find(1) as a single
            # argument, so spaces or shell metacharacters cannot split it.
            if (open(my $find,'-|','find',$loc1,'-type','f')) {
                @files=<$find>;
                close($find);
            }
        }
        else {
            @files=($loc1);
        }
        # Turn each listed path into a name relative to $loc1.
        for my $entry (@files) {
            chomp $entry;
            $entry=~s/^\Q$loc1\E\///;
        }
    }
    dowarn('Processing for mode: '.$dirmode."\n");
    dowarn('Location #1: '.$loc1."\n");
    dowarn('Location #2: '.$loc2."\n");
}
1.5 harris41 218: # --------------------------------------------- Process comparison restrictions
1.1 harris41 219: # A list of space separated values (after the file/dir names)
220: # can restrict the comparison.
# %rhash lists the restriction names the script understands; %restrict
# records which of them were requested on the command line.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Test for the key itself: comparing the value with 0 is also true for
    # names that are not in %rhash (undef == 0), which let every unknown
    # restriction through.
    if (exists $rhash{$r}) {
        $restrict{$r}=1;
    }
    else {
        print($invocation), exit(1);
    }
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
233:
1.5 harris41 234: # --------------------------------------- Define output and measure subroutines
# One printer per measurement.  Each entry is a code reference that prints
# "<name>: <value>" (no trailing newline) for its first argument and
# returns nothing.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; });
} qw(existence md5sum cvstime age size lines diffs);
244:
# One measurement routine per comparison.  Every entry takes
# ($file1,$file2) and returns a two-element list ($value1,$value2); the
# caller combines the pair (usually by subtraction or string comparison).
my %MEASURE=do {
    # Run an external command and return everything it wrote to stdout
    # ('' on failure).  The list form of open bypasses the shell, so file
    # names containing spaces or quotes are passed through untouched.
    my $capture=sub {
        my @cmd=@_;
        my $out;
        if (open(my $pipe,'-|',@cmd)) {
            local $/;
            $out=<$pipe>;
            close($pipe);
        }
        return defined($out)?$out:'';
    };
    # Hex digest printed by md5sum(1) for one file, or '' if none was
    # produced.  The whole 32-digit digest is kept.
    my $digest=sub {
        my ($path)=@_;
        my ($hex)=($capture->('md5sum','--',$path)=~/^\\?([0-9a-fA-F]{32})/);
        return defined($hex)?$hex:'';
    };
    # Line count reported by wc -l for one file, or 0 if none was produced.
    my $linecount=sub {
        my ($path)=@_;
        my ($n)=($capture->('wc','-l','--',$path)=~/^\s*(\d+)/);
        return defined($n)?$n:0;
    };
    (
     # 'yes'/'no' for each file.
     'existence' => sub {
         my ($file1,$file2)=@_;
         return ((-e $file1)?'yes':'no', (-e $file2)?'yes':'no');
     },
     # MD5 digest of each file's contents.
     'md5sum' => sub {
         my ($file1,$file2)=@_;
         return ($digest->($file1),$digest->($file2));
     },
     # First file: timestamp recorded in CVS/Entries (via cvstime).
     # Second file: its modification time, passed through utctime.
     'cvstime' => sub {
         my ($file1,$file2)=@_;
         my $rv1=cvstime($file1);
         my @a=stat($file2);
         my $gmt=gmtime($a[9]);
         my $rv2=utctime($gmt);
         return ($rv1,$rv2);
     },
     # Modification time (stat field 9) of each file.
     'age' => sub {
         my ($file1,$file2)=@_;
         return ((stat($file1))[9],(stat($file2))[9]);
     },
     # Size in bytes (stat field 7) of each file.
     'size' => sub {
         my ($file1,$file2)=@_;
         return ((stat($file1))[7],(stat($file2))[7]);
     },
     # Number of lines in each file.
     'lines' => sub {
         my ($file1,$file2)=@_;
         return ($linecount->($file1),$linecount->($file2));
     },
     # Counting differing lines is switched off: this always reports 0
     # for both sides.  (The code that used to follow the early return
     # counted '<' and '>' lines in diff(1) output; it could never run.)
     'diffs' => sub {
         my ($file1,$file2)=@_;
         return (0,0);
     },
    );
};
279:
# Visit every candidate file: measure it against its counterpart, decide
# whether it should be listed, and, in build mode, exit with a status code
# as soon as the first file has been measured.
FLOOP: foreach my $file (@files) {
    # In directory mode $file is a name relative to both locations; in
    # file mode the two locations are the files to compare.
    my ($file1,$file2);
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }
    else {
        $file1=$loc1;
        $file2=$loc2;
    }

    # ------------------------------------------------------------ Measure
    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
    if ($existence1 eq 'no' or $existence2 eq 'no') {
        # Nothing else can be measured when either side is missing.
        ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    }
    else {
        # The CVS timestamp is only looked up in build mode.
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $cvstime=$cvstime1-$cvstime2;
        }
        else {
            $cvstime='n/a';
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical digests: skip the remaining measurements.
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs='0:0';
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # ------------------------------------------------- Decide what to show
    # For each comparison, 1 when the two sides differ.  cvstime only
    # counts in build mode.  Numeric tests treat 'n/a' as 0.
    my %differs=(
        'existence' => ($existence ne 'yes:yes')      ? 1 : 0,
        'md5sum'    => ($md5sum ne 'same')            ? 1 : 0,
        'cvstime'   => ($buildmode and $cvstime!=0)   ? 1 : 0,
        'age'       => ($age!=0)                      ? 1 : 0,
        'size'      => ($size!=0)                     ? 1 : 0,
        'lines'     => ($lines!=0)                    ? 1 : 0,
        'diffs'     => ($diffs ne '0:0')              ? 1 : 0,
    );
    # Only the requested restrictions are considered; all of them when
    # none were given.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    # Count the differences directly instead of deriving a counter from
    # length(@ks), which is the string length of the element count rather
    # than the number of elements.
    my $ndiff=grep { $differs{$_} } @ks;
    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        $showflag=1 if $ndiff>0;
    }
    elsif ($show eq 'same') {
        $showflag=1 if $ndiff==0;
    }

    # ------------------------------------------------ Build-mode exit codes
    if ($buildmode==1) { # -b1
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==2) { # -b2
        if ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==3) { # -b3
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($age<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==4) { # -b4
        if ($existence=~/no$/) {      # second file is missing
            exit(3);
        }
        elsif ($cvstime>0) {
            exit(2);
        }
        elsif ($existence=~/^no/) {   # first file is missing
            exit(1);
        }
        else {
            exit(0);
        }
    }

    # -------------------------------------------------------------- Report
    if ($showflag) {
        print "$file";
        if ($verbose==1) {
            # Each %OUTPUT routine prints its own column; whatever it
            # returns is printed as well, as before.
            for my $column (['existence',$existence],
                            ['cvstime',$cvstime],
                            ['age',$age],
                            ['md5sum',$md5sum],
                            ['size',$size],
                            ['lines',$lines],
                            ['diffs',$diffs]) {
                my ($name,$value)=@$column;
                print "\t";
                print( $OUTPUT{$name}->($value) );
            }
        }
        print "\n";
    }
}
492:
1.5 harris41 493: # ----------------------------------------------------------------- Subroutines
494:
# cvstime($f)
#
# Look up the timestamp that CVS recorded for file $f and return it as
# seconds since the epoch.
#
# The entry is searched for in the CVS/Entries file that sits next to $f.
# Returns 'n/a' when build mode is 3, when CVS/Entries cannot be read, or
# when it has no entry for the file.  When an entry exists, the return
# value is whatever date(1) prints for its timestamp field (an empty
# string if date cannot parse it).
sub cvstime {
    my ($f)=@_;

    # Split into directory part (with trailing slash) and file name.
    my ($path,$file);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }
    else {
        ($path,$file)=('',$f);
    }

    return 'n/a' if $buildmode==3;

    # Entries lines have the form "/name/revision/timestamp/...".  The name
    # is compared as a plain string, so characters such as '.' or '+' in a
    # file name cannot match a different entry, and no shell is involved.
    my $found=0;
    my $stamp='';
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        while (my $line=<$entries>) {
            next unless $line=~/^\//;
            my @fields=split(/\//,$line);
            next unless defined($fields[1]) and $fields[1] eq $file;
            $found=1;
            $stamp=$fields[3] if defined $fields[3];
            last;
        }
        close($entries);
    }
    return 'n/a' unless $found;

    # Convert the recorded UTC timestamp to epoch seconds.
    my $cvstime='';
    if (open(my $date,'-|','date','-d',$stamp.' UTC','--utc','+%s')) {
        my $out=<$date>;
        close($date);
        $cvstime=$out if defined $out;
    }
    chomp $cvstime;
    return $cvstime;
}
1.1 harris41 525:
# utctime($f)
#
# Hand the date string $f, labelled as UTC, to date(1) and return what it
# prints for the "+%s" format (seconds since the epoch), without the
# trailing newline.
sub utctime {
    my $stamp=shift;
    my $epoch=qx{date -d '$stamp UTC' --utc +"%s"};
    chomp($epoch);
    return $epoch;
}
1.1 harris41 532:
# dowarn($msg)
#
# Emit $msg through warn(), except in build mode ($buildmode true), where
# the message is dropped.
sub dowarn {
    my $msg=shift;
    $buildmode or warn($msg);
}
1.5 harris41 537:
538: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4 harris41 539:
540: =head1 NAME
541:
542: filecompare.pl - script used to help probe and compare file statistics
543:
544: =head1 SYNOPSIS
545:
546: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
547:
548: or
549:
550: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
551:
552: Restrictions: a list of space separated values (after the file/dir names)
553: can restrict the comparison.
554: These values can be: existence, cvstime, age, md5sum, size, lines,
555: and/or diffs.
556:
557: Options (before file/dir names):
558:
559: -p show all files that have the same comparison
560:
561: -n show all files that have different comparisons
562:
563: -a show all files (with comparisons)
564:
565: -q only show file names (based on first file/dir)
566:
567: -v verbose mode (default)
568:
569: =head1 DESCRIPTION
570:
filecompare.pl can work in three modes: file comparison mode, directory
comparison mode, or (with the -s flag) status checking mode for LPML.
573:
574: Comparisons can be a function of:
575: * existence similarity
576: * cvs time similarity (first argument treated as CVS source)
577: * age similarity (modification time)
578: * md5sum similarity
579: * size similarity (bytes)
580: * line count difference
581: * number of different lines
582:
583: filecompare.pl integrates smoothly with the LPML installation language
584: (linux packaging markup language). filecompare.pl is a tool that can
585: be used for safe CVS source-to-target installations.
586:
587: =head1 README
588:
589: filecompare.pl integrates smoothly with the LPML installation language
590: (linux packaging markup language). filecompare.pl is a tool that can
591: be used for safe CVS source-to-target installations.
592:
593: The unique identifier is considered to be the file name(s) independent
594: of the directory path.
595:
596: =head1 PREREQUISITES
597:
598: =head1 COREQUISITES
599:
600: =head1 OSNAMES
601:
602: linux
603:
604: =head1 SCRIPT CATEGORIES
605:
606: Packaging/Administrative
607:
608: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>