Annotation of loncom/build/filecompare.pl, revision 1.9
1.1 harris41 1: #!/usr/bin/perl
2:
1.5 harris41 3: # The LearningOnline Network with CAPA
1.4 harris41 4: #
5: # filecompare.pl - script used to help probe and compare file statistics
6: #
1.1 harris41 7: # YEAR=2001
1.4 harris41 8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
9: # 11/14 Guy Albertelli
1.8 harris41 10: # 11/16,11/17 Scott Harrison
1.9 ! harris41 11: # 12/3,12/5 Scott Harrison
1.4 harris41 12: #
1.9 ! harris41 13: # $Id: filecompare.pl,v 1.8 2001/11/17 23:00:10 harris41 Exp $
1.4 harris41 14: ###
1.1 harris41 15:
1.5 harris41 16: ###############################################################################
17: ## ##
18: ## ORGANIZATION OF THIS PERL SCRIPT ##
19: ## ##
20: ## 1. Invocation ##
21: ## 2. Notes ##
22: ## 3. Dependencies ##
23: ## 4. Process command line arguments ##
24: ## 5. Process file/dir location arguments ##
25: ## 6. Process comparison restrictions ##
26: ## 7. Define output and measure subroutines ##
27: ## 8. Loop through files and calculate differences ##
28: ## 9. Subroutines ##
29: ## 10. POD (plain old documentation, CPAN style) ##
30: ## ##
31: ###############################################################################
32:
# ------------------------------------------------------------------ Invocation
# Usage text.  It is printed, followed by exit status 1, whenever the command
# line cannot be interpreted.  The -s synopsis lists SOURCE= before TARGET=
# because that is the order in which the status-mode parser consumes them.
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
or
filecompare.pl [ options ... ] -s SOURCE=[source] TARGET=[target] MODE=[mode]
    LOC1 LOC2

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls exit code of this script; 0 unless...)
   N=1: md5sum=same --> 1; cvstime<0 --> 2
   N=2: same as N=1 except without md5sum
   N=3: md5sum=same --> 1; age<0 --> 2
   N=4: cvstime>0 --> 2

The third way to pass arguments is set by the -s flag.
filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2

TARGET corresponds to the root path of LOC2.  SOURCE corresponds to
the root path of LOC1.  MODE can either be file, directory, link, or fileglob.

END
# Called with no arguments at all: show the usage text and stop.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
1.5 harris41 70:
1.1 harris41 71: # ----------------------------------------------------------------------- Notes
72: #
73: # What are all the different ways to compare two files and how to look
74: # at the differences?
75: #
76: # Ways of comparison:
77: # existence similarity
1.6 harris41 78: # cvs time similarity (1st arg treated as CVS source; only for buildmode)
1.1 harris41 79: # age similarity (modification time)
80: # md5sum similarity
81: # size similarity (bytes)
82: # line count difference
83: # number of different lines
84: #
85: # Quantities of comparison:
86: # existence (no,yes); other values become 'n/a'
1.2 harris41 87: # cvstime in seconds
1.1 harris41 88: # age in seconds
89: # md5sum ("same" or "different")
90: # size similarity (byte difference)
91: # line count difference (integer)
92: # number of different lines (integer)
93:
1.5 harris41 94: # ---------------------------------------------------------------- Dependencies
1.1 harris41 95: # implementing from unix command line (assuming bash)
96: # md5sum, diff, wc -l
97:
98: # ---------------------------------------------- Process command line arguments
99: # Flags (before file/dir names):
100: # -p show all files the same
101: # -n show all files different
102: # -a show all files (with comparisons)
103: # -q only show file names (based on first file/dir)
104: # -v verbose mode (default)
1.5 harris41 105: # -bN build/install mode (returns exitcode)
1.9 ! harris41 106: # -s status checking mode for lpml
! 107:
# Option state; these defaults apply when no flag overrides them.
my $verbose='1';     # 1: print every measurement; 0 (-q): print file names only
my $show='all';      # which files get listed: 'all', 'same' or 'different'
my $buildmode=0;     # N from -bN; non-zero silences dowarn() and selects exit codes
my $statusmode=0;    # set to 1 by -s (status checking mode for lpml)

# Consume leading "-x" flags; stop at the first argument that is not a flag.
ALOOP: while (@ARGV) {
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    if ($flag eq 'b') {
        # -bN must carry its digit.  Without this check a bare "-b" would
        # leave $1 holding the letter 'b' from the match above, and that
        # letter would silently become the build mode.
        unless ($ARGV[0]=~/^\-b(\d)/) {
            print($invocation);
            exit(1);
        }
        $buildmode=$1;
        shift @ARGV;
        next ALOOP;
    }
    shift @ARGV;
    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    elsif ($flag eq 's') { $statusmode=1; }
    else {
        # Unknown flag: show the usage text and give up.
        print($invocation);
        exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1 harris41 139:
my @files;
my $loc1;
my $loc2;
# $dirmode is declared here, at file scope, rather than inside the else
# branch below.  The comparison loop further down tests it, and a variable
# declared with "my" inside the branch is no longer visible there.
my $dirmode;
# ----------------------------------------- If status checking mode for lpml
my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob);
my ($source,$target);
if ($statusmode==1) {
    # Arguments are consumed positionally:
    #   SOURCE=<root> TARGET=<root> MODE=<mode> LOC1 LOC2
    ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5);
    $targetroot.='/' if $targetroot!~/\/$/;
    $sourceroot=~s/^SOURCE\=//;
    $targetroot=~s/^TARGET\=//;
    $source=$sourceroot.'/'.$sourceglob;
    $target=$targetroot.''.$targetglob;
#    print "SOURCE: $source\n";
#    print "TARGET: $target\n";
    # Only MODE=fileglob fills the file list; other modes leave it empty.
    if ($mode eq 'MODE=fileglob') {
        @files=glob($source);
    }
    # NOTE(review): $loc1, $loc2 and $dirmode stay undefined in this mode and
    # $target is not read anywhere in the visible code, so the comparison
    # loop runs with undefined locations -- status mode looks unfinished;
    # confirm the intended behavior before relying on it.
}
else {

# ----------------------------------------- Process file/dir location arguments
    # FILE1 FILE2 or DIR1 DIR2
    $loc1=shift @ARGV;
    $loc2=shift @ARGV;
    $dirmode='directories';
    unless ($loc1 and $loc2) {
        print "LOC1: $loc1\nLOC2: $loc2\n";
        print($invocation), exit(1);
    }
    if (-f $loc1) {
        # A plain file as first location switches to one-to-one comparison.
        $dirmode='files';
        @files=($loc1);
    }
    else {
        if (-e $loc1) {
            @files=`find $loc1 -type f`;
        }
        else {
            @files=($loc1);
        }
        # Turn each entry into a path relative to $loc1.  \Q..\E keeps
        # characters such as '.' or '+' in the directory name from being
        # read as regular expression syntax.
        foreach my $entry (@files) {
            chomp $entry;
            $entry=~s/^\Q$loc1\E\///;
        }
    }
    dowarn('Processing for mode: '.$dirmode."\n");
    dowarn('Location #1: '.$loc1."\n");
    dowarn('Location #2: '.$loc2."\n");
}
# --------------------------------------------- Process comparison restrictions
# A list of space separated values (after the file/dir names)
# can restrict the comparison.
# %rhash is the set of accepted restriction names; only its keys matter.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Test for the key itself.  Comparing the stored value with 0 would also
    # accept unknown names, because a missing entry is numerically 0 too.
    if (exists $rhash{$r}) {
        $restrict{$r}=1;
    }
    else {
        print($invocation);
        exit(1);
    }
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
202:
# --------------------------------------- Define output and measure subroutines
# %OUTPUT maps a comparison name to a closure that prints "<name>: <value>"
# (no trailing newline) to the currently selected filehandle and returns
# nothing.  All seven printers share one shape, so they are generated from
# the list of names.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub {print $label.': '.$_[0]; return;});
} ('existence','md5sum','cvstime','age','size','lines','diffs');
213:
214: my %MEASURE=(
1.4 harris41 215: 'existence' => ( sub { my ($file1,$file2)=@_;
1.1 harris41 216: my $rv1=(-e $file1)?'yes':'no';
217: my $rv2=(-e $file2)?'yes':'no';
1.4 harris41 218: return ($rv1,$rv2); } ),
219: 'md5sum'=>( sub { my ($file1,$file2)=@_;
1.3 albertel 220: my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1;
221: my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2;
1.4 harris41 222: return ($rv1,$rv2); } ),
223: 'cvstime'=>( sub { my ($file1,$file2)=@_;
1.2 harris41 224: my $rv1=&cvstime($file1);
225: my @a=stat($file2); my $gmt=gmtime($a[9]);
226: my $rv2=&utctime($gmt);
1.4 harris41 227: return ($rv1,$rv2); } ),
228: 'age'=>( sub { my ($file1,$file2)=@_;
1.2 harris41 229: my @a=stat($file1); my $rv1=$a[9];
230: @a=stat($file2); my $rv2=$a[9];
1.4 harris41 231: return ($rv1,$rv2); } ),
232: 'size'=>( sub { my ($file1,$file2)=@_;
1.1 harris41 233: my @a=stat($file1); my $rv1=$a[7];
234: @a=stat($file2); my $rv2=$a[7];
1.4 harris41 235: return ($rv1,$rv2); } ),
236: 'lines'=>( sub { my ($file1,$file2)=@_;
1.1 harris41 237: my $rv1=`wc -l $file1`; chop $rv1;
238: my $rv2=`wc -l $file2`; chop $rv2;
1.4 harris41 239: return ($rv1,$rv2); } ),
240: 'diffs'=>( sub { my ($file1,$file2)=@_;
1.1 harris41 241: my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`;
242: chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//;
243: my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`;
244: chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//;
1.4 harris41 245: return ($rv1,$rv2); } ),
1.1 harris41 246: );
247:
# Compare each entry of @files between the two locations, decide whether it
# should be listed (-a/-n/-p), and print it.  In build mode 1-4 the script
# exits with a status code as soon as the first file has been measured.
FLOOP: foreach my $file (@files) {
    my $file1;
    my $file2;
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }
    else {
        $file1=$loc1;
        $file2=$loc2;
    }
    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;

    # Every measurement stays 'n/a' unless both files exist.
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    if ($existence1 eq 'yes' and $existence2 eq 'yes') {
        # The CVS time is only looked up in build mode.
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $cvstime=$cvstime1-$cvstime2;
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Matching digests: skip the remaining, more expensive measures.
            ($md5sum,$size,$lines,$diffs)=('same',0,0,'0:0');
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # One flag per comparison: true when the two files differ in that respect.
    # 'n/a' is numerically 0, so a missing file only registers as a
    # difference through existence, md5sum and diffs.  cvstime counts only
    # in build mode, the only mode in which it is measured.
    my %differs=(
        'existence' => (($existence ne 'yes:yes') ? 1 : 0),
        'md5sum'    => (($md5sum ne 'same') ? 1 : 0),
        'cvstime'   => (($buildmode and $cvstime!=0) ? 1 : 0),
        'age'       => (($age!=0) ? 1 : 0),
        'size'      => (($size!=0) ? 1 : 0),
        'lines'     => (($lines!=0) ? 1 : 0),
        'diffs'     => (($diffs ne '0:0') ? 1 : 0),
    );
    # Comparisons taken into account: the restrictions given on the command
    # line, or all of them when there are none.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    # Count the differences directly.  The previous tally started from
    # length(@ks), which is the number of characters in the element count
    # rather than the count itself.
    my $ndiff=grep {$differs{$_}} @ks;

    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        $showflag=1 if $ndiff>0;
    }
    elsif ($show eq 'same') {
        $showflag=1 if $ndiff==0;
    }

    # Build modes report through the exit status and stop at the first file.
    if ($buildmode==1) {
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==2) {
        if ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==3) {
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($age<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==4) {
        if ($existence=~/no$/) {        # second file missing
            exit(3);
        }
        elsif ($cvstime>0) {
            exit(2);
        }
        elsif ($existence=~/^no/) {     # first file missing
            exit(1);
        }
        else {
            exit(0);
        }
    }

    if ($showflag) {
        print "$file";
        if ($verbose==1) {
            my %value=('existence'=>$existence,'cvstime'=>$cvstime,
                       'age'=>$age,'md5sum'=>$md5sum,'size'=>$size,
                       'lines'=>$lines,'diffs'=>$diffs);
            # Fixed column order, each column preceded by a tab.
            foreach my $key ('existence','cvstime','age','md5sum',
                             'size','lines','diffs') {
                print "\t";
                $OUTPUT{$key}->($value{$key});
            }
        }
        print "\n";
    }
}
460:
1.5 harris41 461: # ----------------------------------------------------------------- Subroutines
462:
# cvstime($f)
#   Looks up $f in the CVS/Entries file of its directory and returns the
#   timestamp recorded there, converted by date(1) to seconds since the epoch.
#   Returns 'n/a' without touching the file system when $buildmode is 3.
#   Dies when CVS/Entries cannot be read or has no entry for the file.
sub cvstime {
    my ($f)=@_;
    return 'n/a' if $buildmode==3;

    # Split into directory part (with trailing slash) and file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }

    # Find the entry by literal comparison.  Passing the name to grep as a
    # pattern would let '.' match any character and could pick up the entry
    # of a different, similarly named file.
    my $entry;
    if (open(my $fh,'<',$path.'CVS/Entries')) {
        while (my $line=<$fh>) {
            if (index($line,'/'.$file.'/')==0) {
                $entry=$line;
                last;
            }
        }
        close($fh);
    }
    unless (defined $entry) {
        die('*** ERROR *** cannot grep against '.${path}.
            'CVS/Entries for ' .$file . "\n");
    }

    # Entry layout is /name/revision/timestamp/...; splitting on '/' puts
    # the timestamp at index 3 (index 0 is the empty text before the
    # leading slash).
    my @fields=split(/\//,$entry);
    my $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`;
    chomp $cvstime;
    return $cvstime;
}
1.1 harris41 487:
# utctime($f)
#   Hands the date string $f, tagged as UTC, to date(1) and returns what it
#   prints for the "%s" format (seconds since the epoch), newline removed.
sub utctime {
    my ($f)=@_;
    chomp(my $seconds=`date -d '$f UTC' --utc +"%s"`);
    return $seconds;
}
1.1 harris41 494:
# dowarn($msg)
#   Sends $msg to warn(), but only while $buildmode is false; once a build
#   mode is set the message is dropped.
sub dowarn {
    my ($msg)=@_;
    $buildmode or warn($msg);
}
1.5 harris41 499:
500: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4 harris41 501:
502: =head1 NAME
503:
504: filecompare.pl - script used to help probe and compare file statistics
505:
506: =head1 SYNOPSIS
507:
508: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
509:
510: or
511:
512: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
513:
514: Restrictions: a list of space separated values (after the file/dir names)
515: can restrict the comparison.
516: These values can be: existence, cvstime, age, md5sum, size, lines,
517: and/or diffs.
518:
519: Options (before file/dir names):
520:
521: -p show all files that have the same comparison
522:
523: -n show all files that have different comparisons
524:
525: -a show all files (with comparisons)
526:
527: -q only show file names (based on first file/dir)
528:
529: -v verbose mode (default)
530:
531: =head1 DESCRIPTION
532:
533: filecompare.pl can work in two modes: file comparison mode, or directory
534: comparison mode.
535:
536: Comparisons can be a function of:
537: * existence similarity
538: * cvs time similarity (first argument treated as CVS source)
539: * age similarity (modification time)
540: * md5sum similarity
541: * size similarity (bytes)
542: * line count difference
543: * number of different lines
544:
545: filecompare.pl integrates smoothly with the LPML installation language
546: (linux packaging markup language). filecompare.pl is a tool that can
547: be used for safe CVS source-to-target installations.
548:
549: =head1 README
550:
551: filecompare.pl integrates smoothly with the LPML installation language
552: (linux packaging markup language). filecompare.pl is a tool that can
553: be used for safe CVS source-to-target installations.
554:
555: The unique identifier is considered to be the file name(s) independent
556: of the directory path.
557:
558: =head1 PREREQUISITES
559:
560: =head1 COREQUISITES
561:
562: =head1 OSNAMES
563:
564: linux
565:
566: =head1 SCRIPT CATEGORIES
567:
568: Packaging/Administrative
569:
570: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>