Annotation of loncom/build/filecompare.pl, revision 1.5
1.1 harris41 1: #!/usr/bin/perl
2:
1.5 ! harris41 3: # The LearningOnline Network with CAPA
1.4 harris41 4: #
5: # filecompare.pl - script used to help probe and compare file statistics
6: #
1.1 harris41 7: # YEAR=2001
1.4 harris41 8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
9: # 11/14 Guy Albertelli
10: # 11/16 Scott Harrison
11: #
1.5 ! harris41 12: # $Id: filecompare.pl,v 1.4 2001/11/16 20:06:08 harris41 Exp $
1.4 harris41 13: ###
1.1 harris41 14:
1.5 ! harris41 15: ###############################################################################
! 16: ## ##
! 17: ## ORGANIZATION OF THIS PERL SCRIPT ##
! 18: ## ##
! 19: ## 1. Invocation ##
! 20: ## 2. Notes ##
! 21: ## 3. Dependencies ##
! 22: ## 4. Process command line arguments ##
! 23: ## 5. Process file/dir location arguments ##
! 24: ## 6. Process comparison restrictions ##
! 25: ## 7. Define output and measure subroutines ##
! 26: ## 8. Loop through files and calculate differences ##
! 27: ## 9. Subroutines ##
! 28: ## 10. POD (plain old documentation, CPAN style) ##
! 29: ## ##
! 30: ###############################################################################
! 31:
1.4 harris41 32: # ------------------------------------------------------------------ Invocation
# Usage text, printed whenever the script is started without arguments or
# with arguments it cannot interpret.  The exit codes listed for -bN follow
# the buildmode branches of the per-file loop further down in this script
# (mode 1 exits 1 when the md5sums match and 2 when cvstime<0).
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls exit code of this script; 0 unless...)
   N=1: md5sum=same --> 1; cvstime<0 --> 2
   N=2: same as N=1 except without md5sum
   N=3: md5sum=same --> 1; age<0 --> 2
   N=4: cvstime>0 --> 2
END
# Nothing to do without arguments: show the usage text and signal failure.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
1.5 ! harris41 59:
1.1 harris41 60: # ----------------------------------------------------------------------- Notes
61: #
62: # What are all the different ways to compare two files and how to look
63: # at the differences?
64: #
65: # Ways of comparison:
66: # existence similarity
1.2 harris41 67: # cvs time similarity (first argument treated as CVS source)
1.1 harris41 68: # age similarity (modification time)
69: # md5sum similarity
70: # size similarity (bytes)
71: # line count difference
72: # number of different lines
73: #
74: # Quantities of comparison:
75: # existence (no,yes); other values become 'n/a'
1.2 harris41 76: # cvstime in seconds
1.1 harris41 77: # age in seconds
78: # md5sum ("same" or "different")
79: # size similarity (byte difference)
80: # line count difference (integer)
81: # number of different lines (integer)
82:
1.5 ! harris41 83: # ---------------------------------------------------------------- Dependencies
1.1 harris41 84: # implementing from unix command line (assuming bash)
85: # md5sum, diff, wc -l
86:
87: # ---------------------------------------------- Process command line arguments
88: # Flags (before file/dir names):
89: # -p show all files the same
90: # -n show all files different
91: # -a show all files (with comparisons)
92: # -q only show file names (based on first file/dir)
93: # -v verbose mode (default)
1.5 ! harris41 94: # -bN build/install mode (returns exitcode)
# Defaults: verbose output, show every file, buildmode switched off.
my $verbose='1';
my $show='all';
my $buildmode=0;
# Consume leading "-x" style flags.  The first argument that does not look
# like a flag ends option processing and stays in @ARGV for the file/dir
# location handling that follows.
while (@ARGV) {
    last unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    if ($flag eq 'b') {
        # -bN needs its own branch: it carries a digit and is not one of
        # the single-letter flags handled in SWITCH, so letting it fall
        # through would end in the usage/exit line.
        if ($ARGV[0]=~/^\-b(\d)/) {
            $buildmode=$1;
        }
        else {
            # -b without a digit cannot be interpreted.
            print($invocation), exit(1);
        }
        shift @ARGV;
        next;
    }
    shift @ARGV;
    SWITCH: {
        $verbose=0, last SWITCH if $flag eq 'q';
        $verbose=1, last SWITCH if $flag eq 'v';
        $show='same', last SWITCH if $flag eq 'p';
        $show='different', last SWITCH if $flag eq 'n';
        $show='all', last SWITCH if $flag eq 'a';
        # Unknown flag: show usage and fail.
        print($invocation), exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1 harris41 122:
1.5 ! harris41 123: # ----------------------------------------- Process file/dir location arguments
1.1 harris41 124: # FILE1 FILE2 or DIR1 DIR2
# The next two arguments are the locations to compare: either two files or
# two directories.  @files ends up holding the names to iterate over; in
# directory mode these are paths relative to $loc1.
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;
# Both locations are mandatory.  Test for defined/non-empty rather than
# truth so that a location literally named "0" is accepted.
unless (defined($loc1) and length($loc1) and
        defined($loc2) and length($loc2)) {
    print($invocation), exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
elsif (-e $loc1) {
    # NOTE(review): $loc1 is handed to the shell unquoted here, so names
    # containing whitespace or shell metacharacters will not work.
    @files=`find $loc1 -type f`;
    # Strip the "$loc1/" prefix so each entry is relative to the directory.
    # Trailing slashes on $loc1 are ignored for the match, and \Q..\E keeps
    # characters such as "." or "+" in the name from acting as regex syntax.
    (my $prefix=$loc1)=~s/\/+$//;
    foreach my $entry (@files) {
        chomp $entry;
        $entry=~s/^\Q$prefix\E\/+//;
    }
}
else {
    # $loc1 does not exist; keep it as the single entry so that the
    # existence comparison in the main loop can report it.
    @files=($loc1);
    chomp @files;
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
1.1 harris41 148:
1.5 ! harris41 149: # --------------------------------------------- Process comparison restrictions
1.1 harris41 150: # A list of space separated values (after the file/dir names)
151: # can restrict the comparison.
# %rhash lists the comparison names that may be given as restrictions;
# only its keys matter.  %restrict collects the ones actually requested.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Use exists(): a numeric test of the value would also succeed for
    # unknown words, because a missing key yields undef, which equals 0.
    if (exists $rhash{$r}) {$restrict{$r}=1;}
    else {print($invocation), exit(1);}
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
164:
1.5 ! harris41 165: # --------------------------------------- Define output and measure subroutines
# %OUTPUT maps each comparison name to a sub that prints "<name>: <value>"
# for the value passed as its first argument.  The subs print directly and
# return nothing.  One closure is generated per label so that the format
# lives in a single place.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
175:
# %MEASURE maps each comparison name to a sub taking ($file1,$file2) and
# returning a two-element list: the measurement for each file.
#
# NOTE(review): the md5sum, lines and diffs entries interpolate the file
# names into shell commands without quoting, so names containing whitespace
# or shell metacharacters are not handled.
my %MEASURE=(
    # 'yes'/'no' for each file, depending on whether it exists.
    'existence'=>sub {
        my ($file1,$file2)=@_;
        return ((-e $file1)?'yes':'no', (-e $file2)?'yes':'no');
    },
    # First space-separated field of the md5sum output for each file.
    # The field is the complete digest and must not be shortened.
    'md5sum'=>sub {
        my ($file1,$file2)=@_;
        my ($rv1)=split(/ /,`md5sum $file1`);
        my ($rv2)=split(/ /,`md5sum $file2`);
        return ($rv1,$rv2);
    },
    # Epoch seconds: cvstime() for the first file, and the second file's
    # modification time passed through gmtime() and utctime().
    'cvstime'=>sub {
        my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        my @a=stat($file2);
        my $gmt=gmtime($a[9]);
        my $rv2=utctime($gmt);
        return ($rv1,$rv2);
    },
    # Modification time (stat field 9) of each file.
    'age'=>sub {
        my ($file1,$file2)=@_;
        my @a=stat($file1); my $rv1=$a[9];
        @a=stat($file2); my $rv2=$a[9];
        return ($rv1,$rv2);
    },
    # Size in bytes (stat field 7) of each file.
    'size'=>sub {
        my ($file1,$file2)=@_;
        my @a=stat($file1); my $rv1=$a[7];
        @a=stat($file2); my $rv2=$a[7];
        return ($rv1,$rv2);
    },
    # Line count of each file.  wc prints "<count> <name>"; only the
    # leading number is returned.
    'lines'=>sub {
        my ($file1,$file2)=@_;
        my ($rv1)=(`wc -l $file1`=~/^\s*(\d+)/);
        my ($rv2)=(`wc -l $file2`=~/^\s*(\d+)/);
        return ($rv1,$rv2);
    },
    # Number of diff output lines starting with '<' and with '>'.
    # diff is run once and both counts are taken from that output.
    'diffs'=>sub {
        my ($file1,$file2)=@_;
        my @out=`diff $file1 $file2`;
        my $rv1=grep {/^</} @out;
        my $rv2=grep {/^>/} @out;
        return ($rv1,$rv2);
    },
);
209:
# Main loop: measure every entry of @files, decide whether it should be
# shown, and print one line per shown file.  In buildmode the script exits
# with a status code as soon as the first file has been measured.
FLOOP: foreach my $file (@files) {
    # In directory mode $file is relative to both locations; in file mode
    # the two locations are the files themselves.
    my ($file1,$file2);
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }
    else {
        $file1=$loc1;
        $file2=$loc2;
    }

    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
    if ($existence1 eq 'no' or $existence2 eq 'no') {
        # Nothing else can be measured when either file is missing.
        ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    }
    else {
        my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
        $cvstime=$cvstime1-$cvstime2;
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Same digest: the remaining measures are taken to be zero
            # without running them.
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs=0;
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # For each comparison, whether it reports a difference.  A value of
    # 'n/a' is numerically 0, so the numeric measures report no difference
    # in that case; a missing file is caught by 'existence'.
    my %differs=(
        'existence'=>($existence ne 'yes:yes'),
        'md5sum'   =>($md5sum ne 'same'),
        'cvstime'  =>($cvstime!=0),
        'age'      =>($age!=0),
        'size'     =>($size!=0),
        'lines'    =>($lines!=0),
        'diffs'    =>($diffs ne '0:0'),
    );
    # Comparisons taken into account: the restrictions from the command
    # line, or all of them when none were given.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    my $ndifferent=grep { $differs{$_} } @ks;
    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        # Shown when at least one considered comparison differs.
        $showflag=1 if $ndifferent>0;
    }
    elsif ($show eq 'same') {
        # Shown only when none of the considered comparisons differs.
        $showflag=1 if $ndifferent==0;
    }

    # Buildmode: the exit status carries the result and nothing is printed.
    if ($buildmode==1) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==2) {
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==3) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $age<0;
        exit(0);
    }
    elsif ($buildmode==4) {
        exit(2) if $cvstime>0;
        exit(0);
    }

    # Honour the -p/-n/-a selection computed above.
    next FLOOP unless $showflag;

    print "$file";
    if ($verbose==1) {
        my %value=(
            'existence'=>$existence, 'cvstime'=>$cvstime, 'age'=>$age,
            'md5sum'=>$md5sum, 'size'=>$size, 'lines'=>$lines,
            'diffs'=>$diffs,
        );
        # The %OUTPUT subs print their own "<name>: <value>" text.
        foreach my $key ('existence','cvstime','age','md5sum',
                         'size','lines','diffs') {
            print "\t";
            $OUTPUT{$key}->($value{$key});
        }
    }
    print "\n";
}
408:
1.5 ! harris41 409: # ----------------------------------------------------------------- Subroutines
! 410:
# cvstime($f): look up $f in the CVS/Entries file located next to it and
# return the fourth '/'-separated field of its entry line, converted to
# epoch seconds by date(1) with the field read as UTC.
# Returns the string 'n/a' when $buildmode is 3.
# Dies when CVS/Entries cannot be read or holds no entry for the file.
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash) and file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }
    return 'n/a' if $buildmode==3;

    # Read the Entries file directly and compare the name literally, so
    # that characters such as '.' in the file name cannot match a
    # different entry and the name never reaches a shell.
    my $wanted='/'.$file.'/';
    my $entry;
    if (open(my $fh,'<',$path.'CVS/Entries')) {
        while (my $line=<$fh>) {
            if (index($line,$wanted)==0) {
                $entry=$line;
                last;
            }
        }
        close($fh);
    }
    defined($entry) or
        die('*** ERROR *** cannot grep against '.$path.
            'CVS/Entries for ' .$file . "\n");
    # The entry line starts with '/', so field 0 is empty, field 1 is the
    # name and field 3 is the one handed to date.
    my @fields=split(/\//,$entry);
    my $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`;
    chomp $cvstime;
    return $cvstime;
}
1.1 harris41 435:
# utctime($f): hand the date string $f, suffixed with " UTC", to date(1)
# and return what it prints for +"%s" (seconds since the epoch), with the
# trailing newline removed.
sub utctime {
    my ($f)=@_;
    chomp(my $epoch=`date -d '$f UTC' --utc +"%s"`);
    return $epoch;
}
1.1 harris41 442:
# dowarn($msg): emit $msg through warn(), but only while $buildmode is
# false; in buildmode the message is dropped.
sub dowarn {
    my ($msg)=@_;
    $buildmode or warn($msg);
}
1.5 ! harris41 447:
! 448: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4 harris41 449:
450: =head1 NAME
451:
452: filecompare.pl - script used to help probe and compare file statistics
453:
454: =head1 SYNOPSIS
455:
456: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
457:
458: or
459:
460: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
461:
462: Restrictions: a list of space separated values (after the file/dir names)
463: can restrict the comparison.
464: These values can be: existence, cvstime, age, md5sum, size, lines,
465: and/or diffs.
466:
467: Options (before file/dir names):
468:
469: -p show all files that have the same comparison
470:
471: -n show all files that have different comparisons
472:
473: -a show all files (with comparisons)
474:
475: -q only show file names (based on first file/dir)
476:
477: -v verbose mode (default)
478:
479: =head1 DESCRIPTION
480:
481: filecompare.pl can work in two modes: file comparison mode, or directory
482: comparison mode.
483:
484: Comparisons can be a function of:
485: * existence similarity
486: * cvs time similarity (first argument treated as CVS source)
487: * age similarity (modification time)
488: * md5sum similarity
489: * size similarity (bytes)
490: * line count difference
491: * number of different lines
492:
493: filecompare.pl integrates smoothly with the LPML installation language
494: (linux packaging markup language). filecompare.pl is a tool that can
495: be used for safe CVS source-to-target installations.
496:
497: =head1 README
498:
499: filecompare.pl integrates smoothly with the LPML installation language
500: (linux packaging markup language). filecompare.pl is a tool that can
501: be used for safe CVS source-to-target installations.
502:
503: The unique identifier is considered to be the file name(s) independent
504: of the directory path.
505:
506: =head1 PREREQUISITES
507:
508: =head1 COREQUISITES
509:
510: =head1 OSNAMES
511:
512: linux
513:
514: =head1 SCRIPT CATEGORIES
515:
516: Packaging/Administrative
517:
518: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>