Annotation of loncom/build/filecompare.pl, revision 1.8
1.1 harris41 1: #!/usr/bin/perl
2:
1.5 harris41 3: # The LearningOnline Network with CAPA
1.4 harris41 4: #
5: # filecompare.pl - script used to help probe and compare file statistics
6: #
1.1 harris41 7: # YEAR=2001
1.4 harris41 8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
9: # 11/14 Guy Albertelli
1.8 ! harris41 10: # 11/16,11/17 Scott Harrison
1.4 harris41 11: #
1.8 ! harris41 12: # $Id: filecompare.pl,v 1.7 2001/11/17 21:29:24 harris41 Exp $
1.4 harris41 13: ###
1.1 harris41 14:
1.5 harris41 15: ###############################################################################
16: ## ##
17: ## ORGANIZATION OF THIS PERL SCRIPT ##
18: ## ##
19: ## 1. Invocation ##
20: ## 2. Notes ##
21: ## 3. Dependencies ##
22: ## 4. Process command line arguments ##
23: ## 5. Process file/dir location arguments ##
24: ## 6. Process comparison restrictions ##
25: ## 7. Define output and measure subroutines ##
26: ## 8. Loop through files and calculate differences ##
27: ## 9. Subroutines ##
28: ## 10. POD (plain old documentation, CPAN style) ##
29: ## ##
30: ###############################################################################
31:
1.4 harris41 32: # ------------------------------------------------------------------ Invocation
# Usage text. It is printed, followed by exit status 1, by the command line
# checks in this script (no arguments, unknown option, missing location).
# The text below is emitted verbatim, so it must not be re-wrapped.
1.1 harris41 33: my $invocation=<<END;
1.4 harris41 34: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
35: or
36: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
37:
38: Restrictions: a list of space separated values (after the file/dir names)
39: can restrict the comparison.
40: These values can be: existence, cvstime, age, md5sum, size, lines,
41: and/or diffs.
42:
43: Options (before file/dir names):
44: -p show all files that have the same comparison
45: -n show all files that have different comparisons
46: -a show all files (with comparisons)
47: -q only show file names (based on first file/dir)
48: -v verbose mode (default)
1.5 harris41 49: -bN buildmode (controls exit code of this script; 0 unless...)
1.6 harris41 50: N=1: md5sum=same --> 1; cvstime<0 --> 2
1.5 harris41 51: N=2: same as N=1 except without md5sum
52: N=3: md5sum=same --> 1; age<0 --> 2
53: N=4: cvstime>0 --> 2
1.1 harris41 54: END
# Called with no arguments at all: show the usage text and stop with status 1.
55: unless (@ARGV) {
56: print $invocation;
57: exit 1;
58: }
1.5 harris41 59:
1.1 harris41 60: # ----------------------------------------------------------------------- Notes
61: #
62: # What are all the different ways to compare two files and how to look
63: # at the differences?
64: #
65: # Ways of comparison:
66: # existence similarity
1.6 harris41 67: # cvs time similarity (1st arg treated as CVS source; only for buildmode)
1.1 harris41 68: # age similarity (modification time)
69: # md5sum similarity
70: # size similarity (bytes)
71: # line count difference
72: # number of different lines
73: #
74: # Quantities of comparison:
75: # existence (no,yes); other values become 'n/a'
1.2 harris41 76: # cvstime in seconds
1.1 harris41 77: # age in seconds
78: # md5sum ("same" or "different")
79: # size similarity (byte difference)
80: # line count difference (integer)
81: # number of different lines (integer)
82:
1.5 harris41 83: # ---------------------------------------------------------------- Dependencies
1.1 harris41 84: # implementing from unix command line (assuming bash)
85: # md5sum, diff, wc -l
86:
87: # ---------------------------------------------- Process command line arguments
88: # Flags (before file/dir names):
89: # -p show all files the same
90: # -n show all files different
91: # -a show all files (with comparisons)
92: # -q only show file names (based on first file/dir)
93: # -v verbose mode (default)
1.5 harris41 94: # -bN build/install mode (returns exitcode)
# Option state, filled in from the leading "-x" flags on the command line.
my $verbose='1';  # 1: print every comparison value; 0 (-q): file names only
my $show='all';   # which files to list: 'all' (-a), 'same' (-p), 'different' (-n)
my $buildmode=0;  # N taken from -bN; selects how the exit code is computed

# Consume arguments for as long as they look like "-<word character>...".
# The first argument that does not is the first file/dir location.
ALOOP: while (@ARGV) {
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    if ($flag eq 'b') {
        # -bN must carry its digit.  Without this check a bare "-b" left
        # $1 holding 'b' from the match above (a failed match does not
        # reset $1), so $buildmode silently became the string 'b'.
        unless ($ARGV[0]=~/^\-b(\d)/) {
            print($invocation);
            exit(1);
        }
        $buildmode=$1;
        shift @ARGV;
        next ALOOP;
    }
    shift @ARGV;
    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    else {
        # Unknown option: show the usage text and give up.
        print($invocation);
        exit(1);
    }
}
# dowarn() only reports when $buildmode is false.
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1 harris41 124:
1.5 harris41 125: # ----------------------------------------- Process file/dir location arguments
1.1 harris41 126: # FILE1 FILE2 or DIR1 DIR2
# The two things to compare: either two files or two directory trees.
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';  # switched to 'files' when $loc1 is a plain file
my @files;                  # in directory mode: paths relative to $loc1
unless ($loc1 and $loc2) {
    print($invocation);
    exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
else {
    if (-e $loc1) {
        # List-form pipe open runs find without a shell, so blanks and
        # shell metacharacters in $loc1 reach find unchanged.
        if (open(my $findfh,'-|','find',$loc1,'-type','f')) {
            @files=<$findfh>;
            close($findfh);
        }
        else {
            warn('*** WARNING *** cannot run find on '.$loc1.": $!\n");
        }
    }
    else {
        # Nothing there: keep the name so the main loop reports it as missing.
        @files=($loc1);
    }
    # Turn "LOC1/sub/file" into "sub/file".  \Q...\E keeps characters such
    # as '.' or '+' in $loc1 from being read as regex syntax.
    foreach my $entry (@files) {
        chomp $entry;
        $entry=~s/^\Q$loc1\E\///;
    }
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
1.1 harris41 150:
1.5 harris41 151: # --------------------------------------------- Process comparison restrictions
1.1 harris41 152: # A list of space separated values (after the file/dir names)
153: # can restrict the comparison.
# Names that may follow the two locations to restrict the comparison.
# Only the keys matter; the values are never used.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;  # restriction name => 1 for every restriction requested
while (@ARGV) {
    my $r=shift @ARGV;
    # exists(), not "==0": an unknown name yields undef, and undef==0 is
    # true, so the old test accepted every word and never printed the usage.
    if (exists $rhash{$r}) {
        $restrict{$r}=1;
    }
    else {
        print($invocation);
        exit(1);
    }
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
166:
1.5 harris41 167: # --------------------------------------- Define output and measure subroutines
# Output handlers, one per comparison.  Each prints "<name>: <value>" for
# its single argument to the currently selected filehandle and returns
# nothing (an empty list / undef).
my %OUTPUT=map {
    my $label=$_;
    # $_[0] is the scalar element; the former @_[0] was a one-element slice.
    ($label => sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
177:
# Run an external command WITHOUT a shell and return everything it wrote to
# stdout.  Returns '' when the command cannot be started or prints nothing.
# File names are passed as separate arguments, so blanks, quotes and other
# shell metacharacters in them are harmless.
my $run_command=sub {
    my @cmd=@_;
    open(my $pipe,'-|',@cmd) or return '';
    local $/;  # read the whole output in one go
    my $out=<$pipe>;
    close($pipe);
    return defined($out) ? $out : '';
};

# Measurement routines, one per comparison.  Each takes ($file1,$file2) and
# returns a two element list: the measurement for $file1 and for $file2.
my %MEASURE=(
    # 'yes' or 'no' for each file
    'existence' => sub {
        my ($file1,$file2)=@_;
        return ((-e $file1) ? 'yes' : 'no',
                (-e $file2) ? 'yes' : 'no');
    },
    # Hex digest printed by md5sum (undef when md5sum prints nothing).
    # The digest is the text before the first blank; it is no longer
    # shortened by one character as the former chop did.
    'md5sum' => sub {
        my ($file1,$file2)=@_;
        my ($rv1)=split(/ /,$run_command->('md5sum','--',$file1));
        my ($rv2)=split(/ /,$run_command->('md5sum','--',$file2));
        return ($rv1,$rv2);
    },
    # $file1: time from cvstime(); $file2: its modification time, rendered
    # by gmtime and converted back to seconds by utctime()
    'cvstime' => sub {
        my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        my $gmt=gmtime((stat($file2))[9]);
        my $rv2=utctime($gmt);
        return ($rv1,$rv2);
    },
    # modification time (stat field 9)
    'age' => sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[9],(stat($file2))[9]);
    },
    # size in bytes (stat field 7)
    'size' => sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[7],(stat($file2))[7]);
    },
    # Line count reported by "wc -l" (0 when wc prints no number).  Only
    # the number is returned; the old value still had the file name attached.
    'lines' => sub {
        my ($file1,$file2)=@_;
        my @counts;
        foreach my $f ($file1,$file2) {
            my ($n)=$run_command->('wc','-l','--',$f)=~/^\s*(\d+)/;
            push @counts,(defined($n) ? $n : 0);
        }
        return @counts;
    },
    # number of diff output lines starting with '<' and with '>';
    # diff is run once and both counts are taken from the same output
    'diffs' => sub {
        my ($file1,$file2)=@_;
        my @difflines=split(/\n/,$run_command->('diff','--',$file1,$file2));
        my $rv1=grep { /^</ } @difflines;
        my $rv2=grep { /^>/ } @difflines;
        return ($rv1,$rv2);
    },
);
211:
# True when a numeric comparison result shows a difference.  'n/a' (the
# value used when a measurement was not taken) never counts as one.
my $is_nonzero=sub {
    my ($value)=@_;
    return 0 if $value eq 'n/a';
    return ($value!=0) ? 1 : 0;
};

# Compare every file: measure, decide whether the file is to be listed,
# then either exit with a build mode status or print the result line.
FLOOP: foreach my $file (@files) {
    # Directory mode compares the same relative path below both locations;
    # file mode compares the two locations themselves.
    my ($file1,$file2)=($loc1,$loc2);
    if ($dirmode eq 'directories') {
        ($file1,$file2)=($loc1.'/'.$file,$loc2.'/'.$file);
    }

    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;

    # Everything stays 'n/a' unless both files exist.
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    if ($existence1 ne 'no' and $existence2 ne 'no') {
        # The CVS time is only looked up in build mode.
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $cvstime=$cvstime1-$cvstime2;
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Same checksum: skip the remaining (more expensive) measures.
            ($md5sum,$size,$lines,$diffs)=('same',0,0,'0:0');
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # Decide whether this file is listed.
    my $showflag=($show eq 'all') ? 1 : 0;
    if ($show eq 'different' or $show eq 'same') {
        # 1 for every comparison that shows a difference.  cvstime only
        # takes part in build mode, as it is not measured otherwise.
        my %differs=(
            'existence' => ($existence ne 'yes:yes') ? 1 : 0,
            'md5sum'    => ($md5sum ne 'same') ? 1 : 0,
            'cvstime'   => ($buildmode and $is_nonzero->($cvstime)) ? 1 : 0,
            'age'       => $is_nonzero->($age),
            'size'      => $is_nonzero->($size),
            'lines'     => $is_nonzero->($lines),
            'diffs'     => ($diffs ne '0:0') ? 1 : 0,
        );
        # Look only at the requested restrictions, or at everything.
        my @ks=(keys %restrict);
        unless (@ks) {
            @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
        }
        # Count the differences directly.  The former counter started from
        # length(@ks), which is the length of the *string* holding the
        # element count, and only gave the right answer by accident.
        my $ndiff=grep { $differs{$_} } @ks;
        if ($show eq 'different') {
            $showflag=1 if $ndiff>0;
        }
        else {
            $showflag=1 if $ndiff==0;
        }
    }

    # Build modes 1-4 report through the exit status and stop at the first
    # file; the status values are the ones listed in the usage text.
    if ($buildmode==1) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==2) {
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==3) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $age<0;
        exit(0);
    }
    elsif ($buildmode==4) {
        exit(3) if $existence=~/no$/;  # second file is missing
        exit(2) if $cvstime>0;
        exit(1) if $existence=~/^no/;  # first file is missing
        exit(0);
    }

    if ($showflag) {
        print "$file";
        if ($verbose==1) {
            # Each handler prints its own "<name>: <value>" text.
            my @columns=(
                ['existence',$existence],
                ['cvstime',$cvstime],
                ['age',$age],
                ['md5sum',$md5sum],
                ['size',$size],
                ['lines',$lines],
                ['diffs',$diffs],
            );
            foreach my $column (@columns) {
                my ($name,$value)=@{$column};
                print "\t";
                $OUTPUT{$name}->($value);
            }
        }
        print "\n";
    }
}
424:
1.5 harris41 425: # ----------------------------------------------------------------- Subroutines
426:
# cvstime($f): timestamp recorded for file $f in the CVS/Entries file of its
# directory, converted to seconds since the epoch (timestamp read as UTC).
#
# Returns 'n/a' when $buildmode is 3 (no CVS lookup is made then), otherwise
# the output of date(1), which is '' when the timestamp cannot be parsed.
# Dies when CVS/Entries has no line for the file (or cannot be read).
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash, '' if none) and name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }
    return 'n/a' if $buildmode==3;

    # Entries lines look like "/name/revision/timestamp/options/tag".
    # The file is read directly and the name compared literally; the old
    # shell "grep" treated the name as a pattern, so "a.b" also matched
    # an entry named "aXb", and names with quotes broke the command.
    my $entry;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        my $wanted='/'.$file.'/';
        while (my $line=<$entries>) {
            if (index($line,$wanted)==0) {
                $entry=$line;
                last;
            }
        }
        close($entries);
    }
    defined($entry) or
        die('*** ERROR *** cannot grep against '.$path.
            'CVS/Entries for ' .$file . "\n");
    my @fields=split(/\//,$entry);
    my $stamp=defined($fields[3]) ? $fields[3] : '';

    # List-form pipe open: date gets the timestamp as one argument, no shell.
    my $cvstime='';
    if (open(my $date,'-|','date','-d',$stamp.' UTC','--utc','+%s')) {
        my $out=<$date>;
        close($date);
        $cvstime=$out if defined($out);
    }
    chomp $cvstime;
    return $cvstime;
}
1.1 harris41 451:
# utctime($f): hand the date string $f, read as UTC, to date(1) and return
# what it prints for "+%s" (seconds since the epoch).  Returns '' when date
# prints nothing, e.g. because it cannot parse the string.
sub utctime {
    my ($f)=@_;
    my $utctime='';
    # List-form pipe open passes "$f UTC" to date as a single argument
    # without a shell; a quote inside $f can no longer end the quoted
    # string and have the rest run as a command.
    if (open(my $date,'-|','date','-d',$f.' UTC','--utc','+%s')) {
        my $out=<$date>;
        close($date);
        $utctime=$out if defined($out);
    }
    chomp $utctime;
    return $utctime;
}
1.1 harris41 458:
# dowarn($msg): pass $msg to warn(), but stay silent whenever $buildmode is
# true, so that build mode runs produce no diagnostic chatter.
sub dowarn {
    my $msg=shift;
    $buildmode or warn($msg);
}
1.5 harris41 463:
464: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4 harris41 465:
466: =head1 NAME
467:
468: filecompare.pl - script used to help probe and compare file statistics
469:
470: =head1 SYNOPSIS
471:
472: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
473:
474: or
475:
476: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
477:
478: Restrictions: a list of space separated values (after the file/dir names)
479: can restrict the comparison.
480: These values can be: existence, cvstime, age, md5sum, size, lines,
481: and/or diffs.
482:
483: Options (before file/dir names):
484:
485: -p show all files that have the same comparison
486:
487: -n show all files that have different comparisons
488:
489: -a show all files (with comparisons)
490:
491: -q only show file names (based on first file/dir)
492:
493: -v verbose mode (default)
494:
495: =head1 DESCRIPTION
496:
497: filecompare.pl can work in two modes: file comparison mode, or directory
498: comparison mode.
499:
500: Comparisons can be a function of:
501: * existence similarity
502: * cvs time similarity (first argument treated as CVS source)
503: * age similarity (modification time)
504: * md5sum similarity
505: * size similarity (bytes)
506: * line count difference
507: * number of different lines
508:
509: filecompare.pl integrates smoothly with the LPML installation language
510: (linux packaging markup language). filecompare.pl is a tool that can
511: be used for safe CVS source-to-target installations.
512:
513: =head1 README
514:
515: filecompare.pl integrates smoothly with the LPML installation language
516: (linux packaging markup language). filecompare.pl is a tool that can
517: be used for safe CVS source-to-target installations.
518:
519: The unique identifier is considered to be the file name(s) independent
520: of the directory path.
521:
522: =head1 PREREQUISITES
523:
524: =head1 COREQUISITES
525:
526: =head1 OSNAMES
527:
528: linux
529:
530: =head1 SCRIPT CATEGORIES
531:
532: Packaging/Administrative
533:
534: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>