Annotation of loncom/build/filecompare.pl, revision 1.4
1.1 harris41 1: #!/usr/bin/perl
2:
1.4 ! harris41 3: # The LearningOnline Network with CAPA
! 4: #
! 5: # filecompare.pl - script used to help probe and compare file statistics
! 6: #
1.1 harris41 7: # YEAR=2001
1.4 ! harris41 8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
! 9: # 11/14 Guy Albertelli
! 10: # 11/16 Scott Harrison
! 11: #
! 12: # $Id: pwchange,v 1.3 2001/11/14 13:19:36 albertel Exp $
! 13: ###
1.1 harris41 14:
1.4 ! harris41 15: # ------------------------------------------------------------------ Invocation
# Usage message.  It is printed (and the script exits with status 1)
# whenever the command line cannot be interpreted.
my $invocation=<<"USAGE";
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
USAGE

# No arguments at all: show the usage message and stop.
if (!@ARGV) {
    print($invocation);
    exit(1);
}
37: # ----------------------------------------------------------------------- Notes
38: #
39: # What are all the different ways to compare two files and how to look
40: # at the differences?
41: #
42: # Ways of comparison:
43: # existence similarity
1.2 harris41 44: # cvs time similarity (first argument treated as CVS source)
1.1 harris41 45: # age similarity (modification time)
46: # md5sum similarity
47: # size similarity (bytes)
48: # line count difference
49: # number of different lines
50: #
51: # Quantities of comparison:
52: # existence (no,yes); other values become 'n/a'
1.2 harris41 53: # cvstime in seconds
1.1 harris41 54: # age in seconds
55: # md5sum ("same" or "different")
56: # size similarity (byte difference)
57: # line count difference (integer)
58: # number of different lines (integer)
59: #
60: # Text output of comparison:
61: # existence VALUE
1.2 harris41 62: # cvstime VALUE
1.1 harris41 63: # age VALUE
64: # md5sum VALUE
65: # size VALUE
66: # lines VALUE
67: # diffs VALUE
68: #
69: # Output of comparison:
70: # exist
71: # if md5sum not same, then different
1.2 harris41 72: # if cvstime not 0, then older/newer
1.1 harris41 73: # if age not 0, then older/newer
74: # if size not 0, then bigger/smaller
75: # if lines not 0, then more lines of code/less lines of code
76: # if diffs not 0, then subtracted lines/added lines/changed lines
77:
78: # implementing from unix command line (assuming bash)
79: # md5sum, diff, wc -l
80:
81: # ---------------------------------------------- Process command line arguments
82: # Flags (before file/dir names):
83: # -p show all files the same
84: # -n show all files different
85: # -a show all files (with comparisons)
86: # -q only show file names (based on first file/dir)
87: # -v verbose mode (default)
1.2 harris41 88: # -b build/install mode (returns exitcode)
# Defaults: verbose output, show every file, not in build/install mode.
my $verbose='1';
my $show='all';
my $buildmode=0;
{
    # One action per recognized single-letter flag.
    my %action_for=(
        'q'=>sub { $verbose=0; },
        'v'=>sub { $verbose=1; },
        'p'=>sub { $show='same'; },
        'n'=>sub { $show='different'; },
        'a'=>sub { $show='all'; },
        'b'=>sub { $buildmode=1; },
        'B'=>sub { $buildmode=2; },
        'g'=>sub { $buildmode=3; },
        'G'=>sub { $buildmode=4; },
    );
    # Consume leading arguments that look like flags; the first argument
    # that does not start with "-" plus a word character ends the options.
    while (@ARGV and $ARGV[0]=~/^\-(\w)/) {
        my $letter=$1;
        shift @ARGV;
        my $action=$action_for{$letter};
        unless ($action) {
            # Unknown flag: show usage and give up.
            print($invocation);
            exit(1);
        }
        $action->();
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1 harris41 116:
# FILE1 FILE2 or DIR1 DIR2
# $loc1 and $loc2 are the two locations being compared.  When $loc1 is a
# plain file the script works in 'files' mode and compares $loc1 with $loc2
# directly.  Otherwise it works in 'directories' mode and @files holds the
# paths (relative to $loc1) of every plain file found underneath $loc1.
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;
# Check for defined, non-empty values rather than truth so that a location
# literally named "0" is still accepted.
unless (defined($loc1) and length($loc1) and
        defined($loc2) and length($loc2)) {
    print($invocation), exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
elsif (-e $loc1) {
    # Strip the directory prefix from each path reported by find.  The
    # prefix is matched literally (\Q...\E) so that regex metacharacters
    # in the directory name are harmless, and a trailing slash on the
    # command-line argument is tolerated.
    (my $prefix=$loc1)=~s{/+\z}{};
    # List-form pipe open: find is executed without a shell, so spaces
    # and shell metacharacters in $loc1 cannot alter the command.
    open(my $find,'-|','find',$loc1,'-type','f') or
        die('*** ERROR *** cannot run find on '.$loc1.': '.$!."\n");
    while (my $path=<$find>) {
        chomp $path;
        $path=~s{^\Q$prefix\E/+}{};
        push @files,$path;
    }
    close($find);
}
else {
    # $loc1 does not exist; keep it as the single entry so that the
    # existence comparison can report it.
    @files=($loc1);
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
1.1 harris41 141:
142: # A list of space separated values (after the file/dir names)
143: # can restrict the comparison.
# %restrict records which comparisons the user asked for.  Every remaining
# command-line argument must be the name of a known comparison; anything
# else prints the usage message and exits with status 1.
my %restrict;
{
    my %known=map { $_=>1 }
        ('existence','cvstime','md5sum','age','size','lines','diffs');
    while (@ARGV) {
        my $r=shift @ARGV;
        unless ($known{$r}) {
            print($invocation), exit(1);
        }
        $restrict{$r}=1;
    }
}
if (%restrict) {
    # Reported through dowarn (like every other diagnostic) so that
    # build/install mode stays silent; keys are sorted so the message is
    # the same from run to run.
    dowarn('Restricting comparison to: '.
           join(' ',sort keys %restrict)."\n");
}
164:
# Output handlers, one per comparison name.  Each handler takes the value
# to report, prints "<comparison>: <value>" on standard output, and returns
# an empty list (so a caller that prints the handler's return value emits
# nothing extra).
my %OUTPUT=map {
    my $label=$_;
    # $_[0] is the scalar first argument (the original used the
    # one-element slice @_[0]).
    ($label=>sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
174:
# Measurement functions, one per comparison name.  Each takes the two file
# names ($file1,$file2) and returns a two-element list holding the raw
# measurement for each file; the caller turns the pair into a comparison.
# No measurement goes through a shell, so file names containing spaces or
# shell metacharacters are handled correctly.
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence'=>( sub { my ($file1,$file2)=@_;
        my $rv1=(-e $file1)?'yes':'no';
        my $rv2=(-e $file2)?'yes':'no';
        return ($rv1,$rv2); } ),
    # Full 32-character hexadecimal MD5 digest of each file ('' when the
    # file cannot be read).  The earlier implementation chopped the last
    # digit off the digest.
    'md5sum'=>( sub { my ($file1,$file2)=@_;
        require Digest::MD5;
        my @digests;
        foreach my $name ($file1,$file2) {
            my $digest='';
            if (open(my $fh,'<',$name)) {
                binmode($fh);
                # addfile croaks on read errors (e.g. a directory)
                $digest=eval { Digest::MD5->new->addfile($fh)->hexdigest };
                $digest='' unless defined($digest);
                close($fh);
            }
            push @digests,$digest;
        }
        return @digests; } ),
    # CVS timestamp of $file1 (via cvstime) and modification time of
    # $file2, both in seconds since the epoch.
    'cvstime'=>( sub { my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        # stat already reports mtime as UTC epoch seconds, so no
        # conversion through gmtime and an external date command is needed.
        my $rv2=(stat($file2))[9];
        return ($rv1,$rv2); } ),
    # Modification time of each file in seconds since the epoch.
    'age'=>( sub { my ($file1,$file2)=@_;
        my $rv1=(stat($file1))[9];
        my $rv2=(stat($file2))[9];
        return ($rv1,$rv2); } ),
    # Size of each file in bytes.
    'size'=>( sub { my ($file1,$file2)=@_;
        my $rv1=(stat($file1))[7];
        my $rv2=(stat($file2))[7];
        return ($rv1,$rv2); } ),
    # Number of newline characters in each file (what "wc -l" counts);
    # 0 when the file cannot be read.
    'lines'=>( sub { my ($file1,$file2)=@_;
        my @counts;
        foreach my $name ($file1,$file2) {
            my $count=0;
            if (open(my $fh,'<',$name)) {
                binmode($fh);
                my $buffer;
                while (read($fh,$buffer,65536)) {
                    $count+=($buffer=~tr/\n//);
                }
                close($fh);
            }
            push @counts,$count;
        }
        return @counts; } ),
    # Number of diff output lines starting with '<' (only in $file1) and
    # with '>' (only in $file2).  diff is run once, without a shell.
    'diffs'=>( sub { my ($file1,$file2)=@_;
        my ($rv1,$rv2)=(0,0);
        if (open(my $diff,'-|','diff','--',$file1,$file2)) {
            while (my $line=<$diff>) {
                $rv1++ if $line=~/^</;
                $rv2++ if $line=~/^>/;
            }
            # diff exits non-zero when the files differ; that is expected
            close($diff);
        }
        return ($rv1,$rv2); } ),
);
208:
# ------------------------------------------------------ Compare and report
# Order in which comparison results are printed in verbose mode.
my @report_order=('existence','cvstime','age','md5sum','size','lines','diffs');

# Tell whether a single comparison result indicates a difference.
#   $key   - comparison name
#   $value - the computed result for that comparison
# A result of 'n/a' (one of the files is missing) counts as a difference.
my $differs=sub {
    my ($key,$value)=@_;
    return ($value ne 'yes:yes') if $key eq 'existence';
    return ($value ne 'same') if $key eq 'md5sum';
    return ($value ne '0:0') if $key eq 'diffs';
    return 1 if $value eq 'n/a';
    return ($value!=0);
};

# For every file: measure, then either exit with a build-mode status code
# or print the file (and, in verbose mode, its comparison values) when the
# -a/-n/-p selection says it should be shown.
FLOP: foreach my $file (@files) {
    my $file1;
    my $file2;
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }
    else {
        $file1=$loc1;
        $file2=$loc2;
    }
    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
    if ($existence1 eq 'no' or $existence2 eq 'no') {
        # Nothing else can be measured when either file is missing.
        ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    }
    else {
        my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
        # cvstime() yields 'n/a' when CVS information is not consulted;
        # keep that marker instead of doing arithmetic on it.
        $cvstime=($cvstime1 eq 'n/a')?'n/a':$cvstime1-$cvstime2;
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical content: size, line count and diffs must be zero.
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs=0;
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # Build/install modes report through the exit status of the first file
    # processed and never print.  'n/a' is treated as a zero difference.
    if ($buildmode) {
        my $cvsdelta=($cvstime eq 'n/a')?0:$cvstime;
        my $agedelta=($age eq 'n/a')?0:$age;
        if ($buildmode==1) {
            exit(1) if $md5sum eq 'same';
            exit(2) if $cvsdelta<0;
            exit(0);
        }
        elsif ($buildmode==2) {
            exit(2) if $cvsdelta<0;
            exit(0);
        }
        elsif ($buildmode==3) {
            exit(1) if $md5sum eq 'same';
            exit(2) if $agedelta<0;
            exit(0);
        }
        elsif ($buildmode==4) {
            exit(2) if $cvsdelta>0;
            exit(0);
        }
    }

    my %result_for=(
        'existence'=>$existence,
        'cvstime'=>$cvstime,
        'age'=>$age,
        'md5sum'=>$md5sum,
        'size'=>$size,
        'lines'=>$lines,
        'diffs'=>$diffs,
    );

    # Decide whether this file is to be shown.  Only the comparisons named
    # as restrictions are considered (all of them when none were given).
    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different' or $show eq 'same') {
        my @ks=(keys %restrict);
        unless (@ks) {
            @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
        }
        my $ndifferent=grep { $differs->($_,$result_for{$_}) } @ks;
        if ($show eq 'different') {
            # shown when at least one selected comparison differs
            $showflag=1 if $ndifferent>0;
        }
        else {
            # shown only when every selected comparison is the same
            $showflag=1 if $ndifferent==0;
        }
    }
    next FLOP unless $showflag;

    print "$file";
    if ($verbose==1) {
        foreach my $key (@report_order) {
            print "\t";
            # each handler prints "<name>: <value>" itself
            $OUTPUT{$key}->($result_for{$key});
        }
    }
    print "\n";
}
407:
# cvstime - look up the CVS timestamp recorded for a file.
#   $f - path of a file inside a CVS working directory
# Returns the timestamp stored for the file in the CVS/Entries file of its
# directory, converted to seconds since the epoch (UTC); '' when an entry
# exists but its timestamp is not in the usual "Thu Sep 27 13:19:36 2001"
# form; or 'n/a' when $buildmode is 3 (CVS data is not consulted then).
# Dies when CVS/Entries cannot be read or holds no entry for the file.
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash) and file name.
    my ($path,$file)=('',$f);
    if ($f=~m{^(.*/)(.*?)$}) {
        ($path,$file)=($1,$2);
    }
    return 'n/a' if $buildmode==3;

    my $failure='*** ERROR *** cannot grep against '.$path.
        'CVS/Entries for ' .$file . "\n";
    open(my $entries,'<',$path.'CVS/Entries') or die($failure);
    # Entry lines look like "/name/revision/timestamp/options/tag".  The
    # name is compared literally, so "a.c" cannot match the entry "abc".
    my @fields;
    my $found=0;
    while (my $line=<$entries>) {
        chomp $line;
        @fields=split(/\//,$line);
        if (@fields>1 and $fields[0] eq '' and $fields[1] eq $file) {
            $found=1;
            last;
        }
    }
    close($entries);
    die($failure) unless $found;

    # Convert the asctime-style UTC timestamp to epoch seconds in Perl
    # rather than by passing file contents to an external date command.
    my $cvstime='';
    my %month_index;
    @month_index{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)}=(0..11);
    my $stamp=defined($fields[3])?$fields[3]:'';
    if ($stamp=~/^\w{3}\s+(\w{3})\s+(\d+)\s+(\d+):(\d+):(\d+)\s+(\d{4})\s*$/
        and exists $month_index{$1}) {
        my ($mon,$mday,$hour,$min,$sec,$year)=
            ($month_index{$1},$2,$3,$4,$5,$6);
        require Time::Local;
        # timegm croaks on out-of-range values; report those as ''
        my $epoch=eval {
            Time::Local::timegm($sec,$min,$hour,$mday,$mon,$year);
        };
        $cvstime=$epoch if defined($epoch);
    }
    return $cvstime;
}
1.1 harris41 432:
# utctime - convert a date string, interpreted as UTC, into seconds since
# the epoch by asking the external "date" command.
#   $f - date string in a form that "date -d" understands
# Returns the command's output with the trailing newline removed.
sub utctime {
    my ($f)=@_;
    chomp(my $seconds=qx{date -d '$f UTC' --utc +"%s"});
    return $seconds;
}
1.1 harris41 439:
# dowarn - emit a diagnostic on standard error, except in build/install
# mode ($buildmode set), where the script stays silent.
#   $msg - text to pass to warn
sub dowarn {
    my ($msg)=@_;
    unless ($buildmode) {
        warn($msg);
    }
}
1.4 ! harris41 444:
! 445: =head1 NAME
! 446:
! 447: filecompare.pl - script used to help probe and compare file statistics
! 448:
! 449: =head1 SYNOPSIS
! 450:
! 451: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
! 452:
! 453: or
! 454:
! 455: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
! 456:
! 457: Restrictions: a list of space separated values (after the file/dir names)
! 458: can restrict the comparison.
! 459: These values can be: existence, cvstime, age, md5sum, size, lines,
! 460: and/or diffs.
! 461:
! 462: Options (before file/dir names):
! 463:
! 464: -p show all files that have the same comparison
! 465:
! 466: -n show all files that have different comparisons
! 467:
! 468: -a show all files (with comparisons)
! 469:
! 470: -q only show file names (based on first file/dir)
! 471:
! 472: -v verbose mode (default)
! 473:
! 474: =head1 DESCRIPTION
! 475:
! 476: filecompare.pl can work in two modes: file comparison mode, or directory
! 477: comparison mode.
! 478:
! 479: Comparisons can be a function of:
! 480: * existence similarity
! 481: * cvs time similarity (first argument treated as CVS source)
! 482: * age similarity (modification time)
! 483: * md5sum similarity
! 484: * size similarity (bytes)
! 485: * line count difference
! 486: * number of different lines
! 487:
! 488: filecompare.pl integrates smoothly with the LPML installation language
! 489: (linux packaging markup language). filecompare.pl is a tool that can
! 490: be used for safe CVS source-to-target installations.
! 491:
! 492: =head1 README
! 493:
! 494: filecompare.pl integrates smoothly with the LPML installation language
! 495: (linux packaging markup language). filecompare.pl is a tool that can
! 496: be used for safe CVS source-to-target installations.
! 497:
! 498: The unique identifier is considered to be the file name(s) independent
! 499: of the directory path.
! 500:
! 501: =head1 PREREQUISITES
! 502:
! 503: =head1 COREQUISITES
! 504:
! 505: =head1 OSNAMES
! 506:
! 507: linux
! 508:
! 509: =head1 SCRIPT CATEGORIES
! 510:
! 511: Packaging/Administrative
! 512:
! 513: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>