1: #!/usr/bin/perl
2:
# The LearningOnline Network with CAPA
4: #
5: # filecompare.pl - script used to help probe and compare file statistics
6: #
7: # YEAR=2001
8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
9: # 11/14 Guy Albertelli
10: # 11/16 Scott Harrison
11: #
12: # $Id: filecompare.pl,v 1.4 2001/11/16 20:06:08 harris41 Exp $
13: ###
14:
# ------------------------------------------------------------------ Invocation
# Usage text.  It is printed on standard output, followed by exit status 1,
# whenever the command line cannot be understood.  The list of options
# covers every flag the option parser accepts, including the build-mode
# flags, which produce no listing and only an exit status.
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-b build mode: no listing, exit status only
   (1 if md5sums match, 2 if cvstime is negative, 0 otherwise)
-B build mode: exit 2 if cvstime is negative, 0 otherwise
-g build mode: as -b, but tests age instead of cvstime
-G build mode: exit 2 if cvstime is positive, 0 otherwise
(cvstime and age are the time of FILE1 minus the time of FILE2, in seconds)
END

# Without any argument there is nothing to compare: show the usage and stop.
if (!@ARGV) {
    print $invocation;
    exit 1;
}
37: # ----------------------------------------------------------------------- Notes
38: #
39: # What are all the different ways to compare two files and how to look
40: # at the differences?
41: #
42: # Ways of comparison:
43: # existence similarity
44: # cvs time similarity (first argument treated as CVS source)
45: # age similarity (modification time)
46: # md5sum similarity
47: # size similarity (bytes)
48: # line count difference
49: # number of different lines
50: #
51: # Quantities of comparison:
52: # existence (no,yes); other values become 'n/a'
53: # cvstime in seconds
54: # age in seconds
55: # md5sum ("same" or "different")
56: # size similarity (byte difference)
57: # line count difference (integer)
58: # number of different lines (integer)
59: #
60: # Text output of comparison:
61: # existence VALUE
62: # cvstime VALUE
63: # age VALUE
64: # md5sum VALUE
65: # size VALUE
66: # lines VALUE
67: # diffs VALUE
68: #
69: # Output of comparison:
70: # exist
71: # if md5sum not same, then different
72: # if cvstime not 0, then older/newer
73: # if age not 0, then older/newer
74: # if size not 0, then bigger/smaller
75: # if lines not 0, then more lines of code/less lines of code
76: # if diffs not 0, then subtracted lines/added lines/changed lines
77:
78: # implementing from unix command line (assuming bash)
79: # md5sum, diff, wc -l
80:
# ---------------------------------------------- Process command line arguments
# Leading single-letter flags are consumed from @ARGV, stopping at the first
# argument that does not look like a flag:
#   -p  show only files that compare the same
#   -n  show only files that compare different
#   -a  show all files (default)
#   -q  only show file names
#   -v  verbose mode (default)
#   -b, -B, -g, -G  select build mode 1, 2, 3 or 4 respectively
# Any other flag prints the usage text and exits with status 1.
my $verbose='1';
my $show='all';
my $buildmode=0;
{
    # One action per recognised flag letter.
    my %flag_action=(
        'q'=>sub { $verbose=0; },
        'v'=>sub { $verbose=1; },
        'p'=>sub { $show='same'; },
        'n'=>sub { $show='different'; },
        'a'=>sub { $show='all'; },
        'b'=>sub { $buildmode=1; },
        'B'=>sub { $buildmode=2; },
        'g'=>sub { $buildmode=3; },
        'G'=>sub { $buildmode=4; },
    );
    while (@ARGV and $ARGV[0]=~/^\-(\w)/) {
        my $letter=$1;
        shift @ARGV;
        my $action=$flag_action{$letter};
        unless ($action) {
            print($invocation);
            exit(1);
        }
        $action->();
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
116:
# FILE1 FILE2 or DIR1 DIR2
# The first location decides the mode.  A plain file means one file-to-file
# comparison.  Anything else is handled in 'directories' mode: if it exists,
# its regular files are listed with find(1) and later compared against the
# same relative paths under the second location.
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;
# Test for presence, not truth, so that a location literally named "0"
# is accepted.
unless (defined($loc1) and length($loc1) and
        defined($loc2) and length($loc2)) {
    print($invocation), exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
elsif (-e $loc1) {
    # Single-quote the name for the shell so that spaces and shell
    # metacharacters in it are passed to find(1) literally.
    (my $quoted=$loc1)=~s/'/'\\''/g;
    @files=`find '$quoted' -type f`;
    # Turn each found path into a path relative to $loc1.  The prefix is
    # matched literally (\Q...\E), and trailing slashes on $loc1 are
    # tolerated, so names such as "c++" or "dir/" work.
    (my $prefix=$loc1)=~s/\/+$//;
    foreach my $found (@files) {
        chomp($found);
        $found=~s/^\Q$prefix\E\/+//;
    }
}
else {
    # Nothing exists at $loc1; keep the name so that the comparison
    # reports the missing file instead of silently doing nothing.
    @files=($loc1);
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
141:
# A list of space separated values (after the file/dir names)
# can restrict the comparison.  Every remaining argument must be one of the
# known criteria; anything else prints the usage text and exits with 1.
my %restrict;
{
    my %known=map { ($_=>1) }
        ('existence','cvstime','md5sum','age','size','lines','diffs');
    while (@ARGV) {
        my $r=shift @ARGV;
        unless ($known{$r}) {
            print($invocation), exit(1);
        }
        $restrict{$r}=1;
    }
}
if (%restrict) {
    # Report through dowarn() like every other diagnostic, so that the
    # build modes stay silent; sort the keys so the message is stable.
    dowarn('Restricting comparison to: '.
           join(' ',sort keys %restrict)."\n");
}
164:
# Report formatters, keyed by comparison criterion.  Each one prints
# "CRITERION: VALUE" on standard output for the value passed as its first
# argument and returns nothing (an empty list), so wrapping a call in
# print() adds no further output.
my %OUTPUT=map {
    my $label=$_;
    ($label=>sub { print "${label}: $_[0]"; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
174:
# Quote one file name for literal use inside a /bin/sh command line, so that
# names containing spaces or shell metacharacters are measured correctly.
my $shell_quote=sub {
    my ($name)=@_;
    $name=~s/'/'\\''/g;
    return "'".$name."'";
};

# Measurement routines, keyed by comparison criterion.  Every routine takes
# the two file names and returns a two-element list: the measurement for
# the first file and the measurement for the second.  ('diffs' is the
# exception: it returns the number of "<" lines and the number of ">" lines
# in the diff of the pair.)
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence'=>sub {
        my ($file1,$file2)=@_;
        return ((-e $file1)?'yes':'no',
                (-e $file2)?'yes':'no');
    },
    # Full 32-digit md5sum(1) digest of each file; '' if none was produced.
    'md5sum'=>sub {
        my @digest;
        foreach my $file (@_[0,1]) {
            my $quoted=$shell_quote->($file);
            my $report=`md5sum $quoted`;
            # md5sum prefixes the line with a backslash when it had to
            # escape characters in the file name.
            push(@digest,($report=~/^\\?([0-9a-fA-F]{32})/)?$1:'');
        }
        return @digest;
    },
    # Seconds since the epoch: CVS/Entries time of the first file against
    # the modification time of the second (passed through utctime()).
    'cvstime'=>sub {
        my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        my $gmt=gmtime((stat($file2))[9]);
        my $rv2=utctime($gmt);
        return ($rv1,$rv2);
    },
    # Modification time of each file (stat field 9).
    'age'=>sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[9],(stat($file2))[9]);
    },
    # Size in bytes of each file (stat field 7).
    'size'=>sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[7],(stat($file2))[7]);
    },
    # Line count of each file as reported by wc(1).  Reading from standard
    # input keeps the file name out of the report, so a plain number is
    # returned.
    'lines'=>sub {
        my @count;
        foreach my $file (@_[0,1]) {
            my $quoted=$shell_quote->($file);
            my $report=`wc -l < $quoted`;
            $report=~s/\s+//g;
            push(@count,$report);
        }
        return @count;
    },
    # Number of diff(1) output lines starting with "<" and with ">".
    # diff is run once and its output counted twice.
    'diffs'=>sub {
        my ($file1,$file2)=@_;
        my $quoted1=$shell_quote->($file1);
        my $quoted2=$shell_quote->($file2);
        my @report=`diff $quoted1 $quoted2`;
        my $removed=grep { /^</ } @report;
        my $added=grep { /^>/ } @report;
        return ($removed,$added);
    },
);
208:
# --------------------------------------------------------- Compare every file
# For each candidate file: measure both copies, decide whether the pair is
# selected by the -p/-n/-a option, and print one report line for it.
# When a build mode is active nothing is printed; the script exits with a
# status code as soon as the first pair has been measured.
FLOP: foreach my $file (@files) {
    my $file1;
    my $file2;
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }
    else {
        $file1=$loc1;
        $file2=$loc2;
    }

    # ------------------------------------------------------- Measurements
    my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
    if ($existence1 eq 'no' or $existence2 eq 'no') {
        # Nothing else can be measured when either file is missing.
        ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    }
    else {
        my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2);
        $cvstime=$cvstime1-$cvstime2;
        my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical content: size, line count and diff are known to
            # be equal without measuring them.
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs=0;
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }
    my %value=(
        'existence'=>$existence,
        'cvstime'=>$cvstime,
        'age'=>$age,
        'md5sum'=>$md5sum,
        'size'=>$size,
        'lines'=>$lines,
        'diffs'=>$diffs,
    );

    # ---------------------------------------------------------- Selection
    # True when the named criterion shows a difference between the files.
    # A value of 'n/a' (one file is missing) always counts as a difference:
    # a comparison that could not be made is not "the same".
    my $differs=sub {
        my ($key)=@_;
        my $v=$value{$key};
        return 1 if $v eq 'n/a';
        return ($v ne 'yes:yes') if $key eq 'existence';
        return ($v ne 'same') if $key eq 'md5sum';
        # $diffs is 0 when the md5sums matched, "N:M" otherwise.
        return ($v ne '0:0' and $v ne '0') if $key eq 'diffs';
        return ($v!=0);
    };
    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    else {
        # Criteria to examine: the restrictions, or all of them.
        my @ks=(keys %restrict);
        unless (@ks) {
            @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
        }
        my $different=grep { $differs->($_) } @ks;
        if ($show eq 'different') {
            # -n: at least one examined criterion differs.
            $showflag=1 if $different>0;
        }
        elsif ($show eq 'same') {
            # -p: none of the examined criteria differs.
            $showflag=1 if $different==0;
        }
    }

    # --------------------------------------------------------- Build modes
    # Exit codes: 1 = contents identical, 2 = time test failed, 0 = otherwise.
    if ($buildmode==1) {
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==2) {
        if ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==3) {
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($age<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==4) {
        if ($cvstime>0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }

    # -------------------------------------------------------------- Report
    # Only files selected by -p/-n/-a are listed.
    next FLOP unless $showflag;
    print "$file";
    if ($verbose==1) {
        foreach my $key ('existence','cvstime','age','md5sum',
                         'size','lines','diffs') {
            print "\t";
            # The formatter prints its own text and returns an empty list.
            print &{$OUTPUT{$key}}($value{$key});
        }
    }
    print "\n";
}
407:
# cvstime(FILE)
# Return the timestamp recorded for FILE in the CVS/Entries file of FILE's
# directory, converted to seconds since the epoch by date(1) (the timestamp
# is interpreted as UTC).  In build mode 3 no lookup is made and 'n/a' is
# returned.  Dies if CVS/Entries cannot be read or holds no entry for FILE.
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash) and file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }
    return 'n/a' if $buildmode==3;

    my $failure='*** ERROR *** cannot grep against '.$path.
                'CVS/Entries for ' .$file . "\n";
    open(my $entries,'<',$path.'CVS/Entries') or die($failure);
    # File entries look like "/NAME/REVISION/TIMESTAMP/...".  The name is
    # compared literally, so "foo.pl" cannot pick up the entry of another
    # file whose name merely matches it as a pattern.
    my $found=0;
    my $stamp='';
    while (my $line=<$entries>) {
        next unless index($line,'/'.$file.'/')==0;
        my @fields=split(/\//,$line);
        $stamp=$fields[3] if defined($fields[3]);
        $found=1;
        last;
    }
    close($entries);
    die($failure) unless $found;

    my $cvstime=`date -d '$stamp UTC' --utc +"%s"`;
    chomp $cvstime;
    return $cvstime;
}
432:
# utctime(TIMESTRING)
# Hand TIMESTRING, marked as UTC, to date(1) and return what it prints for
# the "%s" format (seconds since the epoch), without the trailing newline.
sub utctime {
    my ($f)=@_;
    chomp(my $epoch=`date -d '$f UTC' --utc +"%s"`);
    return $epoch;
}
439:
# dowarn(MESSAGE)
# Emit MESSAGE with warn(), except when a build mode is active
# ($buildmode is true), in which case nothing is emitted.
sub dowarn {
    my ($text)=@_;
    return $buildmode || warn($text);
}
444:
445: =head1 NAME
446:
447: filecompare.pl - script used to help probe and compare file statistics
448:
449: =head1 SYNOPSIS
450:
451: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
452:
453: or
454:
455: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
456:
457: Restrictions: a list of space separated values (after the file/dir names)
458: can restrict the comparison.
459: These values can be: existence, cvstime, age, md5sum, size, lines,
460: and/or diffs.
461:
462: Options (before file/dir names):
463:
464: -p show all files that have the same comparison
465:
466: -n show all files that have different comparisons
467:
468: -a show all files (with comparisons)
469:
470: -q only show file names (based on first file/dir)
471:
472: -v verbose mode (default)
473:
474: =head1 DESCRIPTION
475:
476: filecompare.pl can work in two modes: file comparison mode, or directory
477: comparison mode.
478:
479: Comparisons can be a function of:
480: * existence similarity
481: * cvs time similarity (first argument treated as CVS source)
482: * age similarity (modification time)
483: * md5sum similarity
484: * size similarity (bytes)
485: * line count difference
486: * number of different lines
487:
488: filecompare.pl integrates smoothly with the LPML installation language
489: (linux packaging markup language). filecompare.pl is a tool that can
490: be used for safe CVS source-to-target installations.
491:
492: =head1 README
493:
494: filecompare.pl integrates smoothly with the LPML installation language
495: (linux packaging markup language). filecompare.pl is a tool that can
496: be used for safe CVS source-to-target installations.
497:
498: The unique identifier is considered to be the file name(s) independent
499: of the directory path.
500:
501: =head1 PREREQUISITES
502:
503: =head1 COREQUISITES
504:
505: =head1 OSNAMES
506:
507: linux
508:
509: =head1 SCRIPT CATEGORIES
510:
511: Packaging/Administrative
512:
513: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>