1: #!/usr/bin/perl
2:
3: # The LearningOnline Network with CAPA
4: # filecompare.pl - script used to help probe and compare file statistics
5: #
6: # $Id: filecompare.pl,v 1.1 2002/05/23 18:50:42 harris41 Exp $
7: #
8: # Copyright Michigan State University Board of Trustees
9: #
10: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
11: #
12: # LON-CAPA is free software; you can redistribute it and/or modify
13: # it under the terms of the GNU General Public License as published by
14: # the Free Software Foundation; either version 2 of the License, or
15: # (at your option) any later version.
16: #
17: # LON-CAPA is distributed in the hope that it will be useful,
18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: # GNU General Public License for more details.
21: #
22: # You should have received a copy of the GNU General Public License
23: # along with LON-CAPA; if not, write to the Free Software
24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: #
26: # /home/httpd/html/adm/gpl.txt
27: #
28: # http://www.lon-capa.org/
29: #
30: # YEAR=2001
31: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
32: # 11/14 Guy Albertelli
33: # 11/16,11/17 Scott Harrison
34: # 12/3,12/5 Scott Harrison
35: #
36: ###
37:
38: ###############################################################################
39: ## ##
40: ## ORGANIZATION OF THIS PERL SCRIPT ##
41: ## ##
42: ## 1. Invocation ##
43: ## 2. Notes ##
44: ## 3. Dependencies ##
45: ## 4. Process command line arguments ##
46: ## 5. Process file/dir location arguments ##
47: ## 6. Process comparison restrictions ##
48: ## 7. Define output and measure subroutines ##
49: ## 8. Loop through files and calculate differences ##
50: ## 9. Subroutines ##
51: ## 10. POD (plain old documentation, CPAN style) ##
52: ## ##
53: ###############################################################################
54:
55: # ------------------------------------------------------------------ Invocation
# Usage text, printed when the script is started without arguments or with an
# option or restriction it does not recognise.
# The status-mode synopsis lists SOURCE= before TARGET= because the argument
# parser reads these values strictly by position (source root first).
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
or
filecompare.pl [ options ... ] -s SOURCE=[source] TARGET=[target] MODE=[mode]
LOC1 LOC2

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls EXIT code of this script; 0 unless...)
N=1: md5sum=same --> 1; cvstime<0 --> 2
N=2: same as N=1 except without md5sum
N=3: md5sum=same --> 1; age<0 --> 2
N=4: LOC2 missing --> 3; cvstime>0 --> 2; LOC1 missing --> 1

The third way to pass arguments is set by the -s flag.
filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2

TARGET corresponds to the root path of LOC2. SOURCE corresponds to
the root path of LOC1. MODE can either be file, directory, link, or fileglob.

END

# Nothing to compare without arguments: show the usage text and signal failure.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
92:
93: # ----------------------------------------------------------------------- Notes
94: #
95: # What are all the different ways to compare two files and how to look
96: # at the differences?
97: #
98: # Ways of comparison:
99: # existence similarity
100: # cvs time similarity (1st arg treated as CVS source; only for buildmode)
101: # age similarity (modification time)
102: # md5sum similarity
103: # size similarity (bytes)
104: # line count difference
105: # number of different lines
106: #
107: # Quantities of comparison:
108: # existence (no,yes); other values become 'n/a'
109: # cvstime in seconds
110: # age in seconds
111: # md5sum ("same" or "different")
112: # size similarity (byte difference)
113: # line count difference (integer)
114: # number of different lines (integer)
115:
116: # ---------------------------------------------------------------- Dependencies
117: # implementing from unix command line (assuming bash)
118: # md5sum, diff, wc -l
119:
120: # ---------------------------------------------- Process command line arguments
121: # Flags (before file/dir names):
122: # -p show all files the same
123: # -n show all files different
124: # -a show all files (with comparisons)
125: # -q only show file names (based on first file/dir)
126: # -v verbose mode (default)
127: # -bN build/install mode (returns exitcode)
128: # -s status checking mode for lpml
129:
# Defaults: verbose output, show every file, neither build mode nor lpml
# status mode.
my $verbose='1';
my $show='all';
my $buildmode=0;
my $statusmode=0;

# Consume the leading "-x" style flags.  The first argument that does not look
# like a flag ends option processing and stays in @ARGV for the location
# parser that follows.
ALOOP: while (@ARGV) {
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    my $arg=shift @ARGV;

    if ($flag eq 'b') {
        # -bN requires its digit.  A failed match leaves $1 untouched, so
        # without this test a bare "-b" would store the letter 'b' captured
        # above as the build mode.
        if ($arg=~/^\-b(\d)/) {
            $buildmode=$1;
        }
        else {
            print($invocation), exit(1);
        }
        next ALOOP;
    }

    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    elsif ($flag eq 's') { $statusmode=1; }
    else {
        # Unknown option: show usage and fail.
        print($invocation), exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
161:
# Work list and the two locations being compared.  $dirmode decides later
# whether entries of @files are paths relative to $loc1/$loc2 ('directories')
# or whether $loc1/$loc2 are themselves the files ('files').
my @files;
my $loc1;
my $loc2;
my $dirmode='directories';
# ----------------------------------------- If status checking mode for lpml
my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob);
my ($source,$target);
if ($statusmode==1) {
    # Positional arguments: SOURCE=root TARGET=root MODE=mode LOC1 LOC2
    ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5);
    $targetroot.='/' if $targetroot!~/\/$/;
    $sourceroot=~s/^SOURCE\=//;
    $targetroot=~s/^TARGET\=//;
    $source=$sourceroot.'/'.$sourceglob;
    $target=$targetroot.''.$targetglob;
#    print "SOURCE: $source\n";
#    print "TARGET: $target\n";
    if ($mode eq 'MODE=fileglob') {
        # Compare the directories that contain the glob; the expanded file
        # names are made relative to the source directory.
        $loc1=$source;$loc1=~s/\/[^\/]*$// if length($loc1)>2;
        $loc2=$target;$loc2=~s/\/[^\/]*$// if length($loc2)>2;
        # \Q..\E: the directory name is literal text, not a pattern.
        @files=map {s/^\Q$loc1\E\///;$_} glob($source);
        $dirmode='directories';
    }
    elsif ($mode eq 'MODE=file') {
        $loc1=$source;
        $loc2=$target;
        $dirmode='files';
        @files=($loc1);
    }
}
else {

# ----------------------------------------- Process file/dir location arguments
    # FILE1 FILE2 or DIR1 DIR2
    $loc1=shift @ARGV;
    $loc2=shift @ARGV;
    unless ($loc1 and $loc2) {
        print "LOC1: $loc1\nLOC2: $loc2\n";
        print($invocation), exit(1);
    }
    if (-f $loc1) {
        $dirmode='files';
        @files=($loc1);
    }
    elsif (-e $loc1) {
        # List-form pipe open: find receives $loc1 as a single argument with
        # no shell in between, so spaces and metacharacters are harmless.
        if (open(my $find,'-|','find',$loc1,'-type','f')) {
            @files=<$find>;
            close($find);
        }
        else {
            warn("cannot run find on $loc1: $!\n");
        }
        # Make every name relative to $loc1.  Trailing slashes on $loc1 are
        # ignored for the prefix so that "dir/" and "dir" behave alike.
        (my $prefix=$loc1)=~s/\/+$//;
        foreach my $entry (@files) {
            chomp $entry;
            $entry=~s/^\Q$prefix\E\/+//;
        }
    }
    else {
        # $loc1 does not exist; keep it as the single entry so that the
        # comparison loop can report the missing file.
        @files=($loc1);
    }
    dowarn('Processing for mode: '.$dirmode."\n");
    dowarn('Location #1: '.$loc1."\n");
    dowarn('Location #2: '.$loc2."\n");
}
218: # --------------------------------------------- Process comparison restrictions
219: # A list of space separated values (after the file/dir names)
220: # can restrict the comparison.
# Names that may follow the file/dir arguments to limit the comparison.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
# Restrictions actually requested on the command line (name => 1).
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Membership has to be tested with exists: every value in %rhash is 0 and
    # an unknown name yields undef, which is numerically equal to 0 as well,
    # so a "== 0" test accepts any word.
    if (exists $rhash{$r}) {$restrict{$r}=1;}
    else {print($invocation), exit(1);}
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
233:
234: # --------------------------------------- Define output and measure subroutines
# One printer per comparison name.  Each entry is a closure that writes
# "<name>: <value>" (value = first argument) to the currently selected output
# handle and returns nothing.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
244:
# Run an external command without a shell and return its standard output as a
# list of lines.  With the list form of a piped open the arguments reach the
# program verbatim, so file names containing spaces or shell metacharacters
# are safe.  Returns an empty list when the command cannot be started.
sub _command_output {
    my @command=@_;
    my @lines;
    if (open(my $pipe,'-|',@command)) {
        @lines=<$pipe>;
        # The exit status is deliberately ignored: diff exits with 1 whenever
        # the files differ, which is not an error here.
        close($pipe);
    }
    return @lines;
}

# Measurement routines, one per comparison name.  Every routine receives the
# two file names and returns a two-element list: the measurement for the first
# file followed by the measurement for the second one.
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence' => sub {
        my ($file1,$file2)=@_;
        return ((-e $file1)?'yes':'no', (-e $file2)?'yes':'no');
    },
    # Digest printed by md5sum(1); empty string when it produced no output.
    'md5sum' => sub {
        my @sums;
        foreach my $path (@_[0,1]) {
            my ($line)=_command_output('md5sum',$path);
            my ($sum)=split(/ /,defined($line)?$line:'');
            push @sums,defined($sum)?$sum:'';
        }
        return @sums;
    },
    # CVS timestamp of the first file versus modification time of the second,
    # both as returned by cvstime()/utctime().
    'cvstime' => sub {
        my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        my @a=stat($file2);
        my $rv2=utctime(scalar gmtime($a[9]));
        return ($rv1,$rv2);
    },
    # Modification times (stat field 9).
    'age' => sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[9],(stat($file2))[9]);
    },
    # Sizes in bytes (stat field 7).
    'size' => sub {
        my ($file1,$file2)=@_;
        return ((stat($file1))[7],(stat($file2))[7]);
    },
    # Line counts reported by "wc -l"; 0 when wc produced no count.
    'lines' => sub {
        my @counts;
        foreach my $path (@_[0,1]) {
            my ($line)=_command_output('wc','-l',$path);
            my $count=0;
            if (defined($line) and $line=~/^\s*(\d+)/) {
                $count=$1;
            }
            push @counts,$count;
        }
        return @counts;
    },
    # Number of diff(1) output lines starting with '<' (only in the first
    # file) and with '>' (only in the second file), from a single diff run.
    'diffs' => sub {
        my ($file1,$file2)=@_;
        my @output=_command_output('diff',$file1,$file2);
        my $removed=grep { /^</ } @output;
        my $added=grep { /^>/ } @output;
        return ($removed,$added);
    },
);
278:
# Main loop.  For every file: measure both sides, decide whether the file
# matches the -p/-n/-a selection, honour the build-mode exit codes, and print
# one report line.
FLOOP: foreach my $file (@files) {
    # In directory mode $file is relative to both roots; in file mode the two
    # locations are themselves the files to compare.
    my ($file1,$file2);
    if ($dirmode eq 'directories') {
        ($file1,$file2)=($loc1.'/'.$file,$loc2.'/'.$file);
    }
    else {
        ($file1,$file2)=($loc1,$loc2);
    }

    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;

    # Every other measurement stays 'n/a' unless both files exist.
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    if ($existence1 ne 'no' and $existence2 ne 'no') {
        # cvstime is only evaluated in build mode.
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $cvstime=$cvstime1-$cvstime2;
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical digests: skip the remaining, more expensive probes.
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs='0:0';
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # Per-comparison verdict: true when the two files differ in that respect.
    # An unavailable measurement ('n/a') counts as a difference for every
    # comparison, numeric or not.  cvstime has a verdict only in build mode.
    my %differs=(
        'existence' => ($existence ne 'yes:yes')          ? 1 : 0,
        'md5sum'    => ($md5sum ne 'same')                ? 1 : 0,
        'age'       => ($age eq 'n/a' or $age!=0)         ? 1 : 0,
        'size'      => ($size eq 'n/a' or $size!=0)       ? 1 : 0,
        'lines'     => ($lines eq 'n/a' or $lines!=0)     ? 1 : 0,
        'diffs'     => ($diffs ne '0:0')                  ? 1 : 0,
    );
    if ($buildmode) {
        $differs{'cvstime'}=($cvstime eq 'n/a' or $cvstime!=0) ? 1 : 0;
    }

    # Comparisons that take part in the decision: the requested restrictions,
    # or all of them when none were given.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    # Count the differing comparisons directly; "same" means none differ and
    # "different" means at least one does.
    my $differing=grep { $differs{$_} } @ks;

    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    if ($show eq 'different') {
        $showflag=1 if $differing>0;
    }
    elsif ($show eq 'same') {
        $showflag=1 if $differing==0;
    }

    # Build modes report through the exit code and stop at the first file.
    if ($buildmode==1) { # -b1
        exit(1) if $md5sum eq 'same';
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==2) { # -b2
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==3) { # -b3
        exit(1) if $md5sum eq 'same';
        exit(2) if $age<0;
        exit(0);
    }
    elsif ($buildmode==4) { # -b4
        exit(3) if $existence=~/no$/;   # second file missing
        exit(2) if $cvstime>0;
        exit(1) if $existence=~/^no/;   # first file missing
        exit(0);
    }

    if ($showflag) {
        print "$file";
        if ($verbose==1) {
            # Column order of the verbose report.
            my @report=(
                ['existence',$existence],
                ['cvstime',$cvstime],
                ['age',$age],
                ['md5sum',$md5sum],
                ['size',$size],
                ['lines',$lines],
                ['diffs',$diffs],
            );
            foreach my $column (@report) {
                my ($name,$value)=@{$column};
                print "\t";
                $OUTPUT{$name}->($value);
            }
        }
        print "\n";
    }
}
491:
492: # ----------------------------------------------------------------- Subroutines
493:
# Look up the timestamp recorded for a file in its CVS/Entries file.
#
# Argument: path of a file; CVS/Entries is expected in the same directory.
# Returns:  whatever utctime() makes of the entry's timestamp field (seconds
#           since the epoch), or 'n/a' when build mode is 3, CVS/Entries
#           cannot be read, or it holds no entry for the file.
sub cvstime {
    my ($f)=@_;
    return 'n/a' if $buildmode==3;

    # Split into directory part (with trailing slash) and bare file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }

    # Read CVS/Entries directly rather than through "grep" in a shell: the
    # file name is compared literally, so '.' and other pattern characters in
    # it cannot match a different entry, and no quoting issues arise.
    my $timestamp;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        my $wanted='/'.$file.'/';
        while (my $entry=<$entries>) {
            next unless index($entry,$wanted)==0;
            # Fields are separated by '/'; the line starts with one, so
            # index 0 is empty, 1 the name, 2 the revision, 3 the timestamp.
            my @fields=split(/\//,$entry);
            $timestamp=$fields[3];
            last;
        }
        close($entries);
    }
    return 'n/a' unless defined $timestamp;

    # Same conversion as used for the target file's modification time.
    return utctime($timestamp);
}
524:
# Convert a textual timestamp, interpreted as UTC, into seconds since the
# epoch.
#
# Argument: a date string without zone designation, e.g. the scalar result of
#           gmtime() such as "Thu May 23 18:50:42 2002".
# Returns:  the epoch seconds, or an empty string when the string cannot be
#           converted.
sub utctime {
    my ($f)=@_;

    # Fast path: "Www Mmm dd hh:mm:ss yyyy" is converted in-process.
    if (defined($f) and
        $f=~/^\s*\w{3}\s+(\w{3})\s+(\d{1,2})\s+(\d{1,2}):(\d{2}):(\d{2})\s+(\d{4})\s*$/) {
        my ($monthname,$mday,$hour,$min,$sec,$year)=($1,$2,$3,$4,$5,$6);
        my %monthindex=(Jan=>0,Feb=>1,Mar=>2,Apr=>3,May=>4,Jun=>5,
                        Jul=>6,Aug=>7,Sep=>8,Oct=>9,Nov=>10,Dec=>11);
        if (exists $monthindex{$monthname}) {
            require Time::Local;
            # timegm dies on out-of-range values; treat that as "not handled
            # here" and fall through to date(1).
            local $@;
            my $epoch=eval {
                Time::Local::timegm($sec,$min,$hour,$mday,
                                    $monthindex{$monthname},$year);
            };
            return $epoch if defined $epoch;
        }
    }

    # Any other format is handed to date(1).  The list form of the piped open
    # passes the string as one argument without a shell, so quotes or other
    # shell syntax inside it are never interpreted.
    my $utctime='';
    if (open(my $date,'-|','date','-d',"$f UTC",'--utc','+%s')) {
        my $line=<$date>;
        close($date);
        $utctime=$line if defined $line;
    }
    chomp $utctime;
    return $utctime;
}
531:
# Emit a diagnostic through warn(), except in build mode, where the script
# stays silent.
#
# Argument: the message text to pass to warn().
sub dowarn {
    my $message=shift;
    $buildmode or warn($message);
}
536:
537: # ----------------------------------- POD (plain old documentation, CPAN style)
538:
539: =head1 NAME
540:
541: filecompare.pl - script used to help probe and compare file statistics
542:
543: =head1 SYNOPSIS
544:
545: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
546:
547: or
548:
549: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
550:
551: Restrictions: a list of space separated values (after the file/dir names)
552: can restrict the comparison.
553: These values can be: existence, cvstime, age, md5sum, size, lines,
554: and/or diffs.
555:
556: Options (before file/dir names):
557:
558: -p show all files that have the same comparison
559:
560: -n show all files that have different comparisons
561:
562: -a show all files (with comparisons)
563:
564: -q only show file names (based on first file/dir)
565:
566: -v verbose mode (default)
567:
568: =head1 DESCRIPTION
569:
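filecompare.pl compares either two files (file comparison mode) or two
directory trees (directory comparison mode). In addition, the -s option
selects a status checking mode for LPML, and -bN selects a build mode in
which the result is reported through the exit code.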
572:
573: Comparisons can be a function of:
574: * existence similarity
575: * cvs time similarity (first argument treated as CVS source)
576: * age similarity (modification time)
577: * md5sum similarity
578: * size similarity (bytes)
579: * line count difference
580: * number of different lines
581:
582: filecompare.pl integrates smoothly with the LPML installation language
583: (linux packaging markup language). filecompare.pl is a tool that can
584: be used for safe CVS source-to-target installations.
585:
586: =head1 README
587:
588: filecompare.pl integrates smoothly with the LPML installation language
589: (linux packaging markup language). filecompare.pl is a tool that can
590: be used for safe CVS source-to-target installations.
591:
592: The unique identifier is considered to be the file name(s) independent
593: of the directory path.
594:
595: =head1 PREREQUISITES
596:
597: =head1 COREQUISITES
598:
599: =head1 OSNAMES
600:
601: linux
602:
603: =head1 SCRIPT CATEGORIES
604:
605: Packaging/Administrative
606:
607: =cut
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>