Annotation of nsdl/build/filecompare.pl, revision 1.1
1.1 ! harris41 1: #!/usr/bin/perl
! 2:
! 3: # The LearningOnline Network with CAPA
! 4: # filecompare.pl - script used to help probe and compare file statistics
! 5: #
! 6: # $Id: filecompare.pl,v 1.13 2002/05/16 00:23:04 harris41 Exp $
! 7: #
! 8: # Copyright Michigan State University Board of Trustees
! 9: #
! 10: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 11: #
! 12: # LON-CAPA is free software; you can redistribute it and/or modify
! 13: # it under the terms of the GNU General Public License as published by
! 14: # the Free Software Foundation; either version 2 of the License, or
! 15: # (at your option) any later version.
! 16: #
! 17: # LON-CAPA is distributed in the hope that it will be useful,
! 18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 20: # GNU General Public License for more details.
! 21: #
! 22: # You should have received a copy of the GNU General Public License
! 23: # along with LON-CAPA; if not, write to the Free Software
! 24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 25: #
! 26: # /home/httpd/html/adm/gpl.txt
! 27: #
! 28: # http://www.lon-capa.org/
! 29: #
! 30: # YEAR=2001
! 31: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
! 32: # 11/14 Guy Albertelli
! 33: # 11/16,11/17 Scott Harrison
! 34: # 12/3,12/5 Scott Harrison
! 35: #
! 36: ###
! 37:
! 38: ###############################################################################
! 39: ## ##
! 40: ## ORGANIZATION OF THIS PERL SCRIPT ##
! 41: ## ##
! 42: ## 1. Invocation ##
! 43: ## 2. Notes ##
! 44: ## 3. Dependencies ##
! 45: ## 4. Process command line arguments ##
! 46: ## 5. Process file/dir location arguments ##
! 47: ## 6. Process comparison restrictions ##
! 48: ## 7. Define output and measure subroutines ##
! 49: ## 8. Loop through files and calculate differences ##
! 50: ## 9. Subroutines ##
! 51: ## 10. POD (plain old documentation, CPAN style) ##
! 52: ## ##
! 53: ###############################################################################
! 54:
! 55: # ------------------------------------------------------------------ Invocation
# Usage text, printed whenever the command line cannot be understood.
# The -s form takes SOURCE= before TARGET=: the status-mode parser assigns
# the first argument to the source root and the second to the target root.
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
or
filecompare.pl [ options ... ] -s SOURCE=[source] TARGET=[target] MODE=[mode]
LOC1 LOC2

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls EXIT code of this script; 0 unless...)
N=1: md5sum=same --> 1; cvstime<0 --> 2
N=2: same as N=1 except without md5sum
N=3: md5sum=same --> 1; age<0 --> 2
N=4: cvstime>0 --> 2

The third way to pass arguments is set by the -s flag.
filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2

TARGET corresponds to the root path of LOC2. SOURCE corresponds to
the root path of LOC1. MODE can either be file, directory, link, or fileglob.

END

# With no arguments at all there is nothing to compare: show usage and fail.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
! 92:
! 93: # ----------------------------------------------------------------------- Notes
! 94: #
! 95: # What are all the different ways to compare two files and how to look
! 96: # at the differences?
! 97: #
! 98: # Ways of comparison:
! 99: # existence similarity
! 100: # cvs time similarity (1st arg treated as CVS source; only for buildmode)
! 101: # age similarity (modification time)
! 102: # md5sum similarity
! 103: # size similarity (bytes)
! 104: # line count difference
! 105: # number of different lines
! 106: #
! 107: # Quantities of comparison:
! 108: # existence (no,yes); other values become 'n/a'
! 109: # cvstime in seconds
! 110: # age in seconds
! 111: # md5sum ("same" or "different")
! 112: # size similarity (byte difference)
! 113: # line count difference (integer)
! 114: # number of different lines (integer)
! 115:
! 116: # ---------------------------------------------------------------- Dependencies
! 117: # implementing from unix command line (assuming bash)
! 118: # md5sum, diff, wc -l
! 119:
! 120: # ---------------------------------------------- Process command line arguments
! 121: # Flags (before file/dir names):
! 122: # -p show all files the same
! 123: # -n show all files different
! 124: # -a show all files (with comparisons)
! 125: # -q only show file names (based on first file/dir)
! 126: # -v verbose mode (default)
! 127: # -bN build/install mode (returns exitcode)
! 128: # -s status checking mode for lpml
! 129:
# Option state, filled in from the leading "-x" arguments of @ARGV.
#   $verbose    - 1: print every measurement per file; 0: file names only
#   $show       - 'all', 'same' or 'different': which files get listed
#   $buildmode  - N from -bN (0 when absent); selects the exit-code policy
#   $statusmode - 1 when -s (lpml status checking) was given
my $verbose='1';
my $show='all';
my $buildmode=0;
my $statusmode=0;

# Consume flags until the first argument that does not look like "-x".
ALOOP: while (@ARGV) {
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    my $arg=shift @ARGV;

    if ($flag eq 'b') {
        # -bN requires its digit.  The match must be checked: on failure
        # $1 would still hold the flag letter from the match above.
        unless ($arg=~/^\-\w(\d)/) {
            print($invocation);
            exit(1);
        }
        $buildmode=$1;
        next ALOOP;
    }

    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    elsif ($flag eq 's') { $statusmode=1; }
    else {
        # Unknown flag: show usage and fail.
        print($invocation);
        exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
! 161:
# Work list and the two locations being compared.
#   @files   - entries to compare (relative names in 'directories' mode)
#   $loc1/2  - first and second file or directory
#   $dirmode - 'directories': each entry is appended to $loc1 and $loc2;
#              'files': $loc1 and $loc2 are compared directly
my @files;
my $loc1;
my $loc2;
my $dirmode='directories';
# ----------------------------------------- If status checking mode for lpml
my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob);
my ($source,$target);
if ($statusmode==1) {
    # Expected arguments: SOURCE=root TARGET=root MODE=mode LOC1 LOC2
    ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5);
    $targetroot.='/' if $targetroot!~/\/$/;
    $sourceroot=~s/^SOURCE\=//;
    $targetroot=~s/^TARGET\=//;
    $source=$sourceroot.'/'.$sourceglob;
    $target=$targetroot.''.$targetglob;
    if ($mode eq 'MODE=fileglob') {
        # Compare every file matched by the glob against the file of the
        # same name in the target's directory.
        $loc1=$source; $loc1=~s/\/[^\/]*$// if length($loc1)>2;
        $loc2=$target; $loc2=~s/\/[^\/]*$// if length($loc2)>2;
        @files=glob($source);
        foreach my $entry (@files) {
            # \Q..\E: the directory is literal text, not a pattern.
            $entry=~s/^\Q$loc1\E\///;
        }
        $dirmode='directories';
    }
    elsif ($mode eq 'MODE=file') {
        $loc1=$source;
        $loc2=$target;
        $dirmode='files';
        @files=($loc1);
    }
    # Any other MODE leaves @files empty, so nothing is compared.
}
else {

# ----------------------------------------- Process file/dir location arguments
    # FILE1 FILE2 or DIR1 DIR2
    $loc1=shift @ARGV;
    $loc2=shift @ARGV;
    unless ($loc1 and $loc2) {
        print "LOC1: $loc1\nLOC2: $loc2\n";
        print($invocation), exit(1);
    }
    if (-f $loc1) {
        $dirmode='files';
        @files=($loc1);
    }
    else {
        if (-e $loc1) {
            @files=`find $loc1 -type f`;
        }
        else {
            @files=($loc1);
        }
        # Turn each path into a name relative to $loc1.  The prefix is
        # matched literally and without trailing slashes, so "DIR1/" and
        # directory names containing regex metacharacters are stripped
        # correctly.
        (my $prefix=$loc1)=~s/\/+$//;
        foreach my $entry (@files) {
            chomp $entry;
            $entry=~s/^\Q$prefix\E\/+//;
        }
    }
    dowarn('Processing for mode: '.$dirmode."\n");
    dowarn('Location #1: '.$loc1."\n");
    dowarn('Location #2: '.$loc2."\n");
}
! 218: # --------------------------------------------- Process comparison restrictions
! 219: # A list of space separated values (after the file/dir names)
! 220: # can restrict the comparison.
# %rhash lists the recognised restriction names (the values are unused);
# %restrict collects the names actually requested on the command line.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Test key membership, not the stored value: a numeric "== 0" test is
    # also true for names that are not in %rhash at all.
    unless (exists $rhash{$r}) {
        print($invocation);
        exit(1);
    }
    $restrict{$r}=1;
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
! 233:
! 234: # --------------------------------------- Define output and measure subroutines
# Output routines, one per measurement.  Each prints "<name>: <value>" to
# the currently selected output handle and returns nothing (an empty list
# in list context).  The value is read as the scalar element $_[0].
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');
! 244:
# Wrap a path in single quotes for /bin/sh so that spaces and shell
# metacharacters in file names reach the external commands unchanged.
my $shellquote=sub {
    my ($path)=@_;
    $path=~s/'/'\\''/g;
    return "'".$path."'";
};

# Measurement routines.  Each takes ($file1,$file2) and returns a two
# element list: the measurement for the first and for the second file.
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence' => sub {
        my ($file1,$file2)=@_;
        return map { (-e $_) ? 'yes' : 'no' } ($file1,$file2);
    },
    # First space-separated field of the md5sum output, kept whole
    # (undef when md5sum printed nothing).
    'md5sum' => sub {
        my ($file1,$file2)=@_;
        my @sums;
        foreach my $path ($file1,$file2) {
            my $quoted=$shellquote->($path);
            my ($sum)=split(/ /,`md5sum $quoted`);
            push @sums,$sum;
        }
        return @sums;
    },
    # Seconds since the epoch: CVS/Entries time stamp of the first file
    # against the modification time of the second.  The second value goes
    # through the same gmtime string + date(1) conversion as before.
    'cvstime' => sub {
        my ($file1,$file2)=@_;
        my $rv1=cvstime($file1);
        my @a=stat($file2);
        my $gmt=gmtime($a[9]);
        my $rv2=utctime($gmt);
        return ($rv1,$rv2);
    },
    # Modification times (stat field 9).
    'age' => sub {
        my ($file1,$file2)=@_;
        return map { (stat($_))[9] } ($file1,$file2);
    },
    # Sizes in bytes (stat field 7).
    'size' => sub {
        my ($file1,$file2)=@_;
        return map { (stat($_))[7] } ($file1,$file2);
    },
    # Line counts as reported by wc -l; reading from standard input keeps
    # the file name out of the output.
    'lines' => sub {
        my ($file1,$file2)=@_;
        my @counts;
        foreach my $path ($file1,$file2) {
            my $quoted=$shellquote->($path);
            my $count=`wc -l < $quoted`;
            chomp $count;
            $count=~s/^\s+//; $count=~s/\s+$//;
            push @counts,$count;
        }
        return @counts;
    },
    # Number of diff output lines starting with '<' and with '>'.
    # diff runs once and both counts are taken from the same output.
    'diffs' => sub {
        my ($file1,$file2)=@_;
        my $q1=$shellquote->($file1);
        my $q2=$shellquote->($file2);
        my @difflines=`diff $q1 $q2`;
        my $rv1=grep { /^</ } @difflines;
        my $rv2=grep { /^>/ } @difflines;
        return ($rv1,$rv2);
    },
);
! 278:
# For every entry in @files: measure both files, decide whether the entry
# is to be listed, leave with the build-mode exit code when -b1..-b4 is
# active, and otherwise print one result line.
FLOOP: foreach my $file (@files) {
    # Paths of the pair being compared.
    my ($file1,$file2)=($loc1,$loc2);
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }

    # ------------------------------------------------------------ Measure
    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    # Everything stays 'n/a' unless both files exist.
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    if ($existence1 ne 'no' and $existence2 ne 'no') {
        # CVS time is only measured in build mode.
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $cvstime=$cvstime1-$cvstime2;
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical content: the remaining measures are zero by
            # definition, so the external commands are not run.
            ($md5sum,$size,$lines,$diffs)=('same',0,0,'0:0');
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # ------------------------------------------------- Decide what to show
    # Per comparison: does this pair differ?  cvstime only counts in build
    # mode.  An 'n/a' value compares numerically as 0, i.e. "no difference".
    my %differs=(
        'existence' => ($existence ne 'yes:yes')        ? 1 : 0,
        'md5sum'    => ($md5sum ne 'same')              ? 1 : 0,
        'cvstime'   => ($buildmode and $cvstime!=0)     ? 1 : 0,
        'age'       => ($age!=0)                        ? 1 : 0,
        'size'      => ($size!=0)                       ? 1 : 0,
        'lines'     => ($lines!=0)                      ? 1 : 0,
        'diffs'     => ($diffs ne '0:0')                ? 1 : 0,
    );
    # Comparisons taken into account: the restrictions, or all of them.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    # Number of considered comparisons in which the two files differ.
    my $ndiffering=grep { $differs{$_} } @ks;

    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        # -n: list the file when at least one comparison differs.
        $showflag=1 if $ndiffering;
    }
    elsif ($show eq 'same') {
        # -p: list the file only when no considered comparison differs.
        $showflag=1 unless $ndiffering;
    }

    # ------------------------------------------------ Build mode exit codes
    # In build modes 1-4 the script exits on the first file processed.
    if ($buildmode==1) { # -b1
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==2) { # -b2
        if ($cvstime<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==3) { # -b3
        if ($md5sum eq 'same') {
            exit(1);
        }
        elsif ($age<0) {
            exit(2);
        }
        else {
            exit(0);
        }
    }
    elsif ($buildmode==4) { # -b4
        if ($existence=~/no$/) {      # second file missing
            exit(3);
        }
        elsif ($cvstime>0) {
            exit(2);
        }
        elsif ($existence=~/^no/) {   # first file missing
            exit(1);
        }
        else {
            exit(0);
        }
    }

    # --------------------------------------------------------------- Output
    next FLOOP unless $showflag;
    print $file;
    if ($verbose==1) {
        # Fixed column order; each %OUTPUT routine prints its own text.
        foreach my $column (['existence',$existence],
                            ['cvstime',$cvstime],
                            ['age',$age],
                            ['md5sum',$md5sum],
                            ['size',$size],
                            ['lines',$lines],
                            ['diffs',$diffs]) {
            print "\t";
            $OUTPUT{$column->[0]}->($column->[1]);
        }
    }
    print "\n";
}
! 491:
! 492: # ----------------------------------------------------------------- Subroutines
! 493:
# cvstime($f)
#
# Look up the file named by $f in the CVS/Entries file of its directory
# and return the recorded time stamp as seconds since the epoch (UTC),
# converted with date(1).
#
# Returns 'n/a' when $buildmode is 3, when CVS/Entries cannot be read,
# when it has no file entry with exactly this name, or when the entry has
# no time stamp field.  The lookup is done in Perl with an exact string
# comparison, so the file name is never interpreted as a shell word or a
# regular expression.
sub cvstime {
    my ($f)=@_;

    # Split into directory part (with trailing slash, possibly empty)
    # and base name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
        ($path,$file)=($1,$2);
    }

    return 'n/a' if $buildmode==3;

    # File entries look like "/name/revision/timestamp/options/tag".
    # Lines for directories start with "D" and are skipped by the test
    # on the (empty) first field.
    my $timestamp;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        while (my $line=<$entries>) {
            my @fields=split(/\//,$line);
            next unless @fields>1 and $fields[0] eq '' and $fields[1] eq $file;
            $timestamp=$fields[3];
            last;
        }
        close($entries);
    }
    return 'n/a' unless defined $timestamp;

    my $cvstime=`date -d '$timestamp UTC' --utc +"%s"`;
    chomp $cvstime;
    return $cvstime;
}
! 524:
# utctime($timestring)
#
# Hand a date/time string, read as UTC, to date(1) and return what it
# prints for +%s (seconds since the epoch) with the newline removed.
sub utctime {
    my ($timestring)=@_;
    chomp(my $seconds=`date -d '$timestring UTC' --utc +"%s"`);
    return $seconds;
}
! 531:
# dowarn($text)
#
# Send a progress message to STDERR through warn(), but only when no
# build mode is active ($buildmode false).
sub dowarn {
    my ($text)=@_;
    unless ($buildmode) {
        warn($text);
    }
}
! 536:
! 537: # ----------------------------------- POD (plain old documentation, CPAN style)
! 538:
! 539: =head1 NAME
! 540:
! 541: filecompare.pl - script used to help probe and compare file statistics
! 542:
! 543: =head1 SYNOPSIS
! 544:
! 545: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
! 546:
! 547: or
! 548:
! 549: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
! 550:
! 551: Restrictions: a list of space separated values (after the file/dir names)
! 552: can restrict the comparison.
! 553: These values can be: existence, cvstime, age, md5sum, size, lines,
! 554: and/or diffs.
! 555:
! 556: Options (before file/dir names):
! 557:
! 558: -p show all files that have the same comparison
! 559:
! 560: -n show all files that have different comparisons
! 561:
! 562: -a show all files (with comparisons)
! 563:
! 564: -q only show file names (based on first file/dir)
! 565:
! 566: -v verbose mode (default)
! 567:
! 568: =head1 DESCRIPTION
! 569:
! 570: filecompare.pl can work in two modes: file comparison mode, or directory
! 571: comparison mode.
! 572:
! 573: Comparisons can be a function of:
! 574: * existence similarity
! 575: * cvs time similarity (first argument treated as CVS source)
! 576: * age similarity (modification time)
! 577: * md5sum similarity
! 578: * size similarity (bytes)
! 579: * line count difference
! 580: * number of different lines
! 581:
! 582: filecompare.pl integrates smoothly with the LPML installation language
! 583: (linux packaging markup language). filecompare.pl is a tool that can
! 584: be used for safe CVS source-to-target installations.
! 585:
! 586: =head1 README
! 587:
! 588: filecompare.pl integrates smoothly with the LPML installation language
! 589: (linux packaging markup language). filecompare.pl is a tool that can
! 590: be used for safe CVS source-to-target installations.
! 591:
! 592: The unique identifier is considered to be the file name(s) independent
! 593: of the directory path.
! 594:
! 595: =head1 PREREQUISITES
! 596:
! 597: =head1 COREQUISITES
! 598:
! 599: =head1 OSNAMES
! 600:
! 601: linux
! 602:
! 603: =head1 SCRIPT CATEGORIES
! 604:
! 605: Packaging/Administrative
! 606:
! 607: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>