Annotation of loncom/build/filecompare.pl, revision 1.6

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.5       harris41    3: # The LearningOnline Network with CAPA
1.4       harris41    4: #
                      5: # filecompare.pl - script used to help probe and compare file statistics
                      6: #
1.1       harris41    7: # YEAR=2001
1.4       harris41    8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
                      9: # 11/14 Guy Albertelli
                     10: # 11/16 Scott Harrison
                     11: #
1.6     ! harris41   12: # $Id: filecompare.pl,v 1.5 2001/11/16 21:12:46 harris41 Exp $
1.4       harris41   13: ###
1.1       harris41   14: 
1.5       harris41   15: ###############################################################################
                     16: ##                                                                           ##
                     17: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
                     18: ##                                                                           ##
                     19: ## 1. Invocation                                                             ##
                     20: ## 2. Notes                                                                  ##
                     21: ## 3. Dependencies                                                           ##
                     22: ## 4. Process command line arguments                                         ##
                     23: ## 5. Process file/dir location arguments                                    ##
                     24: ## 6. Process comparison restrictions                                        ##
                     25: ## 7. Define output and measure subroutines                                  ##
                     26: ## 8. Loop through files and calculate differences                           ##
                     27: ## 9. Subroutines                                                            ##
                     28: ## 10. POD (plain old documentation, CPAN style)                             ##
                     29: ##                                                                           ##
                     30: ###############################################################################
                     31: 
1.4       harris41   32: # ------------------------------------------------------------------ Invocation
# Usage text.  It is printed whenever the command line cannot be understood.
my $invocation = <<"USAGE";
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls exit code of this script; 0 unless...)
   N=1: md5sum=same --> 1; cvstime<0 --> 2
   N=2: same as N=1 except without md5sum
   N=3: md5sum=same --> 1; age<0 --> 2
   N=4: cvstime>0 --> 2
USAGE

# With no arguments at all there is nothing to compare: show the usage text
# and stop with a non-zero exit status.
if (!@ARGV) {
    print $invocation;
    exit 1;
}
1.5       harris41   59: 
1.1       harris41   60: # ----------------------------------------------------------------------- Notes
                     61: #
                     62: # What are all the different ways to compare two files and how to look
                     63: # at the differences?
                     64: #
                     65: # Ways of comparison:
                     66: #   existence similarity
1.6     ! harris41   67: #   cvs time similarity (1st arg treated as CVS source; only for buildmode)
1.1       harris41   68: #   age similarity (modification time)
                     69: #   md5sum similarity
                     70: #   size similarity (bytes)
                     71: #   line count difference
                     72: #   number of different lines
                     73: #
                     74: # Quantities of comparison:
                     75: #   existence (no,yes); other values become 'n/a'
1.2       harris41   76: #   cvstime in seconds
1.1       harris41   77: #   age in seconds
                     78: #   md5sum ("same" or "different")
                     79: #   size similarity (byte difference)
                     80: #   line count difference (integer)
                     81: #   number of different lines (integer)
                     82: 
1.5       harris41   83: # ---------------------------------------------------------------- Dependencies
1.1       harris41   84: # implementing from unix command line (assuming bash)
                     85: # md5sum, diff, wc -l
                     86: 
                     87: # ---------------------------------------------- Process command line arguments
                     88: # Flags (before file/dir names):
                     89: # -p show all files the same
                     90: # -n show all files different
                     91: # -a show all files (with comparisons)
                     92: # -q only show file names (based on first file/dir)
                     93: # -v verbose mode (default)
1.5       harris41   94: # -bN build/install mode (returns exitcode)
# Defaults: verbose output, show every file, build mode switched off.
my $verbose='1';
my $show='all';
my $buildmode=0;

# Consume the leading "-x" options from @ARGV.  Parsing stops at the first
# argument that does not look like an option; that argument is then taken
# to be the first file/dir name by the code that follows.
ALOOP: while (@ARGV) {
    last ALOOP unless $ARGV[0]=~/^\-(\w)/;
    my $flag=$1;
    my $option=shift @ARGV;

    if ($flag eq 'b') {
        # -bN must carry its digit.  Previously a bare "-b" failed the digit
        # match, so the stale capture 'b' was stored as the build mode
        # (true as a boolean, yet equal to none of the modes 1-4).
        if ($option=~/^\-\w(\d)/) {
            $buildmode=$1;
        }
        else {
            print($invocation);
            exit(1);
        }
        next ALOOP;
    }

    if    ($flag eq 'q') { $verbose=0; }
    elsif ($flag eq 'v') { $verbose=1; }
    elsif ($flag eq 'p') { $show='same'; }
    elsif ($flag eq 'n') { $show='different'; }
    elsif ($flag eq 'a') { $show='all'; }
    else {
        # Unknown option letter.
        print($invocation);
        exit(1);
    }
}
# Progress notes go to STDERR; dowarn() stays silent in build mode.
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1       harris41  124: 
# ----------------------------------------- Process file/dir location arguments
# FILE1 FILE2 or DIR1 DIR2
#
# $loc1, $loc2 : the two locations to compare, exactly as given
# $dirmode     : 'files' when $loc1 is a plain file (or does not exist),
#                otherwise 'directories'
# @files       : in 'files' mode just ($loc1); in 'directories' mode every
#                plain file found below $loc1, as a path relative to $loc1
use File::Find ();

my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;

# Test with defined/length rather than truth, so that a location that is
# literally named "0" is not mistaken for a missing argument.
unless (defined($loc1) and length($loc1) and defined($loc2) and length($loc2)) {
    print($invocation);
    exit(1);
}

if (-f $loc1 or !-e $loc1) {
    # A plain file, or a name that does not exist at all: compare the two
    # names directly.  (A missing FILE1 used to fall into directory mode and
    # was then looked up as "$loc1/$loc1".)
    $dirmode='files';
    @files=($loc1);
}
else {
    # Walk the tree in-process instead of interpolating $loc1 into a shell
    # "find" command, which broke on spaces and shell metacharacters.
    # Symbolic links are skipped, as "find -type f" skips them.
    File::Find::find(
        {
            no_chdir => 1,
            wanted   => sub {
                push(@files,$File::Find::name) if -f $_ and !-l $_;
            },
        },
        $loc1
    );
    # Make the names relative to $loc1.  The prefix is quoted so regex
    # metacharacters in the directory name are taken literally, and trailing
    # slashes on $loc1 no longer prevent the prefix from matching.
    (my $prefix=$loc1)=~s{/+\z}{};
    s{\A\Q$prefix\E/+}{} for @files;
    # Directory order is arbitrary; sort for reproducible output.
    @files=sort @files;
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
1.1       harris41  150: 
# --------------------------------------------- Process comparison restrictions
# A list of space separated values (after the file/dir names)
# can restrict the comparison.
#
# %rhash    : the set of recognised restriction names (the values are unused)
# %restrict : the restrictions actually requested on the command line
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
           'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Test for the key itself.  The earlier "$rhash{$r}==0" test was also
    # true for unknown names (undef == 0), so nothing was ever rejected.
    if (exists $rhash{$r}) {
        $restrict{$r}=1;
    }
    else {
        print($invocation);
        exit(1);
    }
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
           join(' ',keys %restrict)."\n");
}
                    166: 
# --------------------------------------- Define output and measure subroutines
use Digest::MD5 ();

# %OUTPUT maps a comparison name to a sub that prints "name: value" (no
# newline) for the single value it is given and returns nothing.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; });
} ('existence','md5sum','cvstime','age','size','lines','diffs');

# Helper: one field of stat() for a file, or 0 when the file cannot be
# stat'ed (the value an undefined field would have had in arithmetic).
my $stat_field=sub {
    my ($file,$index)=@_;
    my @st=stat($file);
    return @st ? $st[$index] : 0;
};

# Helper: hex MD5 digest of a file's contents, or '' when it cannot be read.
# Computed in-process; the previous "md5sum" pipeline passed the file name
# through the shell unquoted and chopped the last digit off the digest.
my $measure_md5=sub {
    my ($file)=@_;
    open(my $fh,'<',$file) or return '';
    binmode($fh);
    my $digest=eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close($fh);
    return defined($digest) ? $digest : '';
};

# Helper: number of newline characters in a file (what "wc -l" reports),
# or 0 when the file cannot be read.
my $measure_lines=sub {
    my ($file)=@_;
    open(my $fh,'<',$file) or return 0;
    binmode($fh);
    my ($count,$buffer)=(0,'');
    while (read($fh,$buffer,65536)) {
        $count+=($buffer=~tr/\n//);
    }
    close($fh);
    return $count;
};

# %MEASURE maps a comparison name to a sub taking ($file1,$file2) and
# returning a two-element list: the measurement for each file.
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence' => sub {
        my ($file1,$file2)=@_;
        return map { (-e $_) ? 'yes' : 'no' } ($file1,$file2);
    },
    # MD5 digest of each file.
    'md5sum' => sub {
        my ($file1,$file2)=@_;
        return ($measure_md5->($file1),$measure_md5->($file2));
    },
    # Epoch seconds: the CVS/Entries timestamp of $file1 (via cvstime())
    # against the modification time of $file2.  stat() already yields epoch
    # seconds in UTC, so the mtime is used directly rather than being
    # formatted with gmtime and parsed back by an external "date".
    'cvstime' => sub {
        my ($file1,$file2)=@_;
        return (cvstime($file1),$stat_field->($file2,9));
    },
    # Modification time of each file, in epoch seconds.
    'age' => sub {
        my ($file1,$file2)=@_;
        return ($stat_field->($file1,9),$stat_field->($file2,9));
    },
    # Size of each file, in bytes.
    'size' => sub {
        my ($file1,$file2)=@_;
        return ($stat_field->($file1,7),$stat_field->($file2,7));
    },
    # Line count of each file.
    'lines' => sub {
        my ($file1,$file2)=@_;
        return ($measure_lines->($file1),$measure_lines->($file2));
    },
    # Number of diff output lines starting with '<' (present only in
    # $file1) and with '>' (present only in $file2).  diff is run once, and
    # in list form so the file names never pass through a shell.
    'diffs' => sub {
        my ($file1,$file2)=@_;
        my ($only1,$only2)=(0,0);
        if (open(my $diff,'-|','diff','--',$file1,$file2)) {
            while (my $line=<$diff>) {
                if    ($line=~/^</) { $only1++; }
                elsif ($line=~/^>/) { $only2++; }
            }
            # diff exits non-zero when the files differ, so the status of
            # close() is deliberately not treated as an error.
            close($diff);
        }
        return ($only1,$only2);
    },
);
                    211: 
# ------------------------------ Loop through files and calculate differences
#
# For every entry of @files the pair ($file1,$file2) is measured, and the
# results are kept in %result under these keys:
#   existence  "yes:yes", "yes:no", "no:yes" or "no:no"
#   cvstime    CVS timestamp of $file1 minus mtime of $file2 (build mode only)
#   age        mtime of $file1 minus mtime of $file2
#   md5sum     'same' or 'different'
#   size       byte count of $file1 minus byte count of $file2
#   lines      line count of $file1 minus line count of $file2
#   diffs      "A:B" - diff lines found only in $file1 / only in $file2
# Every key except existence is 'n/a' when either file is missing; cvstime
# is also 'n/a' outside build mode.
#
# In build mode 1-4 the script exits with the documented status as soon as
# the first pair has been measured; nothing is printed in that case.

# Order in which the values are printed in verbose mode.
my @print_order=('existence','cvstime','age','md5sum','size','lines','diffs');

# Comparisons examined by -n / -p when no restrictions were given.
my @default_keys=('existence','cvstime','md5sum','age','size','lines','diffs');

# Returns true when the value recorded for comparison $key means "the two
# files differ".  For the numeric deltas 'n/a' (a file is missing) counts as
# a difference, matching how md5sum and diffs already treated it.
my $differs=sub {
    my ($key,$value)=@_;
    return ($value ne 'yes:yes') if $key eq 'existence';
    return ($value ne 'same')    if $key eq 'md5sum';
    return ($value ne '0:0')     if $key eq 'diffs';
    return 1                     if $value eq 'n/a';
    return ($value!=0);
};

FLOOP: foreach my $file (@files) {
    # In directory mode $file is relative to both top directories; in file
    # mode the two locations are the files themselves.
    my ($file1,$file2)=($dirmode eq 'directories')
        ? ($loc1.'/'.$file,$loc2.'/'.$file)
        : ($loc1,$loc2);

    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my %result;
    @result{'cvstime','md5sum','age','size','lines','diffs'}=('n/a') x 6;
    $result{'existence'}=$existence1.':'.$existence2;

    if ($existence1 ne 'no' and $existence2 ne 'no') {
        if ($buildmode) {
            my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
            $result{'cvstime'}=$cvstime1-$cvstime2;
        }
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $result{'age'}=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical content: the remaining measures are known without
            # running them.
            @result{'md5sum','size','lines','diffs'}=('same',0,0,'0:0');
        }
        else {
            $result{'md5sum'}='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $result{'size'}=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $result{'lines'}=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $result{'diffs'}=$diffs1.':'.$diffs2;
        }
    }

    # Decide whether this file is listed.  -n lists a file when at least one
    # examined comparison differs, -p when none differs.  (-p used to start
    # from length(@ks), which is the length of the number as text and not
    # the element count, and only produced the right answer by accident
    # outside build mode.)
    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different' or $show eq 'same') {
        my @ks=keys %restrict;
        @ks=@default_keys unless @ks;
        my $differences=0;
        for my $key (@ks) {
            next unless exists $result{$key};
            # cvstime is only measured in build mode.
            next if $key eq 'cvstime' and !$buildmode;
            $differences++ if $differs->($key,$result{$key});
        }
        if ($show eq 'different') {
            $showflag=1 if $differences>0;
        }
        else {
            $showflag=1 if $differences==0;
        }
    }

    # Build modes: exit with the status promised in the usage text.  'n/a'
    # is treated as 0 here, exactly as the numeric comparison always did.
    my $cvsdelta=($result{'cvstime'} eq 'n/a') ? 0 : $result{'cvstime'};
    my $agedelta=($result{'age'} eq 'n/a') ? 0 : $result{'age'};
    if ($buildmode==1) {
        exit(1) if $result{'md5sum'} eq 'same';
        exit(2) if $cvsdelta<0;
        exit(0);
    }
    elsif ($buildmode==2) {
        exit(2) if $cvsdelta<0;
        exit(0);
    }
    elsif ($buildmode==3) {
        exit(1) if $result{'md5sum'} eq 'same';
        exit(2) if $agedelta<0;
        exit(0);
    }
    elsif ($buildmode==4) {
        exit(2) if $cvsdelta>0;
        exit(0);
    }

    next FLOOP unless $showflag;

    # One line per file: the name, then (verbose only) a tab-separated
    # "name: value" pair for every comparison.
    print "$file";
    if ($verbose==1) {
        for my $key (@print_order) {
            print "\t";
            $OUTPUT{$key}->($result{$key});
        }
    }
    print "\n";
}
                    418: 
1.5       harris41  419: # ----------------------------------------------------------------- Subroutines
                    420: 
# cvstime($f)
#
# Looks up file $f in the CVS/Entries file that sits next to it and returns
# the timestamp recorded there as seconds since the epoch (UTC).
#
# Returns 'n/a' without touching CVS/Entries when the build mode is 3, and
# '' when the recorded timestamp is not a plain date (the value the earlier
# "date" pipeline produced when it could not parse the field).
# Dies when CVS/Entries cannot be read or has no entry for the file.
sub cvstime {
    my ($f)=@_;
    return 'n/a' if $buildmode==3;

    # Split into directory part (with its trailing slash) and file name.
    my ($path,$file)=($f=~/^(.*\/)(.*?)$/) ? ($1,$2) : ('',$f);

    # Read CVS/Entries directly and compare the "/name/" prefix literally.
    # The previous shell "grep" treated the file name as a regular
    # expression (so "a.pl" also matched "axpl") and broke on names
    # containing quotes or spaces.
    my $wanted='/'.$file.'/';
    my $entry;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        while (my $line=<$entries>) {
            if (index($line,$wanted)==0) {
                $entry=$line;
                last;
            }
        }
        close($entries);
    }
    defined($entry) or
        die('*** ERROR *** cannot grep against '.$path.
            'CVS/Entries for ' .$file . "\n");

    # Entry layout: /name/revision/timestamp/options/tag
    my @fields=split(/\//,$entry);
    my $stamp=defined($fields[3]) ? $fields[3] : '';

    # The timestamp is in asctime form and in UTC, for example
    # "Fri Nov 16 21:12:46 2001".
    my %month_index;
    @month_index{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)}=(0..11);
    my ($mon,$mday,$hour,$min,$sec,$year)=
        $stamp=~/^\s*\w{3}\s+(\w{3})\s+(\d{1,2})\s+(\d{1,2}):(\d\d):(\d\d)\s+(\d{4})\s*$/;
    return '' unless defined($mon) and exists $month_index{$mon};

    require Time::Local;
    # timegm() croaks on out-of-range fields; treat that as unparseable.
    my $cvstime=eval {
        Time::Local::timegm($sec,$min,$hour,$mday,$month_index{$mon},$year);
    };
    return defined($cvstime) ? $cvstime : '';
}
1.1       harris41  445: 
# utctime($f)
#
# Hands the date string $f, marked as UTC, to the external "date" command
# and returns what it prints for the +"%s" format, minus the trailing
# newline.
sub utctime {
    my $stamp=shift;
    my $seconds=qx{date -d '$stamp UTC' --utc +"%s"};
    chomp($seconds);
    return $seconds;
}
1.1       harris41  452: 
# dowarn($msg)
#
# Emits $msg through warn() unless $buildmode holds a true value.
sub dowarn {
    my $msg=shift;
    $buildmode or warn($msg);
}
1.5       harris41  457: 
                    458: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4       harris41  459: 
                    460: =head1 NAME
                    461: 
                    462: filecompare.pl - script used to help probe and compare file statistics
                    463: 
                    464: =head1 SYNOPSIS
                    465: 
                    466: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
                    467: 
                    468: or
                    469: 
                    470: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
                    471: 
                    472: Restrictions: a list of space separated values (after the file/dir names)
                    473: can restrict the comparison.
                    474: These values can be: existence, cvstime, age, md5sum, size, lines,
                    475: and/or diffs.
                    476: 
                    477: Options (before file/dir names):
                    478: 
                    479:  -p show all files that have the same comparison
                    480: 
                    481:  -n show all files that have different comparisons
                    482: 
                    483:  -a show all files (with comparisons)
                    484: 
                    485:  -q only show file names (based on first file/dir)
                    486: 
                    487:  -v verbose mode (default)
                    488: 
                    489: =head1 DESCRIPTION
                    490: 
                    491: filecompare.pl can work in two modes: file comparison mode, or directory
                    492: comparison mode.
                    493: 
Comparisons can be a function of:

=over 4

=item * existence similarity

=item * cvs time similarity (first argument treated as CVS source)

=item * age similarity (modification time)

=item * md5sum similarity

=item * size similarity (bytes)

=item * line count difference

=item * number of different lines

=back
                    502: 
                    503: filecompare.pl integrates smoothly with the LPML installation language
                    504: (linux packaging markup language).  filecompare.pl is a tool that can
                    505: be used for safe CVS source-to-target installations.
                    506: 
                    507: =head1 README
                    508: 
                    509: filecompare.pl integrates smoothly with the LPML installation language
                    510: (linux packaging markup language).  filecompare.pl is a tool that can
                    511: be used for safe CVS source-to-target installations.
                    512: 
                    513: The unique identifier is considered to be the file name(s) independent
                    514: of the directory path.
                    515: 
                    516: =head1 PREREQUISITES
                    517: 
                    518: =head1 COREQUISITES
                    519: 
                    520: =head1 OSNAMES
                    521: 
                    522: linux
                    523: 
                    524: =head1 SCRIPT CATEGORIES
                    525: 
                    526: Packaging/Administrative
                    527: 
                    528: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>