--- loncom/loncron	2011/04/23 16:18:49	1.88
+++ loncom/loncron	2013/02/02 00:22:30	1.96
@@ -2,7 +2,7 @@
 
 # Housekeeping program, started by cron, loncontrol and loncron.pl
 #
-# $Id: loncron,v 1.88 2011/04/23 16:18:49 raeburn Exp $
+# $Id: loncron,v 1.96 2013/02/02 00:22:30 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -32,6 +32,8 @@ use strict;
 
 use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;
+use LONCAPA::Checksumming;
+use LONCAPA;
 use Apache::lonnet;
 use Apache::loncommon;
 
@@ -269,12 +271,7 @@ sub log_machine_info {
 
     &log($fh,"<h3>distprobe</h3>");
     &log($fh,"<pre>");
-    open(DSH,"$perlvar{'lonDaemons'}/distprobe |");
-    while (my $line=<DSH>) { 
-	&log($fh,&encode_entities($line,'<>&"')); 
-	$psproc++;
-    }
-    close(DSH);
+    &get_distro($perlvar{'lonDaemons'},$fh);
     &log($fh,"</pre>");
 
     &errout($fh);
@@ -514,21 +511,13 @@ sub clean_sockets {
 # ----------------------------------------------------------------------- httpd
 sub check_httpd_logs {
     my ($fh)=@_;
-    &log($fh,'<hr /><a name="httpd" /><h2>httpd</h2><h3>Access Log</h3><pre>');
-    
-    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
-    while (my $line=<DFH>) { &log($fh,&encode_entities($line,'<>&"')) };
-    close (DFH);
-	
-    &log($fh,"</pre><h3>Error Log</h3><pre>");
-	
-    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
-    while (my $line=<DFH>) { 
-	&log($fh,"$line");
-	if ($line=~/\[error\]/) { $notices++; } 
+    if (open(PIPE,"./lchttpdlogs|")) {  # relative path: resolved against the current working directory
+        while (my $line=<PIPE>) {
+            &log($fh,$line);  # helper output is logged as-is (no entity encoding at this point)
+            if ($line=~/\[error\]/) { $notices++; }  # every line containing "[error]" bumps $notices
+        }
+        close(PIPE);
     }
-    close (DFH);
-    &log($fh,"</pre>");
     &errout($fh);
 }
 
@@ -634,7 +623,9 @@ sub check_delayed_msg {
     }
 
     &log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n");
-    $warnings=$warnings+5*$unsend;
+    if ($unsend > 0) {
+        $warnings=$warnings+5*$unsend;
+    }
 
     if ($unsend) { $simplestatus{'unsend'}=$unsend; }
     &log($fh,"<h3>Outgoing Buffer</h3>\n<pre>");
@@ -648,11 +639,28 @@ sub check_delayed_msg {
     }
     &log($fh,"</pre>\n");
     close (DFH);
+    my %hostname = &Apache::lonnet::all_hostnames();
+    my $numhosts = scalar(keys(%hostname));
 # pong to all servers that have delayed messages
 # this will trigger a reverse connection, which should flush the buffers
-    foreach my $tryserver (keys %servers) {
-	my $answer=&Apache::lonnet::reply("pong",$tryserver);
-	&log($fh,"Pong to $tryserver: $answer<br />");
+    foreach my $tryserver (sort(keys(%servers))) {
+        if ($hostname{$tryserver} || !$numhosts) {
+            my $answer;
+            eval {
+                local $SIG{ ALRM } = sub { die "TIMEOUT" };
+                alarm(20);
+                $answer = &Apache::lonnet::reply("pong",$tryserver);
+                alarm(0);
+            };
+            if ($@ && $@ =~ m/TIMEOUT/) {
+                &log($fh,"Attempted pong to $tryserver timed out<br />");
+                print "time out while contacting: $tryserver for pong\n";
+            } else {
+                &log($fh,"Pong to $tryserver: $answer<br />");
+            }
+        } else {
+            &log($fh,"$tryserver has delayed messages, but is not part of the cluster -- skipping 'Pong'.<br />");
+        }
     }
 }
 
@@ -686,11 +694,22 @@ sub log_simplestatus {
 }
 
 sub write_loncaparevs {
+    print "Retrieving LON-CAPA version information\n";
     if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) {
         my %hostname = &Apache::lonnet::all_hostnames();
         foreach my $id (sort(keys(%hostname))) {
             if ($id ne '') {
-                my $loncaparev = &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron');
+                my $loncaparev;
+                eval {
+                    local $SIG{ ALRM } = sub { die "TIMEOUT" };
+                    alarm(10);
+                    $loncaparev =
+                        &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron');
+                    alarm(0);
+                };
+                if ($@ && $@ =~ m/TIMEOUT/) {
+                    print "time out while contacting lonHost: $id for version\n";   
+                }
                 if ($loncaparev =~ /^[\w.\-]+$/) {
                     print $fh $id.':'.$loncaparev."\n";
                 }
@@ -702,12 +721,23 @@ sub write_loncaparevs {
 }
 
 sub write_serverhomeIDs {
+    print "Retrieving LON-CAPA lonHostID information\n";
     if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) {
         my %name_to_host = &Apache::lonnet::all_names();
         foreach my $name (sort(keys(%name_to_host))) {
             if ($name ne '') {
                 if (ref($name_to_host{$name}) eq 'ARRAY') {
-                    my $serverhomeID = &Apache::lonnet::get_server_homeID($name,1,'loncron');
+                    my $serverhomeID;
+                    eval {
+                        local $SIG{ ALRM } = sub { die "TIMEOUT" };
+                        alarm(10);
+                        $serverhomeID = 
+                            &Apache::lonnet::get_server_homeID($name,1,'loncron');
+                        alarm(0);
+                    };
+                    if ($@ && $@ =~ m/TIMEOUT/) {
+                        print "Time out while contacting server: $name\n"; 
+                    }
                     if ($serverhomeID ne '') {
                         print $fh $name.':'.$serverhomeID."\n";
                     } else {
@@ -721,6 +751,27 @@ sub write_serverhomeIDs {
     return;
 }
 
+# Report how many files LONCAPA::Checksumming::get_checksums covered for this distro.
+# $perlvar: hash ref of config values; anything else returns at once. Prints to STDOUT.
+sub write_checksums {
+    my ($perlvar) = @_;
+    return unless (ref($perlvar) eq 'HASH');
+    my $distro = &get_distro($perlvar->{'lonDaemons'});
+    if ($distro) {
+        print "Retrieving file version and checksumming.\n";
+        my ($chksumsref,$versionsref) =
+            &LONCAPA::Checksumming::get_checksums($distro,$perlvar->{'lonDaemons'},
+                                                  $perlvar->{'lonLib'},
+                                                  $perlvar->{'lonIncludes'},
+                                                  $perlvar->{'lonTabDir'});
+        my $numchksums = (ref($chksumsref) eq 'HASH') ? scalar(keys(%{$chksumsref})) : 0;  # local ("use strict"); 0 if no hash ref
+        print "File version retrieved and checksumming completed for $numchksums files.\n";
+    } else {
+        print "File version retrieval and checksumming skipped - could not determine Linux distro.\n";
+    }
+    return;
+}
+
 sub send_mail {
     print "sending mail\n";
     my $defdom = $perlvar{'lonDefDomain'};
@@ -738,6 +789,21 @@ sub send_mail {
     }
 }
 
+# Run distprobe from $dir; return its output (undef if none), echoing to log $fh if given.
+sub get_distro {
+    my ($dir,$fh) = @_;
+    my $probe_output;
+    # A failed pipe-open is tolerated: the caller simply gets undef back.
+    open(my $probe,"$dir/distprobe |") or return $probe_output;
+    while (my $text=<$probe>) {
+        # Entity-encode before logging; the raw text is what gets returned.
+        &log($fh,&encode_entities($text,'<>&"')) if ($fh);
+        $probe_output .= $text;
+    }
+    close($probe);
+    return $probe_output;
+}
+
 sub usage {
     print(<<USAGE);
 loncron - housekeeping program that checks up on various parts of Lon-CAPA
@@ -821,8 +887,11 @@ sub main () {
                 print $fh "$key\n";
             }
             close($fh);
-            my $execpath = $perlvar{'lonDaemons'}.'/lciptables';
-            system("$execpath $tmpfile");
+            if (&LONCAPA::try_to_lock('/tmp/lock_lciptables')) {
+                my $execpath = $perlvar{'lonDaemons'}.'/lciptables';
+                system("$execpath $tmpfile");
+                unlink('/tmp/lock_lciptables');  # Remove the lock file. 
+            }
             unlink($tmpfile);
         }
     }
@@ -868,7 +937,7 @@ sub main () {
 	&log_simplestatus();
         &write_loncaparevs();
         &write_serverhomeIDs();
-	
+	&write_checksums(\%perlvar);
 	if ($totalcount>200 && !$noemail) { &send_mail(); }
     }
 }
@@ -876,10 +945,3 @@ sub main () {
 &main();
 1;
 
-
-
-
-
-
-
-