--- loncom/loncron	2001/02/12 18:53:32	1.16
+++ loncom/loncron	2001/11/26 21:00:42	1.25
@@ -9,9 +9,13 @@
 #
 # 7/14,7/15,7/19,7/21,7/22,11/18,
 # 2/8 Gerd Kortemeyer
-# Dec 00 Scott Harrison
+# 12/6/2000,12/8 Scott Harrison
 # 12/23 Gerd Kortemeyer
-# 02/12/2001 Scott Harrison
+# YEAR=2001
+# 1/10/2001, 2/12, 2/26, 3/15, 04/11, 04/21, 8/27 Scott Harrison
+# 09/04,09/06,11/26 Gerd Kortemeyer
+
+$|=1; # autoflush the selected output handle so the progress prints ("lond", "lonc", ...) show up immediately
 
 use IO::File;
 use IO::Socket;
@@ -56,6 +60,8 @@ ENDERROUT
            $perlvar{$varname}=$varvalue;
         }
     }
+    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
+    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
 }
 
 # --------------------------------------- Make sure that LON-CAPA is configured
@@ -201,7 +207,8 @@ while ($line=<DFH>) {
    $usage=$parts[4];
    $usage=~s/\W//g;
    if ($usage>90) { 
-      $warnings++; 
+      $warnings++;
+      $notices++; 
    } elsif ($usage>80) {
       $warnings++;
    } elsif ($usage>60) {
@@ -211,6 +218,23 @@ while ($line=<DFH>) {
 }
 close (DFH);
 print $fh "</pre>";
+
+
+print $fh "<h3>ps</h3>"; # start of the process-listing section of the status page
+print $fh "<pre>";
+$psproc=0; # counts every line read from ps (any header line ps prints is counted too)
+
+open (PSH,"ps -aux|"); # read the output of ps through a pipe
+while ($line=<PSH>) { 
+   print $fh "$line"; 
+   $psproc++;
+}
+close (PSH);
+print $fh "</pre>";
+
+if ($psproc>200) { $notices++; } # more than 200 ps lines: one notice
+if ($psproc>250) { $notices++; } # more than 250 ps lines: a second notice in addition to the first
+
 &errout($fh);
 
 # --------------------------------------------------------------- clean out tmp
@@ -258,13 +282,13 @@ print $fh "<h3>$active open session(s)</
 
 print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
 
-open (DFH,"tail -n40 /etc/httpd/logs/access_log|");
+open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
 while ($line=<DFH>) { print $fh "$line" };
 close (DFH);
 
 print $fh "</pre><h3>Error Log</h3><pre>";
 
-open (DFH,"tail -n50 /etc/httpd/logs/error_log|");
+open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
 while ($line=<DFH>) { 
    print $fh "$line";
    if ($line=~/\[error\]/) { $notices++; } 
@@ -275,13 +299,12 @@ print $fh "</pre>";
 
 
 # ---------------------------------------------------------------------- lonsql
-#
-# Do not run for now
-#
-if ($perlvar{'lonRole'} eq "library" && 1==0) {
+
+my $restartflag=1;
+if ($perlvar{'lonRole'} eq "library") {
 
     print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
-    
+    print "lonsql\n";
     if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
 	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
 	while ($line=<DFH>) { 
@@ -295,23 +318,36 @@ if ($perlvar{'lonRole'} eq "library" &&
     print $fh "</pre>";
     
     my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
-    
+ 
+    $restartflag=1;
+   
     if (-e $lonsqlfile) {
 	my $lfh=IO::File->new("$lonsqlfile");
 	my $lonsqlpid=<$lfh>;
 	chomp($lonsqlpid);
 	if (kill 0 => $lonsqlpid) {
 	    print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
+	    $restartflag=0;
 	} else {
 	    $errors++; $errors++;
 	    print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
+		$restartflag=1;
+	print $fh 
+	    "<h3>Decided to clean up stale .pid file and restart lonsql</h3>";
 	}
-    } else {
+    }
+    if ($restartflag==1) {
 	$errors++;
+	         print $fh '<br><font color="red">Killall lonsql: '. # report the raw return value of system() for a plain killall
+                    system('killall lonsql').' - ';
+                    sleep 60; # pause one minute before removing the pid file and force-killing
+                    print $fh unlink($lonsqlfile).' - '. # unlink() result (number of files removed), then ...
+                              system('killall -9 lonsql'). # ... the raw system() return value of killall -9
+                    '</font><br>';
 	print $fh "<h3>lonsql not running, trying to start</h3>";
 	system(
  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
-	sleep 120;
+	sleep 10;
 	if (-e $lonsqlfile) {
 	    print $fh "Seems like it started ...<p>";
 	    my $lfh=IO::File->new("$lonsqlfile");
@@ -326,7 +362,7 @@ if ($perlvar{'lonRole'} eq "library" &&
 		print $fh "Give it one more try ...<p>";
 		system(
  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
-		sleep 120;
+		sleep 10;
 	    }
 	} else {
 	    print $fh "Seems like that did not work!<p>";
@@ -364,9 +400,10 @@ if ($perlvar{'lonRole'} eq "library" &&
 # ------------------------------------------------------------------------ lond
 
 print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
+print "lond\n";
 
 if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
+open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lond.log|");
 while ($line=<DFH>) { 
    print $fh "$line";
    if ($line=~/INFO/) { $notices++; }
@@ -379,42 +416,34 @@ print $fh "</pre>";
 
 my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
 
-my $restartflag=1;
+$restartflag=1;
 if (-e $londfile) {    
    my $lfh=IO::File->new("$londfile");
    my $londpid=<$lfh>;
    chomp($londpid);
    if (kill 0 => $londpid) {
-      print $fh "<h3>lond at pid $londpid responding</h3>";
+      print $fh "<h3>lond at pid $londpid responding, sending USR1</h3>";
+      kill USR1 => $londpid;
       $restartflag=0;
    } else {
       $errors++;
       print $fh "<h3>lond at pid $londpid not responding</h3>";
-      # Intelligently handle this.
-      # Possibility #1: there is no process
-      # Solution: remove .pid file and restart
-      if (getpgrp($londpid)==-1) {
-	  unlink($londfile);
-	  $restartflag=1;
-      }
-      else {
-      # Possibility #2: there is a live process that is not responding
-      #                 for an unknown reason
-      # Solution: kill parent and children processes, remove .pid and restart
-	  `killall -9 lond`;
-	  unlink($londfile);
-	  $restartflag=1;
-      }
+      $restartflag=1;
       print $fh 
-	  "<h3>Deciding to clean up stale .pid file and restart lond</h3>";
+	  "<h3>Decided to clean up stale .pid file and restart lond</h3>";
    }
 } 
 if ($restartflag==1) {
    $errors++;
+	  print $fh '<br><font color="red">Killall lond: '. # report the raw return value of system() for a plain killall
+                    system('killall lond').' - ';
+          sleep 60; # pause one minute before removing the pid file and force-killing
+          print $fh unlink($londfile).' - '.system('killall -9 lond'). # unlink() result, then raw system() return value of killall -9
+                    '</font><br>';
    print $fh "<h3>lond not running, trying to start</h3>";
    system(
      "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
-   sleep 120;
+   sleep 10;
    if (-e $londfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$londfile");
@@ -429,7 +458,7 @@ if ($restartflag==1) {
           print $fh "Give it one more try ...<p>";
 	  system(
  "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
-          sleep 120;
+          sleep 10;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
@@ -466,9 +495,10 @@ if ($size>40000) {
 # ------------------------------------------------------------------------ lonc
 
 print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
+print "lonc\n";
 
 if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
+open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonc.log|");
 while ($line=<DFH>) { 
    print $fh "$line";
    if ($line=~/INFO/) { $notices++; }
@@ -493,31 +523,23 @@ if (-e $loncfile) {
    } else {
       $errors++;
       print $fh "<h3>lonc at pid $loncpid not responding</h3>";
-      # Intelligently handle this.
-      # Possibility #1: there is no process
-      # Solution: remove .pid file and restart
-      if (getpgrp($loncpid)==-1) {
-	  unlink($loncfile);
-	  $restartflag=1;
-      }
-      else {
-      # Possibility #2: there is a live process that is not responding
-      #                 for an unknown reason
       # Solution: kill parent and children processes, remove .pid and restart
-	  `killall -9 lonc`;
-	  unlink($loncfile);
 	  $restartflag=1;
-      }
       print $fh 
-	  "<h3>Deciding to clean up stale .pid file and restart lonc</h3>";
+	  "<h3>Decided to clean up stale .pid file and restart lonc</h3>";
    }
 } 
 if ($restartflag==1) {
    $errors++;
+	  print $fh '<br><font color="red">Killall lonc: '. # report the raw return value of system() for a plain killall
+	            system('killall lonc').' - ';
+          sleep 60; # pause one minute before removing the pid file and force-killing
+          print $fh unlink($loncfile).' - '.system('killall -9 lonc'). # unlink() result, then raw system() return value of killall -9
+                    '</font><br>';
    print $fh "<h3>lonc not running, trying to start</h3>";
 	system(
- "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
-   sleep 120;
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+   sleep 10;
    if (-e $loncfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$loncfile");
@@ -531,8 +553,8 @@ if ($restartflag==1) {
           print $fh "<h3>lonc at pid $loncpid not responding</h3>";
           print $fh "Give it one more try ...<p>";
  	  system(
- "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
-          sleep 120;
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+          sleep 10;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
@@ -570,6 +592,7 @@ if ($size>40000) {
 # ---------------------------------------------------------------------- lonnet
 
 print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
+print "lonnet\n";
 if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
 open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
 while ($line=<DFH>) { 
@@ -627,6 +650,7 @@ print $fh "</table>";
 # ------------------------------------------------------------ Delayed messages
 
 print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
+print "buffers\n";
 
 print $fh '<h3>Scanning Permanent Log</h3>';
 
@@ -663,12 +687,13 @@ print $fh "<h1>Total Error Count: $total
 $now=time;
 $date=localtime($now);
 print $fh "<hr>$date ($now)</body></html>\n";
-
+print "writing done\n";
 }
 
 rename ("$statusdir/newstatus.html","$statusdir/index.html");
 
 if ($totalcount>200) {
+   print "mailing\n";
    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; 
    system(