--- loncom/loncron	1999/10/13 17:48:51	1.1
+++ loncom/loncron	2001/04/24 15:55:13	1.21
@@ -7,11 +7,21 @@
 # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
 # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
 #
-# 7/14,7/15,7/19,7/21,7/22 Gerd Kortemeyer
+# 7/14,7/15,7/19,7/21,7/22,11/18,
+# 2/8 Gerd Kortemeyer
+# 12/6/2000,12/8 Scott Harrison
+# 12/23 Gerd Kortemeyer
+# 1/10/2001, 2/12/, 2/26, 3/15, 04/11, 04/21 Scott Harrison
 
 use IO::File;
 use IO::Socket;
 
+my $qflag=0;
+if (@ARGV) {
+    my $arg=shift @ARGV;
+    $qflag=1 if $arg eq 'quick';
+}
+
 # -------------------------------------------------- Non-critical communication
 sub reply {
     my ($cmd,$server)=@_;
@@ -40,46 +50,8 @@ sub errout {
 ENDERROUT
 }
 
-# -------------------------------------------------------------- Permanent logs
-sub logperm {
-    my $message=shift;
-    my $execdir=$perlvar{'lonDaemons'};
-    my $now=time;
-    my $local=localtime($now);
-    my $fh=Apache::File->new(">>$execdir/logs/lonnet.perm.log");
-    print $fh "$now:$message:$local\n";
-    return 1;
-}
-
-# ------------------------------------------------ Try to send delayed messages
-sub senddelayed {
-    my $fh=shift;
-    my $dfname;
-    my $path="$perlvar{'lonSockDir'}/delayed";
-    print $fh "<h3>Attempting to send delayed messages</h3>";
-    while ($dfname=<$path/*>) {
-        my $wcmd;
-        {
-         my $dfh=IO::File->new($dfname);
-         $wcmd=<$dfh>;
-        }
-        my ($server,$cmd)=split(/:/,$wcmd);
-        chomp($cmd);
-        my $answer=reply($cmd,$server);
-        if ($answer ne 'con_lost') {
-	    unlink("$dfname");
-            print $fh "Send $cmd to $server: $answer<br>\n";
-            &logperm("S:$server:$cmd");
-        } else {
-            print $fh "Failed to deliver $cmd to $server<br>\n";
-            $warnings++;
-        }        
-    }
-}
-
 # ================================================================ Main Program
 
-
 # ------------------------------------------------------------ Read access.conf
 {
     my $config=IO::File->new("/etc/httpd/conf/access.conf");
@@ -90,6 +62,33 @@ sub senddelayed {
            $perlvar{$varname}=$varvalue;
         }
     }
+    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
+    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
+}
+
+# --------------------------------------- Make sure that LON-CAPA is configured
+# Safeguard: only lonHostID is tested.  Placeholder value => mail and exit 1.
+if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
+   print("Unconfigured machine.\n");
+   $emailto=$perlvar{'lonSysEMail'};
+   $hostname=`/bin/hostname`;
+   # Strip everything but [\w.-]; this also drops the trailing newline.
+   $hostname=~s/[^\w\.\-]//g; # keep '-' (legal in hostnames, safe in quotes)
+   $subj="LON: Unconfigured machine $hostname";
+   system("echo 'Unconfigured machine $hostname.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+    exit 1;
+}
+
+# ----------------------------- Make sure this process is running from user=www
+my $wwwid=getpwnam('www');
+if ($wwwid!=$<) {
+   print("User ID mismatch.  This program must be run as user 'www'\n");
+   $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+   $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+   system("echo 'User ID mismatch.  loncron must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+   exit 1;
 }
 
 # ------------------------------------------------------------- Read hosts file
@@ -139,14 +138,17 @@ print $fh (<<ENDHEADERS);
 <head>
 <title>LON Status Report $perlvar{'lonHostID'}</title>
 </head>
-<body bgcolor="#FFFFFF">
+<body bgcolor="#AAAAAA">
 <a name="top">
 <h1>LON Status Report $perlvar{'lonHostID'}</h1>
 <h2>$date ($now)</h2>
 <ol>
 <li><a href="#configuration">Configuration</a>
 <li><a href="#machine">Machine Information</a>
+<li><a href="#tmp">Temporary Files</a>
+<li><a href="#tokens">Session Tokens</a>
 <li><a href="#httpd">httpd</a>
+<li><a href="#lonsql">lonsql</a>
 <li><a href="#lond">lond</a>
 <li><a href="#lonc">lonc</a>
 <li><a href="#lonnet">lonnet</a>
@@ -189,7 +191,7 @@ close (LOADAVGH);
 print $fh "<tt>$loadavg</tt>";
 
 @parts=split(/\s+/,$loadavg);
-if ($parts[1]>3.0) {
+if ($parts[1]>4.0) {
     $errors++;
 } elsif ($parts[1]>2.0) {
     $warnings++;
@@ -207,17 +209,59 @@ while ($line=<DFH>) {
    $usage=$parts[4];
    $usage=~s/\W//g;
    if ($usage>90) { 
-      $errors++; 
+      $warnings++; 
    } elsif ($usage>80) {
       $warnings++;
    } elsif ($usage>60) {
       $notices++;
    }
-   if ($usage>95) { $errors++; }
+   if ($usage>95) { $warnings++; $warnings++ }
 }
 close (DFH);
 print $fh "</pre>";
 &errout($fh);
+
+# --------------------------------------------------------------- clean out tmp
+# Delete every entry of lonDaemons/tmp whose mtime is more than lonExpire
+# seconds old and report how many entries were actually removed.
+print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
+$cleaned=0;
+while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
+    my $mtime=(stat($fname))[9];   # field 9 of stat() is the mtime
+    $now=time;
+    next unless defined $mtime;    # stat failed (e.g. entry vanished): skip
+    $since=$now-$mtime;
+    if ($since>$perlvar{'lonExpire'}) {
+        # unlink returns the number of files removed; count real
+        # deletions only (it fails on e.g. subdirectories).
+        $cleaned++ if unlink($fname);
+    }
+}
+print $fh "Cleaned up ".$cleaned." files.";
+
+# ------------------------------------------------------------ clean out lonIDs
+# Remove entries of lonIDsDir whose mtime is more than lonExpire seconds
+# old; every remaining (younger) entry is counted as an open session.
+print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
+$cleaned=0;
+$active=0;
+while ($fname=<$perlvar{'lonIDsDir'}/*>) {
+    my $mtime=(stat($fname))[9];   # field 9 of stat() is the mtime
+    $now=time;
+    next unless defined $mtime;    # stat failed (e.g. entry vanished): skip
+    $since=$now-$mtime;
+    if ($since>$perlvar{'lonExpire'}) {
+        print $fh "Unlinking $fname<br>";
+        # Count only entries that were really removed; report the rest.
+        if (unlink($fname)) { $cleaned++; }
+        else { print $fh "Could not unlink $fname: $!<br>"; }
+    } else {
+        $active++;
+    }
+}
+print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
+print $fh "<h3>$active open session(s)</h3>";
+
 # ----------------------------------------------------------------------- httpd
 
 print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
@@ -236,15 +280,106 @@ while ($line=<DFH>) {
 close (DFH);
 print $fh "</pre>";
 &errout($fh);
+
+
+# ---------------------------------------------------------------------- lonsql
+#
+# Only runs when lonRole is "library" (see the test just below)
+#
+if ($perlvar{'lonRole'} eq "library") {
+
+    print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
+    
+    if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
+	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
+	while ($line=<DFH>) { 
+	    print $fh "$line";
+	    if ($line=~/INFO/) { $notices++; }
+	    if ($line=~/WARNING/) { $notices++; }
+	    if ($line=~/CRITICAL/) { $warnings++; }
+	};
+	close (DFH);
+    }
+    print $fh "</pre>";
+    
+    my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
+    
+    if (-e $lonsqlfile) {
+	my $lfh=IO::File->new("$lonsqlfile");
+	my $lonsqlpid=<$lfh>;
+	chomp($lonsqlpid);
+	if (kill 0 => $lonsqlpid) {
+	    print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
+	} else {
+	    $errors++; $errors++;
+	    print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
+	}
+    } else {
+	$errors++;
+	print $fh "<h3>lonsql not running, trying to start</h3>";
+	system(
+ "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
+	sleep 120 unless $qflag;
+	if (-e $lonsqlfile) {
+	    print $fh "Seems like it started ...<p>";
+	    my $lfh=IO::File->new("$lonsqlfile");
+	    my $lonsqlpid=<$lfh>;
+	    chomp($lonsqlpid);
+	    sleep 30 unless $qflag;
+	    if (kill 0 => $lonsqlpid) {
+		print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
+	    } else {
+		$errors++; $errors++;
+		print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
+		print $fh "Give it one more try ...<p>";
+		system(
+ "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
+		sleep 120 unless $qflag;
+	    }
+	} else {
+	    print $fh "Seems like that did not work!<p>";
+	    $errors++;
+	}
+	if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
+	    print $fh "<p><pre>";
+	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
+	    while ($line=<DFH>) { 
+		print $fh "$line";
+		if ($line=~/WARNING/) { $notices++; }
+		if ($line=~/CRITICAL/) { $notices++; }
+	    };
+	    close (DFH);
+	    print $fh "</pre>";
+	}
+    }
+
+    $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";
+
+    my ($dev,$ino,$mode,$nlink,
+	$uid,$gid,$rdev,$size,
+	$atime,$mtime,$ctime,
+	$blksize,$blocks)=stat($fname);
+
+    if ($size>40000) {
+	print $fh "Rotating logs ...<p>";
+	rename("$fname.2","$fname.3");
+	rename("$fname.1","$fname.2");
+	rename("$fname","$fname.1");
+    }
+
+    &errout($fh);
+}
 # ------------------------------------------------------------------------ lond
 
 print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
 
 if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
-open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lond.log|");
+open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
 while ($line=<DFH>) { 
    print $fh "$line";
-   if ($line=~/giving up/) { $notices++; }
+   if ($line=~/INFO/) { $notices++; }
+   if ($line=~/WARNING/) { $notices++; }
+   if ($line=~/CRITICAL/) { $warnings++; }
 };
 close (DFH);
 }
@@ -252,40 +387,73 @@ print $fh "</pre>";
 
 my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
 
-if (-e $londfile) {
+my $restartflag=1;
+if (-e $londfile) {    
    my $lfh=IO::File->new("$londfile");
    my $londpid=<$lfh>;
    chomp($londpid);
    if (kill 0 => $londpid) {
       print $fh "<h3>lond at pid $londpid responding</h3>";
+      $restartflag=0;
    } else {
-      $errors++; $errors++;
+      $errors++;
       print $fh "<h3>lond at pid $londpid not responding</h3>";
+      # Intelligently handle this.
+      # Possibility #1: there is no process
+      # Solution: remove .pid file and restart
+      if (getpgrp($londpid)==-1) {
+	  unlink($londfile);
+	  $restartflag=1;
+      }
+      else {
+      # Possibility #2: there is a live process that is not responding
+      #                 for an unknown reason
+      # Solution: kill parent and children processes, remove .pid and restart
+	  `killall -9 lond`;
+	  unlink($londfile);
+	  $restartflag=1;
+      }
+      print $fh 
+	  "<h3>Deciding to clean up stale .pid file and restart lond</h3>";
    }
-} else {
+} 
+if ($restartflag==1) {
    $errors++;
    print $fh "<h3>lond not running, trying to start</h3>";
-   system("$perlvar{'lonDaemons'}/lond");
-   sleep 120;
+   system(
+     "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
+   sleep 120 unless $qflag;
    if (-e $londfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$londfile");
        my $londpid=<$lfh>;
        chomp($londpid);
-       sleep 30;
+       sleep 30 unless $qflag;
        if (kill 0 => $londpid) {
           print $fh "<h3>lond at pid $londpid responding</h3>";
        } else {
           $errors++; $errors++;
           print $fh "<h3>lond at pid $londpid not responding</h3>";
           print $fh "Give it one more try ...<p>";
-          system("$perlvar{'lonDaemons'}/lond");
-          sleep 120;
+	  system(
+ "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
+          sleep 120 unless $qflag;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
        $errors++;
    }
+   if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
+    print $fh "<p><pre>";
+    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
+    while ($line=<DFH>) { 
+      print $fh "$line";
+      if ($line=~/WARNING/) { $notices++; }
+      if ($line=~/CRITICAL/) { $notices++; }
+    };
+    close (DFH);
+    print $fh "</pre>";
+   }
 }
 
 $fname="$perlvar{'lonDaemons'}/logs/lond.log";
@@ -308,10 +476,12 @@ if ($size>40000) {
 print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
 
 if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
-open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonc.log|");
+open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
 while ($line=<DFH>) { 
    print $fh "$line";
-   if ($line=~/died/) { $notices++; }
+   if ($line=~/INFO/) { $notices++; }
+   if ($line=~/WARNING/) { $notices++; }
+   if ($line=~/CRITICAL/) { $warnings++; }
 };
 close (DFH);
 }
@@ -319,6 +489,7 @@ print $fh "</pre>";
 
 my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
 
+$restartflag=1;
 if (-e $loncfile) {
    my $lfh=IO::File->new("$loncfile");
    my $loncpid=<$lfh>;
@@ -326,34 +497,66 @@ if (-e $loncfile) {
    if (kill 0 => $loncpid) {
       print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
       kill USR1 => $loncpid;
+      $restartflag=0;
    } else {
-      $errors++; $errors++;
+      $errors++;
       print $fh "<h3>lonc at pid $loncpid not responding</h3>";
+      # Intelligently handle this.
+      # Possibility #1: there is no process
+      # Solution: remove .pid file and restart
+      if (getpgrp($loncpid)==-1) {
+	  unlink($loncfile);
+	  $restartflag=1;
+      }
+      else {
+      # Possibility #2: there is a live process that is not responding
+      #                 for an unknown reason
+      # Solution: kill parent and children processes, remove .pid and restart
+	  `killall -9 lonc`;
+	  unlink($loncfile);
+	  $restartflag=1;
+      }
+      print $fh 
+	  "<h3>Deciding to clean up stale .pid file and restart lonc</h3>";
    }
-} else {
+} 
+if ($restartflag==1) {
    $errors++;
    print $fh "<h3>lonc not running, trying to start</h3>";
-   system("$perlvar{'lonDaemons'}/lonc");
-   sleep 120;
+	system(
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+   sleep 120 unless $qflag;
    if (-e $loncfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$loncfile");
        my $loncpid=<$lfh>;
        chomp($loncpid);
-       sleep 30;
+       sleep 30 unless $qflag;
        if (kill 0 => $loncpid) {
           print $fh "<h3>lonc at pid $loncpid responding</h3>";
        } else {
           $errors++; $errors++;
           print $fh "<h3>lonc at pid $loncpid not responding</h3>";
           print $fh "Give it one more try ...<p>";
-          system("$perlvar{'lonDaemons'}/lonc");
-          sleep 120;
+ 	  system(
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+          sleep 120 unless $qflag;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
        $errors++;
    }
+   if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
+    print $fh "<p><pre>";
+    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
+    while ($line=<DFH>) { 
+      print $fh "$line";
+      if ($line=~/WARNING/) { $notices++; }
+      if ($line=~/CRITICAL/) { $notices++; }
+    };
+    close (DFH);
+    print $fh "</pre>";
+   }
 }
 
 $fname="$perlvar{'lonDaemons'}/logs/lonc.log";
@@ -379,13 +582,10 @@ if (-e "$perlvar{'lonDaemons'}/logs/lonn
 open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
 while ($line=<DFH>) { 
     print $fh "$line";
-    if ($line=~/Delayed/) { $warnings++; }
-    if ($line=~/giving up/) { $warnings++; }
-    if ($line=~/FAILED/) { $errors++; }
 };
 close (DFH);
 }
-print $fh "</pre><h3>Perm Log</h3>";
+print $fh "</pre><h3>Perm Log</h3><pre>";
 
 if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
     open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
@@ -436,8 +636,6 @@ print $fh "</table>";
 
 print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
 
-&senddelayed($fh);
-
 print $fh '<h3>Scanning Permanent Log</h3>';
 
 $unsend=0;
@@ -482,7 +680,8 @@ if ($totalcount>200) {
    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; 
    system(
- "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
+ "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html")
+    unless $qflag;
 }
 1;