--- loncom/loncron	2000/02/08 17:34:24	1.3
+++ loncom/loncron	2001/08/27 13:54:50	1.22
@@ -9,10 +9,20 @@
 #
 # 7/14,7/15,7/19,7/21,7/22,11/18,
 # 2/8 Gerd Kortemeyer
+# 12/6/2000,12/8 Scott Harrison
+# 12/23 Gerd Kortemeyer
+# YEAR=2001
+# 1/10/2001, 2/12/, 2/26, 3/15, 04/11, 04/21,8/27 Scott Harrison
 
 use IO::File;
 use IO::Socket;
 
+my $qflag=0; # becomes 1 when the first command-line argument is 'quick'
+if (@ARGV) {
+    # the first argument is consumed whether or not it matches
+    $qflag=1 if shift(@ARGV) eq 'quick';
+}
+
 # -------------------------------------------------- Non-critical communication
 sub reply {
     my ($cmd,$server)=@_;
@@ -43,7 +53,6 @@ ENDERROUT
 
 # ================================================================ Main Program
 
-
 # ------------------------------------------------------------ Read access.conf
 {
     my $config=IO::File->new("/etc/httpd/conf/access.conf");
@@ -54,6 +63,33 @@ ENDERROUT
            $perlvar{$varname}=$varvalue;
         }
     }
+    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
+    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
+}
+
+# --------------------------------------- Make sure that LON-CAPA is configured
+# Only lonHostID is tested: a leftover placeholder value aborts the run.
+if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
+   print("Unconfigured machine.\n");
+   $emailto=$perlvar{'lonSysEMail'};
+   $hostname=`/bin/hostname`;
+   chomp $hostname; # strip only a trailing newline, never a real character
+   $hostname=~s/[^\w\.]//g; # keep word characters and dots: safe for the shell
+   $subj="LON: Unconfigured machine $hostname";
+   system("echo 'Unconfigured machine $hostname.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+    exit 1;
+}
+
+# ----------------------------- Make sure this process is running from user=www
+my $wwwid=getpwnam('www'); # uid of 'www'; undef when no such account exists
+if (!defined($wwwid) || $wwwid!=$<) { # undef would otherwise equal root's 0
+   print("User ID mismatch.  This program must be run as user 'www'\n");
+   $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+   $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+   system("echo 'User ID mismatch.  loncron must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+   exit 1;
 }
 
 # ------------------------------------------------------------- Read hosts file
@@ -110,7 +146,10 @@ print $fh (<<ENDHEADERS);
 <ol>
 <li><a href="#configuration">Configuration</a>
 <li><a href="#machine">Machine Information</a>
+<li><a href="#tmp">Temporary Files</a>
+<li><a href="#tokens">Session Tokens</a>
 <li><a href="#httpd">httpd</a>
+<li><a href="#lonsql">lonsql</a>
 <li><a href="#lond">lond</a>
 <li><a href="#lonc">lonc</a>
 <li><a href="#lonnet">lonnet</a>
@@ -153,7 +192,7 @@ close (LOADAVGH);
 print $fh "<tt>$loadavg</tt>";
 
 @parts=split(/\s+/,$loadavg);
-if ($parts[1]>3.0) {
+if ($parts[1]>4.0) {
     $errors++;
 } elsif ($parts[1]>2.0) {
     $warnings++;
@@ -171,17 +210,59 @@ while ($line=<DFH>) {
    $usage=$parts[4];
    $usage=~s/\W//g;
    if ($usage>90) { 
-      $errors++; 
+      $warnings++; 
    } elsif ($usage>80) {
       $warnings++;
    } elsif ($usage>60) {
       $notices++;
    }
-   if ($usage>95) { $errors++; }
+   if ($usage>95) { $warnings++; $warnings++ }
 }
 close (DFH);
 print $fh "</pre>";
 &errout($fh);
+
+# --------------------------------------------------------------- clean out tmp
+# Delete files in lonDaemons/tmp whose mtime is more than lonExpire seconds
+# old and report how many were actually removed.
+print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
+$cleaned=0;
+while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
+    my $mtime=(stat($fname))[9];
+    # a failed stat (file already gone) must not look infinitely old
+    next unless defined($mtime);
+    $now=time;
+    $since=$now-$mtime;
+    if ($since>$perlvar{'lonExpire'}) {
+        # count only real deletions: unlink returns 0 on failure
+        $cleaned++ if unlink("$fname");
+    }
+}
+print $fh "Cleaned up ".$cleaned." files.";
+
+# ------------------------------------------------------------ clean out lonIDs
+# Delete session token files in lonIDsDir not modified within lonExpire
+# seconds; every younger file is counted as an open session.
+print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
+$cleaned=0;
+$active=0;
+while ($fname=<$perlvar{'lonIDsDir'}/*>) {
+    my $mtime=(stat($fname))[9];
+    # a failed stat (file already gone) counts as neither stale nor active
+    next unless defined($mtime);
+    $now=time;
+    $since=$now-$mtime;
+    if ($since>$perlvar{'lonExpire'}) {
+        print $fh "Unlinking $fname<br>";
+        # count only real deletions: unlink returns 0 on failure
+        $cleaned++ if unlink("$fname");
+    } else {
+        $active++;
+    }
+}
+print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
+print $fh "<h3>$active open session(s)</h3>";
+
 # ----------------------------------------------------------------------- httpd
 
 print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
@@ -200,6 +281,114 @@ while ($line=<DFH>) {
 close (DFH);
 print $fh "</pre>";
 &errout($fh);
+
+
+# ---------------------------------------------------------------------- lonsql
+
+my $restartflag=1;
+if ($perlvar{'lonRole'} eq "library") {
+
+    print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
+    
+    if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
+	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
+	while ($line=<DFH>) { 
+	    print $fh "$line";
+	    if ($line=~/INFO/) { $notices++; }
+	    if ($line=~/WARNING/) { $notices++; }
+	    if ($line=~/CRITICAL/) { $warnings++; }
+	};
+	close (DFH);
+    }
+    print $fh "</pre>";
+    
+    my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
+    
+    if (-e $lonsqlfile) {
+	my $lfh=IO::File->new("$lonsqlfile");
+	my $lonsqlpid=<$lfh>;
+	chomp($lonsqlpid);
+	# signal 0 sends nothing; it only tests that the pid can be signalled
+	if (kill 0 => $lonsqlpid) {
+	    print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
+	    $restartflag=0;
+	} else {
+	    $errors++; $errors++;
+	    print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
+	    # Possibility #1: there is no process
+	    # Solution: remove .pid file and restart
+	    if (getpgrp($lonsqlpid)==-1) {
+		unlink($lonsqlfile);
+		$restartflag=1;
+	    }
+	    else {
+		# Possibility #2: there is a live process that is not
+		# responding for an unknown reason
+		# Solution: kill parent and children processes, remove .pid
+		# and restart
+		`killall -9 lonsql`;
+		unlink($lonsqlfile);
+		$restartflag=1;
+	    }
+	    print $fh 
+		"<h3>Deciding to clean up stale .pid file and restart lonsql</h3>";
+	}
+    }
+    if ($restartflag==1) { # no responding lonsql was confirmed above
+	$errors++;
+	print $fh "<h3>lonsql not running, trying to start</h3>";
+	system(
+ "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
+	sleep 120 unless $qflag; # wait before looking for the pid file
+	if (-e $lonsqlfile) {
+	    print $fh "Seems like it started ...<p>";
+	    my $lfh=IO::File->new("$lonsqlfile");
+	    my $lonsqlpid=<$lfh>;
+	    chomp($lonsqlpid);
+	    sleep 30 unless $qflag;
+	    if (kill 0 => $lonsqlpid) { # signal 0: test only, nothing is sent
+		print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
+	    } else {
+		$errors++; $errors++;
+		print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
+		print $fh "Give it one more try ...<p>";
+		system(
+ "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
+		sleep 120 unless $qflag; # this second attempt is not re-checked here
+	    }
+	} else {
+	    print $fh "Seems like that did not work!<p>";
+	    $errors++;
+	}
+	if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
+	    print $fh "<p><pre>";
+	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
+	    while ($line=<DFH>) { # copy the last 100 log lines into the report
+		print $fh "$line";
+		if ($line=~/WARNING/) { $notices++; } # counted as a notice only
+		if ($line=~/CRITICAL/) { $notices++; } # counted as a notice only
+	    };
+	    close (DFH);
+	    print $fh "</pre>";
+	}
+    }
+
+    # Rotate lonsql.log once it has grown past 40000 bytes.
+    $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";
+
+    my $logsize=(stat($fname))[7]; # field 7 of stat() is the size in bytes
+
+    if ($logsize>40000) {
+	print $fh "Rotating logs ...<p>";
+	# oldest generation first, so no file is overwritten before it moved
+	foreach my $gen (2,1) {
+	    rename("$fname.$gen","$fname.".($gen+1));
+	}
+	rename("$fname","$fname.1");
+    }
+
+    &errout($fh);
+}
 # ------------------------------------------------------------------------ lond
 
 print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
@@ -209,8 +398,8 @@ open (DFH,"tail -n100 $perlvar{'lonDaemo
 while ($line=<DFH>) { 
    print $fh "$line";
    if ($line=~/INFO/) { $notices++; }
-   if ($line=~/WARNING/) { $warnings++; }
-   if ($line=~/CRITICAL/) { $errors++; }
+   if ($line=~/WARNING/) { $notices++; }
+   if ($line=~/CRITICAL/) { $warnings++; }
 };
 close (DFH);
 }
@@ -218,35 +407,57 @@ print $fh "</pre>";
 
 my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
 
-if (-e $londfile) {
+$restartflag=1;
+if (-e $londfile) {    
    my $lfh=IO::File->new("$londfile");
    my $londpid=<$lfh>;
    chomp($londpid);
    if (kill 0 => $londpid) {
       print $fh "<h3>lond at pid $londpid responding</h3>";
+      $restartflag=0;
    } else {
-      $errors++; $errors++;
+      $errors++;
       print $fh "<h3>lond at pid $londpid not responding</h3>";
+      # Intelligently handle this.
+      # Possibility #1: there is no process
+      # Solution: remove .pid file and restart
+      if (getpgrp($londpid)==-1) {
+	  unlink($londfile);
+	  $restartflag=1;
+      }
+      else {
+      # Possibility #2: there is a live process that is not responding
+      #                 for an unknown reason
+      # Solution: kill parent and children processes, remove .pid and restart
+	  `killall -9 lond`;
+	  unlink($londfile);
+	  $restartflag=1;
+      }
+      print $fh 
+	  "<h3>Deciding to clean up stale .pid file and restart lond</h3>";
    }
-} else {
+} 
+if ($restartflag==1) {
    $errors++;
    print $fh "<h3>lond not running, trying to start</h3>";
-   system("$perlvar{'lonDaemons'}/lond");
-   sleep 120;
+   system(
+     "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
+   sleep 120 unless $qflag;
    if (-e $londfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$londfile");
        my $londpid=<$lfh>;
        chomp($londpid);
-       sleep 30;
+       sleep 30 unless $qflag;
        if (kill 0 => $londpid) {
           print $fh "<h3>lond at pid $londpid responding</h3>";
        } else {
           $errors++; $errors++;
           print $fh "<h3>lond at pid $londpid not responding</h3>";
           print $fh "Give it one more try ...<p>";
-          system("$perlvar{'lonDaemons'}/lond");
-          sleep 120;
+	  system(
+ "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
+          sleep 120 unless $qflag;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
@@ -257,9 +468,8 @@ if (-e $londfile) {
     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
     while ($line=<DFH>) { 
       print $fh "$line";
-      if ($line=~/INFO/) { $notices++; }
-      if ($line=~/WARNING/) { $warnings++; }
-      if ($line=~/CRITICAL/) { $errors++; }
+      if ($line=~/WARNING/) { $notices++; }
+      if ($line=~/CRITICAL/) { $notices++; }
     };
     close (DFH);
     print $fh "</pre>";
@@ -290,8 +500,8 @@ open (DFH,"tail -n100 $perlvar{'lonDaemo
 while ($line=<DFH>) { 
    print $fh "$line";
    if ($line=~/INFO/) { $notices++; }
-   if ($line=~/WARNING/) { $warnings++; }
-   if ($line=~/CRITICAL/) { $errors++; }
+   if ($line=~/WARNING/) { $notices++; }
+   if ($line=~/CRITICAL/) { $warnings++; }
 };
 close (DFH);
 }
@@ -299,6 +509,7 @@ print $fh "</pre>";
 
 my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
 
+$restartflag=1;
 if (-e $loncfile) {
    my $lfh=IO::File->new("$loncfile");
    my $loncpid=<$lfh>;
@@ -306,29 +517,50 @@ if (-e $loncfile) {
    if (kill 0 => $loncpid) {
       print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
       kill USR1 => $loncpid;
+      $restartflag=0;
    } else {
-      $errors++; $errors++;
+      $errors++;
       print $fh "<h3>lonc at pid $loncpid not responding</h3>";
+      # Intelligently handle this.
+      # Possibility #1: there is no process
+      # Solution: remove .pid file and restart
+      if (getpgrp($loncpid)==-1) {
+	  unlink($loncfile);
+	  $restartflag=1;
+      }
+      else {
+      # Possibility #2: there is a live process that is not responding
+      #                 for an unknown reason
+      # Solution: kill parent and children processes, remove .pid and restart
+	  `killall -9 lonc`;
+	  unlink($loncfile);
+	  $restartflag=1;
+      }
+      print $fh 
+	  "<h3>Deciding to clean up stale .pid file and restart lonc</h3>";
    }
-} else {
+} 
+if ($restartflag==1) {
    $errors++;
    print $fh "<h3>lonc not running, trying to start</h3>";
-   system("$perlvar{'lonDaemons'}/lonc");
-   sleep 120;
+	system(
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+   sleep 120 unless $qflag;
    if (-e $loncfile) {
        print $fh "Seems like it started ...<p>";
        my $lfh=IO::File->new("$loncfile");
        my $loncpid=<$lfh>;
        chomp($loncpid);
-       sleep 30;
+       sleep 30 unless $qflag;
        if (kill 0 => $loncpid) {
           print $fh "<h3>lonc at pid $loncpid responding</h3>";
        } else {
           $errors++; $errors++;
           print $fh "<h3>lonc at pid $loncpid not responding</h3>";
           print $fh "Give it one more try ...<p>";
-          system("$perlvar{'lonDaemons'}/lonc");
-          sleep 120;
+ 	  system(
+ "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
+          sleep 120 unless $qflag;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
@@ -339,9 +571,8 @@ if (-e $loncfile) {
     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
     while ($line=<DFH>) { 
       print $fh "$line";
-      if ($line=~/INFO/) { $notices++; }
-      if ($line=~/WARNING/) { $warnings++; }
-      if ($line=~/CRITICAL/) { $errors++; }
+      if ($line=~/WARNING/) { $notices++; }
+      if ($line=~/CRITICAL/) { $notices++; }
     };
     close (DFH);
     print $fh "</pre>";
@@ -374,7 +605,7 @@ while ($line=<DFH>) {
 };
 close (DFH);
 }
-print $fh "</pre><h3>Perm Log</h3>";
+print $fh "</pre><h3>Perm Log</h3><pre>";
 
 if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
     open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
@@ -469,7 +700,8 @@ if ($totalcount>200) {
    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; 
    system(
- "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
+ "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html")
+    unless $qflag;
 }
 1;