--- loncom/loncron 2000/12/23 16:56:46 1.11 +++ loncom/loncron 2003/09/11 19:49:59 1.43 @@ -9,8 +9,14 @@ # # 7/14,7/15,7/19,7/21,7/22,11/18, # 2/8 Gerd Kortemeyer -# Dec 00 Scott Harrison # 12/23 Gerd Kortemeyer +# YEAR=2001 +# 09/04,09/06,11/26 Gerd Kortemeyer + +$|=1; + +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; use IO::File; use IO::Socket; @@ -43,46 +49,194 @@ sub errout { ENDERROUT } -# ================================================================ Main Program +sub start_daemon { + my ($fh,$daemon,$pidfile) = @_; + system("$perlvar{'lonDaemons'}/$daemon 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); + sleep 2; + if (-e $pidfile) { + print $fh "Seems like it started ...
"; + my $lfh=IO::File->new("$pidfile"); + my $daemonpid=<$lfh>; + chomp($daemonpid); + sleep 2; + if (kill 0 => $daemonpid) { + return 1; + } else { + return 0; + } + } + print $fh "Seems like that did not work!
"; + $errors++; + return 0; +} +sub checkon_daemon { + my ($fh,$daemon,$maxsize,$sendusr1)=@_; -# ------------------------------------------------------------ Read access.conf -{ - my $config=IO::File->new("/etc/httpd/conf/access.conf"); + print $fh '
'; + printf("%-10s ",$daemon); + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|"); + while ($line="; + + my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid"; + + my $restartflag=1; + + if (-e $pidfile) { + my $lfh=IO::File->new("$pidfile"); + my $daemonpid=<$lfh>; + chomp($daemonpid); + if (kill 0 => $daemonpid) { + print $fh ") { + print $fh "$line"; + if ($line=~/INFO/) { $notices++; } + if ($line=~/WARNING/) { $notices++; } + if ($line=~/CRITICAL/) { $warnings++; } + }; + close (DFH); + } + print $fh "
"; + print " "; + if (&start_daemon($fh,$daemon,$pidfile)) { + print $fh "
"; + } + } - while (my $configline=<$config>) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - $perlvar{$varname}=$varvalue; - } + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + print $fh "
"; + open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|"); + while ($line="; + } } + + $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; + + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + + if ($size>$maxsize) { + print $fh "Rotating logs ...) { + print $fh "$line"; + if ($line=~/WARNING/) { $notices++; } + if ($line=~/CRITICAL/) { $notices++; } + }; + close (DFH); + print $fh "
";
+ rename("$fname.2","$fname.3");
+ rename("$fname.1","$fname.2");
+ rename("$fname","$fname.1");
+ }
+
+ &errout($fh);
+}
+# ================================================================ Main Program
+
+# --------------------------------- Read loncapa_apache.conf and loncapa.conf
+my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
+%perlvar=%{$perlvarref};
+undef $perlvarref;
+delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
+delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
+
+# --------------------------------------- Make sure that LON-CAPA is configured
+# I only test for one thing here (lonHostID). This is just a safeguard.
+if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
+ print("Unconfigured machine.\n");
+ $emailto=$perlvar{'lonSysEMail'};
+ $hostname=`/bin/hostname`;
+ chop $hostname;
+ $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
+ $subj="LON: Unconfigured machine $hostname";
+ system("echo 'Unconfigured machine $hostname.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+ exit 1;
+}
+
+# ----------------------------- Make sure this process is running from user=www
+my $wwwid=getpwnam('www');
+if ($wwwid!=$<) {
+ print("User ID mismatch. This program must be run as user 'www'\n");
+ $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+ $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ system("echo 'User ID mismatch. loncron must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+ exit 1;
}
# ------------------------------------------------------------- Read hosts file
{
my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
-
+
while (my $configline=<$config>) {
- my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
- $hostname{$id}=$name;
- $hostdom{$id}=$domain;
- $hostrole{$id}=$role;
- $hostip{$id}=$ip;
- if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
- $libserv{$id}=$name;
- }
+ my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
+ if ($id && $domain && $role && $name && $ip) {
+ $hostname{$id}=$name;
+ $hostdom{$id}=$domain;
+ $hostip{$id}=$ip;
+ $hostrole{$id}=$role;
+ if ($domdescr) { $domaindescription{$domain}=$domdescr; }
+ if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
+ $libserv{$id}=$name;
+ }
+ } else {
+ if ($configline) {
+# &logthis("Skipping hosts.tab line -$configline-");
+ }
+ }
}
}
# ------------------------------------------------------ Read spare server file
{
my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
-
+
while (my $configline=<$config>) {
- chomp($configline);
- if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
- $spareid{$configline}=1;
- }
+ chomp($configline);
+ if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
+ $spareid{$configline}=1;
+ }
}
}
@@ -98,9 +252,10 @@ $now=time;
$date=localtime($now);
{
-my $fh=IO::File->new(">$statusdir/newstatus.html");
-
-print $fh (< Cleaned up ".$cleaned." stale session token(s).";
-print $fh " Cleaned up ".$cleaned." stale session token(s).";
+ print $fh " ";
- my $lfh=IO::File->new("$lonsqlfile");
- my $lonsqlpid=<$lfh>;
- chomp($lonsqlpid);
- sleep 30;
- if (kill 0 => $lonsqlpid) {
- print $fh " ";
- system("$perlvar{'lonDaemons'}/lonsql");
- sleep 120;
- }
- } else {
- print $fh "Seems like that did not work! ";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
- print $fh " ";
- my $lfh=IO::File->new("$londfile");
- my $londpid=<$lfh>;
- chomp($londpid);
- sleep 30;
- if (kill 0 => $londpid) {
- print $fh " ";
- system("$perlvar{'lonDaemons'}/lond");
- sleep 120;
- }
- } else {
- print $fh "Seems like that did not work! ";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
- print $fh " ";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
+ print $fh ' \n";
+ $warnings=$warnings+5*$unsend;
-print $fh ' ";
- my $lfh=IO::File->new("$loncfile");
- my $loncpid=<$lfh>;
- chomp($loncpid);
- sleep 30;
- if (kill 0 => $loncpid) {
- print $fh " ";
- system("$perlvar{'lonDaemons'}/lonc");
- sleep 120;
- }
- } else {
- print $fh "Seems like that did not work! ";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
- print $fh " ";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
-
-
-&errout($fh);
-# ---------------------------------------------------------------------- lonnet
-
-print $fh ' ";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
-
-print $fh " \n";
-$warnings=$warnings+5*$unsend;
-
-print $fh " \n";
-}
-print $fh "$varname $perlvar{$varname} Hosts
";
-foreach $id (keys %hostname) {
-print $fh
- "
\n";
-}
-print $fh "$id $hostdom{$id} $hostrole{$id} ";
-print $fh "$hostname{$id} $hostip{$id} Spare Hosts
";
-foreach $id (keys %spareid) {
- print $fh "
\n";
+ foreach $varname (sort(keys(%perlvar))) {
+ print $fh " \n";
+ }
+ print $fh "$varname $perlvar{$varname} Hosts
";
+ foreach $id (sort(keys(%hostname))) {
+ print $fh
+ "
\n";
+ }
+ print $fh "$id $hostdom{$id} $hostrole{$id} ";
+ print $fh "$hostname{$id} $hostip{$id} Spare Hosts
";
+ foreach $id (sort(keys(%spareid))) {
+ print $fh "
\n";
# --------------------------------------------------------------------- Machine
+
+ print $fh 'Machine Information
';
+ print $fh "loadavg
";
+
+ open (LOADAVGH,"/proc/loadavg");
+ $loadavg=Machine Information
';
-print $fh "loadavg
";
+ print $fh "df
";
+ print $fh "";
-open (LOADAVGH,"/proc/loadavg");
-$loadavg=
";
-print $fh "$loadavg";
-@parts=split(/\s+/,$loadavg);
-if ($parts[1]>4.0) {
- $errors++;
-} elsif ($parts[1]>2.0) {
- $warnings++;
-} elsif ($parts[1]>1.0) {
- $notices++;
-}
-
-print $fh "df
";
-print $fh "";
-
-open (DFH,"df|");
-while ($line=
";
-&errout($fh);
+ print $fh "ps
";
+ print $fh "";
+ $psproc=0;
+
+ open (PSH,"ps -aux|");
+ while ($line=
";
+
+ if ($psproc>200) { $notices++; }
+ if ($psproc>250) { $notices++; }
+
+ &errout($fh);
# --------------------------------------------------------------- clean out tmp
-print $fh 'Temporary Files
';
-$cleaned=0;
-while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
- $now=time;
- $since=$now-$mtime;
- if ($since>$perlvar{'lonExpire'}) {
- $cleaned++;
- unlink("$fname");
- }
+ print $fh 'Temporary Files
';
+ $cleaned=0;
+ $old=0;
+ while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
+ my ($dev,$ino,$mode,$nlink,
+ $uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,
+ $blksize,$blocks)=stat($fname);
+ $now=time;
+ $since=$now-$mtime;
+ if ($since>$perlvar{'lonExpire'}) {
+ $line='';
+ if (open(PROBE,$fname)) {
+ $line=Session Tokens
';
-$cleaned=0;
-$active=0;
-while ($fname=<$perlvar{'lonIDsDir'}/*>) {
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
- $now=time;
- $since=$now-$mtime;
- if ($since>$perlvar{'lonExpire'}) {
- $cleaned++;
- print $fh "Unlinking $fname
";
- unlink("$fname");
- } else {
- $active++;
- }
-
-}
-print $fh "$active open session(s)
";
+ print $fh 'Session Tokens
';
+ $cleaned=0;
+ $active=0;
+ while ($fname=<$perlvar{'lonIDsDir'}/*>) {
+ my ($dev,$ino,$mode,$nlink,
+ $uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,
+ $blksize,$blocks)=stat($fname);
+ $now=time;
+ $since=$now-$mtime;
+ if ($since>$perlvar{'lonExpire'}) {
+ $cleaned++;
+ print $fh "Unlinking $fname
";
+ unlink("$fname");
+ } else {
+ $active++;
+ }
+
+ }
+ print $fh "$active open session(s)
";
# ----------------------------------------------------------------------- httpd
-print $fh 'httpd
Access Log
';
+ print $fh '
httpd
Access Log
';
+
+ open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
+ while ($line=
Error Log
";
-
-open (DFH,"tail -n50 /etc/httpd/logs/error_log|");
-while ($line=
";
-&errout($fh);
+ print $fh "Error Log
";
+
+ open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
+ while ($line=
";
+ &errout($fh);
# ---------------------------------------------------------------------- lonsql
-#
-# Do not run for now
-#
-if ($perlvar{'lonRole'} eq "library" && 1==0) {
- print $fh 'lonsql
Log
';
-
- if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
+ &checkon_daemon($fh,'lonsql',200000);
+
+# ------------------------------------------------------------------------ lond
+
+ &checkon_daemon($fh,'lond',40000,1);
+
+# ------------------------------------------------------------------------ lonc
+
+ &checkon_daemon($fh,'lonc',40000,1);
+
+# -------------------------------------------------------------------- lonhttpd
+
+ &checkon_daemon($fh,'lonhttpd',40000);
+
+# ---------------------------------------------------------------------- lonnet
+
+ print $fh '
lonnet
Temp Log
';
+ print "checking logs\n";
+ if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
+ open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
while ($line=
";
-
- my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
+ print $fh "Perm Log
";
- if (-e $lonsqlfile) {
- my $lfh=IO::File->new("$lonsqlfile");
- my $lonsqlpid=<$lfh>;
- chomp($lonsqlpid);
- if (kill 0 => $lonsqlpid) {
- print $fh "
";
&errout($fh);
-}
-# ------------------------------------------------------------------------ lond
+# ----------------------------------------------------------------- Connections
-print $fh 'lonsql at pid $lonsqlpid responding
";
- } else {
- $errors++; $errors++;
- print $fh "lonsql at pid $lonsqlpid not responding
";
- }
- } else {
- $errors++;
- print $fh "lonsql not running, trying to start
";
- system("$perlvar{'lonDaemons'}/lonsql");
- sleep 120;
- if (-e $lonsqlfile) {
- print $fh "Seems like it started ...lonsql at pid $lonsqlpid responding
";
- } else {
- $errors++; $errors++;
- print $fh "lonsql at pid $lonsqlpid not responding
";
- print $fh "Give it one more try ...";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
- while ($line=
";
- }
- }
+ if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
+ open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
+ while ($line=lond
Log
';
+ print $fh '
Connections
';
+ print "testing connections\n";
+ print $fh "";
+ foreach $tryserver (sort(keys(%hostname))) {
+ print(".");
+ $answer=reply("pong",$tryserver);
+ if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
+ $result="ok";
+ } else {
+ $result=$answer;
+ $warnings++;
+ if ($answer eq 'con_lost') { $warnings++; }
+ }
+ if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
+ print $fh "
";
-$fname="$perlvar{'lonDaemons'}/logs/lond.log";
+ &errout($fh);
+# ------------------------------------------------------------ Delayed messages
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
+ print $fh ' \n";
-if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
-while ($line=$tryserver $result lond at pid $londpid responding
";
- $restartflag=0;
- } else {
- $errors++;
- print $fh "lond at pid $londpid not responding
";
- # Intelligently handle this.
- # Possibility #1: there is no process
- # Solution: remove .pid file and restart
- if (getpgrp($londpid)==-1) {
- unlink($londfile);
- $restartflag=1;
- }
- else {
- # Possibility #2: there is a live process that is not responding
- # for an unknown reason
- # Solution: kill parent and children processes, remove .pid and restart
- `killall -9 lond`;
- unlink($londfile);
- $restartflag=1;
- }
- print $fh
- "Deciding to clean up stale .pid file and restart lond
";
- }
-}
-if ($restartflag==1) {
- $errors++;
- print $fh "lond not running, trying to start
";
- system("$perlvar{'lonDaemons'}/lond");
- sleep 120;
- if (-e $londfile) {
- print $fh "Seems like it started ...lond at pid $londpid responding
";
- } else {
- $errors++; $errors++;
- print $fh "lond at pid $londpid not responding
";
- print $fh "Give it one more try ...";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
- while ($line=
";
- }
-}
+ }
+ print $fh "Delayed Messages
';
+ print "checking buffers\n";
-if ($size>40000) {
- print $fh "Rotating logs ...Scanning Permanent Log
';
-&errout($fh);
-# ------------------------------------------------------------------------ lonc
+ $unsend=0;
+ {
+ my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
+ while ($line=<$dfh>) {
+ ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
+ if ($sdf eq 'F') {
+ $local=localtime($time);
+ print $fh "Failed: $time, $dserv, $dcmd
";
+ $warnings++;
+ }
+ if ($sdf eq 'S') { $unsend--; }
+ if ($sdf eq 'D') { $unsend++; }
+ }
+ }
+ print $fh "Total unsend messages: $unsendlonc
Log
';
+ if ($unsend) { $simplestatus{'unsend'}=$unsend; }
+ print $fh "
";
-
-my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
-
-$restartflag=1;
-if (-e $loncfile) {
- my $lfh=IO::File->new("$loncfile");
- my $loncpid=<$lfh>;
- chomp($loncpid);
- if (kill 0 => $loncpid) {
- print $fh "Outgoing Buffer
";
-if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
-while ($line=lonc at pid $loncpid responding, sending USR1
";
- kill USR1 => $loncpid;
- $restartflag=0;
- } else {
- $errors++;
- print $fh "lonc at pid $loncpid not responding
";
- # Intelligently handle this.
- # Possibility #1: there is no process
- # Solution: remove .pid file and restart
- if (getpgrp($loncpid)==-1) {
- unlink($loncfile);
- $restartflag=1;
- }
- else {
- # Possibility #2: there is a live process that is not responding
- # for an unknown reason
- # Solution: kill parent and children processes, remove .pid and restart
- `killall -9 lonc`;
- unlink($loncfile);
- $restartflag=1;
- }
- print $fh
- "Deciding to clean up stale .pid file and restart lonc
";
- }
-}
-if ($restartflag==1) {
- $errors++;
- print $fh "lonc not running, trying to start
";
- system("$perlvar{'lonDaemons'}/lonc");
- sleep 120;
- if (-e $loncfile) {
- print $fh "Seems like it started ...lonc at pid $loncpid responding
";
- } else {
- $errors++; $errors++;
- print $fh "lonc at pid $loncpid not responding
";
- print $fh "Give it one more try ...";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
+ open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
while ($line=
";
- }
-}
-
-$fname="$perlvar{'lonDaemons'}/logs/lonc.log";
-
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
-
-if ($size>40000) {
- print $fh "Rotating logs ...
";
};
close (DFH);
- print $fh "lonnet
Temp Log
';
-if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
-open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
-while ($line=
Perm Log
";
-
-if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
- open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
-while ($line=
";
-&errout($fh);
-# ----------------------------------------------------------------- Connections
-
-print $fh 'Connections
';
-
-print $fh "";
-foreach $tryserver (keys %hostname) {
-
- $answer=reply("pong",$tryserver);
- if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
- $result="ok";
- } else {
- $result=$answer;
- $warnings++;
- if ($answer eq 'con_lost') { $warnings++; }
- }
- print $fh "
";
-
-&errout($fh);
-# ------------------------------------------------------------ Delayed messages
-
-print $fh ' \n";
-
-}
-print $fh "$tryserver $result Delayed Messages
';
-
-print $fh 'Scanning Permanent Log
';
-
-$unsend=0;
-{
- my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
- while ($line=<$dfh>) {
- ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
- if ($sdf eq 'F') {
- $local=localtime($time);
- print "Failed: $time, $dserv, $dcmd
";
- $warnings++;
- }
- if ($sdf eq 'S') { $unsend--; }
- if ($sdf eq 'D') { $unsend++; }
- }
-}
-print $fh "Total unsend messages: $unsendOutgoing Buffer
";
-
-open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
-while ($line=
";
-};
-close (DFH);
# ------------------------------------------------------------------------- End
-print $fh "\n";
-$totalcount=$notices+4*$warnings+100*$errors;
-&errout($fh);
-print $fh "Total Error Count: $totalcount
";
-$now=time;
-$date=localtime($now);
-print $fh "
$date ($now)