--- loncom/loncron 2003/09/10 19:13:09 1.42
+++ loncom/loncron 2020/05/09 19:24:25 1.103.2.8
@@ -1,590 +1,1168 @@
#!/usr/bin/perl
-# The LearningOnline Network
-# Housekeeping program, started by cron
+# Housekeeping program, started by cron, loncontrol and loncron.pl
#
-# (TCP networking package
-# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
-# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
+# $Id: loncron,v 1.103.2.8 2020/05/09 19:24:25 raeburn Exp $
+#
+# Copyright Michigan State University Board of Trustees
+#
+# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+#
+# LON-CAPA is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# LON-CAPA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with LON-CAPA; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# /home/httpd/html/adm/gpl.txt
+#
+# http://www.lon-capa.org/
#
-# 7/14,7/15,7/19,7/21,7/22,11/18,
-# 2/8 Gerd Kortemeyer
-# 12/23 Gerd Kortemeyer
-# YEAR=2001
-# 09/04,09/06,11/26 Gerd Kortemeyer
$|=1;
+use strict;
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
+use LONCAPA::Checksumming;
+use LONCAPA;
+use Apache::lonnet;
+use Apache::loncommon;
use IO::File;
use IO::Socket;
+use HTML::Entities;
+use Getopt::Long;
+use File::Copy;
+use Sys::Hostname::FQDN();
+
+#globals
+use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);
+
+my $statusdir="/home/httpd/html/lon-status";
-# -------------------------------------------------- Non-critical communication
-sub reply {
- my ($cmd,$server)=@_;
- my $peerfile="$perlvar{'lonSockDir'}/$server";
- my $client=IO::Socket::UNIX->new(Peer =>"$peerfile",
- Type => SOCK_STREAM,
- Timeout => 10)
- or return "con_lost";
- print $client "$cmd\n";
- my $answer=<$client>;
- chomp($answer);
- if (!$answer) { $answer="con_lost"; }
- return $answer;
-}
# --------------------------------------------------------- Output error status
+sub log {
+ my $fh=shift;
+ if ($fh) { print $fh @_ }
+}
+
sub errout {
my $fh=shift;
- print $fh (<
+ Rotating $description ... ";
+ &log($fh," Seems like it started ... ";
+ &log($fh," Seems like that did not work! ');
+ printf("%-15s ",$daemon);
+ if ($fh) {
+ if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
+ if (open(DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|")) {
+ while (my $line= ";
+ &log($fh," Give it one more try ... ";
+ &log($fh," Unable to start $daemon
+ &log($fh,(<
Notices $notices Warnings $warnings
- Errors $errors '.$daemon.'
Log
';
- printf("%-10s ",$daemon);
- if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
- open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
- while ($line=
";
-
+
my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
my $restartflag=1;
-
+ my $daemonpid;
if (-e $pidfile) {
my $lfh=IO::File->new("$pidfile");
- my $daemonpid=<$lfh>;
+ $daemonpid=<$lfh>;
chomp($daemonpid);
- if (kill 0 => $daemonpid) {
- print $fh "'.$daemon.'
Log
$daemon at pid $daemonpid responding";
- if ($sendusr1) { print $fh ", sending USR1"; }
- print $fh "
";
- if ($sendusr1) { kill USR1 => $daemonpid; }
+ if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) {
+ &log($fh,"$daemon at pid $daemonpid responding");
+ if ($send) { &log($fh,", sending $send"); }
+ &log($fh,"
");
+ if ($send eq 'USR1') { kill USR1 => $daemonpid; }
+ if ($send eq 'USR2') { kill USR2 => $daemonpid; }
$restartflag=0;
- print "running\n";
+ if ($send eq 'USR2') {
+ $result = 'reloaded';
+ print "reloaded\n";
+ } else {
+ $result = 'running';
+ print "running\n";
+ }
} else {
$errors++;
- print $fh "$daemon at pid $daemonpid not responding
";
+ &log($fh,"$daemon at pid $daemonpid not responding
");
$restartflag=1;
- print $fh "Decided to clean up stale .pid file and restart $daemon
";
+ &log($fh,"Decided to clean up stale .pid file and restart $daemon
");
}
}
if ($restartflag==1) {
$simplestatus{$daemon}='off';
$errors++;
- print $fh '
Killall '.$daemon.': '.
- `killall $daemon 2>&1`.' - ';
- sleep 2;
- print $fh unlink($pidfile).' - '.
- `killall -9 $daemon 2>&1`.
- '
';
- print $fh "$daemon not running, trying to start
";
-
- if (&start_daemon($fh,$daemon,$pidfile)) {
- print $fh "$daemon at pid $daemonpid responding
";
+ my $kadaemon=$daemon;
+ if ($kadaemon eq 'lonmemcached') { $kadaemon='memcached'; }
+ &log($fh,'
Killall '.$daemon.': '.
+ `killall $kadaemon 2>&1`.' - ');
+ sleep 1;
+ &log($fh,unlink($pidfile).' - '.
+ `killall -9 $kadaemon 2>&1`.
+ '
');
+ &log($fh,"$daemon not running, trying to start
");
+
+ if (&start_daemon($fh,$daemon,$pidfile,$args)) {
+ &log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
+ $result = 'started';
print "started\n";
} else {
$errors++;
- print $fh "$daemon at pid $daemonpid not responding
";
- print $fh "Give it one more try ...$daemon at pid $daemonpid not responding
");
+ &log($fh,"$daemon at pid $daemonpid responding
";
+ if (&start_daemon($fh,$daemon,$pidfile,$args)) {
+ &log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
+ $result = 'started';
print "started\n";
} else {
+ $result = 'failed';
print " failed\n";
$simplestatus{$daemon}='failed';
$errors++; $errors++;
- print $fh "$daemon at pid $daemonpid not responding
";
- print $fh "Unable to start $daemon$daemon at pid $daemonpid not responding
");
+ &log($fh,"";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
- while ($line=
";
- }
+ if ($fh) {
+ if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
+ &log($fh,"");
+ if (open(DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|")) {
+ while (my $line=
"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); - } + my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; + &rotate_logfile($fname,$fh,'logs'); &errout($fh); + return $result; } -# ================================================================ Main Program -# --------------------------------- Read loncapa_apache.conf and loncapa.conf -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -%perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed -delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed +# --------------------------------------------------------------------- Machine +sub log_machine_info { + my ($fh)=@_; + &log($fh,'
"); -# ----------------------------- Make sure this process is running from user=www -my $wwwid=getpwnam('www'); -if ($wwwid!=$<) { - print("User ID mismatch. This program must be run as user 'www'\n"); - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. loncron must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; -} - -# ------------------------------------------------------------- Read hosts file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); - - while (my $configline=<$config>) { - my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline); - if ($id && $domain && $role && $name && $ip) { - $hostname{$id}=$name; - $hostdom{$id}=$domain; - $hostip{$id}=$ip; - $hostrole{$id}=$role; - if ($domdescr) { $domaindescription{$domain}=$domdescr; } - if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { - $libserv{$id}=$name; - } - } else { - if ($configline) { -# &logthis("Skipping hosts.tab line -$configline-"); - } + open (DFH,"df|"); + while (my $line="); -# ------------------------------------------------------ Read spare server file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab"); - - while (my $configline=<$config>) { - chomp($configline); - if (($configline) && ($configline ne $perlvar{'lonHostID'})) { - $spareid{$configline}=1; - } - } -} -# ---------------------------------------------------------------- Start report + &log($fh,") { + &log($fh,&encode_entities($line,'<>&"')); + @parts=split(/\s+/,$line); + my $usage=$parts[4]; + $usage=~s/\W//g; + if ($usage>90) { + $warnings++; + $notices++; + } elsif ($usage>80) { + $warnings++; + } elsif ($usage>60) { + $notices++; } + if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; } } -} + close (DFH); + &log($fh,"
"); + my $psproc=0; + + open (PSH,"ps aux --cols 140 |"); + while (my $line="); -$statusdir="/home/httpd/html/lon-status"; + if ($psproc>200) { $notices++; } + if ($psproc>250) { $notices++; } -$errors=0; -$warnings=0; -$notices=0; + &log($fh,") { + &log($fh,&encode_entities($line,'<>&"')); + $psproc++; + } + close (PSH); + &log($fh,"
"); + &log($fh,&encode_entities(&LONCAPA::distro(),'<>&"')); + &log($fh,""); -$now=time; -$date=localtime($now); + &errout($fh); +} -{ -my $fh=IO::File->new(">$statusdir/newstatus.html"); -my %simplestatus=(); +sub start_logging { + my $fh=IO::File->new(">$statusdir/newstatus.html"); + my %simplestatus=(); + my $now=time; + my $date=localtime($now); + -print $fh (<
$varname | $perlvar{$varname} |
$id | $hostdom{$id} | $hostrole{$id} | "; - print $fh "$hostname{$id} | $hostip{$id} |
$id | ".&Apache::lonnet::host_domain($id). + " | ".$role. + " | ".&Apache::lonnet::hostname($id)." |
"; - -open (DFH,"df|"); -while ($line="; - - -print $fh ") { - print $fh "$line"; - @parts=split(/\s+/,$line); - $usage=$parts[4]; - $usage=~s/\W//g; - if ($usage>90) { - $warnings++; - $notices++; - } elsif ($usage>80) { - $warnings++; - } elsif ($usage>60) { - $notices++; - } - if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; } -} -close (DFH); -print $fh "
"; -$psproc=0; - -open (PSH,"ps -aux|"); -while ($line="; -if ($psproc>200) { $notices++; } -if ($psproc>250) { $notices++; } +# -------------------------------------------------------- clean out balanceIDs -&errout($fh); +sub clean_balanceIDs { + my ($fh)=@_; + &log($fh,') { - print $fh "$line"; - $psproc++; +# ------------------------------------------------------------ clean out lonIDs +sub clean_lonIDs { + my ($fh)=@_; + &log($fh,' Session Tokens
'); + my $cleaned=0; + my $active=0; + while (my $fname=<$perlvar{'lonIDsDir'}/*>) { + my $now=time; + if (-l $fname) { + my $linkfname = readlink($fname); + if (-f $linkfname) { + if ($linkfname =~ m{^$perlvar{'lonIDsDir'}/[^/]+\.id$}) { + my @data = stat($linkfname); + my $mtime = $data[9]; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + if (unlink($linkfname)) { + $cleaned++; + &log($fh,"Unlinking $linkfname
"); + unlink($fname); + } + } + } + } else { + unlink($fname); + } + } elsif (-f $fname) { + my @data = stat($fname); + my $mtime = $data[9]; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + if (unlink($fname)) { + $cleaned++; + &log($fh,"Unlinking $fname
"); + } + } else { + $active++; + } + } + } + &log($fh,"Cleaned up ".$cleaned." stale session token(s).
"); + &log($fh,"$active open session(s)
"); } -close (PSH); -print $fh "
Cleaned up ".$cleaned." stale balancer files
"); + &log($fh,"Cleaned up ".$cleaned." stale webDAV session token(s).
"); + &log($fh,"Cleaned up ".$cleaned." stale sockets.
"); } -print $fh "Cleaned up ".$cleaned." stale session token(s)."; -print $fh "
'; +# ---------------------------------------------------------------------- lonnet -open (DFH,"tail -n25 /etc/httpd/logs/access_log|"); -while ($line="); + &errout($fh); +} +sub rotate_other_logs { + my ($fh) = @_; + my %logs = ( + autoenroll => 'Auto Enroll log', + autocreate => 'Create Course log', + searchcat => 'Search Cataloguing log', + autoupdate => 'Auto Update log', + refreshcourseids_db => 'Refresh CourseIDs db log', + ); + foreach my $item (keys(%logs)) { + my $fname=$perlvar{'lonDaemons'}.'/logs/'.$item.'.log'; + &rotate_logfile($fname,$fh,$logs{$item}); + } +} -# ---------------------------------------------------------------------- lonsql +# ----------------------------------------------------------------- Connections +sub test_connections { + my ($fh)=@_; + &log($fh,') { print $fh "$line" }; -close (DFH); +sub rotate_lonnet_logs { + my ($fh)=@_; + &log($fh,' lonnet
Temp Log
'); + print "Checking logs.\n"; + if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){ + open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|"); + while (my $line=) { + &log($fh,&encode_entities($line,'<>&"')); + } + close (DFH); + } + &log($fh," Perm Log
"); + + if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { + open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|"); + while (my $line=) { + &log($fh,&encode_entities($line,'<>&"')); + } + close (DFH); + } else { &log($fh,"No perm log\n") } -print $fh " Error Log
"; + my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log"; + &rotate_logfile($fname,$fh,'lonnet log'); -open (DFH,"tail -n25 /etc/httpd/logs/error_log|"); -while ($line="; -&errout($fh); + &log($fh,") { - print $fh "$line"; - if ($line=~/\[error\]/) { $notices++; } -}; -close (DFH); -print $fh "
$tryserver | $result |
Total unsend messages: $unsend
\n"); + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; + } -&checkon_daemon($fh,'lonhttpd',40000); + if ($unsend) { $simplestatus{'unsend'}=$unsend; } + &log($fh,""); +# list directory with delayed messages and remember offline servers + my %servers=(); + open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|"); + while (my $line=\n"); + close (DFH); +# pong to all servers that have delayed messages +# this will trigger a reverse connection, which should flush the buffers + foreach my $tryserver (sort(keys(%servers))) { + if ($hostname{$tryserver} || !$numhosts) { + my $answer; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(20); + $answer = &Apache::lonnet::reply("pong",$tryserver); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + &log($fh,"Attempted pong to $tryserver timed out) { + my ($server)=($line=~/\.(\w+)$/); + if ($server) { $servers{$server}=1; } + &log($fh,&encode_entities($line,'<>&"')); + } + &log($fh,"
'; -print "checking logs\n"; -if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){ -open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|"); -while ($line=) { - print $fh "$line"; -}; -close (DFH); -} -print $fh "
"; - -if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { - open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|"); -while ($line="; -&errout($fh); -# ----------------------------------------------------------------- Connections +sub write_loncaparevs { + print "Retrieving LON-CAPA version information.\n"; + my %hostname = &Apache::lonnet::all_hostnames(); + my $output; + foreach my $id (sort(keys(%hostname))) { + if ($id ne '') { + my $loncaparev; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $loncaparev = + &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting lonHost: $id for version.\n"; + } + if ($loncaparev =~ /^[\w.\-]+$/) { + $output .= $id.':'.$loncaparev."\n"; + } + } + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_loncaparevs(); + } + } + return; +} + +sub write_serverhomeIDs { + print "Retrieving LON-CAPA lonHostID information.\n"; + my %name_to_host = &Apache::lonnet::all_names(); + my $output; + foreach my $name (sort(keys(%name_to_host))) { + if ($name ne '') { + if (ref($name_to_host{$name}) eq 'ARRAY') { + my $serverhomeID; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $serverhomeID = + &Apache::lonnet::get_server_homeID($name,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting server: $name\n"; + } + if ($serverhomeID ne '') { + $output .= $name.':'.$serverhomeID."\n"; + } else { + $output .= $name.':'.$name_to_host{$name}->[0]."\n"; + } + } + } + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_serverhomeIDs(); + } + } + return; +} -print $fh ') { - print $fh "$line"; -}; -close (DFH); -} else { print $fh "No perm log\n" } - -$fname="$perlvar{'lonDaemons'}/logs/lonnet.log"; - - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - -if ($size>40000) { - print $fh "Rotating logs ... "; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); +sub log_simplestatus { + rename("$statusdir/newstatus.html","$statusdir/index.html"); + + my $sfh=IO::File->new(">$statusdir/loncron_simple.txt"); + foreach (keys %simplestatus) { + print $sfh $_.'='.$simplestatus{$_}.'&'; + } + print $sfh "\n"; + $sfh->close(); } -print $fh "
$tryserver | $result |
\n"; -$warnings=$warnings+5*$unsend; - -if ($unsend) { $simplestatus{'unsend'}=$unsend; } -print $fh "