--- loncom/loncron 2024/07/28 23:52:11 1.103.2.16 +++ loncom/loncron 2017/02/28 05:42:06 1.104 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.103.2.16 2024/07/28 23:52:11 raeburn Exp $ +# $Id: loncron,v 1.104 2017/02/28 05:42:06 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -37,18 +37,12 @@ use LONCAPA; use Apache::lonnet; use Apache::loncommon; -use LWP::UserAgent(); -use HTTP::Request(); use IO::File; use IO::Socket; use HTML::Entities; use Getopt::Long; -use GDBM_File qw(GDBM_READER); +use GDBM_File; use Storable qw(thaw); -use File::ReadBackwards; -use File::Copy; -use Sys::Hostname::FQDN(); - #globals use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); @@ -81,14 +75,14 @@ sub rotate_logfile { rename("$file.2","$file.3"); rename("$file.1","$file.2"); rename("$file","$file.1"); - } + } } sub start_daemon { my ($fh,$daemon,$pidfile,$args) = @_; my $progname=$daemon; if ($daemon eq 'lonc') { - $progname='loncnew'; + $progname='loncnew'; } my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; &rotate_logfile($error_fname,$fh,'error logs'); @@ -119,23 +113,20 @@ sub checkon_daemon { my $result; &log($fh,'
');
printf("%-15s ",$daemon);
- if ($fh) {
- if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
- if (open(DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|")) {
- while (my $line=
Unable to start $daemon
"); } } - if ($fh) { - if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ - &log($fh,""); - if (open(DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|")) { - while (my $line="); - } - } - } + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + &log($fh,") { - &log($fh,"$line"); - if ($line=~/WARNING/) { $notices++; } - if ($line=~/CRITICAL/) { $notices++; } - } - close (DFH); - } - &log($fh,"
"); + open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|"); + while (my $line="); + } + } + my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; &rotate_logfile($fname,$fh,'logs'); @@ -228,34 +217,19 @@ sub log_machine_info { my ($fh)=@_; &log($fh,') { + &log($fh,"$line"); + if ($line=~/WARNING/) { $notices++; } + if ($line=~/CRITICAL/) { $notices++; } + }; + close (DFH); + &log($fh,"
"); open (DFH,"df|"); - while (my $line=) { - &log($fh,&encode_entities($line,'<>&"')); + while (my $line= ) { + &log($fh,&encode_entities($line,'<>&"')); @parts=split(/\s+/,$line); my $usage=$parts[4]; $usage=~s/\W//g; - if ($usage>90) { + if ($usage>90) { $warnings++; - $notices++; + $notices++; } elsif ($usage>80) { $warnings++; } elsif ($usage>60) { @@ -287,8 +261,8 @@ sub log_machine_info { my $psproc=0; open (PSH,"ps aux --cols 140 |"); - while (my $line= ) { - &log($fh,&encode_entities($line,'<>&"')); + while (my $line= ) { + &log($fh,&encode_entities($line,'<>&"')); $psproc++; } close (PSH); @@ -310,7 +284,7 @@ sub start_logging { my %simplestatus=(); my $now=time; my $date=localtime($now); - + &log($fh,(< @@ -335,7 +309,6 @@ sub start_logging { lonc lonnet Connections -bash readline config Delayed Messages Error Count @@ -427,12 +400,12 @@ sub recursive_clean_tmp { ($cleaned,$old,$removed) = &recursive_clean_tmp($innerdir,$cleaned,$old,$removed,$errors); my @doms = &Apache::lonnet::current_machine_domains(); - + if (open(my $dirhandle,$fname)) { unless (($innerdir eq 'helprequests') || (($innerdir =~ /^addcourse/) && ($innerdir !~ m{/\d+$}))) { my @contents = grep {!/^\.\.?$/} readdir($dirhandle); - join('&&',@contents)."\n"; + join('&&',@contents)."\n"; if (scalar(grep {!/^\.\.?$/} readdir($dirhandle)) == 0) { closedir($dirhandle); if ($fname =~ m{^\Q$perlvar{'lonDaemons'}\E/tmp/}) { @@ -485,7 +458,7 @@ sub recursive_clean_tmp { } } } elsif (ref($errors->{failopen}) eq 'ARRAY') { - push(@{$errors->{failopen}},$fname); + push(@{$errors->{failopen}},$fname); } } else { if (unlink($fname)) { @@ -511,71 +484,24 @@ sub clean_lonIDs { my $cleaned=0; my $active=0; while (my $fname=<$perlvar{'lonIDsDir'}/*>) { - my $now=time; - if (-l $fname) { - my $linkfname = readlink($fname); - if (-f $linkfname) { - if ($linkfname =~ m{^$perlvar{'lonIDsDir'}/[^/]+\.id$}) { - my @data = stat($linkfname); - my $mtime = $data[9]; - my $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - if (unlink($linkfname)) { - $cleaned++; - &log($fh,"Unlinking $linkfname
"); - unlink($fname); - } - } - } - } else { - unlink($fname); - } - } elsif (-f $fname) { - my @data = stat($fname); - my $mtime = $data[9]; - my $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - if (unlink($fname)) { - $cleaned++; - &log($fh,"Unlinking $fname
"); - } - } else { - $active++; - } - } + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + my $now=time; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + $cleaned++; + &log($fh,"Unlinking $fname
"); + unlink("$fname"); + } else { + $active++; + } } &log($fh,"Cleaned up ".$cleaned." stale session token(s).
"); &log($fh,"$active open session(s)
"); } -# -------------------------------------------------------- clean out balanceIDs - -sub clean_balanceIDs { - my ($fh)=@_; - &log($fh,'Session Tokens
'); - my $cleaned=0; - my $active=0; - if (-d $perlvar{'lonBalanceDir'}) { - while (my $fname=<$perlvar{'lonBalanceDir'}/*.id>) { - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - my $now=time; - my $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - $cleaned++; - &log($fh,"Unlinking $fname
"); - unlink("$fname"); - } else { - $active++; - } - } - } - &log($fh,"Cleaned up ".$cleaned." stale balancer files
"); - &log($fh,"$active unexpired balancer files
"); -} - # ------------------------------------------------ clean out webDAV Session IDs sub clean_webDAV_sessionIDs { my ($fh)=@_; @@ -639,16 +565,16 @@ sub rotate_lonnet_logs { print "Checking logs.\n"; if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){ open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|"); - while (my $line=) { + while (my $line= ) { &log($fh,&encode_entities($line,'<>&"')); } close (DFH); } &log($fh,"
"); - + if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|"); - while (my $line=\n"); close (DFH); + my %hostname = &Apache::lonnet::all_hostnames(); + my $numhosts = scalar(keys(%hostname)); # pong to all servers that have delayed messages # this will trigger a reverse connection, which should flush the buffers foreach my $tryserver (sort(keys(%servers))) { @@ -943,13 +701,9 @@ sub check_delayed_msg { } sub finish_logging { - my ($fh,$weightsref)=@_; - my %weights; - if (ref($weightsref) eq 'HASH') { - %weights = %{$weightsref}; - } + my ($fh)=@_; &log($fh,"\n"); - $totalcount=($weights{'N'}*$notices)+($weights{'W'}*$warnings)+($weights{'E'}*$errors); + $totalcount=$notices+4*$warnings+100*$errors; &errout($fh); &log($fh,") { + while (my $line= ) { &log($fh,&encode_entities($line,'<>&"')); } close (DFH); @@ -712,199 +638,29 @@ sub test_connections { # ------------------------------------------------------------ Delayed messages sub check_delayed_msg { - my ($fh,$weightsref,$exclusionsref)=@_; + my ($fh)=@_; &log($fh,' Delayed Messages
'); print "Checking buffers.\n"; &log($fh,'Scanning Permanent Log
'); my $unsend=0; - my $ignored=0; - - my %hostname = &Apache::lonnet::all_hostnames(); - my $numhosts = scalar(keys(%hostname)); - my $checkbackwards = 0; - my $checkfrom = 0; - my $checkexcluded = 0; - my (%bymachine,%weights,%exclusions,%serverhomes); - if (ref($weightsref) eq 'HASH') { - %weights = %{$weightsref}; - } - if (ref($exclusionsref) eq 'HASH') { - %exclusions = %{$exclusionsref}; - if (keys(%exclusions)) { - $checkexcluded = 1; - %serverhomes = &read_serverhomeIDs(); - } - } -# -# For LON-CAPA 1.2.0 to 2.1.3 (release dates: 8/31/2004 and 3/31/2006) any -# entry logged in lonnet.perm.log for completion of a delayed (critical) -# transaction lacked the hostID for the remote node to which the command -# to be completed was sent. -# -# Because of this, exclusion of items in lonnet.perm.log for nodes which are -# no longer part of the cluster from adding to the overall "unsend" count -# needs additional effort besides the changes made in loncron rev. 1.105. -# -# For "S" (completion) events logging in LON-CAPA 1.2.0 through 2.1.3 included -# "LondTransaction=HASH(hexadecimal)->getClient() :$cmd, where the hexadecimal -# is a memory location, and $cmd is the command sent to the remote node. -# -# Starting with 2.2.0 (released 8/21/2006) logging for "S" (completion) events -# had sethost:$host_id:$cmd after LondTransaction=HASH(hexadecimal)->getClient() -# -# Starting with 2.4.1 (released 6/13/2007) logging for "S" replaced echoing the -# getClient() call with the result of the Transaction->getClient() call itself -# undef for completion of delivery of a delayed message. -# -# The net effect of these changes is that lonnet.perm.log is now accessed three -# times: (a) oldest record is checked, if earlier than release date for 2.5.0 -# then (b) file is read backwards, with timestamp recorded for most recent -# instance of logged "S" event for "update" command without "sethost:$host_id:" -# then (c) file is read forward with records ignored which predate the timestamp -# recorded in (b), if one was found. -# -# In (c), when calculating the unsend total, i.e., the difference between delayed -# transactions ("D") and sent transactions ("S"), transactions are ignored if the -# target node is no longer in the cluster, and also (for "update" commands), if -# the target node is in the list of nodes excluded from the count, in the domain -# configuration for this machine's default domain. The idea here is to remove -# delayed "update" commands for nodes for which inbound access to port 5663, -# is blocked, but are still part of the LON-CAPA network, (i.e., they can still -# replicate content from other nodes). -# - - my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r"); - if (defined($dfh)) { - while (my $line=<$dfh>) { - my ($time,$sdf,$rest)=split(/:/,$line,3); - if ($time < 1541185772) { - $checkbackwards = 1; - } - last; - } - undef $dfh; + my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); + while (my $line=<$dfh>) { + my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); + if ($sdf eq 'F') { + my $local=localtime($time); + &log($fh,"Failed: $time, $dserv, $dcmd
"); + $warnings++; + } + if ($sdf eq 'S') { $unsend--; } + if ($sdf eq 'D') { $unsend++; } } - if ($checkbackwards) { - if (tie *BW, 'File::ReadBackwards', "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { - while(my $line=) { - if ($line =~ /\QLondTransaction=HASH\E[^:]+:update:/) { - ($checkfrom) = split(/:/,$line,2); - last; - } - } - close(BW); - } - } - $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r"); - if (defined($dfh)) { - while (my $line=<$dfh>) { - my ($time,$sdf,$rest)=split(/:/,$line,3); - next unless (($sdf eq 'F') || ($sdf eq 'S') || ($sdf eq 'D')); - next if (($checkfrom) && ($time <= $checkfrom)); - my ($dserv,$dcmd); - if ($sdf eq 'S') { - my ($serva,$cmda,$servb,$cmdb) = split(/:/,$rest); - if ($cmda eq 'sethost') { - chomp($cmdb); - $dcmd = $cmdb; - } else { - $dcmd = $cmda; - } - if (($serva =~ /^LondTransaction/) || ($serva eq '')) { - unless (($servb eq '') || ($servb =~ m{^/})) { - $dserv = $servb; - } - } else { - $dserv = $serva; - } - } else { - ($dserv,$dcmd) = split(/:/,$rest); - } - if ($sdf eq 'F') { - my $local=localtime($time); - &log($fh,"Failed: $time, $dserv, $dcmd
"); - $warnings++; - } - next if ((($dserv eq '') || ($dcmd eq '')) && ($sdf ne 'F')); - if ($sdf eq 'S') { - if ($dcmd eq 'update') { - if ($hostname{$dserv}) { - if ($exclusions{$serverhomes{$hostname{$dserv}}}) { - $ignored --; - } else { - $unsend --; - } - } - if (exists($bymachine{$dserv})) { - $bymachine{$dserv} --; - } else { - $bymachine{$dserv} = -1; - } - } else { - if ($hostname{$dserv}) { - $unsend --; - } - } - } elsif ($sdf eq 'D') { - if ($dcmd eq 'update') { - if ($hostname{$dserv}) { - if ($exclusions{$serverhomes{$hostname{$dserv}}}) { - $ignored ++; - } else { - $unsend ++; - } - } - if (exists($bymachine{$dserv})) { - $bymachine{$dserv} ++; - } else { - $bymachine{$dserv} = 1; - } - } else { - if ($hostname{$dserv}) { - $unsend ++; - } - } - } - } - undef $dfh; - my $nodest = 0; - my $retired = 0; - my %active; - if (keys(%bymachine)) { - unless ($checkexcluded) { - %serverhomes = &read_serverhomeIDs(); - } - foreach my $key (keys(%bymachine)) { - if ($bymachine{$key} > 0) { - if ($hostname{$key}) { - $active{$serverhomes{$hostname{$key}}} += $bymachine{$key}; - } else { - $retired ++; - $nodest += $bymachine{$key}; - } - } - } - } - if (keys(%active)) { - &log($fh,"Unsend messages by node, active (undegraded) nodes in cluster
\n"); - foreach my $key (sort(keys(%active))) { - &log($fh,&encode_entities("$key => $active{$key}",'<>&"')."\n"); - } - } - &log($fh,"Total unsend messages: $unsend for ".scalar(keys(%active))." active (undegraded) nodes in cluster.
\n"); - if (keys(%exclusions) > 0) { - &log($fh,"Total incomplete updates $ignored for ".scalar(keys(%exclusions))." degraded nodes in cluster.
\n"); - } - if ($retired) { - &log($fh,"Total unsent $nodest for $retired nodes no longer in cluster.
\n"); - } - if ($unsend > 0) { - $warnings=$warnings+$weights{'U'}*$unsend; - } + &log($fh,"Total unsend messages: $unsend
\n"); + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; } if ($unsend) { $simplestatus{'unsend'}=$unsend; } @@ -919,6 +675,8 @@ sub check_delayed_msg { } &log($fh,"
Distros using bash readline library 8.1 or later need bracketed paste disabled for the R bash shell for the www user so R commands sent to lonr daemon will be processed.
'); - my $bash_www_cnf = '/home/www/.inputrc'; - my $non_empty_conffile; - unless ($bracketed_warning) { - if (-e $bash_www_cnf) { - my $filesize = (stat($bash_www_cnf))[7]; - if ($filesize > 0) { - $non_empty_conffile = 1; - } - } - } - if (($bracketed_warning) || ($non_empty_conffile)) { - my $bash_update = &set_bracketed_paste_off(); - if ($bash_update) { - &log($fh,''.$bash_update.'
'."\n"); - } - } else { - &log($fh,'No action needed; /home/www/.inputrc already set.
'."\n"); - } - } else { - &log($fh,'No action needed for distros using pre-8.1 bash readline library
'."\n"); - } - my $domconf = &get_domain_config(); - my ($threshold,$sysmail,$reportstatus,$weightsref,$exclusionsref) = - &get_permcount_settings($domconf); - &check_delayed_msg($fh,$weightsref,$exclusionsref); - &finish_logging($fh,$weightsref); + if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { + &check_delayed_msg($fh); + &finish_logging($fh); &log_simplestatus(); &write_loncaparevs(); &write_serverhomeIDs(); &write_checksums(); - &write_hostips(); - if ($totalcount>$threshold && !$noemail) { &send_mail($sysmail,$reportstatus); } + &write_connection_config(); + &write_hosttypes(); + if ($totalcount>200 && !$noemail) { &send_mail(); } } }