version 1.110, 2018/10/25 03:27:22
|
version 1.114, 2018/11/18 22:50:46
|
Line 43 use HTML::Entities;
|
Line 43 use HTML::Entities;
|
use Getopt::Long; |
use Getopt::Long; |
use GDBM_File; |
use GDBM_File; |
use Storable qw(thaw); |
use Storable qw(thaw); |
|
use File::ReadBackwards; |
#globals |
#globals |
use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); |
use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); |
|
|
Line 646 sub test_connections {
|
Line 647 sub test_connections {
|
|
|
# ------------------------------------------------------------ Delayed messages |
# ------------------------------------------------------------ Delayed messages |
sub check_delayed_msg { |
sub check_delayed_msg { |
my ($fh)=@_; |
my ($fh,$weightsref,$exclusionsref)=@_; |
&log($fh,'<hr /><a name="delayed" /><h2>Delayed Messages</h2>'); |
&log($fh,'<hr /><a name="delayed" /><h2>Delayed Messages</h2>'); |
print "Checking buffers.\n"; |
print "Checking buffers.\n"; |
|
|
&log($fh,'<h3>Scanning Permanent Log</h3>'); |
&log($fh,'<h3>Scanning Permanent Log</h3>'); |
|
|
my $unsend=0; |
my $unsend=0; |
|
my $ignored=0; |
|
|
my %hostname = &Apache::lonnet::all_hostnames(); |
my %hostname = &Apache::lonnet::all_hostnames(); |
my $numhosts = scalar(keys(%hostname)); |
my $numhosts = scalar(keys(%hostname)); |
|
my $checkbackwards = 0; |
my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); |
my $checkfrom = 0; |
while (my $line=<$dfh>) { |
my $checkexcluded = 0; |
my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); |
my (%bymachine,%weights,%exclusions,%serverhomes); |
if ($numhosts) { |
if (ref($weightsref) eq 'HASH') { |
next unless ($hostname{$dserv}); |
%weights = %{$weightsref}; |
} |
} |
if ($sdf eq 'F') { |
if (ref($exclusionsref) eq 'HASH') { |
my $local=localtime($time); |
%exclusions = %{$exclusionsref}; |
&log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />"); |
if (keys(%exclusions)) { |
$warnings++; |
$checkexcluded = 1; |
} |
%serverhomes = &read_serverhomeIDs(); |
if ($sdf eq 'S') { $unsend--; } |
} |
if ($sdf eq 'D') { $unsend++; } |
|
} |
} |
|
|
&log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n"); |
# |
if ($unsend > 0) { |
# For LON-CAPA 1.2.0 to 2.1.3 (release dates: 8/31/2004 and 3/31/2006) any |
$warnings=$warnings+5*$unsend; |
# entry logged in lonnet.perm.log for completion of a delayed (critical) |
|
# transaction lacked the hostID for the remote node to which the command |
|
# to be completed was sent. |
|
# |
|
# Because of this, exclusion of items in lonnet.perm.log for nodes which are |
|
# no longer part of the cluster from adding to the overall "unsend" count |
|
# needs additional effort besides the changes made in loncron rev. 1.105. |
|
# |
|
# For "S" (completion) events logging in LON-CAPA 1.2.0 through 2.1.3 included |
|
# "LondTransaction=HASH(hexadecimal)->getClient() :$cmd, where the hexadecimal |
|
# is a memory location, and $cmd is the command sent to the remote node. |
|
# |
|
# Starting with 2.2.0 (released 8/21/2006) logging for "S" (completion) events |
|
# had sethost:$host_id:$cmd after LondTransaction=HASH(hexadecimal)->getClient() |
|
# |
|
# Starting with 2.4.1 (released 6/13/2007) logging for "S" replaced echoing the |
|
# getClient() call with the result of the Transaction->getClient() call itself, i.e., |
|
# undef for completion of delivery of a delayed message. |
|
# |
|
# The net effect of these changes is that lonnet.perm.log is now accessed three |
|
# times: (a) oldest record is checked, if earlier than release date for 2.5.0 |
|
# then (b) file is read backwards, with timestamp recorded for most recent |
|
# instance of logged "S" event for "update" command without "sethost:$host_id:" |
|
# then (c) file is read forward with records ignored which predate the timestamp |
|
# recorded in (b), if one was found. |
|
# |
|
# In (c), when calculating the unsend total, i.e., the difference between delayed |
|
# transactions ("D") and sent transactions ("S"), transactions are ignored if the |
|
# target node is no longer in the cluster, and also (for "update" commands), if |
|
# the target node is in the list of nodes excluded from the count, in the domain |
|
# configuration for this machine's default domain. The idea here is to remove |
|
# delayed "update" commands for nodes for which inbound access to port 5663, |
|
# is blocked, but are still part of the LON-CAPA network, (i.e., they can still |
|
# replicate content from other nodes). |
|
# |
|
|
|
my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r"); |
|
if (defined($dfh)) { |
|
while (my $line=<$dfh>) { |
|
my ($time,$sdf,$rest)=split(/:/,$line,3); |
|
if ($time < 1541185772) { |
|
$checkbackwards = 1; |
|
} |
|
last; |
|
} |
|
undef $dfh; |
|
} |
|
|
|
if ($checkbackwards) { |
|
if (tie *BW, 'File::ReadBackwards', "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { |
|
while(my $line=<BW>) { |
|
if ($line =~ /\QLondTransaction=HASH\E[^:]+:update:/) { |
|
($checkfrom) = split(/:/,$line,2); |
|
last; |
|
} |
|
} |
|
close(BW); |
|
} |
|
} |
|
$dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r"); |
|
if (defined($dfh)) { |
|
while (my $line=<$dfh>) { |
|
my ($time,$sdf,$rest)=split(/:/,$line,3); |
|
next unless (($sdf eq 'F') || ($sdf eq 'S') || ($sdf eq 'D')); |
|
next if (($checkfrom) && ($time <= $checkfrom)); |
|
my ($dserv,$dcmd); |
|
if ($sdf eq 'S') { |
|
my ($serva,$cmda,$servb,$cmdb) = split(/:/,$rest); |
|
if ($cmda eq 'sethost') { |
|
chomp($cmdb); |
|
$dcmd = $cmdb; |
|
} else { |
|
$dcmd = $cmda; |
|
} |
|
if (($serva =~ /^LondTransaction/) || ($serva eq '')) { |
|
unless (($servb eq '') || ($servb =~ m{^/})) { |
|
$dserv = $servb; |
|
} |
|
} else { |
|
$dserv = $serva; |
|
} |
|
} else { |
|
($dserv,$dcmd) = split(/:/,$rest); |
|
} |
|
if ($sdf eq 'F') { |
|
my $local=localtime($time); |
|
&log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />"); |
|
$warnings++; |
|
} |
|
next if ((($dserv eq '') || ($dcmd eq '')) && ($sdf ne 'F')); |
|
if ($sdf eq 'S') { |
|
if ($dcmd eq 'update') { |
|
if ($hostname{$dserv}) { |
|
if ($exclusions{$serverhomes{$hostname{$dserv}}}) { |
|
$ignored --; |
|
} else { |
|
$unsend --; |
|
} |
|
} |
|
if (exists($bymachine{$dserv})) { |
|
$bymachine{$dserv} --; |
|
} else { |
|
$bymachine{$dserv} = -1; |
|
} |
|
} else { |
|
if ($hostname{$dserv}) { |
|
$unsend --; |
|
} |
|
} |
|
} elsif ($sdf eq 'D') { |
|
if ($dcmd eq 'update') { |
|
if ($hostname{$dserv}) { |
|
if ($exclusions{$serverhomes{$hostname{$dserv}}}) { |
|
$ignored ++; |
|
} else { |
|
$unsend ++; |
|
} |
|
} |
|
if (exists($bymachine{$dserv})) { |
|
$bymachine{$dserv} ++; |
|
} else { |
|
$bymachine{$dserv} = 1; |
|
} |
|
} else { |
|
if ($hostname{$dserv}) { |
|
$unsend ++; |
|
} |
|
} |
|
} |
|
} |
|
undef $dfh; |
|
my $nodest = 0; |
|
my $retired = 0; |
|
my %active; |
|
if (keys(%bymachine)) { |
|
unless ($checkexcluded) { |
|
%serverhomes = &read_serverhomeIDs(); |
|
} |
|
foreach my $key (keys(%bymachine)) { |
|
if ($bymachine{$key} > 0) { |
|
if ($hostname{$key}) { |
|
$active{$serverhomes{$hostname{$key}}} += $bymachine{$key}; |
|
} else { |
|
$retired ++; |
|
$nodest += $bymachine{$key}; |
|
} |
|
} |
|
} |
|
} |
|
if (keys(%active)) { |
|
&log($fh,"<p>Unsend messages by node, active (undegraded) nodes in cluster</p>\n"); |
|
foreach my $key (sort(keys(%active))) { |
|
&log($fh,&encode_entities("$key => $active{$key}",'<>&"')."\n"); |
|
} |
|
} |
|
&log($fh,"<p>Total unsend messages: <b>$unsend</b> for ".scalar(keys(%active))." active (undegraded) nodes in cluster.</p>\n"); |
|
if (keys(%exclusions) > 0) { |
|
&log($fh,"<p>Total incomplete updates <b>$ignored</b> for ".scalar(keys(%exclusions))." degraded nodes in cluster.</p>\n"); |
|
} |
|
if ($retired) { |
|
&log($fh,"<p>Total unsent <b>$nodest</b> for $retired nodes no longer in cluster.</p>\n"); |
|
} |
|
if ($unsend > 0) { |
|
$warnings=$warnings+$weights{'U'}*$unsend; |
|
} |
} |
} |
|
|
if ($unsend) { $simplestatus{'unsend'}=$unsend; } |
if ($unsend) { $simplestatus{'unsend'}=$unsend; } |
Line 713 sub check_delayed_msg {
|
Line 878 sub check_delayed_msg {
|
} |
} |
|
|
sub finish_logging { |
sub finish_logging { |
my ($fh)=@_; |
my ($fh,$weightsref)=@_; |
|
my %weights; |
|
if (ref($weightsref) eq 'HASH') { |
|
%weights = %{$weightsref}; |
|
} |
&log($fh,"<a name='errcount' />\n"); |
&log($fh,"<a name='errcount' />\n"); |
$totalcount=$notices+4*$warnings+100*$errors; |
$totalcount=($weights{'N'}*$notices)+($weights{'W'}*$warnings)+($weights{'E'}*$errors); |
&errout($fh); |
&errout($fh); |
&log($fh,"<h1>Total Error Count: $totalcount</h1>"); |
&log($fh,"<h1>Total Error Count: $totalcount</h1>"); |
my $now=time; |
my $now=time; |
Line 861 sub clean_lonc_childpids {
|
Line 1030 sub clean_lonc_childpids {
|
} |
} |
|
|
sub write_connection_config { |
sub write_connection_config { |
my ($isprimary,$domconf,$url,%connectssl,%changes); |
my ($domconf,%connectssl,%changes); |
my $primaryLibServer = &Apache::lonnet::domain($perlvar{'lonDefDomain'},'primary'); |
$domconf = &get_domain_config(); |
if ($primaryLibServer eq $perlvar{'lonHostID'}) { |
|
$isprimary = 1; |
|
} elsif ($primaryLibServer ne '') { |
|
my $protocol = $Apache::lonnet::protocol{$primaryLibServer}; |
|
my $hostname = &Apache::lonnet::hostname($primaryLibServer); |
|
unless ($protocol eq 'https') { |
|
$protocol = 'http'; |
|
} |
|
$url = $protocol.'://'.$hostname.'/cgi-bin/listdomconfig.pl'; |
|
} |
|
my $domconf = &get_domain_config($perlvar{'lonDefDomain'},$primaryLibServer,$isprimary, |
|
$url); |
|
if (ref($domconf) eq 'HASH') { |
if (ref($domconf) eq 'HASH') { |
if (ref($domconf->{'ssl'}) eq 'HASH') { |
if (ref($domconf->{'ssl'}) eq 'HASH') { |
foreach my $connect ('connto','connfrom') { |
foreach my $connect ('connto','connfrom') { |
Line 927 sub write_connection_config {
|
Line 1084 sub write_connection_config {
|
} |
} |
|
|
sub get_domain_config { |
sub get_domain_config { |
my ($dom,$primlibserv,$isprimary,$url) = @_; |
my ($dom,$primlibserv,$isprimary,$url,%confhash); |
my %confhash; |
$dom = $perlvar{'lonDefDomain'}; |
|
$primlibserv = &Apache::lonnet::domain($dom,'primary'); |
|
if ($primlibserv eq $perlvar{'lonHostID'}) { |
|
$isprimary = 1; |
|
} elsif ($primlibserv ne '') { |
|
my $protocol = $Apache::lonnet::protocol{$primlibserv}; |
|
my $hostname = &Apache::lonnet::hostname($primlibserv); |
|
unless ($protocol eq 'https') { |
|
$protocol = 'http'; |
|
} |
|
$url = $protocol.'://'.$hostname.'/cgi-bin/listdomconfig.pl'; |
|
} |
if ($isprimary) { |
if ($isprimary) { |
my $lonusersdir = $perlvar{'lonUsersDir'}; |
my $lonusersdir = $perlvar{'lonUsersDir'}; |
my $fname = $lonusersdir.'/'.$dom.'/configuration.db'; |
my $fname = $lonusersdir.'/'.$dom.'/configuration.db'; |
Line 1087 sub reset_nosslverify_pids {
|
Line 1255 sub reset_nosslverify_pids {
|
return; |
return; |
} |
} |
|
|
|
# Work out the settings used to score and report this run's status.
#
# Defaults come from &Apache::loncommon::lon_status_items(); any values
# found under {'contacts'}{'lonstatus'} in the supplied domain
# configuration override them.
#
# Input:  $domconf - reference to the domain configuration hash (anything
#                    that is not a hash reference means "use defaults").
#
# Returns a five-element list:
#   $threshold    - default, or {'contacts'}{'lonstatus'}{'threshold'}
#   $sysmail      - default, or {'contacts'}{'lonstatus'}{'sysmail'}
#   $reportstatus - 1 unless {'contacts'}{'reportstatus'} is explicitly
#                   set to a value numerically equal to 0
#   \%weights     - weights for the types 'E', 'W', 'N' and 'U'
#   \%exclusions  - hash with a true value for each item listed in
#                   {'contacts'}{'lonstatus'}{'excluded'}
sub get_permcount_settings {
    my ($domconf) = @_;
    my ($defaults) = &Apache::loncommon::lon_status_items();
    my @types = ('E','W','N','U');
    my (%weights,%exclusions);
    foreach my $type (@types) {
        $weights{$type} = $defaults->{$type};
    }
    my $threshold = $defaults->{'threshold'};
    my $sysmail = $defaults->{'sysmail'};
    my $reportstatus = 1;

    my $contacts;
    if ((ref($domconf) eq 'HASH') && (ref($domconf->{'contacts'}) eq 'HASH')) {
        $contacts = $domconf->{'contacts'};
    }
    if (defined($contacts)) {
        # Only an explicit setting may switch reporting off; an absent
        # (undefined) value must leave the default of 1 in place, and
        # undef == 0 would otherwise evaluate as true.
        if (defined($contacts->{'reportstatus'}) &&
            ($contacts->{'reportstatus'} == 0)) {
            $reportstatus = 0;
        }
        my $lonstatus = $contacts->{'lonstatus'};
        if (ref($lonstatus) eq 'HASH') {
            if (ref($lonstatus->{'weights'}) eq 'HASH') {
                foreach my $type (@types) {
                    if (exists($lonstatus->{'weights'}{$type})) {
                        $weights{$type} = $lonstatus->{'weights'}{$type};
                    }
                }
            }
            if (ref($lonstatus->{'excluded'}) eq 'ARRAY') {
                my @excluded = @{$lonstatus->{'excluded'}};
                if (@excluded) {
                    @exclusions{@excluded} = (1) x scalar(@excluded);
                }
            }
            if (exists($lonstatus->{'threshold'})) {
                $threshold = $lonstatus->{'threshold'};
            }
            if (exists($lonstatus->{'sysmail'})) {
                $sysmail = $lonstatus->{'sysmail'};
            }
        }
    }
    return ($threshold,$sysmail,$reportstatus,\%weights,\%exclusions);
}
|
|
|
# Load the contents of $perlvar{'lonTabDir'}/serverhomeIDs.tab.
#
# Each usable line has the form  first:second  (colon-separated); the
# first field becomes a hash key and the second its value.  Callers in
# this file look entries up by hostname.  Any further colon-separated
# fields on a line are ignored.
#
# Returns a hash (as a list); it is empty if the file is missing or
# cannot be opened.
sub read_serverhomeIDs {
    my %server;
    my $tabfile = "$perlvar{'lonTabDir'}/serverhomeIDs.tab";
    # open() fails for a non-existent file, so no separate -e test is needed.
    if (open(my $fh,'<',$tabfile)) {
        # Lexical line variable, so the caller's $_ is left untouched.
        while (my $line = <$fh>) {
            chomp($line);
            # A limit of 3 keeps an empty second field ("host:") as '',
            # while a line with no colon at all leaves $id undefined.
            my ($host,$id) = split(/:/,$line,3);
            # Skip blank or malformed lines instead of storing bogus keys.
            next unless (defined($host) && ($host ne '') && defined($id));
            $server{$host} = $id;
        }
        close($fh);
    }
    return %server;
}
|
|
sub send_mail { |
sub send_mail { |
|
my ($sysmail,$reportstatus) = @_; |
my $defdom = $perlvar{'lonDefDomain'}; |
my $defdom = $perlvar{'lonDefDomain'}; |
my $origmail = $perlvar{'lonAdmEMail'}; |
my $origmail = $perlvar{'lonAdmEMail'}; |
my $emailto = &Apache::loncommon::build_recipient_list(undef, |
my $emailto = &Apache::loncommon::build_recipient_list(undef, |
'lonstatusmail',$defdom,$origmail); |
'lonstatusmail',$defdom,$origmail); |
if ($totalcount>2500) { |
if (($totalcount>$sysmail) && ($reportstatus)) { |
$emailto.=",$perlvar{'lonSysEMail'}"; |
$emailto.=",$perlvar{'lonSysEMail'}"; |
} |
} |
my $from; |
my $from; |
Line 1176 sub main () {
|
Line 1401 sub main () {
|
chop $hostname; |
chop $hostname; |
$hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell |
$hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell |
my $subj="LON: Unconfigured machine $hostname"; |
my $subj="LON: Unconfigured machine $hostname"; |
system("echo 'Unconfigured machine $hostname.' |\ |
system("echo 'Unconfigured machine $hostname.' |". |
mailto $emailto -s '$subj' > /dev/null"); |
" mail -s '$subj' $emailto > /dev/null"); |
exit 1; |
exit 1; |
} |
} |
|
|
Line 1187 sub main () {
|
Line 1412 sub main () {
|
print("User ID mismatch. This program must be run as user 'www'.\n"); |
print("User ID mismatch. This program must be run as user 'www'.\n"); |
my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; |
my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; |
system("echo 'User ID mismatch. loncron must be run as user www.' |\ |
system("echo 'User ID mismatch. loncron must be run as user www.' |". |
mailto $emailto -s '$subj' > /dev/null"); |
" mail -s '$subj' $emailto > /dev/null"); |
exit 1; |
exit 1; |
} |
} |
|
|
Line 1273 sub main () {
|
Line 1498 sub main () {
|
&test_connections($fh); |
&test_connections($fh); |
} |
} |
if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { |
if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { |
&check_delayed_msg($fh); |
my $domconf = &get_domain_config(); |
&log_simplestatus(); |
my ($threshold,$sysmail,$reportstatus,$weightsref,$exclusionsref) = |
|
&get_permcount_settings($domconf); |
|
&check_delayed_msg($fh,$weightsref,$exclusionsref); |
&write_loncaparevs(); |
&write_loncaparevs(); |
&write_serverhomeIDs(); |
&write_serverhomeIDs(); |
&write_checksums(); |
&write_checksums(); |
Line 1287 sub main () {
|
Line 1514 sub main () {
|
&checkon_daemon($fh,'lond',40000,'USR2'); |
&checkon_daemon($fh,'lond',40000,'USR2'); |
&reset_nosslverify_pids($fh,%sslrem); |
&reset_nosslverify_pids($fh,%sslrem); |
} |
} |
&finish_logging($fh); |
&finish_logging($fh,$weightsref); |
if ($totalcount>200 && !$noemail) { &send_mail(); } |
&log_simplestatus(); |
|
if ($totalcount>$threshold && !$noemail) { &send_mail($sysmail,$reportstatus); } |
} |
} |
} |
} |
|
|