--- loncom/lond 2003/11/17 09:32:17 1.163
+++ loncom/lond 2004/03/08 20:13:07 1.165.2.3
@@ -2,7 +2,7 @@
# The LearningOnline Network
# lond "LON Daemon" Server (port "LOND" 5663)
#
-# $Id: lond,v 1.163 2003/11/17 09:32:17 foxr Exp $
+# $Id: lond,v 1.165.2.3 2004/03/08 20:13:07 albertel Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -52,7 +52,7 @@ my $DEBUG = 0; # Non zero to ena
my $status='';
my $lastlog='';
-my $VERSION='$Revision: 1.163 $'; #' stupid emacs
+my $VERSION='$Revision: 1.165.2.3 $'; #' stupid emacs
my $remoteVERSION;
my $currenthostid;
my $currentdomainid;
@@ -513,6 +513,7 @@ sub catchexception {
my ($error)=@_;
$SIG{'QUIT'}='DEFAULT';
$SIG{__DIE__}='DEFAULT';
+ &status("Catching exception");
&logthis("CRITICAL: "
."ABNORMAL EXIT. Child $$ for server $thisserver died through "
."a crash with this error msg->[$error]");
@@ -523,6 +524,7 @@ sub catchexception {
}
sub timeout {
+ &status("Handling Timeout");
&logthis("CRITICAL: TIME OUT ".$$."");
&catchexception('Timeout');
}
@@ -575,37 +577,51 @@ $server = IO::Socket::INET->new(LocalPor
# global variables
my %children = (); # keys are current child process IDs
-my $children = 0; # current number of children
sub REAPER { # takes care of dead children
$SIG{CHLD} = \&REAPER;
- my $pid = wait;
- if (defined($children{$pid})) {
- &logthis("Child $pid died");
- $children --;
- delete $children{$pid};
- } else {
- &logthis("Unknown Child $pid died");
+ &status("Handling child death");
+ my $pid;
+ do {
+ $pid = waitpid(-1,&WNOHANG());
+ if (defined($children{$pid})) {
+ &logthis("Child $pid died");
+ delete($children{$pid});
+ } else {
+ &logthis("Unknown Child $pid died");
+ }
+ } while ( $pid > 0 );
+ foreach my $child (keys(%children)) {
+ $pid = waitpid($child,&WNOHANG());
+ if ($pid > 0) {
+ &logthis("Child $child - $pid looks like we missed it's death");
+ delete($children{$pid});
+ }
}
+ &status("Finished Handling child death");
}
sub HUNTSMAN { # signal handler for SIGINT
+ &status("Killing children (INT)");
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
kill 'INT' => keys %children;
&logthis("Free socket: ".shutdown($server,2)); # free up socket
my $execdir=$perlvar{'lonDaemons'};
unlink("$execdir/logs/lond.pid");
&logthis("CRITICAL: Shutting down");
+ &status("Done killing children");
exit; # clean up with dignity
}
sub HUPSMAN { # signal handler for SIGHUP
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
+ &status("Killing children for restart (HUP)");
kill 'INT' => keys %children;
&logthis("Free socket: ".shutdown($server,2)); # free up socket
&logthis("CRITICAL: Restarting");
my $execdir=$perlvar{'lonDaemons'};
unlink("$execdir/logs/lond.pid");
+ &status("Restarting self (HUP)");
exec("$execdir/lond"); # here we go again
}
@@ -636,12 +652,14 @@ sub ReadHostTable {
open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
while (my $configline=) {
- my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
- chomp($ip); $ip=~s/\D+$//;
- $hostid{$ip}=$id;
- $hostdom{$id}=$domain;
- $hostip{$id}=$ip;
- if ($id eq $perlvar{'lonHostID'}) { $thisserver=$name; }
+ if (!($configline =~ /^\s*\#/)) {
+ my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
+ chomp($ip); $ip=~s/\D+$//;
+ $hostid{$ip}=$id;
+ $hostdom{$id}=$domain;
+ $hostip{$id}=$ip;
+ if ($id eq $perlvar{'lonHostID'}) { $thisserver=$name; }
+ }
}
close(CONFIG);
}
@@ -667,6 +685,7 @@ sub ReloadApache {
# now be honored.
#
sub UpdateHosts {
+ &status("Reload hosts.tab");
logthis(' Updating connections ');
#
# The %children hash has the set of IP's we currently have children
@@ -691,10 +710,12 @@ sub UpdateHosts {
}
}
ReloadApache;
+ &status("Finished reloading hosts.tab");
}
sub checkchildren {
+ &status("Checking on the children (sending signals)");
&initnewstatus();
&logstatus();
&logthis('Going to check on the children');
@@ -709,6 +730,7 @@ sub checkchildren {
sleep 5;
$SIG{ALRM} = sub { die "timeout" };
$SIG{__DIE__} = 'DEFAULT';
+ &status("Checking on the children (waiting for reports)");
foreach (sort keys %children) {
unless (-e "$docdir/lon-status/londchld/$_.txt") {
eval {
@@ -726,6 +748,7 @@ sub checkchildren {
}
$SIG{ALRM} = 'DEFAULT';
$SIG{__DIE__} = \&catchexception;
+ &status("Finished checking children");
}
# --------------------------------------------------------------------- Logging
@@ -768,17 +791,20 @@ sub Reply {
# ------------------------------------------------------------------ Log status
sub logstatus {
+ &status("Doing logging");
my $docdir=$perlvar{'lonDocRoot'};
{
my $fh=IO::File->new(">>$docdir/lon-status/londstatus.txt");
- print $fh $$."\t".$currenthostid."\t".$status."\t".$lastlog."\n";
+ print $fh $$."\t".$clientname."\t".$currenthostid."\t".$status."\t".$lastlog."\n";
$fh->close();
}
+ &status("Finished londstatus.txt");
{
my $fh=IO::File->new(">$docdir/lon-status/londchld/$$.txt");
print $fh $status."\n".$lastlog."\n".time;
$fh->close();
}
+ &status("Finished logging");
}
sub initnewstatus {
@@ -966,8 +992,11 @@ ReadHostTable;
# along the connection.
while (1) {
+ &status('Starting accept');
$client = $server->accept() or next;
+ &status('Accepted '.$client.' off to spawn');
make_new_child($client);
+ &status('Finished spawning');
}
sub make_new_child {
@@ -976,6 +1005,7 @@ sub make_new_child {
my $sigset;
$client = shift;
+ &status('Starting new child '.$client);
&logthis(' Attempting to start child ('.$client.
")");
# block signal for fork
@@ -1001,7 +1031,6 @@ sub make_new_child {
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
$children{$pid} = $clientip;
- $children++;
&status('Started child '.$pid);
return;
} else {
@@ -1419,7 +1448,7 @@ sub make_new_child {
unless (mkdir($fpnow,0777)) {
$fperror="error: ".($!+0)
." mkdir failed while attempting "
- ."makeuser\n";
+ ."makeuser";
}
}
}
@@ -2500,6 +2529,7 @@ sub make_new_child {
&logthis(
"Client $clientip ($clientname) hanging up: $userinput");
print $client "bye\n";
+ $client->shutdown(2); # shutdown the socket forcibly.
$client->close();
last;