--- loncom/Attic/lonc 2002/02/25 15:48:11 1.29
+++ loncom/Attic/lonc 2002/04/04 22:04:54 1.38
@@ -5,7 +5,7 @@
# provides persistent TCP connections to the other servers in the network
# through multiplexed domain sockets
#
-# $Id: lonc,v 1.29 2002/02/25 15:48:11 www Exp $
+# $Id: lonc,v 1.38 2002/04/04 22:04:54 foxr Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -45,7 +45,7 @@
# 12/20 Scott Harrison
# YEAR=2002
# 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
-#
+# 3/07/02 Ron Fox
# based on nonforker from Perl Cookbook
# - server who multiplexes without forking
@@ -57,65 +57,13 @@ use Socket;
use Fcntl;
use Tie::RefHash;
use Crypt::IDEA;
-use Net::Ping;
+#use Net::Ping;
use LWP::UserAgent();
-my $status='';
-my $lastlog='';
-
-# grabs exception and records it to log before exiting
-sub catchexception {
- my ($signal)=@_;
- $SIG{QUIT}='DEFAULT';
- $SIG{__DIE__}='DEFAULT';
- chomp($signal);
- &logthis("CRITICAL: "
- ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
- ."\"$signal\" with parameter [$@]");
- die($@);
-}
-
-$childmaxattempts=5;
-
-# -------------------------------------- Routines to see if other box available
-
-sub online {
- my $host=shift;
- &status("Pinging ".$host);
- my $p=Net::Ping->new("tcp",20);
- my $online=$p->ping("$host");
- $p->close();
- undef ($p);
- return $online;
-}
-
-sub connected {
- my ($local,$remote)=@_;
- &status("Checking connection $local to $remote");
- $local=~s/\W//g;
- $remote=~s/\W//g;
-
- unless ($hostname{$local}) { return 'local_unknown'; }
- unless ($hostname{$remote}) { return 'remote_unknown'; }
-
- unless (&online($hostname{$local})) { return 'local_offline'; }
-
- my $ua=new LWP::UserAgent;
-
- my $request=new HTTP::Request('GET',
- "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote);
-
- my $response=$ua->request($request);
-
- unless ($response->is_success) { return 'local_error'; }
-
- my $reply=$response->content;
- $reply=(split("\n",$reply))[0];
- $reply=~s/\W//g;
- if ($reply ne $remote) { return $reply; }
- return 'ok';
-}
-
+$status='';
+$lastlog='';
+$conserver='SHELL';
+$DEBUG = 0; # Set to 1 for annoyingly complete logs.
# -------------------------------- Set signal handlers to record abnormal exits
@@ -182,139 +130,7 @@ close(CONFIG);
%childatt = (); # number of attempts to start server
# for ID
-sub REAPER { # takes care of dead children
- $SIG{CHLD} = \&REAPER;
- my $pid = wait;
- my $wasserver=$children{$pid};
- &logthis("CRITICAL: "
- ."Child $pid for server $wasserver died ($childatt{$wasserver})");
- delete $children{$pid};
- delete $childpid{$wasserver};
- my $port = "$perlvar{'lonSockDir'}/$wasserver";
- unlink($port);
-}
-
-sub hangup {
- foreach (keys %children) {
- $wasserver=$children{$_};
- &status("Closing $wasserver");
- &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
- &status("Kill PID $_ for $wasserver");
- kill ('INT',$_);
- }
-}
-
-sub HUNTSMAN { # signal handler for SIGINT
- local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- &hangup();
- my $execdir=$perlvar{'lonDaemons'};
- unlink("$execdir/logs/lonc.pid");
- &logthis("CRITICAL: Shutting down");
- exit; # clean up with dignity
-}
-
-sub HUPSMAN { # signal handler for SIGHUP
- local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- &hangup();
- &logthis("CRITICAL: Restarting");
- unlink("$execdir/logs/lonc.pid");
- my $execdir=$perlvar{'lonDaemons'};
- exec("$execdir/lonc"); # here we go again
-}
-
-sub checkchildren {
- &initnewstatus();
- &logstatus();
- &logthis('Going to check on the children');
- foreach (sort keys %children) {
- sleep 1;
- unless (kill 'USR1' => $_) {
- &logthis ('CRITICAL: Child '.$_.' is dead');
- &logstatus($$.' is dead');
- }
- }
-}
-
-sub USRMAN {
- &logthis("USR1: Trying to establish connections again");
- %childatt=();
- &checkchildren();
-}
-
-# -------------------------------------------------- Non-critical communication
-sub subreply {
- my ($cmd,$server)=@_;
- my $answer='';
- if ($server ne $perlvar{'lonHostID'}) {
- my $peerfile="$perlvar{'lonSockDir'}/$server";
- my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile",
- Type => SOCK_STREAM,
- Timeout => 10)
- or return "con_lost";
-
-
- $SIG{ALRM}=sub { die "timeout" };
- $SIG{__DIE__}='DEFAULT';
- eval {
- alarm(10);
- print $sclient "$cmd\n";
- $answer=<$sclient>;
- chomp($answer);
- alarm(0);
- };
- if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
- $SIG{ALRM}='DEFAULT';
- $SIG{__DIE__}=\&catchexception;
- } else { $answer='self_reply'; }
- return $answer;
-}
-
-# --------------------------------------------------------------------- Logging
-
-sub logthis {
- my $message=shift;
- my $execdir=$perlvar{'lonDaemons'};
- my $fh=IO::File->new(">>$execdir/logs/lonc.log");
- my $now=time;
- my $local=localtime($now);
- $lastlog=$local.': '.$message;
- print $fh "$local ($$) [$status]: $message\n";
-}
-
-
-sub logperm {
- my $message=shift;
- my $execdir=$perlvar{'lonDaemons'};
- my $now=time;
- my $local=localtime($now);
- my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
- print $fh "$now:$message:$local\n";
-}
-# ------------------------------------------------------------------ Log status
-
-sub logstatus {
- my $docdir=$perlvar{'lonDocRoot'};
- my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
- print $fh $$."\t".$status."\t".$lastlog."\n";
-}
-
-sub initnewstatus {
- my $docdir=$perlvar{'lonDocRoot'};
- my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
- my $now=time;
- my $local=localtime($now);
- print $fh "LONC status $local - parent $$\n\n";
-}
-
-# -------------------------------------------------------------- Status setting
-
-sub status {
- my $what=shift;
- my $now=time;
- my $local=localtime($now);
- $status=$local.': '.$what;
-}
-
+$childmaxattempts=5;
# ---------------------------------------------------- Fork once and dissociate
&status("Fork and dissociate");
@@ -324,6 +140,8 @@ die "Couldn't fork: $!" unless defined (
POSIX::setsid() or die "Can't start new session: $!";
+$conserver='PARENT';
+
# ------------------------------------------------------- Write our PID on disk
&status("Write PID");
$execdir=$perlvar{'lonDaemons'};
@@ -341,52 +159,59 @@ $SIG{HUP}=$SIG{USR1}='IGNORE';
&status("Forking ...");
foreach $thisserver (keys %hostip) {
- if (&online($hostname{$thisserver})) {
+ #if (&online($hostname{$thisserver})) {
make_new_child($thisserver);
- }
+ #}
}
&logthis("Done starting initial servers");
# ----------------------------------------------------- Install signal handlers
-$SIG{CHLD} = \&REAPER;
+
$SIG{INT} = $SIG{TERM} = \&HUNTSMAN;
$SIG{HUP} = \&HUPSMAN;
$SIG{USR1} = \&USRMAN;
# And maintain the population.
while (1) {
- &status("Sleeping");
- sleep; # wait for a signal (i.e., child's death)
+ my $deadpid = wait; # Wait for the next child to die.
# See who died and start new one
&status("Woke up");
- foreach $thisserver (keys %hostip) {
- if (!$childpid{$thisserver}) {
- if (($childatt{$thisserver}<$childmaxattempts) &&
- (&online($hostname{$thisserver}))) {
- $childatt{$thisserver}++;
- &logthis(
- "INFO: Trying to reconnect for $thisserver "
- ."(".($childatt{$thisserver}?$childatt{$thisserver}:'none').
- " of $childmaxattempts attempts)");
- make_new_child($thisserver);
- } else {
- &logthis(
- "INFO: Skipping $thisserver "
- ."($childatt{$thisserver} of $childmaxattempts attempts)");
- }
-
- }
+ my $skipping='';
+
+ if(exists($children{$deadpid})) {
+
+ $thisserver = $children{$deadpid}; # Look name of dead guy's peer.
+
+ delete($children{$deadpid}); # Get rid of dead hash entry.
+
+ if($childatt{$thisserver} < $childmaxattempts) {
+ $childatt{$thisserver}++;
+ &logthis(
+ "INFO: Trying to reconnect for $thisserver "
+ ."($childatt{$thisserver} of $childmaxattempts attempts)");
+ make_new_child($thisserver);
+
+ }
+ else {
+ $skipping .= $thisserver.' ';
+ }
+ if($skipping) {
+ &logthis("WARNING: Skipped $skipping");
+
+ }
}
+
}
+
sub make_new_child {
- my $conserver=shift;
+ $newserver=shift;
my $pid;
my $sigset;
- &logthis("Attempting to start child for server $conserver");
+ &logthis("Attempting to start child for server $newserver");
# block signal for fork
$sigset = POSIX::SigSet->new(SIGINT);
sigprocmask(SIG_BLOCK, $sigset)
@@ -398,10 +223,11 @@ sub make_new_child {
# Parent records the child's birth and returns.
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
- $children{$pid} = $conserver;
- $childpid{$conserver} = $pid;
+ $children{$pid} = $newserver;
+ $childpid{$newserver} = $pid;
return;
} else {
+ $conserver=$newserver;
# Child can *not* return from this subroutine.
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
$SIG{USR1}= \&logstatus;
@@ -419,9 +245,10 @@ unlink($port);
# -------------------------------------------------------------- Open other end
&openremote($conserver);
-
+ &logthis(" Connection to $conserver open ");
# ----------------------------------------- We're online, send delayed messages
&status("Checking for delayed messages");
+
my @allbuffered;
my $path="$perlvar{'lonSockDir'}/delayed";
opendir(DIRHANDLE,$path);
@@ -429,9 +256,9 @@ unlink($port);
closedir(DIRHANDLE);
my $dfname;
foreach (@allbuffered) {
- &status("Sending delayed $conserver $_");
+ &status("Sending delayed: $_");
$dfname="$path/$_";
- &logthis('Sending '.$dfname);
+ if($DEBUG) { &logthis('Sending '.$dfname); }
my $wcmd;
{
my $dfh=IO::File->new($dfname);
@@ -452,27 +279,19 @@ unlink($port);
}
$cmd="enc:$cmdlength:$encrequest\n";
}
- $SIG{ALRM}=sub { die "timeout" };
- $SIG{__DIE__}='DEFAULT';
- eval {
- alarm(60);
- print $remotesock "$cmd\n";
- $answer=<$remotesock>;
+ $answer = londtransaction($remotesock, $cmd, 60);
chomp($answer);
- alarm(0);
- };
- $SIG{ALRM}='DEFAULT';
- $SIG{__DIE__}=\&catchexception;
if (($answer ne '') && ($@!~/timeout/)) {
unlink("$dfname");
- &logthis("Delayed $cmd to $conserver: >$answer<");
+ &logthis("Delayed $cmd: >$answer<");
&logperm("S:$conserver:$bcmd");
}
}
+ if($DEBUG) { &logthis(" Delayed transactions sent"); }
# ------------------------------------------------------- Listen to UNIX socket
-&status("Opening socket $conserver");
+&status("Opening socket");
unless (
$server = IO::Socket::UNIX->new(Local => $port,
Type => SOCK_STREAM,
@@ -481,11 +300,11 @@ unless (
my $st=120+int(rand(240));
&logthis(
"WARNING: ".
- "Can't make server socket $conserver ($st secs): $@");
+ "Can't make server socket ($st secs): .. exiting");
sleep($st);
exit;
};
-
+
# -----------------------------------------------------------------------------
&logthis("$conserver online");
@@ -495,128 +314,308 @@ unless (
%inbuffer = ();
%outbuffer = ();
%ready = ();
+%servers = (); # To be compatible with make filevector. indexed by
+ # File ids, values are sockets.
+ # note that the accept socket is omitted.
tie %ready, 'Tie::RefHash';
-nonblock($server);
-$select = IO::Select->new($server);
+# nonblock($server);
+# $select = IO::Select->new($server);
# Main loop: check reads/accepts, check writes, check ready to process
+
+status("Main loop");
while (1) {
my $client;
my $rv;
my $data;
- # check for new information on the connections we have
+ my $infdset; # bit vec of fd's to select on input.
- # anything to read or accept?
- foreach $client ($select->can_read(0.1)) {
+ my $outfdset; # Bit vec of fd's to select on output.
- if ($client == $server) {
- # accept a new connection
- &status("Accept new connection: $conserver");
- $client = $server->accept();
- $select->add($client);
- nonblock($client);
- } else {
- # read data
- $data = '';
- $rv = $client->recv($data, POSIX::BUFSIZ, 0);
-
- unless (defined($rv) && length $data) {
- # This would be the end of file, so close the client
- delete $inbuffer{$client};
- delete $outbuffer{$client};
- delete $ready{$client};
-
- &status("Idle $conserver");
- $select->remove($client);
- close $client;
- next;
- }
- $inbuffer{$client} .= $data;
+ $infdset = MakeFileVector(\%servers);
+ $outfdset= MakeFileVector(\%outbuffer);
+ vec($infdset, $server->fileno, 1) = 1;
+ if($DEBUG) {
+ &logthis("Adding ".$server->fileno.
+ " to input select vector (listner)".
+ unpack("b*",$infdset)."\n");
+ }
+ DoSelect(\$infdset, \$outfdset); # Wait for input.
+ if($DEBUG) {
+ &logthis("Doselect completed!");
+ &logthis("ins = ".unpack("b*",$infdset)."\n");
+ &logthis("outs= ".unpack("b*",$outfdset)."\n");
+
+ }
- # test whether the data in the buffer or the data we
- # just read means there is a complete request waiting
- # to be fulfilled. If there is, set $ready{$client}
- # to the requests waiting to be fulfilled.
- while ($inbuffer{$client} =~ s/(.*\n)//) {
- push( @{$ready{$client}}, $1 );
- }
- }
+ # Checkfor new connections:
+ if (vec($infdset, $server->fileno, 1)) {
+ if($DEBUG) {
+ &logthis("New connection established");
+ }
+ # accept a new connection
+ &status("Accept new connection: $conserver");
+ $client = $server->accept();
+ if($DEBUG) {
+ &logthis("New client fd = ".$client->fileno."\n");
+ }
+ $servers{$client->fileno} = $client;
+ nonblock($client);
}
+ HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready);
+ HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer,
+ \%ready);
+# -------------------------------------------------------- Wow, connection lost
- # Any complete requests to process?
- foreach $client (keys %ready) {
- handle($client);
+}
+
}
+}
- # Buffers to flush?
- foreach $client ($select->can_write(1)) {
- # Skip this client if we have nothing to say
- next unless exists $outbuffer{$client};
+# ------------------------------------------------------- End of make_new_child
- $rv = $client->send($outbuffer{$client}, 0);
- unless ($outbuffer{$client}=~/con_lost\n$/) {
- unless (defined $rv) {
- # Whine, but move on.
- &logthis("I was told I could write, but I can't.\n");
- next;
- }
- $errno=$!;
- if (($rv == length $outbuffer{$client}) ||
- ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {
- substr($outbuffer{$client}, 0, $rv) = '';
- delete $outbuffer{$client} unless length $outbuffer{$client};
- } else {
- # Couldn't write all the data, and it wasn't because
- # it would have blocked. Shutdown and move on.
+#
+# MakeFileVector: build a select() bit vector from the keys of a hash.
+# Parameters:
+#    \%fdhash - reference to a hash whose keys are used directly as bit
+#               positions (i.e. file descriptor numbers).  The values
+#               are ignored.
+# Returns:
+#    A bit string with one bit set per key ("" when the hash is empty).
+#
+sub MakeFileVector
+{
+    my ($fdhash) = @_;
+    my $bits = "";
+
+    for my $fd (keys %{$fdhash}) {
+        &logthis("Adding ".$fd."to select vector. (client)\n") if $DEBUG;
+        vec($bits, $fd, 1) = 1;
+    }
+    return $bits;
+}
- &logthis("Dropping data with ".$errno.": ".
- length($outbuffer{$client}).", $rv");
- delete $inbuffer{$client};
- delete $outbuffer{$client};
- delete $ready{$client};
-
- $select->remove($client);
- close($client);
- next;
- }
- } else {
-# -------------------------------------------------------- Wow, connection lost
- &logthis(
- "CRITICAL: Closing connection $conserver");
- &status("Connection lost $conserver");
- $remotesock->shutdown(2);
- &logthis("Attempting to open new connection");
- &openremote($conserver);
- }
+#
+# HandleOutput:
+#   Flushes pending output to every client socket that select()
+#   reported as writable.
+# Parameters:
+#   $selvector - Bit vector of file descriptors which are writable.
+#   \%sockets  - Hash of client sockets, keyed by file descriptor number.
+#   \%buffers  - Hash of output buffers, keyed like \%sockets.  For each
+#                writable descriptor we attempt to send the buffer; the
+#                sockets are assumed to be in non-blocking mode, so only
+#                the part actually written is removed from the buffer.
+#   \%inbufs   - Hash of input buffers, keyed like \%sockets.
+#   \%readys   - Hash of completed-request lists, keyed like \%sockets.
+#   The last two are only touched to discard a client's state when its
+#   socket is dropped after a hard write error.
+#
+# A buffer consisting solely of "con_lost\n" is treated as the marker
+# for a lost lond connection: after sending it, the remote connection
+# is shut down and reopened.
+#
+sub HandleOutput
+{
+    my $selvector = shift;
+    my $sockets   = shift;
+    my $buffers   = shift;
+    my $inbufs    = shift;
+    my $readys    = shift;
+    my $sock;
+
+    if($DEBUG) {
+        &logthis("HandleOutput entered\n");
    }
-
+
+    foreach $sock (keys %$sockets) {
+        next unless vec($selvector, $sock, 1);   # not writable this pass
+        my $socket = $sockets->{$sock};
+        if($DEBUG) {
+            &logthis("Sending $buffers->{$sock} \n");
+        }
+        my $rv = $socket->send($buffers->{$sock}, 0);
+        $errno = $!;    # only meaningful when send failed ($rv undefined)
+
+        if ($buffers->{$sock} eq "con_lost\n") {
+            # Kludgy way to mark lond connection lost.
+            # NOTE(review): the marker is left in the buffer, so this
+            # branch repeats until the client disconnects -- confirm
+            # that this is intended.
+            &logthis(
+                "CRITICAL lond connection lost");
+            status("Connection lost");
+            $remotesock->shutdown(2);
+            &logthis("Attempting to open a new connection");
+            &openremote($conserver);
+            next;
+        }
+
+        if (defined $rv) {
+            # Full or partial write: drop what was sent and keep the
+            # remainder for the next pass.  $! is NOT consulted here,
+            # since it is stale whenever send succeeds.
+            substr($buffers->{$sock}, 0, $rv) = "";
+            delete $buffers->{$sock} unless length $buffers->{$sock};
+            next;
+        }
+
+        # send failed.  Interrupted / would-block are not errors; the
+        # buffer is untouched and will be retried on the next pass.
+        if (($errno == POSIX::EINTR()) ||
+            ($errno == POSIX::EWOULDBLOCK()) ||
+            ($errno == POSIX::EAGAIN())) {
+            next;
+        }
+
+        # Hard failure: give up on this client.
+        &logthis("Unable to write data with ".$errno.": ".
+                 "Dropping data: ".length($buffers->{$sock}));
+        delete $buffers->{$sock};
+        delete $inbufs->{$sock};
+        delete $readys->{$sock};
+        # Forget the socket too, otherwise its closed descriptor would
+        # be put back into the next select vector.
+        delete $sockets->{$sock};
+        close($socket);
+    }
+
}
+#
+# HandleInput - Deals with input on client sockets.
+#               Each socket has an associated input buffer.
+#               For each readable socket, the currently available
+#               data is appended to this buffer (created on demand).
+#               Every complete, newline-terminated line is moved to the
+#               socket's ready list, and handle() is then called for
+#               each key in the ready hash.
+#               On end of file or a hard read error the client is
+#               closed and all of its state is discarded.
+# Parameters:
+#   $selvec   - Bit vector of readable file descriptors.
+#   \%sockets - Hash of client sockets, keyed by file descriptor number.
+#               Each of these may or may not have its bit set in $selvec.
+#   \%ibufs   - Hash of input buffers, keyed like \%sockets.
+#   \%obufs   - Hash of output buffers, keyed like \%sockets.
+#   \%ready   - Hash of lists of completed requests, keyed like \%sockets.
+#               handle() itself works on the global %ready / %outbuffer,
+#               so callers are expected to pass references to those.
+#
+sub HandleInput
+{
+    my ($selvec, $sockets, $ibufs, $obufs, $ready) = @_;
+
+    if($DEBUG) {
+        &logthis("Entered HandleInput\n");
+    }
+    foreach my $sock (keys %$sockets) {
+        next unless vec($selvec, $sock, 1);      # not readable this pass
+        my $socket = $sockets->{$sock};
+
+        # Attempt to read the data and do error management.
+        my $data = '';
+        my $rv = $socket->recv($data, POSIX::BUFSIZ, 0);
+        my $errno = $!;     # capture before logging can overwrite it
+        if($DEBUG) {
+            &logthis("Received $data from socket");
+        }
+
+        # An interrupted or would-block read is not a disconnect; leave
+        # the client alone and try again on the next pass.
+        if (!defined($rv) &&
+            (($errno == POSIX::EINTR()) ||
+             ($errno == POSIX::EWOULDBLOCK()) ||
+             ($errno == POSIX::EAGAIN()))) {
+            next;
+        }
+
+        unless (defined($rv) && length $data) {
+
+            # Read an end of file.. this is a disconnect from the peer.
+
+            delete $sockets->{$sock};
+            delete $ibufs->{$sock};
+            delete $obufs->{$sock};
+            delete $ready->{$sock};
+
+            status("Idle");
+            close $socket;
+            next;
+        }
+        # Append the read data to the input buffer.  If the buffer
+        # now contains a \n the request is complete and we can
+        # mark this in the $ready hash (one request for each \n.)
+
+        $ibufs->{$sock} .= $data;
+        while($ibufs->{$sock} =~ s/(.*\n)//) {
+            push(@{$ready->{$sock}}, $1);
+        }
+    }
+    # Now handle any requests which are ready:
+
+    foreach my $client (keys %$ready) {
+        handle($client);
+    }
}
-# ------------------------------------------------------- End of make_new_child
+# DoSelect: does a select with no timeout.  When the select is
+#           interrupted by a signal (errno == EINTR) it is retried
+#           until at least one descriptor is ready.
+#
+# Parameters:
+#   \$readvec   - Reference to a vector of file descriptors to
+#                 check for readability.
+#   \$writevec  - Reference to a vector of file descriptors to check for
+#                 writability.
+#  On exit, the referents are modified with vectors indicating which
+#  file handles are readable/writable.
+#  Dies with "Select failed" on any select error other than EINTR.
+#
+sub DoSelect {
+    my $readvec = shift;
+    my $writevec= shift;
+    my $outs;
+    my $ins;
+
+    while (1) {
+        # select modifies its arguments in place, so work on copies and
+        # only publish them once the call has succeeded.
+        my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef);
+        my $errno = $!;      # capture before logging can overwrite it
+
+        # select returns -1 on error, which is true in boolean context;
+        # only a positive count means descriptors are actually ready.
+        if($nfds > 0) {
+            if($DEBUG) {
+                &logthis("select exited with ".$nfds." fds\n");
+                &logthis("ins = ".unpack("b*",$ins).
+                         " readvec = ".unpack("b*",$$readvec)."\n");
+                &logthis("outs = ".unpack("b*",$outs).
+                         " writevec = ".unpack("b*",$$writevec)."\n");
+            }
+            $$readvec = $ins;
+            $$writevec = $outs;
+            return;
+        }
+        if($DEBUG) {
+            &logthis("Select exited with no bits set in mask\n");
+        }
+        # A signal arriving during the wait is expected; anything else
+        # is fatal.
+        die "Select failed" if ($nfds < 0) && ($errno != POSIX::EINTR());
+    }
+}
# handle($socket) deals with all pending requests for $client
+#
sub handle {
# requests are in $ready{$client}
# send output to $outbuffer{$client}
my $client = shift;
my $request;
-
foreach $request (@{$ready{$client}}) {
# ============================================================= Process request
# $request is the text of the request
# put text of reply into $outbuffer{$client}
# ------------------------------------------------------------ Is this the end?
+ chomp($request);
+ if($DEBUG) {
+ &logthis(" Request $request processing starts");
+ }
if ($request eq "close_connection_exit\n") {
- &status("Request close connection: $conserver");
+ &status("Request close connection");
&logthis(
- "CRITICAL: Request Close Connection $conserver");
+ "CRITICAL: Request Close Connection ... exiting");
$remotesock->shutdown(2);
$server->close();
exit;
@@ -633,27 +632,19 @@ sub handle {
$encrequest.=
unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));
}
- $request="enc:$cmdlength:$encrequest\n";
+ $request="enc:$cmdlength:$encrequest";
}
# --------------------------------------------------------------- Main exchange
- $SIG{ALRM}=sub { die "timeout" };
- $SIG{__DIE__}='DEFAULT';
- eval {
- alarm(300);
- &status("Sending $conserver: $request");
- print $remotesock "$request";
- &status("Waiting for reply from $conserver: $request");
- $answer=<$remotesock>;
- &status("Received reply: $request");
- alarm(0);
- };
- if ($@=~/timeout/) {
- $answer='';
- &logthis(
- "CRITICAL: Timeout $conserver: $request");
- }
- $SIG{ALRM}='DEFAULT';
- $SIG{__DIE__}=\&catchexception;
+ $answer = londtransaction($remotesock, $request, 300);
+
+ if($DEBUG) {
+ &logthis(" Request data exchange complete");
+ }
+ if ($@=~/timeout/) {
+ $answer='';
+ &logthis(
+ "CRITICAL: Timeout: $request");
+ }
if ($answer) {
@@ -669,19 +660,27 @@ sub handle {
$answer=substr($answer,0,$cmdlength);
$answer.="\n";
}
+ if($DEBUG) {
+ &logthis("sending $answer to client\n");
+ }
$outbuffer{$client} .= $answer;
} else {
$outbuffer{$client} .= "con_lost\n";
}
+ &status("Completed: $request");
+ if($DEBUG) {
+ &logthis(" Request processing complete");
+ }
# ===================================================== Done processing request
}
delete $ready{$client};
- &status("Completed $conserver: $request");
# -------------------------------------------------------------- End non-forker
+ if($DEBUG) {
+ &logthis(" requests for child handled");
+ }
}
# ---------------------------------------------------------- End make_new_child
-}
# nonblock($socket) puts socket into nonblocking mode
sub nonblock {
@@ -701,7 +700,8 @@ sub openremote {
my $conserver=shift;
-&status("Opening TCP: $conserver");
+&status("Opening TCP");
+ my $st=120+int(rand(240)); # Sleep before opening:
unless (
$remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
@@ -709,55 +709,47 @@ unless (
Proto => "tcp",
Type => SOCK_STREAM)
) {
- my $st=120+int(rand(240));
+
&logthis(
-"WARNING: Couldn't connect $conserver ($st secs): $@");
+"WARNING: Couldn't connect to $conserver ($st secs): ");
sleep($st);
exit;
};
# ----------------------------------------------------------------- Init dialog
+&logthis("INFO Connected to $conserver, initing ");
&status("Init dialogue: $conserver");
- $SIG{ALRM}=sub { die "timeout" };
- $SIG{__DIE__}='DEFAULT';
- eval {
- alarm(60);
-print $remotesock "init\n";
-$answer=<$remotesock>;
-print $remotesock "$answer";
-$answer=<$remotesock>;
-chomp($answer);
- alarm(0);
- };
- $SIG{ALRM}='DEFAULT';
- $SIG{__DIE__}=\&catchexception;
+ $answer = londtransaction($remotesock, "init", 60);
+ chomp($answer);
+ $answer = londtransaction($remotesock, $answer, 60);
+ chomp($answer);
if ($@=~/timeout/) {
- &logthis("Timed out during init: $conserver");
+ &logthis("Timed out during init.. exiting");
exit;
}
if ($answer ne 'ok') {
- &logthis("Init reply for $conserver: >$answer<");
+ &logthis("Init reply: >$answer<");
my $st=120+int(rand(240));
&logthis(
-"WARNING: Init failed $conserver ($st secs)");
+"WARNING: Init failed ($st secs)");
sleep($st);
exit;
}
sleep 5;
-&status("Ponging $conserver");
+&status("Ponging");
print $remotesock "pong\n";
$answer=<$remotesock>;
chomp($answer);
-if ($answer!~/^$converver/) {
- &logthis("Pong reply for $conserver: >$answer<");
+if ($answer!~/^$conserver/) {
+ &logthis("Pong reply: >$answer<");
}
# ----------------------------------------------------------- Initialize cipher
-&status("Initialize cipher: $conserver");
+&status("Initialize cipher");
print $remotesock "ekey\n";
my $buildkey=<$remotesock>;
my $key=$conserver.$perlvar{'lonHostID'};
@@ -768,18 +760,266 @@ $key=$key.$buildkey.$key.$buildkey.$key.
$key=substr($key,0,32);
my $cipherkey=pack("H32",$key);
if ($cipher=new IDEA $cipherkey) {
- &logthis("Secure connection initialized: $conserver");
+ &logthis("Secure connection initialized");
} else {
my $st=120+int(rand(240));
&logthis(
"WARNING: ".
- "Could not establish secure connection, $conserver ($st secs)!");
+ "Could not establish secure connection ($st secs)!");
sleep($st);
exit;
}
+ &logthis(" Remote open success ");
+}
+
+
+
+# Exception handler: records the abnormal exit in the log and then dies.
+sub catchexception {
+    my $signal = shift;
+    # Put the default handlers back first, so that the die below (or a
+    # failure while logging) does not re-enter this routine.
+    @SIG{'QUIT', '__DIE__'} = ('DEFAULT', 'DEFAULT');
+    chomp($signal);
+    my $report = "CRITICAL: "
+        ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
+        ."\"$signal\" with parameter ";
+    &logthis($report);
+    die("Signal abend");
+}
+
+# -------------------------------------- Routines to see if other box available
+
+#sub online {
+# my $host=shift;
+# &status("Pinging ".$host);
+# my $p=Net::Ping->new("tcp",20);
+# my $online=$p->ping("$host");
+# $p->close();
+# undef ($p);
+# return $online;
+#}
+
+# Asks the web server of host $local (via /cgi-bin/ping.pl) about $remote.
+# Returns 'ok' when the first line of the reply, stripped of non-word
+# characters, equals $remote; otherwise 'local_unknown',
+# 'remote_unknown', 'local_error', or the stripped reply itself.
+sub connected {
+    my ($local,$remote)=@_;
+    &status("Checking connection $local to $remote");
+    s/\W//g for ($local, $remote);      # ids are word characters only
+
+    return 'local_unknown'  unless $hostname{$local};
+    return 'remote_unknown' unless $hostname{$remote};
+
+    # (The former online() ping of the local host is disabled.)
+
+    my $url = "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote;
+    my $agent = LWP::UserAgent->new;
+    my $response = $agent->request(HTTP::Request->new('GET', $url));
+
+    return 'local_error' unless $response->is_success;
+
+    # Only the first line of the answer is of interest.
+    my $reply = (split("\n", $response->content))[0];
+    $reply =~ s/\W//g;
+    return $reply if $reply ne $remote;
+    return 'ok';
+}
+
+
+
+# Sends 'exit' to every child's connection (logging the reply) and
+# then signals the child with SIGINT.
+sub hangup {
+    for my $pid (keys %children) {
+        # $wasserver is deliberately the package global: catchexception
+        # reports it.
+        $wasserver = $children{$pid};
+        &status("Closing $wasserver");
+        my $reply = &subreply('exit', $wasserver);
+        &logthis('Closing '.$wasserver.': '.$reply);
+        &status("Kill PID $pid for $wasserver");
+        kill('INT', $pid);
+    }
+}
+
+# Signal handler for SIGINT: stop all children, remove the pid file,
+# log the shutdown and exit.
+sub HUNTSMAN {
+    # We are about to kill our children; do not react to their deaths.
+    local $SIG{CHLD} = 'IGNORE';
+    &hangup();
+    my $pidfile = $perlvar{'lonDaemons'}."/logs/lonc.pid";
+    unlink($pidfile);
+    &logthis("CRITICAL: Shutting down");
+    exit;                           # clean up with dignity
+}
+
+# Signal handler for SIGHUP: stop all children, remove the pid file
+# and replace this process with a fresh lonc.
+sub HUPSMAN {
+    local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
+    # Resolve the daemon directory before it is used below, rather than
+    # relying on a same-named global happening to be set.
+    my $execdir=$perlvar{'lonDaemons'};
+    &hangup();
+    &logthis("CRITICAL: Restarting");
+    unlink("$execdir/logs/lonc.pid");
+    exec("$execdir/lonc")          # here we go again
+        or &logthis("CRITICAL: Restart failed, could not exec "
+                    ."$execdir/lonc: $!");
+}
+
+# Starts a fresh status file, then sends SIGUSR1 to each child (one per
+# second) and logs every child that can no longer be signalled.
+sub checkchildren {
+    &initnewstatus();
+    &logstatus();
+    &logthis('Going to check on the children');
+    for my $pid (sort keys %children) {
+        sleep 1;
+        next if kill('USR1', $pid);     # signal delivered
+        &logthis ('CRITICAL: Child '.$pid.' is dead');
+        &logstatus($$.' is dead');
+    }
+}
+
+# Signal handler for SIGUSR1 in the parent: clears the per-server
+# attempt counters and checks on all children.
+sub USRMAN {
+    &logthis("USR1: Trying to establish connections again");
+    undef %childatt;                # every server gets a clean slate
+    &checkchildren();
+}
+
+# -------------------------------------------------- Non-critical communication
+# Sends $cmd to the local domain socket of $server and returns the
+# one-line reply without its trailing newline.  Returns 'self_reply'
+# when $server is this host, and 'con_lost' when the socket cannot be
+# opened, the exchange times out, or the reply is empty.
+sub subreply {
+    my ($cmd,$server)=@_;
+    return 'self_reply' if $server eq $perlvar{'lonHostID'};
+
+    my $peerfile="$perlvar{'lonSockDir'}/$server";
+    my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
+                                      Type    => SOCK_STREAM,
+                                      Timeout => 10)
+        or return "con_lost";
+
+    my $answer = londtransaction($sclient, $cmd, 10);
+    my $timedout = ($@=~/timeout/);
+
+    # londtransaction returns the raw line; strip the newline so that
+    # callers get a bare reply and a blank line counts as no reply.
+    $answer = '' unless defined $answer;
+    chomp($answer);
+
+    if ((!$answer) || $timedout) { $answer="con_lost"; }
+    $SIG{ALRM}='DEFAULT';
+    $SIG{__DIE__}=\&catchexception;
+    return $answer;
+}
+
+# --------------------------------------------------------------------- Logging
+
+# Appends a timestamped line (pid, peer name, current status, message)
+# to logs/lonc.log and remembers the message in $lastlog.
+sub logthis {
+    my $message=shift;
+    my $execdir=$perlvar{'lonDaemons'};
+    my $now=time;
+    my $local=localtime($now);
+    $lastlog=$local.': '.$message;
+    # Logging runs on exception paths as well; printing to an undefined
+    # handle would itself be fatal, so skip the write when the log
+    # cannot be opened.
+    my $fh=IO::File->new(">>$execdir/logs/lonc.log") or return;
+    print $fh "$local ($$) [$conserver] [$status]: $message\n";
+}
+
+#-------------------------------------- londtransaction:
+#
+#  Performs a transaction with lond with timeout support.
+#    result = londtransaction(socket,request,timeout)
+#
+#  The request is written followed by a newline and one line is read
+#  back; each of the two steps is limited to <timeout> seconds.
+#  Returns the raw reply line (newline included), or '' when the send
+#  timed out.  On return $@ matches /timeout/ if either step timed
+#  out; callers test it.
+#  Side effects: sets the global $answer, and leaves $SIG{ALRM} at
+#  'DEFAULT' and $SIG{__DIE__} at catchexception.
+#
+sub londtransaction {
+    my ($socket, $request, $tmo) = @_;
+
+    if($DEBUG) {
+        &logthis("londtransaction request: $request");
+    }
+
+    # Set the signal handlers: ALRM for timeout and disable the others.
+
+    $SIG{ALRM}    = sub { die "timeout" };
+    $SIG{__DIE__} = 'DEFAULT';
+
+    # Block all but alarm so that only that can interrupt the
+    # send / receive.  The signal numbers must be the POSIX constants;
+    # bare names such as QUIT are just strings.
+    #
+    my $sigset    = POSIX::SigSet->new(POSIX::SIGQUIT(), POSIX::SIGUSR1(),
+                                       POSIX::SIGHUP(),  POSIX::SIGINT(),
+                                       POSIX::SIGTERM());
+    my $priorsigs = POSIX::SigSet->new;
+    unless (defined POSIX::sigprocmask(POSIX::SIG_BLOCK(),
+                                       $sigset, $priorsigs)) {
+        &logthis(" CRITICAL -- londtransaction ".
+                 "failed to block signals ");
+        die "could not block signals in londtransaction";
+    }
+    $answer = '';
+    #
+    #  Send request to lond.
+    #
+    eval {
+        alarm($tmo);
+        print $socket "$request\n";
+        alarm(0);
+    };
+    my $failure = $@;
+    alarm(0);                   # never leave a timer pending
+
+    #  If request didn't timeout, try for the response.
+    #
+    if ($failure !~ /timeout/) {
+        eval {
+            alarm($tmo);
+            $answer = <$socket>;
+            if($DEBUG) {
+                &logthis("Received $answer in londtransaction");
+            }
+            alarm(0);
+        };
+        $failure = $@;
+        alarm(0);
+        if( ($failure =~ /timeout/) && ($DEBUG)) {
+            &logthis("Timeout on receive in londtransaction");
+        }
+    } else {
+        if($DEBUG) {
+            &logthis("Timeout on send in londtransaction");
+        }
+    }
+    #
+    # Restore the signal mask that was in effect on entry.  This has
+    # to be SIG_SETMASK: unblocking the (normally empty) prior set
+    # would leave everything blocked above still blocked.
+    #
+    unless (defined POSIX::sigprocmask(POSIX::SIG_SETMASK(), $priorsigs)) {
+        &logthis(" CRITICAL -- londtransaction ".
+                 "failed to re-enable signal processing. ");
+        die "londtransaction failed to re-enable signals";
+    }
+    #
+    # go back to the prior handler set.
+    #
+    $SIG{ALRM}    = 'DEFAULT';
+    $SIG{__DIE__} = \&catchexception;
+
+    # The reply is returned unchomped; callers chomp it themselves.
+    if ($DEBUG) {
+        &logthis("Returning $answer in londtransaction");
+    }
+    $@ = $failure;              # callers test $@ for /timeout/
+    return $answer;
+
+}
+
+# Appends "<epoch>:<message>:<local time>" to logs/lonnet.perm.log.
+sub logperm {
+    my ($message) = @_;
+    my $now   = time;
+    my $stamp = localtime($now);
+    my $logfile = $perlvar{'lonDaemons'}."/logs/lonnet.perm.log";
+    my $fh = IO::File->new(">>$logfile");
+    print $fh join(':', $now, $message, $stamp)."\n";
+}
+# ------------------------------------------------------------------ Log status
+# Appends one tab-separated line (pid, peer name, current status, last
+# log message) to lon-status/loncstatus.txt.  Arguments are ignored.
+sub logstatus {
+    my $statusfile = $perlvar{'lonDocRoot'}."/lon-status/loncstatus.txt";
+    my $fh = IO::File->new(">>$statusfile");
+    print $fh join("\t", $$, $conserver, $status, $lastlog)."\n";
}
+# Truncates lon-status/loncstatus.txt and writes a header line naming
+# the current time and this process as parent.
+sub initnewstatus {
+    my $statusfile = $perlvar{'lonDocRoot'}."/lon-status/loncstatus.txt";
+    my $fh = IO::File->new(">$statusfile");
+    my $stamp = localtime(time);
+    print $fh "LONC status $stamp - parent $$\n\n";
+}
+
+# -------------------------------------------------------------- Status setting
+
+# Records $what, prefixed with the current local time, in the global
+# $status (which logthis and logstatus include in their output).
+sub status {
+    my ($what) = @_;
+    my $stamp = localtime(time);
+    $status = $stamp.': '.$what;
+}
+
+
+
# ----------------------------------- POD (plain old documentation, CPAN style)
=head1 NAME
@@ -788,22 +1028,104 @@ lonc - LON TCP-MySQL-Server Daemon for h
=head1 SYNOPSIS
+Usage: B<lonc>
+
Should only be run as user=www. This is a command-line script which
-is invoked by loncron.
+is invoked by B<loncron>. There is no expectation that a typical user
+will manually start B<lonc> from the command-line. (In other words,
+DO NOT START B<lonc> YOURSELF.)
=head1 DESCRIPTION
Provides persistent TCP connections to the other servers in the network
through multiplexed domain sockets
- PID in subdir logs/lonc.pid
- kill kills
- HUP restarts
- USR1 tries to open connections again
+B<lonc> forks off children processes that correspond to the other servers
+in the network. Management of these processes can be done at the
+parent process level or the child process level.
+
+ After forking off the children, the B<lonc> B<parent>
+executes a main loop which simply waits for processes to exit.
+As a process exits, a new process managing a link to the same
+peer as the exiting process is created.
+
+B<logs/lonc.log> is the location of log messages.
+
+The process management is now explained in terms of linux shell commands,
+subroutines internal to this code, and signal assignments:
+
+=over 4
+
+=item *
+
+PID is stored in B<logs/lonc.pid>
+
+This is the process id number of the parent B<lonc> process.
+
+=item *
+
+SIGTERM and SIGINT
+
+Parent signal assignment:
+ $SIG{INT} = $SIG{TERM} = \&HUNTSMAN;
+
+Child signal assignment:
+ $SIG{INT} = 'DEFAULT'; (and SIGTERM is DEFAULT also)
+(When the child dies, the parent's blocking C<wait> returns and the
+ parent restarts a new child.)
+
+Command-line invocations:
+ B<kill> B<-s> SIGTERM I<PID>
+ B<kill> B<-s> SIGINT I<PID>
+
+Subroutine B<HUNTSMAN>:
+ This is only invoked for the B<lonc> parent I<PID>.
+This kills all the children, and then the parent.
+The B<lonc.pid> file is removed.
+
+=item *
+
+SIGHUP
+
+Current bug:
+ This signal can only be processed the first time
+on the parent process. Subsequent SIGHUP signals
+have no effect.
+
+Parent signal assignment:
+ $SIG{HUP} = \&HUPSMAN;
+
+Child signal assignment:
+ none (nothing happens)
+
+Command-line invocations:
+ B<kill> B<-s> SIGHUP I<PID>
+
+Subroutine B<HUPSMAN>:
+ This is only invoked for the B<lonc> parent I<PID>.
+This kills all the children, removes the B<lonc.pid> file,
+and then re-executes B<lonc>.
+
+=item *
+
+SIGUSR1
+
+Parent signal assignment:
+ $SIG{USR1} = \&USRMAN;
+
+Child signal assignment:
+ $SIG{USR1}= \&logstatus;
+
+Command-line invocations:
+ B<kill> B<-s> SIGUSR1 I<PID>
+
+Subroutine B<USRMAN>:
+ When invoked for the B<lonc> parent I<PID>,
+SIGUSR1 is sent to all the children, and the status of
+each connection is logged.
-=head1 README
-Not yet written.
+=back
=head1 PREREQUISITES