--- loncom/Attic/lonc 2000/12/05 16:51:41 1.9
+++ loncom/Attic/lonc 2002/03/03 18:13:07 1.31
@@ -5,6 +5,30 @@
# provides persistent TCP connections to the other servers in the network
# through multiplexed domain sockets
#
+# $Id: lonc,v 1.31 2002/03/03 18:13:07 harris41 Exp $
+#
+# Copyright Michigan State University Board of Trustees
+#
+# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+#
+# LON-CAPA is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# LON-CAPA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with LON-CAPA; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# /home/httpd/html/adm/gpl.txt
+#
+# http://www.lon-capa.org/
+#
# PID in subdir logs/lonc.pid
# kill kills
# HUP restarts
@@ -12,7 +36,16 @@
# 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
# 10/8,10/9,10/15,11/18,12/22,
-# 2/8,7/25 Gerd Kortemeyer
+# 2/8,7/25 Gerd Kortemeyer
+# 12/05 Scott Harrison
+# 12/05 Gerd Kortemeyer
+# YEAR=2001
+# 01/10/01 Scott Harrison
+# 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
+# 12/20 Scott Harrison
+# YEAR=2002
+# 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
+#
# based on nonforker from Perl Cookbook
# - server who multiplexes without forking
@@ -24,39 +57,22 @@ use Socket;
use Fcntl;
use Tie::RefHash;
use Crypt::IDEA;
+use Net::Ping;
+use LWP::UserAgent();
-# grabs exception and records it to log before exiting
-sub catchexception {
- my ($signal)=@_;
- &logthis("CRITICAL: "
- ."ABNORMAL EXIT. Child $$ for server $wasserver died through "
- ."$signal with this parameter->[$@]");
- die($@);
-}
-
-# grabs exception and records it to log before exiting
-# NOTE: we must NOT use the regular (non-overrided) die function in
-# the code because a handler CANNOT be attached to it
-# (despite what some of the documentation says about SIG{__DIE__}.
-sub catchdie {
- my ($message)=@_;
- &logthis("CRITICAL: "
- ."ABNORMAL EXIT. Child $$ for server $wasserver died through "
- ."\_\_DIE\_\_ with this parameter->[$message]");
- die($message);
-}
-
-$childmaxattempts=10;
+$status='';
+$lastlog='';
+$conserver='SHELL';
# -------------------------------- Set signal handlers to record abnormal exits
-$SIG{'QUIT'}=\&catchexception;
+&status("Init exception handlers");
+$SIG{QUIT}=\&catchexception;
$SIG{__DIE__}=\&catchexception;
# ------------------------------------ Read httpd access.conf and get variables
-
-open (CONFIG,"/etc/httpd/conf/access.conf")
- || catchdie "Can't read access.conf";
+&status("Read access.conf");
+open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
+# Read access.conf line by line from the CONFIG handle opened above; only
+# lines mentioning PerlSetVar are of interest.
while ($configline=<CONFIG>) {
if ($configline =~ /PerlSetVar/) {
@@ -67,6 +83,17 @@ while ($configline=<CONFIG>) {
}
close(CONFIG);
+# ----------------------------- Make sure this process is running from user=www
+# Compare the real user ID of this process with the UID of the account 'www'.
+# getpwnam returns undef when that account does not exist; treat that as a
+# mismatch as well, since undef would otherwise compare equal to root's UID 0.
+# On mismatch the administrators are mailed and the script exits with status 1.
+&status("Check user ID");
+my $wwwid=getpwnam('www');
+if ((!defined($wwwid)) || ($wwwid!=$<)) {
+ $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+ $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ system("echo 'User ID mismatch. lonc must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+ exit 1;
+}
+
# --------------------------------------------- Check if other instance running
my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
@@ -75,19 +102,22 @@ if (-e $pidfile) {
my $lfh=IO::File->new("$pidfile");
my $pide=<$lfh>;
chomp($pide);
- if (kill 0 => $pide) { catchdie "already running"; }
+ if (kill 0 => $pide) { die "already running"; }
}
# ------------------------------------------------------------- Read hosts file
-open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab")
- || catchdie "Can't read host file";
+open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
+# Read hosts.tab (colon-separated id:domain:role:name:ip) from CONFIG.
+# Entries without an IP are ignored; the others fill %hostip and %hostname.
while ($configline=<CONFIG>) {
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
chomp($ip);
- $hostip{$id}=$ip;
+ if ($ip) {
+ $hostip{$id}=$ip;
+ $hostname{$id}=$name;
+ }
}
+
close(CONFIG);
# -------------------------------------------------------- Routines for forking
@@ -99,95 +129,20 @@ close(CONFIG);
%childatt = (); # number of attempts to start server
# for ID
-sub REAPER { # takes care of dead children
- $SIG{CHLD} = \&REAPER;
- my $pid = wait;
- my $wasserver=$children{$pid};
- &logthis("CRITICAL: "
- ."Child $pid for server $wasserver died ($childatt{$wasserver})");
- delete $children{$pid};
- delete $childpid{$wasserver};
- my $port = "$perlvar{'lonSockDir'}/$wasserver";
- unlink($port);
-}
-
-sub HUNTSMAN { # signal handler for SIGINT
- local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
- my $execdir=$perlvar{'lonDaemons'};
- unlink("$execdir/logs/lonc.pid");
- &logthis("CRITICAL: Shutting down");
- exit; # clean up with dignity
-}
-
-sub HUPSMAN { # signal handler for SIGHUP
- local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
- &logthis("CRITICAL: Restarting");
- my $execdir=$perlvar{'lonDaemons'};
- exec("$execdir/lonc"); # here we go again
-}
-
-sub USRMAN {
- &logthis("USR1: Trying to establish connections again");
- foreach $thisserver (keys %hostip) {
- $answer=subreply("ping",$thisserver);
- &logthis("USR1: Ping $thisserver "
- ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): "
- ." >$answer<");
- }
- %childatt=();
-}
-
-# -------------------------------------------------- Non-critical communication
-sub subreply {
- my ($cmd,$server)=@_;
- my $answer='';
- if ($server ne $perlvar{'lonHostID'}) {
- my $peerfile="$perlvar{'lonSockDir'}/$server";
- my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile",
- Type => SOCK_STREAM,
- Timeout => 10)
- or return "con_lost";
- print $sclient "$cmd\n";
- my $answer=<$sclient>;
- chomp($answer);
- if (!$answer) { $answer="con_lost"; }
- } else { $answer='self_reply'; }
- return $answer;
-}
-
-# --------------------------------------------------------------------- Logging
-
-sub logthis {
- my $message=shift;
- my $execdir=$perlvar{'lonDaemons'};
- my $fh=IO::File->new(">>$execdir/logs/lonc.log");
- my $now=time;
- my $local=localtime($now);
- print $fh "$local ($$): $message\n";
-}
-
-
-sub logperm {
- my $message=shift;
- my $execdir=$perlvar{'lonDaemons'};
- my $now=time;
- my $local=localtime($now);
- my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
- print $fh "$now:$message:$local\n";
-}
+$childmaxattempts=5;
# ---------------------------------------------------- Fork once and dissociate
-
+&status("Fork and dissociate");
$fpid=fork;
exit if $fpid;
-catchdie "Couldn't fork: $!" unless defined ($fpid);
+die "Couldn't fork: $!" unless defined ($fpid);
-POSIX::setsid() or catchdie "Can't start new session: $!";
+POSIX::setsid() or die "Can't start new session: $!";
-# ------------------------------------------------------- Write our PID on disk
+$conserver='PARENT';
+# ------------------------------------------------------- Write our PID on disk
+&status("Write PID");
$execdir=$perlvar{'lonDaemons'};
open (PIDSAVE,">$execdir/logs/lonc.pid");
print PIDSAVE "$$\n";
@@ -200,8 +155,12 @@ $SIG{HUP}=$SIG{USR1}='IGNORE';
# Fork off our children, one for every server
+&status("Forking ...");
+
foreach $thisserver (keys %hostip) {
- make_new_child($thisserver);
+ if (&online($hostname{$thisserver})) {
+ make_new_child($thisserver);
+ }
}
&logthis("Done starting initial servers");
@@ -214,113 +173,84 @@ $SIG{USR1} = \&USRMAN;
# And maintain the population.
while (1) {
+ &status("Sleeping");
sleep; # wait for a signal (i.e., child's death)
# See who died and start new one
+ &status("Woke up");
+ my $skipping='';
foreach $thisserver (keys %hostip) {
if (!$childpid{$thisserver}) {
- if ($childatt{$thisserver}<=$childmaxattempts) {
+ if (($childatt{$thisserver}<$childmaxattempts) &&
+ (&online($hostname{$thisserver}))) {
$childatt{$thisserver}++;
&logthis(
"INFO: Trying to reconnect for $thisserver "
."($childatt{$thisserver} of $childmaxattempts attempts)");
make_new_child($thisserver);
- }
+ } else {
+ $skipping.=$thisserver.' ';
+ }
+
}
}
+ if ($skipping) {
+ &logthis("WARNING: Skipped $skipping");
+ }
}
sub make_new_child {
- my $conserver=shift;
+ $newserver=shift;
my $pid;
my $sigset;
- &logthis("Attempting to start child for server $conserver");
+ &logthis("Attempting to start child for server $newserver");
# block signal for fork
$sigset = POSIX::SigSet->new(SIGINT);
sigprocmask(SIG_BLOCK, $sigset)
- or catchdie "Can't block SIGINT for fork: $!\n";
+ or die "Can't block SIGINT for fork: $!\n";
- catchdie "fork: $!" unless defined ($pid = fork);
+ die "fork: $!" unless defined ($pid = fork);
if ($pid) {
# Parent records the child's birth and returns.
sigprocmask(SIG_UNBLOCK, $sigset)
- or catchdie "Can't unblock SIGINT for fork: $!\n";
- $children{$pid} = $conserver;
- $childpid{$conserver} = $pid;
+ or die "Can't unblock SIGINT for fork: $!\n";
+ $children{$pid} = $newserver;
+# In the parent the global $conserver holds 'PARENT', so the pid has to be
+# filed under the server this child was started for; otherwise the main loop
+# finds no $childpid entry for any server and tries to respawn all of them.
+ $childpid{$newserver} = $pid;
return;
} else {
+ $conserver=$newserver;
# Child can *not* return from this subroutine.
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
-
+ $SIG{USR1}= \&logstatus;
+
# unblock signals
sigprocmask(SIG_UNBLOCK, $sigset)
- or catchdie "Can't unblock SIGINT for fork: $!\n";
+ or die "Can't unblock SIGINT for fork: $!\n";
# ----------------------------- This is the modified main program of non-forker
$port = "$perlvar{'lonSockDir'}/$conserver";
unlink($port);
-# ---------------------------------------------------- Client to network server
-unless (
- $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
- PeerPort => $perlvar{'londPort'},
- Proto => "tcp",
- Type => SOCK_STREAM)
- ) {
- my $st=120+int(rand(240));
- &logthis(
-"WARNING: Couldn't connect $conserver ($st secs): $@");
- sleep($st);
- exit;
- };
-# --------------------------------------- Send a ping to make other end do USR1
-print $remotesock "init\n";
-$answer=<$remotesock>;
-print $remotesock "$answer";
-$answer=<$remotesock>;
-chomp($answer);
-&logthis("Init reply for $conserver: >$answer<");
-sleep 5;
-print $remotesock "pong\n";
-$answer=<$remotesock>;
-chomp($answer);
-&logthis("Pong reply for $conserver: >$answer<");
-# ----------------------------------------------------------- Initialize cipher
-print $remotesock "ekey\n";
-my $buildkey=<$remotesock>;
-my $key=$conserver.$perlvar{'lonHostID'};
-$key=~tr/a-z/A-Z/;
-$key=~tr/G-P/0-9/;
-$key=~tr/Q-Z/0-9/;
-$key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
-$key=substr($key,0,32);
-my $cipherkey=pack("H32",$key);
-if ($cipher=new IDEA $cipherkey) {
- &logthis("Secure connection inititalized: $conserver");
-} else {
- my $st=120+int(rand(240));
- &logthis(
- "WARNING: ".
- "Could not establish secure connection, $conserver ($st secs)!");
- sleep($st);
- exit;
-}
+# -------------------------------------------------------------- Open other end
-# ----------------------------------------- We're online, send delayed messages
+&openremote($conserver);
+# ----------------------------------------- We're online, send delayed messages
+ &status("Checking for delayed messages");
my @allbuffered;
my $path="$perlvar{'lonSockDir'}/delayed";
opendir(DIRHANDLE,$path);
@allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
closedir(DIRHANDLE);
my $dfname;
- map {
+ foreach (@allbuffered) {
+ &status("Sending delayed: $_");
$dfname="$path/$_";
- &logthis($dfname);
+ &logthis('Sending '.$dfname);
my $wcmd;
{
my $dfh=IO::File->new($dfname);
@@ -341,18 +271,27 @@ if ($cipher=new IDEA $cipherkey) {
}
$cmd="enc:$cmdlength:$encrequest\n";
}
-
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
print $remotesock "$cmd\n";
$answer=<$remotesock>;
chomp($answer);
- if ($answer ne '') {
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+ if (($answer ne '') && ($@!~/timeout/)) {
unlink("$dfname");
- &logthis("Delayed $cmd to $conserver: >$answer<");
+ &logthis("Delayed $cmd: >$answer<");
&logperm("S:$conserver:$bcmd");
}
- } @allbuffered;
+ }
# ------------------------------------------------------- Listen to UNIX socket
+&status("Opening socket");
unless (
$server = IO::Socket::UNIX->new(Local => $port,
Type => SOCK_STREAM,
@@ -361,7 +300,7 @@ unless (
my $st=120+int(rand(240));
&logthis(
"WARNING: ".
- "Can't make server socket $conserver ($st secs): $@");
+ "Can't make server socket ($st secs): $@");
sleep($st);
exit;
};
@@ -390,11 +329,11 @@ while (1) {
# check for new information on the connections we have
# anything to read or accept?
- foreach $client ($select->can_read(1)) {
+ foreach $client ($select->can_read(0.1)) {
if ($client == $server) {
# accept a new connection
-
+ &status("Accept new connection: $conserver");
$client = $server->accept();
$select->add($client);
nonblock($client);
@@ -409,6 +348,7 @@ while (1) {
delete $outbuffer{$client};
delete $ready{$client};
+ &status("Idle");
$select->remove($client);
close $client;
next;
@@ -437,18 +377,25 @@ while (1) {
next unless exists $outbuffer{$client};
$rv = $client->send($outbuffer{$client}, 0);
+
+ unless ($outbuffer{$client} eq "con_lost\n") {
unless (defined $rv) {
# Whine, but move on.
- warn "I was told I could write, but I can't.\n";
+ &logthis("I was told I could write, but I can't.\n");
next;
}
+ $errno=$!;
if (($rv == length $outbuffer{$client}) ||
- ($! == POSIX::EWOULDBLOCK)) {
+ ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {
substr($outbuffer{$client}, 0, $rv) = '';
delete $outbuffer{$client} unless length $outbuffer{$client};
} else {
# Couldn't write all the data, and it wasn't because
# it would have blocked. Shutdown and move on.
+
+ &logthis("Dropping data with ".$errno.": ".
+ length($outbuffer{$client}).", $rv");
+
delete $inbuffer{$client};
delete $outbuffer{$client};
delete $ready{$client};
@@ -457,7 +404,17 @@ while (1) {
close($client);
next;
}
+ } else {
+# -------------------------------------------------------- Wow, connection lost
+ &logthis(
+ "CRITICAL: Closing connection");
+ &status("Connection lost");
+ $remotesock->shutdown(2);
+ &logthis("Attempting to open new connection");
+ &openremote($conserver);
+ }
}
+
}
}
@@ -474,6 +431,15 @@ sub handle {
# ============================================================= Process request
# $request is the text of the request
# put text of reply into $outbuffer{$client}
+# ------------------------------------------------------------ Is this the end?
+ if ($request eq "close_connection_exit\n") {
+ &status("Request close connection");
+ &logthis(
+ "CRITICAL: Request Close Connection");
+ $remotesock->shutdown(2);
+ $server->close();
+ exit;
+ }
# -----------------------------------------------------------------------------
if ($request =~ /^encrypt\:/) {
my $cmd=$request;
@@ -488,8 +454,27 @@ sub handle {
}
$request="enc:$cmdlength:$encrequest\n";
}
+# --------------------------------------------------------------- Main exchange
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(300);
+ &status("Sending: $request");
print $remotesock "$request";
+ &status("Waiting for reply from $conserver: $request");
$answer=<$remotesock>;
+ &status("Received reply: $request");
+ alarm(0);
+ };
+ if ($@=~/timeout/) {
+ $answer='';
+ &logthis(
+ "CRITICAL: Timeout: $request");
+ }
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+
if ($answer) {
if ($answer =~ /^enc/) {
my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
@@ -508,6 +493,8 @@ sub handle {
$outbuffer{$client} .= "con_lost\n";
}
+ &status("Completed: $request");
+
# ===================================================== Done processing request
}
delete $ready{$client};
@@ -523,8 +510,423 @@ sub nonblock {
$flags = fcntl($socket, F_GETFL, 0)
- or catchdie "Can't get flags for socket: $!\n";
+ or die "Can't get flags for socket: $!\n";
fcntl($socket, F_SETFL, $flags | O_NONBLOCK)
- or catchdie "Can't make socket nonblocking: $!\n";
+ or die "Can't make socket nonblocking: $!\n";
}
+
+# Open the TCP connection to one remote server and prepare it for use.
+#
+# Argument: the server ID; its address is looked up in %hostip and the port
+# is taken from $perlvar{'londPort'}.
+# Side effects: sets the globals $remotesock (the socket), $answer (last
+# reply read) and $cipher (the IDEA cipher object).
+# This sub does not return on failure: it logs the problem and exits the
+# process, in most cases after sleeping a random 120-359 seconds.
+sub openremote {
+# ---------------------------------------------------- Client to network server
+
+# Note: this lexical shadows the global $conserver only inside this sub;
+# logthis still prints the global value.
+ my $conserver=shift;
+
+&status("Opening TCP");
+
+unless (
+ $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
+ PeerPort => $perlvar{'londPort'},
+ Proto => "tcp",
+ Type => SOCK_STREAM)
+ ) {
+ my $st=120+int(rand(240));
+ &logthis(
+"WARNING: Couldn't connect ($st secs): $@");
+ sleep($st);
+ exit;
+ };
+# ----------------------------------------------------------------- Init dialog
+
+&status("Init dialogue: $conserver");
+
+# The init exchange runs under a 60 second alarm. The __DIE__ hook is reset
+# to DEFAULT for the duration so the "timeout" die is caught by the eval
+# instead of being routed through catchexception.
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
+# Send "init", echo the first reply back unchanged, then read the verdict.
+print $remotesock "init\n";
+$answer=<$remotesock>;
+print $remotesock "$answer";
+$answer=<$remotesock>;
+chomp($answer);
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+# A timeout exits immediately, without the random back-off sleep.
+ if ($@=~/timeout/) {
+ &logthis("Timed out during init");
+ exit;
+ }
+
+if ($answer ne 'ok') {
+ &logthis("Init reply: >$answer<");
+ my $st=120+int(rand(240));
+ &logthis(
+"WARNING: Init failed ($st secs)");
+ sleep($st);
+ exit;
+}
+
+# NOTE(review): the pong and ekey reads below are not protected by an alarm,
+# unlike the init exchange above.
+sleep 5;
+&status("Ponging");
+print $remotesock "pong\n";
+$answer=<$remotesock>;
+chomp($answer);
+# An unexpected pong reply is only logged; processing continues.
+if ($answer!~/^$conserver/) {
+ &logthis("Pong reply: >$answer<");
+}
+# ----------------------------------------------------------- Initialize cipher
+
+&status("Initialize cipher");
+print $remotesock "ekey\n";
+my $buildkey=<$remotesock>;
+# Key material: remote ID followed by our own host ID, upper-cased, with the
+# letters G-P and Q-Z each mapped onto the digits 0-9, interleaved three
+# times with the reply to "ekey", cut to 32 characters and packed as hex.
+my $key=$conserver.$perlvar{'lonHostID'};
+$key=~tr/a-z/A-Z/;
+$key=~tr/G-P/0-9/;
+$key=~tr/Q-Z/0-9/;
+$key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
+$key=substr($key,0,32);
+my $cipherkey=pack("H32",$key);
+if ($cipher=new IDEA $cipherkey) {
+ &logthis("Secure connection initialized");
+} else {
+ my $st=120+int(rand(240));
+ &logthis(
+ "WARNING: ".
+ "Could not establish secure connection ($st secs)!");
+ sleep($st);
+ exit;
+}
+
+}
+
+
+
+# Grabs an exception and records it to the log before dying.
+# Installed for both SIGQUIT and __DIE__, so the single argument is either
+# the signal name or the message that was passed to die.
+sub catchexception {
+    my ($signal)=@_;
+    # Restore the defaults first so the die below cannot re-enter this sub.
+    $SIG{QUIT}='DEFAULT';
+    $SIG{__DIE__}='DEFAULT';
+    chomp($signal);
+    # $conserver names the process that is failing (SHELL, PARENT or the
+    # server ID of a child); $wasserver is only assigned during shutdown.
+    &logthis("CRITICAL: "
+     ."ABNORMAL EXIT. Child $$ for server [$conserver] died through "
+     ."\"$signal\" with parameter [$@]");
+    # Inside a __DIE__ hook $@ normally is still empty, so fall back to the
+    # message we were handed rather than dying with nothing.
+    die($@ || "$signal\n");
+}
+
+# -------------------------------------- Routines to see if other box available
+
+# Returns the result of a TCP ping (20 second timeout) against the given
+# host name; the current status text is updated before pinging.
+sub online {
+    my ($target)=@_;
+    &status("Pinging ".$target);
+
+    my $pinger=Net::Ping->new("tcp",20);
+    my $reachable=$pinger->ping("$target");
+    $pinger->close();
+    undef($pinger);
+
+    return $reachable;
+}
+
+# Asks host $local (via its /cgi-bin/ping.pl) whether it can reach $remote.
+# Both arguments are server IDs and are stripped of non-word characters.
+# Returns 'ok' when the first line of the answer equals $remote, one of
+# 'local_unknown', 'remote_unknown', 'local_offline', 'local_error' when the
+# check could not be made, and otherwise the (cleaned) answer itself.
+sub connected {
+    my ($local,$remote)=@_;
+    &status("Checking connection $local to $remote");
+    $local=~s/\W//g;
+    $remote=~s/\W//g;
+
+    return 'local_unknown' unless $hostname{$local};
+    return 'remote_unknown' unless $hostname{$remote};
+    return 'local_offline' unless &online($hostname{$local});
+
+    my $url="http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote;
+    my $agent=LWP::UserAgent->new;
+    my $response=$agent->request(HTTP::Request->new('GET',$url));
+    return 'local_error' unless $response->is_success;
+
+    # Only the first line of the body counts.
+    my ($reply)=split("\n",$response->content);
+    $reply=~s/\W//g;
+    return ($reply eq $remote) ? 'ok' : $reply;
+}
+
+
+# SIGCHLD handler: takes care of dead children.
+# Several children may have exited by the time the handler runs (pending
+# SIGCHLDs are not queued), so reap without blocking until none are left.
+# For every reaped child the bookkeeping in %children/%childpid is dropped
+# and its UNIX socket file under lonSockDir is removed.
+sub REAPER {
+    while ((my $pid = waitpid(-1, POSIX::WNOHANG())) > 0) {
+        my $wasserver=$children{$pid};
+        # Not one of the per-server children we keep track of.
+        next unless defined($wasserver);
+        &logthis("CRITICAL: "
+         ."Child $pid for server $wasserver died ($childatt{$wasserver})");
+        delete $children{$pid};
+        delete $childpid{$wasserver};
+        my $port = "$perlvar{'lonSockDir'}/$wasserver";
+        unlink($port);
+    }
+    # Re-install only after reaping, as perlipc recommends for SysV systems.
+    $SIG{CHLD} = \&REAPER;
+}
+
+# Tells every known child to finish: an 'exit' command is sent through the
+# child's socket (the reply is logged), then the child receives SIGINT.
+# The global $wasserver is left set to the server handled last.
+sub hangup {
+    foreach my $kid (keys %children) {
+        $wasserver=$children{$kid};
+        &status("Closing $wasserver");
+        my $reply=&subreply('exit',$wasserver);
+        &logthis('Closing '.$wasserver.': '.$reply);
+        &status("Kill PID $kid for $wasserver");
+        kill('INT',$kid);
+    }
+}
+
+# Shutdown handler (the POD assigns it to SIGINT and SIGTERM in the parent):
+# stops all children, removes the PID file, logs the shutdown and exits.
+sub HUNTSMAN {
+    # The children are about to be killed on purpose; do not reap them.
+    local($SIG{CHLD}) = 'IGNORE';
+    &hangup();
+    unlink("$perlvar{'lonDaemons'}/logs/lonc.pid");
+    &logthis("CRITICAL: Shutting down");
+    exit;                           # clean up with dignity
+}
+
+# SIGHUP handler for the parent: stops all children, removes the PID file
+# and replaces this process with a fresh copy of lonc.
+sub HUPSMAN {
+    # The children are about to be killed on purpose; do not reap them.
+    local($SIG{CHLD}) = 'IGNORE';
+    &hangup();
+    &logthis("CRITICAL: Restarting");
+    # Declare the directory before its first use instead of relying on a
+    # global of the same name.
+    my $execdir=$perlvar{'lonDaemons'};
+    unlink("$execdir/logs/lonc.pid");
+    # If this handler runs in signal context, SIGHUP is blocked while it
+    # executes and exec keeps the signal mask, so the new lonc would never
+    # see another SIGHUP. Unblock it explicitly before re-executing.
+    my $hupset=POSIX::SigSet->new(POSIX::SIGHUP());
+    POSIX::sigprocmask(POSIX::SIG_UNBLOCK(),$hupset);
+    exec("$execdir/lonc")           # here we go again
+        or &logthis("CRITICAL: Could not re-exec $execdir/lonc: $!");
+}
+
+# Starts a fresh status file, writes the parent's own status line and then
+# sends USR1 to each child (one per second) so that it appends its status.
+# Children that cannot be signalled are reported as dead in the log and in
+# the status file.
+sub checkchildren {
+    &initnewstatus();
+    &logstatus();
+    &logthis('Going to check on the children');
+    foreach my $kid (sort keys %children) {
+        sleep 1;
+        unless (kill 'USR1' => $kid) {
+            &logthis ('CRITICAL: Child '.$kid.' is dead');
+            # logstatus takes no arguments and prints $status, so put the
+            # dead child's PID there for this one line only.
+            local $status;
+            &status("Child $kid is dead");
+            &logstatus();
+        }
+    }
+}
+
+# USR1 handler for the parent: forgets all reconnection attempt counters
+# and has every child report its status.
+sub USRMAN {
+    %childatt = ();
+    logthis("USR1: Trying to establish connections again");
+    checkchildren();
+}
+
+# -------------------------------------------------- Non-critical communication
+# Sends one command line to the child serving $server through that child's
+# UNIX domain socket and returns the chomped reply.
+# Returns 'self_reply' when $server is this host, and 'con_lost' when the
+# socket cannot be opened, the reply is empty/false, or 10 seconds pass.
+sub subreply {
+    my ($cmd,$server)=@_;
+    return 'self_reply' if ($server eq $perlvar{'lonHostID'});
+
+    my $peerfile="$perlvar{'lonSockDir'}/$server";
+    my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
+                                      Type    => SOCK_STREAM,
+                                      Timeout => 10)
+       or return "con_lost";
+
+    my $answer='';
+    # Guard the exchange with an alarm; the __DIE__ hook is switched off so
+    # the "timeout" die stays inside the eval.
+    $SIG{ALRM}=sub { die "timeout" };
+    $SIG{__DIE__}='DEFAULT';
+    eval {
+        alarm(10);
+        print $sclient "$cmd\n";
+        $answer=<$sclient>;
+        chomp($answer);
+        alarm(0);
+    };
+    my $lost=((!$answer) || ($@=~/timeout/));
+    $SIG{ALRM}='DEFAULT';
+    $SIG{__DIE__}=\&catchexception;
+
+    return $lost ? "con_lost" : $answer;
+}
+
+# --------------------------------------------------------------------- Logging
+
+# Appends a time-stamped message to logs/lonc.log below lonDaemons, tagged
+# with the PID, the connection name and the current status text, and
+# remembers it in the global $lastlog.
+sub logthis {
+    my $message=shift;
+    my $execdir=$perlvar{'lonDaemons'};
+    my $fh=IO::File->new(">>$execdir/logs/lonc.log");
+    my $now=time;
+    my $local=localtime($now);
+    $lastlog=$local.': '.$message;
+    # Printing to an undefined handle is fatal, which would send us through
+    # the __DIE__ hook and straight back into logthis. A log file that
+    # cannot be opened must not take the daemon down.
+    print $fh "$local ($$) [$conserver] [$status]: $message\n" if ($fh);
+}
+
+
+# Appends "epoch:message:local time" to logs/lonnet.perm.log below
+# lonDaemons.
+sub logperm {
+    my $message=shift;
+    my $execdir=$perlvar{'lonDaemons'};
+    my $now=time;
+    my $local=localtime($now);
+    my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
+    # Printing to an undefined handle is fatal; skip the entry when the
+    # file cannot be opened.
+    print $fh "$now:$message:$local\n" if ($fh);
+}
+# ------------------------------------------------------------------ Log status
+
+# Appends one tab-separated line (PID, connection name, status, last log
+# message) to lon-status/loncstatus.txt below lonDocRoot. Children install
+# this sub as their USR1 handler.
+sub logstatus {
+    my $docdir=$perlvar{'lonDocRoot'};
+    my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
+    # Printing to an undefined handle is fatal; a missing status file must
+    # not kill the process that was merely asked to report.
+    print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n" if ($fh);
+}
+
+# Truncates lon-status/loncstatus.txt below lonDocRoot and writes a header
+# line with the current time and the PID of the calling process.
+sub initnewstatus {
+    my $docdir=$perlvar{'lonDocRoot'};
+    my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
+    my $now=time;
+    my $local=localtime($now);
+    # Printing to an undefined handle is fatal; skip the header when the
+    # file cannot be opened.
+    print $fh "LONC status $local - parent $$\n\n" if ($fh);
+}
+
+# -------------------------------------------------------------- Status setting
+
+# Records what this process is doing right now: the global $status becomes
+# "<local time>: <text>".
+sub status {
+    my ($what)=@_;
+    my $stamp=localtime(time);
+    $status=$stamp.': '.$what;
+}
+
+
+
+# ----------------------------------- POD (plain old documentation, CPAN style)
+
+=head1 NAME
+
+lonc - LON TCP networking client daemon; keeps persistent connections to the other servers.
+
+=head1 SYNOPSIS
+
+Usage: B<lonc>
+
+Should only be run as user=www. This is a command-line script which
+is invoked by B<loncron>. There is no expectation that a typical user
+will manually start B<lonc> from the command-line. (In other words,
+DO NOT START B<lonc> YOURSELF.)
+
+=head1 DESCRIPTION
+
+Provides persistent TCP connections to the other servers in the network
+through multiplexed domain sockets
+
+B<lonc> forks off child processes that correspond to the other servers
+in the network. Management of these processes can be done at the
+parent process level or the child process level.
+
+B<logs/lonc.log> is the location of log messages.
+
+The process management is now explained in terms of linux shell commands,
+subroutines internal to this code, and signal assignments:
+
+=over 4
+
+=item *
+
+PID is stored in B<logs/lonc.pid>
+
+This is the process id number of the parent B<lonc> process.
+
+=item *
+
+SIGTERM and SIGINT
+
+Parent signal assignment:
+ $SIG{INT} = $SIG{TERM} = \&HUNTSMAN;
+
+Child signal assignment:
+ $SIG{INT} = 'DEFAULT'; (and SIGTERM is DEFAULT also)
+(The child dies and a SIGCHLD is sent to parent, awaking parent from slumber
+ to restart a new child.)
+
+Command-line invocations:
+ B<kill> B<-s> SIGTERM I<PID>
+ B<kill> B<-s> SIGINT I<PID>
+
+Subroutine B<HUNTSMAN>:
+ This is only invoked for the B<lonc> parent I<PID>.
+This kills all the children, and then the parent.
+The B<lonc.pid> file is removed.
+
+=item *
+
+SIGHUP
+
+Current bug:
+ This signal can only be processed the first time
+on the parent process. Subsequent SIGHUP signals
+have no effect.
+
+Parent signal assignment:
+ $SIG{HUP} = \&HUPSMAN;
+
+Child signal assignment:
+ none (nothing happens)
+
+Command-line invocations:
+ B<kill> B<-s> SIGHUP I<PID>
+
+Subroutine B<HUPSMAN>:
+ This is only invoked for the B<lonc> parent I<PID>.
+This kills all the children, removes the B<lonc.pid> file,
+and then re-executes B<lonc> in place of the parent.
+
+=item *
+
+SIGUSR1
+
+Parent signal assignment:
+ $SIG{USR1} = \&USRMAN;
+
+Child signal assignment:
+ $SIG{USR1}= \&logstatus;
+
+Command-line invocations:
+ B<kill> B<-s> SIGUSR1 I<PID>
+
+Subroutine B<USRMAN>:
+ When invoked for the B<lonc> parent I<PID>,
+SIGUSR1 is sent to all the children, and the status of
+each connection is logged.
+
+=item *
+
+SIGCHLD
+
+Parent signal assignment:
+ $SIG{CHLD} = \&REAPER;
+
+Child signal assignment:
+ none
+
+Command-line invocations:
+ B<kill> B<-s> SIGCHLD I<PID>
+
+Subroutine B<REAPER>:
+ This is only invoked for the B<lonc> parent I<PID>.
+Information pertaining to the child is removed.
+The socket port is cleaned up.
+
+=back
+
+=head1 PREREQUISITES
+
+POSIX
+IO::Socket
+IO::Select
+IO::File
+Socket
+Fcntl
+Tie::RefHash
+Crypt::IDEA
+Net::Ping
+LWP::UserAgent
+
+=head1 COREQUISITES
+
+=head1 OSNAMES
+
+linux
+
+=head1 SCRIPT CATEGORIES
+
+Server/Process
+
+=cut