--- loncom/Attic/lonc 2000/02/08 17:34:24 1.5
+++ loncom/Attic/lonc 2002/02/06 14:13:19 1.24
@@ -5,6 +5,30 @@
# provides persistent TCP connections to the other servers in the network
# through multiplexed domain sockets
#
+# $Id: lonc,v 1.24 2002/02/06 14:13:19 albertel Exp $
+#
+# Copyright Michigan State University Board of Trustees
+#
+# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+#
+# LON-CAPA is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# LON-CAPA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with LON-CAPA; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# /home/httpd/html/adm/gpl.txt
+#
+# http://www.lon-capa.org/
+#
# PID in subdir logs/lonc.pid
# kill kills
# HUP restarts
@@ -12,7 +36,14 @@
# 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
# 10/8,10/9,10/15,11/18,12/22,
-# 2/8 Gerd Kortemeyer
+# 2/8,7/25 Gerd Kortemeyer
+# 12/05 Scott Harrison
+# 12/05 Gerd Kortemeyer
+# YEAR=2001
+# 01/10/01 Scott Harrison
+# 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
+# 12/20 Scott Harrison
+#
# based on nonforker from Perl Cookbook
# - server who multiplexes without forking
@@ -25,7 +56,26 @@ use Fcntl;
use Tie::RefHash;
use Crypt::IDEA;
-$childmaxattempts=10;
+my $status='';
+my $lastlog='';
+
+# Record an abnormal exit in the log, then die. Installed for SIGQUIT and
+# __DIE__, so $signal is either the signal name or the die() message.
+sub catchexception {
+    my ($signal)=@_;
+    $SIG{'QUIT'}=$SIG{__DIE__}='DEFAULT';   # never re-enter this handler
+    &logthis("CRITICAL: "
+     ."ABNORMAL EXIT. Child $$ for server $wasserver died through "
+     ."\"$signal\" with this parameter->[$@]");
+    die($signal);   # $@ is not set yet inside a __DIE__ handler; rethrow the real message
+}
+
+$childmaxattempts=5;
+
+# -------------------------------- Set signal handlers to record abnormal exits
+
+$SIG{'QUIT'}=\&catchexception;
+$SIG{__DIE__}=\&catchexception;
# ------------------------------------ Read httpd access.conf and get variables
@@ -40,6 +90,27 @@ while ($configline=) {
}
close(CONFIG);
+# ----------------------------- Make sure this process is running from user=www
+my $wwwid=getpwnam('www'); # numeric uid of the account named www
+if ($wwwid!=$<) { # $< is the real uid of this process
+ $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; # both configured addresses
+ $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ system("echo 'User ID mismatch. lonc must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+ exit 1; # refuse to run under any other account
+}
+
+# --------------------------------------------- Check if other instance running
+
+my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
+# Refuse to start while the PID recorded in the pid file is still alive.
+if (-e $pidfile) {
+    my $lfh=IO::File->new("$pidfile");
+    chomp(my $pide=($lfh && <$lfh>) || '');   # unreadable or empty file => ''
+    # kill 0 => '' would probe our own process group and always succeed
+    if ($pide=~/^[1-9]\d*$/ && kill(0 => $pide)) { die "already running"; }
+}
+
# ------------------------------------------------------------- Read hosts file
open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
@@ -64,8 +135,8 @@ sub REAPER { # ta
$SIG{CHLD} = \&REAPER;
my $pid = wait;
my $wasserver=$children{$pid};
- &logthis(
- "CRITICAL: Child $pid for server $wasserver died");
+ &logthis("CRITICAL: "
+ ."Child $pid for server $wasserver died ($childatt{$wasserver})");
delete $children{$pid};
delete $childpid{$wasserver};
my $port = "$perlvar{'lonSockDir'}/$wasserver";
@@ -74,7 +145,13 @@ sub REAPER { # ta
sub HUNTSMAN { # signal handler for SIGINT
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
+ foreach (keys %children) {
+ $wasserver=$children{$_};
+ &status("Closing $wasserver");
+ &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
+ &status("Kill PID $_ for $wasserver");
+ kill ('INT',$_);
+ }
my $execdir=$perlvar{'lonDaemons'};
unlink("$execdir/logs/lonc.pid");
&logthis("CRITICAL: Shutting down");
@@ -83,20 +160,42 @@ sub HUNTSMAN { # si
sub HUPSMAN { # signal handler for SIGHUP
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
+ foreach (keys %children) { # per child: send 'exit' via subreply, log the reply, then SIGINT
+ $wasserver=$children{$_};
+ &status("Closing $wasserver");
+ &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
+ &status("Kill PID $_ for $wasserver");
+ kill ('INT',$_);
+ }
&logthis("CRITICAL: Restarting");
my $execdir=$perlvar{'lonDaemons'};
+ unlink("$execdir/logs/lonc.pid"); # must follow the declaration of $execdir (as in HUNTSMAN)
exec("$execdir/lonc"); # here we go again
}
+sub checkchildren {                      # start a fresh status file, then poll every child
+    &initnewstatus();                    # truncate the file and write its header
+    &logstatus();                        # first entry: this (parent) process
+    &logthis('Going to check on the children');
+    foreach (sort keys %children) {
+        sleep 1;                         # one child per second
+        unless (kill 'USR1' => $_) {     # children run logstatus on USR1
+            &logthis ('Child '.$_.' is dead');
+            &logstatus($_.' is dead');   # name the dead child, not our own PID ($$)
+        }
+    }
+}
+
sub USRMAN {
- %childatt=();
&logthis("USR1: Trying to establish connections again");
foreach $thisserver (keys %hostip) {
$answer=subreply("ping",$thisserver);
- &logthis(
- "USR1: Ping $thisserver (pid >$childpid{$thisserver}<): >$answer<");
+ &logthis("USR1: Ping $thisserver "
+ ."(pid >$childpid{$thisserver}<, $childatt{$thisserver} attempts): "
+ ." >$answer<");
}
+ %childatt=(); # reset the attempt counters only after they have been reported
+ &checkchildren(); # rewrite the status file and send USR1 to every child
}
# -------------------------------------------------- Non-critical communication
@@ -109,10 +208,20 @@ sub subreply {
Type => SOCK_STREAM,
Timeout => 10)
or return "con_lost";
- print $sclient "$cmd\n";
- my $answer=<$sclient>;
- chomp($answer);
- if (!$answer) { $answer="con_lost"; }
+
+
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(10);
+ print $sclient "$cmd\n";
+ $answer=<$sclient>;
+ chomp($answer);
+ alarm(0);
+ };
+ if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
} else { $answer='self_reply'; }
return $answer;
}
@@ -125,6 +234,7 @@ sub logthis {
my $fh=IO::File->new(">>$execdir/logs/lonc.log");
my $now=time;
my $local=localtime($now);
+ $lastlog=$local.': '.$message;
print $fh "$local ($$): $message\n";
}
@@ -137,6 +247,31 @@ sub logperm {
my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
print $fh "$now:$message:$local\n";
}
+# ------------------------------------------------------------------ Log status
+
+sub logstatus {   # append "pid<TAB>status<TAB>last log line" to loncstatus.txt; no-op if unwritable
+    my $docdir=$perlvar{'lonDocRoot'};
+    my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt") or return;
+    print $fh $$."\t".$status."\t".$lastlog."\n";
+}
+
+sub initnewstatus {   # start a new loncstatus.txt with a timestamped header; no-op if unwritable
+    my $docdir=$perlvar{'lonDocRoot'};
+    my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt") or return;
+    my $now=time;
+    my $local=localtime($now);
+    print $fh "LONC status $local - parent $$\n\n";
+}
+
+# -------------------------------------------------------------- Status setting
+
+sub status {   # remember what this process is doing, stamped with local time
+    my ($what)=@_;
+    my $stamp=localtime(time);
+    # $status is the file-level variable that logstatus() writes out
+    $status=$stamp.': '.$what;
+}
+
# ---------------------------------------------------- Fork once and dissociate
@@ -160,6 +295,8 @@ $SIG{HUP}=$SIG{USR1}='IGNORE';
# Fork off our children, one for every server
+&status("Forking ...");
+
foreach $thisserver (keys %hostip) {
make_new_child($thisserver);
}
@@ -174,17 +311,18 @@ $SIG{USR1} = \&USRMAN;
# And maintain the population.
while (1) {
+ &status("Sleeping");
sleep; # wait for a signal (i.e., child's death)
# See who died and start new one
+ &status("Woke up");
foreach $thisserver (keys %hostip) {
if (!$childpid{$thisserver}) {
- if ($childatt{$thisserver}<=$childmaxattempt) {
- my $ainfoatt=1*$childatt{$thisserver};
+ if ($childatt{$thisserver}<$childmaxattempts) {
+ $childatt{$thisserver}++;
&logthis(
"INFO: Trying to reconnect for $thisserver "
- ."($ainfoatt of $childmaxattempts attempts)");
+ ."($childatt{$thisserver} of $childmaxattempts attempts)");
make_new_child($thisserver);
- $childatt{$thisserver}++;
}
}
}
@@ -214,7 +352,8 @@ sub make_new_child {
} else {
# Child can *not* return from this subroutine.
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
-
+ $SIG{USR1}= \&logstatus;
+
# unblock signals
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
@@ -224,7 +363,11 @@ sub make_new_child {
$port = "$perlvar{'lonSockDir'}/$conserver";
unlink($port);
+
# ---------------------------------------------------- Client to network server
+
+&status("Opening TCP: $conserver");
+
unless (
$remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
PeerPort => $perlvar{'londPort'},
@@ -237,20 +380,47 @@ unless (
sleep($st);
exit;
};
-# --------------------------------------- Send a ping to make other end do USR1
+# ----------------------------------------------------------------- Init dialog
+
+&status("Init dialogue: $conserver");
+
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
print $remotesock "init\n";
$answer=<$remotesock>;
print $remotesock "$answer";
$answer=<$remotesock>;
chomp($answer);
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+ if ($@=~/timeout/) {
+ &logthis("Timed out during init: $conserver");
+ exit;
+ }
+
+
&logthis("Init reply for $conserver: >$answer<");
+if ($answer ne 'ok') {
+ my $st=120+int(rand(240));
+ &logthis(
+"WARNING: Init failed $conserver ($st secs)");
+ sleep($st);
+ exit;
+}
sleep 5;
+&status("Ponging $conserver");
print $remotesock "pong\n";
$answer=<$remotesock>;
chomp($answer);
&logthis("Pong reply for $conserver: >$answer<");
# ----------------------------------------------------------- Initialize cipher
+&status("Initialize cipher: $conserver");
print $remotesock "ekey\n";
my $buildkey=<$remotesock>;
my $key=$conserver.$perlvar{'lonHostID'};
@@ -261,7 +431,7 @@ $key=$key.$buildkey.$key.$buildkey.$key.
$key=substr($key,0,32);
my $cipherkey=pack("H32",$key);
if ($cipher=new IDEA $cipherkey) {
- &logthis("Secure connection inititalized: $conserver");
+ &logthis("Secure connection initialized: $conserver");
} else {
my $st=120+int(rand(240));
&logthis(
@@ -272,14 +442,15 @@ if ($cipher=new IDEA $cipherkey) {
}
# ----------------------------------------- We're online, send delayed messages
-
+ &status("Checking for delayed messages");
my @allbuffered;
my $path="$perlvar{'lonSockDir'}/delayed";
opendir(DIRHANDLE,$path);
@allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
closedir(DIRHANDLE);
my $dfname;
- map {
+ foreach (@allbuffered) {
+ &status("Sending delayed $conserver $_");
$dfname="$path/$_";
&logthis($dfname);
my $wcmd;
@@ -302,18 +473,27 @@ if ($cipher=new IDEA $cipherkey) {
}
$cmd="enc:$cmdlength:$encrequest\n";
}
-
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
print $remotesock "$cmd\n";
$answer=<$remotesock>;
chomp($answer);
- if ($answer ne '') {
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+ if (($answer ne '') && ($@!~/timeout/)) {
unlink("$dfname");
&logthis("Delayed $cmd to $conserver: >$answer<");
&logperm("S:$conserver:$bcmd");
}
- } @allbuffered;
+ }
# ------------------------------------------------------- Listen to UNIX socket
+&status("Opening socket $conserver");
unless (
$server = IO::Socket::UNIX->new(Local => $port,
Type => SOCK_STREAM,
@@ -351,11 +531,11 @@ while (1) {
# check for new information on the connections we have
# anything to read or accept?
- foreach $client ($select->can_read(1)) {
+ foreach $client ($select->can_read(0.1)) {
if ($client == $server) {
# accept a new connection
-
+ &status("Accept new connection: $conserver");
$client = $server->accept();
$select->add($client);
nonblock($client);
@@ -370,6 +550,7 @@ while (1) {
delete $outbuffer{$client};
delete $ready{$client};
+ &status("Idle $conserver");
$select->remove($client);
close $client;
next;
@@ -389,27 +570,31 @@ while (1) {
# Any complete requests to process?
foreach $client (keys %ready) {
- handle($client);
+ handle($client,$conserver);
}
# Buffers to flush?
foreach $client ($select->can_write(1)) {
# Skip this client if we have nothing to say
next unless exists $outbuffer{$client};
-
$rv = $client->send($outbuffer{$client}, 0);
unless (defined $rv) {
# Whine, but move on.
- warn "I was told I could write, but I can't.\n";
+ &logthis("I was told I could write, but I can't.\n");
next;
}
+ $errno=$!;
if (($rv == length $outbuffer{$client}) ||
- ($! == POSIX::EWOULDBLOCK)) {
+ ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {
substr($outbuffer{$client}, 0, $rv) = '';
delete $outbuffer{$client} unless length $outbuffer{$client};
} else {
# Couldn't write all the data, and it wasn't because
# it would have blocked. Shutdown and move on.
+
+ &logthis("Dropping data with ".$errno.": ".
+ length($outbuffer{$client}).", $rv");
+
delete $inbuffer{$client};
delete $outbuffer{$client};
delete $ready{$client};
@@ -421,7 +606,7 @@ while (1) {
}
}
}
-
+}
# ------------------------------------------------------- End of make_new_child
# handle($socket) deals with all pending requests for $client
@@ -429,6 +614,7 @@ sub handle {
# requests are in $ready{$client}
# send output to $outbuffer{$client}
my $client = shift;
+ my $conserver = shift;
my $request;
foreach $request (@{$ready{$client}}) {
@@ -449,8 +635,30 @@ sub handle {
}
$request="enc:$cmdlength:$encrequest\n";
}
+# --------------------------------------------------------------- Main exchange
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(300);
+ &status("Sending $conserver: $request");
+ &logthis("Sending $conserver: $request");
print $remotesock "$request";
+ &status("Waiting for reply from $conserver: $request");
+ &logthis("Waiting for reply from $conserver: $request");
$answer=<$remotesock>;
+ &status("Received reply: $request");
+ &logthis("Received reply $conserver: $answer");
+ alarm(0);
+ };
+ if ($@=~/timeout/) {
+ $answer='';
+ &logthis(
+ "CRITICAL: Timeout $conserver: $request");
+ }
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+
if ($answer) {
if ($answer =~ /^enc/) {
my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
@@ -470,12 +678,13 @@ sub handle {
}
# ===================================================== Done processing request
+ &logthis("Completed $conserver: $request");
}
delete $ready{$client};
+ &status("Completed $conserver: $request");
# -------------------------------------------------------------- End non-forker
}
# ---------------------------------------------------------- End make_new_child
-}
# nonblock($socket) puts socket into nonblocking mode
sub nonblock {
@@ -489,7 +698,50 @@ sub nonblock {
or die "Can't make socket nonblocking: $!\n";
}
+# ----------------------------------- POD (plain old documentation, CPAN style)
+
+=head1 NAME
+
+lonc - LON TCP client daemon providing persistent connections to the other servers.
+
+=head1 SYNOPSIS
+
+Should only be run as user=www. This is a command-line script which
+is invoked by loncron.
+
+=head1 DESCRIPTION
+
+Provides persistent TCP connections to the other servers in the network
+through multiplexed domain sockets
+
+ PID in subdir logs/lonc.pid
+ kill kills
+ HUP restarts
+ USR1 tries to open connections again
+
+=head1 README
+
+Not yet written.
+
+=head1 PREREQUISITES
+
+POSIX
+IO::Socket
+IO::Select
+IO::File
+Socket
+Fcntl
+Tie::RefHash
+Crypt::IDEA
+
+=head1 COREQUISITES
+
+=head1 OSNAMES
+linux
+=head1 SCRIPT CATEGORIES
+Server/Process
+=cut