version 1.14, 2001/03/13 21:15:40
|
version 1.52, 2003/07/25 01:16:29
|
Line 5
|
Line 5
|
# provides persistent TCP connections to the other servers in the network |
# provides persistent TCP connections to the other servers in the network |
# through multiplexed domain sockets |
# through multiplexed domain sockets |
# |
# |
|
# $Id$ |
|
# |
|
# Copyright Michigan State University Board of Trustees |
|
# |
|
# This file is part of the LearningOnline Network with CAPA (LON-CAPA). |
|
# |
|
# LON-CAPA is free software; you can redistribute it and/or modify |
|
# it under the terms of the GNU General Public License as published by |
|
# the Free Software Foundation; either version 2 of the License, or |
|
# (at your option) any later version. |
|
# |
|
# LON-CAPA is distributed in the hope that it will be useful, |
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
# GNU General Public License for more details. |
|
# |
|
# You should have received a copy of the GNU General Public License |
|
# along with LON-CAPA; if not, write to the Free Software |
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
# |
|
# /home/httpd/html/adm/gpl.txt |
|
# |
|
# http://www.lon-capa.org/ |
|
# |
# PID in subdir logs/lonc.pid |
# PID in subdir logs/lonc.pid |
# kill kills |
# kill kills |
# HUP restarts |
# HUP restarts |
Line 13
|
Line 37
|
# 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, |
# 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, |
# 10/8,10/9,10/15,11/18,12/22, |
# 10/8,10/9,10/15,11/18,12/22, |
# 2/8,7/25 Gerd Kortemeyer |
# 2/8,7/25 Gerd Kortemeyer |
# 12/05 Scott Harrison |
|
# 12/05 Gerd Kortemeyer |
# 12/05 Gerd Kortemeyer |
# 01/10/01 Scott Harrison |
# YEAR=2001 |
# 03/14/01 Gerd Kortemeyer |
# 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer |
# |
# YEAR=2002 |
|
# 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer |
|
# 3/07/02 Ron Fox |
# based on nonforker from Perl Cookbook |
# based on nonforker from Perl Cookbook |
# - server who multiplexes without forking |
# - server who multiplexes without forking |
|
|
|
use lib '/home/httpd/lib/perl/'; |
|
use LONCAPA::Configuration; |
|
|
use POSIX; |
use POSIX; |
use IO::Socket; |
use IO::Socket; |
use IO::Select; |
use IO::Select; |
Line 29 use Socket;
|
Line 57 use Socket;
|
use Fcntl; |
use Fcntl; |
use Tie::RefHash; |
use Tie::RefHash; |
use Crypt::IDEA; |
use Crypt::IDEA; |
|
#use Net::Ping; |
|
use LWP::UserAgent(); |
|
|
# grabs exception and records it to log before exiting |
$status=''; |
sub catchexception { |
$lastlog=''; |
my ($signal)=@_; |
$conserver='SHELL'; |
$SIG{'QUIT'}='DEFAULT'; |
$DEBUG = 0; # Set to 1 for annoyingly complete logs. |
$SIG{__DIE__}='DEFAULT'; |
$VERSION='$Revison$'; #' stupid emacs |
&logthis("<font color=red>CRITICAL: " |
$remoteVERSION; |
."ABNORMAL EXIT. Child $$ for server $wasserver died through " |
|
."\"$signal\" with this parameter->[$@]</font>"); |
|
die($@); |
|
} |
|
|
|
$childmaxattempts=10; |
|
|
|
# -------------------------------- Set signal handlers to record abnormal exits |
# -------------------------------- Set signal handlers to record abnormal exits |
|
|
$SIG{'QUIT'}=\&catchexception; |
&status("Init exception handlers"); |
|
$SIG{QUIT}=\&catchexception; |
$SIG{__DIE__}=\&catchexception; |
$SIG{__DIE__}=\&catchexception; |
|
|
# ------------------------------------ Read httpd access.conf and get variables |
# ---------------------------------- Read loncapa_apache.conf and loncapa.conf |
|
&status("Read loncapa.conf and loncapa_apache.conf"); |
open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; |
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); |
|
my %perlvar=%{$perlvarref}; |
while ($configline=<CONFIG>) { |
undef $perlvarref; |
if ($configline =~ /PerlSetVar/) { |
|
my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); |
|
chomp($varvalue); |
|
$perlvar{$varname}=$varvalue; |
|
} |
|
} |
|
close(CONFIG); |
|
|
|
# ----------------------------- Make sure this process is running from user=www |
# ----------------------------- Make sure this process is running from user=www |
|
&status("Check user ID"); |
my $wwwid=getpwnam('www'); |
my $wwwid=getpwnam('www'); |
if ($wwwid!=$<) { |
if ($wwwid!=$<) { |
$emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
$emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; |
Line 89 open (CONFIG,"$perlvar{'lonTabDir'}/host
|
Line 107 open (CONFIG,"$perlvar{'lonTabDir'}/host
|
while ($configline=<CONFIG>) { |
while ($configline=<CONFIG>) { |
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); |
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); |
chomp($ip); |
chomp($ip); |
$hostip{$id}=$ip; |
if ($ip) { |
|
$hostip{$id}=$ip; |
|
$hostname{$id}=$name; |
|
} |
} |
} |
|
|
close(CONFIG); |
close(CONFIG); |
|
|
# -------------------------------------------------------- Routines for forking |
# -------------------------------------------------------- Routines for forking |
Line 102 close(CONFIG);
|
Line 124 close(CONFIG);
|
%childatt = (); # number of attempts to start server |
%childatt = (); # number of attempts to start server |
# for ID |
# for ID |
|
|
sub REAPER { # takes care of dead children |
$childmaxattempts=5; |
$SIG{CHLD} = \&REAPER; |
|
my $pid = wait; |
|
my $wasserver=$children{$pid}; |
|
&logthis("<font color=red>CRITICAL: " |
|
."Child $pid for server $wasserver died ($childatt{$wasserver})</font>"); |
|
delete $children{$pid}; |
|
delete $childpid{$wasserver}; |
|
my $port = "$perlvar{'lonSockDir'}/$wasserver"; |
|
unlink($port); |
|
} |
|
|
|
sub HUNTSMAN { # signal handler for SIGINT |
|
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children |
|
kill 'INT' => keys %children; |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
unlink("$execdir/logs/lonc.pid"); |
|
&logthis("<font color=red>CRITICAL: Shutting down</font>"); |
|
exit; # clean up with dignity |
|
} |
|
|
|
sub HUPSMAN { # signal handler for SIGHUP |
|
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children |
|
kill 'INT' => keys %children; |
|
&logthis("<font color=red>CRITICAL: Restarting</font>"); |
|
unlink("$execdir/logs/lonc.pid"); |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
exec("$execdir/lonc"); # here we go again |
|
} |
|
|
|
sub USRMAN { |
|
&logthis("USR1: Trying to establish connections again"); |
|
foreach $thisserver (keys %hostip) { |
|
$answer=subreply("ping",$thisserver); |
|
&logthis("USR1: Ping $thisserver " |
|
."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): " |
|
." >$answer<"); |
|
} |
|
%childatt=(); |
|
} |
|
|
|
# -------------------------------------------------- Non-critical communication |
|
sub subreply { |
|
my ($cmd,$server)=@_; |
|
my $answer=''; |
|
if ($server ne $perlvar{'lonHostID'}) { |
|
my $peerfile="$perlvar{'lonSockDir'}/$server"; |
|
my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", |
|
Type => SOCK_STREAM, |
|
Timeout => 10) |
|
or return "con_lost"; |
|
print $sclient "$cmd\n"; |
|
my $answer=<$sclient>; |
|
chomp($answer); |
|
if (!$answer) { $answer="con_lost"; } |
|
} else { $answer='self_reply'; } |
|
return $answer; |
|
} |
|
|
|
# --------------------------------------------------------------------- Logging |
|
|
|
sub logthis { |
|
my $message=shift; |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
my $fh=IO::File->new(">>$execdir/logs/lonc.log"); |
|
my $now=time; |
|
my $local=localtime($now); |
|
print $fh "$local ($$): $message\n"; |
|
} |
|
|
|
|
|
sub logperm { |
|
my $message=shift; |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
my $now=time; |
|
my $local=localtime($now); |
|
my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); |
|
print $fh "$now:$message:$local\n"; |
|
} |
|
|
|
# ---------------------------------------------------- Fork once and dissociate |
# ---------------------------------------------------- Fork once and dissociate |
|
&status("Fork and dissociate"); |
$fpid=fork; |
$fpid=fork; |
exit if $fpid; |
exit if $fpid; |
die "Couldn't fork: $!" unless defined ($fpid); |
die "Couldn't fork: $!" unless defined ($fpid); |
|
|
POSIX::setsid() or die "Can't start new session: $!"; |
POSIX::setsid() or die "Can't start new session: $!"; |
|
|
# ------------------------------------------------------- Write our PID on disk |
$conserver='PARENT'; |
|
|
|
# ------------------------------------------------------- Write our PID on disk |
|
&status("Write PID"); |
$execdir=$perlvar{'lonDaemons'}; |
$execdir=$perlvar{'lonDaemons'}; |
open (PIDSAVE,">$execdir/logs/lonc.pid"); |
open (PIDSAVE,">$execdir/logs/lonc.pid"); |
print PIDSAVE "$$\n"; |
print PIDSAVE "$$\n"; |
Line 204 $SIG{HUP}=$SIG{USR1}='IGNORE';
|
Line 150 $SIG{HUP}=$SIG{USR1}='IGNORE';
|
|
|
# Fork off our children, one for every server |
# Fork off our children, one for every server |
|
|
|
&status("Forking ..."); |
|
|
foreach $thisserver (keys %hostip) { |
foreach $thisserver (keys %hostip) { |
make_new_child($thisserver); |
#if (&online($hostname{$thisserver})) { |
|
make_new_child($thisserver); |
|
#} |
} |
} |
|
|
&logthis("Done starting initial servers"); |
&logthis("Done starting initial servers"); |
# ----------------------------------------------------- Install signal handlers |
# ----------------------------------------------------- Install signal handlers |
|
|
$SIG{CHLD} = \&REAPER; |
|
$SIG{INT} = $SIG{TERM} = \&HUNTSMAN; |
$SIG{INT} = $SIG{TERM} = \&HUNTSMAN; |
$SIG{HUP} = \&HUPSMAN; |
$SIG{HUP} = \&HUPSMAN; |
$SIG{USR1} = \&USRMAN; |
$SIG{USR1} = \&USRMAN; |
|
|
# And maintain the population. |
# And maintain the population. |
while (1) { |
while (1) { |
sleep; # wait for a signal (i.e., child's death) |
my $deadpid = wait; # Wait for the next child to die. |
# See who died and start new one |
# See who died and start new one |
foreach $thisserver (keys %hostip) { |
# or a signal (e.g. USR1 for restart). |
if (!$childpid{$thisserver}) { |
# if a signal, the wait will fail |
if ($childatt{$thisserver}<=$childmaxattempts) { |
# This is ordinarily detected by |
$childatt{$thisserver}++; |
# checking for the existence of the |
&logthis( |
# pid index inthe children hash since |
"<font color=yellow>INFO: Trying to reconnect for $thisserver " |
# the return value from a failed wait is -1 |
."($childatt{$thisserver} of $childmaxattempts attempts)</font>"); |
# which is an impossible PID. |
make_new_child($thisserver); |
&status("Woke up"); |
} |
my $skipping=''; |
} |
|
|
if(exists($children{$deadpid})) { |
|
|
|
$thisserver = $children{$deadpid}; # Look name of dead guy's peer. |
|
|
|
delete($children{$deadpid}); # Get rid of dead hash entry. |
|
|
|
if($childatt{$thisserver} < $childmaxattempts) { |
|
$childatt{$thisserver}++; |
|
&logthis( |
|
"<font color=yellow>INFO: Trying to reconnect for $thisserver " |
|
."($childatt{$thisserver} of $childmaxattempts attempts)</font>"); |
|
make_new_child($thisserver); |
|
|
|
} |
|
else { |
|
$skipping .= $thisserver.' '; |
|
} |
|
if($skipping) { |
|
&logthis("<font color=blue>WARNING: Skipped $skipping</font>"); |
|
|
|
} |
} |
} |
|
|
} |
} |
|
|
|
|
|
|
sub make_new_child { |
sub make_new_child { |
|
|
my $conserver=shift; |
$newserver=shift; |
my $pid; |
my $pid; |
my $sigset; |
my $sigset; |
&logthis("Attempting to start child for server $conserver"); |
&logthis("Attempting to start child for server $newserver"); |
# block signal for fork |
# block signal for fork |
$sigset = POSIX::SigSet->new(SIGINT); |
$sigset = POSIX::SigSet->new(SIGINT); |
sigprocmask(SIG_BLOCK, $sigset) |
sigprocmask(SIG_BLOCK, $sigset) |
Line 251 sub make_new_child {
|
Line 224 sub make_new_child {
|
# Parent records the child's birth and returns. |
# Parent records the child's birth and returns. |
sigprocmask(SIG_UNBLOCK, $sigset) |
sigprocmask(SIG_UNBLOCK, $sigset) |
or die "Can't unblock SIGINT for fork: $!\n"; |
or die "Can't unblock SIGINT for fork: $!\n"; |
$children{$pid} = $conserver; |
$children{$pid} = $newserver; |
$childpid{$conserver} = $pid; |
$childpid{$newserver} = $pid; |
return; |
return; |
} else { |
} else { |
|
$conserver=$newserver; |
# Child can *not* return from this subroutine. |
# Child can *not* return from this subroutine. |
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before |
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before |
|
$SIG{USR1}= \&logstatus; |
|
|
# unblock signals |
# unblock signals |
sigprocmask(SIG_UNBLOCK, $sigset) |
sigprocmask(SIG_UNBLOCK, $sigset) |
or die "Can't unblock SIGINT for fork: $!\n"; |
or die "Can't unblock SIGINT for fork: $!\n"; |
Line 267 sub make_new_child {
|
Line 242 sub make_new_child {
|
$port = "$perlvar{'lonSockDir'}/$conserver"; |
$port = "$perlvar{'lonSockDir'}/$conserver"; |
|
|
unlink($port); |
unlink($port); |
# ---------------------------------------------------- Client to network server |
|
unless ( |
|
$remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, |
|
PeerPort => $perlvar{'londPort'}, |
|
Proto => "tcp", |
|
Type => SOCK_STREAM) |
|
) { |
|
my $st=120+int(rand(240)); |
|
&logthis( |
|
"<font color=blue>WARNING: Couldn't connect $conserver ($st secs): $@</font>"); |
|
sleep($st); |
|
exit; |
|
}; |
|
# --------------------------------------- Send a ping to make other end do USR1 |
|
print $remotesock "init\n"; |
|
$answer=<$remotesock>; |
|
print $remotesock "$answer"; |
|
$answer=<$remotesock>; |
|
chomp($answer); |
|
&logthis("Init reply for $conserver: >$answer<"); |
|
sleep 5; |
|
print $remotesock "pong\n"; |
|
$answer=<$remotesock>; |
|
chomp($answer); |
|
&logthis("Pong reply for $conserver: >$answer<"); |
|
# ----------------------------------------------------------- Initialize cipher |
|
|
|
print $remotesock "ekey\n"; |
# -------------------------------------------------------------- Open other end |
my $buildkey=<$remotesock>; |
|
my $key=$conserver.$perlvar{'lonHostID'}; |
|
$key=~tr/a-z/A-Z/; |
|
$key=~tr/G-P/0-9/; |
|
$key=~tr/Q-Z/0-9/; |
|
$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; |
|
$key=substr($key,0,32); |
|
my $cipherkey=pack("H32",$key); |
|
if ($cipher=new IDEA $cipherkey) { |
|
&logthis("Secure connection initialized: $conserver"); |
|
} else { |
|
my $st=120+int(rand(240)); |
|
&logthis( |
|
"<font color=blue>WARNING: ". |
|
"Could not establish secure connection, $conserver ($st secs)!</font>"); |
|
sleep($st); |
|
exit; |
|
} |
|
|
|
|
&openremote($conserver); |
|
&logthis("<font color=green> Connection to $conserver open </font>"); |
# ----------------------------------------- We're online, send delayed messages |
# ----------------------------------------- We're online, send delayed messages |
|
&status("Checking for delayed messages"); |
|
|
my @allbuffered; |
my @allbuffered; |
my $path="$perlvar{'lonSockDir'}/delayed"; |
my $path="$perlvar{'lonSockDir'}/delayed"; |
Line 322 if ($cipher=new IDEA $cipherkey) {
|
Line 256 if ($cipher=new IDEA $cipherkey) {
|
@allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; |
@allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; |
closedir(DIRHANDLE); |
closedir(DIRHANDLE); |
my $dfname; |
my $dfname; |
map { |
foreach (sort @allbuffered) { |
|
&status("Sending delayed: $_"); |
$dfname="$path/$_"; |
$dfname="$path/$_"; |
&logthis($dfname); |
if($DEBUG) { &logthis('Sending '.$dfname); } |
my $wcmd; |
my $wcmd; |
{ |
{ |
my $dfh=IO::File->new($dfname); |
my $dfh=IO::File->new($dfname); |
Line 345 if ($cipher=new IDEA $cipherkey) {
|
Line 280 if ($cipher=new IDEA $cipherkey) {
|
} |
} |
$cmd="enc:$cmdlength:$encrequest\n"; |
$cmd="enc:$cmdlength:$encrequest\n"; |
} |
} |
|
$answer = londtransaction($remotesock, $cmd, 60); |
print $remotesock "$cmd\n"; |
|
$answer=<$remotesock>; |
|
chomp($answer); |
chomp($answer); |
if ($answer ne '') { |
|
|
if (($answer ne '') && ($@!~/timeout/)) { |
unlink("$dfname"); |
unlink("$dfname"); |
&logthis("Delayed $cmd to $conserver: >$answer<"); |
&logthis("Delayed $cmd: >$answer<"); |
&logperm("S:$conserver:$bcmd"); |
&logperm("S:$conserver:$bcmd"); |
} |
} |
} @allbuffered; |
} |
|
if($DEBUG) { &logthis("<font color=green> Delayed transactions sent"); } |
|
|
# ------------------------------------------------------- Listen to UNIX socket |
# ------------------------------------------------------- Listen to UNIX socket |
|
&status("Opening socket"); |
unless ( |
unless ( |
$server = IO::Socket::UNIX->new(Local => $port, |
$server = IO::Socket::UNIX->new(Local => $port, |
Type => SOCK_STREAM, |
Type => SOCK_STREAM, |
Line 365 unless (
|
Line 301 unless (
|
my $st=120+int(rand(240)); |
my $st=120+int(rand(240)); |
&logthis( |
&logthis( |
"<font color=blue>WARNING: ". |
"<font color=blue>WARNING: ". |
"Can't make server socket $conserver ($st secs): $@</font>"); |
"Can't make server socket ($st secs): .. exiting</font>"); |
sleep($st); |
sleep($st); |
exit; |
exit; |
}; |
}; |
|
|
# ----------------------------------------------------------------------------- |
# ----------------------------------------------------------------------------- |
|
|
&logthis("<font color=green>$conserver online</font>"); |
&logthis("<font color=green>$conserver online</font>"); |
Line 379 unless (
|
Line 315 unless (
|
%inbuffer = (); |
%inbuffer = (); |
%outbuffer = (); |
%outbuffer = (); |
%ready = (); |
%ready = (); |
|
%servers = (); # To be compatible with make filevector. indexed by |
|
# File ids, values are sockets. |
|
# note that the accept socket is omitted. |
|
|
tie %ready, 'Tie::RefHash'; |
tie %ready, 'Tie::RefHash'; |
|
|
nonblock($server); |
# nonblock($server); |
$select = IO::Select->new($server); |
# $select = IO::Select->new($server); |
|
|
# Main loop: check reads/accepts, check writes, check ready to process |
# Main loop: check reads/accepts, check writes, check ready to process |
|
|
|
status("Main loop $conserver"); |
while (1) { |
while (1) { |
my $client; |
my $client; |
my $rv; |
my $rv; |
my $data; |
my $data; |
|
|
# check for new information on the connections we have |
my $infdset; # bit vec of fd's to select on input. |
|
|
# anything to read or accept? |
my $outfdset; # Bit vec of fd's to select on output. |
foreach $client ($select->can_read(1)) { |
|
|
|
if ($client == $server) { |
|
# accept a new connection |
|
|
|
$client = $server->accept(); |
$infdset = MakeFileVector(\%servers); |
$select->add($client); |
$outfdset= MakeFileVector(\%outbuffer); |
nonblock($client); |
vec($infdset, $server->fileno, 1) = 1; |
} else { |
if($DEBUG) { |
# read data |
&logthis("Adding ".$server->fileno. |
$data = ''; |
" to input select vector (listner)". |
$rv = $client->recv($data, POSIX::BUFSIZ, 0); |
unpack("b*",$infdset)."\n"); |
|
} |
unless (defined($rv) && length $data) { |
DoSelect(\$infdset, \$outfdset); # Wait for input. |
# This would be the end of file, so close the client |
if($DEBUG) { |
delete $inbuffer{$client}; |
&logthis("Doselect completed!"); |
delete $outbuffer{$client}; |
&logthis("ins = ".unpack("b*",$infdset)."\n"); |
delete $ready{$client}; |
&logthis("outs= ".unpack("b*",$outfdset)."\n"); |
|
|
$select->remove($client); |
} |
close $client; |
|
next; |
|
} |
|
|
|
$inbuffer{$client} .= $data; |
# Checkfor new connections: |
|
if (vec($infdset, $server->fileno, 1)) { |
|
if($DEBUG) { |
|
&logthis("New connection established"); |
|
} |
|
# accept a new connection |
|
&status("Accept new connection: $conserver"); |
|
$client = $server->accept(); |
|
if($DEBUG) { |
|
&logthis("New client fd = ".$client->fileno."\n"); |
|
} |
|
$servers{$client->fileno} = $client; |
|
nonblock($client); |
|
$client->sockopt(SO_KEEPALIVE, 1);# Enable monitoring of |
|
# connection liveness. |
|
} |
|
HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready); |
|
HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer, |
|
\%ready); |
|
# -------------------------------------------------------- Wow, connection lost |
|
|
# test whether the data in the buffer or the data we |
} |
# just read means there is a complete request waiting |
|
# to be fulfilled. If there is, set $ready{$client} |
|
# to the requests waiting to be fulfilled. |
|
while ($inbuffer{$client} =~ s/(.*\n)//) { |
|
push( @{$ready{$client}}, $1 ); |
|
} |
|
} |
|
} |
} |
|
} |
|
|
# Any complete requests to process? |
# ------------------------------------------------------- End of make_new_child |
foreach $client (keys %ready) { |
|
handle($client); |
|
|
# |
|
# Make a vector of file descriptors to wait for in a select. |
|
# parameters: |
|
# \%fdhash -reference to a hash which has IO::Socket's as indices. |
|
# We only care about the indices, not the values. |
|
# A select vector is created from all indices of the hash. |
|
|
|
sub MakeFileVector |
|
{ |
|
my $fdhash = shift; |
|
my $selvar = ""; |
|
|
|
foreach $socket (keys %$fdhash) { |
|
if($DEBUG) { |
|
&logthis("Adding ".$socket. |
|
"to select vector. (client)\n"); |
|
} |
|
vec($selvar, $socket, 1) = 1; |
} |
} |
|
return $selvar; |
|
} |
|
|
# Buffers to flush? |
|
foreach $client ($select->can_write(1)) { |
# |
# Skip this client if we have nothing to say |
# HandleOutput: |
next unless exists $outbuffer{$client}; |
# Processes output on a buffered set of file descriptors which are |
|
# ready to be read. |
$rv = $client->send($outbuffer{$client}, 0); |
# Parameters: |
unless (defined $rv) { |
# $selvector - Vector of file descriptors which are writable. |
# Whine, but move on. |
# \%sockets - Vector of socket references indexed by socket. |
warn "I was told I could write, but I can't.\n"; |
# \%buffers - Reference to a hash containing output buffers. |
next; |
# Hashes are indexed by sockets. The file descriptors of some |
} |
# of those sockets will be present in $selvector. |
if (($rv == length $outbuffer{$client}) || |
# For each one of those, we will attempt to write the output |
($! == POSIX::EWOULDBLOCK)) { |
# buffer to the socket. Note that we will assume that |
substr($outbuffer{$client}, 0, $rv) = ''; |
# the sockets are being run in non blocking mode. |
delete $outbuffer{$client} unless length $outbuffer{$client}; |
# \%inbufs - Reference to hash containing input buffers. |
} else { |
# \%readys - Reference to hash containing flags for items with complete |
# Couldn't write all the data, and it wasn't because |
# requests. |
# it would have blocked. Shutdown and move on. |
# |
delete $inbuffer{$client}; |
sub HandleOutput |
delete $outbuffer{$client}; |
{ |
delete $ready{$client}; |
my $selvector = shift; |
|
my $sockets = shift; |
$select->remove($client); |
my $buffers = shift; |
close($client); |
my $inbufs = shift; |
next; |
my $readys = shift; |
} |
my $sock; |
|
|
|
if($DEBUG) { |
|
&logthis("HandleOutput entered\n"); |
|
} |
|
|
|
foreach $sock (keys %$sockets) { |
|
my $socket = $sockets->{$sock}; |
|
if(vec($selvector, $sock, 1)) { # $socket is writable. |
|
if($DEBUG) { |
|
&logthis("Sending $buffers->{$sock} \n"); |
|
} |
|
my $rv = $socket->send($buffers->{$sock}, 0); |
|
$errno = $!; |
|
unless ($buffers->{$sock} eq "con_lost\n") { |
|
unless (defined $rv) { # Write failed... could be EINTR |
|
unless ($errno == POSIX::EINTR) { |
|
&logthis("Write failed on writable socket"); |
|
} # EINTR is not an error .. just retry. |
|
next; |
|
} |
|
if( ($rv == length $buffers->{$sock}) || |
|
($errno == POSIX::EWOULDBLOCK) || |
|
($errno == POSIX::EAGAIN) || # same as above. |
|
($errno == POSIX::EINTR) || # signal during IO |
|
($errno == 0)) { |
|
substr($buffers->{$sock}, 0, $rv)=""; # delete written part |
|
delete $buffers->{$sock} unless length $buffers->{$sock}; |
|
} else { |
|
# For some reason the write failed with an error code |
|
# we didn't look for. Shutdown the socket. |
|
&logthis("Unable to write data with ".$errno.": ". |
|
"Dropping data: ".length($buffers->{$sock}). |
|
", $rv"); |
|
# |
|
# kill off the buffers in the hash: |
|
|
|
delete $buffers->{$sock}; |
|
delete $inbufs->{$sock}; |
|
delete $readys->{$sock}; |
|
|
|
close($socket); # Close the client socket. |
|
next; |
|
} |
|
} else { # Kludgy way to mark lond connection lost. |
|
&logthis( |
|
"<font color=red>CRITICAL lond connection lost</font>"); |
|
status("Connection lost"); |
|
$remotesock->shutdown(2); |
|
&logthis("Attempting to open a new connection"); |
|
&openremote($conserver); |
|
} |
|
|
|
} |
} |
} |
|
|
} |
} |
|
# |
|
# HandleInput - Deals with input on client sockets. |
|
# Each socket has an associated input buffer. |
|
# For each readable socket, the currently available |
|
# data is appended to this buffer. |
|
# If necessary, the buffer is created. |
|
# On various failures, we may shutdown the client. |
|
# Parameters: |
|
# $selvec - Vector of readable sockets. |
|
# \%sockets - Refers to the Hash of sockets indexed by sockets. |
|
# Each of these may or may not have it's fd bit set |
|
# in the $selvec. |
|
# \%ibufs - Refers to the hash of input buffers indexed by socket. |
|
# \%obufs - Hash of output buffers indexed by socket. |
|
# \%ready - Hash of ready flags indicating the existence of a completed |
|
# Request. |
|
sub HandleInput |
|
{ |
|
|
|
# Marshall the parameters. Note that the hashes are actually |
|
# references not values. |
|
|
|
my $selvec = shift; |
|
my $sockets = shift; |
|
my $ibufs = shift; |
|
my $obufs = shift; |
|
my $ready = shift; |
|
my $sock; |
|
|
|
if($DEBUG) { |
|
&logthis("Entered HandleInput\n"); |
|
} |
|
foreach $sock (keys %$sockets) { |
|
my $socket = $sockets->{$sock}; |
|
if(vec($selvec, $sock, 1)) { # Socket which is readable. |
|
|
|
# Attempt to read the data and do error management. |
|
my $data = ''; |
|
my $rv = $socket->recv($data, POSIX::BUFSIZ, 0); |
|
if($DEBUG) { |
|
&logthis("Received $data from socket"); |
|
} |
|
unless (defined($rv) && length $data) { |
|
|
|
# Read an end of file.. this is a disconnect from the peer. |
|
|
|
delete $sockets->{$sock}; |
|
delete $ibufs->{$sock}; |
|
delete $obufs->{$sock}; |
|
delete $ready->{$sock}; |
|
|
|
status("Idle"); |
|
close $socket; |
|
next; |
|
} |
|
# Append the read data to the input buffer. If the buffer |
|
# now contains a \n the request is complete and we can |
|
# mark this in the $ready hash (one request for each \n.) |
|
|
|
$ibufs->{$sock} .= $data; |
|
while($ibufs->{$sock} =~ s/(.*\n)//) { |
|
push(@{$ready->{$sock}}, $1); |
|
} |
|
|
|
} |
|
} |
|
# Now handle any requests which are ready: |
|
|
|
foreach $client (keys %ready) { |
|
handle($client); |
|
} |
} |
} |
|
|
# ------------------------------------------------------- End of make_new_child |
# DoSelect: does a select with no timeout. On signal (errno == EINTR), |
|
# the select is retried until there are items in the returned |
|
# vectors. |
|
# |
|
# Parameters: |
|
# \$readvec - Reference to a vector of file descriptors to |
|
# check for readability. |
|
# \$writevec - Reference to a vector of file descriptors to check for |
|
# writability. |
|
# On exit, the referents are modified with vectors indicating which |
|
# file handles are readable/writable. |
|
# |
|
sub DoSelect { |
|
my $readvec = shift; |
|
my $writevec= shift; |
|
my $outs; |
|
my $ins; |
|
|
|
while (1) { |
|
my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef); |
|
if($nfds) { |
|
if($DEBUG) { |
|
&logthis("select exited with ".$nfds." fds\n"); |
|
&logthis("ins = ".unpack("b*",$ins). |
|
" readvec = ".unpack("b*",$$readvec)."\n"); |
|
&logthis("outs = ".unpack("b*",$outs). |
|
" writevec = ".unpack("b*",$$writevec)."\n"); |
|
} |
|
$$readvec = $ins; |
|
$$writevec = $outs; |
|
return; |
|
} else { |
|
if($DEBUG) { |
|
&logthis("Select exited with no bits set in mask\n"); |
|
} |
|
die "Select failed" unless $! == EINTR; |
|
} |
|
} |
|
} |
|
|
# handle($socket) deals with all pending requests for $client |
# handle($socket) deals with all pending requests for $client |
|
# |
sub handle { |
sub handle { |
# requests are in $ready{$client} |
# requests are in $ready{$client} |
# send output to $outbuffer{$client} |
# send output to $outbuffer{$client} |
my $client = shift; |
my $client = shift; |
my $request; |
my $request; |
|
|
foreach $request (@{$ready{$client}}) { |
foreach $request (@{$ready{$client}}) { |
# ============================================================= Process request |
# ============================================================= Process request |
# $request is the text of the request |
# $request is the text of the request |
# put text of reply into $outbuffer{$client} |
# put text of reply into $outbuffer{$client} |
|
# ------------------------------------------------------------ Is this the end? |
|
chomp($request); |
|
if($DEBUG) { |
|
&logthis("<font color=green> Request $request processing starts</font>"); |
|
} |
|
if ($request eq "close_connection_exit\n") { |
|
&status("Request close connection"); |
|
&logthis( |
|
"<font color=red>CRITICAL: Request Close Connection ... exiting</font>"); |
|
$remotesock->shutdown(2); |
|
$server->close(); |
|
exit; |
|
} |
# ----------------------------------------------------------------------------- |
# ----------------------------------------------------------------------------- |
if ($request =~ /^encrypt\:/) { |
if ($request =~ /^encrypt\:/) { |
my $cmd=$request; |
my $cmd=$request; |
Line 490 sub handle {
|
Line 635 sub handle {
|
$encrequest.= |
$encrequest.= |
unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8))); |
unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8))); |
} |
} |
$request="enc:$cmdlength:$encrequest\n"; |
$request="enc:$cmdlength:$encrequest"; |
} |
} |
print $remotesock "$request"; |
# --------------------------------------------------------------- Main exchange |
$answer=<$remotesock>; |
$answer = londtransaction($remotesock, $request, 300); |
|
|
|
if($DEBUG) { |
|
&logthis("<font color=green> Request data exchange complete"); |
|
} |
|
if ($@=~/timeout/) { |
|
$answer=''; |
|
&logthis( |
|
"<font color=red>CRITICAL: Timeout: $request</font>"); |
|
} |
|
|
|
|
if ($answer) { |
if ($answer) { |
if ($answer =~ /^enc/) { |
if ($answer =~ /^enc/) { |
my ($cmd,$cmdlength,$encinput)=split(/:/,$answer); |
my ($cmd,$cmdlength,$encinput)=split(/:/,$answer); |
Line 507 sub handle {
|
Line 663 sub handle {
|
$answer=substr($answer,0,$cmdlength); |
$answer=substr($answer,0,$cmdlength); |
$answer.="\n"; |
$answer.="\n"; |
} |
} |
|
if($DEBUG) { |
|
&logthis("sending $answer to client\n"); |
|
} |
$outbuffer{$client} .= $answer; |
$outbuffer{$client} .= $answer; |
} else { |
} else { |
$outbuffer{$client} .= "con_lost\n"; |
$outbuffer{$client} .= "con_lost\n"; |
} |
} |
|
|
|
&status("Completed: $request"); |
|
if($DEBUG) { |
|
&logthis("<font color=green> Request processing complete</font>"); |
|
} |
# ===================================================== Done processing request |
# ===================================================== Done processing request |
} |
} |
delete $ready{$client}; |
delete $ready{$client}; |
# -------------------------------------------------------------- End non-forker |
# -------------------------------------------------------------- End non-forker |
|
if($DEBUG) { |
|
&logthis("<font color=green> requests for child handled</font>"); |
|
} |
} |
} |
# ---------------------------------------------------------- End make_new_child |
# ---------------------------------------------------------- End make_new_child |
} |
|
|
|
# nonblock($socket) puts socket into nonblocking mode |
# nonblock($socket) puts socket into nonblocking mode |
sub nonblock { |
sub nonblock { |
Line 532 sub nonblock {
|
Line 697 sub nonblock {
|
or die "Can't make socket nonblocking: $!\n"; |
or die "Can't make socket nonblocking: $!\n"; |
} |
} |
|
|
|
|
|
sub openremote { |
|
# ---------------------------------------------------- Client to network server |
|
|
|
my $conserver=shift; |
|
|
|
&status("Opening TCP $conserver"); |
|
my $st=120+int(rand(240)); # Sleep before opening: |
|
|
|
unless ( |
|
$remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, |
|
PeerPort => $perlvar{'londPort'}, |
|
Proto => "tcp", |
|
Type => SOCK_STREAM) |
|
) { |
|
|
|
&logthis( |
|
"<font color=blue>WARNING: Couldn't connect to $conserver ($st secs): </font>"); |
|
sleep($st); |
|
exit; |
|
}; |
|
# ----------------------------------------------------------------- Init dialog |
|
|
|
&logthis("<font color=green>INFO Connected to $conserver, initing</font>"); |
|
&status("Init dialogue: $conserver"); |
|
|
|
$answer = londtransaction($remotesock, "init", 60); |
|
chomp($answer); |
|
$answer = londtransaction($remotesock, $answer, 60); |
|
chomp($answer); |
|
|
|
if ($@=~/timeout/) { |
|
&logthis("Timed out during init.. exiting"); |
|
exit; |
|
} |
|
|
|
if ($answer ne 'ok') { |
|
&logthis("Init reply: >$answer<"); |
|
my $st=120+int(rand(240)); |
|
&logthis("<font color=blue>WARNING: Init failed ($st secs)</font>"); |
|
sleep($st); |
|
exit; |
|
} |
|
|
|
$answer = londtransaction($remotesock,"sethost:$conserver",60); |
|
chomp($answer); |
|
if ( $answer ne 'ok') { |
|
&logthis('<font color="blue">WARNING: unable to specify remote host'. |
|
$answer.'</font>'); |
|
} |
|
|
|
$answer = londtransaction($remotesock,"version:$VERSION",60); |
|
chomp($answer); |
|
if ($answer =~ /^version:/) { |
|
$remoteVERSION=(split(/:/,$answer))[1]; |
|
} else { |
|
&logthis('<font color="blue">WARNING: request remote version failed :'. |
|
$answer.': my version is :'.$VERSION.':</font>'); |
|
} |
|
|
|
sleep 5; |
|
&status("Ponging $conserver"); |
|
print $remotesock "pong\n"; |
|
$answer=<$remotesock>; |
|
chomp($answer); |
|
if ($answer!~/^$conserver/) { |
|
&logthis("Pong reply: >$answer<"); |
|
} |
|
# ----------------------------------------------------------- Initialize cipher |
|
|
|
&status("Initialize cipher"); |
|
print $remotesock "ekey\n"; |
|
my $buildkey=<$remotesock>; |
|
my $key=$conserver.$perlvar{'lonHostID'}; |
|
$key=~tr/a-z/A-Z/; |
|
$key=~tr/G-P/0-9/; |
|
$key=~tr/Q-Z/0-9/; |
|
$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; |
|
$key=substr($key,0,32); |
|
my $cipherkey=pack("H32",$key); |
|
if ($cipher=new IDEA $cipherkey) { |
|
&logthis("Secure connection initialized"); |
|
} else { |
|
my $st=120+int(rand(240)); |
|
&logthis("<font color=blue>WARNING: ". |
|
"Could not establish secure connection ($st secs)!</font>"); |
|
sleep($st); |
|
exit; |
|
} |
|
&logthis("<font color=green> Remote open success </font>"); |
|
} |
|
|
|
|
|
|
|
# grabs exception and records it to log before exiting |
|
sub catchexception { |
|
my ($signal)=@_; |
|
$SIG{QUIT}='DEFAULT'; |
|
$SIG{__DIE__}='DEFAULT'; |
|
chomp($signal); |
|
&logthis("<font color=red>CRITICAL: " |
|
."ABNORMAL EXIT. Child $$ for server [$wasserver] died through " |
|
."\"$signal\" with parameter </font>"); |
|
die("Signal abend"); |
|
} |
|
|
|
# -------------------------------------- Routines to see if other box available |
|
|
|
#sub online { |
|
# my $host=shift; |
|
# &status("Pinging ".$host); |
|
# my $p=Net::Ping->new("tcp",20); |
|
# my $online=$p->ping("$host"); |
|
# $p->close(); |
|
# undef ($p); |
|
# return $online; |
|
#} |
|
|
|
sub connected { |
|
my ($local,$remote)=@_; |
|
&status("Checking connection $local to $remote"); |
|
$local=~s/\W//g; |
|
$remote=~s/\W//g; |
|
|
|
unless ($hostname{$local}) { return 'local_unknown'; } |
|
unless ($hostname{$remote}) { return 'remote_unknown'; } |
|
|
|
#unless (&online($hostname{$local})) { return 'local_offline'; } |
|
|
|
my $ua=new LWP::UserAgent; |
|
|
|
my $request=new HTTP::Request('GET', |
|
"http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote); |
|
|
|
my $response=$ua->request($request); |
|
|
|
unless ($response->is_success) { return 'local_error'; } |
|
|
|
my $reply=$response->content; |
|
$reply=(split("\n",$reply))[0]; |
|
$reply=~s/\W//g; |
|
if ($reply ne $remote) { return $reply; } |
|
return 'ok'; |
|
} |
|
|
|
|
|
|
|
sub hangup { |
|
foreach (keys %children) { |
|
$wasserver=$children{$_}; |
|
&status("Closing $wasserver"); |
|
&logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); |
|
&status("Kill PID $_ for $wasserver"); |
|
kill ('INT',$_); |
|
} |
|
} |
|
|
|
sub HUNTSMAN { # signal handler for SIGINT |
|
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children |
|
&hangup(); |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
unlink("$execdir/logs/lonc.pid"); |
|
&logthis("<font color=red>CRITICAL: Shutting down</font>"); |
|
exit; # clean up with dignity |
|
} |
|
|
|
sub HUPSMAN { # signal handler for SIGHUP |
|
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children |
|
&hangup(); |
|
&logthis("<font color=red>CRITICAL: Restarting</font>"); |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
unlink("$execdir/logs/lonc.pid"); |
|
exec("$execdir/lonc"); # here we go again |
|
} |
|
|
|
sub checkchildren { |
|
&initnewstatus(); |
|
&logstatus(); |
|
&logthis('Going to check on the children'); |
|
foreach (sort keys %children) { |
|
sleep 1; |
|
unless (kill 'USR1' => $_) { |
|
&logthis ('<font color=red>CRITICAL: Child '.$_.' is dead</font>'); |
|
&logstatus($$.' is dead'); |
|
} |
|
} |
|
} |
|
|
|
sub USRMAN { |
|
&logthis("USR1: Trying to establish connections again"); |
|
# |
|
# It is really important not to just clear the childatt hash or we will |
|
# lose all memory of the children. What we really want to do is this: |
|
# For each index where childatt is >= $childmaxattempts |
|
# Zero the associated counter and do a make_child for the host. |
|
# Regardles, the childatt entry is zeroed: |
|
my $host; |
|
foreach $host (keys %childatt) { |
|
if ($childatt{$host} >= $childmaxattempts) { |
|
$childatt{$host} = 0; |
|
&logthis("<font color=green>INFO: Restarting child for server: " |
|
.$host."</font>\n"); |
|
make_new_child($host); |
|
} |
|
else { |
|
$childatt{$host} = 0; |
|
} |
|
} |
|
&checkchildren(); # See if any children are still dead... |
|
} |
|
|
|
# -------------------------------------------------- Non-critical communication |
|
sub subreply { |
|
my ($cmd,$server)=@_; |
|
my $answer=''; |
|
if ($server ne $perlvar{'lonHostID'}) { |
|
my $peerfile="$perlvar{'lonSockDir'}/$server"; |
|
my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", |
|
Type => SOCK_STREAM, |
|
Timeout => 10) |
|
or return "con_lost"; |
|
|
|
|
|
$answer = londtransaction($sclient, $cmd, 10); |
|
|
|
if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } |
|
$SIG{ALRM}='DEFAULT'; |
|
$SIG{__DIE__}=\&catchexception; |
|
} else { $answer='self_reply'; } |
|
return $answer; |
|
} |
|
|
|
# --------------------------------------------------------------------- Logging |
|
|
|
sub logthis { |
|
my $message=shift; |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
my $fh=IO::File->new(">>$execdir/logs/lonc.log"); |
|
my $now=time; |
|
my $local=localtime($now); |
|
$lastlog=$local.': '.$message; |
|
print $fh "$local ($$) [$conserver] [$status]: $message\n"; |
|
} |
|
|
|
#-------------------------------------- londtransaction: |
|
# |
|
# Performs a transaction with lond with timeout support. |
|
# result = londtransaction(socket,request,timeout) |
|
# |
|
sub londtransaction { |
|
my ($socket, $request, $tmo) = @_; |
|
|
|
if($DEBUG) { |
|
&logthis("londtransaction request: $request"); |
|
} |
|
|
|
# Set the signal handlers: ALRM for timeout and disble the others. |
|
|
|
$SIG{ALRM} = sub { die "timeout" }; |
|
$SIG{__DIE__} = 'DEFAULT'; |
|
|
|
# Disable all but alarm so that only that can interupt the |
|
# send /receive. |
|
# |
|
my $sigset = POSIX::SigSet->new(QUIT, USR1, HUP, INT, TERM); |
|
my $priorsigs = POSIX::SigSet->new; |
|
unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) { |
|
&logthis("<font color=red> CRITICAL -- londtransaction ". |
|
"failed to block signals </font>"); |
|
die "could not block signals in londtransaction"; |
|
} |
|
$answer = ''; |
|
# |
|
# Send request to lond. |
|
# |
|
eval { |
|
alarm($tmo); |
|
print $socket "$request\n"; |
|
alarm(0); |
|
}; |
|
# If request didn't timeout, try for the response. |
|
# |
|
|
|
if ($@!~/timeout/) { |
|
eval { |
|
alarm($tmo); |
|
$answer = <$socket>; |
|
if($DEBUG) { |
|
&logthis("Received $answer in londtransaction"); |
|
} |
|
alarm(0); |
|
}; |
|
} else { |
|
&logthis("lonc - suiciding on send Timeout"); |
|
die("lonc - suiciding on send Timeout"); |
|
} |
|
if ($@ =~ /timeout/) { |
|
&logthis("lonc - suiciding on read Timeout"); |
|
die("lonc - suiciding on read Timeout"); |
|
} |
|
# |
|
# Restore the initial sigmask set. |
|
# |
|
unless (defined sigprocmask(SIG_UNBLOCK, $priorsigs)) { |
|
&logthis("<font color=red> CRITICAL -- londtransaction ". |
|
"failed to re-enable signal processing. </font>"); |
|
die "londtransaction failed to re-enable signals"; |
|
} |
|
# |
|
# go back to the prior handler set. |
|
# |
|
$SIG{ALRM} = 'DEFAULT'; |
|
$SIG{__DIE__} = \&cathcexception; |
|
|
|
# chomp $answer; |
|
if ($DEBUG) { |
|
&logthis("Returning $answer in londtransaction"); |
|
} |
|
return $answer; |
|
|
|
} |
|
|
|
sub logperm { |
|
my $message=shift; |
|
my $execdir=$perlvar{'lonDaemons'}; |
|
my $now=time; |
|
my $local=localtime($now); |
|
my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); |
|
print $fh "$now:$message:$local\n"; |
|
} |
|
# ------------------------------------------------------------------ Log status |
|
|
|
sub logstatus { |
|
my $docdir=$perlvar{'lonDocRoot'}; |
|
my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); |
|
print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n"; |
|
} |
|
|
|
sub initnewstatus { |
|
my $docdir=$perlvar{'lonDocRoot'}; |
|
my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); |
|
my $now=time; |
|
my $local=localtime($now); |
|
print $fh "LONC status $local - parent $$\n\n"; |
|
} |
|
|
|
# -------------------------------------------------------------- Status setting |
|
|
|
sub status { |
|
my $what=shift; |
|
my $now=time; |
|
my $local=localtime($now); |
|
$status=$local.': '.$what; |
|
$0='lonc: '.$what.' '.$local; |
|
} |
|
|
|
|
|
|
|
# ----------------------------------- POD (plain old documentation, CPAN style) |
|
|
|
=head1 NAME |
|
|
|
lonc - LON TCP-MySQL-Server Daemon for handling database requests. |
|
|
|
=head1 SYNOPSIS |
|
|
|
Usage: B<lonc> |
|
|
|
Should only be run as user=www. This is a command-line script which |
|
is invoked by B<loncron>. There is no expectation that a typical user |
|
will manually start B<lonc> from the command-line. (In other words, |
|
DO NOT START B<lonc> YOURSELF.) |
|
|
|
=head1 OVERVIEW |
|
|
|
=head2 Physical Overview |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\begin{center} |
|
\includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram} |
|
\end{center} |
|
\caption{\label{Overview_Of_Network}Overview of Network} |
|
\end{figure} |
|
|
|
=end latex |
|
|
|
Physically, the Network consists of relatively inexpensive |
|
upper-PC-class server machines which are linked through the commodity |
|
internet in a load-balancing, dynamically content-replicating and |
|
failover-secure way. |
|
|
|
All machines in the Network are connected with each other through |
|
two-way persistent TCP/IP connections. Clients (B<B>, B<F>, B<G> and |
|
B<H> in Fig. Overview of Network) connect to the servers via standard |
|
HTTP. There are two classes of servers, B<Library Servers> (B<A> and |
|
B<E> in Fig. Overview of Network) and B<Access Servers> (B<C>, B<D>, |
|
B<I> and B<J> in Fig. Overview of Network). |
|
|
|
B<Library Servers> X<library server> X<server, library> are used to |
|
store all personal records of a set of users, and are responsible for |
|
their initial authentication when a session is opened on any server in |
|
the Network. For Authors, Library Servers also hosts their |
|
construction area and the authoritative copy of the current and |
|
previous versions of every resource that was published by that |
|
author. Library servers can be used as backups to host sessions when |
|
all access servers in the Network are overloaded. Otherwise, for |
|
learners, access servers are used to host the sessions. Library |
|
servers need to have strong I/O capabilities. |
|
|
|
B<Access Servers> X<access server> X<server, access> provide LON-CAPA |
|
service to users, using the library servers as their data source. The |
|
network is designed so that the number of concurrent sessions can be |
|
increased over a wide range by simply adding additional access servers |
|
before having to add additional library servers. Preliminary tests |
|
showed that a library server could handle up to 10 access servers |
|
fully parallel. Access servers can generally be cheaper hardware then |
|
library servers require. |
|
|
|
The Network is divided into B<domains> X<domain>, which are logical |
|
boundaries between participating institutions. These domains can be |
|
used to limit the flow of personal user information across the |
|
network, set access privileges and enforce royalty schemes. LON-CAPA |
|
domains bear no relationship to any other domain, including domains |
|
used by the DNS system; LON-CAPA domains may be freely configured in |
|
any manner that suits your use pattern. |
|
|
|
=head2 Example Transactions |
|
|
|
Fig. Overview of Network also depicts examples for several kinds of |
|
transactions conducted across the Network. |
|
|
|
An instructor at client B<B> modifies and publishes a resource on her |
|
Home Server B<A>. Server B<A> has a record of all server machines |
|
currently subscribed to this resource, and replicates it to servers |
|
B<D> and B<I>. However, server B<D> is currently offline, so the |
|
update notification gets buffered on B<A> until B<D> comes online |
|
again. Servers B<C> and B<J> are currently not subscribed to this |
|
resource. |
|
|
|
Learners B<F> and B<G> have open sessions on server B<I>, and the new |
|
resource is immediately available to them. |
|
|
|
Learner B<H> tries to connect to server B<I> for a new session, |
|
however, the machine is not reachable, so he connects to another |
|
Access Server B<J> instead. This server currently does not have all |
|
necessary resources locally present to host learner B<H>, but |
|
subscribes to them and replicates them as they are accessed by B<H>. |
|
|
|
Learner B<H> solves a problem on server B<J>. Library Server B<E> is |
|
B<H>'s Home Server, so this information gets forwarded to B<E>, where |
|
the records of H are updated. |
|
|
|
=head2 lond, lonc, and lonnet |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2} |
|
\caption{\label{Overview_Of_Network_Communication}Overview of |
|
Network Communication} \end{figure} |
|
|
|
=end latex |
|
|
|
Fig. Overview of Network Communication elaborates on the details of |
|
this network infrastructure. It depicts three servers (B<A>, B<B> and |
|
B<C>) and a client who has a session on server B<C>. |
|
|
|
As B<C> accesses different resources in the system, different |
|
handlers, which are incorporated as modules into the child processes |
|
of the web server software, process these requests. |
|
|
|
Our current implementation uses C<mod_perl> inside of the Apache web |
|
server software. As an example, server B<C> currently has four active |
|
web server software child processes. The chain of handlers dealing |
|
with a certain resource is determined by both the server content |
|
resource area (see below) and the MIME type, which in turn is |
|
determined by the URL extension. For most URL structures, both an |
|
authentication handler and a content handler are registered. |
|
|
|
Handlers use a common library C<lonnet> X<lonnet> to interact with |
|
both locally present temporary session data and data across the server |
|
network. For example, lonnet provides routines for finding the home |
|
server of a user, finding the server with the lowest loadavg, sending |
|
simple command-reply sequences, and sending critical messages such as |
|
a homework completion, etc. For a non-critical message, the routines |
|
reply with a simple "connection lost" if the message could not be |
|
delivered. For critical messages, lonnet tries to re-establish |
|
connections, re-send the command, etc. If no valid reply could be |
|
received, it answers "connection deferred" and stores the message in |
|
buffer space to be sent at a later point in time. Also, failed |
|
critical messages are logged. |
|
|
|
The interface between C<lonnet> and the Network is established by a |
|
multiplexed UNIX domain socket, denoted B<DS> in Fig. Overview of |
|
Network Communication. The rationale behind this rather involved |
|
architecture is that httpd processes (Apache children) dynamically |
|
come and go on the timescale of minutes, based on workload and number |
|
of processed requests. Over the lifetime of an httpd child, however, |
|
it has to establish several hundred connections to several different |
|
servers in the Network. |
|
|
|
On the other hand, establishing a TCP/IP connection is resource |
|
consuming for both ends of the line, and to optimize this connectivity |
|
between different servers, connections in the Network are designed to |
|
be persistent on the timescale of months, until either end is |
|
rebooted. This mechanism will be elaborated on below. |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\begin{lyxcode} |
|
msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51 |
|
|
|
msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68 |
|
|
|
msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69 |
|
|
|
msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67 |
|
|
|
hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34 |
|
|
|
hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35 |
|
|
|
hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36 |
|
|
|
huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40 |
|
|
|
huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41 |
|
|
|
huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42 |
|
|
|
huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43 |
|
|
|
hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45 |
|
|
|
huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47 |
|
\end{lyxcode} |
|
|
|
\caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}} |
|
\end{figure} |
|
|
|
=end latex |
|
|
|
Establishing a connection to a UNIX domain socket is far less resource |
|
consuming than the establishing of a TCP/IP connection. C<lonc> |
|
X<lonc> is a proxy daemon that forks off a child for every server in |
|
the Network. Which servers are members of the Network is determined by |
|
a lookup table, such as the one in Fig. Examples of Hosts. In order, |
|
the entries denote an internal name for the server, the domain of the |
|
server, the type of the server, the host name and the IP address. |
|
|
|
The C<lonc> parent process maintains the population and listens for |
|
signals to restart or shutdown, as well as I<USR1>. Every child |
|
establishes a multiplexed UNIX domain socket for its server and opens |
|
a TCP/IP connection to the lond daemon (discussed below) on the remote |
|
machine, which it keeps alive. If the connection is interrupted, the |
|
child dies, whereupon the parent makes several attempts to fork |
|
another child for that server. |
|
|
|
When starting a new child (a new connection), first an init-sequence |
|
is carried out, which includes receiving the information from the |
|
remote C<lond> which is needed to establish the 128-bit encryption key |
|
- the key is different for every connection. Next, any buffered |
|
(delayed) messages for the server are sent. |
|
|
|
In normal operation, the child listens to the UNIX socket, forwards |
|
requests to the TCP connection, gets the reply from C<lond>, and sends |
|
it back to the UNIX socket. Also, C<lonc> takes care to the encryption |
|
and decryption of messages. |
|
|
|
C<lond> X<lond> is the remote end of the TCP/IP connection and acts as |
|
a remote command processor. It receives commands, executes them, and |
|
sends replies. In normal operation, a C<lonc> child is constantly |
|
connected to a dedicated C<lond> child on the remote server, and the |
|
same is true vice versa (two persistent connections per server |
|
combination). |
|
|
|
lond listens to a TCP/IP port (denoted B<P> in Fig. Overview of |
|
Network Communication) and forks off enough child processes to have |
|
one for each other server in the network plus two spare children. The |
|
parent process maintains the population and listens for signals to |
|
restart or shutdown. Client servers are authenticated by IP. |
|
|
|
When a new client server comes online, C<lond> sends a signal I<USR1> |
|
to lonc, whereupon C<lonc> tries again to reestablish all lost |
|
connections, even if it had given up on them before - a new client |
|
connecting could mean that that machine came online again after an |
|
interruption. |
|
|
|
The gray boxes in Fig. Overview of Network Communication denote the |
|
entities involved in an example transaction of the Network. The Client |
|
is logged into server B<C>, while server B<B> is her Home |
|
Server. Server B<C> can be an access server or a library server, while |
|
server B<B> is a library server. She submits a solution to a homework |
|
problem, which is processed by the appropriate handler for the MIME |
|
type "problem". Through C<lonnet>, the handler writes information |
|
about this transaction to the local session data. To make a permanent |
|
log entry, C<lonnet> establishes a connection to the UNIX domain |
|
socket for server B<B>. C<lonc> receives this command, encrypts it, |
|
and sends it through the persistent TCP/IP connection to the TCP/IP |
|
port of the remote C<lond>. C<lond> decrypts the command, executes it |
|
by writing to the permanent user data files of the client, and sends |
|
back a reply regarding the success of the operation. If the operation |
|
was unsuccessful, or the connection would have broken down, C<lonc> |
|
would write the command into a FIFO buffer stack to be sent again |
|
later. C<lonc> now sends a reply regarding the overall success of the |
|
operation to C<lonnet> via the UNIX domain port, which is eventually |
|
received back by the handler. |
|
|
|
=head2 Dynamic Resource Replication |
|
|
|
Since resources are assembled into higher order resources simply by |
|
reference, in principle it would be sufficient to retrieve them from |
|
the respective Home Servers of the authors. However, there are several |
|
problems with this simple approach: since the resource assembly |
|
mechanism is designed to facilitate content assembly from a large |
|
number of widely distributed sources, individual sessions would depend |
|
on a large number of machines and network connections to be available, |
|
thus be rather fragile. Also, frequently accessed resources could |
|
potentially drive individual machines in the network into overload |
|
situations. |
|
|
|
Finally, since most resources depend on content handlers on the Access |
|
Servers to be served to a client within the session context, the raw |
|
source would first have to be transferred across the Network from the |
|
respective Library Server to the Access Server, processed there, and |
|
then transferred on to the client. |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request} |
|
\caption{\label{Dynamic_Replication}Dynamic Replication} |
|
\end{figure} |
|
|
|
=end latex |
|
|
|
To enable resource assembly in a reliable and scalable way, a dynamic |
|
resource replication scheme was developed. Fig. "Dynamic Replication" |
|
shows the details of this mechanism. |
|
|
|
Anytime a resource out of the resource space is requested, a handler |
|
routine is called which in turn calls the replication routine. As a |
|
first step, this routines determines whether or not the resource is |
|
currently in replication transfer (Step B<D1a>). During replication |
|
transfer, the incoming data is stored in a temporary file, and Step |
|
B<D1a> checks for the presence of that file. If transfer of a resource |
|
is actively going on, the controlling handler receives an error |
|
message, waits for a few seconds, and then calls the replication |
|
routine again. If the resource is still in transfer, the client will |
|
receive the message "Service currently not available". |
|
|
|
In the next step (Step B<D1b>), the replication routine checks if the |
|
URL is locally present. If it is, the replication routine returns OK |
|
to the controlling handler, which in turn passes the request on to the |
|
next handler in the chain. |
|
|
|
If the resource is not locally present, the Home Server of the |
|
resource author (as extracted from the URL) is determined (Step |
|
B<D2>). This is done by contacting all library servers in the author?s |
|
domain (as determined from the lookup table, see Fig. 1.1.2B). In Step |
|
B<D2b> a query is sent to the remote server whether or not it is the |
|
Home Server of the author (in our current implementation, an |
|
additional cache is used to store already identified Home Servers (not |
|
shown in the figure)). In Step B<D2c>, the remote server answers the |
|
query with True or False. If the Home Server was found, the routine |
|
continues, otherwise it contacts the next server (Step D2a). If no |
|
server could be found, a "File not Found" error message is issued. In |
|
our current implementation, in this step the Home Server is also |
|
written into a cache for faster access if resources by the same author |
|
are needed again (not shown in the figure). |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change} |
|
\caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure} |
|
|
|
=end latex |
|
|
|
In Step B<D3a>, the routine sends a subscribe command for the URL to |
|
the Home Server of the author. The Home Server first determines if the |
|
resource is present, and if the access privileges allow it to be |
|
copied to the requesting server (B<D3b>). If this is true, the |
|
requesting server is added to the list of subscribed servers for that |
|
resource (Step B<D3c>). The Home Server will reply with either OK or |
|
an error message, which is determined in Step D4. If the remote |
|
resource was not present, the error message "File not Found" will be |
|
passed on to the client, if the access was not allowed, the error |
|
message "Access Denied" is passed on. If the operation succeeded, the |
|
requesting server sends an HTTP request for the resource out of the |
|
C</raw> server content resource area of the Home Server. |
|
|
|
The Home Server will then check if the requesting server is part of |
|
the network, and if it is subscribed to the resource (Step B<D5b>). If |
|
it is, it will send the resource via HTTP to the requesting server |
|
without any content handlers processing it (Step B<D5c>). The |
|
requesting server will store the incoming data in a temporary data |
|
file (Step B<D5a>) - this is the file that Step B<D1a> checks for. If |
|
the transfer could not complete, and appropriate error message is sent |
|
to the client (Step B<D6>). Otherwise, the transferred temporary file |
|
is renamed as the actual resource, and the replication routine returns |
|
OK to the controlling handler (Step B<D7>). |
|
|
|
Fig. "Dynamic Replication: Change" depicts the process of modifying a |
|
resource. When an author publishes a new version of a resource, the |
|
Home Server will contact every server currently subscribed to the |
|
resource (Step B<U1>), as determined from the list of subscribed |
|
servers for the resource generated in Step B<D3c>. The subscribing |
|
servers will receive and acknowledge the update message (Step |
|
B<U1c>). The update mechanism finishes when the last subscribed server |
|
has been contacted (messages to unreachable servers are buffered). |
|
|
|
Each subscribing server will check if the resource in question had |
|
been accessed recently, that is, within a configurable amount of time |
|
(Step B<U2>). |
|
|
|
If the resource had not been accessed recently, the local copy of the |
|
resource is deleted (Step B<U3a>) and an unsubscribe command is sent |
|
to the Home Server (Step B<U3b>). The Home Server will check if the |
|
server had indeed originally subscribed to the resource (Step B<U3c>) |
|
and then delete the server from the list of subscribed servers for the |
|
resource (Step B<U3d>). |
|
|
|
If the resource had been accessed recently, the modified resource will |
|
be copied over using the same mechanism as in Step B<D5a> through |
|
B<D7>, which represents steps Steps B<U4a> through B<U6> in the |
|
replication figure. |
|
|
|
=head2 Load Balancing |
|
|
|
X<load balancing>C<lond> provides a function to query the server's current loadavg. As |
|
a configuration parameter, one can determine the value of loadavg, |
|
which is to be considered 100%, for example, 2.00. |
|
|
|
Access servers can have a list of spare access servers, |
|
C</home/httpd/lonTabs/spares.tab>, to offload sessions depending on |
|
own workload. This check happens is done by the login handler. It |
|
re-directs the login information and session to the least busy spare |
|
server if itself is overloaded. An additional round-robin IP scheme |
|
possible. See Fig. "Load Balancing Sample" for an example of a |
|
load-balancing scheme. |
|
|
|
=begin latex |
|
|
|
\begin{figure} |
|
\includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example} |
|
\caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure} |
|
|
|
=end latex |
|
|
|
=head1 DESCRIPTION |
|
|
|
Provides persistent TCP connections to the other servers in the network |
|
through multiplexed domain sockets |
|
|
|
B<lonc> forks off children processes that correspond to the other servers |
|
in the network. Management of these processes can be done at the |
|
parent process level or the child process level. |
|
|
|
After forking off the children, B<lonc> the B<parent> executes a main |
|
loop which simply waits for processes to exit. As a process exits, a |
|
new process managing a link to the same peer as the exiting process is |
|
created. |
|
|
|
B<logs/lonc.log> is the location of log messages. |
|
|
|
The process management is now explained in terms of linux shell commands, |
|
subroutines internal to this code, and signal assignments: |
|
|
|
=over 4 |
|
|
|
=item * |
|
|
|
PID is stored in B<logs/lonc.pid> |
|
|
|
This is the process id number of the parent B<lonc> process. |
|
|
|
=item * |
|
|
|
SIGTERM and SIGINT |
|
|
|
Parent signal assignment: |
|
$SIG{INT} = $SIG{TERM} = \&HUNTSMAN; |
|
|
|
Child signal assignment: |
|
$SIG{INT} = 'DEFAULT'; (and SIGTERM is DEFAULT also) |
|
(The child dies and a SIGALRM is sent to parent, awaking parent from slumber |
|
to restart a new child.) |
|
|
|
Command-line invocations: |
|
B<kill> B<-s> SIGTERM I<PID> |
|
B<kill> B<-s> SIGINT I<PID> |
|
|
|
Subroutine B<HUNTSMAN>: |
|
This is only invoked for the B<lonc> parent I<PID>. |
|
This kills all the children, and then the parent. |
|
The B<lonc.pid> file is cleared. |
|
|
|
=item * |
|
|
|
SIGHUP |
|
|
|
Current bug: |
|
This signal can only be processed the first time |
|
on the parent process. Subsequent SIGHUP signals |
|
have no effect. |
|
|
|
Parent signal assignment: |
|
$SIG{HUP} = \&HUPSMAN; |
|
|
|
Child signal assignment: |
|
none (nothing happens) |
|
|
|
Command-line invocations: |
|
B<kill> B<-s> SIGHUP I<PID> |
|
|
|
Subroutine B<HUPSMAN>: |
|
This is only invoked for the B<lonc> parent I<PID>, |
|
This kills all the children, and then the parent. |
|
The B<lonc.pid> file is cleared. |
|
|
|
=item * |
|
|
|
SIGUSR1 |
|
|
|
Parent signal assignment: |
|
$SIG{USR1} = \&USRMAN; |
|
|
|
Child signal assignment: |
|
$SIG{USR1}= \&logstatus; |
|
|
|
Command-line invocations: |
|
B<kill> B<-s> SIGUSR1 I<PID> |
|
|
|
Subroutine B<USRMAN>: |
|
When invoked for the B<lonc> parent I<PID>, |
|
SIGUSR1 is sent to all the children, and the status of |
|
each connection is logged. |
|
|
|
|
|
=back |
|
|
|
=cut |