loncom/lonc - diff

Return to lonc CVS log

Up to [LON-CAPA] / loncom

Diff for /loncom/Attic/lonc between versions 1.14 and 1.52

-version 1.14, 2001/03/13 21:15:40
+version 1.52, 2003/07/25 01:16:29
  Line 5
  # provides persistent TCP connections to the other servers in the network
  # through multiplexed domain sockets
  #
+ # $Id$
+ #
+ # Copyright Michigan State University Board of Trustees
+ #
+ # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+ #
+ # LON-CAPA is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ #
+ # LON-CAPA is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with LON-CAPA; if not, write to the Free Software
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ #
+ # /home/httpd/html/adm/gpl.txt
+ #
+ # http://www.lon-capa.org/
+ #
  # PID in subdir logs/lonc.pid
  # kill kills
  # HUP restarts
- Line 13
+ Line 37
  # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
  # 10/8,10/9,10/15,11/18,12/22,
  # 2/8,7/25 Gerd Kortemeyer
- # 12/05 Scott Harrison
  # 12/05 Gerd Kortemeyer
- # 01/10/01 Scott Harrison
+ # YEAR=2001
- # 03/14/01 Gerd Kortemeyer
+ # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
- #
+ # YEAR=2002
+ # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
+ # 3/07/02 Ron Fox
  # based on nonforker from Perl Cookbook
  # - server who multiplexes without forking
+ use lib '/home/httpd/lib/perl/';
+ use LONCAPA::Configuration;
  use POSIX;
  use IO::Socket;
  use IO::Select;
- Line 29  use Socket;
+ Line 57  use Socket;
  use Fcntl;
  use Tie::RefHash;
  use Crypt::IDEA;
+ #use Net::Ping;
+ use LWP::UserAgent();
- # grabs exception and records it to log before exiting
+ $status='';
- sub catchexception {
+ $lastlog='';
-     my ($signal)=@_;
+ $conserver='SHELL';
-     $SIG{'QUIT'}='DEFAULT';
+ $DEBUG = 0;			# Set to 1 for annoyingly complete logs.
-     $SIG{__DIE__}='DEFAULT';
+ $VERSION='$Revison$'; #' stupid emacs
-     &logthis("<font color=red>CRITICAL: "
+ $remoteVERSION;
-      ."ABNORMAL EXIT. Child $$ for server $wasserver died through "
-      ."\"$signal\" with this parameter->[$@]</font>");
-     die($@);
- }
- $childmaxattempts=10;
  # -------------------------------- Set signal handlers to record abnormal exits
- $SIG{'QUIT'}=\&catchexception;
+ &status("Init exception handlers");
+ $SIG{QUIT}=\&catchexception;
  $SIG{__DIE__}=\&catchexception;
- # ------------------------------------ Read httpd access.conf and get variables
+ # ---------------------------------- Read loncapa_apache.conf and loncapa.conf
+ &status("Read loncapa.conf and loncapa_apache.conf");
- open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
+ my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
+ my %perlvar=%{$perlvarref};
- while ($configline=<CONFIG>) {
+ undef $perlvarref;
-     if ($configline =~ /PerlSetVar/) {
- 	my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
-         chomp($varvalue);
-         $perlvar{$varname}=$varvalue;
-     }
- }
- close(CONFIG);
  # ----------------------------- Make sure this process is running from user=www
+ &status("Check user ID");
  my $wwwid=getpwnam('www');
  if ($wwwid!=$<) {
     $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
- Line 89  open (CONFIG,"$perlvar{'lonTabDir'}/host
+ Line 107  open (CONFIG,"$perlvar{'lonTabDir'}/host
  while ($configline=<CONFIG>) {
      my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
      chomp($ip);
-     $hostip{$id}=$ip;
+     if ($ip) {
+      $hostip{$id}=$ip;
+      $hostname{$id}=$name;
+     }
  }
  close(CONFIG);
  # -------------------------------------------------------- Routines for forking
- Line 102  close(CONFIG);
+ Line 124  close(CONFIG);
  %childatt               = ();       # number of attempts to start server
                                      # for ID
- sub REAPER {                        # takes care of dead children
+ $childmaxattempts=5;
-     $SIG{CHLD} = \&REAPER;
-     my $pid = wait;
-     my $wasserver=$children{$pid};
-     &logthis("<font color=red>CRITICAL: "
-      ."Child $pid for server $wasserver died ($childatt{$wasserver})</font>");
-     delete $children{$pid};
-     delete $childpid{$wasserver};
-     my $port = "$perlvar{'lonSockDir'}/$wasserver";
-     unlink($port);
- }
- sub HUNTSMAN {                      # signal handler for SIGINT
-     local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
-     kill 'INT' => keys %children;
-     my $execdir=$perlvar{'lonDaemons'};
-     unlink("$execdir/logs/lonc.pid");
-     &logthis("<font color=red>CRITICAL: Shutting down</font>");
-     exit;                           # clean up with dignity
- }
- sub HUPSMAN {                      # signal handler for SIGHUP
-     local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
-     kill 'INT' => keys %children;
-     &logthis("<font color=red>CRITICAL: Restarting</font>");
-     unlink("$execdir/logs/lonc.pid");
-     my $execdir=$perlvar{'lonDaemons'};
-     exec("$execdir/lonc");         # here we go again
- }
- sub USRMAN {
-     &logthis("USR1: Trying to establish connections again");
-     foreach $thisserver (keys %hostip) {
- 	$answer=subreply("ping",$thisserver);
-         &logthis("USR1: Ping $thisserver "
-         ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): "
-         ." >$answer<");
-     }
-     %childatt=();
- }
- # -------------------------------------------------- Non-critical communication
- sub subreply {
-  my ($cmd,$server)=@_;
-  my $answer='';
-  if ($server ne $perlvar{'lonHostID'}) {
-     my $peerfile="$perlvar{'lonSockDir'}/$server";
-     my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
-                                       Type    => SOCK_STREAM,
-                                       Timeout => 10)
-        or return "con_lost";
-     print $sclient "$cmd\n";
-     my $answer=<$sclient>;
-     chomp($answer);
-     if (!$answer) { $answer="con_lost"; }
-  } else { $answer='self_reply'; }
-  return $answer;
- }
- # --------------------------------------------------------------------- Logging
- sub logthis {
-     my $message=shift;
-     my $execdir=$perlvar{'lonDaemons'};
-     my $fh=IO::File->new(">>$execdir/logs/lonc.log");
-     my $now=time;
-     my $local=localtime($now);
-     print $fh "$local ($$): $message\n";
- }
- sub logperm {
-     my $message=shift;
-     my $execdir=$perlvar{'lonDaemons'};
-     my $now=time;
-     my $local=localtime($now);
-     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
-     print $fh "$now:$message:$local\n";
- }
  # ---------------------------------------------------- Fork once and dissociate
+ &status("Fork and dissociate");
  $fpid=fork;
  exit if $fpid;
  die "Couldn't fork: $!" unless defined ($fpid);
  POSIX::setsid() or die "Can't start new session: $!";
- # ------------------------------------------------------- Write our PID on disk
+ $conserver='PARENT';
+ # ------------------------------------------------------- Write our PID on disk
+ &status("Write PID");
  $execdir=$perlvar{'lonDaemons'};
  open (PIDSAVE,">$execdir/logs/lonc.pid");
  print PIDSAVE "$$\n";
- Line 204  $SIG{HUP}=$SIG{USR1}='IGNORE';
+ Line 150  $SIG{HUP}=$SIG{USR1}='IGNORE';
  # Fork off our children, one for every server
+ &status("Forking ...");
  foreach $thisserver (keys %hostip) {
-     make_new_child($thisserver);
+     #if (&online($hostname{$thisserver})) {
+        make_new_child($thisserver);
+     #}
  }
  &logthis("Done starting initial servers");
  # ----------------------------------------------------- Install signal handlers
- $SIG{CHLD} = \&REAPER;
  $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
  $SIG{HUP}  = \&HUPSMAN;
  $SIG{USR1} = \&USRMAN;
  # And maintain the population.
  while (1) {
-     sleep;                          # wait for a signal (i.e., child's death)
+     my $deadpid = wait;		# Wait for the next child to die.
-                                     # See who died and start new one
+                                 # See who died and start new one
-     foreach $thisserver (keys %hostip) {
+                                 # or a signal (e.g. USR1 for restart).
-         if (!$childpid{$thisserver}) {
+                                 # if a signal, the wait will fail
- 	    if ($childatt{$thisserver}<=$childmaxattempts) {
+                                 # This is ordinarily detected by
- 	       $childatt{$thisserver}++;
+                                 # checking for the existence of the
-                &logthis(
+                                 # pid index in the children hash since
-    "<font color=yellow>INFO: Trying to reconnect for $thisserver "
+                                 # the return value from a failed wait is -1
-   ."($childatt{$thisserver} of $childmaxattempts attempts)</font>");
+                                 # which is an impossible PID.
-                make_new_child($thisserver);
+     &status("Woke up");
- 	    }
+     my $skipping='';
-         }
+     if(exists($children{$deadpid})) {
+ 	$thisserver = $children{$deadpid}; # Look name of dead guy's peer.
+ 	delete($children{$deadpid}); # Get rid of dead hash entry.
+ 	if($childatt{$thisserver} < $childmaxattempts) {
+ 	    $childatt{$thisserver}++;
+ 	    &logthis(
+ 	       "<font color=yellow>INFO: Trying to reconnect for $thisserver "
+             ."($childatt{$thisserver} of $childmaxattempts attempts)</font>");
+ 	    make_new_child($thisserver);
+ 	}
+ 	else {
+ 	    $skipping .= $thisserver.' ';
+ 	}
+ 	if($skipping) {
+ 	    &logthis("<font color=blue>WARNING: Skipped $skipping</font>");
+ 	}
      }
  }
  sub make_new_child {
-     my $conserver=shift;
+     $newserver=shift;
      my $pid;
      my $sigset;
-     &logthis("Attempting to start child for server $conserver");
+     &logthis("Attempting to start child for server $newserver");
      # block signal for fork
      $sigset = POSIX::SigSet->new(SIGINT);
      sigprocmask(SIG_BLOCK, $sigset)
- Line 251  sub make_new_child {
+ Line 224  sub make_new_child {
          # Parent records the child's birth and returns.
          sigprocmask(SIG_UNBLOCK, $sigset)
              or die "Can't unblock SIGINT for fork: $!\n";
-         $children{$pid} = $conserver;
+         $children{$pid} = $newserver;
-         $childpid{$conserver} = $pid;
+         $childpid{$newserver} = $pid;
          return;
      } else {
+         $conserver=$newserver;
          # Child can *not* return from this subroutine.
          $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
+         $SIG{USR1}= \&logstatus;
          # unblock signals
          sigprocmask(SIG_UNBLOCK, $sigset)
              or die "Can't unblock SIGINT for fork: $!\n";
- Line 267  sub make_new_child {
+ Line 242  sub make_new_child {
  $port = "$perlvar{'lonSockDir'}/$conserver";
  unlink($port);
- # ---------------------------------------------------- Client to network server
- unless (
-   $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
-                                       PeerPort => $perlvar{'londPort'},
-                                       Proto    => "tcp",
-                                       Type     => SOCK_STREAM)
-    ) {
-        my $st=120+int(rand(240));
-        &logthis(
- "<font color=blue>WARNING: Couldn't connect $conserver ($st secs): $@</font>");
-        sleep($st);
-        exit;
-      };
- # --------------------------------------- Send a ping to make other end do USR1
- print $remotesock "init\n";
- $answer=<$remotesock>;
- print $remotesock "$answer";
- $answer=<$remotesock>;
- chomp($answer);
- &logthis("Init reply for $conserver: >$answer<");
- sleep 5;
- print $remotesock "pong\n";
- $answer=<$remotesock>;
- chomp($answer);
- &logthis("Pong reply for $conserver: >$answer<");
- # ----------------------------------------------------------- Initialize cipher
- print $remotesock "ekey\n";
+ # -------------------------------------------------------------- Open other end
- my $buildkey=<$remotesock>;
- my $key=$conserver.$perlvar{'lonHostID'};
- $key=~tr/a-z/A-Z/;
- $key=~tr/G-P/0-9/;
- $key=~tr/Q-Z/0-9/;
- $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
- $key=substr($key,0,32);
- my $cipherkey=pack("H32",$key);
- if ($cipher=new IDEA $cipherkey) {
-    &logthis("Secure connection initialized: $conserver");
- } else {
-    my $st=120+int(rand(240));
-    &logthis(
-      "<font color=blue>WARNING: ".
-      "Could not establish secure connection, $conserver ($st secs)!</font>");
-    sleep($st);
-    exit;
- }
+ &openremote($conserver);
+ 	&logthis("<font color=green> Connection to $conserver open </font>");
  # ----------------------------------------- We're online, send delayed messages
+     &status("Checking for delayed messages");
      my @allbuffered;
      my $path="$perlvar{'lonSockDir'}/delayed";
- Line 322  if ($cipher=new IDEA $cipherkey) {
+ Line 256  if ($cipher=new IDEA $cipherkey) {
      @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
      closedir(DIRHANDLE);
      my $dfname;
-     map {
+     foreach (sort @allbuffered) {
+         &status("Sending delayed: $_");
          $dfname="$path/$_";
-         &logthis($dfname);
+         if($DEBUG) { &logthis('Sending '.$dfname); }
          my $wcmd;
          {
           my $dfh=IO::File->new($dfname);
- Line 345  if ($cipher=new IDEA $cipherkey) {
+ Line 280  if ($cipher=new IDEA $cipherkey) {
              }
              $cmd="enc:$cmdlength:$encrequest\n";
          }
+ 	$answer = londtransaction($remotesock, $cmd, 60);
-         print $remotesock "$cmd\n";
-         $answer=<$remotesock>;
  	chomp($answer);
-         if ($answer ne '') {
+         if (($answer ne '') && ($@!~/timeout/)) {
  	    unlink("$dfname");
-             &logthis("Delayed $cmd to $conserver: >$answer<");
+             &logthis("Delayed $cmd: >$answer<");
              &logperm("S:$conserver:$bcmd");
          }
-     } @allbuffered;
+     }
+ 	if($DEBUG) { &logthis("<font color=green> Delayed transactions sent"); }
  # ------------------------------------------------------- Listen to UNIX socket
+ &status("Opening socket");
  unless (
    $server = IO::Socket::UNIX->new(Local  => $port,
                                    Type   => SOCK_STREAM,
- Line 365  unless (
+ Line 301  unless (
         my $st=120+int(rand(240));
         &logthis(
           "<font color=blue>WARNING: ".
-          "Can't make server socket $conserver ($st secs): $@</font>");
+          "Can't make server socket ($st secs):  .. exiting</font>");
         sleep($st);
         exit;
       };
  # -----------------------------------------------------------------------------
  &logthis("<font color=green>$conserver online</font>");
- Line 379  unless (
+ Line 315  unless (
  %inbuffer  = ();
  %outbuffer = ();
  %ready     = ();
+ %servers   = ();	# To be compatible with make filevector.  indexed by
+ 			# File ids, values are sockets.
+ 			# note that the accept socket is omitted.
  tie %ready, 'Tie::RefHash';
- nonblock($server);
+ # nonblock($server);
- $select = IO::Select->new($server);
+ # $select = IO::Select->new($server);
  # Main loop: check reads/accepts, check writes, check ready to process
+ status("Main loop $conserver");
  while (1) {
      my $client;
      my $rv;
      my $data;
-     # check for new information on the connections we have
+     my $infdset;		# bit vec of fd's to select on input.
-     # anything to read or accept?
+     my $outfdset;		# Bit vec of fd's to select on output.
-     foreach $client ($select->can_read(1)) {
-         if ($client == $server) {
-             # accept a new connection
-             $client = $server->accept();
+     $infdset = MakeFileVector(\%servers);
-             $select->add($client);
+     $outfdset= MakeFileVector(\%outbuffer);
-             nonblock($client);
+     vec($infdset, $server->fileno, 1) = 1;
-         } else {
+     if($DEBUG) {
-             # read data
+ 	&logthis("Adding ".$server->fileno.
-             $data = '';
+ 		 " to input select vector (listner)".
-             $rv   = $client->recv($data, POSIX::BUFSIZ, 0);
+ 		 unpack("b*",$infdset)."\n");
+     }
-             unless (defined($rv) && length $data) {
+     DoSelect(\$infdset, \$outfdset); # Wait for input.
-                 # This would be the end of file, so close the client
+     if($DEBUG) {
-                 delete $inbuffer{$client};
+ 	&logthis("Doselect completed!");
-                 delete $outbuffer{$client};
+ 	&logthis("ins = ".unpack("b*",$infdset)."\n");
-                 delete $ready{$client};
+ 	&logthis("outs= ".unpack("b*",$outfdset)."\n");
-                 $select->remove($client);
+     }
-                 close $client;
-                 next;
-             }
-             $inbuffer{$client} .= $data;
+     # Check for new connections:
+     if (vec($infdset, $server->fileno, 1)) {
+ 	if($DEBUG) {
+ 	    &logthis("New connection established");
+ 	}
+ 	# accept a new connection
+ 	&status("Accept new connection: $conserver");
+ 	$client = $server->accept();
+ 	if($DEBUG) {
+ 	    &logthis("New client fd = ".$client->fileno."\n");
+ 	}
+ 	$servers{$client->fileno} = $client;
+ 	nonblock($client);
+ 	$client->sockopt(SO_KEEPALIVE, 1);# Enable monitoring of
+ 	                                  # connection liveness.
+     }
+     HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready);
+     HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer,
+ 		 \%ready);
+ # -------------------------------------------------------- Wow, connection lost
-             # test whether the data in the buffer or the data we
+ }
-             # just read means there is a complete request waiting
-             # to be fulfilled.  If there is, set $ready{$client}
-             # to the requests waiting to be fulfilled.
-             while ($inbuffer{$client} =~ s/(.*\n)//) {
-                 push( @{$ready{$client}}, $1 );
-             }
-         }
      }
+ }
-     # Any complete requests to process?
+ # ------------------------------------------------------- End of make_new_child
-     foreach $client (keys %ready) {
-         handle($client);
+ #
+ #  Make a vector of file descriptors to wait for in a select.
+ #  Parameters:
+ #     \%fdhash  - reference to a hash whose keys are file descriptor numbers.
+ #                 We only care about the keys, not the values.
+ #  A select vector is created with one bit set for each key of the hash.
+ sub MakeFileVector
+ {
+     # Build a select() bit vector from the keys of the hash referenced by
+     # the single argument.  Each key is used directly as a bit offset, so
+     # the keys must be file descriptor numbers; the values are ignored.
+     # Returns the vector (an empty string when the hash has no keys).
+     my ($fdhash) = @_;
+     my $bits     = "";
+     for my $fd (keys %{$fdhash}) {
+         &logthis("Adding  ".$fd."to select vector. (client)\n") if $DEBUG;
+         vec($bits, $fd, 1) = 1;
+     }
+     return $bits;
+ }
-     # Buffers to flush?
-     foreach $client ($select->can_write(1)) {
+ #
-         # Skip this client if we have nothing to say
+ #  HandleOutput:
-         next unless exists $outbuffer{$client};
+ #    Processes output on a buffered set of file descriptors which are
+ #    ready to be written.
-         $rv = $client->send($outbuffer{$client}, 0);
+ #  Parameters:
-         unless (defined $rv) {
+ #    $selvector - Vector of file descriptors which are writable.
-             # Whine, but move on.
+ #    \%sockets  - Vector of socket references indexed by socket.
-             warn "I was told I could write, but I can't.\n";
+ #    \%buffers  - Reference to a hash containing output buffers.
-             next;
+ #                 Hashes are indexed by sockets.  The file descriptors of some
-         }
+ #                 of those sockets will be present in $selvector.
-         if (($rv == length $outbuffer{$client}) ||
+ #                 For each one of those, we will attempt to write the output
-             ($! == POSIX::EWOULDBLOCK)) {
+ #                 buffer to the socket.  Note that we will assume that
-             substr($outbuffer{$client}, 0, $rv) = '';
+ #                 the sockets are being run in non blocking mode.
-             delete $outbuffer{$client} unless length $outbuffer{$client};
+ #   \%inbufs    - Reference to hash containing input buffers.
-         } else {
+ #   \%readys    - Reference to hash containing flags for items with complete
-             # Couldn't write all the data, and it wasn't because
+ #                 requests.
-             # it would have blocked.  Shutdown and move on.
+ #
-             delete $inbuffer{$client};
+ sub HandleOutput
-             delete $outbuffer{$client};
+ {
-             delete $ready{$client};
+     my $selvector = shift;
+     my $sockets   = shift;
-             $select->remove($client);
+     my $buffers   = shift;
-             close($client);
+     my $inbufs    = shift;
-             next;
+     my $readys    = shift;
-         }
+     my $sock;
+     # Flush pending output.  For every key of %$sockets whose bit is set in
+     # $selvector, the matching entry of %$buffers is sent on that socket.
+     # The keys are used as bit offsets into $selvector, i.e. they are file
+     # descriptor numbers; the values are the socket objects.
+     # Side effects visible here: writes the package variable $errno, and on
+     # the "con_lost\n" sentinel shuts down $remotesock and calls
+     # openremote($conserver).
+     if($DEBUG) {
+ 	&logthis("HandleOutput entered\n");
+     }
+     foreach $sock (keys %$sockets) {
+ 	my $socket = $sockets->{$sock};
+ 	if(vec($selvector, $sock, 1)) { # $socket is writable.
+ 	    if($DEBUG) {
+ 		&logthis("Sending $buffers->{$sock} \n");
+ 	    }
+ 	    # The send happens before the sentinel test below, so a
+ 	    # "con_lost\n" buffer has already been handed to the client
+ 	    # socket by the time the reconnect branch runs.
+ 	    my $rv = $socket->send($buffers->{$sock}, 0);
+ 	    $errno = $!;	# Saved at once; later calls may overwrite $!.
+ 	    unless ($buffers->{$sock} eq "con_lost\n") {
+ 		unless (defined $rv) { # Write failed... could be EINTR
+ 		    unless ($errno == POSIX::EINTR) {
+ 			&logthis("Write failed on writable socket");
+ 		    }		# EINTR is not an error .. just retry.
+ 		    next;	# Buffer is kept for a later pass.
+ 		}
+ 		# Complete write, or one of the error codes accepted here:
+ 		# drop the first $rv characters and forget the buffer once
+ 		# it is empty.
+ 		if( ($rv == length $buffers->{$sock})    ||
+ 		    ($errno == POSIX::EWOULDBLOCK)       ||
+ 		    ($errno == POSIX::EAGAIN)            || # same as above.
+ 		    ($errno == POSIX::EINTR)             || # signal during IO
+ 		    ($errno == 0)) {
+ 		    substr($buffers->{$sock}, 0, $rv)=""; # delete written part
+ 		    delete $buffers->{$sock} unless length $buffers->{$sock};
+ 		} else {
+ 		    # For some reason the write failed with an error code
+ 		    # we didn't look for.  Shutdown the socket.
+ 		    &logthis("Unable to write data with ".$errno.": ".
+ 			     "Dropping data: ".length($buffers->{$sock}).
+ 			     ", $rv");
+ 		    #
+ 		    # kill off the buffers in the hash:
+ 		    # NOTE(review): the entry in %$sockets is not removed
+ 		    # here although the socket is closed -- confirm that
+ 		    # this is intended.
+ 		    delete $buffers->{$sock};
+ 		    delete $inbufs->{$sock};
+ 		    delete $readys->{$sock};
+ 		    close($socket); # Close the client socket.
+ 		    next;
+ 		}
+ 	    } else {		# Kludgy way to mark lond connection lost.
+ 		# NOTE(review): this branch does not remove or trim
+ 		# $buffers->{$sock}; confirm whether the sentinel is meant
+ 		# to stay queued after the reconnect attempt.
+ 		&logthis(
+ 		 "<font color=red>CRITICAL lond connection lost</font>");
+ 		status("Connection lost");
+ 		$remotesock->shutdown(2);
+ 		&logthis("Attempting to open a new connection");
+ 		&openremote($conserver);
+ 	    }
+ 	}
      }
  }
+ #
+ #   HandleInput - Deals with input on client sockets.
+ #                 Each socket has an associated input buffer.
+ #                 For each readable socket, the currently available
+ #                 data is appended to this buffer.
+ #                 If necessary, the buffer is created.
+ #                 On various failures, we may shutdown the client.
+ #  Parameters:
+ #     $selvec   - Vector of readable sockets.
+ #     \%sockets - Refers to the hash of sockets indexed by file descriptor
+ #                 number.  Each of these may or may not have its fd bit set
+ #                 in the $selvec.
+ #     \%ibufs   - Refers to the hash of input buffers indexed by socket.
+ #     \%obufs   - Hash of output buffers indexed by socket.
+ #     \%ready   - Hash of ready flags indicating the existence of a completed
+ #                 Request.
+ sub HandleInput
+ {
+     # Read whatever is available on every client socket flagged readable
+     # and queue the complete (newline-terminated) requests.
+     #
+     # Parameters (the hashes are passed by reference):
+     #   $selvec   - select() result vector; bit N set means fd N is readable.
+     #   $sockets  - hash of client sockets keyed by file descriptor number.
+     #   $ibufs    - hash of partial-input buffers, same keys.
+     #   $obufs    - hash of output buffers, same keys.
+     #   $ready    - hash of array refs holding complete requests, same keys.
+     #
+     # A read that returns no data is treated as a disconnect: the socket
+     # and all of its buffers are dropped.  A read that merely failed with
+     # EINTR, EAGAIN or EWOULDBLOCK is NOT a disconnect; the socket is left
+     # alone and retried on a later pass (HandleOutput treats the same
+     # error codes as non-fatal).
+     #
+     # After reading, handle() is called for every key present in %$ready.
+     my ($selvec, $sockets, $ibufs, $obufs, $ready) = @_;
+     if ($DEBUG) {
+         &logthis("Entered HandleInput\n");
+     }
+     foreach my $sock (keys %$sockets) {
+         next unless vec($selvec, $sock, 1);    # Not flagged readable.
+         my $socket = $sockets->{$sock};
+         #  Attempt to read the data and do error management.
+         my $data  = '';
+         my $rv    = $socket->recv($data, POSIX::BUFSIZ, 0);
+         my $errno = $! + 0;    # Saved before logging can overwrite $!.
+         if ($DEBUG) {
+             &logthis("Received $data from socket");
+         }
+         if (!defined($rv)
+             && ($errno == POSIX::EINTR()
+                 || $errno == POSIX::EAGAIN()
+                 || $errno == POSIX::EWOULDBLOCK())) {
+             # Transient failure: nothing was read, but the peer has not
+             # gone away.  Keep the connection.
+             next;
+         }
+         unless (defined($rv) && length $data) {
+             # Read an end of file.. this is a disconnect from the peer.
+             delete $sockets->{$sock};
+             delete $ibufs->{$sock};
+             delete $obufs->{$sock};
+             delete $ready->{$sock};
+             status("Idle");
+             close $socket;
+             next;
+         }
+         #  Append the read data to the input buffer.  Every line that is
+         # now complete (ends in \n) is moved to the ready queue, one
+         # request per \n.
+         $ibufs->{$sock} .= $data;
+         while ($ibufs->{$sock} =~ s/(.*\n)//) {
+             push(@{$ready->{$sock}}, $1);
+         }
+     }
+     #  Now handle any requests which are ready:
+     foreach my $client (keys %$ready) {
+         handle($client);
+     }
  }
- # ------------------------------------------------------- End of make_new_child
+ # DoSelect:  does a select with no timeout.  On signal (errno == EINTR),
+ #            the select is retried until there are items in the returned
+ #            vectors.
+ #
+ # Parameters:
+ #   \$readvec   - Reference to a vector of file descriptors to
+ #                 check for readability.
+ #   \$writevec  - Reference to a vector of file descriptors to check for
+ #                 writability.
+ #  On exit, the referents are modified with vectors indicating which
+ #  file handles are readable/writable.
+ #
+ sub DoSelect {
+     # Block in select() (no timeout) until at least one descriptor is ready.
+     #
+     # Parameters:
+     #   $readvec  - reference to the bit vector of fds to test for reading.
+     #   $writevec - reference to the bit vector of fds to test for writing.
+     # On return both referents have been replaced by the vectors select()
+     # produced, i.e. only the bits of ready descriptors remain set.
+     #
+     # select() returns -1 on failure.  That value is true, so it must be
+     # tested explicitly: an interrupted call (EINTR) is retried, any other
+     # failure dies with "Select failed".  A return of 0 (nothing ready) is
+     # also retried.
+     my ($readvec, $writevec) = @_;
+     while (1) {
+         # select() modifies its arguments, so work on copies and only
+         # publish them once the call has succeeded.
+         my $ins   = $$readvec;
+         my $outs  = $$writevec;
+         my $nfds  = select($ins, $outs, undef, undef);
+         my $errno = $! + 0;    # Saved before logging can overwrite $!.
+         if ($nfds > 0) {
+             if ($DEBUG) {
+                 &logthis("select exited with ".$nfds." fds\n");
+                 &logthis("ins = ".unpack("b*",$ins).
+                          " readvec = ".unpack("b*",$$readvec)."\n");
+                 &logthis("outs = ".unpack("b*",$outs).
+                          " writevec = ".unpack("b*",$$writevec)."\n");
+             }
+             $$readvec  = $ins;
+             $$writevec = $outs;
+             return;
+         }
+         if ($DEBUG) {
+             &logthis("Select exited with no bits set in mask\n");
+         }
+         die "Select failed" if $nfds < 0 && $errno != POSIX::EINTR();
+     }
+ }
  # handle($socket) deals with all pending requests for $client
+ #
  sub handle {
      # requests are in $ready{$client}
      # send output to $outbuffer{$client}
      my $client = shift;
      my $request;
      foreach $request (@{$ready{$client}}) {
  # ============================================================= Process request
          # $request is the text of the request
          # put text of reply into $outbuffer{$client}
+ # ------------------------------------------------------------ Is this the end?
+ 	chomp($request);
+ 	if($DEBUG) {
+      &logthis("<font color=green> Request $request processing starts</font>");
+         }
+         if ($request eq "close_connection_exit\n") {
+ 	    &status("Request close connection");
+            &logthis(
+      "<font color=red>CRITICAL: Request Close Connection ... exiting</font>");
+            $remotesock->shutdown(2);
+            $server->close();
+            exit;
+         }
  # -----------------------------------------------------------------------------
          if ($request =~ /^encrypt\:/) {
  	    my $cmd=$request;
- Line 490  sub handle {
+ Line 635  sub handle {
                  $encrequest.=
                      unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));
              }
-             $request="enc:$cmdlength:$encrequest\n";
+             $request="enc:$cmdlength:$encrequest";
          }
-         print $remotesock "$request";
+ # --------------------------------------------------------------- Main exchange
-         $answer=<$remotesock>;
+ 	$answer = londtransaction($remotesock, $request, 300);
+ 	if($DEBUG) {
+ 	    &logthis("<font color=green> Request data exchange complete");
+ 	}
+ 	if ($@=~/timeout/) {
+ 	    $answer='';
+ 	    &logthis(
+ 		     "<font color=red>CRITICAL: Timeout: $request</font>");
+ 	}
          if ($answer) {
  	   if ($answer =~ /^enc/) {
                 my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
- Line 507  sub handle {
+ Line 663  sub handle {
  	      $answer=substr($answer,0,$cmdlength);
  	      $answer.="\n";
  	   }
+ 	   if($DEBUG) {
+ 	       &logthis("sending $answer to client\n");
+ 	   }
             $outbuffer{$client} .= $answer;
          } else {
             $outbuffer{$client} .= "con_lost\n";
          }
+      &status("Completed: $request");
+ 	if($DEBUG) {
+ 	    &logthis("<font color=green> Request processing complete</font>");
+ 	}
  # ===================================================== Done processing request
      }
      delete $ready{$client};
  # -------------------------------------------------------------- End non-forker
+     if($DEBUG) {
+ 	&logthis("<font color=green> requests for child handled</font>");
+     }
  }
  # ---------------------------------------------------------- End make_new_child
- }
  # nonblock($socket) puts socket into nonblocking mode
  sub nonblock {
- Line 532  sub nonblock {
+ Line 697  sub nonblock {
              or die "Can't make socket nonblocking: $!\n";
  }
+ sub openremote {
+ # ---------------------------------------------------- Client to network server
+ # Open the TCP connection to the lond peer named by the first argument and
+ # run the start-up dialogue on it: init handshake, sethost, version
+ # exchange, pong, and cipher key setup.
+ # Sets the package variables $remotesock, $cipher and (when the peer
+ # answers the version request) $remoteVERSION; $answer is overwritten.
+ # On connect failure, init failure or cipher failure the process logs,
+ # sleeps 120-359 seconds and exits; an init timeout exits immediately.
+     my $conserver=shift;	# Lexical copy; hides the package $conserver here.
+     &status("Opening TCP $conserver");
+     my $st=120+int(rand(240)); # Delay used only if the connect below fails.
+     unless (
+ 	    $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
+ 						PeerPort => $perlvar{'londPort'},
+ 						Proto    => "tcp",
+ 						Type     => SOCK_STREAM)
+ 	   ) {
+ 	&logthis(
+ 		 "<font color=blue>WARNING: Couldn't connect to $conserver ($st secs): </font>");
+ 	sleep($st);
+ 	exit;
+     };
+ # ----------------------------------------------------------------- Init dialog
+     &logthis("<font color=green>INFO Connected to $conserver, initing</font>");
+     &status("Init dialogue: $conserver");
+     # Send "init", then send the peer's reply straight back to it.
+     $answer = londtransaction($remotesock, "init", 60);
+     chomp($answer);
+     $answer = londtransaction($remotesock, $answer, 60);
+     chomp($answer);
+     # NOTE(review): $@ is inspected only after the second exchange; whether
+     # a timeout in the first one is still visible here depends on
+     # londtransaction, which is not shown -- confirm.
+     if ($@=~/timeout/) {
+ 	&logthis("Timed out during init.. exiting");
+ 	exit;
+     }
+     if ($answer ne 'ok') {
+ 	&logthis("Init reply: >$answer<");
+ 	my $st=120+int(rand(240));	# Fresh random delay for this failure.
+ 	&logthis("<font color=blue>WARNING: Init failed ($st secs)</font>");
+ 	sleep($st);
+ 	exit;
+     }
+     # A failed sethost is only logged; the dialogue continues.
+     $answer = londtransaction($remotesock,"sethost:$conserver",60);
+     chomp($answer);
+     if ( $answer ne 'ok') {
+ 	&logthis('<font color="blue">WARNING: unable to specify remote host'.
+ 		 $answer.'</font>');
+     }
+     # Likewise a failed version request is only logged.
+     $answer = londtransaction($remotesock,"version:$VERSION",60);
+     chomp($answer);
+     if ($answer =~ /^version:/) {
+ 	$remoteVERSION=(split(/:/,$answer))[1];
+     } else {
+ 	&logthis('<font color="blue">WARNING: request remote version failed :'.
+ 		 $answer.': my version is :'.$VERSION.':</font>');
+     }
+     sleep 5;
+     &status("Ponging $conserver");
+     # From here on the socket is read and written directly, without
+     # londtransaction.
+     print $remotesock "pong\n";
+     $answer=<$remotesock>;
+     chomp($answer);
+     # An unexpected pong reply is logged but not treated as an error.
+     if ($answer!~/^$conserver/) {
+ 	&logthis("Pong reply: >$answer<");
+     }
+ # ----------------------------------------------------------- Initialize cipher
+     &status("Initialize cipher");
+     print $remotesock "ekey\n";
+     my $buildkey=<$remotesock>;	# Used as read; it is not chomped.
+     # Derive the key text: both host ids joined, upper-cased, with the
+     # letters G-P and Q-Z each mapped onto the digits 0-9, then interleaved
+     # three times with $buildkey and cut to 32 characters.  pack("H32")
+     # reads those 32 characters as hex digits, giving 16 bytes.
+     my $key=$conserver.$perlvar{'lonHostID'};
+     $key=~tr/a-z/A-Z/;
+     $key=~tr/G-P/0-9/;
+     $key=~tr/Q-Z/0-9/;
+     $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
+     $key=substr($key,0,32);
+     my $cipherkey=pack("H32",$key);
+     if ($cipher=new IDEA $cipherkey) {
+ 	&logthis("Secure connection initialized");
+     } else {
+ 	my $st=120+int(rand(240));
+ 	&logthis("<font color=blue>WARNING: ".
+ 		 "Could not establish secure connection ($st secs)!</font>");
+ 	sleep($st);
+ 	exit;
+     }
+     &logthis("<font color=green> Remote open success </font>");
+ }
+ # Exception hook: records the abnormal exit of this process in the log
+ # and then dies.
+ #   $signal - exception text or signal name that triggered the hook
+ sub catchexception {
+     my $cause=shift;
+     # Put default handling back first so the die() below cannot re-enter
+     # this routine.
+     $SIG{__DIE__}='DEFAULT';
+     $SIG{QUIT}='DEFAULT';
+     chomp($cause);
+     my $report="<font color=red>CRITICAL: "
+         ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
+         ."\"$cause\" with parameter </font>";
+     &logthis($report);
+     die("Signal abend");
+ }
+ # -------------------------------------- Routines to see if other box available
+ #sub online {
+ #    my $host=shift;
+ #    &status("Pinging ".$host);
+ #    my $p=Net::Ping->new("tcp",20);
+ #    my $online=$p->ping("$host");
+ #    $p->close();
+ #    undef ($p);
+ #    return $online;
+ #}
+ # Asks the web server of host id $local (via /cgi-bin/ping.pl) about
+ # host id $remote.
+ # Returns:
+ #   'ok'              - first line of the reply equals $remote
+ #   'local_unknown'   - $local has no entry in %hostname
+ #   'remote_unknown'  - $remote has no entry in %hostname
+ #   'local_error'     - the HTTP request did not succeed
+ #   anything else     - the first line of the reply, stripped of
+ #                       non-word characters
+ sub connected {
+     my ($local,$remote)=@_;
+     &status("Checking connection $local to $remote");
+     # Strip everything but word characters from both host ids.
+     for my $id ($local,$remote) { $id=~s/\W//g; }
+     return 'local_unknown'  unless ($hostname{$local});
+     return 'remote_unknown' unless ($hostname{$remote});
+     #unless (&online($hostname{$local})) { return 'local_offline'; }
+     my $url="http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote;
+     my $agent=LWP::UserAgent->new;
+     my $response=$agent->request(HTTP::Request->new('GET',$url));
+     return 'local_error' unless ($response->is_success);
+     # Only the first line of the body matters.
+     my ($firstline)=split("\n",$response->content);
+     $firstline=~s/\W//g;
+     return 'ok' if ($firstline eq $remote);
+     return $firstline;
+ }
+ # For every entry in %children: sends 'exit' for the child's server via
+ # subreply, logs the reply, and then sends SIGINT to the child process.
+ # Leaves the global $wasserver set to the server handled last.
+ sub hangup {
+     foreach my $childpid (keys %children) {
+         $wasserver=$children{$childpid};
+         &status("Closing $wasserver");
+         my $reply=&subreply('exit',$wasserver);
+         &logthis('Closing '.$wasserver.': '.$reply);
+         &status("Kill PID $childpid for $wasserver");
+         kill('INT',$childpid);
+     }
+ }
+ # Signal handler for SIGINT: hangs up all children, removes the PID file,
+ # logs the shutdown and exits.
+ sub HUNTSMAN {
+     # The children are about to be killed on purpose, so their exit
+     # notifications are ignored for the duration of this handler.
+     local $SIG{CHLD}='IGNORE';
+     &hangup();
+     my $pidfile=$perlvar{'lonDaemons'}.'/logs/lonc.pid';
+     unlink($pidfile);
+     &logthis("<font color=red>CRITICAL: Shutting down</font>");
+     exit;                           # clean up with dignity
+ }
+ # Signal handler for SIGHUP: hangs up all children, removes the PID file
+ # and restarts the daemon by re-executing $perlvar{'lonDaemons'}/lonc.
+ # Only returns if the exec fails; the failure is logged.
+ sub HUPSMAN {
+     local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
+     &hangup();
+     &logthis("<font color=red>CRITICAL: Restarting</font>");
+     my $execdir=$perlvar{'lonDaemons'};
+     unlink("$execdir/logs/lonc.pid");
+     # SIGHUP is blocked while its own handler runs, and exec() keeps the
+     # signal mask.  Without unblocking it here the re-executed lonc would
+     # never receive another SIGHUP (only the first restart would work).
+     unless (defined sigprocmask(SIG_UNBLOCK, POSIX::SigSet->new(SIGHUP))) {
+         &logthis("<font color=red>CRITICAL: Could not unblock SIGHUP ".
+                  "before restart</font>");
+     }
+     exec("$execdir/lonc")          # here we go again
+         or &logthis("<font color=red>CRITICAL: Could not restart ".
+                     "$execdir/lonc: $!</font>");
+ }
+ # Starts a fresh status file, records the parent's own status, and then
+ # sends USR1 to every known child.  A child that cannot be signalled is
+ # reported as dead.
+ sub checkchildren {
+     &initnewstatus();
+     &logstatus();
+     &logthis('Going to check on the children');
+     foreach my $childpid (sort keys %children) {
+         # One second per child; presumably this spaces out the children's
+         # writes to the shared status file -- TODO confirm.
+         sleep 1;
+         unless (kill 'USR1' => $childpid) {
+             &logthis ('<font color=red>CRITICAL: Child '.$childpid.
+                       ' is dead</font>');
+             # Name the dead child here, not the parent ($$).
+             &logstatus($childpid.' is dead');
+         }
+     }
+ }
+ # Handler for USR1 in the parent: gives every server a fresh set of
+ # connection attempts and re-checks the children.
+ sub USRMAN {
+     &logthis("USR1: Trying to establish connections again");
+     #
+     #  The childatt hash must not simply be cleared, or all memory of the
+     #  children is lost.  Instead every attempt counter is zeroed, and for
+     #  each host whose counter had reached $childmaxattempts a new child
+     #  is made as well.
+     foreach my $host (keys %childatt) {
+         my $had_given_up = ($childatt{$host} >= $childmaxattempts);
+         $childatt{$host} = 0;
+         next unless ($had_given_up);
+         &logthis("<font color=green>INFO: Restarting child for server: "
+                  .$host."</font>\n");
+         make_new_child($host);
+     }
+     &checkchildren();		# See if any children are still dead...
+ }
+ # -------------------------------------------------- Non-critical communication
+ # Sends $cmd for host id $server through that server's UNIX domain
+ # socket in $perlvar{'lonSockDir'} and returns the reply.
+ # Returns 'self_reply' when $server is this host, and 'con_lost' when the
+ # socket cannot be opened or no answer comes back.
+ sub subreply {
+     my ($cmd,$server)=@_;
+     return 'self_reply' if ($server eq $perlvar{'lonHostID'});
+     my $peerfile="$perlvar{'lonSockDir'}/$server";
+     my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
+                                       Type    => SOCK_STREAM,
+                                       Timeout => 10);
+     return "con_lost" unless ($sclient);
+     my $answer = londtransaction($sclient, $cmd, 10);
+     if ((!$answer) || ($@=~/timeout/)) {
+         $answer="con_lost";
+     }
+     # Put the usual handlers back in place after the transaction.
+     $SIG{ALRM}='DEFAULT';
+     $SIG{__DIE__}=\&catchexception;
+     return $answer;
+ }
+ # --------------------------------------------------------------------- Logging
+ # Appends a time-stamped entry to logs/lonc.log under
+ # $perlvar{'lonDaemons'} and remembers it in the global $lastlog.
+ #   $message - text to log
+ # Returns the result of the print, or nothing if the log file could not
+ # be opened.
+ sub logthis {
+     my $message=shift;
+     my $execdir=$perlvar{'lonDaemons'};
+     my $now=time;
+     my $local=localtime($now);
+     # Record the entry first so it is available in $lastlog even when the
+     # log file itself cannot be written.
+     $lastlog=$local.': '.$message;
+     # A failed open must not be fatal: printing to an undefined handle
+     # would die and take the daemon down from inside a logging call.
+     my $fh=IO::File->new(">>$execdir/logs/lonc.log") or return;
+     my $written=print $fh "$local ($$) [$conserver] [$status]: $message\n";
+     $fh->close;
+     return $written;
+ }
+ #--------------------------------------  londtransaction:
+ #
+ #  Performs a transaction with lond with timeout support.
+ #    result = londtransaction(socket,request,timeout)
+ #
+ #    socket  - connected handle to send the request on
+ #    request - request line; a newline is appended before sending
+ #    timeout - seconds allowed for the send and, separately, for the read
+ #
+ #  Returns the reply line as read from the socket (not chomped; undefined
+ #  if nothing could be read).  Dies if the send or the read times out, or
+ #  if the signal mask cannot be changed.
+ #
+ sub londtransaction {
+     my ($socket, $request, $tmo) = @_;
+     if($DEBUG) {
+         &logthis("londtransaction request: $request");
+     }
+     # Set the signal handlers: ALRM for timeout and disable the others.
+     $SIG{ALRM} = sub { die "timeout" };
+     $SIG{__DIE__} = 'DEFAULT';
+     # Block all but alarm so that only that can interrupt the
+     # send / receive.  The POSIX SIG* constants are required here: bare
+     # names such as QUIT are plain strings, not signal numbers.
+     #
+     my $sigset = POSIX::SigSet->new(SIGQUIT, SIGUSR1, SIGHUP, SIGINT, SIGTERM);
+     my $priorsigs = POSIX::SigSet->new;
+     unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) {
+         &logthis("<font color=red> CRITICAL -- londtransaction ".
+                  "failed to block signals </font>");
+         die "could not block signals in londtransaction";
+     }
+     my $answer = '';
+     #
+     #  Send request to lond.
+     #
+     eval {
+         alarm($tmo);
+         print $socket "$request\n";
+         alarm(0);
+     };
+     alarm(0);    # make sure no alarm stays pending if the eval was left early
+     #  If request didn't timeout, try for the response.
+     #
+     if ($@!~/timeout/) {
+         eval {
+             alarm($tmo);
+             $answer = <$socket>;
+             if($DEBUG) {
+                 &logthis("Received $answer in londtransaction");
+             }
+             alarm(0);
+         };
+         alarm(0);
+     } else {
+         &logthis("lonc - suiciding on send Timeout");
+         die("lonc - suiciding on send Timeout");
+     }
+     if ($@ =~ /timeout/) {
+         &logthis("lonc - suiciding on read Timeout");
+         die("lonc - suiciding on read Timeout");
+     }
+     #
+     # Restore the initial sigmask set.  SIG_SETMASK reinstates exactly the
+     # mask saved above; SIG_UNBLOCK with the saved set would instead
+     # unblock whatever had been blocked before the call.
+     #
+     unless (defined sigprocmask(SIG_SETMASK, $priorsigs)) {
+         &logthis("<font color=red> CRITICAL -- londtransaction ".
+                  "failed to re-enable signal processing. </font>");
+         die "londtransaction failed to re-enable signals";
+     }
+     #
+     # go back to the prior handler set.
+     #
+     $SIG{ALRM} = 'DEFAULT';
+     $SIG{__DIE__} = \&catchexception;
+     #    chomp $answer;
+     if ($DEBUG) {
+         &logthis("Returning $answer in londtransaction");
+     }
+     return $answer;
+ }
+ # Appends "<epoch>:<message>:<local time>" to logs/lonnet.perm.log under
+ # $perlvar{'lonDaemons'}.
+ #   $message - text to record
+ # Returns the result of the print, or nothing if the file could not be
+ # opened.
+ sub logperm {
+     my $message=shift;
+     my $execdir=$perlvar{'lonDaemons'};
+     my $now=time;
+     my $local=localtime($now);
+     # A failed open must not be fatal (print on an undefined handle dies).
+     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log") or return;
+     my $written=print $fh "$now:$message:$local\n";
+     $fh->close;
+     return $written;
+ }
+ # ------------------------------------------------------------------ Log status
+ # Appends one tab-separated line (PID, server, status, last log entry) to
+ # lon-status/loncstatus.txt under $perlvar{'lonDocRoot'}.  Any arguments
+ # are ignored; per the POD in this file the children install this routine
+ # as their USR1 handler.
+ # Returns the result of the print, or nothing if the file could not be
+ # opened.
+ sub logstatus {
+     my $docdir=$perlvar{'lonDocRoot'};
+     # A failed open must not be fatal here: printing to an undefined
+     # handle would die inside a signal handler.
+     my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt") or return;
+     my $written=print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n";
+     $fh->close;
+     return $written;
+ }
+ # Truncates lon-status/loncstatus.txt under $perlvar{'lonDocRoot'} and
+ # writes a header line with the current local time and this process's PID.
+ # Returns the result of the print, or nothing if the file could not be
+ # opened.
+ sub initnewstatus {
+     my $docdir=$perlvar{'lonDocRoot'};
+     # A failed open must not be fatal (print on an undefined handle dies).
+     my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt") or return;
+     my $now=time;
+     my $local=localtime($now);
+     my $written=print $fh "LONC status $local - parent $$\n\n";
+     $fh->close;
+     return $written;
+ }
+ # -------------------------------------------------------------- Status setting
+ # Records what this process is currently doing: in the global $status
+ # (time-stamped) and in the process name $0.
+ #   $what - short description of the current activity
+ sub status {
+     my ($what)=@_;
+     my $stamp=localtime(time);
+     $status="$stamp: $what";
+     $0="lonc: $what $stamp";
+ }
+ # ----------------------------------- POD (plain old documentation, CPAN style)
+ =head1 NAME
+ lonc - LON-CAPA client daemon providing persistent TCP connections to the other servers in the network.
+ =head1 SYNOPSIS
+ Usage: B<lonc>
+ Should only be run as user=www.  This is a command-line script which
+ is invoked by B<loncron>.  There is no expectation that a typical user
+ will manually start B<lonc> from the command-line.  (In other words,
+ DO NOT START B<lonc> YOURSELF.)
+ =head1 OVERVIEW
+ =head2 Physical Overview
+ =begin latex
+ \begin{figure}
+   \begin{center}
+     \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram}
+   \end{center}
+   \caption{\label{Overview_Of_Network}Overview of Network}
+ \end{figure}
+ =end latex
+ Physically, the Network consists of relatively inexpensive
+ upper-PC-class server machines which are linked through the commodity
+ internet in a load-balancing, dynamically content-replicating and
+ failover-secure way.
+ All machines in the Network are connected with each other through
+ two-way persistent TCP/IP connections. Clients (B<B>, B<F>, B<G> and
+ B<H> in Fig. Overview of Network) connect to the servers via standard
+ HTTP. There are two classes of servers, B<Library Servers> (B<A> and
+ B<E> in Fig. Overview of Network) and B<Access Servers> (B<C>, B<D>,
+ B<I> and B<J> in Fig. Overview of Network).
+ B<Library Servers> X<library server> X<server, library> are used to
+ store all personal records of a set of users, and are responsible for
+ their initial authentication when a session is opened on any server in
+ the Network. For Authors, Library Servers also host their
+ construction area and the authoritative copy of the current and
+ previous versions of every resource that was published by that
+ author. Library servers can be used as backups to host sessions when
+ all access servers in the Network are overloaded. Otherwise, for
+ learners, access servers are used to host the sessions. Library
+ servers need to have strong I/O capabilities.
+ B<Access Servers> X<access server> X<server, access> provide LON-CAPA
+ service to users, using the library servers as their data source. The
+ network is designed so that the number of concurrent sessions can be
+ increased over a wide range by simply adding additional access servers
+ before having to add additional library servers. Preliminary tests
+ showed that a library server could handle up to 10 access servers
+ fully parallel. Access servers can generally be cheaper hardware than
+ library servers require.
+ The Network is divided into B<domains> X<domain>, which are logical
+ boundaries between participating institutions. These domains can be
+ used to limit the flow of personal user information across the
+ network, set access privileges and enforce royalty schemes. LON-CAPA
+ domains bear no relationship to any other domain, including domains
+ used by the DNS system; LON-CAPA domains may be freely configured in
+ any manner that suits your use pattern.
+ =head2 Example Transactions
+ Fig. Overview of Network also depicts examples for several kinds of
+ transactions conducted across the Network.
+ An instructor at client B<B> modifies and publishes a resource on her
+ Home Server B<A>. Server B<A> has a record of all server machines
+ currently subscribed to this resource, and replicates it to servers
+ B<D> and B<I>. However, server B<D> is currently offline, so the
+ update notification gets buffered on B<A> until B<D> comes online
+ again. Servers B<C> and B<J> are currently not subscribed to this
+ resource.
+ Learners B<F> and B<G> have open sessions on server B<I>, and the new
+ resource is immediately available to them.
+ Learner B<H> tries to connect to server B<I> for a new session,
+ however, the machine is not reachable, so he connects to another
+ Access Server B<J> instead. This server currently does not have all
+ necessary resources locally present to host learner B<H>, but
+ subscribes to them and replicates them as they are accessed by B<H>.
+ Learner B<H> solves a problem on server B<J>. Library Server B<E> is
+ B<H>'s Home Server, so this information gets forwarded to B<E>, where
+ the records of H are updated.
+ =head2 lond, lonc, and lonnet
+ =begin latex
+ \begin{figure}
+ \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2}
+   \caption{\label{Overview_Of_Network_Communication}Overview of
+ Network Communication} \end{figure}
+ =end latex
+ Fig. Overview of Network Communication elaborates on the details of
+ this network infrastructure. It depicts three servers (B<A>, B<B> and
+ B<C>) and a client who has a session on server B<C>.
+ As B<C> accesses different resources in the system, different
+ handlers, which are incorporated as modules into the child processes
+ of the web server software, process these requests.
+ Our current implementation uses C<mod_perl> inside of the Apache web
+ server software. As an example, server B<C> currently has four active
+ web server software child processes. The chain of handlers dealing
+ with a certain resource is determined by both the server content
+ resource area (see below) and the MIME type, which in turn is
+ determined by the URL extension. For most URL structures, both an
+ authentication handler and a content handler are registered.
+ Handlers use a common library C<lonnet> X<lonnet> to interact with
+ both locally present temporary session data and data across the server
+ network. For example, lonnet provides routines for finding the home
+ server of a user, finding the server with the lowest loadavg, sending
+ simple command-reply sequences, and sending critical messages such as
+ a homework completion, etc. For a non-critical message, the routines
+ reply with a simple "connection lost" if the message could not be
+ delivered. For critical messages, lonnet tries to re-establish
+ connections, re-send the command, etc. If no valid reply could be
+ received, it answers "connection deferred" and stores the message in
+ buffer space to be sent at a later point in time. Also, failed
+ critical messages are logged.
+ The interface between C<lonnet> and the Network is established by a
+ multiplexed UNIX domain socket, denoted B<DS> in Fig. Overview of
+ Network Communication. The rationale behind this rather involved
+ architecture is that httpd processes (Apache children) dynamically
+ come and go on the timescale of minutes, based on workload and number
+ of processed requests. Over the lifetime of an httpd child, however,
+ it has to establish several hundred connections to several different
+ servers in the Network.
+ On the other hand, establishing a TCP/IP connection is resource
+ consuming for both ends of the line, and to optimize this connectivity
+ between different servers, connections in the Network are designed to
+ be persistent on the timescale of months, until either end is
+ rebooted. This mechanism will be elaborated on below.
+ =begin latex
+ \begin{figure}
+ \begin{lyxcode}
+ msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51
+ msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68
+ msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69
+ msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67
+ hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34
+ hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35
+ hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36
+ huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40
+ huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41
+ huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42
+ huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43
+ hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45
+ huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47
+ \end{lyxcode}
+ \caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}}
+ \end{figure}
+ =end latex
+ Establishing a connection to a UNIX domain socket is far less resource
+ consuming than the establishing of a TCP/IP connection. C<lonc>
+ X<lonc> is a proxy daemon that forks off a child for every server in
+ the Network. Which servers are members of the Network is determined by
+ a lookup table, such as the one in Fig. Examples of Hosts. In order,
+ the entries denote an internal name for the server, the domain of the
+ server, the type of the server, the host name and the IP address.
+ The C<lonc> parent process maintains the population and listens for
+ signals to restart or shutdown, as well as I<USR1>. Every child
+ establishes a multiplexed UNIX domain socket for its server and opens
+ a TCP/IP connection to the lond daemon (discussed below) on the remote
+ machine, which it keeps alive. If the connection is interrupted, the
+ child dies, whereupon the parent makes several attempts to fork
+ another child for that server.
+ When starting a new child (a new connection), first an init-sequence
+ is carried out, which includes receiving the information from the
+ remote C<lond> which is needed to establish the 128-bit encryption key
+ - the key is different for every connection. Next, any buffered
+ (delayed) messages for the server are sent.
+ In normal operation, the child listens to the UNIX socket, forwards
+ requests to the TCP connection, gets the reply from C<lond>, and sends
+ it back to the UNIX socket. Also, C<lonc> takes care of the encryption
+ and decryption of messages.
+ C<lond> X<lond> is the remote end of the TCP/IP connection and acts as
+ a remote command processor. It receives commands, executes them, and
+ sends replies. In normal operation, a C<lonc> child is constantly
+ connected to a dedicated C<lond> child on the remote server, and the
+ same is true vice versa (two persistent connections per server
+ combination).
+ lond listens to a TCP/IP port (denoted B<P> in Fig. Overview of
+ Network Communication) and forks off enough child processes to have
+ one for each other server in the network plus two spare children. The
+ parent process maintains the population and listens for signals to
+ restart or shutdown. Client servers are authenticated by IP.
+ When a new client server comes online, C<lond> sends a signal I<USR1>
+ to lonc, whereupon C<lonc> tries again to reestablish all lost
+ connections, even if it had given up on them before - a new client
+ connecting could mean that that machine came online again after an
+ interruption.
+ The gray boxes in Fig. Overview of Network Communication denote the
+ entities involved in an example transaction of the Network. The Client
+ is logged into server B<C>, while server B<B> is her Home
+ Server. Server B<C> can be an access server or a library server, while
+ server B<B> is a library server. She submits a solution to a homework
+ problem, which is processed by the appropriate handler for the MIME
+ type "problem". Through C<lonnet>, the handler writes information
+ about this transaction to the local session data. To make a permanent
+ log entry, C<lonnet> establishes a connection to the UNIX domain
+ socket for server B<B>. C<lonc> receives this command, encrypts it,
+ and sends it through the persistent TCP/IP connection to the TCP/IP
+ port of the remote C<lond>. C<lond> decrypts the command, executes it
+ by writing to the permanent user data files of the client, and sends
+ back a reply regarding the success of the operation. If the operation
+ was unsuccessful, or the connection would have broken down, C<lonc>
+ would write the command into a FIFO buffer stack to be sent again
+ later. C<lonc> now sends a reply regarding the overall success of the
+ operation to C<lonnet> via the UNIX domain port, which is eventually
+ received back by the handler.
+ =head2 Dynamic Resource Replication
+ Since resources are assembled into higher order resources simply by
+ reference, in principle it would be sufficient to retrieve them from
+ the respective Home Servers of the authors. However, there are several
+ problems with this simple approach: since the resource assembly
+ mechanism is designed to facilitate content assembly from a large
+ number of widely distributed sources, individual sessions would depend
+ on a large number of machines and network connections to be available,
+ thus be rather fragile. Also, frequently accessed resources could
+ potentially drive individual machines in the network into overload
+ situations.
+ Finally, since most resources depend on content handlers on the Access
+ Servers to be served to a client within the session context, the raw
+ source would first have to be transferred across the Network from the
+ respective Library Server to the Access Server, processed there, and
+ then transferred on to the client.
+ =begin latex
+ \begin{figure}
+ \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request}
+   \caption{\label{Dynamic_Replication}Dynamic Replication}
+ \end{figure}
+ =end latex
+ To enable resource assembly in a reliable and scalable way, a dynamic
+ resource replication scheme was developed. Fig. "Dynamic Replication"
+ shows the details of this mechanism.
+ Anytime a resource out of the resource space is requested, a handler
+ routine is called which in turn calls the replication routine. As a
+ first step, this routine determines whether or not the resource is
+ currently in replication transfer (Step B<D1a>). During replication
+ transfer, the incoming data is stored in a temporary file, and Step
+ B<D1a> checks for the presence of that file. If transfer of a resource
+ is actively going on, the controlling handler receives an error
+ message, waits for a few seconds, and then calls the replication
+ routine again. If the resource is still in transfer, the client will
+ receive the message "Service currently not available".
+ In the next step (Step B<D1b>), the replication routine checks if the
+ URL is locally present. If it is, the replication routine returns OK
+ to the controlling handler, which in turn passes the request on to the
+ next handler in the chain.
+ If the resource is not locally present, the Home Server of the
+ resource author (as extracted from the URL) is determined (Step
+ B<D2>). This is done by contacting all library servers in the author's
+ domain (as determined from the lookup table, see Fig. 1.1.2B). In Step
+ B<D2b> a query is sent to the remote server whether or not it is the
+ Home Server of the author (in our current implementation, an
+ additional cache is used to store already identified Home Servers (not
+ shown in the figure)). In Step B<D2c>, the remote server answers the
+ query with True or False. If the Home Server was found, the routine
+ continues, otherwise it contacts the next server (Step D2a). If no
+ server could be found, a "File not Found" error message is issued. In
+ our current implementation, in this step the Home Server is also
+ written into a cache for faster access if resources by the same author
+ are needed again (not shown in the figure).
+ =begin latex
+ \begin{figure}
+ \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change}
+   \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure}
+ =end latex
+ In Step B<D3a>, the routine sends a subscribe command for the URL to
+ the Home Server of the author. The Home Server first determines if the
+ resource is present, and if the access privileges allow it to be
+ copied to the requesting server (B<D3b>). If this is true, the
+ requesting server is added to the list of subscribed servers for that
+ resource (Step B<D3c>). The Home Server will reply with either OK or
+ an error message, which is determined in Step D4. If the remote
+ resource was not present, the error message "File not Found" will be
+ passed on to the client, if the access was not allowed, the error
+ message "Access Denied" is passed on. If the operation succeeded, the
+ requesting server sends an HTTP request for the resource out of the
+ C</raw> server content resource area of the Home Server.
+ The Home Server will then check if the requesting server is part of
+ the network, and if it is subscribed to the resource (Step B<D5b>). If
+ it is, it will send the resource via HTTP to the requesting server
+ without any content handlers processing it (Step B<D5c>). The
+ requesting server will store the incoming data in a temporary data
+ file (Step B<D5a>) - this is the file that Step B<D1a> checks for. If
+ the transfer could not complete, an appropriate error message is sent
+ to the client (Step B<D6>). Otherwise, the transferred temporary file
+ is renamed as the actual resource, and the replication routine returns
+ OK to the controlling handler (Step B<D7>).
+ Fig. "Dynamic Replication: Change" depicts the process of modifying a
+ resource. When an author publishes a new version of a resource, the
+ Home Server will contact every server currently subscribed to the
+ resource (Step B<U1>), as determined from the list of subscribed
+ servers for the resource generated in Step B<D3c>. The subscribing
+ servers will receive and acknowledge the update message (Step
+ B<U1c>). The update mechanism finishes when the last subscribed server
+ has been contacted (messages to unreachable servers are buffered).
+ Each subscribing server will check if the resource in question had
+ been accessed recently, that is, within a configurable amount of time
+ (Step B<U2>).
+ If the resource had not been accessed recently, the local copy of the
+ resource is deleted (Step B<U3a>) and an unsubscribe command is sent
+ to the Home Server (Step B<U3b>). The Home Server will check if the
+ server had indeed originally subscribed to the resource (Step B<U3c>)
+ and then delete the server from the list of subscribed servers for the
+ resource (Step B<U3d>).
+ If the resource had been accessed recently, the modified resource will
+ be copied over using the same mechanism as in Step B<D5a> through
+ B<D7>, which represent Steps B<U4a> through B<U6> in the
+ replication figure.
+ =head2 Load Balancing
+ X<load balancing>C<lond> provides a function to query the server's current loadavg. As
+ a configuration parameter, one can determine the value of loadavg,
+ which is to be considered 100%, for example, 2.00.
+ Access servers can have a list of spare access servers,
+ C</home/httpd/lonTabs/spares.tab>, to offload sessions depending on
+ own workload. This check is done by the login handler. It
+ re-directs the login information and session to the least busy spare
+ server if it is itself overloaded. An additional round-robin IP scheme
+ is possible. See Fig. "Load Balancing Example" for an example of a
+ load-balancing scheme.
+ =begin latex
+ \begin{figure}
+ \includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example}
+   \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure}
+ =end latex
+ =head1 DESCRIPTION
+ Provides persistent TCP connections to the other servers in the network
+ through multiplexed domain sockets
+ B<lonc> forks off children processes that correspond to the other servers
+ in the network.  Management of these processes can be done at the
+ parent process level or the child process level.
+ After forking off the children, B<lonc> the B<parent> executes a main
+ loop which simply waits for processes to exit.  As a process exits, a
+ new process managing a link to the same peer as the exiting process is
+ created.
+ B<logs/lonc.log> is the location of log messages.
+ The process management is now explained in terms of linux shell commands,
+ subroutines internal to this code, and signal assignments:
+ =over 4
+ =item *
+ PID is stored in B<logs/lonc.pid>
+ This is the process id number of the parent B<lonc> process.
+ =item *
+ SIGTERM and SIGINT
+ Parent signal assignment:
+  $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
+ Child signal assignment:
+  $SIG{INT}  = 'DEFAULT'; (and SIGTERM is DEFAULT also)
+ (The child dies and a SIGALRM is sent to parent, awaking parent from slumber
+  to restart a new child.)
+ Command-line invocations:
+  B<kill> B<-s> SIGTERM I<PID>
+  B<kill> B<-s> SIGINT I<PID>
+ Subroutine B<HUNTSMAN>:
+  This is only invoked for the B<lonc> parent I<PID>.
+ This kills all the children, and then the parent.
+ The B<lonc.pid> file is cleared.
+ =item *
+ SIGHUP
+ Current bug:
+  This signal can only be processed the first time
+ on the parent process.  Subsequent SIGHUP signals
+ have no effect.
+ Parent signal assignment:
+  $SIG{HUP}  = \&HUPSMAN;
+ Child signal assignment:
+  none (nothing happens)
+ Command-line invocations:
+  B<kill> B<-s> SIGHUP I<PID>
+ Subroutine B<HUPSMAN>:
+  This is only invoked for the B<lonc> parent I<PID>.
+ This kills all the children, removes the B<lonc.pid> file,
+ and then restarts the parent by re-executing B<lonc>.
+ =item *
+ SIGUSR1
+ Parent signal assignment:
+  $SIG{USR1} = \&USRMAN;
+ Child signal assignment:
+  $SIG{USR1}= \&logstatus;
+ Command-line invocations:
+  B<kill> B<-s> SIGUSR1 I<PID>
+ Subroutine B<USRMAN>:
+  When invoked for the B<lonc> parent I<PID>,
+ SIGUSR1 is sent to all the children, and the status of
+ each connection is logged.
+ =back
+ =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.14
changed lines
	Added in v.1.52