--- loncom/Attic/lonc 2002/03/27 04:07:02 1.36 +++ loncom/Attic/lonc 2003/09/17 19:05:03 1.55 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.36 2002/03/27 04:07:02 foxr Exp $ +# $Id: lonc,v 1.55 2003/09/17 19:05:03 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -37,18 +37,18 @@ # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, # 10/8,10/9,10/15,11/18,12/22, # 2/8,7/25 Gerd Kortemeyer -# 12/05 Scott Harrison # 12/05 Gerd Kortemeyer # YEAR=2001 -# 01/10/01 Scott Harrison # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer -# 12/20 Scott Harrison # YEAR=2002 # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer # 3/07/02 Ron Fox # based on nonforker from Perl Cookbook # - server who multiplexes without forking +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; + use POSIX; use IO::Socket; use IO::Select; @@ -64,25 +64,19 @@ $status=''; $lastlog=''; $conserver='SHELL'; $DEBUG = 0; # Set to 1 for annoyingly complete logs. 
- +$VERSION='$Revison$'; #' stupid emacs +$remoteVERSION; # -------------------------------- Set signal handlers to record abnormal exits &status("Init exception handlers"); $SIG{QUIT}=\&catchexception; $SIG{__DIE__}=\&catchexception; -# ------------------------------------ Read httpd access.conf and get variables -&status("Read access.conf"); -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; - -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# ---------------------------------- Read loncapa_apache.conf and loncapa.conf +&status("Read loncapa.conf and loncapa_apache.conf"); +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; # ----------------------------- Make sure this process is running from user=www &status("Check user ID"); @@ -130,7 +124,7 @@ close(CONFIG); %childatt = (); # number of attempts to start server # for ID -$childmaxattempts=5; +$childmaxattempts=15; # ---------------------------------------------------- Fork once and dissociate &status("Fork and dissociate"); @@ -175,7 +169,14 @@ $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { my $deadpid = wait; # Wait for the next child to die. - # See who died and start new one + # See who died and start new one + # or a signal (e.g. USR1 for restart). + # if a signal, the wait will fail + # This is ordinarily detected by + # checking for the existence of the + # pid index inthe children hash since + # the return value from a failed wait is -1 + # which is an impossible PID. 
&status("Woke up"); my $skipping=''; @@ -255,7 +256,7 @@ unlink($port); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - foreach (@allbuffered) { + foreach (sort @allbuffered) { &status("Sending delayed: $_"); $dfname="$path/$_"; if($DEBUG) { &logthis('Sending '.$dfname); } @@ -315,120 +316,71 @@ unless ( %outbuffer = (); %ready = (); %servers = (); # To be compatible with make filevector. indexed by - # File descriptors, values are file descriptors. + # File ids, values are sockets. # note that the accept socket is omitted. tie %ready, 'Tie::RefHash'; -nonblock($server); -$select = IO::Select->new($server); +# nonblock($server); +# $select = IO::Select->new($server); # Main loop: check reads/accepts, check writes, check ready to process + +status("Main loop $conserver"); while (1) { my $client; my $rv; my $data; my $infdset; # bit vec of fd's to select on input. - my $inreadyset; # Bit vec of fd's ready for input. my $outfdset; # Bit vec of fd's to select on output. - my $outreadyset; # bit vec of fds ready for output. $infdset = MakeFileVector(\%servers); $outfdset= MakeFileVector(\%outbuffer); - - # check for new information on the connections we have - # anything to read or accept? - - foreach $client ($select->can_read(00.10)) { - if ($client == $server) { - # accept a new connection - &status("Accept new connection: $conserver"); - $client = $server->accept(); - $select->add($client); - nonblock($client); - } else { - # read data - $data = ''; - $rv = $client->recv($data, POSIX::BUFSIZ, 0); - - unless (defined($rv) && length $data) { - # This would be the end of file, so close the client - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - &status("Idle"); - $select->remove($client); - close $client; - next; - } - - $inbuffer{$client} .= $data; - - - # test whether the data in the buffer or the data we - # just read means there is a complete request waiting - # to be fulfilled. 
If there is, set $ready{$client} - # to the requests waiting to be fulfilled. - while ($inbuffer{$client} =~ s/(.*\n)//) { - push( @{$ready{$client}}, $1 ); - } - } + vec($infdset, $server->fileno, 1) = 1; + if($DEBUG) { + &logthis("Adding ".$server->fileno. + " to input select vector (listner)". + unpack("b*",$infdset)."\n"); } - - # Any complete requests to process? - foreach $client (keys %ready) { - handle($client); + DoSelect(\$infdset, \$outfdset); # Wait for input. + if($DEBUG) { + &logthis("Doselect completed!"); + &logthis("ins = ".unpack("b*",$infdset)."\n"); + &logthis("outs= ".unpack("b*",$outfdset)."\n"); + } - - # Buffers to flush? - foreach $client ($select->can_write(1)) { - # Skip this client if we have nothing to say - next unless exists $outbuffer{$client}; - - $rv = $client->send($outbuffer{$client}, 0); - - unless ($outbuffer{$client} eq "con_lost\n") { - unless (defined $rv) { - # Whine, but move on. - &logthis("I was told I could write, but I can't.\n"); - next; - } - $errno=$!; - if (($rv == length $outbuffer{$client}) || - ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) { - substr($outbuffer{$client}, 0, $rv) = ''; - delete $outbuffer{$client} unless length $outbuffer{$client}; - } else { - # Couldn't write all the data, and it wasn't because - # it would have blocked. Shutdown and move on. - &logthis("Dropping data with ".$errno.": ". 
- length($outbuffer{$client}).", $rv"); - - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - $select->remove($client); - close($client); - next; - } - } else { -# -------------------------------------------------------- Wow, connection lost - &logthis( - "CRITICAL: Closing connection"); - &status("Connection lost"); - $remotesock->shutdown(2); - &logthis("Attempting to open new connection"); - &openremote($conserver); - } + # Checkfor new connections: + if (vec($infdset, $server->fileno, 1)) { + if($DEBUG) { + &logthis("New connection established"); + } + # accept a new connection + &status("Accept new connection: $conserver"); + $client = $server->accept(); + if (!$client) { + &logthis("Got stupid nonexisent client on ".$server->fileno." $conserver \n"); + } else { + if($DEBUG) { + &logthis("New client fd = ".$client->fileno."\n"); + } + $servers{$client->fileno} = $client; + nonblock($client); + $client->sockopt(SO_KEEPALIVE, 1); # Enable monitoring of + # connection liveness. + } } - + HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready); + HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer, + \%ready); +# -------------------------------------------------------- Wow, connection lost + } + + } } # ------------------------------------------------------- End of make_new_child @@ -446,8 +398,12 @@ sub MakeFileVector my $fdhash = shift; my $selvar = ""; - foreach $socket (keys %fdhash) { - vec($selvar, ($fdhash->{$socket})->fileno, 1) = 1; + foreach $socket (keys %$fdhash) { + if($DEBUG) { + &logthis("Adding ".$socket. + "to select vector. (client)\n"); + } + vec($selvar, $socket, 1) = 1; } return $selvar; } @@ -458,7 +414,7 @@ sub MakeFileVector # Processes output on a buffered set of file descriptors which are # ready to be read. # Parameters: -# $selvector - Vector of writable file descriptors which are writable. +# $selvector - Vector of file descriptors which are writable. 
# \%sockets - Vector of socket references indexed by socket. # \%buffers - Reference to a hash containing output buffers. # Hashes are indexed by sockets. The file descriptors of some @@ -477,11 +433,19 @@ sub HandleOutput my $buffers = shift; my $inbufs = shift; my $readys = shift; + my $sock; - foreach $sock (keys %buffers) { + if($DEBUG) { + &logthis("HandleOutput entered\n"); + } + + foreach $sock (keys %$sockets) { my $socket = $sockets->{$sock}; - if(vec($selvector, $$socket->fileno, 1)) { # $socket is writable. - my $rv = $$socket->send($buffers->{$sock}, 0); + if(vec($selvector, $sock, 1)) { # $socket is writable. + if($DEBUG) { + &logthis("Sending $buffers->{$sock} \n"); + } + my $rv = $socket->send($buffers->{$sock}, 0); $errno = $!; unless ($buffers->{$sock} eq "con_lost\n") { unless (defined $rv) { # Write failed... could be EINTR @@ -510,7 +474,7 @@ sub HandleOutput delete $inbufs->{$sock}; delete $readys->{$sock}; - close($$socket); # Close the client socket. + close($socket); # Close the client socket. next; } } else { # Kludgy way to mark lond connection lost. @@ -519,7 +483,7 @@ sub HandleOutput status("Connection lost"); $remotesock->shutdown(2); &logthis("Attempting to open a new connection"); - &openremot($conserver); + &openremote($conserver); } } @@ -553,14 +517,21 @@ sub HandleInput my $ibufs = shift; my $obufs = shift; my $ready = shift; + my $sock; - foreach $sock (keys %sockets) { + if($DEBUG) { + &logthis("Entered HandleInput\n"); + } + foreach $sock (keys %$sockets) { my $socket = $sockets->{$sock}; - if(vec($selvec, $$socket->fileno, 1)) { # Socket which is readable. + if(vec($selvec, $sock, 1)) { # Socket which is readable. # Attempt to read the data and do error management. my $data = ''; - my $rv = $$socket->recv($data, POSIX::BUFSIZ, 0); + my $rv = $socket->recv($data, POSIX::BUFSIZ, 0); + if($DEBUG) { + &logthis("Received $data from socket"); + } unless (defined($rv) && length $data) { # Read an end of file.. 
this is a disconnect from the peer. @@ -571,7 +542,7 @@ sub HandleInput delete $ready->{$sock}; status("Idle"); - close $$socket; + close $socket; next; } # Append the read data to the input buffer. If the buffer @@ -611,12 +582,22 @@ sub DoSelect { my $ins; while (1) { - my $nfds = select($outs = $$writevec, $ins = $$readvec, undef, undef); - if($nfound) { + my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef); + if($nfds) { + if($DEBUG) { + &logthis("select exited with ".$nfds." fds\n"); + &logthis("ins = ".unpack("b*",$ins). + " readvec = ".unpack("b*",$$readvec)."\n"); + &logthis("outs = ".unpack("b*",$outs). + " writevec = ".unpack("b*",$$writevec)."\n"); + } $$readvec = $ins; $$writevec = $outs; return; } else { + if($DEBUG) { + &logthis("Select exited with no bits set in mask\n"); + } die "Select failed" unless $! == EINTR; } } @@ -661,7 +642,7 @@ sub handle { $request="enc:$cmdlength:$encrequest"; } # --------------------------------------------------------------- Main exchange - $answer = londtransaction($remotesock, $request, 300); + $answer = londtransaction($remotesock, $request, 60); if($DEBUG) { &logthis(" Request data exchange complete"); @@ -707,7 +688,6 @@ sub handle { } } # ---------------------------------------------------------- End make_new_child -} # nonblock($socket) puts socket into nonblocking mode sub nonblock { @@ -727,75 +707,87 @@ sub openremote { my $conserver=shift; -&status("Opening TCP"); + &status("Opening TCP $conserver"); my $st=120+int(rand(240)); # Sleep before opening: -unless ( - $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, - PeerPort => $perlvar{'londPort'}, - Proto => "tcp", - Type => SOCK_STREAM) - ) { - - &logthis( -"WARNING: Couldn't connect to $conserver ($st secs): "); - sleep($st); - exit; - }; + unless ( + $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, + PeerPort => $perlvar{'londPort'}, + Proto => "tcp", + Type => SOCK_STREAM) + ) { + + &logthis( + 
"WARNING: Couldn't connect to $conserver ($st secs): "); + sleep($st); + exit; + }; # ----------------------------------------------------------------- Init dialog -&logthis("INFO Connected to $conserver, initing "); -&status("Init dialogue: $conserver"); + &logthis("INFO Connected to $conserver, initing"); + &status("Init dialogue: $conserver"); $answer = londtransaction($remotesock, "init", 60); chomp($answer); $answer = londtransaction($remotesock, $answer, 60); chomp($answer); - - if ($@=~/timeout/) { - &logthis("Timed out during init.. exiting"); - exit; - } -if ($answer ne 'ok') { - &logthis("Init reply: >$answer<"); - my $st=120+int(rand(240)); - &logthis( -"WARNING: Init failed ($st secs)"); - sleep($st); - exit; -} + if ($@=~/timeout/) { + &logthis("Timed out during init.. exiting"); + exit; + } -sleep 5; -&status("Ponging"); -print $remotesock "pong\n"; -$answer=<$remotesock>; -chomp($answer); -if ($answer!~/^$conserver/) { - &logthis("Pong reply: >$answer<"); -} + if ($answer ne 'ok') { + &logthis("Init reply: >$answer<"); + my $st=120+int(rand(240)); + &logthis("WARNING: Init failed ($st secs)"); + sleep($st); + exit; + } + + $answer = londtransaction($remotesock,"sethost:$conserver",60); + chomp($answer); + if ( $answer ne 'ok') { + &logthis('WARNING: unable to specify remote host'. + $answer.''); + } + + $answer = londtransaction($remotesock,"version:$VERSION",60); + chomp($answer); + if ($answer =~ /^version:/) { + $remoteVERSION=(split(/:/,$answer))[1]; + } else { + &logthis('WARNING: request remote version failed :'. 
+ $answer.': my version is :'.$VERSION.':'); + } + + sleep 5; + &status("Ponging $conserver"); + $answer= londtransaction($remotesock,"pong",60); + chomp($answer); + if ($answer!~/^$conserver/) { + &logthis("Pong reply: >$answer<"); + } # ----------------------------------------------------------- Initialize cipher -&status("Initialize cipher"); -print $remotesock "ekey\n"; -my $buildkey=<$remotesock>; -my $key=$conserver.$perlvar{'lonHostID'}; -$key=~tr/a-z/A-Z/; -$key=~tr/G-P/0-9/; -$key=~tr/Q-Z/0-9/; -$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; -$key=substr($key,0,32); -my $cipherkey=pack("H32",$key); -if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized"); -} else { - my $st=120+int(rand(240)); - &logthis( - "WARNING: ". - "Could not establish secure connection ($st secs)!"); - sleep($st); - exit; -} + &status("Initialize cipher"); + my $buildkey=londtransaction($remotesock,"ekey",60); + my $key=$conserver.$perlvar{'lonHostID'}; + $key=~tr/a-z/A-Z/; + $key=~tr/G-P/0-9/; + $key=~tr/Q-Z/0-9/; + $key=$key.$buildkey.$key.$buildkey.$key.$buildkey; + $key=substr($key,0,32); + my $cipherkey=pack("H32",$key); + if ($cipher=new IDEA $cipherkey) { + &logthis("Secure connection initialized"); + } else { + my $st=120+int(rand(240)); + &logthis("WARNING: ". + "Could not establish secure connection ($st secs)!"); + sleep($st); + exit; + } &logthis(" Remote open success "); } @@ -877,8 +869,8 @@ sub HUPSMAN { # sig local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children &hangup(); &logthis("CRITICAL: Restarting"); - unlink("$execdir/logs/lonc.pid"); my $execdir=$perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); exec("$execdir/lonc"); # here we go again } @@ -897,8 +889,25 @@ sub checkchildren { sub USRMAN { &logthis("USR1: Trying to establish connections again"); - %childatt=(); - &checkchildren(); + # + # It is really important not to just clear the childatt hash or we will + # lose all memory of the children. 
What we really want to do is this: + # For each index where childatt is >= $childmaxattempts + # Zero the associated counter and do a make_child for the host. + # Regardles, the childatt entry is zeroed: + my $host; + foreach $host (keys %childatt) { + if ($childatt{$host} >= $childmaxattempts) { + $childatt{$host} = 0; + &logthis("INFO: Restarting child for server: " + .$host."\n"); + make_new_child($host); + } + else { + $childatt{$host} = 0; + } + } + &checkchildren(); # See if any children are still dead... } # -------------------------------------------------- Non-critical communication @@ -983,12 +992,12 @@ sub londtransaction { alarm(0); }; } else { - if($DEBUG) { - &logthis("Timeout on send in londtransaction"); - } + &logthis("lonc - $conserver - suiciding on send Timeout"); + die("lonc - $conserver - suiciding on send Timeout"); } - if( ($@ =~ /timeout/) && ($DEBUG)) { - &logthis("Timeout on receive in londtransaction"); + if ($@ =~ /timeout/) { + &logthis("lonc - $conserver - suiciding on read Timeout"); + die("lonc - $conserver - suiciding on read Timeout"); } # # Restore the initial sigmask set. @@ -1043,6 +1052,7 @@ sub status { my $now=time; my $local=localtime($now); $status=$local.': '.$what; + $0='lonc: '.$what.' '.$local; } @@ -1062,6 +1072,386 @@ is invoked by B. There is no e will manually start B from the command-line. (In other words, DO NOT START B YOURSELF.) +=head1 OVERVIEW + +=head2 Physical Overview + +=begin latex + +\begin{figure} + \begin{center} + \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram} + \end{center} + \caption{\label{Overview_Of_Network}Overview of Network} +\end{figure} + +=end latex + +Physically, the Network consists of relatively inexpensive +upper-PC-class server machines which are linked through the commodity +internet in a load-balancing, dynamically content-replicating and +failover-secure way. 
+ +All machines in the Network are connected with each other through +two-way persistent TCP/IP connections. Clients (B, B, B and +B in Fig. Overview of Network) connect to the servers via standard +HTTP. There are two classes of servers, B (B and +B in Fig. Overview of Network) and B (B, B, +B and B in Fig. Overview of Network). + +B X X are used to +store all personal records of a set of users, and are responsible for +their initial authentication when a session is opened on any server in +the Network. For Authors, Library Servers also host their +construction area and the authoritative copy of the current and +previous versions of every resource that was published by that +author. Library servers can be used as backups to host sessions when +all access servers in the Network are overloaded. Otherwise, for +learners, access servers are used to host the sessions. Library +servers need to have strong I/O capabilities. + +B X X provide LON-CAPA +service to users, using the library servers as their data source. The +network is designed so that the number of concurrent sessions can be +increased over a wide range by simply adding additional access servers +before having to add additional library servers. Preliminary tests +showed that a library server could handle up to 10 access servers +fully parallel. Access servers can generally be cheaper hardware than +library servers require. + +The Network is divided into B X, which are logical +boundaries between participating institutions. These domains can be +used to limit the flow of personal user information across the +network, set access privileges and enforce royalty schemes. LON-CAPA +domains bear no relationship to any other domain, including domains +used by the DNS system; LON-CAPA domains may be freely configured in +any manner that suits your use pattern. + +=head2 Example Transactions + +Fig. Overview of Network also depicts examples for several kinds of +transactions conducted across the Network.
+ +An instructor at client B modifies and publishes a resource on her +Home Server B. Server B has a record of all server machines +currently subscribed to this resource, and replicates it to servers +B and B. However, server B is currently offline, so the +update notification gets buffered on B until B comes online +again. Servers B and B are currently not subscribed to this +resource. + +Learners B and B have open sessions on server B, and the new +resource is immediately available to them. + +Learner B tries to connect to server B for a new session, +however, the machine is not reachable, so he connects to another +Access Server B instead. This server currently does not have all +necessary resources locally present to host learner B, but +subscribes to them and replicates them as they are accessed by B. + +Learner B solves a problem on server B. Library Server B is +B's Home Server, so this information gets forwarded to B, where +the records of H are updated. + +=head2 lond, lonc, and lonnet + +=begin latex + +\begin{figure} +\includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2} + \caption{\label{Overview_Of_Network_Communication}Overview of +Network Communication} \end{figure} + +=end latex + +Fig. Overview of Network Communication elaborates on the details of +this network infrastructure. It depicts three servers (B, B and +B) and a client who has a session on server B. + +As B accesses different resources in the system, different +handlers, which are incorporated as modules into the child processes +of the web server software, process these requests. + +Our current implementation uses C inside of the Apache web +server software. As an example, server B currently has four active +web server software child processes. The chain of handlers dealing +with a certain resource is determined by both the server content +resource area (see below) and the MIME type, which in turn is +determined by the URL extension. 
For most URL structures, both an +authentication handler and a content handler are registered. + +Handlers use a common library C X to interact with +both locally present temporary session data and data across the server +network. For example, lonnet provides routines for finding the home +server of a user, finding the server with the lowest loadavg, sending +simple command-reply sequences, and sending critical messages such as +a homework completion, etc. For a non-critical message, the routines +reply with a simple "connection lost" if the message could not be +delivered. For critical messages, lonnet tries to re-establish +connections, re-send the command, etc. If no valid reply could be +received, it answers "connection deferred" and stores the message in +buffer space to be sent at a later point in time. Also, failed +critical messages are logged. + +The interface between C and the Network is established by a +multiplexed UNIX domain socket, denoted B in Fig. Overview of +Network Communication. The rationale behind this rather involved +architecture is that httpd processes (Apache children) dynamically +come and go on the timescale of minutes, based on workload and number +of processed requests. Over the lifetime of an httpd child, however, +it has to establish several hundred connections to several different +servers in the Network. + +On the other hand, establishing a TCP/IP connection is resource +consuming for both ends of the line, and to optimize this connectivity +between different servers, connections in the Network are designed to +be persistent on the timescale of months, until either end is +rebooted. This mechanism will be elaborated on below. 
+ +=begin latex + +\begin{figure} +\begin{lyxcode} +msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51 + +msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68 + +msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69 + +msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67 + +hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34 + +hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35 + +hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36 + +huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40 + +huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41 + +huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42 + +huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43 + +hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45 + +huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47 +\end{lyxcode} + +\caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}} +\end{figure} + +=end latex + +Establishing a connection to a UNIX domain socket is far less resource +consuming than the establishing of a TCP/IP connection. C +X is a proxy daemon that forks off a child for every server in +the Network. Which servers are members of the Network is determined by +a lookup table, such as the one in Fig. Examples of Hosts. In order, +the entries denote an internal name for the server, the domain of the +server, the type of the server, the host name and the IP address. + +The C parent process maintains the population and listens for +signals to restart or shutdown, as well as I. Every child +establishes a multiplexed UNIX domain socket for its server and opens +a TCP/IP connection to the lond daemon (discussed below) on the remote +machine, which it keeps alive. If the connection is interrupted, the +child dies, whereupon the parent makes several attempts to fork +another child for that server. 
+ +When starting a new child (a new connection), first an init-sequence +is carried out, which includes receiving the information from the +remote C which is needed to establish the 128-bit encryption key +- the key is different for every connection. Next, any buffered +(delayed) messages for the server are sent. + +In normal operation, the child listens to the UNIX socket, forwards +requests to the TCP connection, gets the reply from C, and sends +it back to the UNIX socket. Also, C takes care of the encryption +and decryption of messages. + +C X is the remote end of the TCP/IP connection and acts as +a remote command processor. It receives commands, executes them, and +sends replies. In normal operation, a C child is constantly +connected to a dedicated C child on the remote server, and the +same is true vice versa (two persistent connections per server +combination). + +lond listens to a TCP/IP port (denoted B

in Fig. Overview of +Network Communication) and forks off enough child processes to have +one for each other server in the network plus two spare children. The +parent process maintains the population and listens for signals to +restart or shutdown. Client servers are authenticated by IP. + +When a new client server comes online, C sends a signal I +to lonc, whereupon C tries again to reestablish all lost +connections, even if it had given up on them before - a new client +connecting could mean that that machine came online again after an +interruption. + +The gray boxes in Fig. Overview of Network Communication denote the +entities involved in an example transaction of the Network. The Client +is logged into server B, while server B is her Home +Server. Server B can be an access server or a library server, while +server B is a library server. She submits a solution to a homework +problem, which is processed by the appropriate handler for the MIME +type "problem". Through C, the handler writes information +about this transaction to the local session data. To make a permanent +log entry, C establishes a connection to the UNIX domain +socket for server B. C receives this command, encrypts it, +and sends it through the persistent TCP/IP connection to the TCP/IP +port of the remote C. C decrypts the command, executes it +by writing to the permanent user data files of the client, and sends +back a reply regarding the success of the operation. If the operation +was unsuccessful, or the connection would have broken down, C +would write the command into a FIFO buffer stack to be sent again +later. C now sends a reply regarding the overall success of the +operation to C via the UNIX domain port, which is eventually +received back by the handler. + +=head2 Dynamic Resource Replication + +Since resources are assembled into higher order resources simply by +reference, in principle it would be sufficient to retrieve them from +the respective Home Servers of the authors. 
However, there are several +problems with this simple approach: since the resource assembly +mechanism is designed to facilitate content assembly from a large +number of widely distributed sources, individual sessions would depend +on a large number of machines and network connections to be available, +thus be rather fragile. Also, frequently accessed resources could +potentially drive individual machines in the network into overload +situations. + +Finally, since most resources depend on content handlers on the Access +Servers to be served to a client within the session context, the raw +source would first have to be transferred across the Network from the +respective Library Server to the Access Server, processed there, and +then transferred on to the client. + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request} + \caption{\label{Dynamic_Replication}Dynamic Replication} +\end{figure} + +=end latex + +To enable resource assembly in a reliable and scalable way, a dynamic +resource replication scheme was developed. Fig. "Dynamic Replication" +shows the details of this mechanism. + +Anytime a resource out of the resource space is requested, a handler +routine is called which in turn calls the replication routine. As a +first step, this routine determines whether or not the resource is +currently in replication transfer (Step B). During replication +transfer, the incoming data is stored in a temporary file, and Step +B checks for the presence of that file. If transfer of a resource +is actively going on, the controlling handler receives an error +message, waits for a few seconds, and then calls the replication +routine again. If the resource is still in transfer, the client will +receive the message "Service currently not available". + +In the next step (Step B), the replication routine checks if the +URL is locally present.
If it is, the replication routine returns OK +to the controlling handler, which in turn passes the request on to the +next handler in the chain. + +If the resource is not locally present, the Home Server of the +resource author (as extracted from the URL) is determined (Step +B). This is done by contacting all library servers in the author's +domain (as determined from the lookup table, see Fig. 1.1.2B). In Step +B a query is sent to the remote server whether or not it is the +Home Server of the author (in our current implementation, an +additional cache is used to store already identified Home Servers (not +shown in the figure)). In Step B, the remote server answers the +query with True or False. If the Home Server was found, the routine +continues, otherwise it contacts the next server (Step D2a). If no +server could be found, a "File not Found" error message is issued. In +our current implementation, in this step the Home Server is also +written into a cache for faster access if resources by the same author +are needed again (not shown in the figure). + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change} + \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure} + +=end latex + +In Step B, the routine sends a subscribe command for the URL to +the Home Server of the author. The Home Server first determines if the +resource is present, and if the access privileges allow it to be +copied to the requesting server (B). If this is true, the +requesting server is added to the list of subscribed servers for that +resource (Step B). The Home Server will reply with either OK or +an error message, which is determined in Step D4. If the remote +resource was not present, the error message "File not Found" will be +passed on to the client; if the access was not allowed, the error +message "Access Denied" is passed on.
If the operation succeeded, the +requesting server sends an HTTP request for the resource out of the +C server content resource area of the Home Server. + +The Home Server will then check if the requesting server is part of +the network, and if it is subscribed to the resource (Step B). If +it is, it will send the resource via HTTP to the requesting server +without any content handlers processing it (Step B). The +requesting server will store the incoming data in a temporary data +file (Step B) - this is the file that Step B checks for. If +the transfer could not complete, an appropriate error message is sent +to the client (Step B). Otherwise, the transferred temporary file +is renamed as the actual resource, and the replication routine returns +OK to the controlling handler (Step B). + +Fig. "Dynamic Replication: Change" depicts the process of modifying a +resource. When an author publishes a new version of a resource, the +Home Server will contact every server currently subscribed to the +resource (Step B), as determined from the list of subscribed +servers for the resource generated in Step B. The subscribing +servers will receive and acknowledge the update message (Step +B). The update mechanism finishes when the last subscribed server +has been contacted (messages to unreachable servers are buffered). + +Each subscribing server will check if the resource in question had +been accessed recently, that is, within a configurable amount of time +(Step B). + +If the resource had not been accessed recently, the local copy of the +resource is deleted (Step B) and an unsubscribe command is sent +to the Home Server (Step B). The Home Server will check if the +server had indeed originally subscribed to the resource (Step B) +and then delete the server from the list of subscribed servers for the +resource (Step B).
+ +If the resource had been accessed recently, the modified resource will +be copied over using the same mechanism as in Step B through +B, which represents Steps B through B in the +replication figure. + +=head2 Load Balancing + +XC provides a function to query the server's current loadavg. As +a configuration parameter, one can determine the value of loadavg, +which is to be considered 100%, for example, 2.00. + +Access servers can have a list of spare access servers, +C, to offload sessions depending on +their own workload. This check is done by the login handler. It +re-directs the login information and session to the least busy spare +server if it is itself overloaded. An additional round-robin IP scheme +is also possible. See Fig. "Load Balancing Example" for an example of a +load-balancing scheme. + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example} + \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure} + +=end latex + =head1 DESCRIPTION Provides persistent TCP connections to the other servers in the network @@ -1071,10 +1461,10 @@ B forks off children processes tha in the network. Management of these processes can be done at the parent process level or the child process level. - After forking off the children, B the B -executes a main loop which simply waits for processes to exit. -As a process exits, a new process managing a link to the same -peer as the exiting process is created. +After forking off the children, B the B executes a main +loop which simply waits for processes to exit. As a process exits, a +new process managing a link to the same peer as the exiting process is +created. B is the location of log messages. @@ -1154,25 +1544,4 @@ each connection is logged.
=back -=head1 PREREQUISITES - -POSIX -IO::Socket -IO::Select -IO::File -Socket -Fcntl -Tie::RefHash -Crypt::IDEA - -=head1 COREQUISITES - -=head1 OSNAMES - -linux - -=head1 SCRIPT CATEGORIES - -Server/Process - =cut