--- loncom/Attic/lonc 2002/10/30 14:50:04 1.43 +++ loncom/Attic/lonc 2003/07/31 21:32:44 1.53 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.43 2002/10/30 14:50:04 www Exp $ +# $Id: lonc,v 1.53 2003/07/31 21:32:44 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -37,18 +37,14 @@ # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, # 10/8,10/9,10/15,11/18,12/22, # 2/8,7/25 Gerd Kortemeyer -# 12/05 Scott Harrison # 12/05 Gerd Kortemeyer # YEAR=2001 -# 01/10/01 Scott Harrison # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer -# 12/20 Scott Harrison # YEAR=2002 # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer # 3/07/02 Ron Fox # based on nonforker from Perl Cookbook # - server who multiplexes without forking -# 5/11/2002 Scott Harrison use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -68,7 +64,8 @@ $status=''; $lastlog=''; $conserver='SHELL'; $DEBUG = 0; # Set to 1 for annoyingly complete logs. - +$VERSION='$Revison$'; #' stupid emacs +$remoteVERSION; # -------------------------------- Set signal handlers to record abnormal exits &status("Init exception handlers"); @@ -259,7 +256,7 @@ unlink($port); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - foreach (@allbuffered) { + foreach (sort @allbuffered) { &status("Sending delayed: $_"); $dfname="$path/$_"; if($DEBUG) { &logthis('Sending '.$dfname); } @@ -329,7 +326,7 @@ tie %ready, 'Tie::RefHash'; # Main loop: check reads/accepts, check writes, check ready to process -status("Main loop"); +status("Main loop $conserver"); while (1) { my $client; my $rv; @@ -369,6 +366,8 @@ while (1) { } $servers{$client->fileno} = $client; nonblock($client); + $client->sockopt(SO_KEEPALIVE, 1);# Enable monitoring of + # connection liveness. } HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready); HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer, @@ -704,75 +703,87 @@ sub openremote { my $conserver=shift; -&status("Opening TCP"); + &status("Opening TCP $conserver"); my $st=120+int(rand(240)); # Sleep before opening: -unless ( - $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, - PeerPort => $perlvar{'londPort'}, - Proto => "tcp", - Type => SOCK_STREAM) - ) { - - &logthis( -"WARNING: Couldn't connect to $conserver ($st secs): "); - sleep($st); - exit; - }; + unless ( + $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, + PeerPort => $perlvar{'londPort'}, + Proto => "tcp", + Type => SOCK_STREAM) + ) { + + &logthis( + "WARNING: Couldn't connect to $conserver ($st secs): "); + sleep($st); + exit; + }; # ----------------------------------------------------------------- Init dialog -&logthis("INFO Connected to $conserver, initing "); -&status("Init dialogue: $conserver"); + &logthis("INFO Connected to $conserver, initing"); + &status("Init dialogue: $conserver"); $answer = londtransaction($remotesock, "init", 60); chomp($answer); $answer = londtransaction($remotesock, $answer, 60); chomp($answer); - - if ($@=~/timeout/) { - &logthis("Timed out during init.. exiting"); - exit; - } -if ($answer ne 'ok') { - &logthis("Init reply: >$answer<"); - my $st=120+int(rand(240)); - &logthis( -"WARNING: Init failed ($st secs)"); - sleep($st); - exit; -} + if ($@=~/timeout/) { + &logthis("Timed out during init.. 
exiting"); + exit; + } -sleep 5; -&status("Ponging"); -print $remotesock "pong\n"; -$answer=<$remotesock>; -chomp($answer); -if ($answer!~/^$conserver/) { - &logthis("Pong reply: >$answer<"); -} + if ($answer ne 'ok') { + &logthis("Init reply: >$answer<"); + my $st=120+int(rand(240)); + &logthis("WARNING: Init failed ($st secs)"); + sleep($st); + exit; + } + + $answer = londtransaction($remotesock,"sethost:$conserver",60); + chomp($answer); + if ( $answer ne 'ok') { + &logthis('WARNING: unable to specify remote host'. + $answer.''); + } + + $answer = londtransaction($remotesock,"version:$VERSION",60); + chomp($answer); + if ($answer =~ /^version:/) { + $remoteVERSION=(split(/:/,$answer))[1]; + } else { + &logthis('WARNING: request remote version failed :'. + $answer.': my version is :'.$VERSION.':'); + } + + sleep 5; + &status("Ponging $conserver"); + $answer= londtransaction($remotesock,"pong",60); + chomp($answer); + if ($answer!~/^$conserver/) { + &logthis("Pong reply: >$answer<"); + } # ----------------------------------------------------------- Initialize cipher -&status("Initialize cipher"); -print $remotesock "ekey\n"; -my $buildkey=<$remotesock>; -my $key=$conserver.$perlvar{'lonHostID'}; -$key=~tr/a-z/A-Z/; -$key=~tr/G-P/0-9/; -$key=~tr/Q-Z/0-9/; -$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; -$key=substr($key,0,32); -my $cipherkey=pack("H32",$key); -if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized"); -} else { - my $st=120+int(rand(240)); - &logthis( - "WARNING: ". - "Could not establish secure connection ($st secs)!"); - sleep($st); - exit; -} + &status("Initialize cipher"); + my $buildkey=londtransaction($remotesock,"ekey",60); + my $key=$conserver.$perlvar{'lonHostID'}; + $key=~tr/a-z/A-Z/; + $key=~tr/G-P/0-9/; + $key=~tr/Q-Z/0-9/; + $key=$key.$buildkey.$key.$buildkey.$key.$buildkey; + $key=substr($key,0,32); + my $cipherkey=pack("H32",$key); + if ($cipher=new IDEA $cipherkey) { + &logthis("Secure connection initialized"); + } else { + my $st=120+int(rand(240)); + &logthis("WARNING: ". + "Could not establish secure connection ($st secs)!"); + sleep($st); + exit; + } &logthis(" Remote open success "); } @@ -854,8 +865,8 @@ sub HUPSMAN { # sig local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children &hangup(); &logthis("CRITICAL: Restarting"); - unlink("$execdir/logs/lonc.pid"); my $execdir=$perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); exec("$execdir/lonc"); # here we go again } @@ -977,12 +988,12 @@ sub londtransaction { alarm(0); }; } else { - if($DEBUG) { - &logthis("Timeout on send in londtransaction"); - } + &logthis("lonc - suiciding on send Timeout"); + die("lonc - suiciding on send Timeout"); } - if( ($@ =~ /timeout/) && ($DEBUG)) { - &logthis("Timeout on receive in londtransaction"); + if ($@ =~ /timeout/) { + &logthis("lonc - suiciding on read Timeout"); + die("lonc - suiciding on read Timeout"); } # # Restore the initial sigmask set. @@ -1057,6 +1068,386 @@ is invoked by B. There is no e will manually start B from the command-line. (In other words, DO NOT START B YOURSELF.) 
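+
+The hunks above route every request/reply pair with the remote lond
+through C<londtransaction>, and this revision changes its timeout
+handling from a debug log entry to an immediate C<die>, so that the
+parent can start a fresh child. As a rough orientation, a minimal
+sketch of such a timed transaction helper is shown below; it is a
+simplified, hypothetical version, not the routine actually shipped in
+B<lonc>:
+
+    # Sketch only: send one newline-terminated command and wait at
+    # most $timeout seconds for a single reply line.
+    sub timed_transaction {
+        my ($sock, $request, $timeout) = @_;
+        my $reply;
+        eval {
+            local $SIG{ALRM} = sub { die "timeout\n" };
+            alarm($timeout);
+            print $sock "$request\n";
+            $reply = <$sock>;
+            alarm(0);
+        };
+        if ($@ =~ /timeout/) {
+            # as of this revision a timeout is fatal for the child
+            die("lonc - suiciding on read Timeout");
+        }
+        chomp($reply) if defined $reply;
+        return $reply;
+    }
+
+A caller would then write, for example,
+C<timed_transaction($remotesock, "pong", 60)> just as the init
+sequence above does with the real routine.
+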
+=head1 OVERVIEW
+
+=head2 Physical Overview
+
+=begin latex
+
+\begin{figure}
+  \begin{center}
+    \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram}
+  \end{center}
+  \caption{\label{Overview_Of_Network}Overview of Network}
+\end{figure}
+
+=end latex
+
+Physically, the Network consists of relatively inexpensive
+upper-PC-class server machines which are linked through the commodity
+internet in a load-balancing, dynamically content-replicating and
+failover-secure way.
+
+All machines in the Network are connected with each other through
+two-way persistent TCP/IP connections. Clients (B, B, B and
+B in Fig. Overview of Network) connect to the servers via standard
+HTTP. There are two classes of servers, B (B and
+B in Fig. Overview of Network) and B (B, B,
+B and B in Fig. Overview of Network).
+
+B X X are used to
+store all personal records of a set of users, and are responsible for
+their initial authentication when a session is opened on any server in
+the Network. For Authors, Library Servers also host their
+construction area and the authoritative copy of the current and
+previous versions of every resource that was published by that
+author. Library servers can be used as backups to host sessions when
+all access servers in the Network are overloaded. Otherwise, for
+learners, access servers are used to host the sessions. Library
+servers need to have strong I/O capabilities.
+
+B X X provide LON-CAPA
+service to users, using the library servers as their data source. The
+network is designed so that the number of concurrent sessions can be
+increased over a wide range by simply adding additional access servers
+before having to add additional library servers. Preliminary tests
+showed that a library server could handle up to 10 access servers
+running fully in parallel. Access servers can generally be cheaper
+hardware than library servers require.
+
+The Network is divided into B X, which are logical
+boundaries between participating institutions. These domains can be
+used to limit the flow of personal user information across the
+network, set access privileges and enforce royalty schemes. LON-CAPA
+domains bear no relationship to any other domain, including domains
+used by the DNS system; LON-CAPA domains may be freely configured in
+any manner that suits your use pattern.
+
+=head2 Example Transactions
+
+Fig. Overview of Network also depicts examples of several kinds of
+transactions conducted across the Network.
+
+An instructor at client B modifies and publishes a resource on her
+Home Server B. Server B has a record of all server machines
+currently subscribed to this resource, and replicates it to servers
+B and B. However, server B is currently offline, so the
+update notification gets buffered on B until B comes online
+again. Servers B and B are currently not subscribed to this
+resource.
+
+Learners B and B have open sessions on server B, and the new
+resource is immediately available to them.
+
+Learner B tries to connect to server B for a new session;
+however, the machine is not reachable, so he connects to another
+Access Server B instead. This server currently does not have all
+necessary resources locally present to host learner B, but
+subscribes to them and replicates them as they are accessed by B.
+
+Learner B solves a problem on server B. Library Server B is
+B's Home Server, so this information gets forwarded to B, where
+the records of H are updated.
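+
+The "buffered until the server comes online again" behaviour in this
+example is implemented through delayed-message files, which a
+reconnecting B<lonc> child replays in order (the C<sort @allbuffered>
+change earlier in this diff). The following sketch illustrates the
+idea only; the directory and file-naming scheme shown here are
+illustrative assumptions, not the exact B<lonc> conventions:
+
+    use IO::File;
+
+    my $bufdir = '/home/httpd/perl/tmp';   # illustrative path
+
+    # Store a command that could not be delivered to $peer right now.
+    sub buffer_delayed {
+        my ($peer, $cmd) = @_;
+        my $fname = "$bufdir/".time.'_'.$$.".$peer";
+        my $fh = IO::File->new("> $fname") or return;
+        print $fh "$cmd\n";
+        $fh->close();
+    }
+
+    # Replay the backlog for $peer over an open connection, oldest first.
+    sub send_delayed {
+        my ($peer, $sock) = @_;
+        opendir(my $dh, $bufdir) or return;
+        my @allbuffered = sort grep { /\.\Q$peer\E$/ } readdir($dh);
+        closedir($dh);
+        foreach my $dfname (@allbuffered) {
+            my $fh = IO::File->new("$bufdir/$dfname") or next;
+            my $cmd = <$fh>;
+            print $sock $cmd;            # forward to the remote server
+            my $reply = <$sock>;         # wait for the acknowledgement
+            unlink("$bufdir/$dfname") if defined $reply;
+        }
+    }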
+ +=head2 lond, lonc, and lonnet + +=begin latex + +\begin{figure} +\includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2} + \caption{\label{Overview_Of_Network_Communication}Overview of +Network Communication} \end{figure} + +=end latex + +Fig. Overview of Network Communication elaborates on the details of +this network infrastructure. It depicts three servers (B, B and +B) and a client who has a session on server B. + +As B accesses different resources in the system, different +handlers, which are incorporated as modules into the child processes +of the web server software, process these requests. + +Our current implementation uses C inside of the Apache web +server software. As an example, server B currently has four active +web server software child processes. The chain of handlers dealing +with a certain resource is determined by both the server content +resource area (see below) and the MIME type, which in turn is +determined by the URL extension. For most URL structures, both an +authentication handler and a content handler are registered. + +Handlers use a common library C X to interact with +both locally present temporary session data and data across the server +network. For example, lonnet provides routines for finding the home +server of a user, finding the server with the lowest loadavg, sending +simple command-reply sequences, and sending critical messages such as +a homework completion, etc. For a non-critical message, the routines +reply with a simple "connection lost" if the message could not be +delivered. For critical messages, lonnet tries to re-establish +connections, re-send the command, etc. If no valid reply could be +received, it answers "connection deferred" and stores the message in +buffer space to be sent at a later point in time. Also, failed +critical messages are logged. + +The interface between C and the Network is established by a +multiplexed UNIX domain socket, denoted B in Fig. Overview of +Network Communication. The rationale behind this rather involved +architecture is that httpd processes (Apache children) dynamically +come and go on the timescale of minutes, based on workload and number +of processed requests. Over the lifetime of an httpd child, however, +it has to establish several hundred connections to several different +servers in the Network. + +On the other hand, establishing a TCP/IP connection is resource +consuming for both ends of the line, and to optimize this connectivity +between different servers, connections in the Network are designed to +be persistent on the timescale of months, until either end is +rebooted. This mechanism will be elaborated on below. 
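+
+From the point of view of an Apache child, talking to the Network
+therefore simply means talking to a local UNIX domain socket. A sketch
+of that client side follows; the socket path and the request string
+are assumptions for illustration (the real values come from the
+LON-CAPA configuration and from C<lonnet.pm>), and the real library
+adds queueing, encryption negotiation and error handling:
+
+    use Socket;
+    use IO::Socket::UNIX;
+
+    # Hypothetical per-peer socket created by the lonc child for "msul1".
+    my $peer     = 'msul1';
+    my $sockpath = "/home/httpd/sockets/$peer";   # illustrative path
+
+    my $client = IO::Socket::UNIX->new(Peer => $sockpath,
+                                       Type => SOCK_STREAM)
+        or die "No lonc child listening for $peer: $!";
+
+    print $client "somecommand:arguments\n";   # placeholder request
+    my $reply = <$client>;   # lonc forwards it to the remote lond and
+    chomp($reply);           # relays the one-line answer back
+    close($client);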
+
+=begin latex
+
+\begin{figure}
+\begin{lyxcode}
+msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51
+
+msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68
+
+msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69
+
+msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67
+
+hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34
+
+hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35
+
+hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36
+
+huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40
+
+huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41
+
+huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42
+
+huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43
+
+hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45
+
+huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47
+\end{lyxcode}
+
+\caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}}
+\end{figure}
+
+=end latex
+
+Establishing a connection to a UNIX domain socket is far less resource
+consuming than establishing a TCP/IP connection. C
+X is a proxy daemon that forks off a child for every server in
+the Network. Which servers are members of the Network is determined by
+a lookup table, such as the one in Fig. Example of Hosts. In order,
+the entries denote an internal name for the server, the domain of the
+server, the type of the server, the host name and the IP address.
+
+The C parent process maintains the population and listens for
+signals to restart or shutdown, as well as I. Every child
+establishes a multiplexed UNIX domain socket for its server and opens
+a TCP/IP connection to the lond daemon (discussed below) on the remote
+machine, which it keeps alive. If the connection is interrupted, the
+child dies, whereupon the parent makes several attempts to fork
+another child for that server.
+
+When starting a new child (a new connection), first an init-sequence
+is carried out, which includes receiving the information from the
+remote C which is needed to establish the 128-bit encryption key
+- the key is different for every connection. Next, any buffered
+(delayed) messages for the server are sent.
+
+In normal operation, the child listens to the UNIX socket, forwards
+requests to the TCP connection, gets the reply from C, and sends
+it back to the UNIX socket. Also, C takes care of the encryption
+and decryption of messages.
+
+C X is the remote end of the TCP/IP connection and acts as
+a remote command processor. It receives commands, executes them, and
+sends replies. In normal operation, a C child is constantly
+connected to a dedicated C child on the remote server, and the
+same is true vice versa (two persistent connections per server
+combination).
+
+lond listens to a TCP/IP port (denoted B in Fig. Overview of
+Network Communication) and forks off enough child processes to have
+one for each other server in the network plus two spare children. The
+parent process maintains the population and listens for signals to
+restart or shutdown. Client servers are authenticated by IP.
+
+When a new client server comes online, C sends a signal I
+to lonc, whereupon C tries again to reestablish all lost
+connections, even if it had given up on them before - a new client
+connecting could mean that that machine came online again after an
+interruption.
+
+The gray boxes in Fig. Overview of Network Communication denote the
+entities involved in an example transaction of the Network. The Client
+is logged into server B, while server B is her Home
+Server. Server B can be an access server or a library server, while
+server B is a library server. She submits a solution to a homework
+problem, which is processed by the appropriate handler for the MIME
+type "problem". Through C, the handler writes information
+about this transaction to the local session data. To make a permanent
+log entry, C establishes a connection to the UNIX domain
+socket for server B. C receives this command, encrypts it,
+and sends it through the persistent TCP/IP connection to the TCP/IP
+port of the remote C. C decrypts the command, executes it
+by writing to the permanent user data files of the client, and sends
+back a reply regarding the success of the operation. If the operation
+was unsuccessful, or the connection had broken down, C
+would write the command into a FIFO buffer stack to be sent again
+later. C now sends a reply regarding the overall success of the
+operation to C via the UNIX domain port, which is eventually
+received back by the handler.
+
+=head2 Dynamic Resource Replication
+
+Since resources are assembled into higher order resources simply by
+reference, in principle it would be sufficient to retrieve them from
+the respective Home Servers of the authors. However, there are several
+problems with this simple approach: since the resource assembly
+mechanism is designed to facilitate content assembly from a large
+number of widely distributed sources, individual sessions would depend
+on a large number of machines and network connections to be available,
+and would thus be rather fragile. Also, frequently accessed resources
+could potentially drive individual machines in the network into
+overload situations.
+
+Finally, since most resources depend on content handlers on the Access
+Servers to be served to a client within the session context, the raw
+source would first have to be transferred across the Network from the
+respective Library Server to the Access Server, processed there, and
+then transferred on to the client.
+
+=begin latex
+
+\begin{figure}
+\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request}
+ \caption{\label{Dynamic_Replication}Dynamic Replication}
+\end{figure}
+
+=end latex
+
+To enable resource assembly in a reliable and scalable way, a dynamic
+resource replication scheme was developed. Fig. "Dynamic Replication"
+shows the details of this mechanism.
+
+Anytime a resource out of the resource space is requested, a handler
+routine is called which in turn calls the replication routine. As a
+first step, this routine determines whether or not the resource is
+currently in replication transfer (Step B). During replication
+transfer, the incoming data is stored in a temporary file, and Step
+B checks for the presence of that file. If transfer of a resource
+is actively going on, the controlling handler receives an error
+message, waits for a few seconds, and then calls the replication
+routine again. If the resource is still in transfer, the client will
+receive the message "Service currently not available".
+
+In the next step (Step B), the replication routine checks if the
+URL is locally present. If it is, the replication routine returns OK
+to the controlling handler, which in turn passes the request on to the
+next handler in the chain.
+
+If the resource is not locally present, the Home Server of the
+resource author (as extracted from the URL) is determined (Step
+B). This is done by contacting all library servers in the author's
+domain (as determined from the lookup table, see Fig. 1.1.2B). In Step
+B a query is sent to the remote server asking whether or not it is
+the Home Server of the author (in our current implementation, an
+additional cache is used to store already identified Home Servers (not
+shown in the figure)). In Step B, the remote server answers the
+query with True or False. If the Home Server was found, the routine
+continues, otherwise it contacts the next server (Step D2a). If no
+server could be found, a "File not Found" error message is issued. In
+our current implementation, in this step the Home Server is also
+written into a cache for faster access if resources by the same author
+are needed again (not shown in the figure).
+
+=begin latex
+
+\begin{figure}
+\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change}
+ \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure}
+
+=end latex
+
+In Step B, the routine sends a subscribe command for the URL to
+the Home Server of the author. The Home Server first determines if the
+resource is present, and if the access privileges allow it to be
+copied to the requesting server (B). If this is true, the
+requesting server is added to the list of subscribed servers for that
+resource (Step B). The Home Server will reply with either OK or
+an error message, which is determined in Step D4. If the remote
+resource was not present, the error message "File not Found" will be
+passed on to the client; if the access was not allowed, the error
+message "Access Denied" is passed on. If the operation succeeded, the
+requesting server sends an HTTP request for the resource out of the
+C server content resource area of the Home Server.
+
+The Home Server will then check if the requesting server is part of
+the network, and if it is subscribed to the resource (Step B). If
+it is, it will send the resource via HTTP to the requesting server
+without any content handlers processing it (Step B). The
+requesting server will store the incoming data in a temporary data
+file (Step B) - this is the file that Step B checks for. If
+the transfer could not complete, an appropriate error message is sent
+to the client (Step B). Otherwise, the transferred temporary file
+is renamed as the actual resource, and the replication routine returns
+OK to the controlling handler (Step B).
+
+Fig. "Dynamic Replication: Change" depicts the process of modifying a
+resource. When an author publishes a new version of a resource, the
+Home Server will contact every server currently subscribed to the
+resource (Step B), as determined from the list of subscribed
+servers for the resource generated in Step B. The subscribing
+servers will receive and acknowledge the update message (Step
+B). The update mechanism finishes when the last subscribed server
+has been contacted (messages to unreachable servers are buffered).
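+
+Before turning to the update mechanism in more detail, the
+subscription and transfer path just described can be condensed into a
+schematic sketch. The helper name, the command strings, the URL layout
+and the temporary-file suffix are assumptions for illustration and not
+the actual B<lonnet> implementation:
+
+    use File::Copy ();
+    use LWP::UserAgent;
+
+    # Schematic replication of one resource from its home server.
+    sub replicate {
+        my ($url, $homeserver, $localfile) = @_;
+
+        return 'ok'   if -e $localfile;               # already replicated
+        return 'wait' if -e "$localfile.in.transfer"; # transfer under way
+
+        # Ask the home server to add us to its subscription list
+        # (hypothetical helper that sends one command and returns the
+        # one-line reply; command format assumed for illustration).
+        my $answer = timed_transaction_to($homeserver, "sub:$url", 60);
+        return 'not_found'     if $answer =~ /not.found/i;
+        return 'access_denied' if $answer =~ /denied|rejected/i;
+
+        # Plain HTTP request for the unprocessed source, stored in a
+        # temporary file until the transfer has completed.
+        my $tmp      = "$localfile.in.transfer";
+        my $response = LWP::UserAgent->new->get("http://$homeserver/raw$url",
+                                                ':content_file' => $tmp);
+        return 'failed' unless $response->is_success;
+
+        # Rename the finished transfer into place.
+        File::Copy::move($tmp, $localfile) or return 'failed';
+        return 'ok';
+    }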
+
+Each subscribing server will check if the resource in question had
+been accessed recently, that is, within a configurable amount of time
+(Step B).
+
+If the resource had not been accessed recently, the local copy of the
+resource is deleted (Step B) and an unsubscribe command is sent
+to the Home Server (Step B). The Home Server will check if the
+server had indeed originally subscribed to the resource (Step B)
+and then delete the server from the list of subscribed servers for the
+resource (Step B).
+
+If the resource had been accessed recently, the modified resource will
+be copied over using the same mechanism as in Step B through
+B, which corresponds to Steps B through B in the
+replication figure.
+
+=head2 Load Balancing
+
+XC provides a function to query the server's current loadavg. As
+a configuration parameter, one can determine the value of loadavg that
+is to be considered 100%, for example, 2.00.
+
+Access servers can have a list of spare access servers,
+C, to offload sessions depending on their
+own workload. This check is done by the login handler. It
+re-directs the login information and session to the least busy spare
+server if it is itself overloaded. An additional round-robin IP scheme
+is possible. See Fig. "Load Balancing Example" for an example of a
+load-balancing scheme.
+
+=begin latex
+
+\begin{figure}
+\includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example}
+ \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure}
+
+=end latex
+
 =head1 DESCRIPTION
 
 Provides persistent TCP connections to the other servers in the network
@@ -1066,10 +1457,10 @@ B forks off children processes tha
 in the network. Management of these processes can be done at the
 parent process level or the child process level.
 
- After forking off the children, B the B
-executes a main loop which simply waits for processes to exit.
-As a process exits, a new process managing a link to the same
-peer as the exiting process is created.
+After forking off the children, B the B executes a main
+loop which simply waits for processes to exit. As a process exits, a
+new process managing a link to the same peer as the exiting process is
+created.
 
 B is the location of log messages.
 
@@ -1149,25 +1540,4 @@ each connection is logged.
 
 =back
 
-=head1 PREREQUISITES
-
-POSIX
-IO::Socket
-IO::Select
-IO::File
-Socket
-Fcntl
-Tie::RefHash
-Crypt::IDEA
-
-=head1 COREQUISITES
-
-=head1 OSNAMES
-
-linux
-
-=head1 SCRIPT CATEGORIES
-
-Server/Process
-
 =cut