--- loncom/loncnew	2004/10/05 10:10:31	1.64
+++ loncom/loncnew	2005/06/16 22:33:45	1.71
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.64 2004/10/05 10:10:31 foxr Exp $
+# $Id: loncnew,v 1.71 2005/06/16 22:33:45 albertel Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -61,6 +61,7 @@ use LONCAPA::LondConnection;
 use LONCAPA::LondTransaction;
 use LONCAPA::Configuration;
 use LONCAPA::HashIterator;
+use Fcntl qw(:flock);
 
 
 # Read the httpd configuration file to get perl variables
@@ -79,6 +80,8 @@ my %listening_to;		# Socket->host table
                                 # is listening to.
 my %parent_dispatchers;         # host-> listener watcher events. 
 
+my %parent_handlers;		# Parent signal handlers...
+
 my $MaxConnectionCount = 10;	# Will get from config later.
 my $ClientConnection = 0;	# Uniquifier for client events.
 
@@ -87,6 +90,7 @@ my $NextDebugLevel= 2;		# So Sigint can
 my $IdleTimeout= 600;		# Wait 10 minutes before pruning connections.
 
 my $LogTransactions = 0;	# When True, all transactions/replies get logged.
+my $executable      = $0;	# Get the full path to me.
 
 #
 #  The variables below are only used by the child processes.
@@ -109,9 +113,8 @@ my $KeyMode         = "";       # e.g. s
 my $LondConnecting  = 0;       # True when a connection is being built.
 
 
-# DO NOT SET THE NEXT VARIABLE TO NON ZERO!!!!!!!!!!!!!!!
 
-my $DieWhenIdle     = 0;	# When true children die when trimmed -> 0.
+my $DieWhenIdle     = 1;	# When true children die when trimmed -> 0.
 my $I_am_child      = 0;	# True if this is the child process.
 
 #
@@ -149,6 +152,7 @@ sub UpdateStatus {
 Makes an entry into the permanent log file.
 
 =cut
+
 sub LogPerm {
     my $message=shift;
     my $execdir=$perlvar{'lonDaemons'};
@@ -268,6 +272,7 @@ sub SocketDump {
  and as what we return in a SIGUSR1
 
 =cut
+
 sub ShowStatus {
     my $state = shift;
     my $now = time;
@@ -278,13 +283,14 @@ sub ShowStatus {
 
 =pod
 
-=head 2 SocketTimeout
+=head2 SocketTimeout
 
     Called when an action on the socket times out.  The socket is 
    destroyed and any active transaction is failed.
 
 
 =cut
+
 sub SocketTimeout {
     my $Socket = shift;
     Log("WARNING", "A socket timeout was detected");
@@ -417,6 +423,7 @@ sub Tick {
 	$KeyMode = ""; 
 	$clock_watcher->cancel();
     }
+    &UpdateStatus();
 }
 
 =pod
@@ -670,9 +677,9 @@ sub FailTransaction {
 
     if ($ConnectionRetriesLeft > 0) {
 	Log("WARNING", "Failing transaction "
-	    .$transaction->getRequest());
+	    .$transaction->getLoggableRequest());
     }
-    Debug(1, "Failing transaction: ".$transaction->getRequest());
+    Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
 	my $client  = $transaction->getClient();
 	Debug(1," Replying con_lost to ".$transaction->getRequest());
@@ -682,12 +689,14 @@ sub FailTransaction {
 }
 
 =pod
+
 =head1  EmptyQueue
 
   Fails all items in the work queue with con_lost.
   Note that each item in the work queue is a transaction.
 
 =cut
+
 sub EmptyQueue {
     $ConnectionRetriesLeft--;	# Counts as connection failure too.
     while($WorkQueue->Count()) {
@@ -703,6 +712,7 @@ sub EmptyQueue {
 Close all connections open on lond prior to exit e.g.
 
 =cut
+
 sub CloseAllLondConnections {
     foreach my $Socket (keys %ActiveConnections) {
       if(exists($ActiveTransactions{$Socket})) {
@@ -711,7 +721,6 @@ sub CloseAllLondConnections {
       KillSocket($Socket);
     }
 }
-=cut
 
 =pod
 
@@ -733,8 +742,8 @@ Parameters:
 
 nonzero if we are allowed to create a new connection.
 
-
 =cut
+
 sub KillSocket {
     my $Socket = shift;
 
@@ -1147,6 +1156,7 @@ sub LondWritable {
 =pod
     
 =cut
+
 sub QueueDelayed {
     Debug(3,"QueueDelayed called");
 
@@ -1555,6 +1565,7 @@ into the status file.
 
 We also use this to reset the retries count in order to allow the
 client to retry connections with a previously dead server.
+
 =cut
 
 sub ChildStatus {
@@ -1563,26 +1574,31 @@ sub ChildStatus {
 
     Debug(2, "Reporting child status because : ".$watcher->data);
     my $docdir = $perlvar{'lonDocRoot'};
-    my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");
-    print $fh $$."\t".$RemoteHost."\t".$Status."\t".
+    
+    open(LOG,">>$docdir/lon-status/loncstatus.txt");
+    flock(LOG,LOCK_EX);
+    print LOG $$."\t".$RemoteHost."\t".$Status."\t".
 	$RecentLogEntry."\n";
     #
     #  Write out information about each of the connections:
     #
     if ($DebugLevel > 2) {
-	print $fh "Active connection statuses: \n";
+	print LOG "Active connection statuses: \n";
 	my $i = 1;
 	print STDERR  "================================= Socket Status Dump:\n";
 	foreach my $item (keys %ActiveConnections) {
 	    my $Socket = $ActiveConnections{$item}->data;
 	    my $state  = $Socket->GetState();
-	    print $fh "Connection $i State: $state\n";
+	    print LOG "Connection $i State: $state\n";
 	    print STDERR "---------------------- Connection $i \n";
 	    $Socket->Dump(-1);	# Ensure it gets dumped..
 	    $i++;	
 	}
     }
+    flock(LOG,LOCK_UN);
+    close(LOG);
     $ConnectionRetriesLeft = $ConnectionRetries;
+    UpdateStatus();
 }
 
 =pod
@@ -1603,12 +1619,14 @@ sub SignalledToDeath {
     chomp($signal);
     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
 	."died through "."\"$signal\"");
-    LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
-	    ."\"$signal\"");
+    #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
+#	    ."\"$signal\"");
     exit 0;
 
 }
 
+=pod
+
 =head2 ToggleDebug
 
 This sub toggles trace debugging on and off.
@@ -1624,6 +1642,8 @@ sub ToggleDebug {
 
 }
 
+=pod
+
 =head2 ChildProcess
 
 This sub implements a child process for a single lonc daemon.
@@ -1650,9 +1670,19 @@ sub ChildProcess {
 	Debug(5, "Killing watcher for $listener");
 
 	$watcher->cancel();
-	undef         $parent_dispatchers{$listener};
+	delete($parent_dispatchers{$listener});
 
     }
+
+    #  kill off the parent's signal handlers too!  
+    #
+
+    for my $handler (keys %parent_handlers) {
+	my $watcher = $parent_handlers{$handler};
+	$watcher->cancel();
+	delete($parent_handlers{$handler});
+    }
+
     $I_am_child    = 1;		# Seems like in spite of it all I may still getting
                                 # parent event dispatches.. flag I'm a child.
 
@@ -1965,6 +1995,28 @@ if ($DieWhenIdle) {
 		   signal   => "CHLD");
 
 
+    # Set up all the other signals we set up.  We'll vector them off to the
+    # same subs as we would for DieWhenIdle false and, if necessary, conditionalize
+    # the code there.
+
+    $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
+					  desc     => "Parent INT handler",
+					  signal   => "INT");
+    $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
+					   desc     => "Parent TERM handler",
+					   signal   => "TERM");
+    $parent_handlers{HUP}  = Event->signal(cb       => \&Restart,
+					   desc     => "Parent HUP handler.",
+					   signal   => "HUP");
+    $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
+					   desc     => "Parent USR1 handler",
+					   signal   => "USR1");
+    $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
+					   desc     => "Parent USR2 handler.",
+					   signal   => "USR2");
+    
+    #  Start procdesing events.
+
     $Event::DebugLevel = $DebugLevel;
     Debug(9, "Parent entering event loop");
     my $ret = Event::loop();
@@ -2016,11 +2068,14 @@ sub CheckKids {
     my $now=time;
     my $local=localtime($now);
     print $fh "LONC status $local - parent $$ \n\n";
+    foreach my $host (keys %parent_dispatchers) {
+	print $fh "LONC Parent process listening for $host\n";
+    }
     foreach my $pid (keys %ChildHash) {
 	Debug(2, "Sending USR1 -> $pid");
 	kill 'USR1' => $pid;	# Tell Child to report status.
-	sleep 1;		# Wait so file doesn't intermix.
     }
+
 }
 
 =pod
@@ -2053,81 +2108,15 @@ sub UpdateKids {
 
     Log("INFO", "Updating connections via SIGUSR2");
 
-    #  Just in case we need to kill our own lonc, we wait a few seconds to
-    #  give it a chance to receive and relay lond's response to the 
-    #  re-init command.
-    #
+    #  I'm not sure what I was thinking in the first implementation.
+    # someone will have to work hard to convince me the effect is any
+    # different than Restart, especially now that we don't start up 
+    # per host servers automatically, may as well just restart.
+    # The down side is transactions that are in flight will get timed out
+    # (lost unless they are critical).
 
-    sleep(2);			# Wait a couple of seconds.
+    &Restart();
 
-    my %hosts;                   # Indexed by loncapa hostname, value=ip.
-    
-    # Need to re-read  the host table:
-    
-    
-    LondConnection::ReadConfig();
-    my $I = LondConnection::GetHostIterator;
-    while (! $I->end()) {
-	my $item = $I->get();
-	$hosts{$item->[0]} = $item->[4];
-	$I->next();
-    }
-
-    #  The logic below is written for clarity not for efficiency.
-    #  Since I anticipate that this function is only rarely called, that's
-    #  appropriate.  There are certainly ways to combine the loops below,
-    #  and anyone wishing to obscure the logic is welcome to go for it.
-    #  Note that we don't re-direct sigchild.  Instead we do what's needed
-    #  to the data structures that keep track of children to ensure that
-    #  when sigchild is honored, no new child is born.
-    #
-
-    #  For each existing child; if it's host doesn't exist, kill the child.
-
-    foreach my $child (keys %ChildHash) {
-	my $oldhost = $ChildHash{$child};
-	if (!(exists $hosts{$oldhost})) {
-	    Log("CRITICAL", "Killing child for $oldhost  host no longer exists");
-	    delete $ChildHash{$child};
-	    delete $HostToPid{$oldhost};
-	    kill 'QUIT' => $child;
-	}
-    }
-    # For each remaining existing child; if it's host's ip has changed,
-    # Restart the child on the new IP.
-
-    foreach my $child (keys %ChildHash) {
-	my $oldhost = $ChildHash{$child};
-	my $oldip   = $HostHash{$oldhost};
-	if ($hosts{$oldhost} ne $oldip) {
-
-	    # kill the old child.
-
-	    Log("CRITICAL", "Killing child for $oldhost host ip has changed...");
-	    delete $ChildHash{$child};
-	    delete $HostToPid{$oldhost};
-	    kill 'QUIT' => $child;
-
-	    # Do the book-keeping needed to start a new child on the
-	    # new ip.
-
-	    $HostHash{$oldhost} = $hosts{$oldhost};
-	    CreateChild($oldhost);
-	}
-    }
-    # Finally, for each new host, not in the host hash, create a
-    # enter the host and create a new child.
-    # Force a status display of any existing process.
-
-    foreach my $host (keys %hosts) {
-	if(!(exists $HostHash{$host})) {
-	    Log("INFO", "New host $host discovered in hosts.tab...");
-	    $HostHash{$host} = $hosts{$host};
-	    CreateChild($host);
-	} else {
-	    kill 'HUP' => $HostToPid{$host};    # status display.
-	}
-    }
 }
 
 
@@ -2146,7 +2135,7 @@ sub Restart {
     Log("CRITICAL", "Restarting");
     my $execdir = $perlvar{'lonDaemons'};
     unlink("$execdir/logs/lonc.pid");
-    exec("$execdir/loncnew");
+    exec("$executable");
 }
 
 =pod
@@ -2190,6 +2179,7 @@ sub really_kill_them_all_dammit
 	unlink("$execdir/logs/lonc.pid");
     }
 }
+
 =pod
 
 =head1 Terminate