--- loncom/loncnew	2003/10/27 10:09:21	1.30
+++ loncom/loncnew	2004/01/05 09:29:36	1.38
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.30 2003/10/27 10:09:21 foxr Exp $
+# $Id: loncnew,v 1.38 2004/01/05 09:29:36 foxr Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -35,118 +35,16 @@
 #    - Add ability to create/negotiate lond connections (done).
 #    - Add general logic for dispatching requests and timeouts. (done).
 #    - Add support for the lonc/lond requests.          (done).
-#    - Add logging/status monitoring.
-#    - Add Signal handling - HUP restarts. USR1 status report.
+#    - Add logging/status monitoring.                    (done)
+#    - Add Signal handling - HUP restarts. USR1 status report. (done)
 #    - Add Configuration file I/O                       (done).
-#    - Add management/status request interface.
+#    - Add management/status request interface.         (done)
 #    - Add deferred request capability.                  (done)
-#    - Detect transmission timeouts.
+#    - Detect transmission timeouts.                     (done)
 #
 
-# Change log:
-#    $Log: loncnew,v $
-#    Revision 1.30  2003/10/27 10:09:21  foxr
-#    Tighten up a few compares to eq and flip a few debug levels around... nothing
-#    critical
-#
-#    Revision 1.29  2003/10/21 14:24:42  foxr
-#    Fix little typo that may explain growth of connections
-#
-#    Revision 1.28  2003/10/14 15:36:21  albertel
-#    - making it easier to run loncnew,
-#       /etc/init.d/loncontrol startnew
-#       /etc/init.d/loncontrol restartnew
-#      will now start loncnew in place of lonc
-#
-#    Revision 1.27  2003/10/07 11:23:03  foxr
-#    Installed and tested code to process reinit in parent server.
-#
-#    Revision 1.26  2003/09/30 11:11:17  foxr
-#    Add book-keeping hashes to support the re-init procedure.
-#
-#    Revision 1.25  2003/09/23 11:22:14  foxr
-#    Tested ability to receive sigusr2  This is now logged and must be
-#    properly implemented as a re-read of hosts and re-init of appropriate
-#    children.
-#
-#    Revision 1.24  2003/09/16 09:46:42  foxr
-#    Added skeletal infrastructure to support SIGUSR2 update hosts request.
-#
-#    Revision 1.23  2003/09/15 09:24:49  foxr
-#    Add use strict and fix all the fallout from that.
-#
-#    Revision 1.22  2003/09/02 10:34:47  foxr
-#    - Fix errors in host dead detection logic (too many cases where the
-#      retries left were not getting incremented or just not checked).
-#    - Added some additional status to the ps axuww display:
-#      o Remaining retries on a host.
-#      o >>> DEAD <<< indicator if I've given up on a host.
-#    - Tested the SIGHUP will reset the retries remaining count (thanks to
-#      the above status stuff, and get allow the loncnew to re-try again
-#      on the host (thanks to the log).
-#
-#    Revision 1.21  2003/08/26 09:19:51  foxr
-#    How embarrassing... put in the SocketTimeout function in loncnew and forgot
-#    to actually hook it into the LondTransaction.  Added this to MakeLondConnection
-#    where it belongs... hopefully transactions (not just connection attempts) will
-#    timeout more speedily than the socket errors will catch it.
-#
-#    Revision 1.20  2003/08/25 18:48:11  albertel
-#    - fixing a forgotten ;
-#
-#    Revision 1.19  2003/08/19 09:31:46  foxr
-#    Get socket directory from configuration rather than the old hard coded test
-#    way that I forgot to un-hard code.
-#
-#    Revision 1.18  2003/08/06 09:52:29  foxr
-#    Also needed to remember to fail in-flight transactions if their sends fail.
-#
-#    Revision 1.17  2003/08/03 00:44:31  foxr
-#    1. Correct handling of connection failure: Assume it means the host is
-#       unreachable and fail all of the queued transactions.  Note that the
-#       inflight transactions should fail on their own time due either to timeout
-#       or send/receive failures.
-#    2. Correct handling of logs for forced death signals.  Pull the signal
-#       from the event watcher.
-#
-#    Revision 1.16  2003/07/29 02:33:05  foxr
-#    Add SIGINT processing to child processes to toggle annoying trace mode
-#    on/off.. will try to use this to isolate the compute boud process issue.
-#
-#    Revision 1.15  2003/07/15 02:07:05  foxr
-#    Added code for lonc/lond transaction timeouts.  Who knows if it works right.
-#    The intent is for a timeout to fail any transaction in progress and kill
-#    off the sockt that timed out.
-#
-#    Revision 1.14  2003/07/03 02:10:18  foxr
-#    Get all of the signals to work correctly.
-#
-#    Revision 1.13  2003/07/02 01:31:55  foxr
-#    Added kill -HUP logic (restart).
-#
-#    Revision 1.11  2003/06/25 01:54:44  foxr
-#    Fix more problems with transaction failure.
-#
-#    Revision 1.10  2003/06/24 02:46:04  foxr
-#    Put a limit on  the number of times we'll retry a connection.
-#    Start getting the signal stuff put in as well...note that need to get signals
-#    going or else the client will permanently give up on dead servers.
-#
-#    Revision 1.9  2003/06/13 02:38:43  foxr
-#    Add logging in 'expected format'
-#
-#    Revision 1.8  2003/06/11 02:04:35  foxr
-#    Support delayed transactions... this is done uniformly by encapsulating
-#    transactions in an object ... a LondTransaction that is implemented by
-#    LondTransaction.pm
-#
-#    Revision 1.7  2003/06/03 01:59:39  foxr
-#    complete coding to support deferred transactions.
-#
-#
 use strict;
 use lib "/home/httpd/lib/perl/";
-use lib "/home/foxr/newloncapa/types";
 use Event qw(:DEFAULT );
 use POSIX qw(:signal_h);
 use POSIX;
@@ -312,7 +210,6 @@ sub GetPeername {
 	return $peerfile;
     }
 }
-#----------------------------- Timer management ------------------------
 =pod
 
 =head2 Debug
@@ -364,11 +261,14 @@ sub ShowStatus {
 =cut
 sub SocketTimeout {
     my $Socket = shift;
-    
+    Log("WARNING", "A socket timeout was detected");
+    Debug(0, " SocketTimeout called: ");
+    $Socket->Dump();
     KillSocket($Socket);	# A transaction timeout also counts as
                                 # a connection failure:
     $ConnectionRetriesLeft--;
 }
+#----------------------------- Timer management ------------------------
 
 =pod
 
@@ -403,9 +303,13 @@ sub Tick {
     #
     #  For each inflight transaction, tick down its timeout counter.
     #
-    foreach my $item (keys %ActiveTransactions) {
-	my $Socket = $ActiveTransactions{$item}->getServer();
-	$Socket->Tick();
+    #  This is done per connection: every socket whose state is not 'Idle' is ticked.
+    foreach my $item (keys %ActiveConnections) {
+	my $State = $ActiveConnections{$item}->data->GetState();
+	if ($State ne 'Idle') {
+	    Debug(5,"Ticking Socket $State $item");
+	    $ActiveConnections{$item}->data->Tick();
+	}
     }
     # Do we have work in the queue, but no connections to service them?
     # If so, try to make some new connections to get things going again.
@@ -452,7 +356,7 @@ Trigger disconnections of idle sockets.
 
 sub SetupTimer {
     Debug(6, "SetupTimer");
-    Event->timer(interval => 1, debug => 1, cb => \&Tick );
+    Event->timer(interval => 1, cb => \&Tick );
 }
 
 =pod
@@ -753,9 +657,9 @@ sub KillSocket {
     }
     if(exists($ActiveConnections{$Socket})) {
 	delete($ActiveConnections{$Socket});
+	$ConnectionCount--;
+	if ($ConnectionCount < 0) { $ConnectionCount = 0; }
     }
-    $ConnectionCount--;
-
     #  If the connection count has gone to zero and there is work in the
     #  work queue, the work all gets failed with con_lost.
     #
@@ -1440,6 +1344,20 @@ sub ChildStatus {
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".
 	$RecentLogEntry."\n";
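+    #  Write each active connection's state to the status file, and a
+    #  per-connection banner to STDERR just before calling $Socket->Dump()
+    #  (presumably Dump also reports on STDERR -- confirm in LondConnection).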
+    print $fh "Active connection statuses: \n";
+    my $i = 1;
+    print STDERR  "================================= Socket Status Dump:\n";
+    foreach my $item (keys %ActiveConnections) {
+	my $Socket = $ActiveConnections{$item}->data;
+	my $state  = $Socket->GetState();
+	print $fh "Connection $i State: $state\n";
+	print STDERR "---------------------- Connection $i \n";
+	$Socket->Dump();
+	$i++;	
+    }
     $ConnectionRetriesLeft = $ConnectionRetries;
 }
 
@@ -1598,6 +1516,7 @@ ShowStatus("Forking node servers");
 
 Log("CRITICAL", "--------------- Starting children ---------------");
 
+LondConnection::ReadConfig;               # Read standard config files.
 my $HostIterator = LondConnection::GetHostIterator;
 while (! $HostIterator->end()) {