--- loncom/loncnew	2017/02/28 05:42:06	1.101
+++ loncom/loncnew	2020/01/12 01:21:33	1.109
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.101 2017/02/28 05:42:06 raeburn Exp $
+# $Id: loncnew,v 1.109 2020/01/12 01:21:33 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -74,6 +74,7 @@ my %perlvar    = %{$perlvarref};
 
 my %ChildPid;			# by pid -> host.
 my %ChildHost;			# by host.
+my %ChildKeyMode;               # by pid -> keymode
 my %listening_to;		# Socket->host table for who the parent
                                 # is listening to.
 my %parent_dispatchers;         # host-> listener watcher events. 
@@ -93,8 +94,10 @@ my $executable      = $0;	# Get the full
 #
 #  The variables below are only used by the child processes.
 #
-my $RemoteHost;			# Name of host child is talking to.
-my $RemoteHostId;		# default lonid of host child is talking to.
+my $RemoteHost;			# Hostname of host child is talking to.
+my $RemoteHostId;		# lonid of host child is talking to.
+my $RemoteDefHostId;		# default lonhostID of host child is talking to.
+my $RemoteLoncapaRev;           # LON-CAPA version of host child is talking to.
 my @all_host_ids;
 my $UnixSocketDir= $perlvar{'lonSockDir'};
 my $IdleConnections = Stack->new(); # Set of idle connections
@@ -669,7 +672,6 @@ Parameters:
 =item client  
  
    The LondTransaction we are failing.
- 
 
 =cut
 
@@ -741,7 +743,7 @@ Parameters:
  
   The socket to kill off.
 
-=item Restart
+=item restart
 
 non-zero if we are allowed to create a new connection.
 
@@ -749,6 +751,7 @@ non-zero if we are allowed to create a n
 
 sub KillSocket {
     my $Socket = shift;
+    my $restart = shift;
 
     Log("WARNING", "Shutting down a socket");
     $Socket->Shutdown();
@@ -765,16 +768,24 @@ sub KillSocket {
     if(exists($ActiveConnections{$Socket})) {
 	$ActiveConnections{$Socket}->cancel;
 	delete($ActiveConnections{$Socket});
-	$ConnectionCount--;
+        # Decrement ConnectionCount unless we will immediately
+        # re-connect (i.e., $restart is true), because this was
+        # a connection where the SSL channel for exchange of the
+        # shared key failed, and we may use an insecure channel.
+        unless ($restart) {
+	    $ConnectionCount--;
+        }
 	if ($ConnectionCount < 0) { $ConnectionCount = 0; }
     }
     #  If the connection count has gone to zero and there is work in the
     #  work queue, the work all gets failed with con_lost.
     #
+  
     if($ConnectionCount == 0) {
 	$LondConnecting = 0;	# No connections so also not connecting.
 	EmptyQueue();
-	CloseAllLondConnections; # Should all already be closed but...
+	CloseAllLondConnections(); # Should all already be closed but...
+        &clear_childpid($$);
     }
     UpdateStatus();
 }
@@ -871,17 +882,33 @@ sub LondReadable {
 
 	Log("WARNING",
 	    "Lond connection lost.");
+        my $state_on_exit = $Socket->GetState();
 	if(exists($ActiveTransactions{$Socket})) {
 	    FailTransaction($ActiveTransactions{$Socket});
 	} else {
 	    #  Socket is connecting and failed... need to mark
 	    #  no longer connecting.
-	   
 	    $LondConnecting = 0;
 	}
 	$Watcher->cancel();
-	KillSocket($Socket);
-	$ConnectionRetriesLeft--;       # Counts as connection failure
+        if ($state_on_exit eq 'ReInitNoSSL') {
+            # SSL certificate verification failed, and insecure connection
+            # allowed. Send restart arg to KillSocket(), so EmptyQueue() 
+            # is not called, as we still hope to process queued request.
+
+            KillSocket($Socket,1);
+
+            # Re-initiate creation of Lond Connection for use with queued
+            # request.
+
+            ShowStatus("Connected to ".$RemoteHost);
+            Log("WARNING","No SSL channel (verification failed), will try with insecure channel");
+            &MakeLondConnection(1);
+
+        } else {
+	    KillSocket($Socket);
+	    $ConnectionRetriesLeft--;       # Counts as connection failure         
+        }
 	return;
     }
     SocketDump(6,$Socket);
@@ -892,6 +919,8 @@ sub LondReadable {
     if($State eq "Initialized") {
 
 
+    } elsif ($State eq "ReInitNoSSL") {
+
     } elsif ($State eq "ChallengeReceived") {
 	#  The challenge must be echoed back;  The state machine
 	# in the connection takes care of setting that up.  Just
@@ -928,7 +957,14 @@ sub LondReadable {
     } elsif ($State eq "ReceivingKey") {
 
     } elsif ($State eq "Idle") {
-   
+
+        if ($ConnectionCount == 1) { 
+            # Write child Pid file to keep track of ssl and insecure
+            # connections
+
+            &record_childpid($Socket);
+        }
+
 	# This is as good a spot as any to get the peer version
 	# string:
    
@@ -1093,7 +1129,9 @@ sub LondWritable {
 
 	$Watcher->cb(\&LondReadable);
 	$Watcher->poll("r");
-	
+
+    } elsif ($State eq "ReInitNoSSL") {
+
     } elsif ($State eq "ChallengeReceived") {
 	# We received the challenge, now we 
 	# are echoing it back. This is a no-op,
@@ -1204,20 +1242,22 @@ start off on it.
 
 =cut
 
-sub MakeLondConnection {     
+sub MakeLondConnection {
+    my ($restart) = @_;
     Debug(4,"MakeLondConnection to ".GetServerHost()." on port "
 	  .GetServerPort());
 
     my $Connection = LondConnection->new(&GetServerHost(),
 					 &GetServerPort(),
-					 &GetHostId());
+					 &GetHostId(),
+					 &GetDefHostId(),
+					 &GetLoncapaRev());
 
-    if($Connection eq undef) {	
+    if($Connection eq undef) {
 	Log("CRITICAL","Failed to make a connection with lond.");
 	$ConnectionRetriesLeft--;
 	return 0;		# Failure.
     }  else {
-
 	$LondConnecting = 1;	# Connection in progress.
 	# The connection needs to have writability 
 	# monitored in order to send the init sequence
@@ -1242,7 +1282,9 @@ sub MakeLondConnection {
 	if ($ConnectionCount == 0) {
 	    &SetupTimer;	# Need to handle timeouts with connections...
 	}
-	$ConnectionCount++;
+        unless ($restart) {
+	    $ConnectionCount++;
+        }
 	$Connection->SetClientData($ConnectionCount);
 	Debug(4, "Connection count = ".$ConnectionCount);
 	if($ConnectionCount == 1) { # First Connection:
@@ -1510,7 +1552,7 @@ sub GetServerHost {
 
 =pod
 
-=head2 GetServerId
+=head2 GetHostId
 
 Returns the hostid whose lond we talk with.
 
@@ -1522,6 +1564,30 @@ sub GetHostId {
 
 =pod
 
+=head2 GetDefHostId
+
+Returns the default hostid for the node whose lond we talk with.
+
+=cut
+
+sub GetDefHostId {                      # Setup by the fork.
+    return $RemoteDefHostId;
+}
+
+=pod
+
+=head2 GetLoncapaRev
+
+Returns the LON-CAPA version for the node whose lond we talk with.
+
+=cut
+
+sub GetLoncapaRev {
+    return $RemoteLoncapaRev;           # Setup by the fork.
+}
+
+=pod
+
 =head2 GetServerPort
 
 Returns the lond port number.
@@ -1652,6 +1718,7 @@ sub SignalledToDeath {
 	."died through "."\"$signal\"");
     #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
 #	    ."\"$signal\"");
+    &clear_childpid($$);
     exit 0;
 
 }
@@ -1782,7 +1849,7 @@ sub ChildProcess {
 #  Create a new child for host passed in:
 
 sub CreateChild {
-    my ($host, $hostid) = @_;
+    my ($host, $hostid, $defhostid, $loncaparev) = @_;
 
     my $sigset = POSIX::SigSet->new(SIGINT);
     sigprocmask(SIG_BLOCK, $sigset);
@@ -1797,6 +1864,8 @@ sub CreateChild {
 	undef(@all_host_ids);
     } else {			# child.
 	$RemoteHostId = $hostid;
+	$RemoteDefHostId = $defhostid;
+        $RemoteLoncapaRev = $loncaparev;
 	ShowStatus("Connected to ".$RemoteHost);
 	$SIG{INT} = 'DEFAULT';
 	sigprocmask(SIG_UNBLOCK, $sigset);
@@ -1864,7 +1933,7 @@ sub get_remote_hostname {
     (my $hostname,my $lonid,@all_host_ids) = split(':',$data);
     $ChildHost{$hostname}++;
     if ($ChildHost{$hostname} == 1) {
-	&CreateChild($hostname,$lonid);
+	&CreateChild($hostname,$lonid,$all_host_ids[-1]);
     } else {
 	&Log('WARNING',"Request for a second child on $hostname");
     }
@@ -1972,6 +2041,7 @@ sub server_died {
 	my $host = $ChildPid{$pid};
 	if($host) {		# It's for real...
 	    &Debug(9, "Caught sigchild for $host");
+            &clear_childpid($pid);
 	    delete($ChildPid{$pid});
 	    delete($ChildHost{$host});
 	    &parent_clean_up($host);
@@ -2143,6 +2213,7 @@ sub UpdateKids {
 
     &KillThemAll();
     LondConnection->ResetReadConfig();
+    ShowStatus('Parent keeping the flock');
 }
 
 
@@ -2187,6 +2258,7 @@ sub KillThemAll {
 	ShowStatus("Nicely Killing lonc for $serving pid = $pid");
 	Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
 	kill 'QUIT' => $pid;
+        &clear_childpid($pid);
     }
     ShowStatus("Finished killing child processes off.");
 }
@@ -2206,6 +2278,7 @@ sub really_kill_them_all_dammit
 	Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
 	kill 'KILL' => $pid;
 	delete($ChildPid{$pid});
+        delete($ChildKeyMode{$pid});
 	my $execdir = $perlvar{'lonDaemons'};
 	unlink("$execdir/logs/lonc.pid");
     }
@@ -2234,13 +2307,60 @@ sub Terminate {
 
 }
 
+=pod
+
+=cut
+
 sub my_hostname {
-    use Sys::Hostname;
-    my $name = &hostname();
+    use Sys::Hostname::FQDN();
+    my $name = Sys::Hostname::FQDN::fqdn();
     &Debug(9,"Name is $name");
     return $name;
 }
 
+sub record_childpid {
+    my ($Socket) = @_;
+    my $docdir = $perlvar{'lonDocRoot'};
+    my $authmode = $Socket->GetKeyMode();
+    my $peer = $Socket->PeerLoncapaHim();
+    if (($authmode eq 'ssl') || ($authmode eq 'insecure')) {
+        my $childpid = $$;
+        if ($childpid) {
+            unless (exists($ChildKeyMode{$childpid})) {
+                $ChildKeyMode{$childpid} = $authmode;
+            }
+            if (-d "$docdir/lon-status/loncchld") {
+                unless (-e "$docdir/lon-status/loncchld/$childpid") {
+                    if (open (my $pidfh,'>',"$docdir/lon-status/loncchld/$childpid")) {
+                        print $pidfh "$peer:$authmode\n";
+                        close($pidfh);
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
+
+sub clear_childpid {
+    my ($childpid) = @_; 
+    my $docdir = $perlvar{'lonDocRoot'};
+    if (-d "$docdir/lon-status/loncchld") {
+        if ($childpid =~ /^\d+$/) {
+            if (($ChildKeyMode{$childpid} eq 'insecure') ||
+                ($ChildKeyMode{$childpid} eq 'ssl')) {
+                if (-e "$docdir/lon-status/loncchld/$childpid") {
+                    unlink("$docdir/lon-status/loncchld/$childpid");
+                }
+            }
+        }
+    }
+    if (exists($ChildKeyMode{$childpid})) {
+        delete($ChildKeyMode{$childpid});
+    }
+    return;
+}
+
 =pod
 
 =head1 Theory
@@ -2345,6 +2465,12 @@ connection or died.  This should be foll
 
  "WARNING Failing transaction..." msgs for each in-flight or queued transaction.
 
+=item WARNING No SSL channel (verification failed), will try with insecure channel.
+
+Called when promotion of a socket to SSL failed because SSL certificate verification failed.
+Domain configuration must also permit insecure channel use for key exchange. Connection
+negotiation will start again from the beginning, but with Authentication Mode not set to ssl.
+
 =item INFO Connected to lond version:  <version> 
 
 When connection negotiation is complete, the lond version is requested and logged here.