--- loncom/loncnew 2018/09/02 15:57:16 1.100.10.1 +++ loncom/loncnew 2020/01/12 01:21:33 1.109 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.100.10.1 2018/09/02 15:57:16 raeburn Exp $ +# $Id: loncnew,v 1.109 2020/01/12 01:21:33 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -74,6 +74,7 @@ my %perlvar = %{$perlvarref}; my %ChildPid; # by pid -> host. my %ChildHost; # by host. +my %ChildKeyMode; # by pid -> keymode my %listening_to; # Socket->host table for who the parent # is listening to. my %parent_dispatchers; # host-> listener watcher events. @@ -93,8 +94,10 @@ my $executable = $0; # Get the full # # The variables below are only used by the child processes. # -my $RemoteHost; # Name of host child is talking to. -my $RemoteHostId; # default lonid of host child is talking to. +my $RemoteHost; # Hostname of host child is talking to. +my $RemoteHostId; # lonid of host child is talking to. +my $RemoteDefHostId; # default lonhostID of host child is talking to. +my $RemoteLoncapaRev; # LON-CAPA version of host child is talking to. my @all_host_ids; my $UnixSocketDir= $perlvar{'lonSockDir'}; my $IdleConnections = Stack->new(); # Set of idle connections @@ -611,7 +614,7 @@ sub CompleteTransaction { StartClientReply($Transaction, $data); } else { # Delete deferred transaction file. Log("SUCCESS", "A delayed transaction was completed"); - LogPerm("S:".$Socket->PeerLoncapaHim().":".$Transaction->getRequest()); + LogPerm("S:".$Transaction->getClient().":".$Transaction->getRequest()); unlink($Transaction->getFile()); } } @@ -669,7 +672,6 @@ Parameters: =item client The LondTransaction we are failing. - =cut @@ -741,7 +743,7 @@ Parameters: The socket to kill off. -=item Restart +=item restart non-zero if we are allowed to create a new connection. @@ -749,6 +751,7 @@ non-zero if we are allowed to create a n sub KillSocket { my $Socket = shift; + my $restart = shift; Log("WARNING", "Shutting down a socket"); $Socket->Shutdown(); @@ -765,16 +768,24 @@ sub KillSocket { if(exists($ActiveConnections{$Socket})) { $ActiveConnections{$Socket}->cancel; delete($ActiveConnections{$Socket}); - $ConnectionCount--; + # Decrement ConnectionCount unless we will immediately + # re-connect (i.e., $restart is true), because this was + # a connection where the SSL channel for exchange of the + # shared key failed, and we may use an insecure channel. + unless ($restart) { + $ConnectionCount--; + } if ($ConnectionCount < 0) { $ConnectionCount = 0; } } # If the connection count has gone to zero and there is work in the # work queue, the work all gets failed with con_lost. # + if($ConnectionCount == 0) { $LondConnecting = 0; # No connections so also not connecting. EmptyQueue(); - CloseAllLondConnections; # Should all already be closed but... + CloseAllLondConnections(); # Should all already be closed but... + &clear_childpid($$); } UpdateStatus(); } @@ -871,17 +882,33 @@ sub LondReadable { Log("WARNING", "Lond connection lost."); + my $state_on_exit = $Socket->GetState(); if(exists($ActiveTransactions{$Socket})) { FailTransaction($ActiveTransactions{$Socket}); } else { # Socket is connecting and failed... need to mark # no longer connecting. - $LondConnecting = 0; } $Watcher->cancel(); - KillSocket($Socket); - $ConnectionRetriesLeft--; # Counts as connection failure + if ($state_on_exit eq 'ReInitNoSSL') { + # SSL certificate verification failed, and insecure connection + # allowed. Send restart arg to KillSocket(), so EmptyQueue() + # is not called, as we still hope to process queued request. + + KillSocket($Socket,1); + + # Re-initiate creation of Lond Connection for use with queued + # request. + + ShowStatus("Connected to ".$RemoteHost); + Log("WARNING","No SSL channel (verification failed), will try with insecure channel"); + &MakeLondConnection(1); + + } else { + KillSocket($Socket); + $ConnectionRetriesLeft--; # Counts as connection failure + } return; } SocketDump(6,$Socket); @@ -892,6 +919,8 @@ sub LondReadable { if($State eq "Initialized") { + } elsif ($State eq "ReInitNoSSL") { + } elsif ($State eq "ChallengeReceived") { # The challenge must be echoed back; The state machine # in the connection takes care of setting that up. Just @@ -928,7 +957,14 @@ sub LondReadable { } elsif ($State eq "ReceivingKey") { } elsif ($State eq "Idle") { - + + if ($ConnectionCount == 1) { + # Write child Pid file to keep track of ssl and insecure + # connections + + &record_childpid($Socket); + } + # This is as good a spot as any to get the peer version # string: @@ -1093,7 +1129,9 @@ sub LondWritable { $Watcher->cb(\&LondReadable); $Watcher->poll("r"); - + + } elsif ($State eq "ReInitNoSSL") { + } elsif ($State eq "ChallengeReceived") { # We received the challenge, now we # are echoing it back. This is a no-op, @@ -1204,20 +1242,22 @@ start off on it. =cut -sub MakeLondConnection { +sub MakeLondConnection { + my ($restart) = @_; Debug(4,"MakeLondConnection to ".GetServerHost()." on port " .GetServerPort()); my $Connection = LondConnection->new(&GetServerHost(), &GetServerPort(), - &GetHostId()); + &GetHostId(), + &GetDefHostId(), + &GetLoncapaRev()); - if($Connection eq undef) { + if($Connection eq undef) { Log("CRITICAL","Failed to make a connection with lond."); $ConnectionRetriesLeft--; return 0; # Failure. } else { - $LondConnecting = 1; # Connection in progress. # The connection needs to have writability # monitored in order to send the init sequence @@ -1242,7 +1282,9 @@ sub MakeLondConnection { if ($ConnectionCount == 0) { &SetupTimer; # Need to handle timeouts with connections... } - $ConnectionCount++; + unless ($restart) { + $ConnectionCount++; + } $Connection->SetClientData($ConnectionCount); Debug(4, "Connection count = ".$ConnectionCount); if($ConnectionCount == 1) { # First Connection: @@ -1510,7 +1552,7 @@ sub GetServerHost { =pod -=head2 GetServerId +=head2 GetHostId Returns the hostid whose lond we talk with. @@ -1522,6 +1564,30 @@ sub GetHostId { =pod +=head2 GetDefHostId + +Returns the default hostid for the node whose lond we talk with. + +=cut + +sub GetDefHostId { # Setup by the fork. + return $RemoteDefHostId; +} + +=pod + +=head2 GetLoncapaRev + +Returns the LON-CAPA version for the node whose lond we talk with. + +=cut + +sub GetLoncapaRev { + return $RemoteLoncapaRev; # Setup by the fork. +} + +=pod + =head2 GetServerPort Returns the lond port number. @@ -1652,6 +1718,7 @@ sub SignalledToDeath { ."died through "."\"$signal\""); #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " # ."\"$signal\""); + &clear_childpid($$); exit 0; } @@ -1782,7 +1849,7 @@ sub ChildProcess { # Create a new child for host passed in: sub CreateChild { - my ($host, $hostid) = @_; + my ($host, $hostid, $defhostid, $loncaparev) = @_; my $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset); @@ -1797,6 +1864,8 @@ sub CreateChild { undef(@all_host_ids); } else { # child. $RemoteHostId = $hostid; + $RemoteDefHostId = $defhostid; + $RemoteLoncapaRev = $loncaparev; ShowStatus("Connected to ".$RemoteHost); $SIG{INT} = 'DEFAULT'; sigprocmask(SIG_UNBLOCK, $sigset); @@ -1864,7 +1933,7 @@ sub get_remote_hostname { (my $hostname,my $lonid,@all_host_ids) = split(':',$data); $ChildHost{$hostname}++; if ($ChildHost{$hostname} == 1) { - &CreateChild($hostname,$lonid); + &CreateChild($hostname,$lonid,$all_host_ids[-1]); } else { &Log('WARNING',"Request for a second child on $hostname"); } @@ -1972,6 +2041,7 @@ sub server_died { my $host = $ChildPid{$pid}; if($host) { # It's for real... &Debug(9, "Caught sigchild for $host"); + &clear_childpid($pid); delete($ChildPid{$pid}); delete($ChildHost{$host}); &parent_clean_up($host); @@ -2142,6 +2212,8 @@ sub UpdateKids { # (lost unless they are critical). &KillThemAll(); + LondConnection->ResetReadConfig(); + ShowStatus('Parent keeping the flock'); } @@ -2157,6 +2229,7 @@ the config file. sub Restart { &KillThemAll; # First kill all the children. + LondConnection->ResetReadConfig(); Log("CRITICAL", "Restarting"); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); @@ -2185,6 +2258,7 @@ sub KillThemAll { ShowStatus("Nicely Killing lonc for $serving pid = $pid"); Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid"); kill 'QUIT' => $pid; + &clear_childpid($pid); } ShowStatus("Finished killing child processes off."); } @@ -2204,6 +2278,7 @@ sub really_kill_them_all_dammit Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid"); kill 'KILL' => $pid; delete($ChildPid{$pid}); + delete($ChildKeyMode{$pid}); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); } @@ -2232,6 +2307,10 @@ sub Terminate { } +=pod + +=cut + sub my_hostname { use Sys::Hostname::FQDN(); my $name = Sys::Hostname::FQDN::fqdn(); @@ -2239,6 +2318,49 @@ sub my_hostname { return $name; } +sub record_childpid { + my ($Socket) = @_; + my $docdir = $perlvar{'lonDocRoot'}; + my $authmode = $Socket->GetKeyMode(); + my $peer = $Socket->PeerLoncapaHim(); + if (($authmode eq 'ssl') || ($authmode eq 'insecure')) { + my $childpid = $$; + if ($childpid) { + unless (exists($ChildKeyMode{$childpid})) { + $ChildKeyMode{$childpid} = $authmode; + } + if (-d "$docdir/lon-status/loncchld") { + unless (-e "$docdir/lon-status/loncchld/$childpid") { + if (open (my $pidfh,'>',"$docdir/lon-status/loncchld/$childpid")) { + print $pidfh "$peer:$authmode\n"; + close($pidfh); + } + } + } + } + } + return; +} + +sub clear_childpid { + my ($childpid) = @_; + my $docdir = $perlvar{'lonDocRoot'}; + if (-d "$docdir/lon-status/loncchld") { + if ($childpid =~ /^\d+$/) { + if (($ChildKeyMode{$childpid} eq 'insecure') || + ($ChildKeyMode{$childpid} eq 'ssl')) { + if (-e "$docdir/lon-status/loncchld/$childpid") { + unlink("$docdir/lon-status/loncchld/$childpid"); + } + } + } + } + if (exists($ChildKeyMode{$childpid})) { + delete($ChildKeyMode{$childpid}); + } + return; +} + =pod =head1 Theory @@ -2343,6 +2465,12 @@ connection or died. This should be foll "WARNING Failing transaction..." msgs for each in-flight or queued transaction. +=item WARNING No SSL channel (verification failed), will try with insecure channel. + +Called when promotion of a socket to SSL failed because SSL certificate verification failed. +Domain configuration must also permit insecure channel use for key exchange. Connection +negotiation will start again from the beginning, but with Authentication Mode not set to ssl. + =item INFO Connected to lond version: When connection negotiation is complete, the lond version is requested and logged here.