--- loncom/lonnet/perl/lonnet.pm 2004/09/21 22:38:10 1.545 +++ loncom/lonnet/perl/lonnet.pm 2004/10/06 09:48:39 1.550 @@ -1,7 +1,7 @@ # The LearningOnline Network # TCP networking package # -# $Id: lonnet.pm,v 1.545 2004/09/21 22:38:10 banghart Exp $ +# $Id: lonnet.pm,v 1.550 2004/10/06 09:48:39 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -52,6 +52,7 @@ use Apache::lonlocal; use Storable qw(lock_store lock_nstore lock_retrieve freeze thaw); use Time::HiRes qw( gettimeofday tv_interval ); my $readit; +my $max_connection_retries = 10; # Or some such value. =pod @@ -116,14 +117,40 @@ sub logperm { sub subreply { my ($cmd,$server)=@_; my $peerfile="$perlvar{'lonSockDir'}/$server"; - my $client=IO::Socket::UNIX->new(Peer =>"$peerfile", - Type => SOCK_STREAM, - Timeout => 10) - or return "con_lost"; - print $client "$cmd\n"; - my $answer=<$client>; - if (!$answer) { $answer="con_lost"; } - chomp($answer); + # + # With loncnew process trimming, there's a timing hole between lonc server + # process exit and the master server picking up the listen on the AF_UNIX + # socket. In that time interval, a lock file will exist: + + my $lockfile=$peerfile.".lock"; + while (-e $lockfile) { # Need to wait for the lockfile to disappear. + sleep(1); + } + # At this point, either a loncnew parent is listening or an old lonc + # or loncnew child is listening so we can connect or everything's dead. + # + # We'll give the connection a few tries before abandoning it. If + # connection is not possible, we'll con_lost back to the client. + # + my $client; + for (my $retries = 0; $retries < $max_connection_retries; $retries++) { + $client=IO::Socket::UNIX->new(Peer =>"$peerfile", + Type => SOCK_STREAM, + Timeout => 10); + if($client) { + last; # Connected! + } + sleep(1); # Try again later if failed connection. + } + my $answer; + if ($client) { + print $client "$cmd\n"; + $answer=<$client>; + if (!$answer) { $answer="con_lost"; } + chomp($answer); + } else { + $answer = 'con_lost'; # Failed connection. + } return $answer; } @@ -795,11 +822,11 @@ sub getsection { if ($key eq $courseid.'_st') { $section=''; } my ($dummy,$end,$start)=split(/\_/,&unescape($value)); my $now=time; - if (defined($end) && ($now > $end)) { + if (defined($end) && $end && ($now > $end)) { $Expired{$end}=$section; next; } - if (defined($start) && ($now < $start)) { + if (defined($start) && $start && ($now < $start)) { $Pending{$start}=$section; next; } @@ -2785,8 +2812,9 @@ sub allowed { } # Free bre access to user's own portfolio contents - $uri=~m:([^/]+)/([^/]+)/([^/]+)/([^/]+)/:; - if (('uploaded' eq $1)&&($ENV{'user.name'} eq $3) && ($ENV{'user.domain'} eq $2) && ('portfolio' eq $4)) { + my ($space,$domain,$name,$dir)=split('/',$uri); + if (('uploaded' eq $space) && ($ENV{'user.name'} eq $name) && + ($ENV{'user.domain'} eq $domain) && ('portfolio' eq $dir)) { return 'F'; } @@ -3191,8 +3219,10 @@ sub log_query { sub fetch_enrollment_query { my ($context,$affiliatesref,$replyref,$dom,$cnum) = @_; my $homeserver; + my $maxtries = 1; if ($context eq 'automated') { $homeserver = $perlvar{'lonHostID'}; + $maxtries = 10; # will wait for up to 2000s for retrieval of classlist data before timeout } else { $homeserver = &homeserver($cnum,$dom); } @@ -3210,8 +3240,13 @@ sub fetch_enrollment_query { return 'error: '.$queryid; } my $reply = &get_query_reply($queryid); + my $tries = 1; + while (($reply=~/^timeout/) && ($tries < $maxtries)) { + $reply = &get_query_reply($queryid); + $tries ++; + } if ( ($reply =~/^timeout/) || ($reply =~/^error/) ) { - &logthis('fetch_enrollment_query error: '.$reply.' for '.$dom.' '.$ENV{'user.name'}.' for '.$queryid.' context: '.$context.' '.$cnum); + &logthis('fetch_enrollment_query error: '.$reply.' for '.$dom.' '.$ENV{'user.name'}.' for '.$queryid.' context: '.$context.' '.$cnum.' maxtries: '.$maxtries.' tries: '.$tries); } else { my @responses = split/:/,$reply; if ($homeserver eq $perlvar{'lonHostID'}) {