--- loncom/lonnet/perl/lonnet.pm	2016/08/16 17:43:36	1.1172.2.78
+++ loncom/lonnet/perl/lonnet.pm	2016/09/17 19:23:42	1.1172.2.80
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # TCP networking package
 #
-# $Id: lonnet.pm,v 1.1172.2.78 2016/08/16 17:43:36 raeburn Exp $
+# $Id: lonnet.pm,v 1.1172.2.80 2016/09/17 19:23:42 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -89,7 +89,7 @@ use GDBM_File;
 use HTML::LCParser;
 use Fcntl qw(:flock);
 use Storable qw(thaw nfreeze);
-use Time::HiRes qw( gettimeofday tv_interval );
+use Time::HiRes qw( sleep gettimeofday tv_interval );
 use Cache::Memcached;
 use Digest::MD5;
 use Math::Random;
@@ -102,7 +102,7 @@ use LONCAPA::Lond;
 use File::Copy;
 
 my $readit;
-my $max_connection_retries = 10;     # Or some such value.
+my $max_connection_retries = 20;     # Or some such value.
 
 require Exporter;
 
@@ -370,7 +370,7 @@ sub subreply {
 
     my $lockfile=$peerfile.".lock";
     while (-e $lockfile) {	# Need to wait for the lockfile to disappear.
-	sleep(1);
+	sleep(0.1);
     }
     # At this point, either a loncnew parent is listening or an old lonc
     # or loncnew child is listening so we can connect or everything's dead.
@@ -388,7 +388,7 @@ sub subreply {
 	} else {
 	    &create_connection(&hostname($server),$server);
 	}
-        sleep(1);		# Try again later if failed connection.
+        sleep(0.1);		# Try again later if failed connection.
     }
     my $answer;
     if ($client) {
@@ -481,7 +481,7 @@ sub critical {
 		    close($dfh);
 		}
             }
-            sleep 2;
+            sleep 1;
             my $wcmd='';
             {
 		my $dfh;
@@ -7755,10 +7755,12 @@ sub update_allusers_table {
 
 sub fetch_enrollment_query {
     my ($context,$affiliatesref,$replyref,$dom,$cnum) = @_;
-    my $homeserver;
+    my ($homeserver,$sleep,$loopmax);
     my $maxtries = 1;
     if ($context eq 'automated') {
         $homeserver = $perlvar{'lonHostID'};
+        $sleep = 2;
+        $loopmax = 100;
         $maxtries = 10; # will wait for up to 2000s for retrieval of classlist data before timeout
     } else {
         $homeserver = &homeserver($cnum,$dom);
@@ -7776,10 +7778,10 @@ sub fetch_enrollment_query {
         &logthis('fetch_enrollment_query: invalid queryid: '.$queryid.' for host: '.$host.' and homeserver: '.$homeserver.' context: '.$context.' '.$cnum); 
         return 'error: '.$queryid;
     }
-    my $reply = &get_query_reply($queryid);
+    my $reply = &get_query_reply($queryid,$sleep,$loopmax);
     my $tries = 1;
     while (($reply=~/^timeout/) && ($tries < $maxtries)) {
-        $reply = &get_query_reply($queryid);
+        $reply = &get_query_reply($queryid,$sleep,$loopmax);
         $tries ++;
     }
     if ( ($reply =~/^timeout/) || ($reply =~/^error/) ) {
@@ -7821,11 +7823,17 @@ sub fetch_enrollment_query {
 }
 
 sub get_query_reply {
-    my $queryid=shift;
+    my ($queryid,$sleep,$loopmax) = @_;
+    if (($sleep eq '') || ($sleep !~ /^\d+\.?\d*$/)) {
+        $sleep = 0.2;
+    }
+    if (($loopmax eq '') || ($loopmax =~ /\D/)) {
+        $loopmax = 100;
+    }
     my $replyfile=LONCAPA::tempdir().$queryid;
     my $reply='';
-    for (1..100) {
-	sleep 2;
+    for (1..$loopmax) {
+	sleep($sleep);
         if (-e $replyfile.'.end') {
 	    if (open(my $fh,$replyfile)) {
 		$reply = join('',<$fh>);
@@ -9610,9 +9618,9 @@ sub modify_access_controls {
     my $tries = 0;
     my $gotlock = &newput('file_permissions',$lockhash,$domain,$user);
    
-    while (($gotlock ne 'ok') && $tries <3) {
+    while (($gotlock ne 'ok') && $tries < 10) {
         $tries ++;
-        sleep 1;
+        sleep(0.1);
         $gotlock = &newput('file_permissions',$lockhash,$domain,$user);
     }
     if ($gotlock eq 'ok') {
@@ -9905,7 +9913,23 @@ sub dirlist {
             foreach my $user (sort(keys(%allusers))) {
                 push(@alluserslist,$user.'&user');
             }
-            return (\@alluserslist);
+            if (!%listerror) {
+                # no errors
+                return (\@alluserslist);
+            } elsif (scalar(keys(%servers)) == 1) {
+                # one library server, one error
+                my ($key) = keys(%listerror);
+                return (\@alluserslist, $listerror{$key});
+            } elsif ( grep { $_ eq 'con_lost' } values(%listerror) ) {
+                # con_lost indicates that we might miss data from at least one
+                # library server
+                return (\@alluserslist, 'con_lost');
+            } else {
+                # multiple library servers and no con_lost -> data should be
+                # complete.
+                return (\@alluserslist);
+            }
+
         } else {
             return ([],'missing username');
         }