--- loncom/cgi/clusterstatus.pl 2003/07/31 16:07:47 1.10 +++ loncom/cgi/clusterstatus.pl 2003/08/01 19:20:26 1.14 @@ -3,7 +3,7 @@ $|=1; # The LearningOnline Network with CAPA # Cluster Status # -# $Id: clusterstatus.pl,v 1.10 2003/07/31 16:07:47 www Exp $ +# $Id: clusterstatus.pl,v 1.14 2003/08/01 19:20:26 www Exp $ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -19,6 +19,7 @@ my %connectionstatus=(); my %perlvar=(); my $mode; +my $concount=0; sub select_form { my ($def,$name,%hash) = @_; @@ -47,6 +48,7 @@ sub hidden { sub request { my ($local,$url,$cachetime)=@_; + $cachetime*=(0.5+rand); my $key=&key($local,$url); my $reply=''; if ($FORM{$key.'_time'}) { @@ -61,7 +63,7 @@ sub request { $reply='local_unknown'; } else { - my $ua=new LWP::UserAgent(timeout => 20); + my $ua=new LWP::UserAgent(timeout => 10); my $request=new HTTP::Request('GET', "http://".$hostname{$local}.$url); @@ -91,10 +93,15 @@ sub connected { unless ($hostname{$remote}) { return 'remote_unknown'; } my $url='/cgi-bin/ping.pl?'.$remote; # -# Slowly phase this in: if not cached, only do 10 percent of the cases +# Slowly phase this in: if not cached, only do 5 percent of the cases, +# but always do the first five. # unless ($FORM{&key($local,$url)}) { - unless (rand>0.9) { return 'not_yet'; } + unless (($concount<=5) || (rand>0.95)) { + return 'not_yet'; + } else { + $concount++; + } } # # Actually do the query @@ -169,11 +176,23 @@ sub server { print &otherwindow($local,'/server-status','Server Status'); } +# ========================================================= Produce a green bar +sub bar { + my $parm=shift; + my $number=int($parm+0.5); + print "
"; + for (my $i=0;$i<$number;$i++) { + print "+"; + } + print "
"; +} + # ========================================================== Show server status sub serverstatus { - my $local=shift; + my ($local,$trouble)=@_; print (< "; + if ($trouble) { + print (""); + } + print "
$local $hostdom{$local} ($hostname{$local}; $hostrole{$local}) @@ -182,7 +201,11 @@ $local $hostdom{$local} ($hostna ENDHEADER &login($local);&server($local);&users($local);&versions($local); &loncron($local);&lond($local);&lonc($local);&runloncron($local); - print "
"; + print "
$trouble
"; # load if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) { print "
Load: ".$host{$local.'_load'} @@ -200,6 +223,19 @@ ENDHEADER if ($host{$local.'_mysql'}) { print "
MySQL Database: ".$host{$local.'_mysql'} } +# connections + if ($host{$local.'_notconnected'}) { + print "
Not connected: "; + foreach (split(/ /,$host{$local.'_notconnected'})) { + if ($_) { + print " $_"; + } + } + } +# errors + if ($host{$local.'_errors'}) { + print "
loncron errors: ".$host{$local.'_errors'}; + } print "

"; } @@ -256,7 +292,7 @@ foreach $pair (@pairs) { # ====================================================== Determine refresh rate -my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:60); +my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120); if ($refresh<30) { $refresh=30; } my $starttime=time; @@ -271,6 +307,7 @@ my %modes=('trouble' => 'Trouble', 'load_doomed' => 'Doomed: Load', 'unresponsive_doomed' => 'Doomed: Status could not be determined', 'users' => 'User Report', + 'load' => 'Load Report', 'connections' => 'Connections Matrix'); $mode=$FORM{'mode'}; @@ -323,7 +360,6 @@ delete $perlvar{'lonSqlAccess'}; # remov } } - print "

LON-CAPA Cluster Status ".localtime()."

"; print "
\n". "
". @@ -334,6 +370,10 @@ print 'Choose next report: '.&select_for # ==================================================== Main Loop over all Hosts +my $maxusers=0; +my $maxload=0; +my $totalusers=0; + foreach $local (sort keys %hostname) { $host{$local.'_unresponsive_doomed'}=0; # -- Check general status @@ -347,6 +387,10 @@ foreach $local (sort keys %hostname) { $host{$local.'_loncron'}='Stale.'; $host{$local.'_unresponsive_doomed'}++; } else { + $host{$local.'_loncron_doomed'}=$loncron{'notices'} + +4*$loncron{'warnings'} + +100*$loncron{'errors'}; + $host{$local.'_errors'}=$loncron{'errors'}; } } # -- Check user status @@ -358,8 +402,16 @@ foreach $local (sort keys %hostname) { } else { $host{$local.'_users_doomed'}=$userstatus{'Active'}; $host{$local.'_users'}=$userstatus{'Active'}; + unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; } + if ($host{$local.'_users'}>$maxusers) { + $maxusers=$host{$local.'_users'}; + } + $totalusers+=$host{$local.'_users'}; my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'}); $host{$local.'_load_doomed'}=$mload; + if ($mload>$maxload) { + $maxload=$mload; + } $host{$local.'_load'}=$userstatus{'loadavg'}; } # -- Check mysql status @@ -428,25 +480,36 @@ foreach $local (sort keys %hostname) { print "". ""; - foreach (sort keys %hostname) { - my $remote=$_; + foreach my $remote (sort keys %hostname) { print ''; } print "\n"; # connection matrix - foreach (sort keys %hostname) { - my $local=$_; + foreach my $local (sort keys %hostname) { print ''; - foreach (sort keys %hostname) { - my $remote=$_; + foreach my $remote (sort keys %hostname) { if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') { - print ''; + my $cellcolor='#FFFFFF'; + if ($local eq $remote) { $cellcolor='#DDDDDD'; } + print ''; } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') { + my $cellcolor='#BBDDBB'; + if ($local eq $remote) { $cellcolor='#99DD99'; } print -''; +''; } else { + my $cellcolor='#DDBBBB'; + if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') { + if ($local eq $remote) { + $cellcolor='#DD88AA'; + } else { + $cellcolor='#DDAACC'; + } + } else { + if ($local eq $remote) { $cellcolor='#DD9999'; } + } print - ''; @@ -454,16 +517,90 @@ foreach $local (sort keys %hostname) { } print "\n"; } + print "
 '.$remote.'
'.$local.' not yet testedokok'. + ''. $connectionstatus{$local.'_TO_'.$remote}.'
'; &lonc($local); &lond($remote); print '
"; + } elsif ($mode eq 'users') { +# Users + if ($maxusers) { + my $factor=50/$maxusers; + print "

Total active user(s): $totalusers

". + ""; + + foreach $local (sort keys %hostname) { + if (defined($host{$local.'_users'})) { + print +'\n"; + } + } + print "
'.$local. + ''; + &users($local); + print + ''. + $host{$local.'_users'}.'
"; + } else { + print "No active users logged in."; + } + } elsif ($mode eq 'load') { +# Load + if ($maxload) { + my $factor=50/$maxload; + print + ""; + foreach $local (sort keys %hostname) { + if (defined($host{$local.'_load_doomed'})) { + print +'\n"; + } + } + print "
'. + $local. + ''; + &server($local); + print + ''. + $host{$local.'_load_doomed'}.'
"; + } else { + print "No workload."; + } + } elsif ($mode eq 'trouble') { + my $count=0; + foreach $local (sort keys %hostname) { + my $trouble=''; + if ($host{$local.'_errors'}) { + $trouble='Has loncron errors.
'; + } elsif ($host{$local.'_loncron_doomed'}>600) { + $trouble='High loncron count.
'; + } + if ($host{$local.'_load_doomed'}>5) { + $trouble='High load.
'; + } + if ($host{$local.'_users_doomed'}>200) { + $trouble='High user volume.
'; + } + if ($host{$local.'_mysql_doomed'}>1) { + $trouble='MySQL database apparently offline.
'; + } + if ($host{$local.'_checkrpms_doomed'}>100) { + $trouble='RPMs outdated.
'; + } + if ($trouble) { $count++; &serverstatus($local,$trouble); } + } + unless ($count) { print "No mayor trouble."; } } - print ""; # ============================================================== Close, refresh print "
"; exit 0;