--- loncom/cgi/clusterstatus.pl 2002/02/18 23:45:23 1.1
+++ loncom/cgi/clusterstatus.pl 2003/08/05 18:47:21 1.17
@@ -2,61 +2,628 @@
$|=1;
# The LearningOnline Network with CAPA
# Cluster Status
-# (Versions
-# (Running loncron
-# 09/06/01 Gerd Kortemeyer)
-# 02/18/02 Gerd Kortemeyer)
#
+# $Id: clusterstatus.pl,v 1.17 2003/08/05 18:47:21 www Exp $
+
+use lib '/home/httpd/lib/perl/';
+use LONCAPA::Configuration;
+use strict;
+
use LWP::UserAgent();
use HTTP::Headers;
use IO::File;
+my %host=();                  # per-host results, keyed "<hostid>_<item>"; filled in mainloop, cleared in resetvars
+my $oneday=60*60*24;          # seconds in one day
+
+my %connectionstatus=();      # keyed "<local>_TO_<remote>"; holds the value returned by connected()
+my %perlvar=();               # settings copied from LONCAPA::Configuration::read_conf
+
+my $mode;                     # selected report; validated against the keys of %modes
+my $concount=0;               # number of uncached connection probes admitted so far (see connected)
+my $fromcache;                # when true, request() only answers from values already in %FORM
+
+my %domaindescription = ();   # domain => description field of domain.tab
+my %domain_auth_def = ();     # domain => $def_auth field of domain.tab
+my %domain_auth_arg_def = (); # domain => $def_auth_arg field of domain.tab
+
+my %hostname=();              # host id => host name, from hosts.tab
+my %hostip=();                # NOTE(review): assignment not visible in this excerpt - presumably from hosts.tab
+my %hostdom=();               # host id => domain, from hosts.tab
+my %hostrole=();              # NOTE(review): assignment not visible in this excerpt; compared with 'library' in mainloop
+my %libserv=();               # NOTE(review): assignment not visible in this excerpt - confirm usage
+
+my $maxusers=0;               # largest per-host active user count seen by mainloop
+my $maxload=0;                # largest per-host load figure seen by mainloop
+my $totalusers=0;             # sum of active users over all hosts
+
+my %FORM=();                  # decoded form parameters (POST body, then query string)
+
+my $stat_total=0;             # requests considered in this pass
+my $stat_notyet=0;            # requests answered with 'not_yet'
+my $stat_fromcache=0;         # requests answered from cached form values
+
+sub select_form { # As shown, unpacks its arguments and always returns an empty string.
+ my ($def,$name,%hash) = @_; # NOTE(review): none of these is used below - the body looks truncated; confirm against the repository copy
+ my $selectform = "";
+ return $selectform;
+}
+
+
+# Build the form-field name under which the reply for ($local,$url) is cached:
+# the two parts joined by an underscore, with every non-word character
+# replaced by an underscore.
+sub key {
+    my ($local,$url)=@_;
+    (my $cachekey=join('_',$local,$url))=~s/\W/\_/gs;
+    return $cachekey;
+}
+
+sub hidden { # Called by request() with (field name, value); as shown it prints a single newline.
+ my ($name,$value)=@_; # NOTE(review): neither argument reaches the output - the literal below looks truncated (presumably a hidden form field); confirm
+ print "\n";
+}
+
+# Fetch $url from host $local, answering from the cached form values while
+# they are still fresh.
+#
+# Arguments: $local     - host id, looked up in %hostname
+#            $url       - path (and query string) requested from that host
+#            $cachetime - nominal cache lifetime in seconds
+# Returns:   the reply text, or one of the markers
+#            'not_yet'       - $fromcache is set and nothing is cached,
+#            'local_unknown' - %hostname has no entry for $local,
+#            'local_error'   - the HTTP request did not succeed.
+# Side effects: updates the $stat_* counters; except on the $fromcache early
+#            returns, passes the reply and its timestamp to &hidden().
+sub request {
+    my ($local,$url,$cachetime)=@_;
+# Scale the lifetime by a random factor in [0.5,1.5), presumably so that
+# cached entries do not all expire in the same pass.
+    $cachetime*=(0.5+rand);
+    my $key=&key($local,$url);
+    my $reply='';
+    $stat_total++;
+# if fromcache flag is set, only return cached values
+    if ($fromcache) {
+# Update the statistics before returning, otherwise they are never counted.
+        if ($FORM{$key.'_time'}) {
+            $stat_fromcache++;
+            return $FORM{$key};
+        } else {
+            $stat_notyet++;
+            return 'not_yet';
+        }
+    }
+# normal mode, refresh when expired or not yet present
+    if ($FORM{$key.'_time'}) {
+        if ((time-$FORM{$key.'_time'})<$cachetime) {
+            $reply=$FORM{$key};
+            &hidden($key.'_time',$FORM{$key.'_time'});
+            $stat_fromcache++;
+        }
+    }
+# A false cached reply (empty or '0') is treated like a miss and refetched.
+    unless ($reply) {
+        unless ($hostname{$local}) {
+            $reply='local_unknown';
+        } else {
+            my $ua=LWP::UserAgent->new(timeout => 10);
+            my $request=HTTP::Request->new('GET',
+                                           "http://".$hostname{$local}.$url);
+# NOTE(review): credentials are hard-coded and sent with HTTP basic auth over
+# plain http; consider moving them into the configuration.
+            $request->authorization_basic('lonadm','litelite');
+
+            my $response=$ua->request($request);
+
+            unless ($response->is_success) {
+                $reply='local_error';
+            } else {
+                $reply=$response->content;
+                chomp($reply);
+            }
+        }
+        &hidden($key.'_time',time);
+    }
+    &hidden($key,$reply);
+    return $reply;
+}
+
+# ============================================= Are local and remote connected?
+# Ask host $local (via its ping.pl) whether it can reach host $remote.
+# Both ids are stripped of non-word characters first.
+# Returns 'ok' when the first line of the reply equals $remote,
+# 'remote_unknown' when %hostname has no entry for $remote, 'not_yet' when
+# the probe was skipped, and otherwise the cleaned-up first reply line.
sub connected {
 my ($local,$remote)=@_;
 $local=~s/\W//g;
 $remote=~s/\W//g;
- unless ($hostname{$local}) { return 'local_unknown'; }
 unless ($hostname{$remote}) { return 'remote_unknown'; }
-
- my $ua=new LWP::UserAgent;
-
- my $request=new HTTP::Request('GET',
- "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote);
-
- my $response=$ua->request($request);
-
- unless ($response->is_success) { return 'local_offline'; }
-
- my $reply=$response->content;
+ my $url='/cgi-bin/ping.pl?'.$remote;
+#
+# Slowly phase this in: if not cached, only do 5 percent of the cases,
+# but always do the first five.
+#
+ unless ($FORM{&key($local,$url)}) {
+# $concount starts at 0, so "<5" admits exactly five uncached probes.
+ unless (($concount<5) || (rand>0.95)) {
+ $stat_total++;
+ $stat_notyet++;
+ return 'not_yet';
+ } else {
+ $concount++;
+ }
+ }
+#
+# Actually do the query
+#
+ &statuslist($local,'connecting '.$remote);
+ my $reply=&request($local,$url,3600);
 $reply=(split("\n",$reply))[0];
 $reply=~s/\W//g;
 if ($reply ne $remote) { return $reply; }
 return 'ok';
}
+# ============================================================ Get a reply hash
-
-print "Content-type: text/html\n\n".
- "
\n";
-# ------------------------------------------------------------ Read access.conf
-{
- my $config=IO::File->new("/etc/httpd/conf/access.conf");
+# Call &request() with the given arguments and parse its reply, expected in
+# the form "name=value&name=value...", into a hash which is returned as a list.
+# Pairs with a false name are skipped; a missing value becomes ''.
+# A bare marker reply such as 'local_error' therefore shows up as a key with
+# an empty value, which is what the callers test with defined().
+sub replyhash {
+    my %returnhash=();
+    foreach my $pair (split(/\&/,&request(@_))) {
+# Limit the split to two fields so a value containing '=' is kept whole.
+        my ($name,$value)=split(/\=/,$pair,2);
+        if ($name) {
+# Only replace a missing value; a literal 0 is a legitimate value.
+            unless (defined($value)) { $value=''; }
+            $returnhash{$name}=$value;
+        }
+    }
+    return %returnhash;
+}
- while (my $configline=<$config>) {
- if ($configline =~ /PerlSetVar/) {
- my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
- $perlvar{$varname}=$varvalue;
- }
+# ================================================================ Link to host
+
+sub otherwindow { # As shown, returns $label with one space on either side; the link helpers print the result.
+ my ($local,$url,$label)=@_; # NOTE(review): $local and $url are unused - the literal below looks truncated (presumably a link to $url on $local); confirm
+ return
+ " $label ";
+}
+
+# Print the 'Login' link for host $local; the target is the login page with
+# the configured default domain appended.
+sub login {
+    my ($local)=@_;
+    my $loginurl='/adm/login?domain='.$perlvar{'lonDefDomain'};
+    print(&otherwindow($local,$loginurl,'Login'));
+}
+
+# Print the 'Run loncron' link for host $local.
+sub runloncron {
+    my ($local)=@_;
+    print(&otherwindow($local,'/cgi-bin/loncron.pl','Run loncron'));
+}
+
+# Print the 'loncron' link (the /lon-status page) for host $local.
+sub loncron {
+    my ($local)=@_;
+    print(&otherwindow($local,'/lon-status','loncron'));
+}
+
+# Print the 'lonc' link (loncstatus.txt) for host $local.
+sub lonc {
+    my ($local)=@_;
+    print(&otherwindow($local,'/lon-status/loncstatus.txt','lonc'));
+}
+
+# Print the 'lond' link (londstatus.txt) for host $local.
+sub lond {
+    my ($local)=@_;
+    print(&otherwindow($local,'/lon-status/londstatus.txt','lond'));
+}
+
+# Print the 'Users' link (userstatus.pl) for host $local.
+sub users {
+    my ($local)=@_;
+    print(&otherwindow($local,'/cgi-bin/userstatus.pl','Users'));
+}
+
+# Print the 'Versions' link (lonversions.pl) for host $local.
+sub versions {
+    my ($local)=@_;
+    print(&otherwindow($local,'/cgi-bin/lonversions.pl','Versions'));
+}
+
+# Print the 'Server Status' link (/server-status) for host $local.
+sub server {
+    my ($local)=@_;
+    print(&otherwindow($local,'/server-status','Server Status'));
+}
+
+# ========================================================= Produce a green bar
+sub bar { # Print a bar whose length is the argument rounded to the nearest whole number.
+ my $parm=shift; # requested bar length; may be fractional
+ my $number=int($parm+0.5); # round half up (for non-negative input)
+ print ""; # NOTE(review): empty literal - the opening markup appears to have been stripped; confirm
+ for (my $i=0;$i<$number;$i++) { # one unit of output per step
+ print "+";
+ }
+ print " |
";
+}
+
+# ========================================================== Show server status
+
+sub serverstatus { # Print the status panel for host $local: header, links, optional $trouble text, then the recorded details.
+ my ($local,$trouble)=@_; # $trouble is optional; when true it is printed ahead of the details
+ print (<
+
+
+$local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
+ $domaindescription{$hostdom{$local}}
+ |
+ENDHEADER
+ &login($local);&server($local);&users($local);&versions($local);
+ &loncron($local);&lond($local);&lonc($local);&runloncron($local);
+ print " |
";
+ if ($trouble) {
+ print ("$trouble |
");
+ }
+ print "";
+# version: shown whenever mainloop recorded one (including 'Could not determine.')
+ if ($host{$local.'_version'}) {
+ print " Version: ".$host{$local.'_version'}
+ }
+# load: shown when the recorded load figure exceeds 0.5 or in load_doomed mode
+ if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) {
+ print " Load: ".$host{$local.'_load'}
+ }
+# users: shown when more than 10 users are active or in users_doomed mode
+ if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) {
+ print " Active Users: ".$host{$local.'_users'}
+ }
+
+# checkrpms: shown only when mainloop recorded a message for the RPM check
+ if ($host{$local.'_checkrpms'}) {
+ print " RPMs: ".$host{$local.'_checkrpms'}
+ }
+# mysql: shown only when mainloop recorded a message for the database check
+ if ($host{$local.'_mysql'}) {
+ print " MySQL Database: ".$host{$local.'_mysql'}
+ }
+# connections: list every host this one failed to reach (space separated in %host)
+ if ($host{$local.'_notconnected'}) {
+ print " Not connected: ";
+ foreach (split(/ /,$host{$local.'_notconnected'})) {
+ if ($_) {
+ print " $_";
+ }
+ }
+ }
+# errors: loncron error count, shown only when non-zero
+ if ($host{$local.'_errors'}) {
+ print " loncron errors: ".$host{$local.'_errors'};
+ }
+ print " |
";
+}
+
+# =========================================================== Doomedness sorted
+
+# Return the ids of all hosts that have a true "<hostid>_$crit" entry in
+# %host, ordered by that entry's numeric value, largest first.
+sub doomedness {
+    my ($crit)=@_;
+    my %score=();
+    my @candidates=();
+    foreach my $entry (keys %host) {
+        next unless ($entry=~/^(\w+)\_$crit$/);
+        my $hostid=$1;
+# hosts whose value is false (0, empty, undefined) are left out
+        next unless ($host{$entry});
+        push(@candidates,$hostid);
+        $score{$hostid}=$host{$entry};
+    }
+    return sort { $score{$b} <=> $score{$a} } @candidates;
+}
+
+# Reset the per-pass totals, the statistics counters, the connection probe
+# counter and the %host result table before a new polling pass.
+sub resetvars {
+    ($maxusers,$maxload,$totalusers)=(0,0,0);
+    ($stat_total,$stat_notyet,$stat_fromcache)=(0,0,0);
+    $concount=0;
+    undef %host;
+    %host=();
+}
+
+# Poll every host in %hostname and record the findings.
+# Resets all counters first (resetvars), then for each host queries the
+# loncron summary, version, user status/load, mysql status and rpm check
+# (each through request()/replyhash() with its own cache lifetime) and
+# finally probes the connection to every host with connected().
+# Results go into %host ("<hostid>_<item>") and %connectionstatus
+# ("<local>_TO_<remote>"); $maxusers, $maxload and $totalusers are updated.
+# Takes no arguments.
+sub mainloop {
+ &resetvars();
+# ==================================================== Main Loop over all Hosts
+
+foreach my $local (sort keys %hostname) {
+ $host{$local.'_unresponsive_doomed'}=0;
+# -- Check general status
+ &statuslist($local,'General');
+ my %loncron=&replyhash($local,'/lon-status/loncron_simple.txt',1200);
+ if (defined($loncron{'local_error'})) {
+ $host{$local.'_loncron'}='Could not determine.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+ if ((time-$loncron{'time'})>$oneday) {
+ $host{$local.'_loncron'}='Stale.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+# weighted score: notices count once, warnings 4 times, errors 100 times
+ $host{$local.'_loncron_doomed'}=$loncron{'notices'}
+ +4*$loncron{'warnings'}
+ +100*$loncron{'errors'};
+ $host{$local.'_errors'}=$loncron{'errors'};
+ }
+ }
+# -- Check version
+ &statuslist($local,'Version');
+ my $version=&request($local,'/lon-status/version.txt',7200);
+ if ($version eq 'local_error') {
+ $host{$local.'_version'}='Could not determine.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+ $host{$local.'_version'}=$version;
+ }
+# -- Check user status
+ &statuslist($local,'Users');
+ my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600);
+ if (defined($userstatus{'local_error'})) {
+ $host{$local.'_userstatus'}='Could not determine.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+ $host{$local.'_users_doomed'}=$userstatus{'Active'};
+ $host{$local.'_users'}=$userstatus{'Active'};
+ unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; }
+ if ($host{$local.'_users'}>$maxusers) {
+ $maxusers=$host{$local.'_users'};
+ }
+ $totalusers+=$host{$local.'_users'};
+# the second of the three space-separated loadavg figures is used as the load
+ my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'});
+ $host{$local.'_load_doomed'}=$mload;
+ if ($mload>$maxload) {
+ $maxload=$mload;
+ }
+ $host{$local.'_load'}=$userstatus{'loadavg'};
+ }
+# -- Check mysql status
+ &statuslist($local,'Database');
+ my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600);
+ if (defined($mysql{'local_error'})) {
+ $host{$local.'_mysql'}='Could not determine.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+ if ((time-$mysql{'time'})>(7*$oneday)) {
+# a stale report only counts against hosts whose role is 'library'
+ if ($hostrole{$local} eq 'library') {
+ $host{$local.'_mysql'}='Stale.';
+ $host{$local.'_mysql_doomed'}=1;
+ }
+ if ($mysql{'mysql'} eq 'defunct') {
+ $host{$local.'_mysql'}='Defunct (maybe stale).';
+ $host{$local.'_mysql_doomed'}=2;
+ }
+ } elsif ($mysql{'mysql'} eq 'defunct') {
+ $host{$local.'_mysql'}='Defunct.';
+ $host{$local.'_mysql_doomed'}=3;
+ }
+ }
+# -- Check rpm status
+ &statuslist($local,'RPMs');
+ my %checkrpms=&replyhash($local,'/lon-status/checkrpms.txt',7200);
+ if (defined($checkrpms{'local_error'})) {
+ $host{$local.'_checkrpms'}='Could not determine.';
+ $host{$local.'_unresponsive_doomed'}++;
+ } else {
+ if ((time-$checkrpms{'time'})>(4*$oneday)) {
+ $host{$local.'_checkrpms'}='Stale.';
+ $host{$local.'_checkrpms_doomed'}=50;
+ $host{$local.'_unresponsive_doomed'}++;
+ } elsif ($checkrpms{'status'} eq 'fail') {
+ $host{$local.'_checkrpms'}='Could not check RPMs.';
+ $host{$local.'_checkrpms_doomed'}=100;
+ } elsif ($checkrpms{'rpmcount'}) {
+ $host{$local.'_checkrpms'}='Outdated RPMs: '.
+ $checkrpms{'rpmcount'};
+ $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'};
+ }
+ }
+# -- Check connections
+ &statuslist($local,'Connections');
+ $host{$local.'_notconnected'}='';
+ $host{$local.'_notconnected_doomed'}=0;
+ foreach my $remote (sort keys %hostname) {
+ my $status=&connected($local,$remote);
+ $connectionstatus{$local.'_TO_'.$remote}=$status;
+# 'not_yet' means the probe was skipped, so it is not counted as a failure
+ unless (($status eq 'ok') || ($status eq 'not_yet')) {
+ $host{$local.'_notconnected'}.=' '.$remote;
+ $host{$local.'_notconnected_doomed'}++;
+ }
 }
- delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
- delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
+# =============================================================== End Main Loop
+}
+
+}
+
+# Print the report selected by $mode from the data gathered by mainloop:
+#   *_doomed      - one serverstatus() panel per host, worst first
+#   connections   - matrix of connected() results, local host by remote host
+#   users / load  - per-host listing of active users / load
+#   trouble       - serverstatus() panels for hosts exceeding a threshold
+# Takes no arguments; reads %host, %hostname, %hostdom, %domaindescription,
+# %connectionstatus, $maxusers, $maxload and $totalusers.
+sub reports {
+# ====================================================================== Output
+ if ($mode=~/\_doomed$/) {
+# Output by doomedness
+ foreach (&doomedness($mode)) {
+ &serverstatus($_);
+ }
+ } elsif ($mode eq 'connections') {
+ print
+ "".
+ " | ";
+ foreach my $remote (sort keys %hostname) {
+ print ''.$remote.' | ';
+ }
+ print "
\n";
+# connection matrix
+ foreach my $local (sort keys %hostname) {
+ print ''.$local.' | ';
+ foreach my $remote (sort keys %hostname) {
+ if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') {
+ my $cellcolor='#FFFFFF';
+ if ($local eq $remote) { $cellcolor='#DDDDDD'; }
+ print 'not yet tested | ';
+ } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') {
+ my $cellcolor='#BBDDBB';
+ if ($local eq $remote) { $cellcolor='#99DD99'; }
+ print
+'ok | ';
+ } else {
+ my $cellcolor='#DDBBBB';
+ if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') {
+ if ($local eq $remote) {
+ $cellcolor='#DD88AA';
+ } else {
+ $cellcolor='#DDAACC';
+ }
+ } else {
+ if ($local eq $remote) { $cellcolor='#DD9999'; }
+ }
+ print
+ ''.
+ $connectionstatus{$local.'_TO_'.$remote}.' ';
+ &lonc($local); &lond($remote);
+ print ' | ';
+ }
+ }
+ print "
\n";
+ }
+ print "
";
+ } elsif ($mode eq 'users') {
+# Users
+ if ($maxusers) {
+ my $factor=50/$maxusers;
+ print "Total active user(s): $totalusers
".
+ "";
+
+ foreach my $local (sort keys %hostname) {
+ if (defined($host{$local.'_users'})) {
+ print
+''.$local.
+ ' '.
+ $domaindescription{$hostdom{$local}}.
+ ' | ';
+ &users($local);
+ print
+ ' | '.
+ $host{$local.'_users'}.' | |
\n";
+ }
+ }
+ print "
";
+ } else {
+ print "No active users logged in.";
+ }
+ } elsif ($mode eq 'load') {
+# Load
+ if ($maxload) {
+ my $factor=50/$maxload;
+ print
+ "";
+ foreach my $local (sort keys %hostname) {
+ if (defined($host{$local.'_load_doomed'})) {
+ print
+''.
+ $local.
+ ' '.
+ $domaindescription{$hostdom{$local}}.
+ ' | ';
+ &server($local);
+ print
+ ' | '.
+ $host{$local.'_load_doomed'}.' | |
\n";
+ }
+ }
+ print "
";
+ } else {
+ print "No workload.";
+ }
+ } elsif ($mode eq 'trouble') {
+ my $count=0;
+ foreach my $local (sort keys %hostname) {
+# Collect every finding for this host; each message carries its own line
+# break, so they are appended rather than overwriting one another.
+ my $trouble='';
+ if ($host{$local.'_unresponsive_doomed'}>3) {
+ $trouble.='Does not respond to several queries.
';
+ }
+ if ($host{$local.'_errors'}) {
+ $trouble.='Has loncron errors.
';
+ } elsif ($host{$local.'_loncron_doomed'}>600) {
+ $trouble.='High loncron count.
';
+ }
+ if ($host{$local.'_load_doomed'}>5) {
+ $trouble.='High load.
';
+ }
+ if ($host{$local.'_users_doomed'}>200) {
+ $trouble.='High user volume.
';
+ }
+ if ($host{$local.'_mysql_doomed'}>1) {
+ $trouble.='MySQL database apparently offline.
';
+ }
+ if ($host{$local.'_checkrpms_doomed'}>100) {
+ $trouble.='RPMs outdated.
';
+ }
+ if ($trouble) { $count++; &serverstatus($local,$trouble); }
+ }
+ unless ($count) { print "No major trouble."; }
+ }
+}
+
+# ====================================================================== Status
+sub statuslist { # Progress hook called with (host id, activity label) before each query; as shown it prints one newline.
+ my ($local,$what)=@_; # NOTE(review): both arguments are unused - the literal below looks truncated; confirm against the repository copy
+ print
+"\n";
}
+# =============================================================================
+# =============================================================================
+# Main program
+#
+# ========================================================= Get form parameters
+my $buffer;
+
+# Read the POST body only when a body length is announced; on a plain GET
+# request CONTENT_LENGTH is unset and there is nothing to read.
+if ($ENV{'CONTENT_LENGTH'}) {
+    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
+}
+my @pairs;
+my $pair; my $name; my $value;
+undef %FORM;
+%FORM=();
+# Decode the POST body first and the query string second, so that a
+# parameter given in the query string overrides the posted one.
+my @sources=($buffer,$ENV{'QUERY_STRING'});
+foreach my $source (@sources) {
+    $buffer=$source;
+    next unless (defined($buffer));
+    @pairs=split(/&/,$buffer);
+    foreach $pair (@pairs) {
+# Split on the first '=' only, so the value is never truncated.
+        ($name,$value) = split(/=/,$pair,2);
+        next unless (defined($name));
+        unless (defined($value)) { $value=''; }
+        $value =~ tr/+/ /;
+        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
+        $FORM{$name}=$value;
+    }
+}
+
+# ====================================================== Determine refresh rate
+
+# Refresh interval in seconds: taken from the form when it is all digits,
+# otherwise 30, and never less than 30.
+my $refresh=30;
+if ($FORM{'refresh'}=~/^\d+$/) { $refresh=$FORM{'refresh'}; }
+if ($refresh<30) { $refresh=30; }
+my $starttime=time;
+
+# ============================================================== Determine mode
+
+# Report modes accepted in the 'mode' form parameter, with their titles.
+my %modes=(
+    'trouble'             => 'Trouble',
+    'users_doomed'        => 'Doomed: Users',
+    'loncron_doomed'      => 'Doomed: General (loncron)',
+    'mysql_doomed'        => 'Doomed: Database (mysql)',
+    'notconnected_doomed' => 'Doomed: Connections',
+    'checkrpms_doomed'    => 'Doomed: RPMs',
+    'load_doomed'         => 'Doomed: Load',
+    'unresponsive_doomed' => 'Doomed: Status could not be determined',
+    'users'               => 'User Report',
+    'load'                => 'Load Report',
+    'connections'         => 'Connections Matrix',
+);
+
+# Anything that is not a known mode falls back to the trouble report.
+$mode=$FORM{'mode'};
+if (!$modes{$mode}) { $mode='trouble'; }
+# ================================================================ Send Headers
+print "Content-type: text/html\n\n".
+ "\n";
+# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf).
+my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
+%perlvar=%{$perlvarref};
+undef $perlvarref; # drop our reference; %perlvar now holds its own copy of the settings
+delete $perlvar{'lonReceipt'}; # remove from the copy since sensitive and not needed
+delete $perlvar{'lonSqlAccess'}; # remove from the copy since sensitive and not needed
+
# ------------------------------------------------------------- Read hosts file
{
my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
while (my $configline=<$config>) {
+ $configline=~s/#.*$//;
+ unless ($configline=~/\w/) { next; }
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
$hostname{$id}=$name;
$hostdom{$id}=$domain;
@@ -67,8 +634,61 @@ print "Content-type: text/html\n\n".
}
}
}
+# ------------------------------------------------------------ Read domain file
+# Load domain.tab: colon-separated lines of domain, description, default
+# authentication and its argument.  Comment lines and blank lines are
+# skipped; a missing file is silently ignored.
+{
+    my $domfile=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab');
+    if ($domfile) {
+        while (my $entry=<$domfile>) {
+            if ($entry=~/^(\#|\s*$)/) { next; }
+            chomp($entry);
+# at most four fields, so the last one may itself contain colons
+            my @fields=split(/:/,$entry,4);
+            my $domain=$fields[0];
+            $domaindescription{$domain}=$fields[1];
+            $domain_auth_def{$domain}=$fields[2];
+            $domain_auth_arg_def{$domain}=$fields[3];
+        }
+    }
+}
-print "Cluster Status
\n";
-print &connected('gerdl1','msul1');
-
-print "";
+print "LON-CAPA Cluster Status ".localtime()."
";
+print "\n";; # NOTE(review): literal looks truncated, and no call to mainloop()/reports() is visible in this excerpt - confirm against the repository copy
+print ""; # NOTE(review): empty literal - closing markup appears to have been stripped; confirm
+exit 0; # end the CGI run with a success status