Annotation of loncom/cgi/clusterstatus.pl, revision 1.11
1.1 www 1: #!/usr/bin/perl
2: $|=1;
3: # The LearningOnline Network with CAPA
4: # Cluster Status
1.9 www 5: #
1.11 ! www 6: # $Id: clusterstatus.pl,v 1.10 2003/07/31 16:07:47 www Exp $
1.3 harris41 7:
8: use lib '/home/httpd/lib/perl/';
9: use LONCAPA::Configuration;
10:
1.1 www 11: use LWP::UserAgent();
12: use HTTP::Headers;
13: use IO::File;
14:
# File-level state shared by the subroutines and the main program.
#   %host             - per-host status values, keyed "<hostid>_<item>"
#   %connectionstatus - ping results, keyed "<local>_TO_<remote>"
#   %perlvar          - settings read from loncapa.conf
my (%host, %connectionstatus, %perlvar);

# Number of seconds in one day; used for the staleness checks.
my $oneday = 24 * 60 * 60;

# Name of the report selected for this request.
my $mode;

22:
# Build the markup for a single-row HTML <select> element.
#   $def  - key of the option that should be marked "selected"
#   $name - value of the element's name attribute
#   %hash - option value => displayed label
# Options are emitted in sorted key order.  Returns the markup as a string;
# nothing is printed.
sub select_form {
    my ($def,$name,%hash) = @_;
    my @options = map {
        my $flag = ($_ eq $def) ? 'selected' : '';
        "<option value=\"$_\" $flag>$hash{$_}</option>\n";
    } sort keys %hash;
    return join('', "<select name=\"$name\" size=\"1\">\n", @options, '</select>');
}
34:
1.8 www 35:
# Derive the form-field name under which a reply for ($local,$url) is cached:
# the two parts joined by "_", with every non-word character replaced by "_".
sub key {
    my ($local,$url)=@_;
    (my $key = join('_', $local, $url)) =~ s/\W/_/g;
    return $key;
}
42:
# Print a hidden <input> form field.
#   $name  - field name
#   $value - field value
# Both are HTML-escaped before being placed inside the single-quoted
# attributes.  The values stored here include raw replies from other servers
# (strings of the form "a=1&b=2"), so an unescaped "&", quote or "<" would
# corrupt the field or the surrounding page.  The browser undoes the escaping
# when it submits the form, so the value posted back is the original text.
sub hidden {
    my ($name,$value)=@_;
    my %entity=('&' => '&amp;',
                '<' => '&lt;',
                '>' => '&gt;',
                '"' => '&quot;',
                "'" => '&#39;');
    foreach my $text ($name,$value) {
        $text='' unless defined($text);
        $text=~s/([&<>"'])/$entity{$1}/g;
    }
    print "\n<input type='hidden' name='$name' value='$value' />";
}
47:
# Fetch $url from the host registered as $local, using form fields as a cache.
#   $local     - host id, looked up in %hostname
#   $url       - path (and query) appended to "http://<hostname>"
#   $cachetime - maximum age in seconds for a cached reply to be reused
# A reply posted back in %FORM under key($local,$url) is reused while its
# "<key>_time" stamp is younger than $cachetime.  Otherwise the URL is fetched
# with a 20 second timeout.  The reply and its time stamp are written out as
# hidden form fields so the next submission carries them along.
# Returns the reply text, 'local_unknown' if the host id has no hostname, or
# 'local_error' if the HTTP request did not succeed.
# NOTE(review): the basic-auth credentials are hard-coded here.
sub request {
    my ($local,$url,$cachetime)=@_;
    my $key=&key($local,$url);
    my $stamp=$key.'_time';
    my $reply='';
    if ($FORM{$stamp} && ((time-$FORM{$stamp})<$cachetime)) {
        # cached copy is still fresh: reuse it and keep its original stamp
        $reply=$FORM{$key};
        &hidden($stamp,$FORM{$stamp});
        &hidden($key.'_fromcache',1);
    }
    if (!$reply) {
        if ($hostname{$local}) {
            my $ua=LWP::UserAgent->new(timeout => 20);
            my $request=HTTP::Request->new('GET',
                                           "http://".$hostname{$local}.$url);
            $request->authorization_basic('lonadm','litelite');
            my $response=$ua->request($request);
            if ($response->is_success) {
                $reply=$response->content;
                chomp($reply);
            } else {
                $reply='local_error';
            }
        } else {
            $reply='local_unknown';
        }
        &hidden($stamp,time);
    }
    &hidden($key,$reply);
    return $reply;
}
84:
85: # ============================================= Are local and remote connected?
# Ask host $local whether it can reach host $remote.
# Both ids are stripped of non-word characters first.
# Returns:
#   'remote_unknown' - $remote has no entry in %hostname
#   'not_yet'        - no cached answer exists and this pair was not picked
#                      in this run (only about 10 percent of uncached pairs
#                      are queried per run, to phase the checks in slowly)
#   'ok'             - the first line of the ping reply names $remote
#   otherwise        - the first line of the reply, non-word characters removed
sub connected {
    my ($local,$remote)=@_;
    foreach my $id ($local,$remote) { $id=~s/\W//g; }

    return 'remote_unknown' unless $hostname{$remote};
    my $url='/cgi-bin/ping.pl?'.$remote;
#
# Slowly phase this in: if not cached, only do 10 percent of the cases
#
    if (!$FORM{&key($local,$url)} && (rand()<=0.9)) { return 'not_yet'; }
#
# Actually do the query
#
    &statuslist($local,'connecting '.$remote);
    my $reply=(split("\n",&request($local,$url,3600)))[0];
    $reply=~s/\W//g;
    return (($reply eq $remote) ? 'ok' : $reply);
}
109: # ============================================================ Get a reply hash
110:
# Run request() with the given arguments and parse the reply, a string of
# "name=value" pairs separated by "&", into a hash.
# Pairs with an empty name are skipped.  A false value (missing, empty or 0)
# is stored as the empty string.
# Each pair is split on its first "=" only, so a value that itself contains
# "=" is kept whole instead of being cut off.
# Returns the hash as a list.
sub replyhash {
    my %returnhash=();
    foreach my $pair (split(/\&/,&request(@_))) {
        my ($name,$value)=split(/\=/,$pair,2);
        next unless ($name);
        $returnhash{$name}=($value ? $value : '');
    }
    return %returnhash;
}
1.1 www 122:
1.9 www 123: # ================================================================ Link to host
1.1 www 124:
# Return an HTML link to $url on host $local, labelled $label.
# The hostname comes from %hostname.  The link targets a window named
# "newwin<hostid>".
sub otherwindow {
    my ($local,$url,$label)=@_;
    my $href='http://'.$hostname{$local}.$url;
    return " <a href='$href' target='newwin$local'>$label</a> ";
}
130:
# Print a "Login" link to host $local, preselecting the domain configured as
# lonDefDomain.
sub login {
    my ($local)=@_;
    my $target='/adm/login?domain='.$perlvar{'lonDefDomain'};
    print &otherwindow($local,$target,'Login');
}
136:
# Print a "Run loncron" link to /cgi-bin/loncron.pl on host $local.
sub runloncron {
    my ($local)=@_;
    my $link=&otherwindow($local,'/cgi-bin/loncron.pl','Run loncron');
    print $link;
}
141:
# Print a "loncron" link to /lon-status on host $local.
sub loncron {
    my ($local)=@_;
    my $link=&otherwindow($local,'/lon-status','loncron');
    print $link;
}
146:
# Print a "lonc" link to /lon-status/loncstatus.txt on host $local.
sub lonc {
    my ($local)=@_;
    my $link=&otherwindow($local,'/lon-status/loncstatus.txt','lonc');
    print $link;
}
151:
# Print a "lond" link to /lon-status/londstatus.txt on host $local.
sub lond {
    my ($local)=@_;
    my $link=&otherwindow($local,'/lon-status/londstatus.txt','lond');
    print $link;
}
156:
# Print a "Users" link to /cgi-bin/userstatus.pl on host $local.
sub users {
    my ($local)=@_;
    my $link=&otherwindow($local,'/cgi-bin/userstatus.pl','Users');
    print $link;
}
161:
# Print a "Versions" link to /cgi-bin/lonversions.pl on host $local.
sub versions {
    my ($local)=@_;
    my $link=&otherwindow($local,'/cgi-bin/lonversions.pl','Versions');
    print $link;
}
166:
# Print a "Server Status" link to /server-status on host $local.
sub server {
    my ($local)=@_;
    my $link=&otherwindow($local,'/server-status','Server Status');
    print $link;
}
1.1 www 171:
1.11 ! www 172: # ========================================================= Produce a green bar
# Print a horizontal bar as a one-cell table.
#   $parm - bar length; rounded to the nearest whole number of characters
# The bar is a run of "+" characters in the same colour as the cell
# background, so only the coloured cell is visible.  A length that rounds to
# zero or below prints an empty cell.
# The cell and row are closed explicitly; previously only </font> and
# </table> were emitted, leaving <td> and <tr> open.
sub bar {
    my $parm=shift;
    my $number=int($parm+0.5);
    print "<table><tr><td bgcolor='#225522'><font color='#225522'>".
          ('+' x $number).
          "</font></td></tr></table>";
}

! 182:
1.9 www 183: # ========================================================== Show server status
184:
# Print the status panel for one host.
#   $local   - host id
#   $trouble - optional HTML text; if true it is shown in a highlighted row
# The panel has a named anchor (the host id), a header with the id, domain,
# hostname and role, a row of links to the host's status pages, and the
# details collected in %host: load, active users, RPM state, MySQL state,
# unreachable hosts and the loncron error count.
# Load is shown when its score exceeds 0.5 or the report is 'load_doomed'.
# Active users are shown when the score exceeds 10 or the report is
# 'users_doomed'.
# Markup fixes: the header cell is opened with <td> and is now closed with
# </td> rather than </th>, and the anchor is closed explicitly because HTML
# parsers treat "<a ... />" as an open tag.
sub serverstatus {
    my ($local,$trouble)=@_;
    print (<<ENDHEADER);
<a name="$local"></a>
<table width="100%" bgcolor="#225522" cellspacing="2" cellpadding="2" border="0">
<tr><td bgcolor="#BBDDBB"><font color="#225522" face="arial"><b>
$local $hostdom{$local}</b> <tt>($hostname{$local}; $hostrole{$local})</tt>
<br />$domaindescription{$hostdom{$local}}
</font></td></tr><tr><td bgcolor="#DDDDBB"><font color="#225522">
ENDHEADER
    &login($local);&server($local);&users($local);&versions($local);
    &loncron($local);&lond($local);&lonc($local);&runloncron($local);
    print "</font></td></tr>";
    if ($trouble) {
        print ("<tr><td bgcolor='#DDBBBB'><font color='#552222' size='+2'>$trouble</font></td></tr>");
    }
    print "<tr><td bgcolor='#BBBBBB'>";
# load
    if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) {
        print "<br />Load: ".$host{$local.'_load'};
    }
# users
    if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) {
        print "<br />Active Users: ".$host{$local.'_users'};
    }

# checkrpms
    if ($host{$local.'_checkrpms'}) {
        print "<br />RPMs: ".$host{$local.'_checkrpms'};
    }
# mysql
    if ($host{$local.'_mysql'}) {
        print "<br />MySQL Database: ".$host{$local.'_mysql'};
    }
# connections: each unreachable host links to its own panel anchor
    if ($host{$local.'_notconnected'}) {
        print "<br />Not connected: ";
        foreach my $remote (split(/ /,$host{$local.'_notconnected'})) {
            if ($remote) {
                print " <a href='#$remote'>$remote</a>";
            }
        }
    }
# errors
    if ($host{$local.'_errors'}) {
        print "<br />loncron errors: ".$host{$local.'_errors'};
    }
    print "</td></tr></table><br />";
}
234:
235: # =========================================================== Doomedness sorted
236:
# Return the host ids that have a true "<hostid>_<crit>" entry in %host,
# ordered by that entry's numeric value, highest first.
#   $crit - criterion suffix, e.g. 'load_doomed'
sub doomedness {
    my $crit=shift;
    my (@allhosts,%alldoomed);
    foreach my $entry (keys %host) {
        my ($id)=($entry=~/^(\w+)\_$crit$/) or next;
        next unless ($host{$entry});
        push(@allhosts,$id);
        $alldoomed{$id}=$host{$entry};
    }
    return sort { $alldoomed{$b} <=> $alldoomed{$a} } @allhosts;
}
1.1 www 251:
1.8 www 252: # ====================================================================== Status
# Show a progress message in the "progress" field of the "prgstat" form by
# printing a small <script> element.
#   $local - host id being tested
#   $what  - description of the current test
# With both arguments the message reads
# "Testing <hostid> (<hostname>): <what>".  When called with a single
# argument, as the main program does with 'Done.', that argument is shown
# verbatim; previously it rendered as "Testing Done. (): ".
# Backslashes and single quotes are escaped and line breaks are replaced by
# spaces, so the message cannot terminate the JavaScript string literal.
sub statuslist {
    my ($local,$what)=@_;
    my $message=(defined($what)
                 ? "Testing $local ($hostname{$local}): $what"
                 : $local);
    $message='' unless defined($message);
    $message=~s/([\\'])/\\$1/g;
    $message=~s/[\r\n]+/ /g;
    print
"<script>document.prgstat.progress.value='$message';</script>\n";
}
258:
1.8 www 259: #
260: # Main program
261: #
262: # ========================================================= Get form parameters
# Collect request parameters into %FORM: first the POST body read from STDIN
# (CONTENT_LENGTH bytes), then the query string.  A name that appears in both
# ends up with the query-string value, because that source is parsed last.
# In each value "+" becomes a space and %XX escapes are decoded; names are
# stored as received.
my $buffer;
my @pairs;
my $pair; my $name; my $value;
undef %FORM;
%FORM=();
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
foreach my $source ('post','query') {
    if ($source eq 'query') { $buffer=$ENV{'QUERY_STRING'}; }
    @pairs=split(/&/,$buffer);
    foreach $pair (@pairs) {
        ($name,$value) = split(/=/,$pair);
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg;
        $FORM{$name}=$value;
    }
}
285:
286: # ====================================================== Determine refresh rate
287:
# Refresh interval in seconds: the 'refresh' form value when it consists of
# digits only, otherwise 120; never less than 30.
# The pattern is anchored with \A and \z because "$" would also accept a
# trailing newline, and the value is later written into a JavaScript string.
my $refresh=120;
if (defined($FORM{'refresh'}) && ($FORM{'refresh'}=~/\A\d+\z/)) {
    $refresh=$FORM{'refresh'};
}
if ($refresh<30) { $refresh=30; }
# Start of this run; used at the end to work out how long it took.
my $starttime=time;
1.9 www 291:
292: # ============================================================== Determine mode
293:
# Available reports: form value => label shown in the selection box.
my %modes=(
    'trouble'             => 'Trouble',
    'users_doomed'        => 'Doomed: Users',
    'loncron_doomed'      => 'Doomed: General (loncron)',
    'mysql_doomed'        => 'Doomed: Database (mysql)',
    'notconnected_doomed' => 'Doomed: Connections',
    'checkrpms_doomed'    => 'Doomed: RPMs',
    'load_doomed'         => 'Doomed: Load',
    'unresponsive_doomed' => 'Doomed: Status could not be determined',
    'users'               => 'User Report',
    'load'                => 'Load Report',
    'connections'         => 'Connections Matrix',
);

# Use the requested report if it is a known one, otherwise 'trouble'.
$mode=($modes{$FORM{'mode'}} ? $FORM{'mode'} : 'trouble');
1.8 www 308: # ================================================================ Send Headers
# Emit the HTTP header and open the page.
print "Content-type: text/html\n\n<html><body bgcolor='#FFFFFF'>\n";
# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf).
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
%perlvar=%$perlvarref;
undef $perlvarref; # remove since sensitive and not needed
# remove since sensitive and not needed
delete @perlvar{'lonReceipt','lonSqlAccess'};
1.1 www 317:
318: # ------------------------------------------------------------- Read hosts file
# Read hosts.tab from lonTabDir.  Each non-comment line has the form
# id:domain:role:name:ip and fills %hostname, %hostdom, %hostrole and %hostip.
# Library servers other than this host (lonHostID) are also recorded in
# %libserv.  $total counts the entries read.
# Lines are chomped and stripped of trailing whitespace so the last field
# does not keep its line ending.  A file that cannot be opened leaves the
# tables empty instead of being read through an undefined handle.
{
    my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");

    $total=0;
    if ($config) {
        while (my $configline=<$config>) {
            chomp($configline);
            $configline=~s/#.*$//;
            $configline=~s/\s+$//;
            unless ($configline=~/\w/) { next; }
            my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
            $hostname{$id}=$name;
            $hostdom{$id}=$domain;
            $hostrole{$id}=$role;
            $hostip{$id}=$ip;
            $total++;
            if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                $libserv{$id}=$name;
            }
        }
        $config->close();
    }
}
1.9 www 337: # ------------------------------------------------------------ Read domain file
# Read domain.tab from lonTabDir.  Each line that is neither blank nor a
# comment has the form domain:description:auth:autharg, with at most four
# fields, and fills %domaindescription, %domain_auth_def and
# %domain_auth_arg_def.  A file that cannot be opened leaves the tables empty.
{
    %domaindescription = ();
    %domain_auth_def = ();
    %domain_auth_arg_def = ();
    my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab');
    if ($fh) {
        while (my $line=<$fh>) {
            next if ($line=~/^(\#|\s*$)/);
            chomp($line);
            my ($domain,$description,$auth,$auth_arg)=split(/:/,$line,4);
            $domaindescription{$domain}=$description;
            $domain_auth_def{$domain}=$auth;
            $domain_auth_arg_def{$domain}=$auth_arg;
        }
    }
}
355:
# Page title with the current time, the progress form that statuslist()
# updates, and the opening of the 'status' form.  The 'status' form carries
# the report selector and, as hidden fields, the refresh rate and the cached
# replies.
print join('',
           "<img src='/adm/lonIcons/lonlogos.gif' align='right' />",
           '<h1>LON-CAPA Cluster Status ',scalar(localtime()),'</h1>',
           "<form name='prgstat'>\n",
           "<input type='text' name='progress' value='Starting ...' size='100' /><br />",
           "</form>\n",
           "<form name='status' method='post'>\n",
           'Choose next report: ',&select_form($mode,'mode',%modes),'<hr />');
&hidden('refresh',$refresh);
363:
364: # ==================================================== Main Loop over all Hosts
365:
# Highest active-user count and load figure seen on any host, and the
# cluster-wide total of active users; used by the 'users' and 'load' reports.
my $maxusers=0;
my $maxload=0;
my $totalusers=0;

# For every known host, collect the status items into %host.  Each item
# "<hostid>_<item>_doomed" is a numeric score used to rank hosts, and
# "<hostid>_unresponsive_doomed" counts the checks that could not be
# completed.  The loop variables are lexical so they cannot leak into, or be
# changed by, code called from the loop.
foreach my $local (sort keys %hostname) {
    $host{$local.'_unresponsive_doomed'}=0;
# -- Check general status
    &statuslist($local,'General');
    my %loncron=&replyhash($local,'/lon-status/loncron_simple.txt',1200);
    if (defined($loncron{'local_error'})) {
        $host{$local.'_loncron'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$loncron{'time'})>$oneday) {
            $host{$local.'_loncron'}='Stale.';
            $host{$local.'_unresponsive_doomed'}++;
        } else {
            # weighted score: notices count 1, warnings 4, errors 100
            $host{$local.'_loncron_doomed'}=$loncron{'notices'}
                                           +4*$loncron{'warnings'}
                                           +100*$loncron{'errors'};
            $host{$local.'_errors'}=$loncron{'errors'};
        }
    }
# -- Check user status
    &statuslist($local,'Users');
    my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600);
    if (defined($userstatus{'local_error'})) {
        $host{$local.'_userstatus'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        $host{$local.'_users_doomed'}=$userstatus{'Active'};
        $host{$local.'_users'}=$userstatus{'Active'};
        unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; }
        if ($host{$local.'_users'}>$maxusers) {
            $maxusers=$host{$local.'_users'};
        }
        $totalusers+=$host{$local.'_users'};
        # 'loadavg' holds three space-separated figures; the second one is
        # used as the load score
        my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'});
        $host{$local.'_load_doomed'}=$mload;
        if ($mload>$maxload) {
            $maxload=$mload;
        }
        $host{$local.'_load'}=$userstatus{'loadavg'};
    }
# -- Check mysql status
    &statuslist($local,'Database');
    my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600);
    if (defined($mysql{'local_error'})) {
        $host{$local.'_mysql'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$mysql{'time'})>(7*$oneday)) {
            # a stale report is only held against library servers
            if ($hostrole{$local} eq 'library') {
                $host{$local.'_mysql'}='Stale.';
                $host{$local.'_mysql_doomed'}=1;
            }
            if ($mysql{'mysql'} eq 'defunct') {
                $host{$local.'_mysql'}='Defunct (maybe stale).';
                $host{$local.'_mysql_doomed'}=2;
            }
        } elsif ($mysql{'mysql'} eq 'defunct') {
            $host{$local.'_mysql'}='Defunct.';
            $host{$local.'_mysql_doomed'}=3;
        }
    }
# -- Check rpm status
    &statuslist($local,'RPMs');
    my %checkrpms=&replyhash($local,'/lon-status/checkrpms.txt',7200);
    if (defined($checkrpms{'local_error'})) {
        $host{$local.'_checkrpms'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$checkrpms{'time'})>(4*$oneday)) {
            $host{$local.'_checkrpms'}='Stale.';
            $host{$local.'_checkrpms_doomed'}=50;
            $host{$local.'_unresponsive_doomed'}++;
        } elsif ($checkrpms{'status'} eq 'fail') {
            $host{$local.'_checkrpms'}='Could not check RPMs.';
            $host{$local.'_checkrpms_doomed'}=100;
        } elsif ($checkrpms{'rpmcount'}) {
            $host{$local.'_checkrpms'}='Outdated RPMs: '.
                $checkrpms{'rpmcount'};
            $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'};
        }
    }
# -- Check connections
    &statuslist($local,'Connections');
    $host{$local.'_notconnected'}='';
    $host{$local.'_notconnected_doomed'}=0;
    foreach my $remote (sort keys %hostname) {
        my $status=&connected($local,$remote);
        $connectionstatus{$local.'_TO_'.$remote}=$status;
        # 'not_yet' means the pair was not tested in this run; it is not
        # counted as a failure
        unless (($status eq 'ok') || ($status eq 'not_yet')) {
            $host{$local.'_notconnected'}.=' '.$remote;
            $host{$local.'_notconnected_doomed'}++;
        }
    }
# =============================================================== End Main Loop
}
&statuslist('Done.');
466: # ====================================================================== Output
# Print the selected report.
#   *_doomed    - one status panel per host with a true score for that
#                 criterion, worst first
#   connections - matrix of ping results, rows = local host, columns = remote
#   users, load - one row per host with a link, the figure and a bar scaled
#                 so the largest value is 50 characters wide
#   trouble     - status panels only for hosts exceeding one of the
#                 thresholds below
# Fixes relative to the earlier code:
#   - users() and server() receive the host id; they were called without
#     arguments, so their links pointed at no host
#   - the cell holding the bar is opened with a complete <td ...> tag
#   - <font> is closed inside the matrix cells
#   - the trouble reasons are accumulated; each later reason used to replace
#     the earlier ones even though every reason ends in <br />
#   - "No mayor trouble." now reads "No major trouble."
if ($mode=~/\_doomed$/) {
# Output by doomedness
    foreach my $doomed (&doomedness($mode)) {
        &serverstatus($doomed);
    }
} elsif ($mode eq 'connections') {
    print
        "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>".
        "<tr><td bgcolor='#225522'> </td>";
    foreach my $remote (sort keys %hostname) {
        print '<th bgcolor="#DDDDBB">'.$remote.'</th>';
    }
    print "</tr>\n";
# connection matrix
    foreach my $local (sort keys %hostname) {
        print '<tr><th bgcolor="#DDDDBB">'.$local.'</th>';
        foreach my $remote (sort keys %hostname) {
            my $status=$connectionstatus{$local.'_TO_'.$remote};
            if ($status eq 'not_yet') {
                print '<td bgcolor="#FFFFFF"> </td>';
            } elsif ($status eq 'ok') {
                print
'<td bgcolor="#BBDDBB"><font color="#225522" face="arial"><b>ok</b></font></td>';
            } else {
                print
'<td bgcolor="#DDBBBB"><font color="#552222" size="-2">'.
                    $status.'<br />';
                &lonc($local); &lond($remote);
                print '</font></td>';
            }
        }
        print "</tr>\n";
    }
    print "</table>";
} elsif ($mode eq 'users') {
# Users
    if ($maxusers) {
        my $factor=50/$maxusers;
        print "<h3>Total active user(s): $totalusers</h3>".
            "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";

        foreach my $local (sort keys %hostname) {
            if (defined($host{$local.'_users'})) {
                print
'<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.$local.
'</font></th><td bgcolor="#DDDDBB">';
                &users($local);
                print
'</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
                    $host{$local.'_users'}.'</font></td><td bgcolor="#DDDDBB">';
                &bar($factor*$host{$local.'_users'});
                print "</td></tr>\n";
            }
        }
        print "</table>";
    } else {
        print "No active users logged in.";
    }
} elsif ($mode eq 'load') {
# Load
    if ($maxload) {
        my $factor=50/$maxload;
        print
            "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";
        foreach my $local (sort keys %hostname) {
            if (defined($host{$local.'_load_doomed'})) {
                print
'<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.
                    $local.
'</font></th><td bgcolor="#DDDDBB">';
                &server($local);
                print
'</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
                    $host{$local.'_load_doomed'}.'</font></td><td bgcolor="#DDDDBB">';
                &bar($factor*$host{$local.'_load_doomed'});
                print "</td></tr>\n";
            }
        }
        print "</table>";
    } else {
        print "No workload.";
    }
} elsif ($mode eq 'trouble') {
    my $count=0;
    foreach my $local (sort keys %hostname) {
        my $trouble='';
        if ($host{$local.'_errors'}) {
            $trouble.='Has loncron errors.<br />';
        } elsif ($host{$local.'_loncron_doomed'}>600) {
            $trouble.='High loncron count.<br />';
        }
        if ($host{$local.'_load_doomed'}>5) {
            $trouble.='High load.<br />';
        }
        if ($host{$local.'_users_doomed'}>200) {
            $trouble.='High user volume.<br />';
        }
        if ($host{$local.'_mysql_doomed'}>1) {
            $trouble.='MySQL database apparently offline.<br />';
        }
        if ($host{$local.'_checkrpms_doomed'}>100) {
            $trouble.='RPMs outdated.<br />';
        }
        if ($trouble) { $count++; &serverstatus($local,$trouble); }
    }
    unless ($count) { print "No major trouble."; }
}
573: # ============================================================== Close, refresh
# Close the status form and schedule the next submission.  If this run took
# at least as long as the refresh interval, the form is resubmitted at once.
# Otherwise a timer is set for the remaining time, in milliseconds, and the
# progress field announces the automatic refresh.
print "</form><script>";
$runtime=time-$starttime;
if ($runtime<$refresh) {
    $refreshtime=int(1000*($refresh-$runtime));
    print "setTimeout('document.status.submit()',$refreshtime);\n".
          "document.prgstat.progress.value='Will automatically refresh ($refresh secs refresh cycle)'";
} else {
    print 'document.status.submit();';
}
print "</script></body></html>";
exit 0;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>