Annotation of loncom/cgi/clusterstatus.pl, revision 1.13
1.1 www 1: #!/usr/bin/perl
2: $|=1;
3: # The LearningOnline Network with CAPA
4: # Cluster Status
1.9 www 5: #
1.13 ! www 6: # $Id: clusterstatus.pl,v 1.12 2003/07/31 19:55:37 www Exp $
1.3 harris41 7:
8: use lib '/home/httpd/lib/perl/';
9: use LONCAPA::Configuration;
10:
1.1 www 11: use LWP::UserAgent();
12: use HTTP::Headers;
13: use IO::File;
14:
# Collected status values for all hosts, keyed "<hostid>_<item>".
my %host = ();
# Length of one day in seconds; used by the staleness checks.
my $oneday = 24 * 60 * 60;

# Outcome of every connection test, keyed "<local>_TO_<remote>".
my %connectionstatus = ();
# Settings read from the LON-CAPA configuration file.
my %perlvar = ();

# Name of the report to produce (one of the keys of %modes).
my $mode;
22:
# Build an HTML <select> drop-down and return it as a string.
#   $def  - key of the option that should be preselected
#   $name - name of the form field
#   %hash - option value => visible label
# Options are emitted in sorted key order.  The field name, the option
# values and the labels are HTML-escaped so that special characters in them
# cannot break out of the surrounding markup.
sub select_form {
    my ($def,$name,%hash) = @_;
    my $escape = sub {
        my $text = shift;
        $text = '' unless defined($text);
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };
    $def = '' unless defined($def);
    my $selectform = '<select name="'.$escape->($name)."\" size=\"1\">\n";
    foreach my $key (sort keys %hash) {
        $selectform .= '<option value="'.$escape->($key).'" '.
                       ($key eq $def ? 'selected' : '').
                       '>'.$escape->($hash{$key})."</option>\n";
    }
    $selectform .= '</select>';
    return $selectform;
}
34:
1.8 www 35:
# Name of the form field under which the reply for ($local,$url) is cached:
# host id and URL joined by "_", with every non-word character replaced by "_".
sub key {
    my ($local,$url)=@_;
    (my $fieldname = $local.'_'.$url) =~ s/\W/_/g;
    return $fieldname;
}
42:
# Print a hidden form field carrying $value under $name.
# Both strings are HTML-escaped first: the values written here include raw
# replies from other servers and data posted back by the browser, and an
# unescaped quote or "<" would end the single-quoted attribute early.
sub hidden {
    my ($name,$value)=@_;
    foreach my $text ($name,$value) {   # aliases the two lexical copies
        $text='' unless defined($text);
        $text=~s/&/&amp;/g;
        $text=~s/</&lt;/g;
        $text=~s/>/&gt;/g;
        $text=~s/"/&quot;/g;
        $text=~s/'/&#39;/g;
    }
    print "\n<input type='hidden' name='$name' value='$value' />";
}
47:
# Fetch $url from host $local, using the copy cached in the posted form when
# it is recent enough.
#   $local     - host id (index into %hostname)
#   $url       - path and query string to request from that host
#   $cachetime - nominal cache lifetime in seconds; scaled by a random factor
#                between 0.5 and 1.5 on every call
# Returns the reply text, 'local_unknown' when the host id has no name, or
# 'local_error' when the HTTP request failed.  The reply and its timestamp
# are written out as hidden form fields so that the next submission carries
# them back as the cache.
sub request {
    my ($local,$url,$cachetime)=@_;
    $cachetime*=(0.5+rand);
    my $key=key($local,$url);
    my $timefield=$key.'_time';
    my $reply='';
    # Reuse the posted reply while it is younger than the cache lifetime.
    if ($FORM{$timefield} && ((time-$FORM{$timefield})<$cachetime)) {
        $reply=$FORM{$key};
        hidden($timefield,$FORM{$timefield});
        hidden($key.'_fromcache',1);
    }
    # Nothing usable in the cache: ask the host.
    if (!$reply) {
        if ($hostname{$local}) {
            my $ua=LWP::UserAgent->new(timeout => 15);
            my $request=HTTP::Request->new('GET',
                                           "http://".$hostname{$local}.$url);
            # NOTE(review): the credentials are hard-coded in the source.
            $request->authorization_basic('lonadm','litelite');
            my $response=$ua->request($request);
            if ($response->is_success) {
                $reply=$response->content;
                chomp($reply);
            } else {
                $reply='local_error';
            }
        } else {
            $reply='local_unknown';
        }
        hidden($timefield,time);
    }
    hidden($key,$reply);
    return $reply;
}
85:
# ============================================= Are local and remote connected?
# Ask host $local to ping host $remote.
# Returns 'ok' when the first line of the reply names $remote,
# 'remote_unknown' when $remote has no entry in %hostname, 'not_yet' when the
# test was skipped, and otherwise the first line of the reply reduced to its
# word characters.
sub connected {
    my ($local,$remote)=@_;
    $local=~s/\W//g;
    $remote=~s/\W//g;

    return 'remote_unknown' unless ($hostname{$remote});
    my $url='/cgi-bin/ping.pl?'.$remote;
    my $cachekey=key($local,$url);
#
# Slowly phase this in: a pair with no cached result is only tested when the
# random draw exceeds 0.95, i.e. in about 5 percent of the cases
#
    if (!$FORM{$cachekey} && (rand() <= 0.95)) { return 'not_yet'; }
#
# Actually do the query
#
    statuslist($local,'connecting '.$remote);
    my $reply=request($local,$url,3600);
    $reply=(split("\n",$reply))[0];
    $reply=~s/\W//g;
    return ($reply eq $remote) ? 'ok' : $reply;
}
# ============================================================ Get a reply hash
# Call request() with the given arguments and decode its reply, which is
# expected to look like "name=value&name=value", into a hash.
# A name without a value maps to the empty string, so an error reply such as
# 'local_error' shows up as a defined key.  Only the first "=" of a pair
# separates name and value, and a value of "0" is kept as is.

sub replyhash {
    my %returnhash=();
    foreach my $pair (split(/\&/,request(@_))) {
        my ($name,$value)=split(/\=/,$pair,2);
        next unless ($name);
        $returnhash{$name}=(defined($value) ? $value : '');
    }
    return %returnhash;
}
1.1 www 123:
# ================================================================ Link to host
# Return an HTML link labelled $label that opens $url on host $local in a
# browser window named after that host.

sub otherwindow {
    my ($local,$url,$label)=@_;
    my $href='http://'.$hostname{$local}.$url;
    return " <a href='".$href."' target='newwin".$local."'>".$label.'</a> ';
}
131:
# Print a link to the login page of host $local, preselecting the domain
# configured as lonDefDomain.
sub login {
    my ($local)=@_;
    my $link=otherwindow($local,
                         '/adm/login?domain='.$perlvar{'lonDefDomain'},
                         'Login');
    print $link;
}
137:
# Print a link to /cgi-bin/loncron.pl on host $local.
sub runloncron {
    my ($local)=@_;
    my $link=otherwindow($local,'/cgi-bin/loncron.pl','Run loncron');
    print $link;
}
142:
# Print a link to the /lon-status page of host $local.
sub loncron {
    my ($local)=@_;
    my $link=otherwindow($local,'/lon-status','loncron');
    print $link;
}
147:
# Print a link to the lonc status file of host $local.
sub lonc {
    my ($local)=@_;
    my $link=otherwindow($local,'/lon-status/loncstatus.txt','lonc');
    print $link;
}
152:
# Print a link to the lond status file of host $local.
sub lond {
    my ($local)=@_;
    my $link=otherwindow($local,'/lon-status/londstatus.txt','lond');
    print $link;
}
157:
# Print a link to /cgi-bin/userstatus.pl on host $local.
sub users {
    my ($local)=@_;
    my $link=otherwindow($local,'/cgi-bin/userstatus.pl','Users');
    print $link;
}
162:
# Print a link to /cgi-bin/lonversions.pl on host $local.
sub versions {
    my ($local)=@_;
    my $link=otherwindow($local,'/cgi-bin/lonversions.pl','Versions');
    print $link;
}
167:
# Print a link to the /server-status page of host $local.
sub server {
    my ($local)=@_;
    my $link=otherwindow($local,'/server-status','Server Status');
    print $link;
}
1.1 www 172:
# ========================================================= Produce a green bar
# Print a one-cell table whose width is set by a run of "+" characters drawn
# in the cell's own background colour.  $parm is rounded to the nearest whole
# number of characters; values that round to zero or below give an empty bar.
sub bar {
    my $parm=shift;
    my $number=int($parm+0.5);
    my $width=($number>0 ? $number : 0);
    print "<table><tr><td bgcolor='#225522'><font color='#225522'>";
    print '+' x $width;
    print "</font></table>";
}
183:
# ========================================================== Show server status
# Print the status panel for host $local: a title row (id, domain, host name,
# role, domain description), a row of links to the host's status pages, an
# optional highlighted row with the text in $trouble, and a details row
# built from the values collected in %host.

sub serverstatus {
    my ($local,$trouble)=@_;
    # The title cell is opened with <td>, so it is closed with </td>; the
    # named anchor is written as an explicit empty element.
    print <<ENDHEADER;
<a name="$local"></a>
<table width="100%" bgcolor="#225522" cellspacing="2" cellpadding="2" border="0">
<tr><td bgcolor="#BBDDBB"><font color="#225522" face="arial"><b>
$local $hostdom{$local}</b> <tt>($hostname{$local}; $hostrole{$local})</tt>
<br />$domaindescription{$hostdom{$local}}
</font></td></tr><tr><td bgcolor="#DDDDBB"><font color="#225522">
ENDHEADER
    login($local); server($local); users($local); versions($local);
    loncron($local); lond($local); lonc($local); runloncron($local);
    print "</font></td></tr>";
    if ($trouble) {
        print "<tr><td bgcolor='#DDBBBB'><font color='#552222' size='+2'>$trouble</font></td></tr>";
    }
    print "<tr><td bgcolor='#BBBBBB'>";
    # load: shown above 0.5 or whenever the load report was requested
    if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) {
        print "<br />Load: ".$host{$local.'_load'};
    }
    # users: shown above 10 or whenever the user report was requested
    if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) {
        print "<br />Active Users: ".$host{$local.'_users'};
    }
    # checkrpms
    if ($host{$local.'_checkrpms'}) {
        print "<br />RPMs: ".$host{$local.'_checkrpms'};
    }
    # mysql
    if ($host{$local.'_mysql'}) {
        print "<br />MySQL Database: ".$host{$local.'_mysql'};
    }
    # connections: each unreachable host links to its own panel
    if ($host{$local.'_notconnected'}) {
        print "<br />Not connected: ";
        foreach my $remote (split(/ /,$host{$local.'_notconnected'})) {
            if ($remote) {
                print " <a href='#$remote'>$remote</a>";
            }
        }
    }
    # errors
    if ($host{$local.'_errors'}) {
        print "<br />loncron errors: ".$host{$local.'_errors'};
    }
    print "</td></tr></table><br />";
}
235:
# =========================================================== Doomedness sorted
# Return the ids of all hosts that have a true value stored in %host under
# "<hostid>_<crit>", highest value first.  Hosts with equal values are
# ordered by id so that the result does not depend on hash order.  $crit is
# matched literally.

sub doomedness {
    my $crit=shift;
    my %alldoomed=();
    foreach my $item (keys %host) {
        my ($hostid)=($item=~/^(\w+)\_\Q$crit\E$/);
        next unless (defined($hostid) && $host{$item});
        $alldoomed{$hostid}=$host{$item};
    }
    return sort {
        ($alldoomed{$b} <=> $alldoomed{$a}) || ($a cmp $b)
    } keys %alldoomed;
}
1.1 www 252:
# ====================================================================== Status
# Update the progress field of the "prgstat" form in the browser to say which
# host ($local) and which item ($what) is being tested.
sub statuslist {
    my ($local,$what)=@_;
    my $message="Testing $local ($hostname{$local}): $what";
    print "<script>document.prgstat.progress.value='".$message.
          "';</script>\n";
}
259:
#
# Main program
#
# ========================================================= Get form parameters

# Decode a URL-encoded "name=value&name=value" string into the hash that
# $form refers to.  "+" becomes a space and %XX sequences are unpacked in the
# values; names are stored as received.  Only the first "=" of a pair
# separates name and value, and a name without a value is stored with the
# empty string.  An undefined $buffer is ignored.
sub parse_form_data {
    my ($buffer,$form)=@_;
    return unless (defined($buffer));
    foreach my $pair (split(/&/,$buffer)) {
        my ($name,$value)=split(/=/,$pair,2);
        next unless (defined($name));
        $value='' unless (defined($value));
        $value =~ tr/+/ /;
        $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
        $form->{$name}=$value;
    }
    return;
}

%FORM=();
# Posted data first; it is only read when the server announces a body.
{
    my $length=$ENV{'CONTENT_LENGTH'};
    if (defined($length) && ($length=~/^\d+$/) && ($length>0)) {
        my $buffer='';
        read(STDIN, $buffer, $length);
        parse_form_data($buffer,\%FORM);
    }
}
# Query-string parameters are applied last and override posted ones.
parse_form_data($ENV{'QUERY_STRING'},\%FORM);
286:
# ====================================================== Determine refresh rate
# Refresh cycle in seconds: the posted "refresh" value when it is all digits,
# otherwise 120; never less than 30.

my $refresh=120;
if ($FORM{'refresh'}=~/^\d+$/) { $refresh=$FORM{'refresh'}; }
$refresh=30 if ($refresh<30);
# Start of this run, used at the end to schedule the next refresh.
my $starttime=time;
1.9 www 292:
# ============================================================== Determine mode
# Report names accepted in the "mode" form field, with their menu labels.

my %modes=(
    'trouble'             => 'Trouble',
    'users_doomed'        => 'Doomed: Users',
    'loncron_doomed'      => 'Doomed: General (loncron)',
    'mysql_doomed'        => 'Doomed: Database (mysql)',
    'notconnected_doomed' => 'Doomed: Connections',
    'checkrpms_doomed'    => 'Doomed: RPMs',
    'load_doomed'         => 'Doomed: Load',
    'unresponsive_doomed' => 'Doomed: Status could not be determined',
    'users'               => 'User Report',
    'load'                => 'Load Report',
    'connections'         => 'Connections Matrix',
);

# Anything that is not a known report name falls back to the trouble report.
$mode=$FORM{'mode'};
if (!$modes{$mode}) { $mode='trouble'; }
# ================================================================ Send Headers
# CGI header, blank separator line, then the start of the HTML page.
print "Content-type: text/html\n\n";
print "<html><body bgcolor='#FFFFFF'>\n";
# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf).
# The hash reference returned by read_conf is copied into %perlvar and then
# dropped; two entries are removed because they are sensitive and not needed.
{
    my $conf=LONCAPA::Configuration::read_conf('loncapa.conf');
    %perlvar=%{$conf};
}
delete @perlvar{'lonReceipt','lonSqlAccess'};
1.1 www 318:
# ------------------------------------------------------------- Read hosts file
# Each line of hosts.tab has the colon-separated fields id, domain, role,
# host name and ip.  Text after "#" and lines without any word character are
# ignored.  Fills %hostname, %hostdom, %hostrole and %hostip, counts the
# entries in $total and records every library server other than this host in
# %libserv.
{
    my $hostsfile="$perlvar{'lonTabDir'}/hosts.tab";
    my $config=IO::File->new($hostsfile);

    $total=0;
    if ($config) {
        while (my $configline=<$config>) {
            $configline=~s/#.*$//;
            # Drop the line ending so that it does not stick to the last field.
            $configline=~s/\s+$//;
            next unless ($configline=~/\w/);
            my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
            $hostname{$id}=$name;
            $hostdom{$id}=$domain;
            $hostrole{$id}=$role;
            $hostip{$id}=$ip;
            $total++;
            if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                $libserv{$id}=$name;
            }
        }
        $config->close();
    } else {
        # Without the hosts table there is nothing to report on; note the
        # reason in the server log instead of failing silently.
        warn "clusterstatus: cannot open $hostsfile: $!\n";
    }
}
# ------------------------------------------------------------ Read domain file
# Each line of domain.tab has the colon-separated fields domain, description,
# default authentication type and its argument.  Comment lines (starting with
# "#") and blank lines are skipped.  Nothing is read when the file cannot be
# opened.
{
    %domaindescription = ();
    %domain_auth_def = ();
    %domain_auth_arg_def = ();
    my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab');
    if ($fh) {
        while (my $line=<$fh>) {
            next if ($line=~/^(\#|\s*$)/);
            chomp($line);
            my ($domain,$description,$auth,$auth_arg)=split(/:/,$line,4);
            $domaindescription{$domain}=$description;
            $domain_auth_def{$domain}=$auth;
            $domain_auth_arg_def{$domain}=$auth_arg;
        }
    }
}
356:
# Page title with the current time, the progress form that statuslist()
# writes to, and the start of the status form with the report selector.
my $now=localtime();
print "<img src='/adm/lonIcons/lonlogos.gif' align='right' /><h1>LON-CAPA Cluster Status ".$now."</h1>";
print "<form name='prgstat'>\n";
print "<input type='text' name='progress' value='Starting ...' size='100' /><br />";
print "</form>\n";
print "<form name='status' method='post'>\n";
my $selector=select_form($mode,'mode',%modes);
print 'Choose next report: '.$selector.'<hr />';
# Carry the refresh cycle over to the next submission.
hidden('refresh',$refresh);
364:
# ==================================================== Main Loop over all Hosts
# For every host in %hostname, fetch its status files and store the results
# in %host under "<hostid>_<item>".  Items ending in "_doomed" are the
# numbers the reports sort and threshold on; "_unresponsive_doomed" counts
# the checks for this host that gave no usable answer.

my $maxusers=0;    # highest active-user count of any host
my $maxload=0;     # highest value of the second loadavg field of any host
my $totalusers=0;  # active users summed over all hosts

foreach my $local (sort keys %hostname) {
    $host{$local.'_unresponsive_doomed'}=0;
    # -- Check general status
    statuslist($local,'General');
    my %loncron=replyhash($local,'/lon-status/loncron_simple.txt',1200);
    if (defined($loncron{'local_error'})) {
        $host{$local.'_loncron'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$loncron{'time'})>$oneday) {
            $host{$local.'_loncron'}='Stale.';
            $host{$local.'_unresponsive_doomed'}++;
        } else {
            # Weighted sum: a warning counts 4 notices, an error 100.
            $host{$local.'_loncron_doomed'}=$loncron{'notices'}
                                           +4*$loncron{'warnings'}
                                           +100*$loncron{'errors'};
            $host{$local.'_errors'}=$loncron{'errors'};
        }
    }
    # -- Check user status
    statuslist($local,'Users');
    my %userstatus=replyhash($local,'/cgi-bin/userstatus.pl?simple',600);
    if (defined($userstatus{'local_error'})) {
        $host{$local.'_userstatus'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        $host{$local.'_users_doomed'}=$userstatus{'Active'};
        $host{$local.'_users'}=$userstatus{'Active'};
        unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; }
        if ($host{$local.'_users'}>$maxusers) {
            $maxusers=$host{$local.'_users'};
        }
        $totalusers+=$host{$local.'_users'};
        # loadavg holds three space-separated numbers; the second one is
        # used as the load figure.
        my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'});
        $host{$local.'_load_doomed'}=$mload;
        if ($mload>$maxload) {
            $maxload=$mload;
        }
        $host{$local.'_load'}=$userstatus{'loadavg'};
    }
    # -- Check mysql status
    statuslist($local,'Database');
    my %mysql=replyhash($local,'/lon-status/mysql.txt',3600);
    if (defined($mysql{'local_error'})) {
        $host{$local.'_mysql'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$mysql{'time'})>(7*$oneday)) {
            # A stale report only counts against library servers.
            if ($hostrole{$local} eq 'library') {
                $host{$local.'_mysql'}='Stale.';
                $host{$local.'_mysql_doomed'}=1;
            }
            if ($mysql{'mysql'} eq 'defunct') {
                $host{$local.'_mysql'}='Defunct (maybe stale).';
                $host{$local.'_mysql_doomed'}=2;
            }
        } elsif ($mysql{'mysql'} eq 'defunct') {
            $host{$local.'_mysql'}='Defunct.';
            $host{$local.'_mysql_doomed'}=3;
        }
    }
    # -- Check rpm status
    statuslist($local,'RPMs');
    my %checkrpms=replyhash($local,'/lon-status/checkrpms.txt',7200);
    if (defined($checkrpms{'local_error'})) {
        $host{$local.'_checkrpms'}='Could not determine.';
        $host{$local.'_unresponsive_doomed'}++;
    } else {
        if ((time-$checkrpms{'time'})>(4*$oneday)) {
            $host{$local.'_checkrpms'}='Stale.';
            $host{$local.'_checkrpms_doomed'}=50;
            $host{$local.'_unresponsive_doomed'}++;
        } elsif ($checkrpms{'status'} eq 'fail') {
            $host{$local.'_checkrpms'}='Could not check RPMs.';
            $host{$local.'_checkrpms_doomed'}=100;
        } elsif ($checkrpms{'rpmcount'}) {
            $host{$local.'_checkrpms'}='Outdated RPMs: '.
                                       $checkrpms{'rpmcount'};
            $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'};
        }
    }
    # -- Check connections
    statuslist($local,'Connections');
    $host{$local.'_notconnected'}='';
    $host{$local.'_notconnected_doomed'}=0;
    foreach my $remote (sort keys %hostname) {
        my $status=connected($local,$remote);
        $connectionstatus{$local.'_TO_'.$remote}=$status;
        # 'not_yet' means the pair was not tested, which is not a failure.
        unless (($status eq 'ok') || ($status eq 'not_yet')) {
            $host{$local.'_notconnected'}.=' '.$remote;
            $host{$local.'_notconnected_doomed'}++;
        }
    }
# =============================================================== End Main Loop
}
# All hosts have been queried: say so in the progress field.
print "<script>document.prgstat.progress.value='Done.';</script>\n";
# ====================================================================== Output
# Print the report selected by $mode from the values collected in %host and
# %connectionstatus.
if ($mode=~/\_doomed$/) {
    # Output by doomedness: one panel per affected host, worst first
    foreach my $doomed (doomedness($mode)) {
        serverstatus($doomed);
    }
} elsif ($mode eq 'connections') {
    print
        "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>".
        "<tr><td bgcolor='#225522'> </td>";
    foreach my $remote (sort keys %hostname) {
        print '<th bgcolor="#DDDDBB">'.$remote.'</th>';
    }
    print "</tr>\n";
    # connection matrix: one row per local host, one column per remote host
    foreach my $local (sort keys %hostname) {
        print '<tr><th bgcolor="#DDDDBB">'.$local.'</th>';
        foreach my $remote (sort keys %hostname) {
            my $status=$connectionstatus{$local.'_TO_'.$remote};
            if ($status eq 'not_yet') {
                print '<td bgcolor="#FFFFFF"><font color="#555522" size="-2">not yet tested</font></td>';
            } elsif ($status eq 'ok') {
                print
                    '<td bgcolor="#BBDDBB"><font color="#225522" face="arial"><b>ok</b></font></td>';
            } else {
                # Failed: show the reply plus links to the two daemons' logs.
                print
                    '<td bgcolor="#DDBBBB"><font color="#552222" size="-2">'.
                    $status.'<br />';
                lonc($local); lond($remote);
                print '</font></td>';
            }
        }
        print "</tr>\n";
    }
    print "</table>";
} elsif ($mode eq 'users') {
    # Users: bars are scaled so that the busiest host gets 50 characters
    if ($maxusers) {
        my $factor=50/$maxusers;
        print "<h3>Total active user(s): $totalusers</h3>".
              "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";

        foreach my $local (sort keys %hostname) {
            if (defined($host{$local.'_users'})) {
                print
                    '<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.$local.
                    '</font></th><td bgcolor="#DDDDBB">';
                users($local);
                print
                    '</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
                    $host{$local.'_users'}.'</font></td><td bgcolor="#DDDDBB">';
                bar($factor*$host{$local.'_users'});
                print "</td></tr>\n";
            }
        }
        print "</table>";
    } else {
        print "No active users logged in.";
    }
} elsif ($mode eq 'load') {
    # Load: bars are scaled so that the busiest host gets 50 characters
    if ($maxload) {
        my $factor=50/$maxload;
        print
            "<table cellspacing='3' cellpadding='3' border='0' bgcolor='#225522'>";
        foreach my $local (sort keys %hostname) {
            if (defined($host{$local.'_load_doomed'})) {
                print
                    '<tr><th bgcolor="#BBDDBB"><font face="arial" color="#225522" size="+1">'.
                    $local.
                    '</font></th><td bgcolor="#DDDDBB">';
                server($local);
                print
                    '</td><td bgcolor="#DDDDBB"><font face="arial" color="#225522">'.
                    $host{$local.'_load_doomed'}.'</font></td><td bgcolor="#DDDDBB">';
                bar($factor*$host{$local.'_load_doomed'});
                print "</td></tr>\n";
            }
        }
        print "</table>";
    } else {
        print "No workload.";
    }
} elsif ($mode eq 'trouble') {
    # Trouble: a panel for every host that trips at least one threshold.
    # The messages are appended to each other so that a host with several
    # problems lists all of them.
    my $count=0;
    foreach my $local (sort keys %hostname) {
        my $trouble='';
        if ($host{$local.'_errors'}) {
            $trouble.='Has loncron errors.<br />';
        } elsif ($host{$local.'_loncron_doomed'}>600) {
            $trouble.='High loncron count.<br />';
        }
        if ($host{$local.'_load_doomed'}>5) {
            $trouble.='High load.<br />';
        }
        if ($host{$local.'_users_doomed'}>200) {
            $trouble.='High user volume.<br />';
        }
        if ($host{$local.'_mysql_doomed'}>1) {
            $trouble.='MySQL database apparently offline.<br />';
        }
        if ($host{$local.'_checkrpms_doomed'}>100) {
            $trouble.='RPMs outdated.<br />';
        }
        if ($trouble) { $count++; serverstatus($local,$trouble); }
    }
    unless ($count) { print "No major trouble."; }
}
# ============================================================== Close, refresh
# Close the status form and schedule its resubmission: after whatever is
# left of the refresh cycle once the run time is subtracted, or after 30
# seconds when less than that is left.
print "</form><script>";
$runtime=time-$starttime;
my $remaining=$refresh-$runtime;
if ($remaining>=30) {
    $refreshtime=int(1000*$remaining);
    print "setTimeout('document.status.submit()',$refreshtime);\n".
          "document.prgstat.progress.value='Will automatically refresh ($refresh secs refresh cycle)'";
} else {
    print "setTimeout('document.status.submit()',30000);\n".
          "document.prgstat.progress.value='Will automatically refresh.'";
}
print "</script></body></html>";
exit 0;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>