Annotation of loncom/loncron, revision 1.42
1.1 albertel 1: #!/usr/bin/perl
2:
3: # The LearningOnline Network
4: # Housekeeping program, started by cron
5: #
6: # (TCP networking package
7: # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
8: # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
9: #
1.3 www 10: # 7/14,7/15,7/19,7/21,7/22,11/18,
11: # 2/8 Gerd Kortemeyer
1.11 www 12: # 12/23 Gerd Kortemeyer
1.22 harris41 13: # YEAR=2001
1.25 www 14: # 09/04,09/06,11/26 Gerd Kortemeyer
1.24 www 15:
16: $|=1;
1.1 albertel 17:
1.26 harris41 18: use lib '/home/httpd/lib/perl/';
19: use LONCAPA::Configuration;
20:
1.1 albertel 21: use IO::File;
22: use IO::Socket;
23:
24: # -------------------------------------------------- Non-critical communication
25: sub reply {
26: my ($cmd,$server)=@_;
27: my $peerfile="$perlvar{'lonSockDir'}/$server";
28: my $client=IO::Socket::UNIX->new(Peer =>"$peerfile",
29: Type => SOCK_STREAM,
30: Timeout => 10)
31: or return "con_lost";
32: print $client "$cmd\n";
33: my $answer=<$client>;
34: chomp($answer);
35: if (!$answer) { $answer="con_lost"; }
36: return $answer;
37: }
38:
39: # --------------------------------------------------------- Output error status
40:
41: sub errout {
42: my $fh=shift;
43: print $fh (<<ENDERROUT);
44: <p><table border=2 bgcolor="#CCCCCC">
45: <tr><td>Notices</td><td>$notices</td></tr>
46: <tr><td>Warnings</td><td>$warnings</td></tr>
47: <tr><td>Errors</td><td>$errors</td></tr>
48: </table><p><a href="#top">Top</a><p>
49: ENDERROUT
50: }
51:
1.42 ! albertel 52: sub start_daemon {
! 53: my ($fh,$daemon,$pidfile) = @_;
! 54: system("$perlvar{'lonDaemons'}/$daemon 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
! 55: sleep 2;
! 56: if (-e $pidfile) {
! 57: print $fh "Seems like it started ...<p>";
! 58: my $lfh=IO::File->new("$pidfile");
! 59: my $daemonpid=<$lfh>;
! 60: chomp($daemonpid);
! 61: sleep 2;
! 62: if (kill 0 => $daemonpid) {
! 63: return 1;
! 64: } else {
! 65: return 0;
! 66: }
! 67: }
! 68: print $fh "Seems like that did not work!<p>";
! 69: $errors++;
! 70: return 0;
! 71: }
! 72:
! 73: sub checkon_daemon {
! 74: my ($fh,$daemon,$maxsize,$sendusr1)=@_;
! 75:
! 76: print $fh '<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>';
! 77: printf("%-10s ",$daemon);
! 78: if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
! 79: open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
! 80: while ($line=<DFH>) {
! 81: print $fh "$line";
! 82: if ($line=~/INFO/) { $notices++; }
! 83: if ($line=~/WARNING/) { $notices++; }
! 84: if ($line=~/CRITICAL/) { $warnings++; }
! 85: };
! 86: close (DFH);
! 87: }
! 88: print $fh "</pre>";
! 89:
! 90: my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
! 91:
! 92: my $restartflag=1;
! 93:
! 94: if (-e $pidfile) {
! 95: my $lfh=IO::File->new("$pidfile");
! 96: my $daemonpid=<$lfh>;
! 97: chomp($daemonpid);
! 98: if (kill 0 => $daemonpid) {
! 99: print $fh "<h3>$daemon at pid $daemonpid responding";
! 100: if ($sendusr1) { print $fh ", sending USR1"; }
! 101: print $fh "</h3>";
! 102: if ($sendusr1) { kill USR1 => $daemonpid; }
! 103: $restartflag=0;
! 104: print "running\n";
! 105: } else {
! 106: $errors++;
! 107: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
! 108: $restartflag=1;
! 109: print $fh "<h3>Decided to clean up stale .pid file and restart $daemon</h3>";
! 110: }
! 111: }
! 112: if ($restartflag==1) {
! 113: $simplestatus{$daemon}='off';
! 114: $errors++;
! 115: print $fh '<br><font color="red">Killall '.$daemon.': '.
! 116: `killall $daemon 2>&1`.' - ';
! 117: sleep 2;
! 118: print $fh unlink($pidfile).' - '.
! 119: `killall -9 $daemon 2>&1`.
! 120: '</font><br>';
! 121: print $fh "<h3>$daemon not running, trying to start</h3>";
! 122:
! 123: if (&start_daemon($fh,$daemon,$pidfile)) {
! 124: print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
! 125: $simplestatus{$daemon}='restarted';
! 126: print "started\n";
! 127: } else {
! 128: $errors++;
! 129: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
! 130: print $fh "Give it one more try ...<p>";
! 131: print " ";
! 132: if (&start_daemon($fh,$daemon,$pidfile)) {
! 133: print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
! 134: $simplestatus{$daemon}='restarted';
! 135: print "started\n";
! 136: } else {
! 137: print " failed\n";
! 138: $simplestatus{$daemon}='failed';
! 139: $errors++; $errors++;
! 140: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
! 141: print $fh "Unable to start $daemon<p>";
! 142: }
! 143: }
! 144:
! 145: if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
! 146: print $fh "<p><pre>";
! 147: open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
! 148: while ($line=<DFH>) {
! 149: print $fh "$line";
! 150: if ($line=~/WARNING/) { $notices++; }
! 151: if ($line=~/CRITICAL/) { $notices++; }
! 152: };
! 153: close (DFH);
! 154: print $fh "</pre>";
! 155: }
! 156: }
! 157:
! 158: $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
! 159:
! 160: my ($dev,$ino,$mode,$nlink,
! 161: $uid,$gid,$rdev,$size,
! 162: $atime,$mtime,$ctime,
! 163: $blksize,$blocks)=stat($fname);
! 164:
! 165: if ($size>$maxsize) {
! 166: print $fh "Rotating logs ...<p>";
! 167: rename("$fname.2","$fname.3");
! 168: rename("$fname.1","$fname.2");
! 169: rename("$fname","$fname.1");
! 170: }
! 171:
! 172: &errout($fh);
! 173: }
1.1 albertel 174: # ================================================================ Main Program
175:
1.27 matthew 176: # --------------------------------- Read loncapa_apache.conf and loncapa.conf
1.33 harris41 177: my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
1.28 albertel 178: %perlvar=%{$perlvarref};
1.26 harris41 179: undef $perlvarref;
180: delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
181: delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
1.13 harris41 182:
1.14 harris41 183: # --------------------------------------- Make sure that LON-CAPA is configured
184: # I only test for one thing here (lonHostID). This is just a safeguard.
185: if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
1.15 harris41 186: print("Unconfigured machine.\n");
1.14 harris41 187: $emailto=$perlvar{'lonSysEMail'};
188: $hostname=`/bin/hostname`;
189: chop $hostname;
190: $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
191: $subj="LON: Unconfigured machine $hostname";
192: system("echo 'Unconfigured machine $hostname.' |\
193: mailto $emailto -s '$subj' > /dev/null");
194: exit 1;
195: }
196:
1.13 harris41 197: # ----------------------------- Make sure this process is running from user=www
198: my $wwwid=getpwnam('www');
199: if ($wwwid!=$<) {
1.14 harris41 200: print("User ID mismatch. This program must be run as user 'www'\n");
1.13 harris41 201: $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
202: $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
203: system("echo 'User ID mismatch. loncron must be run as user www.' |\
204: mailto $emailto -s '$subj' > /dev/null");
1.14 harris41 205: exit 1;
1.1 albertel 206: }
207:
208: # ------------------------------------------------------------- Read hosts file
209: {
210: my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
211:
212: while (my $configline=<$config>) {
1.31 albertel 213: my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
214: if ($id && $domain && $role && $name && $ip) {
215: $hostname{$id}=$name;
216: $hostdom{$id}=$domain;
217: $hostip{$id}=$ip;
218: $hostrole{$id}=$role;
219: if ($domdescr) { $domaindescription{$domain}=$domdescr; }
220: if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
221: $libserv{$id}=$name;
222: }
223: } else {
224: if ($configline) {
225: # &logthis("Skipping hosts.tab line -$configline-");
226: }
227: }
1.1 albertel 228: }
229: }
230:
231: # ------------------------------------------------------ Read spare server file
232: {
233: my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
234:
235: while (my $configline=<$config>) {
236: chomp($configline);
237: if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
238: $spareid{$configline}=1;
239: }
240: }
241: }
242:
243: # ---------------------------------------------------------------- Start report
244:
245: $statusdir="/home/httpd/html/lon-status";
246:
247: $errors=0;
248: $warnings=0;
249: $notices=0;
250:
251: $now=time;
252: $date=localtime($now);
253:
254: {
255: my $fh=IO::File->new(">$statusdir/newstatus.html");
1.41 www 256: my %simplestatus=();
1.1 albertel 257:
258: print $fh (<<ENDHEADERS);
259: <html>
260: <head>
261: <title>LON Status Report $perlvar{'lonHostID'}</title>
262: </head>
1.3 www 263: <body bgcolor="#AAAAAA">
1.1 albertel 264: <a name="top">
265: <h1>LON Status Report $perlvar{'lonHostID'}</h1>
266: <h2>$date ($now)</h2>
267: <ol>
268: <li><a href="#configuration">Configuration</a>
269: <li><a href="#machine">Machine Information</a>
1.11 www 270: <li><a href="#tmp">Temporary Files</a>
271: <li><a href="#tokens">Session Tokens</a>
1.1 albertel 272: <li><a href="#httpd">httpd</a>
1.11 www 273: <li><a href="#lonsql">lonsql</a>
1.1 albertel 274: <li><a href="#lond">lond</a>
275: <li><a href="#lonc">lonc</a>
1.34 www 276: <li><a href="#lonhttpd">lonhttpd</a>
1.1 albertel 277: <li><a href="#lonnet">lonnet</a>
278: <li><a href="#connections">Connections</a>
279: <li><a href="#delayed">Delayed Messages</a>
280: <li><a href="#errcount">Error Count</a>
281: </ol>
282: <hr>
283: <a name="configuration">
284: <h2>Configuration</h2>
285: <h3>PerlVars</h3>
286: <table border=2>
287: ENDHEADERS
288:
1.31 albertel 289: foreach $varname (sort(keys(%perlvar))) {
1.1 albertel 290: print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
291: }
292: print $fh "</table><h3>Hosts</h3><table border=2>";
1.31 albertel 293: foreach $id (sort(keys(%hostname))) {
294: print $fh
295: "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
296: print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
1.1 albertel 297: }
298: print $fh "</table><h3>Spare Hosts</h3><ol>";
1.31 albertel 299: foreach $id (sort(keys(%spareid))) {
1.1 albertel 300: print $fh "<li>$id\n";
301: }
302:
303: print $fh "</ol>\n";
304:
305: # --------------------------------------------------------------------- Machine
306:
307: print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
308: print $fh "<h3>loadavg</h3>";
309:
310: open (LOADAVGH,"/proc/loadavg");
311: $loadavg=<LOADAVGH>;
312: close (LOADAVGH);
313:
314: print $fh "<tt>$loadavg</tt>";
315:
316: @parts=split(/\s+/,$loadavg);
1.4 www 317: if ($parts[1]>4.0) {
1.1 albertel 318: $errors++;
319: } elsif ($parts[1]>2.0) {
320: $warnings++;
321: } elsif ($parts[1]>1.0) {
322: $notices++;
323: }
324:
325: print $fh "<h3>df</h3>";
326: print $fh "<pre>";
327:
328: open (DFH,"df|");
329: while ($line=<DFH>) {
330: print $fh "$line";
331: @parts=split(/\s+/,$line);
332: $usage=$parts[4];
333: $usage=~s/\W//g;
334: if ($usage>90) {
1.24 www 335: $warnings++;
336: $notices++;
1.1 albertel 337: } elsif ($usage>80) {
338: $warnings++;
339: } elsif ($usage>60) {
340: $notices++;
341: }
1.41 www 342: if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
1.1 albertel 343: }
344: close (DFH);
345: print $fh "</pre>";
1.24 www 346:
347:
348: print $fh "<h3>ps</h3>";
349: print $fh "<pre>";
350: $psproc=0;
351:
352: open (PSH,"ps -aux|");
353: while ($line=<PSH>) {
354: print $fh "$line";
355: $psproc++;
356: }
357: close (PSH);
358: print $fh "</pre>";
359:
360: if ($psproc>200) { $notices++; }
361: if ($psproc>250) { $notices++; }
362:
1.1 albertel 363: &errout($fh);
1.11 www 364:
365: # --------------------------------------------------------------- clean out tmp
366: print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
367: $cleaned=0;
1.29 www 368: $old=0;
1.11 www 369: while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
370: my ($dev,$ino,$mode,$nlink,
371: $uid,$gid,$rdev,$size,
372: $atime,$mtime,$ctime,
373: $blksize,$blocks)=stat($fname);
374: $now=time;
375: $since=$now-$mtime;
376: if ($since>$perlvar{'lonExpire'}) {
1.29 www 377: $line='';
378: if (open(PROBE,$fname)) {
379: $line=<PROBE>;
380: close(PROBE);
381: }
382: unless ($line=~/^CHECKOUTTOKEN\&/) {
383: $cleaned++;
384: unlink("$fname");
385: } else {
1.32 www 386: if ($since>365*$perlvar{'lonExpire'}) {
1.29 www 387: $cleaned++;
388: unlink("$fname");
389: } else { $old++; }
390: }
1.11 www 391: }
392:
393: }
1.29 www 394: print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens).";
1.11 www 395:
396: # ------------------------------------------------------------ clean out lonIDs
397: print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
398: $cleaned=0;
399: $active=0;
400: while ($fname=<$perlvar{'lonIDsDir'}/*>) {
401: my ($dev,$ino,$mode,$nlink,
402: $uid,$gid,$rdev,$size,
403: $atime,$mtime,$ctime,
404: $blksize,$blocks)=stat($fname);
405: $now=time;
406: $since=$now-$mtime;
407: if ($since>$perlvar{'lonExpire'}) {
408: $cleaned++;
409: print $fh "Unlinking $fname<br>";
410: unlink("$fname");
411: } else {
412: $active++;
413: }
414:
415: }
416: print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
417: print $fh "<h3>$active open session(s)</h3>";
418:
1.1 albertel 419: # ----------------------------------------------------------------------- httpd
420:
421: print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
422:
1.23 www 423: open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
1.1 albertel 424: while ($line=<DFH>) { print $fh "$line" };
425: close (DFH);
426:
427: print $fh "</pre><h3>Error Log</h3><pre>";
428:
1.23 www 429: open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
1.1 albertel 430: while ($line=<DFH>) {
431: print $fh "$line";
432: if ($line=~/\[error\]/) { $notices++; }
433: };
434: close (DFH);
435: print $fh "</pre>";
436: &errout($fh);
1.5 harris41 437:
438:
1.11 www 439: # ---------------------------------------------------------------------- lonsql
1.22 harris41 440:
1.42 ! albertel 441: &checkon_daemon($fh,'lonsql',200000);
1.5 harris41 442:
1.1 albertel 443: # ------------------------------------------------------------------------ lond
444:
1.42 ! albertel 445: &checkon_daemon($fh,'lond',40000,1);
1.1 albertel 446:
447: # ------------------------------------------------------------------------ lonc
448:
1.42 ! albertel 449: &checkon_daemon($fh,'lonc',40000,1);
1.1 albertel 450:
1.34 www 451: # -------------------------------------------------------------------- lonhttpd
452:
1.42 ! albertel 453: &checkon_daemon($fh,'lonhttpd',40000);
1.1 albertel 454:
455: # ---------------------------------------------------------------------- lonnet
456:
457: print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
1.42 ! albertel 458: print "checking logs\n";
1.1 albertel 459: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
460: open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
461: while ($line=<DFH>) {
462: print $fh "$line";
463: };
464: close (DFH);
465: }
1.11 www 466: print $fh "</pre><h3>Perm Log</h3><pre>";
1.1 albertel 467:
468: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
469: open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
470: while ($line=<DFH>) {
471: print $fh "$line";
472: };
473: close (DFH);
474: } else { print $fh "No perm log\n" }
475:
476: $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
477:
478: my ($dev,$ino,$mode,$nlink,
479: $uid,$gid,$rdev,$size,
480: $atime,$mtime,$ctime,
481: $blksize,$blocks)=stat($fname);
482:
483: if ($size>40000) {
484: print $fh "Rotating logs ...<p>";
485: rename("$fname.2","$fname.3");
486: rename("$fname.1","$fname.2");
487: rename("$fname","$fname.1");
488: }
489:
490: print $fh "</pre>";
491: &errout($fh);
492: # ----------------------------------------------------------------- Connections
493:
494: print $fh '<hr><a name="connections"><h2>Connections</h2>';
1.42 ! albertel 495: print "testing connections\n";
1.1 albertel 496: print $fh "<table border=2>";
1.31 albertel 497: foreach $tryserver (sort(keys(%hostname))) {
1.42 ! albertel 498: print(".");
1.1 albertel 499: $answer=reply("pong",$tryserver);
500: if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
501: $result="<b>ok</b>";
502: } else {
503: $result=$answer;
504: $warnings++;
505: if ($answer eq 'con_lost') { $warnings++; }
506: }
1.42 ! albertel 507: if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
1.1 albertel 508: print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";
509:
510: }
511: print $fh "</table>";
512:
513: &errout($fh);
514: # ------------------------------------------------------------ Delayed messages
515:
516: print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
1.42 ! albertel 517: print "checking buffers\n";
1.1 albertel 518:
519: print $fh '<h3>Scanning Permanent Log</h3>';
520:
521: $unsend=0;
522: {
523: my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
524: while ($line=<$dfh>) {
525: ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
526: if ($sdf eq 'F') {
527: $local=localtime($time);
1.42 ! albertel 528: print $fh "<b>Failed: $time, $dserv, $dcmd</b><br>";
1.1 albertel 529: $warnings++;
530: }
531: if ($sdf eq 'S') { $unsend--; }
532: if ($sdf eq 'D') { $unsend++; }
533: }
534: }
535: print $fh "Total unsend messages: <b>$unsend</b><p>\n";
536: $warnings=$warnings+5*$unsend;
537:
1.41 www 538: if ($unsend) { $simplestatus{'unsend'}=$unsend; }
1.1 albertel 539: print $fh "<h3>Outgoing Buffer</h3>";
540:
541: open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
542: while ($line=<DFH>) {
543: print $fh "$line<br>";
544: };
545: close (DFH);
546:
547: # ------------------------------------------------------------------------- End
548: print $fh "<a name=errcount>\n";
549: $totalcount=$notices+4*$warnings+100*$errors;
550: &errout($fh);
551: print $fh "<h1>Total Error Count: $totalcount</h1>";
552: $now=time;
553: $date=localtime($now);
554: print $fh "<hr>$date ($now)</body></html>\n";
1.42 ! albertel 555: print "lon-status webpage updated\n";
1.41 www 556: $fh->close();
1.1 albertel 557: }
1.41 www 558: if ($errors) { $simplestatus{'errors'}=$errors; }
559: if ($warnings) { $simplestatus{'warnings'}=$warnings; }
560: if ($notices) { $simplestatus{'notices'}=$notices; }
561: $simplestatus{'time'}=time;
1.1 albertel 562:
563: rename ("$statusdir/newstatus.html","$statusdir/index.html");
1.41 www 564: {
565: my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
566: foreach (keys %simplestatus) {
567: print $sfh $_.'='.$simplestatus{$_}.'&';
568: }
569: print $sfh "\n";
570: $sfh->close();
571: }
1.1 albertel 572: if ($totalcount>200) {
1.42 ! albertel 573: print "sending mail\n";
1.37 www 574: $emailto="$perlvar{'lonAdmEMail'}";
1.40 www 575: if ($totalcount>1000) {
1.37 www 576: $emailto.=",$perlvar{'lonSysEMail'}";
577: }
1.1 albertel 578: $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
579: system(
1.24 www 580: "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
1.1 albertel 581: }
582: 1;
583:
584:
585:
586:
587:
588:
589:
590:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>