Annotation of loncom/loncron, revision 1.45
1.1 albertel 1: #!/usr/bin/perl
2:
3: # The LearningOnline Network
4: # Housekeeping program, started by cron
5: #
6: # (TCP networking package
7: # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
8: # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
9: #
1.3 www 10: # 7/14,7/15,7/19,7/21,7/22,11/18,
11: # 2/8 Gerd Kortemeyer
1.11 www 12: # 12/23 Gerd Kortemeyer
1.22 harris41 13: # YEAR=2001
1.25 www 14: # 09/04,09/06,11/26 Gerd Kortemeyer
1.24 www 15:
16: $|=1;
1.1 albertel 17:
1.26 harris41 18: use lib '/home/httpd/lib/perl/';
19: use LONCAPA::Configuration;
20:
1.1 albertel 21: use IO::File;
22: use IO::Socket;
23:
24: # -------------------------------------------------- Non-critical communication
25: sub reply {
26: my ($cmd,$server)=@_;
27: my $peerfile="$perlvar{'lonSockDir'}/$server";
28: my $client=IO::Socket::UNIX->new(Peer =>"$peerfile",
29: Type => SOCK_STREAM,
30: Timeout => 10)
31: or return "con_lost";
32: print $client "$cmd\n";
33: my $answer=<$client>;
34: chomp($answer);
35: if (!$answer) { $answer="con_lost"; }
36: return $answer;
37: }
38:
39: # --------------------------------------------------------- Output error status
40:
41: sub errout {
42: my $fh=shift;
43: print $fh (<<ENDERROUT);
44: <p><table border=2 bgcolor="#CCCCCC">
45: <tr><td>Notices</td><td>$notices</td></tr>
46: <tr><td>Warnings</td><td>$warnings</td></tr>
47: <tr><td>Errors</td><td>$errors</td></tr>
48: </table><p><a href="#top">Top</a><p>
49: ENDERROUT
50: }
51:
1.42 albertel 52: sub start_daemon {
53: my ($fh,$daemon,$pidfile) = @_;
1.44 albertel 54: my $progname=$daemon;
55: if ($daemon eq 'lonc' && $ARGV[0] eq 'new') {
56: $progname='loncnew';
57: print "new ";
58: }
59: system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
1.42 albertel 60: sleep 2;
61: if (-e $pidfile) {
62: print $fh "Seems like it started ...<p>";
63: my $lfh=IO::File->new("$pidfile");
64: my $daemonpid=<$lfh>;
65: chomp($daemonpid);
66: sleep 2;
67: if (kill 0 => $daemonpid) {
68: return 1;
69: } else {
70: return 0;
71: }
72: }
73: print $fh "Seems like that did not work!<p>";
74: $errors++;
75: return 0;
76: }
77:
78: sub checkon_daemon {
79: my ($fh,$daemon,$maxsize,$sendusr1)=@_;
80:
81: print $fh '<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>';
82: printf("%-10s ",$daemon);
83: if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
84: open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
85: while ($line=<DFH>) {
86: print $fh "$line";
87: if ($line=~/INFO/) { $notices++; }
88: if ($line=~/WARNING/) { $notices++; }
89: if ($line=~/CRITICAL/) { $warnings++; }
90: };
91: close (DFH);
92: }
93: print $fh "</pre>";
94:
95: my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
96:
97: my $restartflag=1;
98:
99: if (-e $pidfile) {
100: my $lfh=IO::File->new("$pidfile");
101: my $daemonpid=<$lfh>;
102: chomp($daemonpid);
103: if (kill 0 => $daemonpid) {
104: print $fh "<h3>$daemon at pid $daemonpid responding";
105: if ($sendusr1) { print $fh ", sending USR1"; }
106: print $fh "</h3>";
107: if ($sendusr1) { kill USR1 => $daemonpid; }
108: $restartflag=0;
109: print "running\n";
110: } else {
111: $errors++;
112: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
113: $restartflag=1;
114: print $fh "<h3>Decided to clean up stale .pid file and restart $daemon</h3>";
115: }
116: }
117: if ($restartflag==1) {
118: $simplestatus{$daemon}='off';
119: $errors++;
120: print $fh '<br><font color="red">Killall '.$daemon.': '.
121: `killall $daemon 2>&1`.' - ';
122: sleep 2;
123: print $fh unlink($pidfile).' - '.
124: `killall -9 $daemon 2>&1`.
125: '</font><br>';
126: print $fh "<h3>$daemon not running, trying to start</h3>";
127:
128: if (&start_daemon($fh,$daemon,$pidfile)) {
129: print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
130: $simplestatus{$daemon}='restarted';
131: print "started\n";
132: } else {
133: $errors++;
134: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
135: print $fh "Give it one more try ...<p>";
136: print " ";
137: if (&start_daemon($fh,$daemon,$pidfile)) {
138: print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
139: $simplestatus{$daemon}='restarted';
140: print "started\n";
141: } else {
142: print " failed\n";
143: $simplestatus{$daemon}='failed';
144: $errors++; $errors++;
145: print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
146: print $fh "Unable to start $daemon<p>";
147: }
148: }
149:
150: if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
151: print $fh "<p><pre>";
152: open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
153: while ($line=<DFH>) {
154: print $fh "$line";
155: if ($line=~/WARNING/) { $notices++; }
156: if ($line=~/CRITICAL/) { $notices++; }
157: };
158: close (DFH);
159: print $fh "</pre>";
160: }
161: }
162:
163: $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
164:
165: my ($dev,$ino,$mode,$nlink,
166: $uid,$gid,$rdev,$size,
167: $atime,$mtime,$ctime,
168: $blksize,$blocks)=stat($fname);
169:
170: if ($size>$maxsize) {
171: print $fh "Rotating logs ...<p>";
172: rename("$fname.2","$fname.3");
173: rename("$fname.1","$fname.2");
174: rename("$fname","$fname.1");
175: }
176:
177: &errout($fh);
178: }
1.1 albertel 179: # ================================================================ Main Program
180:
1.27 matthew 181: # --------------------------------- Read loncapa_apache.conf and loncapa.conf
1.33 harris41 182: my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
1.28 albertel 183: %perlvar=%{$perlvarref};
1.26 harris41 184: undef $perlvarref;
185: delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
186: delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
1.13 harris41 187:
1.14 harris41 188: # --------------------------------------- Make sure that LON-CAPA is configured
189: # I only test for one thing here (lonHostID). This is just a safeguard.
190: if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
1.43 albertel 191: print("Unconfigured machine.\n");
192: $emailto=$perlvar{'lonSysEMail'};
193: $hostname=`/bin/hostname`;
194: chop $hostname;
195: $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
196: $subj="LON: Unconfigured machine $hostname";
197: system("echo 'Unconfigured machine $hostname.' |\
1.14 harris41 198: mailto $emailto -s '$subj' > /dev/null");
199: exit 1;
200: }
201:
1.13 harris41 202: # ----------------------------- Make sure this process is running from user=www
203: my $wwwid=getpwnam('www');
204: if ($wwwid!=$<) {
1.43 albertel 205: print("User ID mismatch. This program must be run as user 'www'\n");
206: $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
207: $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
208: system("echo 'User ID mismatch. loncron must be run as user www.' |\
1.13 harris41 209: mailto $emailto -s '$subj' > /dev/null");
1.43 albertel 210: exit 1;
1.1 albertel 211: }
212:
213: # ------------------------------------------------------------- Read hosts file
214: {
215: my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
1.43 albertel 216:
1.1 albertel 217: while (my $configline=<$config>) {
1.31 albertel 218: my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
219: if ($id && $domain && $role && $name && $ip) {
220: $hostname{$id}=$name;
221: $hostdom{$id}=$domain;
222: $hostip{$id}=$ip;
223: $hostrole{$id}=$role;
224: if ($domdescr) { $domaindescription{$domain}=$domdescr; }
225: if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
226: $libserv{$id}=$name;
227: }
228: } else {
229: if ($configline) {
230: # &logthis("Skipping hosts.tab line -$configline-");
231: }
232: }
1.1 albertel 233: }
234: }
235:
236: # ------------------------------------------------------ Read spare server file
237: {
238: my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
1.43 albertel 239:
1.1 albertel 240: while (my $configline=<$config>) {
1.43 albertel 241: chomp($configline);
242: if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
243: $spareid{$configline}=1;
244: }
1.1 albertel 245: }
246: }
247:
248: # ---------------------------------------------------------------- Start report
249:
250: $statusdir="/home/httpd/html/lon-status";
251:
252: $errors=0;
253: $warnings=0;
254: $notices=0;
255:
256: $now=time;
257: $date=localtime($now);
258:
259: {
1.43 albertel 260: my $fh=IO::File->new(">$statusdir/newstatus.html");
261: my %simplestatus=();
262:
263: print $fh (<<ENDHEADERS);
1.1 albertel 264: <html>
265: <head>
266: <title>LON Status Report $perlvar{'lonHostID'}</title>
267: </head>
1.3 www 268: <body bgcolor="#AAAAAA">
1.1 albertel 269: <a name="top">
270: <h1>LON Status Report $perlvar{'lonHostID'}</h1>
271: <h2>$date ($now)</h2>
272: <ol>
273: <li><a href="#configuration">Configuration</a>
274: <li><a href="#machine">Machine Information</a>
1.11 www 275: <li><a href="#tmp">Temporary Files</a>
276: <li><a href="#tokens">Session Tokens</a>
1.1 albertel 277: <li><a href="#httpd">httpd</a>
1.11 www 278: <li><a href="#lonsql">lonsql</a>
1.1 albertel 279: <li><a href="#lond">lond</a>
280: <li><a href="#lonc">lonc</a>
1.34 www 281: <li><a href="#lonhttpd">lonhttpd</a>
1.1 albertel 282: <li><a href="#lonnet">lonnet</a>
283: <li><a href="#connections">Connections</a>
284: <li><a href="#delayed">Delayed Messages</a>
285: <li><a href="#errcount">Error Count</a>
286: </ol>
287: <hr>
288: <a name="configuration">
289: <h2>Configuration</h2>
290: <h3>PerlVars</h3>
291: <table border=2>
292: ENDHEADERS
293:
1.43 albertel 294: foreach $varname (sort(keys(%perlvar))) {
295: print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
296: }
297: print $fh "</table><h3>Hosts</h3><table border=2>";
298: foreach $id (sort(keys(%hostname))) {
299: print $fh
300: "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
301: print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
302: }
303: print $fh "</table><h3>Spare Hosts</h3><ol>";
304: foreach $id (sort(keys(%spareid))) {
305: print $fh "<li>$id\n";
306: }
307:
308: print $fh "</ol>\n";
1.1 albertel 309:
310: # --------------------------------------------------------------------- Machine
1.43 albertel 311:
312: print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
313: print $fh "<h3>loadavg</h3>";
314:
315: open (LOADAVGH,"/proc/loadavg");
316: $loadavg=<LOADAVGH>;
317: close (LOADAVGH);
318:
319: print $fh "<tt>$loadavg</tt>";
320:
321: @parts=split(/\s+/,$loadavg);
322: if ($parts[1]>4.0) {
323: $errors++;
324: } elsif ($parts[1]>2.0) {
325: $warnings++;
326: } elsif ($parts[1]>1.0) {
327: $notices++;
328: }
1.1 albertel 329:
1.43 albertel 330: print $fh "<h3>df</h3>";
331: print $fh "<pre>";
1.1 albertel 332:
1.43 albertel 333: open (DFH,"df|");
334: while ($line=<DFH>) {
335: print $fh "$line";
336: @parts=split(/\s+/,$line);
337: $usage=$parts[4];
338: $usage=~s/\W//g;
339: if ($usage>90) {
340: $warnings++;
341: $notices++;
342: } elsif ($usage>80) {
343: $warnings++;
344: } elsif ($usage>60) {
345: $notices++;
346: }
347: if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
348: }
349: close (DFH);
350: print $fh "</pre>";
1.1 albertel 351:
352:
1.43 albertel 353: print $fh "<h3>ps</h3>";
354: print $fh "<pre>";
355: $psproc=0;
356:
1.45 ! albertel 357: open (PSH,"ps -aux --cols 140 |");
1.43 albertel 358: while ($line=<PSH>) {
359: print $fh "$line";
360: $psproc++;
361: }
362: close (PSH);
363: print $fh "</pre>";
1.24 www 364:
1.43 albertel 365: if ($psproc>200) { $notices++; }
366: if ($psproc>250) { $notices++; }
1.24 www 367:
1.43 albertel 368: &errout($fh);
1.11 www 369:
370: # --------------------------------------------------------------- clean out tmp
1.43 albertel 371: print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
372: $cleaned=0;
373: $old=0;
374: while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
375: my ($dev,$ino,$mode,$nlink,
376: $uid,$gid,$rdev,$size,
377: $atime,$mtime,$ctime,
378: $blksize,$blocks)=stat($fname);
379: $now=time;
380: $since=$now-$mtime;
381: if ($since>$perlvar{'lonExpire'}) {
382: $line='';
383: if (open(PROBE,$fname)) {
384: $line=<PROBE>;
385: close(PROBE);
386: }
387: unless ($line=~/^CHECKOUTTOKEN\&/) {
388: $cleaned++;
389: unlink("$fname");
390: } else {
391: if ($since>365*$perlvar{'lonExpire'}) {
392: $cleaned++;
393: unlink("$fname");
394: } else { $old++; }
395: }
396: }
1.11 www 397:
1.43 albertel 398: }
399: print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens).";
1.11 www 400:
401: # ------------------------------------------------------------ clean out lonIDs
1.43 albertel 402: print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
403: $cleaned=0;
404: $active=0;
405: while ($fname=<$perlvar{'lonIDsDir'}/*>) {
406: my ($dev,$ino,$mode,$nlink,
407: $uid,$gid,$rdev,$size,
408: $atime,$mtime,$ctime,
409: $blksize,$blocks)=stat($fname);
410: $now=time;
411: $since=$now-$mtime;
412: if ($since>$perlvar{'lonExpire'}) {
413: $cleaned++;
414: print $fh "Unlinking $fname<br>";
415: unlink("$fname");
416: } else {
417: $active++;
418: }
419:
420: }
421: print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
422: print $fh "<h3>$active open session(s)</h3>";
1.11 www 423:
1.1 albertel 424: # ----------------------------------------------------------------------- httpd
425:
1.43 albertel 426: print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
427:
428: open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
429: while ($line=<DFH>) { print $fh "$line" };
430: close (DFH);
431:
432: print $fh "</pre><h3>Error Log</h3><pre>";
433:
434: open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
435: while ($line=<DFH>) {
436: print $fh "$line";
437: if ($line=~/\[error\]/) { $notices++; }
438: };
439: close (DFH);
440: print $fh "</pre>";
441: &errout($fh);
1.5 harris41 442:
443:
1.11 www 444: # ---------------------------------------------------------------------- lonsql
1.22 harris41 445:
1.43 albertel 446: &checkon_daemon($fh,'lonsql',200000);
1.5 harris41 447:
1.1 albertel 448: # ------------------------------------------------------------------------ lond
449:
1.43 albertel 450: &checkon_daemon($fh,'lond',40000,1);
1.1 albertel 451:
452: # ------------------------------------------------------------------------ lonc
453:
1.43 albertel 454: &checkon_daemon($fh,'lonc',40000,1);
1.1 albertel 455:
1.34 www 456: # -------------------------------------------------------------------- lonhttpd
457:
1.43 albertel 458: &checkon_daemon($fh,'lonhttpd',40000);
1.1 albertel 459:
460: # ---------------------------------------------------------------------- lonnet
461:
1.43 albertel 462: print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
463: print "checking logs\n";
464: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
465: open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
466: while ($line=<DFH>) {
467: print $fh "$line";
468: };
469: close (DFH);
470: }
471: print $fh "</pre><h3>Perm Log</h3><pre>";
472:
473: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
474: open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
475: while ($line=<DFH>) {
476: print $fh "$line";
477: };
478: close (DFH);
479: } else { print $fh "No perm log\n" }
480:
481: $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
482:
483: my ($dev,$ino,$mode,$nlink,
484: $uid,$gid,$rdev,$size,
485: $atime,$mtime,$ctime,
486: $blksize,$blocks)=stat($fname);
487:
488: if ($size>40000) {
489: print $fh "Rotating logs ...<p>";
490: rename("$fname.2","$fname.3");
491: rename("$fname.1","$fname.2");
492: rename("$fname","$fname.1");
493: }
1.1 albertel 494:
1.43 albertel 495: print $fh "</pre>";
496: &errout($fh);
497: # ----------------------------------------------------------------- Connections
1.1 albertel 498:
1.43 albertel 499: print $fh '<hr><a name="connections"><h2>Connections</h2>';
500: print "testing connections\n";
501: print $fh "<table border=2>";
502: foreach $tryserver (sort(keys(%hostname))) {
503: print(".");
504: $answer=reply("pong",$tryserver);
505: if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
506: $result="<b>ok</b>";
507: } else {
508: $result=$answer;
509: $warnings++;
510: if ($answer eq 'con_lost') { $warnings++; }
511: }
512: if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
513: print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";
1.1 albertel 514:
515: }
1.43 albertel 516: print $fh "</table>";
1.1 albertel 517:
1.43 albertel 518: &errout($fh);
1.1 albertel 519: # ------------------------------------------------------------ Delayed messages
520:
1.43 albertel 521: print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
522: print "checking buffers\n";
1.1 albertel 523:
1.43 albertel 524: print $fh '<h3>Scanning Permanent Log</h3>';
1.1 albertel 525:
1.43 albertel 526: $unsend=0;
527: {
528: my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
529: while ($line=<$dfh>) {
530: ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
531: if ($sdf eq 'F') {
532: $local=localtime($time);
533: print $fh "<b>Failed: $time, $dserv, $dcmd</b><br>";
534: $warnings++;
535: }
536: if ($sdf eq 'S') { $unsend--; }
537: if ($sdf eq 'D') { $unsend++; }
538: }
1.1 albertel 539: }
1.43 albertel 540: print $fh "Total unsend messages: <b>$unsend</b><p>\n";
541: $warnings=$warnings+5*$unsend;
1.1 albertel 542:
1.43 albertel 543: if ($unsend) { $simplestatus{'unsend'}=$unsend; }
544: print $fh "<h3>Outgoing Buffer</h3>";
1.1 albertel 545:
1.43 albertel 546: open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
547: while ($line=<DFH>) {
548: print $fh "$line<br>";
549: };
550: close (DFH);
1.1 albertel 551:
552: # ------------------------------------------------------------------------- End
1.43 albertel 553: print $fh "<a name=errcount>\n";
554: $totalcount=$notices+4*$warnings+100*$errors;
555: &errout($fh);
556: print $fh "<h1>Total Error Count: $totalcount</h1>";
557: $now=time;
558: $date=localtime($now);
559: print $fh "<hr>$date ($now)</body></html>\n";
560: print "lon-status webpage updated\n";
561: $fh->close();
1.1 albertel 562: }
1.41 www 563: if ($errors) { $simplestatus{'errors'}=$errors; }
564: if ($warnings) { $simplestatus{'warnings'}=$warnings; }
565: if ($notices) { $simplestatus{'notices'}=$notices; }
566: $simplestatus{'time'}=time;
1.1 albertel 567:
568: rename ("$statusdir/newstatus.html","$statusdir/index.html");
1.41 www 569: {
1.43 albertel 570: my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
571: foreach (keys %simplestatus) {
572: print $sfh $_.'='.$simplestatus{$_}.'&';
573: }
574: print $sfh "\n";
575: $sfh->close();
1.41 www 576: }
1.1 albertel 577: if ($totalcount>200) {
1.43 albertel 578: print "sending mail\n";
579: $emailto="$perlvar{'lonAdmEMail'}";
580: if ($totalcount>1000) {
581: $emailto.=",$perlvar{'lonSysEMail'}";
582: }
583: $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
584: system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
1.1 albertel 585: }
586: 1;
587:
588:
589:
590:
591:
592:
593:
594:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>