--- loncom/loncron 2004/05/11 20:19:46 1.48 +++ loncom/loncron 2005/02/14 00:29:48 1.57 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.48 2004/05/11 20:19:46 albertel Exp $ +# $Id: loncron,v 1.57 2005/02/14 00:29:48 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -36,6 +36,7 @@ use LONCAPA::Configuration; use IO::File; use IO::Socket; use HTML::Entities; +use Getopt::Long; #globals use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); @@ -76,13 +77,21 @@ ENDERROUT } sub start_daemon { - my ($fh,$daemon,$pidfile) = @_; + my ($fh,$daemon,$pidfile,$args) = @_; my $progname=$daemon; - if ($daemon eq 'lonc' && $ARGV[0] eq 'new') { + if ($daemon eq 'lonc' && $args eq 'new') { $progname='loncnew'; print "new "; } - system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); + my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; + my $size=(stat($error_fname))[7]; + if ($size>40000) { + &log($fh,"
Rotating error logs ...
"); + rename("$error_fname.2","$error_fname.3"); + rename("$error_fname.1","$error_fname.2"); + rename("$error_fname","$error_fname.1"); + } + system("$perlvar{'lonDaemons'}/$progname 2>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); sleep 2; if (-e $pidfile) { &log($fh,"Seems like it started ...
"); @@ -102,10 +111,10 @@ sub start_daemon { } sub checkon_daemon { - my ($fh,$daemon,$maxsize,$sendusr1)=@_; + my ($fh,$daemon,$maxsize,$sendusr1,$args)=@_; &log($fh,'');
- printf("%-10s ",$daemon);
+ printf("%-15s ",$daemon);
if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
while (my $line= Give it one more try ...
Killall '.$daemon.': '.
- `killall $daemon 2>&1`.' - ');
+ `killall $kadaemon 2>&1`.' - ');
sleep 2;
&log($fh,unlink($pidfile).' - '.
- `killall -9 $daemon 2>&1`.
+ `killall -9 $kadaemon 2>&1`.
'
');
&log($fh,"$daemon not running, trying to start
");
- if (&start_daemon($fh,$daemon,$pidfile)) {
+ if (&start_daemon($fh,$daemon,$pidfile,$args)) {
&log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
print "started\n";
@@ -160,7 +171,7 @@ sub checkon_daemon {
&log($fh,"$daemon at pid $daemonpid not responding
");
&log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
print "started\n";
@@ -251,7 +262,7 @@ sub log_machine_info {
&log($fh,"");
my $psproc=0;
- open (PSH,"ps -aux --cols 140 |");
+ open (PSH,"ps aux --cols 140 |");
while (my $line=
Connections
');
print "testing connections\n";
&log($fh,"");
+ my ($good,$bad)=(0,0);
foreach my $tryserver (sort(keys(%{$hostname}))) {
print(".");
my $result;
- my $answer=reply("pong",$tryserver);
+ my $answer=reply("ping",$tryserver);
if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
$result="ok";
+ $good++;
} else {
$result=$answer;
$warnings++;
- if ($answer eq 'con_lost') { $warnings++; }
+ if ($answer eq 'con_lost') {
+ $bad++;
+ $warnings++;
+ } else {
+ $good++; #self connection
+ }
}
if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
&log($fh,"
");
-
+ print "\n$good good, $bad bad connections\n";
&errout($fh);
}
@@ -538,15 +556,47 @@ sub log_simplestatus {
sub send_mail {
print "sending mail\n";
my $emailto="$perlvar{'lonAdmEMail'}";
- if ($totalcount>1000) {
+ if ($totalcount>2500) {
$emailto.=",$perlvar{'lonSysEMail'}";
}
my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
- system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
+
+ my $result=system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html >& /dev/null");
+ if ($result != 0) {
+ $result=system("mail -s '$subj' $emailto < $statusdir/index.html");
+ }
+}
+
+sub usage {
+ print(< \n");
}
&log($fh,"$tryserver $result