#!/usr/bin/perl
# The LearningOnline Network
# lonsql - LON TCP-MySQL-Server Daemon for handling database requests.
#
# $Id: lonsql,v 1.61 2004/05/20 14:15:29 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
=pod
=head1 NAME
lonsql - LON TCP-MySQL-Server Daemon for handling database requests.
=head1 SYNOPSIS
This script should be run as user=www.
Note that a lonsql.pid file contains the pid of the parent process.
=head1 OVERVIEW
=head2 Purpose within LON-CAPA
LON-CAPA is meant to distribute A LOT of educational content to A LOT
of people. It is ineffective to directly rely on contents within the
ext2 filesystem to be speedily scanned for on-the-fly searches of
content descriptions. (Simply put, it takes a cumbersome amount of
time to open, read, analyze, and close thousands of files.)
The solution is to index various data fields that are descriptive of
the educational resources on a LON-CAPA server machine in a
database. Descriptive data fields are referred to as "metadata". The
question then arises as to how this metadata is handled in terms of
the rest of the LON-CAPA network without burdening client and daemon
processes.
The obvious solution, using lonc to send a query to a lond process,
doesn't work so well in general as you can see in the following
example:
lonc= loncapa client process A-lonc= a lonc process on Server A
lond= loncapa daemon process
database command
A-lonc --------TCP/IP----------------> B-lond
The problem emerges that A-lonc and B-lond are kept waiting for the
MySQL server to "do its stuff", or in other words, perform the
conceivably sophisticated, data-intensive, time-sucking database
transaction. By tying up a lonc and lond process, this significantly
cripples the capabilities of LON-CAPA servers.
The solution is to offload the work onto another process, and use
lonc and lond just for requests and notifications of completed
processing:
database command
A-lonc ---------TCP/IP-----------------> B-lond =====> B-lonsql
<---------------------------------/ |
"ok, I'll get back to you..." |
|
/
A-lond <------------------------------- B-lonc <======
"Guess what? I have the result!"
Of course, depending on success or failure, the messages may vary, but
the principle remains the same where a separate pool of children
processes (lonsql's) handle the MySQL database manipulations.
Thus, lonc and lond spend effectively no time waiting on results from
the database.
=head1 Internals
=over 4
=cut
use strict;
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
use LONCAPA::lonmetadata();
use IO::Socket;
use Symbol;
use POSIX;
use IO::Select;
use IO::File;
use Socket;
use Fcntl;
use Tie::RefHash;
use DBI;
use File::Find;
########################################################
########################################################
=pod
=item Global Variables
=over 4
=item dbh
=back
=cut
########################################################
########################################################
my $dbh;
########################################################
########################################################
=pod
=item Variables required for forking
=over 4
=item $MAX_CLIENTS_PER_CHILD
The number of clients each child should process.
=item %children
The keys to %children are the current child process IDs
=item $children
The current number of children
=back
=cut
########################################################
########################################################
my $MAX_CLIENTS_PER_CHILD = 5; # number of clients each child should process
my %children = (); # keys are current child process IDs
my $children = 0; # current number of children
###################################################################
###################################################################
=pod
=item Main body of code.
=over 4
=item Read data from loncapa_apache.conf and loncapa.conf.
=item Ensure we can access the database.
=item Determine if there are other instances of lonsql running.
=item Read the hosts file.
=item Create a socket for lonsql.
=item Fork once and dissociate from parent.
=item Write PID to disk.
=item Prefork children and maintain the population of children.
=back
=cut
###################################################################
###################################################################
my $childmaxattempts=10;
my $run =0; # running counter to generate the query-id
#
# Read loncapa_apache.conf and loncapa.conf
#
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
#
# Make sure that database can be accessed
#
my $dbh;
unless ($dbh = DBI->connect("DBI:mysql:loncapa","www",
$perlvar{'lonSqlAccess'},
{ RaiseError =>0,PrintError=>0})) {
print "Cannot connect to database!\n";
my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
my $subj="LON: $perlvar{'lonHostID'} Cannot connect to database!";
system("echo 'Cannot connect to MySQL database!' |".
" mailto $emailto -s '$subj' > /dev/null");
open(SMP,'>/home/httpd/html/lon-status/mysql.txt');
print SMP 'time='.time.'&mysql=defunct'."\n";
close(SMP);
exit 1;
} else {
$dbh->disconnect;
}
#
# Check if other instance running
#
my $pidfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
if (-e $pidfile) {
my $lfh=IO::File->new("$pidfile");
my $pide=<$lfh>;
chomp($pide);
if (kill 0 => $pide) { die "already running"; }
}
#
# Read hosts file
#
my %hostip;
my $thisserver;
my $PREFORK=4; # number of children to maintain, at least four spare
open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
while (my $configline=<CONFIG>) {
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
chomp($ip);
$hostip{$ip}=$id;
$thisserver=$name if ($id eq $perlvar{'lonHostID'});
$PREFORK++;
}
close(CONFIG);
#
$PREFORK=int($PREFORK/4);
#
# Create a socket to talk to lond
#
my $unixsock = "mysqlsock";
my $localfile="$perlvar{'lonSockDir'}/$unixsock";
my $server;
unlink ($localfile);
unless ($server=IO::Socket::UNIX->new(Local =>"$localfile",
Type => SOCK_STREAM,
Listen => 10)) {
print "in socket error:$@\n";
}
#
# Fork once and dissociate
#
my $fpid=fork;
exit if $fpid;
die "Couldn't fork: $!" unless defined ($fpid);
POSIX::setsid() or die "Can't start new session: $!";
#
# Write our PID on disk
my $execdir=$perlvar{'lonDaemons'};
open (PIDSAVE,">$execdir/logs/lonsql.pid");
print PIDSAVE "$$\n";
close(PIDSAVE);
&logthis("<font color='red'>CRITICAL: ---------- Starting ----------</font>");
#
# Ignore signals generated during initial startup
$SIG{HUP}=$SIG{USR1}='IGNORE';
# Now we are on our own
# Fork off our children.
for (1 .. $PREFORK) {
make_new_child();
}
#
# Install signal handlers.
$SIG{CHLD} = \&REAPER;
$SIG{INT} = $SIG{TERM} = \&HUNTSMAN;
$SIG{HUP} = \&HUPSMAN;
#
# And maintain the population.
while (1) {
sleep; # wait for a signal (i.e., child's death)
for (my $i = $children; $i < $PREFORK; $i++) {
make_new_child(); # top up the child pool
}
}
########################################################
########################################################
=pod
=item &make_new_child
Inputs: None
Returns: None
=cut
########################################################
########################################################
sub make_new_child {
my $pid;
my $sigset;
#
# block signal for fork
$sigset = POSIX::SigSet->new(SIGINT);
sigprocmask(SIG_BLOCK, $sigset)
or die "Can't block SIGINT for fork: $!\n";
#
die "fork: $!" unless defined ($pid = fork);
#
if ($pid) {
# Parent records the child's birth and returns.
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
$children{$pid} = 1;
$children++;
return;
} else {
# Child can *not* return from this subroutine.
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
# unblock signals
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
#open database handle
# making dbh global to avoid garbage collector
unless ($dbh = DBI->connect("DBI:mysql:loncapa","www",
$perlvar{'lonSqlAccess'},
{ RaiseError =>0,PrintError=>0})) {
sleep(10+int(rand(20)));
&logthis("<font color='blue'>WARNING: Couldn't connect to database".
": $@</font>");
# "($st secs): $@</font>");
print "database handle error\n";
exit;
}
# make sure that a database disconnection occurs with
# ending kill signals
$SIG{TERM}=$SIG{INT}=$SIG{QUIT}=$SIG{__DIE__}=\&DISCONNECT;
# handle connections until we've reached $MAX_CLIENTS_PER_CHILD
for (my $i=0; $i < $MAX_CLIENTS_PER_CHILD; $i++) {
my $client = $server->accept() or last;
# do something with the connection
$run = $run+1;
my $userinput = <$client>;
chomp($userinput);
#
my ($conserver,$query,
$arg1,$arg2,$arg3)=split(/&/,$userinput);
my $query=unescape($query);
#
#send query id which is pid_unixdatetime_runningcounter
my $queryid = $thisserver;
$queryid .="_".($$)."_";
$queryid .= time."_";
$queryid .= $run;
print $client "$queryid\n";
#
# &logthis("QUERY: $query - $arg1 - $arg2 - $arg3");
sleep 1;
#
my $result='';
#
# At this point, query is received, query-ID assigned and sent
# back, $query eq 'logquery' will mean that this is a query
# against log-files
if (($query eq 'userlog') || ($query eq 'courselog')) {
# beginning of log query
my $udom = &unescape($arg1);
my $uname = &unescape($arg2);
my $command = &unescape($arg3);
my $path = &propath($udom,$uname);
if (-e "$path/activity.log") {
if ($query eq 'userlog') {
$result=&userlog($path,$command);
} else {
$result=&courselog($path,$command);
}
} else {
&logthis('Unable to do log query: '.$uname.'@'.$udom);
$result='no_such_file';
}
# end of log query
} else {
# Do an sql query
$result = &do_sql_query($query,$arg1,$arg2);
}
# result does not need to be escaped because it has already been
# escaped.
#$result=&escape($result);
&reply("queryreply:$queryid:$result",$conserver);
}
# tidy up gracefully and finish
#
# close the database handle
$dbh->disconnect
or &logthis("<font color='blue'>WARNING: Couldn't disconnect".
" from database $DBI::errstr : $@</font>");
# this exit is VERY important, otherwise the child will become
# a producer of more and more children, forking yourself into
# process death.
exit;
}
}
########################################################
########################################################
=pod
=item &do_sql_query
Runs an sql metadata table query.
Inputs: $query, $custom, $customshow
Returns: A string containing escaped results.
=cut
########################################################
########################################################
{
my @metalist;
sub process_file {
if ( -e $_ && # file exists
-f $_ && # and is a normal file
/\.meta$/ && # ends in meta
! /^.+\.\d+\.[^\.]+\.meta$/ # is not a previous version
) {
push(@metalist,$File::Find::name);
}
}
sub do_sql_query {
my ($query,$custom,$customshow) = @_;
$custom = &unescape($custom);
$customshow = &unescape($customshow);
#
@metalist = ();
#
my $result = '';
my @results = ();
my @files;
my $subsetflag=0;
#
if ($query) {
#prepare and execute the query
my $sth = $dbh->prepare($query);
unless ($sth->execute()) {
&logthis('<font color="blue">'.
'WARNING: Could not retrieve from database:'.
$sth->errstr().'</font>');
} else {
my $aref=$sth->fetchall_arrayref;
foreach my $row (@$aref) {
push @files,@{$row}[3] if ($custom or $customshow);
my @b=map { &escape($_); } @$row;
push @results,join(",", @b);
# Build up the @files array with the LON-CAPA urls
# of the resources.
}
}
}
# do custom metadata searching here and build into result
return join("&",@results) if (! ($custom or $customshow));
# Only get here if there is a custom query or custom show request
&logthis("Doing custom query for $custom");
if ($query) {
@metalist=map {
$perlvar{'lonDocRoot'}.$_.'.meta';
} @files;
} else {
my $dir = "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}";
@metalist=();
opendir(RESOURCES,$dir);
my @homeusers=grep {
&ishome($dir.'/'.$_);
} grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
# Define the
foreach my $user (@homeusers) {
find (\&process_file,$dir.'/'.$user);
}
}
# if file is indicated in sql database and
# not part of sql-relevant query, do not pattern match.
#
# if file is not in sql database, output error.
#
# if file is indicated in sql database and is
# part of query result list, then do the pattern match.
my $customresult='';
my @results;
foreach my $metafile (@metalist) {
my $fh=IO::File->new($metafile);
my @lines=<$fh>;
my $stuff=join('',@lines);
if ($stuff=~/$custom/s) {
foreach my $f ('abstract','author','copyright',
'creationdate','keywords','language',
'lastrevisiondate','mime','notes',
'owner','subject','title') {
$stuff=~s/\n?\<$f[^\>]*\>.*?<\/$f[^\>]*\>\n?//s;
}
my $mfile=$metafile;
my $docroot=$perlvar{'lonDocRoot'};
$mfile=~s/^$docroot//;
$mfile=~s/\.meta$//;
unless ($query) {
my $q2="SELECT * FROM metadata WHERE url ".
" LIKE BINARY '?'";
my $sth = $dbh->prepare($q2);
$sth->execute($mfile);
my $aref=$sth->fetchall_arrayref;
foreach my $a (@$aref) {
my @b=map { &escape($_)} @$a;
push @results,join(",", @b);
}
}
# &logthis("found: $stuff");
$customresult.='&custom='.&escape($mfile).','.
escape($stuff);
}
}
$result=join("&",@results) unless $query;
$result.=$customresult;
#
return $result;
} # End of &do_sql_query
} # End of scoping curly braces for &process_file and &do_sql_query
########################################################
########################################################
=pod
=item &logthis
Inputs: $message, the message to log
Returns: nothing
Writes $message to the logfile.
=cut
########################################################
########################################################
sub logthis {
my $message=shift;
my $execdir=$perlvar{'lonDaemons'};
my $fh=IO::File->new(">>$execdir/logs/lonsql.log");
my $now=time;
my $local=localtime($now);
print $fh "$local ($$): $message\n";
}
# -------------------------------------------------- Non-critical communication
########################################################
########################################################
=pod
=item &subreply
Sends a command to a server. Called only by &reply.
Inputs: $cmd,$server
Returns: The results of the message or 'con_lost' on error.
=cut
########################################################
########################################################
sub subreply {
my ($cmd,$server)=@_;
my $peerfile="$perlvar{'lonSockDir'}/$server";
my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile",
Type => SOCK_STREAM,
Timeout => 10)
or return "con_lost";
print $sclient "$cmd\n";
my $answer=<$sclient>;
chomp($answer);
$answer="con_lost" if (!$answer);
return $answer;
}
########################################################
########################################################
=pod
=item &reply
Sends a command to a server.
Inputs: $cmd,$server
Returns: The results of the message or 'con_lost' on error.
=cut
########################################################
########################################################
sub reply {
my ($cmd,$server)=@_;
my $answer;
if ($server ne $perlvar{'lonHostID'}) {
$answer=subreply($cmd,$server);
if ($answer eq 'con_lost') {
$answer=subreply("ping",$server);
$answer=subreply($cmd,$server);
}
} else {
$answer='self_reply';
$answer=subreply($cmd,$server);
}
return $answer;
}
########################################################
########################################################
=pod
=item &escape
Escape special characters in a string.
Inputs: string to escape
Returns: The input string with special characters escaped.
=cut
########################################################
########################################################
sub escape {
my $str=shift;
$str =~ s/(\W)/"%".unpack('H2',$1)/eg;
return $str;
}
########################################################
########################################################
=pod
=item &unescape
Unescape special characters in a string.
Inputs: string to unescape
Returns: The input string with special characters unescaped.
=cut
########################################################
########################################################
sub unescape {
my $str=shift;
$str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
return $str;
}
########################################################
########################################################
=pod
=item &ishome
Determine if the current machine is the home server for a user.
The determination is made by checking the filesystem for the users information.
Inputs: $author
Returns: 0 - this is not the authors home server, 1 - this is.
=cut
########################################################
########################################################
sub ishome {
my $author=shift;
$author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
my ($udom,$uname)=split(/\//,$author);
my $proname=propath($udom,$uname);
if (-e $proname) {
return 1;
} else {
return 0;
}
}
########################################################
########################################################
=pod
=item &propath
Inputs: user name, user domain
Returns: The full path to the users directory.
=cut
########################################################
########################################################
sub propath {
my ($udom,$uname)=@_;
$udom=~s/\W//g;
$uname=~s/\W//g;
my $subdir=$uname.'__';
$subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
return $proname;
}
########################################################
########################################################
=pod
=item &courselog
Inputs: $path, $command
Returns: unescaped string of values.
=cut
########################################################
########################################################
sub courselog {
my ($path,$command)=@_;
my %filters=();
foreach (split(/\:/,&unescape($command))) {
my ($name,$value)=split(/\=/,$_);
$filters{$name}=$value;
}
my @results=();
open(IN,$path.'/activity.log') or return ('file_error');
while (my $line=<IN>) {
chomp($line);
my ($timestamp,$host,$log)=split(/\:/,$line);
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
# then additionally
# %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
# or
# %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
#
# get delimiter between timestamped entries to be &&&
$log=~s/\%26(\d+)\%3a/\&\&\&$1\%3a/g;
# now go over all log entries
foreach (split(/\&\&\&/,&unescape($log))) {
my ($time,$res,$uname,$udom,$action,@values)=split(/\:/,$_);
my $values=&unescape(join(':',@values));
$values=~s/\&/\:/g;
$res=&unescape($res);
my $include=1;
if (($filters{'username'}) && ($uname ne $filters{'username'}))
{ $include=0; }
if (($filters{'domain'}) && ($udom ne $filters{'domain'}))
{ $include=0; }
if (($filters{'url'}) && ($res!~/$filters{'url'}/))
{ $include=0; }
if (($filters{'start'}) && ($time<$filters{'start'}))
{ $include=0; }
if (($filters{'end'}) && ($time>$filters{'end'}))
{ $include=0; }
if (($filters{'action'} eq 'view') && ($action))
{ $include=0; }
if (($filters{'action'} eq 'submit') && ($action ne 'POST'))
{ $include=0; }
if (($filters{'action'} eq 'grade') && ($action ne 'CSTORE'))
{ $include=0; }
if ($include) {
push(@results,($time<1000000000?'0':'').$time.':'.$res.':'.
$uname.':'.$udom.':'.
$action.':'.$values);
}
}
}
close IN;
return join('&',sort(@results));
}
########################################################
########################################################
=pod
=item &userlog
Inputs: $path, $command
Returns: unescaped string of values.
=cut
########################################################
########################################################
sub userlog {
my ($path,$command)=@_;
my %filters=();
foreach (split(/\:/,&unescape($command))) {
my ($name,$value)=split(/\=/,$_);
$filters{$name}=$value;
}
my @results=();
open(IN,$path.'/activity.log') or return ('file_error');
while (my $line=<IN>) {
chomp($line);
my ($timestamp,$host,$log)=split(/\:/,$line);
$log=&unescape($log);
my $include=1;
if (($filters{'start'}) && ($timestamp<$filters{'start'}))
{ $include=0; }
if (($filters{'end'}) && ($timestamp>$filters{'end'}))
{ $include=0; }
if (($filters{'action'} eq 'log') && ($log!~/^Log/)) { $include=0; }
if (($filters{'action'} eq 'check') && ($log!~/^Check/))
{ $include=0; }
if ($include) {
push(@results,$timestamp.':'.$log);
}
}
close IN;
return join('&',sort(@results));
}
########################################################
########################################################
=pod
=item Functions required for forking
=over 4
=item REAPER
REAPER takes care of dead children.
=item HUNTSMAN
Signal handler for SIGINT.
=item HUPSMAN
Signal handler for SIGHUP
=item DISCONNECT
Disconnects from database.
=back
=cut
########################################################
########################################################
sub REAPER { # takes care of dead children
$SIG{CHLD} = \&REAPER;
my $pid = wait;
$children --;
&logthis("Child $pid died");
delete $children{$pid};
}
sub HUNTSMAN { # signal handler for SIGINT
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
kill 'INT' => keys %children;
my $execdir=$perlvar{'lonDaemons'};
unlink("$execdir/logs/lonsql.pid");
&logthis("<font color='red'>CRITICAL: Shutting down</font>");
$unixsock = "mysqlsock";
my $port="$perlvar{'lonSockDir'}/$unixsock";
unlink($port);
exit; # clean up with dignity
}
sub HUPSMAN { # signal handler for SIGHUP
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
kill 'INT' => keys %children;
close($server); # free up socket
&logthis("<font color='red'>CRITICAL: Restarting</font>");
my $execdir=$perlvar{'lonDaemons'};
$unixsock = "mysqlsock";
my $port="$perlvar{'lonSockDir'}/$unixsock";
unlink($port);
exec("$execdir/lonsql"); # here we go again
}
sub DISCONNECT {
$dbh->disconnect or
&logthis("<font color='blue'>WARNING: Couldn't disconnect from database ".
" $DBI::errstr : $@</font>");
exit;
}
=pod
=back
=cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>