File:  [LON-CAPA] / loncom / lonsql
Revision 1.59: download - view: text, annotated - select for diffs
Tue May 11 20:03:05 2004 UTC (20 years, 8 months ago) by albertel
Branches: MAIN
CVS tags: HEAD

-more xhtmlized

#!/usr/bin/perl

# The LearningOnline Network
# lonsql - LON TCP-MySQL-Server Daemon for handling database requests.
#
# $Id: lonsql,v 1.59 2004/05/11 20:03:05 albertel Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#

=pod

=head1 NAME

lonsql - LON TCP-MySQL-Server Daemon for handling database requests.

=head1 SYNOPSIS

This script should be run as user=www.  
Note that a lonsql.pid file contains the pid of the parent process.

=head1 OVERVIEW

=head2 Purpose within LON-CAPA

LON-CAPA is meant to distribute A LOT of educational content to A LOT
of people. It is ineffective to directly rely on contents within the
ext2 filesystem to be speedily scanned for on-the-fly searches of
content descriptions. (Simply put, it takes a cumbersome amount of
time to open, read, analyze, and close thousands of files.)

The solution is to index various data fields that are descriptive of
the educational resources on a LON-CAPA server machine in a
database. Descriptive data fields are referred to as "metadata". The
question then arises as to how this metadata is handled in terms of
the rest of the LON-CAPA network without burdening client and daemon
processes.

The obvious solution, using lonc to send a query to a lond process,
doesn't work so well in general as you can see in the following
example:

    lonc= loncapa client process    A-lonc= a lonc process on Server A
    lond= loncapa daemon process

                 database command
    A-lonc  --------TCP/IP----------------> B-lond

The problem emerges that A-lonc and B-lond are kept waiting for the
MySQL server to "do its stuff", or in other words, perform the
conceivably sophisticated, data-intensive, time-sucking database
transaction.  By tying up a lonc and lond process, this significantly
cripples the capabilities of LON-CAPA servers.

The solution is to offload the work onto another process, and use
lonc and lond just for requests and notifications of completed
processing:

                database command

  A-lonc  ---------TCP/IP-----------------> B-lond =====> B-lonsql
         <---------------------------------/                |
           "ok, I'll get back to you..."                    |
                                                            |
                                                            /
  A-lond  <-------------------------------  B-lonc   <======
           "Guess what? I have the result!"

Of course, depending on success or failure, the messages may vary, but
the principle remains the same where a separate pool of children
processes (lonsql's) handle the MySQL database manipulations.

Thus, lonc and lond spend effectively no time waiting on results from
the database.

=head1 Internals

=over 4

=cut

use strict;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
use LONCAPA::lonmetadata();

use IO::Socket;
use Symbol;
use POSIX;
use IO::Select;
use IO::File;
use Socket;
use Fcntl;
use Tie::RefHash;
use DBI;
use File::Find;

########################################################
########################################################

=pod

=item Global Variables

=over 4

=item dbh

=back

=cut

########################################################
########################################################
my $dbh;

########################################################
########################################################

=pod 

=item Variables required for forking

=over 4

=item $MAX_CLIENTS_PER_CHILD

The number of clients each child should process.

=item %children 

The keys to %children  are the current child process IDs

=item $children

The current number of children

=back

=cut 

########################################################
########################################################
my $MAX_CLIENTS_PER_CHILD  = 5;   # number of clients each child should process
my %children               = ();  # keys are current child process IDs
my $children               = 0;   # current number of children
                               
###################################################################
###################################################################

=pod

=item Main body of code.

=over 4

=item Read data from loncapa_apache.conf and loncapa.conf.

=item Ensure we can access the database.

=item Determine if there are other instances of lonsql running.

=item Read the hosts file.

=item Create a socket for lonsql.

=item Fork once and dissociate from parent.

=item Write PID to disk.

=item Prefork children and maintain the population of children.

=back

=cut

###################################################################
###################################################################
my $childmaxattempts=10;
my $run =0;              # running counter to generate the query-id
#
# Read loncapa_apache.conf and loncapa.conf
#
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
#
# Make sure that database can be accessed
#
my $dbh;
unless ($dbh = DBI->connect("DBI:mysql:loncapa","www",
                            $perlvar{'lonSqlAccess'},
                            { RaiseError =>0,PrintError=>0})) { 
    print "Cannot connect to database!\n";
    my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    my $subj="LON: $perlvar{'lonHostID'} Cannot connect to database!";
    system("echo 'Cannot connect to MySQL database!' |".
           " mailto $emailto -s '$subj' > /dev/null");

    open(SMP,'>/home/httpd/html/lon-status/mysql.txt');
    print SMP 'time='.time.'&mysql=defunct'."\n";
    close(SMP);

    exit 1;
} else {
    $dbh->disconnect;
}

#
# Check if other instance running
#
my $pidfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
if (-e $pidfile) {
   my $lfh=IO::File->new("$pidfile");
   my $pide=<$lfh>;
   chomp($pide);
   if (kill 0 => $pide) { die "already running"; }
}

#
# Read hosts file
#
my %hostip;
my $thisserver;
my $PREFORK=4; # number of children to maintain, at least four spare
open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
while (my $configline=<CONFIG>) {
    my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
    chomp($ip);
    $hostip{$ip}=$id;
    $thisserver=$name if ($id eq $perlvar{'lonHostID'});
    $PREFORK++;
}
close(CONFIG);
#
$PREFORK=int($PREFORK/4);

#
# Create a socket to talk to lond
#
my $unixsock = "mysqlsock";
my $localfile="$perlvar{'lonSockDir'}/$unixsock";
my $server;
unlink ($localfile);
unless ($server=IO::Socket::UNIX->new(Local    =>"$localfile",
                                      Type    => SOCK_STREAM,
                                      Listen => 10)) {
    print "in socket error:$@\n";
}

#
# Fork once and dissociate
#
my $fpid=fork;
exit if $fpid;
die "Couldn't fork: $!" unless defined ($fpid);
POSIX::setsid() or die "Can't start new session: $!";

#
# Write our PID on disk
my $execdir=$perlvar{'lonDaemons'};
open (PIDSAVE,">$execdir/logs/lonsql.pid");
print PIDSAVE "$$\n";
close(PIDSAVE);
&logthis("<font color='red'>CRITICAL: ---------- Starting ----------</font>");

#
# Ignore signals generated during initial startup
$SIG{HUP}=$SIG{USR1}='IGNORE';
# Now we are on our own    
#    Fork off our children.
for (1 .. $PREFORK) {
    make_new_child();
}

#
# Install signal handlers.
$SIG{CHLD} = \&REAPER;
$SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
$SIG{HUP}  = \&HUPSMAN;

#
# And maintain the population.
while (1) {
    sleep;                          # wait for a signal (i.e., child's death)
    for (my $i = $children; $i < $PREFORK; $i++) {
        make_new_child();           # top up the child pool
    }
}

########################################################
########################################################

=pod

=item &make_new_child

Inputs: None

Returns: None

=cut

########################################################
########################################################
sub make_new_child {
    my $pid;
    my $sigset;
    #
    # block signal for fork
    $sigset = POSIX::SigSet->new(SIGINT);
    sigprocmask(SIG_BLOCK, $sigset)
        or die "Can't block SIGINT for fork: $!\n";
    #
    die "fork: $!" unless defined ($pid = fork);
    #
    if ($pid) {
        # Parent records the child's birth and returns.
        sigprocmask(SIG_UNBLOCK, $sigset)
            or die "Can't unblock SIGINT for fork: $!\n";
        $children{$pid} = 1;
        $children++;
        return;
    } else {
        # Child can *not* return from this subroutine.
        $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
        # unblock signals
        sigprocmask(SIG_UNBLOCK, $sigset)
            or die "Can't unblock SIGINT for fork: $!\n";
        #open database handle
	# making dbh global to avoid garbage collector
	unless ($dbh = DBI->connect("DBI:mysql:loncapa","www",
                                    $perlvar{'lonSqlAccess'},
                                    { RaiseError =>0,PrintError=>0})) { 
            sleep(10+int(rand(20)));
            &logthis("<font color='blue'>WARNING: Couldn't connect to database".
                     ": $@</font>");
                     #  "($st secs): $@</font>");
            print "database handle error\n";
            exit;
        }
	# make sure that a database disconnection occurs with 
        # ending kill signals
	$SIG{TERM}=$SIG{INT}=$SIG{QUIT}=$SIG{__DIE__}=\&DISCONNECT;
        # handle connections until we've reached $MAX_CLIENTS_PER_CHILD
        for (my $i=0; $i < $MAX_CLIENTS_PER_CHILD; $i++) {
            my $client = $server->accept() or last;
            # do something with the connection
	    $run = $run+1;
	    my $userinput = <$client>;
	    chomp($userinput);
            #
	    my ($conserver,$query,
		$arg1,$arg2,$arg3)=split(/&/,$userinput);
	    my $query=unescape($query);
            #
            #send query id which is pid_unixdatetime_runningcounter
	    my $queryid = $thisserver;
	    $queryid .="_".($$)."_";
	    $queryid .= time."_";
	    $queryid .= $run;
	    print $client "$queryid\n";
	    #
	    &logthis("QUERY: $query - $arg1 - $arg2 - $arg3");
	    sleep 1;
            #
            my $result='';
            #
            # At this point, query is received, query-ID assigned and sent 
            # back, $query eq 'logquery' will mean that this is a query 
            # against log-files
            if (($query eq 'userlog') || ($query eq 'courselog')) {
                # beginning of log query
                my $udom    = &unescape($arg1);
                my $uname   = &unescape($arg2);
                my $command = &unescape($arg3);
                my $path    = &propath($udom,$uname);
                if (-e "$path/activity.log") {
                    if ($query eq 'userlog') {
                        $result=&userlog($path,$command);
                    } else {
                        $result=&courselog($path,$command);
                    }
                } else {
                    &logthis('Unable to do log query: '.$uname.'@'.$udom);
                    $result='no_such_file';
                }
                # end of log query
            } else {
                # Do an sql query
                $result = &do_sql_query($query,$arg1,$arg2);
            }
            # result does not need to be escaped because it has already been
            # escaped.
            #$result=&escape($result);
            # reply with result, append \n unless already there
	    $result.="\n" unless ($result=~/\n$/);
            &reply("queryreply:$queryid:$result",$conserver);
        }
        # tidy up gracefully and finish
        #
        # close the database handle
	$dbh->disconnect
            or &logthis("<font color='blue'>WARNING: Couldn't disconnect".
                        " from database  $DBI::errstr : $@</font>");
        # this exit is VERY important, otherwise the child will become
        # a producer of more and more children, forking yourself into
        # process death.
        exit;
    }
}

########################################################
########################################################

=pod

=item &do_sql_query

Runs an sql metadata table query.

Inputs: $query, $custom, $customshow

Returns: A string containing escaped results.

=cut

########################################################
########################################################
{
    my @metalist;

sub process_file {
    if ( -e $_ &&  # file exists
         -f $_ &&  # and is a normal file
         /\.meta$/ &&  # ends in meta
         ! /^.+\.\d+\.[^\.]+\.meta$/  # is not a previous version
         ) {
        push(@metalist,$File::Find::name);
    }
}

sub do_sql_query {
    my ($query,$custom,$customshow) = @_;
    $custom     = &unescape($custom);
    $customshow = &unescape($customshow);
    #
    @metalist = ();
    #
    my $result = '';
    my @results = ();
    my @files;
    my $subsetflag=0;
    #
    if ($query) {
        #prepare and execute the query
        my $sth = $dbh->prepare($query);
        unless ($sth->execute()) {
            &logthis('<font color="blue">'.
                     'WARNING: Could not retrieve from database:'.
                     $sth->errstr().'</font>');
        } else {
            my $aref=$sth->fetchall_arrayref;
            foreach my $row (@$aref) {
                push @files,@{$row}[3] if ($custom or $customshow);
                my @b=map { &escape($_); } @$row;
                push @results,join(",", @b);
                # Build up the @files array with the LON-CAPA urls 
                # of the resources.
            }
        }
    }
    # do custom metadata searching here and build into result
    return join("&",@results) if (! ($custom or $customshow));
    # Only get here if there is a custom query or custom show request
    &logthis("Doing custom query for $custom");
    if ($query) {
        @metalist=map {
            $perlvar{'lonDocRoot'}.$_.'.meta';
        } @files;
    } else {
        my $dir = "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}";
        @metalist=(); 
        opendir(RESOURCES,$dir);
        my @homeusers=grep {
            &ishome($dir.'/'.$_);
        } grep {!/^\.\.?$/} readdir(RESOURCES);
        closedir RESOURCES;
        # Define the
        foreach my $user (@homeusers) {
            find (\&process_file,$dir.'/'.$user);
        }
    } 
    # if file is indicated in sql database and
    #     not part of sql-relevant query, do not pattern match.
    #
    # if file is not in sql database, output error.
    #
    # if file is indicated in sql database and is
    #     part of query result list, then do the pattern match.
    my $customresult='';
    my @results;
    foreach my $metafile (@metalist) {
        my $fh=IO::File->new($metafile);
        my @lines=<$fh>;
        my $stuff=join('',@lines);
        if ($stuff=~/$custom/s) {
            foreach my $f ('abstract','author','copyright',
                           'creationdate','keywords','language',
                           'lastrevisiondate','mime','notes',
                           'owner','subject','title') {
                $stuff=~s/\n?\<$f[^\>]*\>.*?<\/$f[^\>]*\>\n?//s;
            }
            my $mfile=$metafile; 
            my $docroot=$perlvar{'lonDocRoot'};
            $mfile=~s/^$docroot//;
            $mfile=~s/\.meta$//;
            unless ($query) {
                my $q2="SELECT * FROM metadata WHERE url ".
                    " LIKE BINARY '?'";
                my $sth = $dbh->prepare($q2);
                $sth->execute($mfile);
                my $aref=$sth->fetchall_arrayref;
                foreach my $a (@$aref) {
                    my @b=map { &escape($_)} @$a;
                    push @results,join(",", @b);
                }
            }
            # &logthis("found: $stuff");
            $customresult.='&custom='.&escape($mfile).','.
                escape($stuff);
        }
    }
    $result=join("&",@results) unless $query;
    $result.=$customresult;
    #
    return $result;
} # End of &do_sql_query

} # End of scoping curly braces for &process_file and &do_sql_query
########################################################
########################################################

=pod

=item &logthis

Inputs: $message, the message to log

Returns: nothing

Writes $message to the logfile.

=cut

########################################################
########################################################
sub logthis {
    my $message=shift;
    my $execdir=$perlvar{'lonDaemons'};
    my $fh=IO::File->new(">>$execdir/logs/lonsql.log");
    my $now=time;
    my $local=localtime($now);
    print $fh "$local ($$): $message\n";
}

# -------------------------------------------------- Non-critical communication

########################################################
########################################################

=pod

=item &subreply

Sends a command to a server.  Called only by &reply.

Inputs: $cmd,$server

Returns: The results of the message or 'con_lost' on error.

=cut

########################################################
########################################################
sub subreply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                                      Type    => SOCK_STREAM,
                                      Timeout => 10)
       or return "con_lost";
    print $sclient "$cmd\n";
    my $answer=<$sclient>;
    chomp($answer);
    $answer="con_lost" if (!$answer);
    return $answer;
}

########################################################
########################################################

=pod

=item &reply

Sends a command to a server.

Inputs: $cmd,$server

Returns: The results of the message or 'con_lost' on error.

=cut

########################################################
########################################################
sub reply {
  my ($cmd,$server)=@_;
  my $answer;
  if ($server ne $perlvar{'lonHostID'}) { 
    $answer=subreply($cmd,$server);
    if ($answer eq 'con_lost') {
	$answer=subreply("ping",$server);
        $answer=subreply($cmd,$server);
    }
  } else {
    $answer='self_reply';
    $answer=subreply($cmd,$server);
  } 
  return $answer;
}

########################################################
########################################################

=pod

=item &escape

Escape special characters in a string.

Inputs: string to escape

Returns: The input string with special characters escaped.

=cut

########################################################
########################################################
sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

########################################################
########################################################

=pod

=item &unescape

Unescape special characters in a string.

Inputs: string to unescape

Returns: The input string with special characters unescaped.

=cut

########################################################
########################################################
sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

########################################################
########################################################

=pod

=item &ishome

Determine if the current machine is the home server for a user.
The determination is made by checking the filesystem for the users information.

Inputs: $author

Returns: 0 - this is not the authors home server, 1 - this is.

=cut

########################################################
########################################################
sub ishome {
    my $author=shift;
    $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    if (-e $proname) {
	return 1;
    } else {
        return 0;
    }
}

########################################################
########################################################

=pod

=item &propath

Inputs: user name, user domain

Returns: The full path to the users directory.

=cut

########################################################
########################################################
sub propath {
    my ($udom,$uname)=@_;
    $udom=~s/\W//g;
    $uname=~s/\W//g;
    my $subdir=$uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
    return $proname;
} 

########################################################
########################################################

=pod

=item &courselog

Inputs: $path, $command

Returns: unescaped string of values.

=cut

########################################################
########################################################
sub courselog {
    my ($path,$command)=@_;
    my %filters=();
    foreach (split(/\:/,&unescape($command))) {
	my ($name,$value)=split(/\=/,$_);
        $filters{$name}=$value;
    }
    my @results=();
    open(IN,$path.'/activity.log') or return ('file_error');
    while (my $line=<IN>) {
        chomp($line);
        my ($timestamp,$host,$log)=split(/\:/,$line);
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
# then additionally
# %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
# or
# %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
#
# get delimiter between timestamped entries to be &&&
        $log=~s/\%26(\d+)\%3a/\&\&\&$1\%3a/g;
# now go over all log entries 
        foreach (split(/\&\&\&/,&unescape($log))) {
	    my ($time,$res,$uname,$udom,$action,@values)=split(/\:/,$_);
            my $values=&unescape(join(':',@values));
            $values=~s/\&/\:/g;
            $res=&unescape($res);
            my $include=1;
            if (($filters{'username'}) && ($uname ne $filters{'username'})) 
                                                               { $include=0; }
            if (($filters{'domain'}) && ($udom ne $filters{'domain'})) 
                                                               { $include=0; }
            if (($filters{'url'}) && ($res!~/$filters{'url'}/)) 
                                                               { $include=0; }
            if (($filters{'start'}) && ($time<$filters{'start'})) 
                                                               { $include=0; }
            if (($filters{'end'}) && ($time>$filters{'end'})) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'view') && ($action)) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'submit') && ($action ne 'POST')) 
                                                               { $include=0; }
            if (($filters{'action'} eq 'grade') && ($action ne 'CSTORE')) 
                                                               { $include=0; }
            if ($include) {
	       push(@results,($time<1000000000?'0':'').$time.':'.$res.':'.
                                            $uname.':'.$udom.':'.
                                            $action.':'.$values);
            }
       }
    }
    close IN;
    return join('&',sort(@results));
}

########################################################
########################################################

=pod

=item &userlog

Inputs: $path, $command

Returns: unescaped string of values.

=cut

########################################################
########################################################
sub userlog {
    my ($path,$command)=@_;
    my %filters=();
    foreach (split(/\:/,&unescape($command))) {
	my ($name,$value)=split(/\=/,$_);
        $filters{$name}=$value;
    }
    my @results=();
    open(IN,$path.'/activity.log') or return ('file_error');
    while (my $line=<IN>) {
        chomp($line);
        my ($timestamp,$host,$log)=split(/\:/,$line);
        $log=&unescape($log);
        my $include=1;
        if (($filters{'start'}) && ($timestamp<$filters{'start'})) 
                                                             { $include=0; }
        if (($filters{'end'}) && ($timestamp>$filters{'end'})) 
                                                             { $include=0; }
        if (($filters{'action'} eq 'log') && ($log!~/^Log/)) { $include=0; }
        if (($filters{'action'} eq 'check') && ($log!~/^Check/)) 
                                                             { $include=0; }
        if ($include) {
	   push(@results,$timestamp.':'.$log);
        }
    }
    close IN;
    return join('&',sort(@results));
}

########################################################
########################################################

=pod

=item Functions required for forking

=over 4

=item REAPER

REAPER takes care of dead children.

=item HUNTSMAN

Signal handler for SIGINT.

=item HUPSMAN

Signal handler for SIGHUP

=item DISCONNECT

Disconnects from database.

=back

=cut

########################################################
########################################################
sub REAPER {                   # takes care of dead children
    $SIG{CHLD} = \&REAPER;
    my $pid = wait;
    $children --;
    &logthis("Child $pid died");
    delete $children{$pid};
}

sub HUNTSMAN {                      # signal handler for SIGINT
    local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
    kill 'INT' => keys %children;
    my $execdir=$perlvar{'lonDaemons'};
    unlink("$execdir/logs/lonsql.pid");
    &logthis("<font color='red'>CRITICAL: Shutting down</font>");
    $unixsock = "mysqlsock";
    my $port="$perlvar{'lonSockDir'}/$unixsock";
    unlink($port);
    exit;                           # clean up with dignity
}

sub HUPSMAN {                      # signal handler for SIGHUP
    local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
    kill 'INT' => keys %children;
    close($server);                # free up socket
    &logthis("<font color='red'>CRITICAL: Restarting</font>");
    my $execdir=$perlvar{'lonDaemons'};
    $unixsock = "mysqlsock";
    my $port="$perlvar{'lonSockDir'}/$unixsock";
    unlink($port);
    exec("$execdir/lonsql");         # here we go again
}

sub DISCONNECT {
    $dbh->disconnect or 
    &logthis("<font color='blue'>WARNING: Couldn't disconnect from database ".
             " $DBI::errstr : $@</font>");
    exit;
}


=pod

=back

=cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>