#!/usr/bin/perl # The LearningOnline Network # lonsql - LON TCP-MySQL-Server Daemon for handling database requests. # # $Id: lonsql,v 1.71.2.1 2006/02/10 09:50:50 albertel Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # =pod =head1 NAME lonsql - LON TCP-MySQL-Server Daemon for handling database requests. =head1 SYNOPSIS This script should be run as user=www. Note that a lonsql.pid file contains the pid of the parent process. =head1 OVERVIEW =head2 Purpose within LON-CAPA LON-CAPA is meant to distribute A LOT of educational content to A LOT of people. It is ineffective to directly rely on contents within the ext2 filesystem to be speedily scanned for on-the-fly searches of content descriptions. (Simply put, it takes a cumbersome amount of time to open, read, analyze, and close thousands of files.) The solution is to index various data fields that are descriptive of the educational resources on a LON-CAPA server machine in a database. Descriptive data fields are referred to as "metadata". The question then arises as to how this metadata is handled in terms of the rest of the LON-CAPA network without burdening client and daemon processes. The obvious solution, using lonc to send a query to a lond process, doesn't work so well in general as you can see in the following example: lonc= loncapa client process A-lonc= a lonc process on Server A lond= loncapa daemon process database command A-lonc --------TCP/IP----------------> B-lond The problem emerges that A-lonc and B-lond are kept waiting for the MySQL server to "do its stuff", or in other words, perform the conceivably sophisticated, data-intensive, time-sucking database transaction. By tying up a lonc and lond process, this significantly cripples the capabilities of LON-CAPA servers. The solution is to offload the work onto another process, and use lonc and lond just for requests and notifications of completed processing: database command A-lonc ---------TCP/IP-----------------> B-lond =====> B-lonsql <---------------------------------/ | "ok, I'll get back to you..." | | / A-lond <------------------------------- B-lonc <====== "Guess what? I have the result!" Of course, depending on success or failure, the messages may vary, but the principle remains the same where a separate pool of children processes (lonsql's) handle the MySQL database manipulations. Thus, lonc and lond spend effectively no time waiting on results from the database. =head1 Internals =over 4 =cut use strict; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use LONCAPA::lonmetadata(); use IO::Socket; use Symbol; use POSIX; use IO::Select; use IO::File; use Socket; use Fcntl; use Tie::RefHash; use DBI; use File::Find; use localenroll; ######################################################## ######################################################## =pod =item Global Variables =over 4 =item dbh =back =cut ######################################################## ######################################################## my $dbh; ######################################################## ######################################################## =pod =item Variables required for forking =over 4 =item $MAX_CLIENTS_PER_CHILD The number of clients each child should process. =item %children The keys to %children are the current child process IDs =item $children The current number of children =back =cut ######################################################## ######################################################## my $MAX_CLIENTS_PER_CHILD = 5; # number of clients each child should process my %children = (); # keys are current child process IDs my $children = 0; # current number of children ################################################################### ################################################################### =pod =item Main body of code. =over 4 =item Read data from loncapa_apache.conf and loncapa.conf. =item Ensure we can access the database. =item Determine if there are other instances of lonsql running. =item Read the hosts file. =item Create a socket for lonsql. =item Fork once and dissociate from parent. =item Write PID to disk. =item Prefork children and maintain the population of children. =back =cut ################################################################### ################################################################### my $childmaxattempts=10; my $run =0; # running counter to generate the query-id # # Read loncapa_apache.conf and loncapa.conf # my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); my %perlvar=%{$perlvarref}; # # Write the /home/www/.my.cnf file my $conf_file = '/home/www/.my.cnf'; if (! -e $conf_file) { if (open MYCNF, ">$conf_file") { print MYCNF <<"ENDMYCNF"; [client] user=www password=$perlvar{'lonSqlAccess'} ENDMYCNF close MYCNF; } else { warn "Unable to write $conf_file, continuing"; } } # # Make sure that database can be accessed # my $dbh; unless ($dbh = DBI->connect("DBI:mysql:loncapa","www", $perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0})) { print "Cannot connect to database!\n"; my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; my $subj="LON: $perlvar{'lonHostID'} Cannot connect to database!"; system("echo 'Cannot connect to MySQL database!' |". " mailto $emailto -s '$subj' > /dev/null"); open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); print SMP 'time='.time.'&mysql=defunct'."\n"; close(SMP); exit 1; } else { unlink('/home/httpd/html/lon-status/mysql.txt'); $dbh->disconnect; } # # Check if other instance running # my $pidfile="$perlvar{'lonDaemons'}/logs/lonsql.pid"; if (-e $pidfile) { my $lfh=IO::File->new("$pidfile"); my $pide=<$lfh>; chomp($pide); if (kill 0 => $pide) { die "already running"; } } # # Read hosts file # my $thisserver; my $PREFORK=4; # number of children to maintain, at least four spare open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file"; while (my $configline=) { my ($id,$domain,$role,$name)=split(/:/,$configline); $name=~s/\s//g; $thisserver=$name if ($id eq $perlvar{'lonHostID'}); #$PREFORK++; } close(CONFIG); # #$PREFORK=int($PREFORK/4); # # Create a socket to talk to lond # my $unixsock = "mysqlsock"; my $localfile="$perlvar{'lonSockDir'}/$unixsock"; my $server; unlink ($localfile); unless ($server=IO::Socket::UNIX->new(Local =>"$localfile", Type => SOCK_STREAM, Listen => 10)) { print "in socket error:$@\n"; } # # Fork once and dissociate # my $fpid=fork; exit if $fpid; die "Couldn't fork: $!" unless defined ($fpid); POSIX::setsid() or die "Can't start new session: $!"; # # Write our PID on disk my $execdir=$perlvar{'lonDaemons'}; open (PIDSAVE,">$execdir/logs/lonsql.pid"); print PIDSAVE "$$\n"; close(PIDSAVE); &logthis("CRITICAL: ---------- Starting ----------"); # # Ignore signals generated during initial startup $SIG{HUP}=$SIG{USR1}='IGNORE'; # Now we are on our own # Fork off our children. for (1 .. $PREFORK) { make_new_child(); } # # Install signal handlers. $SIG{CHLD} = \&REAPER; $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; $SIG{HUP} = \&HUPSMAN; # # And maintain the population. while (1) { sleep; # wait for a signal (i.e., child's death) for (my $i = $children; $i < $PREFORK; $i++) { make_new_child(); # top up the child pool } } ######################################################## ######################################################## =pod =item &make_new_child Inputs: None Returns: None =cut ######################################################## ######################################################## sub make_new_child { my $pid; my $sigset; # # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) or die "Can't block SIGINT for fork: $!\n"; # die "fork: $!" unless defined ($pid = fork); # if ($pid) { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = 1; $children++; return; } else { # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before # unblock signals sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; #open database handle # making dbh global to avoid garbage collector unless ($dbh = DBI->connect("DBI:mysql:loncapa","www", $perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0})) { sleep(10+int(rand(20))); &logthis("WARNING: Couldn't connect to database". ": $@"); # "($st secs): $@"); print "database handle error\n"; exit; } # make sure that a database disconnection occurs with # ending kill signals $SIG{TERM}=$SIG{INT}=$SIG{QUIT}=$SIG{__DIE__}=\&DISCONNECT; # handle connections until we've reached $MAX_CLIENTS_PER_CHILD for (my $i=0; $i < $MAX_CLIENTS_PER_CHILD; $i++) { my $client = $server->accept() or last; # do something with the connection $run = $run+1; my $userinput = <$client>; chomp($userinput); # my ($conserver,$query, $arg1,$arg2,$arg3)=split(/&/,$userinput); my $query=unescape($query); # #send query id which is pid_unixdatetime_runningcounter my $queryid = $thisserver; $queryid .="_".($$)."_"; $queryid .= time."_"; $queryid .= $run; print $client "$queryid\n"; # # &logthis("QUERY: $query - $arg1 - $arg2 - $arg3"); sleep 1; # my $result=''; # # At this point, query is received, query-ID assigned and sent # back, $query eq 'logquery' will mean that this is a query # against log-files if (($query eq 'userlog') || ($query eq 'courselog')) { # beginning of log query my $udom = &unescape($arg1); my $uname = &unescape($arg2); my $command = &unescape($arg3); my $path = &propath($udom,$uname); if (-e "$path/activity.log") { if ($query eq 'userlog') { $result=&userlog($path,$command); } else { $result=&courselog($path,$command); } } else { &logthis('Unable to do log query: '.$uname.'@'.$udom); $result='no_such_file'; } # end of log query } elsif (($query eq 'fetchenrollment') || ($query eq 'institutionalphotos')) { # retrieve institutional class lists my $dom = &unescape($arg1); my %affiliates = (); my %replies = (); my $locresult = ''; my $querystr = &unescape($arg3); foreach (split/%%/,$querystr) { if (/^([^=]+)=([^=]+)$/) { @{$affiliates{$1}} = split/,/,$2; } } if ($query eq 'fetchenrollment') { $locresult = &localenroll::fetch_enrollment($dom,\%affiliates,\%replies); } elsif ($query eq 'institutionalphotos') { my $crs = &unescape($arg2); eval { local($SIG{__DIE__})='DEFAULT'; $locresult = &localenroll::institutional_photos($dom,$crs,\%affiliates,\%replies,'update'); }; if ($@) { $locresult = 'error'; } } $result = &escape($locresult.':'); if ($locresult) { $result .= &escape(join(':',map{$_.'='.$replies{$_}} keys %replies)); } } elsif ($query eq 'prepare activity log') { my ($cid,$domain) = map {&unescape($_);} ($arg1,$arg2); &logthis('preparing activity log tables for '.$cid); my $command = qq{$perlvar{'lonDaemons'}/parse_activity_log.pl -course=$cid -domain=$domain}; system($command); &logthis($command); my $returnvalue = $?>>8; if ($returnvalue) { $result = 'error: parse_activity_log.pl returned '. $returnvalue; } else { $result = 'success'; } } else { # Do an sql query $result = &do_sql_query($query,$arg1,$arg2); } # result does not need to be escaped because it has already been # escaped. #$result=&escape($result); &reply("queryreply:$queryid:$result",$conserver); } # tidy up gracefully and finish # # close the database handle $dbh->disconnect or &logthis("WARNING: Couldn't disconnect". " from database $DBI::errstr : $@"); # this exit is VERY important, otherwise the child will become # a producer of more and more children, forking yourself into # process death. exit; } } ######################################################## ######################################################## =pod =item &do_sql_query Runs an sql metadata table query. Inputs: $query, $custom, $customshow Returns: A string containing escaped results. =cut ######################################################## ######################################################## { my @metalist; sub process_file { if ( -e $_ && # file exists -f $_ && # and is a normal file /\.meta$/ && # ends in meta ! /^.+\.\d+\.[^\.]+\.meta$/ # is not a previous version ) { push(@metalist,$File::Find::name); } } sub do_sql_query { my ($query,$custom,$customshow) = @_; # &logthis('doing query '.$query); $custom = &unescape($custom); $customshow = &unescape($customshow); # @metalist = (); # my $result = ''; my @results = (); my @files; my $subsetflag=0; # if ($query) { #prepare and execute the query my $sth = $dbh->prepare($query); unless ($sth->execute()) { &logthis(''. 'WARNING: Could not retrieve from database:'. $sth->errstr().''); } else { my $aref=$sth->fetchall_arrayref; foreach my $row (@$aref) { push @files,@{$row}[3] if ($custom or $customshow); my @b=map { &escape($_); } @$row; push @results,join(",", @b); # Build up the @files array with the LON-CAPA urls # of the resources. } } } # do custom metadata searching here and build into result return join("&",@results) if (! ($custom or $customshow)); # Only get here if there is a custom query or custom show request &logthis("Doing custom query for $custom"); if ($query) { @metalist=map { $perlvar{'lonDocRoot'}.$_.'.meta'; } @files; } else { my $dir = "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"; @metalist=(); opendir(RESOURCES,$dir); my @homeusers=grep { &ishome($dir.'/'.$_); } grep {!/^\.\.?$/} readdir(RESOURCES); closedir RESOURCES; # Define the foreach my $user (@homeusers) { find (\&process_file,$dir.'/'.$user); } } # if file is indicated in sql database and # not part of sql-relevant query, do not pattern match. # # if file is not in sql database, output error. # # if file is indicated in sql database and is # part of query result list, then do the pattern match. my $customresult=''; my @results; foreach my $metafile (@metalist) { my $fh=IO::File->new($metafile); my @lines=<$fh>; my $stuff=join('',@lines); if ($stuff=~/$custom/s) { foreach my $f ('abstract','author','copyright', 'creationdate','keywords','language', 'lastrevisiondate','mime','notes', 'owner','subject','title') { $stuff=~s/\n?\<$f[^\>]*\>.*?<\/$f[^\>]*\>\n?//s; } my $mfile=$metafile; my $docroot=$perlvar{'lonDocRoot'}; $mfile=~s/^$docroot//; $mfile=~s/\.meta$//; unless ($query) { my $q2="SELECT * FROM metadata WHERE url ". " LIKE BINARY '?'"; my $sth = $dbh->prepare($q2); $sth->execute($mfile); my $aref=$sth->fetchall_arrayref; foreach my $a (@$aref) { my @b=map { &escape($_)} @$a; push @results,join(",", @b); } } # &logthis("found: $stuff"); $customresult.='&custom='.&escape($mfile).','. escape($stuff); } } $result=join("&",@results) unless $query; $result.=$customresult; # return $result; } # End of &do_sql_query } # End of scoping curly braces for &process_file and &do_sql_query ######################################################## ######################################################## =pod =item &logthis Inputs: $message, the message to log Returns: nothing Writes $message to the logfile. =cut ######################################################## ######################################################## sub logthis { my $message=shift; my $execdir=$perlvar{'lonDaemons'}; my $fh=IO::File->new(">>$execdir/logs/lonsql.log"); my $now=time; my $local=localtime($now); print $fh "$local ($$): $message\n"; } # -------------------------------------------------- Non-critical communication ######################################################## ######################################################## =pod =item &subreply Sends a command to a server. Called only by &reply. Inputs: $cmd,$server Returns: The results of the message or 'con_lost' on error. =cut ######################################################## ######################################################## sub subreply { my ($cmd,$server)=@_; my $peerfile="$perlvar{'lonSockDir'}/$server"; my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; print $sclient "$cmd\n"; my $answer=<$sclient>; chomp($answer); $answer="con_lost" if (!$answer); return $answer; } ######################################################## ######################################################## =pod =item &reply Sends a command to a server. Inputs: $cmd,$server Returns: The results of the message or 'con_lost' on error. =cut ######################################################## ######################################################## sub reply { my ($cmd,$server)=@_; my $answer; if ($server ne $perlvar{'lonHostID'}) { $answer=subreply($cmd,$server); if ($answer eq 'con_lost') { $answer=subreply("ping",$server); $answer=subreply($cmd,$server); } } else { $answer='self_reply'; $answer=subreply($cmd,$server); } return $answer; } ######################################################## ######################################################## =pod =item &escape Escape special characters in a string. Inputs: string to escape Returns: The input string with special characters escaped. =cut ######################################################## ######################################################## sub escape { my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; return $str; } ######################################################## ######################################################## =pod =item &unescape Unescape special characters in a string. Inputs: string to unescape Returns: The input string with special characters unescaped. =cut ######################################################## ######################################################## sub unescape { my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; } ######################################################## ######################################################## =pod =item &ishome Determine if the current machine is the home server for a user. The determination is made by checking the filesystem for the users information. Inputs: $author Returns: 0 - this is not the authors home server, 1 - this is. =cut ######################################################## ######################################################## sub ishome { my $author=shift; $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; my ($udom,$uname)=split(/\//,$author); my $proname=propath($udom,$uname); if (-e $proname) { return 1; } else { return 0; } } ######################################################## ######################################################## =pod =item &propath Inputs: user name, user domain Returns: The full path to the users directory. =cut ######################################################## ######################################################## sub propath { my ($udom,$uname)=@_; $udom=~s/\W//g; $uname=~s/\W//g; my $subdir=$uname.'__'; $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; return $proname; } ######################################################## ######################################################## =pod =item &courselog Inputs: $path, $command Returns: unescaped string of values. =cut ######################################################## ######################################################## sub courselog { my ($path,$command)=@_; my %filters=(); foreach (split(/\:/,&unescape($command))) { my ($name,$value)=split(/\=/,$_); $filters{$name}=$value; } my @results=(); open(IN,$path.'/activity.log') or return ('file_error'); while (my $line=) { chomp($line); my ($timestamp,$host,$log)=split(/\:/,$line); # # $log has the actual log entries; currently still escaped, and # %26(timestamp)%3a(url)%3a(user)%3a(domain) # then additionally # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value) # or # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value) # # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d+)\%3a/\&\&\&$1\%3a/g; # now go over all log entries foreach (split(/\&\&\&/,&unescape($log))) { my ($time,$res,$uname,$udom,$action,@values)=split(/\:/,$_); my $values=&unescape(join(':',@values)); $values=~s/\&/\:/g; $res=&unescape($res); my $include=1; if (($filters{'username'}) && ($uname ne $filters{'username'})) { $include=0; } if (($filters{'domain'}) && ($udom ne $filters{'domain'})) { $include=0; } if (($filters{'url'}) && ($res!~/$filters{'url'}/)) { $include=0; } if (($filters{'start'}) && ($time<$filters{'start'})) { $include=0; } if (($filters{'end'}) && ($time>$filters{'end'})) { $include=0; } if (($filters{'action'} eq 'view') && ($action)) { $include=0; } if (($filters{'action'} eq 'submit') && ($action ne 'POST')) { $include=0; } if (($filters{'action'} eq 'grade') && ($action ne 'CSTORE')) { $include=0; } if ($include) { push(@results,($time<1000000000?'0':'').$time.':'.$res.':'. $uname.':'.$udom.':'. $action.':'.$values); } } } close IN; return join('&',sort(@results)); } ######################################################## ######################################################## =pod =item &userlog Inputs: $path, $command Returns: unescaped string of values. =cut ######################################################## ######################################################## sub userlog { my ($path,$command)=@_; my %filters=(); foreach (split(/\:/,&unescape($command))) { my ($name,$value)=split(/\=/,$_); $filters{$name}=$value; } my @results=(); open(IN,$path.'/activity.log') or return ('file_error'); while (my $line=) { chomp($line); my ($timestamp,$host,$log)=split(/\:/,$line); $log=&unescape($log); my $include=1; if (($filters{'start'}) && ($timestamp<$filters{'start'})) { $include=0; } if (($filters{'end'}) && ($timestamp>$filters{'end'})) { $include=0; } if (($filters{'action'} eq 'log') && ($log!~/^Log/)) { $include=0; } if (($filters{'action'} eq 'check') && ($log!~/^Check/)) { $include=0; } if ($include) { push(@results,$timestamp.':'.$log); } } close IN; return join('&',sort(@results)); } ######################################################## ######################################################## =pod =item Functions required for forking =over 4 =item REAPER REAPER takes care of dead children. =item HUNTSMAN Signal handler for SIGINT. =item HUPSMAN Signal handler for SIGHUP =item DISCONNECT Disconnects from database. =back =cut ######################################################## ######################################################## sub REAPER { # takes care of dead children $SIG{CHLD} = \&REAPER; my $pid = wait; $children --; &logthis("Child $pid died"); delete $children{$pid}; } sub HUNTSMAN { # signal handler for SIGINT local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lonsql.pid"); &logthis("CRITICAL: Shutting down"); $unixsock = "mysqlsock"; my $port="$perlvar{'lonSockDir'}/$unixsock"; unlink($port); exit; # clean up with dignity } sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; close($server); # free up socket &logthis("CRITICAL: Restarting"); my $execdir=$perlvar{'lonDaemons'}; $unixsock = "mysqlsock"; my $port="$perlvar{'lonSockDir'}/$unixsock"; unlink($port); exec("$execdir/lonsql"); # here we go again } sub DISCONNECT { $dbh->disconnect or &logthis("WARNING: Couldn't disconnect from database ". " $DBI::errstr : $@"); exit; } =pod =back =cut