--- loncom/metadata_database/searchcat.pl 2003/02/03 13:42:16 1.29 +++ loncom/metadata_database/searchcat.pl 2007/01/03 04:01:32 1.70.2.3 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $ +# $Id: searchcat.pl,v 1.70.2.3 2007/01/03 04:01:32 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,15 +26,6 @@ # # http://www.lon-capa.org/ # -# YEAR=2001 -# 04/14/2001, 04/16/2001 Scott Harrison -# -# YEAR=2002 -# 05/11/2002 Scott Harrison -# -# YEAR=2003 -# Scott Harrison -# ### =pod @@ -52,7 +43,6 @@ filesystem installation location: F - C<10 1 * * 7 www /home/httpd/perl/searchcat.pl> This script only allows itself to be run as the user C. @@ -65,8 +55,8 @@ The metadata is entered into a SQL datab This script also does general database maintenance such as reformatting the C table if it is deprecated. -This script also builds dynamic temporal metadata and stores this inside -a F database file. +This script evaluates dynamic metadata from the authors' +F database file in order to store it in MySQL. This script is playing an increasingly important role for a loncapa library server. The proper operation of this script is critical for a smooth @@ -74,896 +64,886 @@ and correct user experience. =cut -# ========================================================== Setting things up. - -# ------------------------------------------------------ Use external modules. - +use strict; +use DBI; use lib '/home/httpd/lib/perl/'; -use LONCAPA::Configuration; +use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; -use DBI; use GDBM_File; use POSIX qw(strftime mktime); -# ----------------- Code to enable 'find' subroutine listing of the .meta files -use File::Find; +use Apache::lonnet(); -# List of .meta files (used on a per-user basis). -my @metalist; +use File::Find; -# --------------- Read loncapa_apache.conf and loncapa.conf and get variables. -my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar = %{$perlvarref}; -undef($perlvarref); # Remove since sensitive and not needed. -delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed. - -# ------------------------------------- Only run if machine is a library server -if ($perlvar{'lonRole'} ne 'library') - { - exit(0); - } - -# ------------------------------ Make sure this process is running as user=www. -my $wwwid = getpwnam('www'); -if ($wwwid != $<) - { - $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj = "LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ". - "mailto $emailto -s '$subj' > /dev/null"); - exit(1); - } - -# ------------------------------------------------------ Initialize log output. -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n"); - -my $dbh; # Database object reference handle. - -# ----------------------------- Verify connection to loncapa:metadata database. -unless ( - $dbh = DBI->connect('DBI:mysql:loncapa','www', - $perlvar{'lonSqlAccess'}, - { RaiseError => 0,PrintError => 0}) - ) - { - print(LOG '**** ERROR **** Cannot connect to database!'."\n"); - exit(0); - } - -# ------------------------------ Create loncapa:metadata table if non-existent. -my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('. - 'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '. 
- 'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '. - 'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '. - 'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '. - 'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '. - 'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '. - 'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '. - 'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '. - 'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '. - 'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM'; - -$dbh->do($make_metadata_table); # Generate the table. - -# ----------------------------- Verify format of the loncapa:metadata database. -# (delete and recreate database if necessary). - -# Make a positive control for verifying table structure. -my $make_metadata_table_CONTROL = $make_metadata_table; -$make_metadata_table_CONTROL =~ - s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/; - -$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); -$dbh->do($make_metadata_table_CONTROL); - -my $table_description; # selectall reference to the table description. - -my $CONTROL_table_string; # What the table description should look like. -my $table_string; # What the table description does look like. - -# Calculate the CONTROL table description (what it should be). -$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata'); -foreach my $table_row (@{$table_description}) - { - $CONTROL_table_string .= join(',',@{$table_row})."\n"; - } - -# Calculate the current table description (what it currently looks like). -$table_description = $dbh->selectall_arrayref('describe metadata'); -foreach my $table_row (@{$table_description}) - { - $table_string .= join(',',@{$table_row})."\n"; - } - -if ($table_string ne $CONTROL_table_string) - { - # Log this incident. - print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'. - '.'."\n"); - # Delete the table. - $dbh->do('DROP TABLE IF EXISTS metadata'); - # Generate the table. - $dbh->do($make_metadata_table); - } - -$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control. - -# ----------------------------------------------- Set utilitysemaphore to zero. -$dbh->do('UPDATE metadata SET utilitysemaphore = 0'); - -# ========================================================= Main functionality. - -# - Determine home authors on this server based on resources dir and user tree. - -# RESOURCES: the resources directory (subdirs correspond to author usernames). -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or - (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n") - and exit(0)); - -# query_home_server_status will look for user home directories on this machine. -my @homeusers = - grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'. - $perlvar{'lonDefDomain'}.'/'.$_) - } grep {!/^\.\.?$/} readdir(RESOURCES); -closedir(RESOURCES); - -unless (@homeusers) - { - print(LOG '=== No home users found on this server.'."\n"); - } - -# Consider each author individually. -foreach my $user (@homeusers) - { - # Make a log entry. - print(LOG "\n".'=== User: '.$user."\n\n"); - - # Get filesystem path to this user's directory. - my $user_directory = - &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user); - - # Remove left-over db-files from a potentially crashed searchcat run. - unlink($user_directory.'/nohist_new_resevaldata.db'); - - # Cleanup the metalist array. 
- undef(@metalist); - @metalist = (); - - # This will add entries to the @metalist array. - &File::Find::find(\&wanted, - $perlvar{'lonDocRoot'}.'/res/'. - $perlvar{'lonDefDomain'}.'/'.$user); - - # -- process file to get metadata and put into search catalog SQL database - # Also, build and store dynamic metadata. - # Also, delete record entries before refreshing. - foreach my $m (@metalist) - { - # Log this action. - print(LOG "- ".$m."\n"); - - # Get metadata from the file. - my $ref = get_metadata_from_file($m); - - # Make a datarecord identifier for this resource. - my $m2 = '/res/'.declutter($m); - $m2 =~ s/\.meta$//; - - # Build and store dynamic metadata inside nohist_resevaldata.db. - build_on_the_fly_dynamic_metadata($m2); - - # Delete record if it already exists. - my $q2 = 'select * from metadata where url like binary '."'".$m2."'"; - my $sth = $dbh->prepare($q2); - $sth->execute(); - my $r1 = $sth->fetchall_arrayref; - if (@$r1) - { - $sth = - $dbh->prepare('delete from metadata where url like binary '. - "'".$m2."'"); - $sth->execute(); - } - - # Add new/replacement record into the loncapa:metadata table. - $sth = $dbh->prepare('insert into metadata values ('. - '"'.delete($ref->{'title'}).'"'.','. - '"'.delete($ref->{'author'}).'"'.','. - '"'.delete($ref->{'subject'}).'"'.','. - '"'.$m2.'"'.','. - '"'.delete($ref->{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($ref->{'notes'}).'"'.','. - '"'.delete($ref->{'abstract'}).'"'.','. - '"'.delete($ref->{'mime'}).'"'.','. - '"'.delete($ref->{'language'}).'"'.','. - '"'.sql_formatted_time( - delete($ref->{'creationdate'})).'"'.','. - '"'.sql_formatted_time( - delete($ref->{'lastrevisiondate'})).'"'.','. - '"'.delete($ref->{'owner'}).'"'.','. - '"'.delete($ref->{'copyright'}).'"'.','. - '1'.')'); - $sth->execute(); - } - -# ----------------------- Clean up database, remove stale SQL database records. - $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0'); - -# -------------------------------------------------- Copy over the new db-files - system('mv '.$user_directory.'/nohist_new_resevaldata.db '. - $user_directory.'/nohist_resevaldata.db'); - } - -# --------------------------------------------------- Close database connection -$dbh->disconnect; -print LOG "\n==== Searchcat completed ".localtime()." ====\n"; +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + ); + +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output +ENDHELP + exit 0; +} + +if (! defined($debug)) { + $debug = 0; +} + +if (! 
defined($verbose)) { + $verbose = 0; +} + +if (defined($oneuser)) { + $simulate=1; +} + +## +## Use variables for table names so we can test this routine a little easier +my %oldnames = ( + 'metadata' => 'metadata', + 'portfolio' => 'portfolio_metadata', + 'access' => 'portfolio_access', + 'addedfields' => 'portfolio_addedfields', + ); + +my %newnames; +# new table names - append pid to have unique temporary tables +foreach my $key (keys(%oldnames)) { + $newnames{$key} = 'new'.$oldnames{$key}.$$; +} + +# +# Only run if machine is a library server +exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); +# +# Make sure this process is running from user=www +my $wwwid=getpwnam('www'); +if ($wwwid!=$<) { + my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}"; + my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ + mail -s '$subj' $emailto > /dev/null"); + exit 1; +} +# +# Let people know we are running +open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log'); +&log(0,'==== Searchcat Run '.localtime()."===="); + + +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} +# +# Connect to database +my $dbh; +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'}, + { RaiseError =>0,PrintError=>0}))) { + &log(0,"Cannot connect to database!"); + die "MySQL Error: Cannot connect to database!\n"; +} +# This can return an error and still be okay, so we do not bother checking. +# (perhaps it should be more robust and check for specific errors) +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key}); + } +} + +# +# Create the new metadata and portfolio tables +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + my $request = + &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key}); + $dbh->do($request); + if ($dbh->err) { + $dbh->disconnect(); + &log(0,"MySQL Error Create: ".$dbh->errstr); + die $dbh->errstr; + } + } +} + +# +# find out which users we need to examine +my @domains = sort(&Apache::lonnet::current_machine_domains()); +&log(9,'domains ="'.join('","',@domains).'"'); + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + no_chdir => 1, + }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } + # Search for public portfolio files + my %portusers; + if ($oneuser) { + %portusers = ( + $oneuser => '', + ); + } else { + my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom; + &descend_tree($dir,0,\%portusers); + } + foreach my $uname (keys(%portusers)) { + my $urlstart = '/uploaded/'.$dom.'/'.$uname; + my 
$pathstart = &propath($dom,$uname).'/userfiles'; + my $is_course = &Apache::lonnet::is_course($dom,$uname); + my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname); + my %access = &Apache::lonnet::get_access_controls($curr_perm); + foreach my $file (keys(%access)) { + my ($group,$url,$fullpath); + if ($is_course) { + ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/); + $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path; + $url = $urlstart.'/groups/'.$group.'/portfolio'.$path; + } else { + $fullpath = $pathstart.'/portfolio'.$file; + $url = $urlstart.'/portfolio'.$file; + } + if (ref($access{$file}) eq 'HASH') { + &process_portfolio_access_data($url,$access{$file}); + } + &process_portfolio_metadata($url,$fullpath,$is_course,$dom, + $uname,$group); + } + } +} + +# +# Rename the tables +if (! $simulate) { + foreach my $key (keys(%oldnames)) { + if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) { + $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key}); + if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful for $key."); + } + } + } +} +if (! $dbh->disconnect) { + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); + die $dbh->errstr; +} +## +## Finished! +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); -exit(0); - -# ================================================================ Subroutines. - -=pod - -=head1 SUBROUTINES - -=cut - -=pod - -B - translate to unstrange escaped syntax to strange characters. - -=over 4 - -Parameters: - -=item I<$str> - string with unweird characters. - -=back - -=over 4 -Returns: +&write_type_count(); +&write_copyright_count(); -=item C - string with potentially weird characters. +exit 0; -=back - -=cut - -sub unescape ($) - { - my $str = shift(@_); +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + +sub descend_tree { + my ($dir,$depth,$alldomusers) = @_; + if (-d $dir) { + opendir(DIR,$dir); + my @contents = grep(!/^\./,readdir(DIR)); + closedir(DIR); + $depth ++; + foreach my $item (@contents) { + if ($depth < 4) { + &descend_tree($dir.'/'.$item,$depth,$alldomusers); + } else { + if (-e $dir.'/'.$item.'/file_permissions.db') { + + $$alldomusers{$item} = ''; + } + } + } + } +} + +sub process_portfolio_access_data { + my ($url,$access_hash) = @_; + foreach my $key (keys(%{$access_hash})) { + my $acc_data; + $acc_data->{url} = $url; + $acc_data->{keynum} = $key; + my ($num,$scope,$end,$start) = + ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/); + next if (($scope ne 'public') && ($scope ne 'guest')); + $acc_data->{scope} = $scope; + if ($end != 0) { + $acc_data->{end} = &sqltime($end); + } + $acc_data->{start} = &sqltime($start); + if (! 
$simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'access'}, + 'portfolio_access',$acc_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } + } + } +} + +sub process_portfolio_metadata { + my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_; + my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname, + $group); + &getfiledates($ref,$fullpath); + if ($is_course) { + $ref->{'groupname'} = $group; + } + my %Data; + if (ref($ref) eq 'HASH') { + %Data = %{$ref}; + } + %Data = ( + %Data, + 'url'=>$url, + 'version'=>'current', + ); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'portfolio'}, + 'portfolio_metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url"); + } + if (ref($addedfields) eq 'HASH') { + if (keys(%{$addedfields}) > 0) { + foreach my $key (keys(%{$addedfields})) { + my $added_data = { + 'url' => $url, + 'field' => $key, + 'value' => $addedfields->{$key}, + 'courserestricted' => $crs, + }; + ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh, + $newnames{'addedfields'}, + 'portfolio_addedfields', + $added_data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key"); + } + } + } + } + } + return; +} + +######################################################## +######################################################## +### ### +### File::Find support routines ### +### ### +######################################################## +######################################################## +## +## &only_meta_files +## +## Called by File::Find. +## Takes a list of files/directories in and returns a list of files/directories +## to search. +sub only_meta_files { + my @PossibleFiles = @_; + my @ChosenFiles; + foreach my $file (@PossibleFiles) { + if ( ($file =~ /\.meta$/ && # Ends in meta + $file !~ /\.\d+\.[^\.]+\.meta$/ # is not for a prior version + ) || (-d $File::Find::dir."/".$file )) { # directories are okay + # but we do not want /. or /.. + push(@ChosenFiles,$file); + } + } + return @ChosenFiles; +} + +## +## +## Debugging routines, use these for 'wanted' in the File::Find call +## +sub print_filename { + my ($file) = $_; + my $fullfilename = $File::Find::name; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } + } + $_=$file; +} + +sub log_metadata { + my ($file) = $_; + my $fullfilename = $File::Find::name; + return if (-d $fullfilename); # No need to do anything here for directories + if ($debug) { + &log(6,$fullfilename); + my $ref = &metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); + } + $_=$file; +} + +## +## process_meta_file +## Called by File::Find. +## Only input is the filename in $_. 
+sub process_meta_file { + my ($file) = $_; + my $filename = $File::Find::name; # full filename + return if (-d $filename); # No need to do anything here for directories + # + &log(3,$filename) if ($debug); + # + my $ref = &metadata($filename); + # + # $url is the original file url, not the metadata file + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); + &log(3," ".$url) if ($debug); + # + # Ignore some files based on their metadata + if ($ref->{'obsolete'}) { + &log(3,"obsolete") if ($debug); + return; + } + &count_copyright($ref->{'copyright'}); + if ($ref->{'copyright'} eq 'private') { + &log(3,"private") if ($debug); + return; + } + # + # Find the dynamic metadata + my %dyn; + if ($url=~ m:/default$:) { + $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); + } else { + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); + &count_type($url); + } + &getfiledates($ref,$target); + # + my %Data = ( + %$ref, + %dyn, + 'url'=>$url, + 'version'=>'current'); + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'}, + 'metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } + } + # + # Reset $_ before leaving + $_ = $file; +} + +######################################################## +######################################################## +### ### +### &metadata($uri) ### +### Retrieve metadata for the given file ### +### ### +######################################################## +######################################################## +sub metadata { + my ($uri) = @_; + my %metacache=(); + $uri=&declutter($uri); + my $filename=$uri; + $uri=~s/\.meta$//; + $uri=''; + if ($filename !~ /\.meta$/) { + $filename.='.meta'; + } + my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); + return undef if (! defined($metastring)); + my $parser=HTML::TokeParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + if (defined($token->[2]->{'part'})) { + $unikey.='_'.$token->[2]->{'part'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.='_'.$token->[2]->{'name'}; + } + if ($metacache{$uri.'keys'}) { + $metacache{$uri.'keys'}.=','.$unikey; + } else { + $metacache{$uri.'keys'}=$unikey; + } + foreach ( @{$token->[3]}) { + $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; + } + if (! 
($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){ + $metacache{$uri.''.$unikey} = + $metacache{$uri.''.$unikey.'.default'}; + } + } # End of ($token->[0] eq 'S') + } + return \%metacache; +} + +############################################################### +############################################################### +### ### +### &portfolio_metadata($filepath,$dom,$uname,$group) ### +### Retrieve metadata for the given file ### +### Returns array - ### +### contains reference to metadatahash and ### +### optional reference to addedfields hash ### +### ### +############################################################### +############################################################### +sub portfolio_metadata { + my ($fullpath,$dom,$uname,$group)=@_; + my ($mime) = ( $fullpath=~/\.(\w+)$/ ); + my %metacache=(); + if ($fullpath !~ /\.meta$/) { + $fullpath .= '.meta'; + } + my (@standard_fields,%addedfields); + my $colsref = + $LONCAPA::lonmetadata::Portfolio_metadata_table_description; + if (ref($colsref) eq 'ARRAY') { + my @columns = @{$colsref}; + foreach my $coldata (@columns) { + push(@standard_fields,$coldata->{'name'}); + } + } + my $metastring=&getfile($fullpath); + if (! defined($metastring)) { + $metacache{'keys'}= 'owner,domain,mime'; + $metacache{'owner'} = $uname.':'.$dom; + $metacache{'domain'} = $dom; + $metacache{'mime'} = $mime; + if ($group ne '') { + $metacache{'keys'} .= ',courserestricted'; + $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname; + } + } else { + my $parser=HTML::TokeParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + if ($metacache{'keys'}) { + $metacache{'keys'}.=','.$entry; + } else { + $metacache{'keys'}=$entry; + } + my $value = $parser->get_text('/'.$entry); + if (!grep(/^\Q$entry\E$/,@standard_fields)) { + my $clean_value = lc($value); + $clean_value =~ s/\s/_/g; + if ($clean_value ne $entry) { + if (defined($addedfields{$entry})) { + $addedfields{$entry} .=','.$value; + } else { + $addedfields{$entry} = $value; + } + } + } else { + $metacache{$entry} = $value; + } + } + } # End of ($token->[0] eq 'S') + } + if (keys(%addedfields) > 0) { + foreach my $key (sort keys(%addedfields)) { + $metacache{'addedfieldnames'} .= $key.','; + $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&'; + } + $metacache{'addedfieldnames'} =~ s/,$//; + $metacache{'addedfieldvalues'} =~ s/\&\&\&$//; + if ($metacache{'keys'}) { + $metacache{'keys'}.=',addedfieldnames'; + } else { + $metacache{'keys'}='addedfieldnames'; + } + $metacache{'keys'}.=',addedfieldvalues'; + } + return (\%metacache,$metacache{'courserestricted'},\%addedfields); +} + +## +## &getfile($filename) +## Slurps up an entire file into a scalar. +## Returns undef if the file does not exist +sub getfile { + my $file = shift(); + if (! -e $file ) { + return undef; + } + my $fh=IO::File->new($file); + my $contents = ''; + while (<$fh>) { + $contents .= $_; + } + return $contents; +} + +## +## &getfiledates() +## Converts creationdate and modifieddates to SQL format +## Applies stat() to file to retrieve dates if missing +sub getfiledates { + my ($ref,$target) = @_; + if (! defined($ref->{'creationdate'}) || + $ref->{'creationdate'} =~ /^\s*$/) { + $ref->{'creationdate'} = (stat($target))[9]; + } + if (! 
defined($ref->{'lastrevisiondate'}) || + $ref->{'lastrevisiondate'} =~ /^\s*$/) { + $ref->{'lastrevisiondate'} = (stat($target))[9]; + } + $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); + $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); +} + +######################################################## +######################################################## +### ### +### Dynamic Metadata ### +### ### +######################################################## +######################################################## +## +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata +## +## Field Type +##----------------------------------------------------------- +## count integer +## course integer +## course_list comma separated list of course ids +## avetries real +## avetries_list comma separated list of real numbers +## stdno real +## stdno_list comma separated list of real numbers +## usage integer +## usage_list comma separated list of resources +## goto scalar +## goto_list comma separated list of resources +## comefrom scalar +## comefrom_list comma separated list of resources +## difficulty real +## difficulty_list comma separated list of real numbers +## sequsage scalar +## sequsage_list comma separated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); + # + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata + my %evaldata; + if (! tie(%evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + return 0; + } + # + %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata); + untie(%evaldata); + $DynamicData{'domain'} = $dom; + #print('user = '.$user.' domain = '.$dom.$/); + # + # Read in the access count data + &log(7,'Reading access count data') if ($debug); + my %countdata; + if (! tie(%countdata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + return 0; + } + while (my ($key,$count) = each(%countdata)) { + next if ($key !~ /^$dom/); + $key = &unescape($key); + &log(8,' Count '.$key.' = '.$count) if ($debug); + $Counts{$key}=$count; + } + untie(%countdata); + if ($debug) { + &log(7,scalar(keys(%Counts)). + " Counts read for ".$user."@".$dom); + &log(7,scalar(keys(%DynamicData)). + " Dynamic metadata read for ".$user."@".$dom); + } + # + return 1; +} + +sub get_dynamic_metadata { + my ($url) = @_; + $url =~ s:^/res/::; + my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url, + \%DynamicData); + # find the count + $data{'count'} = $Counts{$url}; + # + # Log the dynamic metadata + if ($debug) { + while (my($k,$v)=each(%data)) { + &log(8," ".$k." 
=> ".$v); + } + } + return %data; +} + +} # End of %DynamicData and %Counts scope + +######################################################## +######################################################## +### ### +### Counts ### +### ### +######################################################## +######################################################## +{ + +my %countext; + +sub count_type { + my $file=shift; + $file=~/\.(\w+)$/; + my $ext=lc($1); + $countext{$ext}++; +} + +sub write_type_count { + open(RESCOUNT,'>/home/httpd/html/lon-status/rescount.txt'); + while (my ($extension,$count) = each(%countext)) { + print RESCOUNT $extension.'='.$count.'&'; + } + print RESCOUNT 'time='.time."\n"; + close(RESCOUNT); +} + +} # end of scope for %countext + +{ + +my %copyrights; + +sub count_copyright { + $copyrights{@_[0]}++; +} + +sub write_copyright_count { + open(COPYCOUNT,'>/home/httpd/html/lon-status/copyrightcount.txt'); + while (my ($copyright,$count) = each(%copyrights)) { + print COPYCOUNT $copyright.'='.$count.'&'; + } + print COPYCOUNT 'time='.time."\n"; + close(COPYCOUNT); +} + +} # end of scope for %copyrights + +######################################################## +######################################################## +### ### +### Miscellanous Utility Routines ### +### ### +######################################################## +######################################################## +## +## &ishome($username) +## Returns 1 if $username is a LON-CAPA author, 0 otherwise +## (copied from lond, modification of the return value) +sub ishome { + my $author=shift; + $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; + my ($udom,$uname)=split(/\//,$author); + my $proname=propath($udom,$uname); + if (-e $proname) { + return 1; + } else { + return 0; + } +} + +## +## &propath($udom,$uname) +## Returns the path to the users LON-CAPA directory +## (copied from lond) +sub propath { + my ($udom,$uname)=@_; + $udom=~s/\W//g; + $uname=~s/\W//g; + my $subdir=$uname.'__'; + $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; + my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; + return $proname; +} + +## +## &sqltime($timestamp) +## +## Convert perl $timestamp to MySQL time. MySQL expects YYYY-MM-DD HH:MM:SS +## +sub sqltime { + my ($time) = @_; + my $mysqltime; + if ($time =~ + /(\d+)-(\d+)-(\d+) # YYYY-MM-DD + \s # a space + (\d+):(\d+):(\d+) # HH:MM::SS + /x ) { + # Some of the .meta files have the time in mysql + # format already, so just make sure they are 0 padded and + # pass them back. + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + $1,$2,$3,$4,$5,$6); + } elsif ($time =~ /^\d+$/) { + my @TimeData = gmtime($time); + # Alter the month to be 1-12 instead of 0-11 + $TimeData[4]++; + # Alter the year to be from 0 instead of from 1900 + $TimeData[5]+=1900; + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + @TimeData[5,4,3,2,1,0]); + } elsif (! defined($time) || $time == 0) { + $mysqltime = 0; + } else { + &log(0," sqltime:Unable to decode time ".$time); + $mysqltime = 0; + } + return $mysqltime; +} + +## +## &declutter($filename) +## Given a filename, returns a url for the filename. 
+sub declutter { + my $thisfn=shift; + $thisfn=~s/^$Apache::lonnet::perlvar{'lonDocRoot'}//; + $thisfn=~s/^\///; + $thisfn=~s/^res\///; + return $thisfn; +} + +## +## Escape / Unescape special characters +sub unescape { + my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - return($str); - } - -=pod - -B - translate strange characters to unstrange escaped syntax. - -=over 4 - -Parameters: - -=item I<$str> - string with potentially weird characters to unweird-ify. - -=back - -=over 4 - -Returns: + return $str; +} -=item C - unweird-ified string. - -=back - -=cut - -sub escape ($) - { - my $str = shift(@_); +sub escape { + my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; - return($str); - } - -=pod - -B - evaluate and store dynamic metadata. - -Dynamic metadata is stored in a nohist_resevaldata GDBM database. -The only thing that this subroutine really makes happen is adjusting -a 'count' value inside the F as well -as updating F with information from -F. - -It may need optmization, but since it gets called once a week. . . -=over 4 - -Parameters: - -=item I<$url> - the filesystem path (url may be a misnomer...) - -=back - -=over 4 - -Returns: - -=item C - key-value table of dynamically evaluated metadata. - -=back - -=cut - -sub build_on_the_fly_dynamic_metadata ($) - { - # some elements in here maybe non-obvious - - # Need to compute the user's directory. - my $url = &declutter(shift(@_)); - $url =~ s/\.meta$//; - my %returnhash = (); - my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!); - my $user_directory = &construct_path_to_user_directory($adomain,$aauthor); - - # Attempt a GDBM database instantiation inside users directory and proceed. - if ((tie(%evaldata,'GDBM_File', - $user_directory. - '/nohist_resevaldata.db',&GDBM_READER(),0640)) && - (tie(%newevaldata,'GDBM_File', - $user_directory. - '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) - { - # For different variables, track the running sum and counts. - my %sum = (); - my %cnt = (); - - # Define computed items as a sum (add) or an average (avg) or a raw - # count (cnt) or 'app'? - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - - # Untaint the url and use as part of a regular expression. - my $regexp = $url; - $regexp =~ s/(\W)/\\$1/g; - $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs - - # Check existing nohist database for this url. - # this is modfying the 'count' entries - # and copying all othe entries over - foreach (keys %evaldata) - { - my $key = &unescape($_); - if ($key =~ /$regexp/) # If url-based entry exists. - { - my $ctype = $1; # Set to specific category type. - - # Do an increment for this category type. - if (defined($cnt{$ctype})) - { - $cnt{$ctype}++; - } - else - { - $cnt{$ctype} = 1; - } - unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN? - { - # Increment the sum based on the evaluated data in the db. - if (defined($sum{$ctype})) - { - $sum{$ctype} += $evaldata{$_}; - } - else - { - $sum{$ctype} = $evaldata{$_}; - } - } - else # 'app' mode, means to use '
' as a separator - { - if (defined($sum{$ctype})) - { - if ($evaldata{$_}) - { - $sum{$ctype} .= '
'.$evaldata{$_}; - } - } - else - { - $sum{$ctype} = ''.$evaldata{$_}; - } - } - if ($ctype ne 'count') - { - # this is copying all data except 'count' attributes - $newevaldata{$_} = $evaldata{$_}; - } - } - } - - # the only other time this loop is useful is for the 'count' hash - # element - foreach (keys %cnt) - { - if ($listitems{$_} eq 'avg') - { - $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } - elsif ($listitems{$_} eq 'cnt') - { - $returnhash{$_} = $cnt{$_}; - } - else - { - $returnhash{$_} = $sum{$_}; - } - } - - # seems to be doing something useful - if ($returnhash{'count'}) - { - my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey} = $returnhash{'count'}; - } - - untie(%evaldata); # Close/release the original nohist database. - untie(%newevaldata); # Close/release the new nohist database. - } - return(%returnhash); - } - -=pod - -B - used by B subroutine. - -This evaluates whether a file is wanted, and pushes it onto the -I<@metalist> array. This subroutine was, for the most part, auto-generated -by the B command. - -=over 4 - -Parameters: - -=item I<$file> - a path to the file. - -=back - -=over 4 - -Returns: - -=item C - true or false based on logical statement. - -=back - -=cut - -sub wanted ($) - { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f $_ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,$File::Find::dir.'/'.$_); - } - -=pod - -B - read xml-tagged file and return parsed metadata. - -I - -=over 4 - -Parameters: - -=item I<$file> - a path.to the file. - -=back - -=over 4 - -Returns: - -=item C - a hash array (keys and values). - -=back - -=cut - -sub get_metadata_from_file ($) - { - my ($filename) = @_; - my %metatable; # Used to store return value of hash-tabled metadata. - $filename = &declutter($filename); # Remove non-identifying filesystem info - my $uri = ''; # The URI is not relevant in this scenario. - unless ($filename =~ m/\.meta$/) # Unless ending with .meta. - { - $filename .= '.meta'; # Append a .meta suffix. - } - # Get the file contents. - my $metadata_string = - &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename); - - # Parse the file based on its XML tags. - my $parser = HTML::TokeParser->new(\$metadata_string); - my $token; - while ($token = $parser->get_token) # Loop through tokens. - { - if ($token->[0] eq 'S') # If it is a start token. - { - my $entry = $token->[1]; - my $unikey = $entry; # A unique identifier for this xml tag key. - if (defined($token->[2]->{'part'})) - { - $unikey .= '_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) - { - $unikey .= '_'.$token->[2]->{'name'}; - } - # Append $unikey to metatable's keys entry. - if ($metatable{$uri.'keys'}) - { - $metatable{$uri.'keys'} .= ','.$unikey; - } - else - { - $metatable{$uri.'keys'} = $unikey; - } - # Insert contents into metatable entry for the unikey. - foreach my $t3 (@{$token->[3]}) - { - $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3}; - } - # If there was no text contained inside the tags, set = default. - unless - ( - $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry) - ) - { - $metatable{$uri.''.$unikey} = - $metatable{$uri.''.$unikey.'.default'}; - } - } - } - # Return with a key-value table of XML tags and their tag contents. - return(\%metatable); - } - -=pod - -B - returns either the contents of the file or a -1. - -=over 4 - -Parameters: - -=item I<$file> - a complete filesystem path.to the file. 
- -=back - -=over 4 - -Returns: - -=item C - file contents or a -1. - -=back - -=cut - -sub get_file_contents ($) - { - my $file = shift(@_); - - # If file does not exist, then return a -1 value. - unless (-e $file) - { - return(-1); - } - - # Read in file contents. - my $file_handle = IO::File->new($file); - my $file_contents = ''; - while (<$file_handle>) - { - $file_contents .= $_; - } - - # Return file contents. - return($file_contents); - } - -=pod - -B - Declutters URLs (remove extraneous prefixed filesystem path). - -=over 4 - -Parameters: - -=item I<$filesystem_path> - a complete filesystem path. - -=back - -=over 4 - -Returns: - -=item C - remnants of the filesystem path (beginning portion removed). - -=back - -=cut - -sub declutter - { - my $filesystem_path = shift(@_); - - # Remove beginning portions of the filesystem path. - $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//; - $filesystem_path =~ s!^/!!; - $filesystem_path =~ s!^res/!!; - - # Return what is remaining for the filesystem path. - return($filesystem_path); - } - -=pod - -B - Is this the home server of an author's directory? - -=over 4 - -Parameters: - -=item I<$author_filesystem_path> - directory path for a user. - -=back - -=over 4 - -Returns: - -=item C - 1 if true; 0 if false. - -=back - -=cut - -sub query_home_server_status ($) - { - my $author_filesystem_path = shift(@_); - - # Remove beginning portion of this filesystem path. - $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!; - - # Construct path to the author's ordinary user directory. - my ($user_domain,$username) = split(m!/!,$author_filesystem_path); - my $user_directory_path = construct_path_to_user_directory($user_domain, - $username); - - # Return status of whether the user directory path is defined. - if (-e $user_directory_path) - { - return(1); # True. - } - else - { - return(0); # False. - } - } - -=pod - -B ($$) - makes a filesystem path to user dir. - -=over 4 - -Parameters: - -=item I<$user_domain> - the loncapa domain of the user. - -=item I<$username> - the unique username (user id) of the user. - -=back - -=over 4 - -Returns: - -=item C - representing the path on the filesystem. - -=back - -=cut - -sub construct_path_to_user_directory ($$) - { - my ($user_domain,$username) = @_; - - # Untaint. - $user_domain =~ s/\W//g; - $username =~ s/\W//g; - - # Create three levels of sub-directoried filesystem path - # based on the first three characters of the username. - my $sub_filesystem_path = $username.'__'; - $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!; - - # Use the sub-directoried levels and other variables to generate - # the complete filesystem path. - my $complete_filesystem_path = - join('/',($perlvar{'lonUsersDir'}, - $user_domain, - $sub_filesystem_path, - $username)); - - # Return the complete filesystem path. - return($complete_filesystem_path); - } - -=pod - -B (@) - turns seconds since epoch into datetime sql format. - -=over 4 - -Parameters: - -=item I<$epochtime> - time in seconds since epoch (may need to be sanitized). - -=back - -=over 4 - -Returns: - -=item C - datetime sql formatted string. - -=back - -=cut - -sub sql_formatted_time ($) - { - # Sanitize the time argument and convert to localtime array. - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(&sanitize_time(shift(@_))); - - # Convert month from (0..11) to (1..12). - $mon += 1; - - # Make the year compatible with A.D. specification. - $year += 1900; - - # Return a date which is compatible with MySQL's "DATETIME" format. 
- return(join('-',($year,$mon,$mday)). - ' '. - join(':',($hour,$min,$sec)) - ); - } - - -# ==================================== The following two subroutines are needed -# for accommodating incorrect time formats inside the metadata. - -=pod - -B (@) - turns time metadata into seconds since epoch. - -=over 4 - -Parameters: - -=item I<%time_metadata> - a key-value listing characterizing month, year, etc. - -=back - -=over 4 - -Returns: - -=item C - seconds since epoch. - -=back - -=cut - -sub make_seconds_since_epoch (@) - { - # Keytable of time metadata. - my %time_metadata = @_; - - # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC). - return(POSIX::mktime( - ($time_metadata{'seconds'}, - $time_metadata{'minutes'}, - $time_metadata{'hours'}, - $time_metadata{'day'}, - $time_metadata{'month'}-1, - $time_metadata{'year'}-1900, - 0, - 0, - $time_metadata{'dlsav'}) - ) - ); - } - -=pod - -B - if time looks sql-formatted, make it seconds since epoch. - -Somebody described this subroutine as -"retro-fixing of un-backward-compatible time format". - -What this means, is that a part of this code expects to get UTC seconds -since the epoch (beginning of 1970). Yet, some of the .meta files have -sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch -integers (e.g. 1044147435). These time strings do not encode the timezone -and, in this sense, can be considered "un-backwards-compatible". - -=over 4 - -Parameters: - -=item I<$potentially_badformat_string> - string to "retro-fix". - -=back - -=over 4 - -Returns: - -=item C - seconds since epoch. - -=back - -=cut - -sub sanitize_time ($) - { - my $timestamp = shift(@_); - # If timestamp is in this unexpected format.... - if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) - { - # then convert into seconds since epoch (the expected format). - $timestamp = &make_seconds_since_epoch( - 'year' => $1, - 'month' => $2, - 'day' => $3, - 'hours' => $4, - 'minutes' => $5, - 'seconds' => $6 - ); - } - # Otherwise we assume timestamp to be as expected. - return($timestamp); - } - -=pod - -=head1 AUTHOR - -Written to help the loncapa project. - -Scott Harrison, sharrison@users.sourceforge.net - -This is distributed under the same terms as loncapa (i.e. "freeware"). - -=cut + return $str; +}
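
Based on the -help text and option handling added in this revision, a by-hand test run might look like the sketch below. The author name "someauthor" is a placeholder; per the checks in the script, it must be invoked as user www on a library server, and output is appended to $perlvar{'lonDaemons'}/logs/searchcat.log.

    # Dry run for one author: -only implies -simulate, so no rows are inserted
    # and the production metadata tables are never renamed into place.
    /home/httpd/perl/searchcat.pl -only=someauthor -verbose=9 -debug

    # Full rebuild, as the weekly cron entry quoted in the POD invokes it.
    /home/httpd/perl/searchcat.pl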