--- loncom/metadata_database/searchcat.pl 2004/04/08 15:57:32 1.55 +++ loncom/metadata_database/searchcat.pl 2007/01/02 12:52:22 1.75 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $ +# $Id: searchcat.pl,v 1.75 2007/01/02 12:52:22 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,105 +65,222 @@ and correct user experience. =cut use strict; - use DBI; use lib '/home/httpd/lib/perl/'; -use LONCAPA::Configuration; use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); + +use Apache::lonnet(); + use File::Find; +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + ); + +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output +ENDHELP + exit 0; +} + +if (! defined($debug)) { + $debug = 0; +} + +if (! defined($verbose)) { + $verbose = 0; +} + +if (defined($oneuser)) { + $simulate=1; +} + ## ## Use variables for table names so we can test this routine a little easier -my $oldname = 'metadata'; -my $newname = 'newmetadata'; +my %oldnames = ( + 'metadata' => 'metadata', + 'portfolio' => 'portfolio_metadata', + 'access' => 'portfolio_access', + 'addedfields' => 'portfolio_addedfields', + ); + +my %newnames; +# new table names - append pid to have unique temporary tables +foreach my $key (keys(%oldnames)) { + $newnames{$key} = 'new'.$oldnames{$key}.$$; +} # -# Read loncapa_apache.conf and loncapa.conf -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed -# # Only run if machine is a library server -exit if ($perlvar{'lonRole'} ne 'library'); +exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library'); # # Make sure this process is running from user=www my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}"; + my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); + mail -s '$subj' $emailto > /dev/null"); exit 1; } # # Let people know we are running -open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n"; +open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log'); +&log(0,'==== Searchcat Run '.localtime()."===="); + + +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} # # Connect to database my $dbh; -if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0}))) { - print LOG "Cannot connect to database!\n"; + &log(0,"Cannot connect to database!"); die "MySQL Error: Cannot connect to database!\n"; } # This can return an error and still be okay, so we do not bother checking. # (perhaps it should be more robust and check for specific errors) -$dbh->do('DROP TABLE IF EXISTS '.$newname); +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key}); + } +} + # -# Create the new table -my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname); -$dbh->do($request); -if ($dbh->err) { - $dbh->disconnect(); - print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; - die $dbh->errstr; +# Create the new metadata and portfolio tables +foreach my $key (keys(%newnames)) { + if ($newnames{$key} ne '') { + my $request = + &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key}); + $dbh->do($request); + if ($dbh->err) { + $dbh->disconnect(); + &log(0,"MySQL Error Create: ".$dbh->errstr); + die $dbh->errstr; + } + } } + # # find out which users we need to examine -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers = - grep { - &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); - } grep { - !/^\.\.?$/; - } readdir(RESOURCES); -closedir RESOURCES; -# -# Loop through the users -foreach my $user (@homeusers) { - print LOG "=== User: ".$user."\n"; - my $prodir=&propath($perlvar{'lonDefDomain'},$user); - # - # Use File::Find to get the files we need to read/modify - find( - {preprocess => \&only_meta_files, -# wanted => \&print_filename, -# wanted => \&log_metadata, - wanted => \&process_meta_file, - }, - "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); -} -# -# Rename the table -$dbh->do('DROP TABLE IF EXISTS '.$oldname); -if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { - print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; - die $dbh->errstr; +my @domains = sort(&Apache::lonnet::current_machine_domains()); +&log(9,'domains ="'.join('","',@domains).'"'); + +foreach my $dom (@domains) { + &log(9,'domain = '.$dom); + opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom"); + my @homeusers = + grep { + &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); + closedir RESOURCES; + &log(5,'users = '.$dom.':'.join(',',@homeusers)); + # + if ($oneuser) { + @homeusers=($oneuser); + } + # + # Loop through the users + foreach my $user (@homeusers) { + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, + #wanted => \&print_filename, + #wanted => \&log_metadata, + wanted => \&process_meta_file, + no_chdir => 1, + }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) ); + } + # Search for public portfolio files + my %portusers; + if ($oneuser) { + %portusers = ( + $oneuser => '', + ); + } else { + my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom; + &descend_tree($dir,0,\%portusers); + } + foreach my $uname (keys(%portusers)) { + my $urlstart = '/uploaded/'.$dom.'/'.$uname; + my $pathstart = &propath($dom,$uname).'/userfiles'; + my $is_course = &Apache::lonnet::is_course($dom,$uname); + my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname); + my %access = &Apache::lonnet::get_access_controls($curr_perm); + foreach my $file (keys(%access)) { + my ($group,$url,$fullpath); + if ($is_course) { + ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/); + $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path; + $url = $urlstart.'/groups/'.$group.'/portfolio'.$path; + } else { + $fullpath = $pathstart.'/portfolio'.$file; + $url = $urlstart.'/portfolio'.$file; + } + if (ref($access{$file}) eq 'HASH') { + my %portaccesslog = + &LONCAPA::lonmetadata::process_portfolio_access_data($dbh, + $simulate,\%newnames,$url,$fullpath,$access{$file}); + &portfolio_logging(%portaccesslog); + } + my %portmetalog = &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,$simulate,\%newnames,$url,$fullpath,$is_course,$dom,$uname,$group); + &portfolio_logging(%portmetalog); + } + } +} + +# +# Rename the tables +if (! $simulate) { + foreach my $key (keys(%oldnames)) { + if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) { + $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key}); + if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful for $key."); + } + } + } } if (! $dbh->disconnect) { - print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; } ## ## Finished! -print LOG "==== Searchcat completed ".localtime()." ====\n"; +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); &write_type_count(); @@ -171,6 +288,54 @@ close(LOG); exit 0; +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + +sub portfolio_logging { + my (%portlog) = @_; + foreach my $key (keys(%portlog)) { + if (ref($portlog{$key}) eq 'HASH') { + foreach my $item (keys(%{$portlog{$key}})) { + &log(0,$portlog{$key}{$item}); + } + } + } +} + +sub descend_tree { + my ($dir,$depth,$alldomusers) = @_; + if (-d $dir) { + opendir(DIR,$dir); + my @contents = grep(!/^\./,readdir(DIR)); + closedir(DIR); + $depth ++; + foreach my $item (@contents) { + if ($depth < 4) { + &descend_tree($dir.'/'.$item,$depth,$alldomusers); + } else { + if (-e $dir.'/'.$item.'/file_permissions.db') { + + $$alldomusers{$item} = ''; + } + } + } + } +} + ######################################################## ######################################################## ### ### @@ -190,7 +355,7 @@ sub only_meta_files { foreach my $file (@PossibleFiles) { if ( ($file =~ /\.meta$/ && # Ends in meta $file !~ /\.\d+\.[^\.]+\.meta$/ # is not for a prior version - ) || (-d $file )) { # directories are okay + ) || (-d $File::Find::dir."/".$file )) { # directories are okay # but we do not want /. or /.. push(@ChosenFiles,$file); } @@ -205,10 +370,12 @@ sub only_meta_files { sub print_filename { my ($file) = $_; my $fullfilename = $File::Find::name; - if (-d $file) { - print LOG " Got directory ".$fullfilename."\n"; - } else { - print LOG " Got file ".$fullfilename."\n"; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } } $_=$file; } @@ -217,46 +384,48 @@ sub log_metadata { my ($file) = $_; my $fullfilename = $File::Find::name; return if (-d $fullfilename); # No need to do anything here for directories - print LOG $fullfilename."\n"; - my $ref=&metadata($fullfilename); - if (! defined($ref)) { - print LOG " No data\n"; - return; - } - while (my($key,$value) = each(%$ref)) { - print LOG " ".$key." => ".$value."\n"; + if ($debug) { + &log(6,$fullfilename); + my $ref = &metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); } - &count_copyright($ref->{'copyright'}); $_=$file; } - ## ## process_meta_file ## Called by File::Find. ## Only input is the filename in $_. sub process_meta_file { my ($file) = $_; - my $filename = $File::Find::name; + my $filename = $File::Find::name; # full filename return if (-d $filename); # No need to do anything here for directories # - print LOG $filename."\n"; + &log(3,$filename) if ($debug); # - my $ref=&metadata($filename); + my $ref = &metadata($filename); # # $url is the original file url, not the metadata file - my $url='/res/'.&declutter($filename); - $url=~s/\.meta$//; - print LOG " ".$url."\n"; + my $target = $filename; + $target =~ s/\.meta$//; + my $url='/res/'.&declutter($target); + &log(3," ".$url) if ($debug); # # Ignore some files based on their metadata if ($ref->{'obsolete'}) { - print LOG "obsolete\n"; + &log(3,"obsolete") if ($debug); return; } &count_copyright($ref->{'copyright'}); if ($ref->{'copyright'} eq 'private') { - print LOG "private\n"; + &log(3,"private") if ($debug); return; } # @@ -264,29 +433,29 @@ sub process_meta_file { my %dyn; if ($url=~ m:/default$:) { $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); } else { - # %dyn=&dynamicmeta($url); + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); &count_type($url); } + &LONCAPA::lonmetadata::getfiledates($ref,$target); # - $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); - $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); my %Data = ( %$ref, %dyn, 'url'=>$url, 'version'=>'current'); - my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, - \%Data); - if ($err) { - print LOG "\nMySQL Error Insert: ".$err."\n"; - die $err; - } - if ($count < 1) { - print LOG "Unable to insert record into MySQL database for $url\n"; - die "Unable to insert record into MySQl database for $url"; - } else { - print LOG "Count = ".$count."\n"; + if (! $simulate) { + my ($count,$err) = + &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'}, + 'metadata',\%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + } } # # Reset $_ before leaving @@ -302,7 +471,7 @@ sub process_meta_file { ######################################################## ######################################################## sub metadata { - my ($uri)=@_; + my ($uri) = @_; my %metacache=(); $uri=&declutter($uri); my $filename=$uri; @@ -311,7 +480,8 @@ sub metadata { if ($filename !~ /\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); + my $metastring = + &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename); return undef if (! defined($metastring)); my $parser=HTML::TokeParser->new(\$metastring); my $token; @@ -332,7 +502,7 @@ sub metadata { } foreach ( @{$token->[3]}) { $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } + } if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){ $metacache{$uri.''.$unikey} = $metacache{$uri.''.$unikey.'.default'}; @@ -342,23 +512,6 @@ sub metadata { return \%metacache; } -## -## &getfile($filename) -## Slurps up an entire file into a scalar. -## Returns undef if the file does not exist -sub getfile { - my $file = shift(); - if (! -e $file ) { - return undef; - } - my $fh=IO::File->new($file); - my $contents = ''; - while (<$fh>) { - $contents .= $_; - } - return $contents; -} - ######################################################## ######################################################## ### ### @@ -366,122 +519,105 @@ sub getfile { ### ### ######################################################## ######################################################## -sub dynamicmeta { - my $url = &declutter(shift()); - $url =~ s/\.meta$//; - my %data = ('count' => 0, - 'course' => 0, - 'course_list' => '', - 'avetries' => 'NULL', - 'avetries_list' => '', - 'stdno' => 0, - 'stdno_list' => '', - 'usage' => 0, - 'usage_list' => '', - 'goto' => 0, - 'goto_list' => '', - 'comefrom' => 0, - 'comefrom_list' => '', - 'difficulty' => 'NULL', - 'difficulty_list' => '', - 'sequsage' => '0', - 'sequsage_list' => '', - 'clear' => 'NULL', - 'technical' => 'NULL', - 'correct' => 'NULL', - 'helpful' => 'NULL', - 'depth' => 'NULL', - 'comments' => '', - ); - my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($dom,$auth); +## +## Dynamic metadata description (incomplete) +## +## For a full description of all fields, +## see LONCAPA::lonmetadata +## +## Field Type +##----------------------------------------------------------- +## count integer +## course integer +## course_list comma separated list of course ids +## avetries real +## avetries_list comma separated list of real numbers +## stdno real +## stdno_list comma separated list of real numbers +## usage integer +## usage_list comma separated list of resources +## goto scalar +## goto_list comma separated list of resources +## comefrom scalar +## comefrom_list comma separated list of resources +## difficulty real +## difficulty_list comma separated list of real numbers +## sequsage scalar +## sequsage_list comma separated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); # - # Get metadata except counts + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata my %evaldata; if (! tie(%evaldata,'GDBM_File', $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { - return (undef); - } - my %sum=(); - my %count=(); - my %concat=(); - my %listitems=( - 'course' => 'add', - 'goto' => 'add', - 'comefrom' => 'add', - 'avetries' => 'average', - 'stdno' => 'add', - 'difficulty' => 'average', - 'clear' => 'average', - 'technical' => 'average', - 'helpful' => 'average', - 'correct' => 'average', - 'depth' => 'average', - 'comments' => 'append', - 'usage' => 'count' - ); - # - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - while (my ($esckey,$value)=each %evaldata) { - my $key=&unescape($esckey); - if ($key=~/$regexp/) { - my ($item,$purl,$cat)=split(/___/,$key); - $count{$cat}++; - if ($listitems{$cat} ne 'append') { - if (defined($sum{$cat})) { - $sum{$cat}+=&unescape($value); - $concat{$cat}.=','.$item; - } else { - $sum{$cat}=&unescape($value); - $concat{$cat}=$item; - } - } else { - if (defined($sum{$cat})) { - if ($evaldata{$esckey}=~/\w/) { - $sum{$cat}.='