--- loncom/metadata_database/searchcat.pl 2003/12/26 15:13:48 1.50
+++ loncom/metadata_database/searchcat.pl 2007/01/03 04:01:32 1.70.2.3
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.50 2003/12/26 15:13:48 www Exp $
+# $Id: searchcat.pl,v 1.70.2.3 2007/01/03 04:01:32 albertel Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -65,449 +65,803 @@ and correct user experience.
=cut
use strict;
-
+use DBI;
use lib '/home/httpd/lib/perl/';
-use LONCAPA::Configuration;
+use LONCAPA::lonmetadata;
+use Getopt::Long;
use IO::File;
use HTML::TokeParser;
-use DBI;
use GDBM_File;
use POSIX qw(strftime mktime);
-require "find.pl";
+use Apache::lonnet();
-my @metalist;
+use File::Find;
-my $simplestatus='';
-my %countext=();
+#
+# Set up configuration options.
+# -verbose takes an integer ("=i") because &log compares it numerically with
+# each message's level; a non-numeric value is rejected by GetOptions (with a
+# message on STDERR) and $verbose is then left undefined.
+my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
+GetOptions (
+            'help'      => \$help,
+            'simulate'  => \$simulate,
+            'only=s'    => \$oneuser,
+            'verbose=i' => \$verbose,
+            'debug'     => \$debug,
+            );
-# ----------------------------------------------------- write out simple status
-sub writesimple {
- open(SMP,'>/home/httpd/html/lon-status/mysql.txt');
- print SMP $simplestatus."\n";
- close(SMP);
+if ($help) {
+ print <<"ENDHELP";
+$0
+Rebuild and update the LON-CAPA metadata database.
+Options:
+ -help Print this help
+ -simulate Do not modify the database.
+ -only=user Only compute for the given user. Implies -simulate
+ -verbose=val Sets logging level, val must be a number
+ -debug Turns on debugging output
+ENDHELP
+ exit 0;
}
-sub writecount {
- open(RSMP,'>/home/httpd/html/lon-status/rescount.txt');
- foreach (keys %countext) {
- print RSMP $_.'='.$countext{$_}.'&';
- }
- print RSMP 'time='.time."\n";
- close(RSMP);
+if (! defined($debug)) {
+ $debug = 0;
}
-# -------------------------------------- counts files with different extensions
-sub count {
- my $file=shift;
- $file=~/\.(\w+)$/;
- my $ext=lc($1);
- if (defined($countext{$ext})) {
- $countext{$ext}++;
- } else {
- $countext{$ext}=1;
- }
+if (! defined($verbose)) {
+ $verbose = 0;
}
-# ----------------------------------------------------- Un-Escape Special Chars
-sub unescape {
- my $str=shift;
- $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
- return $str;
+if (defined($oneuser)) {
+ $simulate=1;
}
-# -------------------------------------------------------- Escape Special Chars
+##
+## Use variables for table names so we can test this routine a little easier
+my %oldnames = (
+ 'metadata' => 'metadata',
+ 'portfolio' => 'portfolio_metadata',
+ 'access' => 'portfolio_access',
+ 'addedfields' => 'portfolio_addedfields',
+ );
-sub escape {
- my $str=shift;
- $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
- return $str;
+my %newnames;
+# new table names - append pid to have unique temporary tables
+foreach my $key (keys(%oldnames)) {
+ $newnames{$key} = 'new'.$oldnames{$key}.$$;
}
-# ------------------------------------------- Code to evaluate dynamic metadata
-
-sub dynamicmeta {
- my $url=&declutter(shift);
- $url=~s/\.meta$//;
- my %returnhash=(
- 'count' => 0,
- 'course' => 0,
- 'course_list' => '',
- 'avetries' => 'NULL',
- 'avetries_list' => '',
- 'stdno' => 0,
- 'stdno_list' => '',
- 'usage' => 0,
- 'usage_list' => '',
- 'goto' => 0,
- 'goto_list' => '',
- 'comefrom' => 0,
- 'comefrom_list' => '',
- 'difficulty' => 'NULL',
- 'difficulty_list' => '',
- 'clear' => 'NULL',
- 'technical' => 'NULL',
- 'correct' => 'NULL',
- 'helpful' => 'NULL',
- 'depth' => 'NULL',
- 'comments' => ''
- );
- my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
- my $prodir=&propath($adomain,$aauthor);
-
-# Get metadata except counts
- if (tie(my %evaldata,'GDBM_File',
- $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
- my %sum=();
- my %cnt=();
- my %concat=();
- my %listitems=(
- 'course' => 'add',
- 'goto' => 'add',
- 'comefrom' => 'add',
- 'avetries' => 'avg',
- 'stdno' => 'add',
- 'difficulty' => 'avg',
- 'clear' => 'avg',
- 'technical' => 'avg',
- 'helpful' => 'avg',
- 'correct' => 'avg',
- 'depth' => 'avg',
- 'comments' => 'app',
- 'usage' => 'cnt'
- );
-
- my $regexp=$url;
- $regexp=~s/(\W)/\\$1/g;
- $regexp='___'.$regexp.'___([a-z]+)$';
- while (my ($esckey,$value)=each %evaldata) {
- my $key=&unescape($esckey);
- if ($key=~/$regexp/) {
- my ($item,$purl,$cat)=split(/___/,$key);
- if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; }
- unless ($listitems{$cat} eq 'app') {
- if (defined($sum{$cat})) {
- $sum{$cat}+=&unescape($evaldata{$esckey});
- $concat{$cat}.=','.$item;
- } else {
- $sum{$cat}=&unescape($evaldata{$esckey});
- $concat{$cat}=$item;
- }
- } else {
- if (defined($sum{$cat})) {
- if ($evaldata{$esckey}=~/\w/) {
- $sum{$cat}.='
'.&unescape($evaldata{$esckey});
- }
- } else {
- $sum{$cat}=''.&unescape($evaldata{$esckey});
- }
- }
- }
- }
- untie(%evaldata);
-# transfer gathered data to returnhash, calculate averages where applicable
- while (my $cat=each(%cnt)) {
- if ($cnt{$cat} eq 'nan') { next; }
- if ($sum{$cat} eq 'nan') { next; }
- if ($listitems{$cat} eq 'avg') {
- if ($cnt{$cat}) {
- $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0;
- } else {
- $returnhash{$cat}='NULL';
- }
- } elsif ($listitems{$cat} eq 'cnt') {
- $returnhash{$cat}=$cnt{$cat};
- } else {
- $returnhash{$cat}=$sum{$cat};
- }
- $returnhash{$cat.'_list'}=$concat{$cat};
- }
- }
-# get count
- if (tie(my %evaldata,'GDBM_File',
- $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
- my $escurl=&escape($url);
- if (! exists($evaldata{$escurl})) {
- $returnhash{'count'}=0;
- } else {
- $returnhash{'count'}=$evaldata{$escurl};
- }
- untie %evaldata;
- }
- return %returnhash;
-}
-
-# --------------- Read loncapa_apache.conf and loncapa.conf and get variables
-my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
-my %perlvar=%{$perlvarref};
-undef $perlvarref;
-delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
-
-# ------------------------------------- Only run if machine is a library server
-exit unless $perlvar{'lonRole'} eq 'library';
-
-# ----------------------------- Make sure this process is running from user=www
-
+#
+# Only run if machine is a library server
+exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
+#
+# Make sure this process is running from user=www
my $wwwid=getpwnam('www');
if ($wwwid!=$<) {
- my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
- my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
+ my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
- mailto $emailto -s '$subj' > /dev/null");
+ mail -s '$subj' $emailto > /dev/null");
exit 1;
}
+#
+# Let people know we are running.
+# LOG is the append-mode handle that every &log() call prints to.  If it
+# cannot be opened, say so on STDERR (otherwise all log output is silently
+# discarded) and carry on with the run.
+open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log')
+    || warn("searchcat.pl: unable to open searchcat.log for append: $!\n");
+&log(0,'==== Searchcat Run '.localtime()."====");
-# ---------------------------------------------------------- We are in business
-
-open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
-print LOG '==== Searchcat Run '.localtime()."====\n\n";
-$simplestatus='time='.time.'&';
+if ($debug) {
+ &log(0,'simulating') if ($simulate);
+ &log(0,'only processing user '.$oneuser) if ($oneuser);
+ &log(0,'verbosity level = '.$verbose);
+}
+#
+# Connect to database
my $dbh;
-# ------------------------------------- Make sure that database can be accessed
-{
- unless (
- $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
- ) {
- print LOG "Cannot connect to database!\n";
- $simplestatus.='mysql=defunct';
- &writesimple();
- exit;
- }
-
-# Make temporary table
- $dbh->do("DROP TABLE IF EXISTS newmetadata");
- my $make_metadata_table = "CREATE TABLE IF NOT EXISTS newmetadata (".
- "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
- "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
- "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
- "copyright TEXT, dependencies TEXT, ".
- "count INTEGER UNSIGNED, ".
- "course INTEGER UNSIGNED, course_list TEXT, ".
- "goto INTEGER UNSIGNED, goto_list TEXT, ".
- "comefrom INTEGER UNSIGNED, comefrom_list TEXT, ".
- "sequsage INTEGER UNSIGNED, sequsage_list TEXT, ".
- "stdno INTEGER UNSIGNED, stdno_list TEXT, ".
- "avetries FLOAT, avetries_list TEXT, ".
- "difficulty FLOAT, difficulty_list TEXT, ".
- "clear FLOAT, technical FLOAT, correct FLOAT, helpful FLOAT, depth FLOAT, ".
- "comments TEXT, ".
- "FULLTEXT idx_title (title), ".
- "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
- "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
- "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
- "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
- "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
- "FULLTEXT idx_copyright (copyright)) ".
- "TYPE=MyISAM";
- # It would sure be nice to have some logging mechanism.
- unless ($dbh->do($make_metadata_table)) {
- print LOG "\nMySQL Error Create: ".$dbh->errstr."\n";
- die $dbh->errstr;
- }
-}
-
-# ------------------------------------------------------------- get .meta files
-opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
-my @homeusers = grep {
- &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")
- } grep {!/^\.\.?$/} readdir(RESOURCES);
-closedir RESOURCES;
-
-#
-# Create the statement handlers we need
-
-my $insert_sth = $dbh->prepare
- ("INSERT INTO newmetadata VALUES (".
- "?,". # title
- "?,". # author
- "?,". # subject
- "?,". # declutter url
- "?,". # version
- "?,". # current
- "?,". # notes
- "?,". # abstract
- "?,". # mime
- "?,". # language
- "?,". # creationdate
- "?,". # revisiondate
- "?,". # owner
- "?,". # copyright
- "?,". # dependencies
- "?,". # count
- "?,". # course
- "?,". # course_list
- "?,". # goto
- "?,". # goto_list
- "?,". # comefrom
- "?,". # comefrom_list
- "?,". # usage
- "?,". # usage_list
- "?,". # stdno
- "?,". # stdno_list
- "?,". # avetries
- "?,". # avetries_list
- "?,". # difficulty
- "?,". # difficulty_list
- "?,". # clear
- "?,". # technical
- "?,". # correct
- "?,". # helpful
- "?,". # depth
- "?". # comments
- ")"
- );
-
-foreach my $user (@homeusers) {
- print LOG "\n=== User: ".$user."\n\n";
-
- my $prodir=&propath($perlvar{'lonDefDomain'},$user);
- # Use find.pl
- undef @metalist;
- @metalist=();
- &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
- # -- process each file to get metadata and put into search catalog SQL
- # database. Also, check to see if already there.
- # I could just delete (without searching first), but this works for now.
- foreach my $m (@metalist) {
- print LOG "- ".$m."\n";
- my $ref=&metadata($m);
- my $m2='/res/'.&declutter($m);
- $m2=~s/\.meta$//;
- if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; }
- if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; }
- my %dyn=&dynamicmeta($m2);
- &count($m2);
- unless ($insert_sth->execute(
- $ref->{'title'},
- $ref->{'author'},
- $ref->{'subject'},
- $m2,
- $ref->{'keywords'},
- 'current',
- $ref->{'notes'},
- $ref->{'abstract'},
- $ref->{'mime'},
- $ref->{'language'},
- sqltime($ref->{'creationdate'}),
- sqltime($ref->{'lastrevisiondate'}),
- $ref->{'owner'},
- $ref->{'copyright'},
- $ref->{'dependencies'},
- $dyn{'count'},
- $dyn{'course'},
- $dyn{'course_list'},
- $dyn{'goto'},
- $dyn{'goto_list'},
- $dyn{'comefrom'},
- $dyn{'comefrom_list'},
- $dyn{'usage'},
- $dyn{'usage_list'},
- $dyn{'stdno'},
- $dyn{'stdno_list'},
- $dyn{'avetries'},
- $dyn{'avetries_list'},
- $dyn{'difficulty'},
- $dyn{'difficulty_list'},
- $dyn{'clear'},
- $dyn{'technical'},
- $dyn{'correct'},
- $dyn{'helpful'},
- $dyn{'depth'},
- $dyn{'comments'}
- )) {
- print LOG "\nMySQL Error Insert: ".$dbh->errstr."\n";
- die $dbh->errstr;
- }
- $ref = undef;
- }
-}
-# --------------------------------------------------- Close database connection
-$dbh->do("DROP TABLE IF EXISTS metadata");
-unless ($dbh->do("RENAME TABLE newmetadata TO metadata")) {
- print LOG "\nMySQL Error Rename: ".$dbh->errstr."\n";
- die $dbh->errstr;
+if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
+ { RaiseError =>0,PrintError=>0}))) {
+ &log(0,"Cannot connect to database!");
+ die "MySQL Error: Cannot connect to database!\n";
+}
+# This can return an error and still be okay, so we do not bother checking.
+# (perhaps it should be more robust and check for specific errors)
+foreach my $key (keys(%newnames)) {
+ if ($newnames{$key} ne '') {
+ $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
+ }
+}
+
+#
+# Create the new metadata and portfolio tables.
+# The CREATE statement for each temporary table is obtained from
+# LONCAPA::lonmetadata::create_metadata_storage, which is given the temporary
+# table name and the name of the live table it will later replace.  Any
+# failure aborts the run.
+foreach my $key (keys(%newnames)) {
+    if ($newnames{$key} ne '') {
+        my $request =
+            &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
+        $dbh->do($request);
+        if ($dbh->err) {
+            # Save the message first: DBI resets err/errstr before most
+            # method calls, so reading it after disconnect() would lose it.
+            my $errstr = $dbh->errstr;
+            $dbh->disconnect();
+            &log(0,"MySQL Error Create: ".$errstr);
+            die $errstr;
+        }
+    }
+}
+
+#
+# find out which users we need to examine
+#
+# For every domain returned by Apache::lonnet::current_machine_domains:
+# walk the published resources of each home author (or of the single user
+# given with -only), then record access settings and metadata for the
+# portfolio files of every user found below lonUsersDir.
+my @domains = sort(&Apache::lonnet::current_machine_domains());
+&log(9,'domains ="'.join('","',@domains).'"');
+
+foreach my $dom (@domains) {
+    &log(9,'domain = '.$dom);
+    my $resdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
+    opendir(RESOURCES,$resdir);
+    # Keep every entry except . and .. for which &ishome is true
+    my @homeusers =
+        grep { !/^\.\.?$/ && &ishome("$resdir/$_") } readdir(RESOURCES);
+    closedir RESOURCES;
+    &log(5,'users = '.$dom.':'.join(',',@homeusers));
+    #
+    # -only replaces whatever was found with the one requested user
+    @homeusers = ($oneuser) if ($oneuser);
+    #
+    # Loop through the users
+    foreach my $user (@homeusers) {
+        &log(0,"=== User: ".$user);
+        &process_dynamic_metadata($user,$dom);
+        #
+        # Use File::Find to get the files we need to read/modify.
+        # (\&print_filename or \&log_metadata can be given as 'wanted'
+        # instead when debugging.)
+        find(
+             {
+                 preprocess => \&only_meta_files,
+                 wanted     => \&process_meta_file,
+                 no_chdir   => 1,
+             },
+             $resdir.'/'.$user);
+    }
+    #
+    # Search for public portfolio files
+    my %portusers;
+    if ($oneuser) {
+        $portusers{$oneuser} = '';
+    } else {
+        &descend_tree($Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom,0,
+                      \%portusers);
+    }
+    foreach my $uname (keys(%portusers)) {
+        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
+        my $pathstart = &propath($dom,$uname).'/userfiles';
+        my $is_course = &Apache::lonnet::is_course($dom,$uname);
+        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
+        my %access = &Apache::lonnet::get_access_controls($curr_perm);
+        foreach my $file (keys(%access)) {
+            # The same relative location is used under both the url prefix
+            # and the filesystem prefix; course files sit below their group.
+            my ($group,$relpath);
+            if ($is_course) {
+                my $path;
+                ($group,$path) = ($file =~ /^(\w+)(\/.+)$/);
+                $relpath = '/groups/'.$group.'/portfolio'.$path;
+            } else {
+                $relpath = '/portfolio'.$file;
+            }
+            my $fullpath = $pathstart.$relpath;
+            my $url = $urlstart.$relpath;
+            if (ref($access{$file}) eq 'HASH') {
+                &process_portfolio_access_data($url,$access{$file});
+            }
+            &process_portfolio_metadata($url,$fullpath,$is_course,$dom,
+                                        $uname,$group);
+        }
+    }
+}
-unless ($dbh->disconnect) {
- print LOG "\nMySQL Error Disconnect: ".$dbh->errstr."\n";
+
+#
+# Rename the tables
+if (! $simulate) {
+ foreach my $key (keys(%oldnames)) {
+ if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) {
+ $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
+ if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
+ &log(0,"MySQL Error Rename: ".$dbh->errstr);
+ die $dbh->errstr;
+ } else {
+ &log(1,"MySQL table rename successful for $key.");
+ }
+ }
+ }
+}
+if (! $dbh->disconnect) {
+ &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
die $dbh->errstr;
}
-print LOG "\n==== Searchcat completed ".localtime()." ====\n";
+##
+## Finished!
+&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);
-&writesimple();
-&writecount();
+
+&write_type_count();
+&write_copyright_count();
+
exit 0;
+##
+## &log($level,$message)
+##
+## Prints $message followed by $/ to the LOG handle when the -verbose
+## setting is at least $level.  An undefined $level counts as 0, the level
+## to use for normal output and error messages.
+##
+## $message does not need to end with \n.  For errors, put as much
+## information as possible into the message to help diagnose the problem.
+##
+sub log {
+    my ($level,$message)=@_;
+    if (! defined($level)) {
+        $level = 0;
+    }
+    print LOG $message.$/ if ($verbose >= $level);
+}
+
+##
+## &descend_tree($dir,$depth,$alldomusers)
+##
+## Walks the directory tree below $dir, skipping entries whose names start
+## with a dot.  Entries less than four levels down are recursed into; at the
+## fourth level each entry that contains a file_permissions.db is recorded
+## as a key (with an empty value) in the hash referenced by $alldomusers.
+## The caller in this script starts the walk with $depth = 0.
+##
+sub descend_tree {
+    my ($dir,$depth,$alldomusers) = @_;
+    return if (! -d $dir);
+    opendir(DIR,$dir);
+    my @entries = grep { $_ !~ /^\./ } readdir(DIR);
+    closedir(DIR);      # closed before recursing, so the handle can be reused
+    my $level = $depth + 1;
+    foreach my $entry (@entries) {
+        my $path = $dir.'/'.$entry;
+        if ($level >= 4) {
+            if (-e $path.'/file_permissions.db') {
+                $alldomusers->{$entry} = '';
+            }
+        } else {
+            &descend_tree($path,$level,$alldomusers);
+        }
+    }
+}
+##
+## &process_portfolio_access_data($url,$access_hash)
+##
+## For each key of %$access_hash whose scope part is 'public' or 'guest',
+## builds a record (url, keynum, scope, start and - when non-zero - end) and,
+## unless simulating, stores it in the new portfolio access table.
+## Keys are parsed as "<num>:<scope>_<end>_<start>" - layout taken from the
+## pattern below; confirm against the code that writes them.
+##
+sub process_portfolio_access_data {
+    my ($url,$access_hash) = @_;
+    foreach my $key (keys(%{$access_hash})) {
+        my ($num,$scope,$end,$start) =
+            ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/);
+        next unless (($scope eq 'public') || ($scope eq 'guest'));
+        my %record = (
+                      'url'    => $url,
+                      'keynum' => $key,
+                      'scope'  => $scope,
+                      );
+        $record{'end'} = &sqltime($end) if ($end != 0);
+        $record{'start'} = &sqltime($start);
+        next if ($simulate);
+        my ($count,$err) =
+            &LONCAPA::lonmetadata::store_metadata($dbh,
+                                                  $newnames{'access'},
+                                                  'portfolio_access',\%record);
+        &log(0,"MySQL Error Insert: ".$err) if ($err);
+        &log(0,"Unable to insert record into MySQL database for $url")
+            if ($count < 1);
+    }
+}
-# =============================================================================
+##
+## &process_portfolio_metadata($url,$fullpath,$is_course,$dom,$uname,$group)
+##
+## Collects the metadata of one portfolio file through &portfolio_metadata,
+## fills in its dates, adds the group name when $is_course is true and,
+## unless simulating, stores one row in the new portfolio table followed by
+## one row per added field in the new addedfields table.
+##
+sub process_portfolio_metadata {
+    my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_;
+    my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname,
+                                                      $group);
+    &getfiledates($ref,$fullpath);
+    $ref->{'groupname'} = $group if ($is_course);
+    #
+    # Row to store: the file's own metadata plus its url and version
+    my %Data = (ref($ref) eq 'HASH') ? %{$ref} : ();
+    $Data{'url'} = $url;
+    $Data{'version'} = 'current';
+    return if ($simulate);
+    #
+    my ($count,$err) =
+        &LONCAPA::lonmetadata::store_metadata($dbh,
+                                              $newnames{'portfolio'},
+                                              'portfolio_metadata',\%Data);
+    &log(0,"MySQL Error Insert: ".$err) if ($err);
+    if ($count < 1) {
+        &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url");
+    }
+    return if (ref($addedfields) ne 'HASH');
+    #
+    # One row per field that is not a standard portfolio column
+    foreach my $key (keys(%{$addedfields})) {
+        my %added_data = (
+                          'url'              => $url,
+                          'field'            => $key,
+                          'value'            => $addedfields->{$key},
+                          'courserestricted' => $crs,
+                          );
+        ($count,$err) =
+            &LONCAPA::lonmetadata::store_metadata($dbh,
+                                                  $newnames{'addedfields'},
+                                                  'portfolio_addedfields',
+                                                  \%added_data);
+        &log(0,"MySQL Error Insert: ".$err) if ($err);
+        if ($count < 1) {
+            &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key");
+        }
+    }
+    return;
+}
+
+########################################################
+########################################################
+### ###
+### File::Find support routines ###
+### ###
+########################################################
+########################################################
+##
+## &only_meta_files
+##
+## 'preprocess' routine for File::Find.
+## Receives the names found in $File::Find::dir and returns the ones worth
+## visiting: current .meta files (not the .N.ext.meta files kept for prior
+## versions) and directories.  No special handling of . and .. is done here.
+sub only_meta_files {
+    my @PossibleFiles = @_;
+    my @ChosenFiles = grep {
+        ( $_ =~ /\.meta$/ &&                  # Ends in meta
+          $_ !~ /\.\d+\.[^\.]+\.meta$/        # is not for a prior version
+        ) || (-d $File::Find::dir."/".$_)     # directories are okay
+    } @PossibleFiles;
+    return @ChosenFiles;
+}
+
+##
+##
+## Debugging routines, use these for 'wanted' in the File::Find call
+##
+##
+## &print_filename
+## Debugging 'wanted' routine: with -debug set, logs (level 5) whether the
+## current File::Find entry is a directory or a file, with its full name.
+sub print_filename {
+    my ($file) = $_;
+    my $fullfilename = $File::Find::name;
+    if ($debug) {
+        my $label = (-d $file) ? " Got directory " : " Got file ";
+        &log(5,$label.$fullfilename);
+    }
+    $_=$file;
+}
+
+##
+## &log_metadata
+## Debugging 'wanted' routine: with -debug set, logs (level 6) the full name
+## of the current File::Find entry and every key/value pair of its metadata,
+## then counts its copyright.  Directories are skipped.
+sub log_metadata {
+    my ($file) = $_;
+    my $fullfilename = $File::Find::name;
+    return if (-d $fullfilename); # No need to do anything here for directories
+    if ($debug) {
+        &log(6,$fullfilename);
+        my $ref = &metadata($fullfilename);
+        unless (defined($ref)) {
+            &log(6," No data");
+            return;
+        }
+        foreach my $key (keys(%$ref)) {
+            &log(6," ".$key." => ".$ref->{$key});
+        }
+        &count_copyright($ref->{'copyright'});
+    }
+    $_=$file;
+}
+
+##
+## process_meta_file
+## Called by File::Find as the 'wanted' routine; the entry name arrives in
+## $_ and its full path in $File::Find::name.
+##
+## Directories are ignored.  For a .meta file the metadata is read; obsolete
+## resources are dropped, the copyright is counted, private resources are
+## dropped, dynamic metadata is merged in (skipped for a "default" entry,
+## whose url is shortened to its directory) and, unless simulating, one row
+## is stored in the new metadata table.  $_ is restored on the normal exit.
+##
+sub process_meta_file {
+    my ($file) = $_;
+    my $filename = $File::Find::name;    # full filename
+    return if (-d $filename);            # nothing to do for directories
+    #
+    &log(3,$filename) if ($debug);
+    #
+    my $ref = &metadata($filename);
+    #
+    # The url names the resource itself, so drop the .meta suffix first
+    (my $target = $filename) =~ s/\.meta$//;
+    my $url='/res/'.&declutter($target);
+    &log(3," ".$url) if ($debug);
+    #
+    # Ignore some files based on their metadata
+    if ($ref->{'obsolete'}) {
+        &log(3,"obsolete") if ($debug);
+        return;
+    }
+    &count_copyright($ref->{'copyright'});
+    if ($ref->{'copyright'} eq 'private') {
+        &log(3,"private") if ($debug);
+        return;
+    }
+    #
+    # Find the dynamic metadata
+    my %dyn;
+    if ($url =~ s:/default$:/:) {
+        &log(3,"Skipping dynamic data") if ($debug);
+    } else {
+        &log(3,"Retrieving dynamic data") if ($debug);
+        %dyn=&get_dynamic_metadata($url);
+        &count_type($url);
+    }
+    &getfiledates($ref,$target);
+    #
+    # Static metadata first, dynamic values on top, then url and version
+    my %Data = (%$ref, %dyn);
+    $Data{'url'} = $url;
+    $Data{'version'} = 'current';
+    unless ($simulate) {
+        my ($count,$err) =
+            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
+                                                  'metadata',\%Data);
+        &log(0,"MySQL Error Insert: ".$err) if ($err);
+        &log(0,"Unable to insert record into MySQL database for $url")
+            if ($count < 1);
+    }
+    #
+    # Reset $_ before leaving
+    $_ = $file;
+}
-# ---------------------------------------------------------------- Get metadata
-# significantly altered from subroutine present in lonnet
+########################################################
+########################################################
+### ###
+### &metadata($uri) ###
+### Retrieve metadata for the given file ###
+### ###
+########################################################
+########################################################
sub metadata {
- my ($uri,$what)=@_;
+ my ($uri) = @_;
my %metacache=();
$uri=&declutter($uri);
my $filename=$uri;
$uri=~s/\.meta$//;
$uri='';
- unless ($metacache{$uri.'keys'}) {
- unless ($filename=~/\.meta$/) { $filename.='.meta'; }
- my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
+ if ($filename !~ /\.meta$/) {
+ $filename.='.meta';
+ }
+ my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
+ return undef if (! defined($metastring));
+ my $parser=HTML::TokeParser->new(\$metastring);
+ my $token;
+ while ($token=$parser->get_token) {
+ if ($token->[0] eq 'S') {
+ my $entry=$token->[1];
+ my $unikey=$entry;
+ if (defined($token->[2]->{'part'})) {
+ $unikey.='_'.$token->[2]->{'part'};
+ }
+ if (defined($token->[2]->{'name'})) {
+ $unikey.='_'.$token->[2]->{'name'};
+ }
+ if ($metacache{$uri.'keys'}) {
+ $metacache{$uri.'keys'}.=','.$unikey;
+ } else {
+ $metacache{$uri.'keys'}=$unikey;
+ }
+ foreach ( @{$token->[3]}) {
+ $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
+ }
+ if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){
+ $metacache{$uri.''.$unikey} =
+ $metacache{$uri.''.$unikey.'.default'};
+ }
+ } # End of ($token->[0] eq 'S')
+ }
+ return \%metacache;
+}
+
+###############################################################
+###############################################################
+### ###
+### &portfolio_metadata($filepath,$dom,$uname,$group) ###
+### Retrieve metadata for the given file ###
+### Returns array - ###
+### contains reference to metadatahash and ###
+### optional reference to addedfields hash ###
+### ###
+###############################################################
+###############################################################
+sub portfolio_metadata {
+ my ($fullpath,$dom,$uname,$group)=@_;
+ my ($mime) = ( $fullpath=~/\.(\w+)$/ );
+ my %metacache=();
+ if ($fullpath !~ /\.meta$/) {
+ $fullpath .= '.meta';
+ }
+ my (@standard_fields,%addedfields);
+ my $colsref =
+ $LONCAPA::lonmetadata::Portfolio_metadata_table_description;
+ if (ref($colsref) eq 'ARRAY') {
+ my @columns = @{$colsref};
+ foreach my $coldata (@columns) {
+ push(@standard_fields,$coldata->{'name'});
+ }
+ }
+ my $metastring=&getfile($fullpath);
+ if (! defined($metastring)) {
+ $metacache{'keys'}= 'owner,domain,mime';
+ $metacache{'owner'} = $uname.':'.$dom;
+ $metacache{'domain'} = $dom;
+ $metacache{'mime'} = $mime;
+ if ($group ne '') {
+ $metacache{'keys'} .= ',courserestricted';
+ $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname;
+ }
+ } else {
my $parser=HTML::TokeParser->new(\$metastring);
my $token;
while ($token=$parser->get_token) {
if ($token->[0] eq 'S') {
my $entry=$token->[1];
- my $unikey=$entry;
- if (defined($token->[2]->{'part'})) {
- $unikey.='_'.$token->[2]->{'part'};
- }
- if (defined($token->[2]->{'name'})) {
- $unikey.='_'.$token->[2]->{'name'};
- }
- if ($metacache{$uri.'keys'}) {
- $metacache{$uri.'keys'}.=','.$unikey;
+ if ($metacache{'keys'}) {
+ $metacache{'keys'}.=','.$entry;
} else {
- $metacache{$uri.'keys'}=$unikey;
+ $metacache{'keys'}=$entry;
}
- map {
- $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
- } @{$token->[3]};
- unless (
- $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
- ) { $metacache{$uri.''.$unikey}=
- $metacache{$uri.''.$unikey.'.default'};
+ my $value = $parser->get_text('/'.$entry);
+ if (!grep(/^\Q$entry\E$/,@standard_fields)) {
+ my $clean_value = lc($value);
+ $clean_value =~ s/\s/_/g;
+ if ($clean_value ne $entry) {
+ if (defined($addedfields{$entry})) {
+ $addedfields{$entry} .=','.$value;
+ } else {
+ $addedfields{$entry} = $value;
}
+ }
+ } else {
+ $metacache{$entry} = $value;
+ }
}
+ } # End of ($token->[0] eq 'S')
+ }
+ if (keys(%addedfields) > 0) {
+ foreach my $key (sort keys(%addedfields)) {
+ $metacache{'addedfieldnames'} .= $key.',';
+ $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&';
}
+ $metacache{'addedfieldnames'} =~ s/,$//;
+ $metacache{'addedfieldvalues'} =~ s/\&\&\&$//;
+ if ($metacache{'keys'}) {
+ $metacache{'keys'}.=',addedfieldnames';
+ } else {
+ $metacache{'keys'}='addedfieldnames';
+ }
+ $metacache{'keys'}.=',addedfieldvalues';
}
- return \%metacache;
+ return (\%metacache,$metacache{'courserestricted'},\%addedfields);
}
-# ------------------------------------------------------------ Serves up a file
-# returns either the contents of the file or a -1
+##
+## &getfile($filename)
+## Slurps up an entire file into a scalar.
+## Returns undef if the file does not exist or cannot be opened, so callers
+## that test defined() treat an unreadable file like a missing one.
sub getfile {
- my $file=shift;
- if (! -e $file ) { return -1; };
+    my $file = shift();
+    if (! -e $file ) {
+        return undef;
+    }
my $fh=IO::File->new($file);
- my $a='';
- while (<$fh>) { $a .=$_; }
- return $a;
+    # IO::File->new returns undef when the open fails (e.g. permissions)
+    return undef if (! defined($fh));
+    my $contents = '';
+    # A lexical line variable keeps the global $_ (in use by File::Find
+    # while this runs) untouched.
+    while (defined(my $line = <$fh>)) {
+        $contents .= $line;
+    }
+    $fh->close();
+    return $contents;
}
-# ------------------------------------------------------------- Declutters URLs
-sub declutter {
- my $thisfn=shift;
- $thisfn=~s/^$perlvar{'lonDocRoot'}//;
- $thisfn=~s/^\///;
- $thisfn=~s/^res\///;
- return $thisfn;
+##
+## &getfiledates($ref,$target)
+## Makes sure $ref->{'creationdate'} and $ref->{'lastrevisiondate'} are set
+## and in SQL format.  A date that is missing or blank is taken from the
+## modification time reported by stat() for $target; both dates are then
+## passed through &sqltime.
+sub getfiledates {
+    my ($ref,$target) = @_;
+    foreach my $field ('creationdate','lastrevisiondate') {
+        my $missing = (! defined($ref->{$field}) ||
+                       $ref->{$field} =~ /^\s*$/);
+        $ref->{$field} = (stat($target))[9] if ($missing);
+    }
+    $ref->{'creationdate'} = &sqltime($ref->{'creationdate'});
+    $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'});
+}
+
+########################################################
+########################################################
+### ###
+### Dynamic Metadata ###
+### ###
+########################################################
+########################################################
+##
+## Dynamic metadata description (incomplete)
+##
+## For a full description of all fields,
+## see LONCAPA::lonmetadata
+##
+## Field Type
+##-----------------------------------------------------------
+## count integer
+## course integer
+## course_list comma separated list of course ids
+## avetries real
+## avetries_list comma separated list of real numbers
+## stdno real
+## stdno_list comma separated list of real numbers
+## usage integer
+## usage_list comma separated list of resources
+## goto scalar
+## goto_list comma separated list of resources
+## comefrom scalar
+## comefrom_list comma separated list of resources
+## difficulty real
+## difficulty_list comma separated list of real numbers
+## sequsage scalar
+## sequsage_list comma separated list of resources
+## clear real
+## technical real
+## correct real
+## helpful real
+## depth real
+## comments html of all the comments made
+##
+{
+
+my %DynamicData;
+my %Counts;
+
+##
+## &process_dynamic_metadata($user,$dom)
+##
+## Loads one author's dynamic metadata into the file-scoped %DynamicData and
+## %Counts, which &get_dynamic_metadata reads.  Both are emptied first, so
+## after a failure they never hold a previous author's data.
+##
+## Inputs:  $user - author name, $dom - the author's domain
+## Returns: 1 on success, 0 if either GDBM file could not be tied
+##
+sub process_dynamic_metadata {
+    my ($user,$dom) = @_;
+    undef(%DynamicData);
+    undef(%Counts);
+    #
+    my $prodir = &propath($dom,$user);
+    #
+    # Read in the dynamic metadata
+    my %evaldata;
+    if (! tie(%evaldata,'GDBM_File',
+              $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
+        return 0;
+    }
+    #
+    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
+    untie(%evaldata);
+    $DynamicData{'domain'} = $dom;
+    #print('user = '.$user.' domain = '.$dom.$/);
+    #
+    # Read in the access count data
+    &log(7,'Reading access count data') if ($debug);
+    my %countdata;
+    if (! tie(%countdata,'GDBM_File',
+              $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
+        return 0;
+    }
+    while (my ($key,$count) = each(%countdata)) {
+        # Keys are stored escaped, so decode before testing the domain
+        # prefix; \Q..\E makes characters such as '.' or '-' in the domain
+        # name match literally.
+        $key = &unescape($key);
+        next if ($key !~ /^\Q$dom\E/);
+        &log(8,' Count '.$key.' = '.$count) if ($debug);
+        $Counts{$key}=$count;
+    }
+    untie(%countdata);
+    if ($debug) {
+        &log(7,scalar(keys(%Counts)).
+             " Counts read for ".$user."@".$dom);
+        &log(7,scalar(keys(%DynamicData)).
+             " Dynamic metadata read for ".$user."@".$dom);
+    }
+    #
+    return 1;
+}
-# --------------------------------------- Is this the home server of an author?
-# (copied from lond, modification of the return value)
+##
+## &get_dynamic_metadata($url)
+##
+## Returns, as a hash, the dynamic metadata for $url (a leading /res/ is
+## removed first) computed by LONCAPA::lonmetadata::process_dynamic_metadata
+## from %DynamicData, with 'count' set from %Counts.  With -debug set every
+## pair is logged at level 8.
+##
+sub get_dynamic_metadata {
+    my ($url) = @_;
+    $url =~ s:^/res/::;
+    my %data =
+        &LONCAPA::lonmetadata::process_dynamic_metadata($url,\%DynamicData);
+    # find the count
+    $data{'count'} = $Counts{$url};
+    #
+    # Log the dynamic metadata
+    if ($debug) {
+        foreach my $k (keys(%data)) {
+            &log(8," ".$k." => ".$data{$k});
+        }
+    }
+    return %data;
+}
+
+} # End of %DynamicData and %Counts scope
+
+########################################################
+########################################################
+### ###
+### Counts ###
+### ###
+########################################################
+########################################################
+{
+
+my %countext;
+
+##
+## &count_type($file)
+## Adds one to the tally kept in %countext for $file's extension
+## (lower-cased).  Names without a ".ext" suffix are not counted.
+sub count_type {
+    my $file=shift;
+    # Take the capture from the match's return list: after a failed match
+    # $1 still holds whatever an earlier successful match left in it.
+    my ($ext) = ($file =~ /\.(\w+)$/);
+    return if (! defined($ext));
+    $countext{lc($ext)}++;
+}
+
+##
+## &write_type_count()
+## Writes the tallies gathered by &count_type to the lon-status file
+## rescount.txt as "ext=count&" pairs followed by "time=<epoch>".
+sub write_type_count {
+    my $statusfile = '/home/httpd/html/lon-status/rescount.txt';
+    # Report an unwritable status file instead of printing to a dead handle
+    if (! open(RESCOUNT,'>'.$statusfile)) {
+        warn("searchcat.pl: unable to write $statusfile: $!\n");
+        return;
+    }
+    while (my ($extension,$count) = each(%countext)) {
+        print RESCOUNT $extension.'='.$count.'&';
+    }
+    print RESCOUNT 'time='.time."\n";
+    close(RESCOUNT);
+}
+
+} # end of scope for %countext
+
+{
+
+my %copyrights;
+
+##
+## &count_copyright($copyright)
+## Adds one to the tally kept in %copyrights for the given copyright value.
+sub count_copyright {
+    # $_[0] is the scalar first argument; @_[0] was a one-element slice.
+    $copyrights{$_[0]}++;
+}
+
+##
+## &write_copyright_count()
+## Writes the tallies gathered by &count_copyright to the lon-status file
+## copyrightcount.txt as "copyright=count&" pairs followed by "time=<epoch>".
+sub write_copyright_count {
+    my $statusfile = '/home/httpd/html/lon-status/copyrightcount.txt';
+    # Report an unwritable status file instead of printing to a dead handle
+    if (! open(COPYCOUNT,'>'.$statusfile)) {
+        warn("searchcat.pl: unable to write $statusfile: $!\n");
+        return;
+    }
+    while (my ($copyright,$count) = each(%copyrights)) {
+        print COPYCOUNT $copyright.'='.$count.'&';
+    }
+    print COPYCOUNT 'time='.time."\n";
+    close(COPYCOUNT);
+}
+
+} # end of scope for %copyrights
+
+########################################################
+########################################################
+### ###
+### Miscellaneous Utility Routines ###
+### ###
+########################################################
+########################################################
+##
+## &ishome($username)
+## Returns 1 if $username is a LON-CAPA author, 0 otherwise
+## (copied from lond, modification of the return value)
sub ishome {
my $author=shift;
$author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
@@ -520,54 +874,76 @@ sub ishome {
}
}
-# -------------------------------------------- Return path to profile directory
-# (copied from lond)
+##
+## &propath($udom,$uname)
+## Returns the path to the user's LON-CAPA directory
+## (copied from lond)
sub propath {
my ($udom,$uname)=@_;
$udom=~s/\W//g;
$uname=~s/\W//g;
my $subdir=$uname.'__';
$subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
- my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
+ my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
return $proname;
}
-# ---------------------------- convert 'time' format into a datetime sql format
+##
+## &sqltime($timestamp)
+##
+## Convert perl $timestamp to MySQL time.  MySQL expects YYYY-MM-DD HH:MM:SS
+##
+## A value already containing "YYYY-MM-DD HH:MM:SS" is returned zero padded;
+## a string of digits is taken as epoch seconds and rendered via gmtime.
+## An undefined or blank value returns 0 quietly.  Anything else returns 0
+## after logging that it could not be decoded.
+##
sub sqltime {
- my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
- localtime(&unsqltime(@_[0]));
- $mon++; $year+=1900;
- return "$year-$mon-$mday $hour:$min:$sec";
+    my ($time) = @_;
+    my $mysqltime;
+    if (! defined($time) || $time =~ /^\s*$/) {
+        # Nothing to convert.  (A numeric "== 0" test here would also be
+        # true for every non-numeric string and hide it from the logging
+        # branch at the end.)
+        $mysqltime = 0;
+    } elsif ($time =~
+             /(\d+)-(\d+)-(\d+) # YYYY-MM-DD
+              \s                # a space
+              (\d+):(\d+):(\d+) # HH:MM:SS
+              /x ) {
+        # Some of the .meta files have the time in mysql
+        # format already, so just make sure they are 0 padded and
+        # pass them back.
+        $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
+                             $1,$2,$3,$4,$5,$6);
+    } elsif ($time =~ /^\d+$/) {
+        my @TimeData = gmtime($time);
+        # Alter the month to be 1-12 instead of 0-11
+        $TimeData[4]++;
+        # Alter the year to be the full year instead of an offset from 1900
+        $TimeData[5]+=1900;
+        $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
+                             @TimeData[5,4,3,2,1,0]);
+    } else {
+        &log(0," sqltime:Unable to decode time ".$time);
+        $mysqltime = 0;
+    }
+    return $mysqltime;
}
-sub maketime {
- my %th=@_;
- return POSIX::mktime(($th{'seconds'},$th{'minutes'},$th{'hours'},
- $th{'day'},$th{'month'}-1,
- $th{'year'}-1900,0,0,$th{'dlsav'}));
+##
+## &declutter($filename)
+## Given a filename, returns a url for the filename: a leading lonDocRoot,
+## then a leading '/', then a leading 'res/' are removed.
+sub declutter {
+    my $thisfn=shift;
+    # \Q..\E: the document root is a literal path prefix, not a pattern
+    $thisfn=~s/^\Q$Apache::lonnet::perlvar{'lonDocRoot'}\E//;
+    $thisfn=~s/^\///;
+    $thisfn=~s/^res\///;
+    return $thisfn;
}
-
-#########################################
-#
-# Retro-fixing of un-backward-compatible time format
-
-sub unsqltime {
- my $timestamp=shift;
- if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
- $timestamp=&maketime('year'=>$1,'month'=>$2,'day'=>$3,
- 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
- }
- return $timestamp;
+##
+## Escape / Unescape special characters
+sub unescape {
+ my $str=shift;
+ $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
+ return $str;
}
-# ----------------- Code to enable 'find' subroutine listing of the .meta files
-
-no strict "vars";
-
-sub wanted {
- (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
- -f _ &&
- /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
- push(@metalist,"$dir/$_");
+sub escape {
+ my $str=shift;
+ $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
+ return $str;
}