--- loncom/metadata_database/searchcat.pl 2003/10/07 17:51:38 1.41
+++ loncom/metadata_database/searchcat.pl 2003/12/24 20:41:32 1.46
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.41 2003/10/07 17:51:38 www Exp $
+# $Id: searchcat.pl,v 1.46 2003/12/24 20:41:32 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -65,6 +65,8 @@ and correct user experience.
=cut
+use strict;
+
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
@@ -74,11 +76,14 @@ use DBI;
use GDBM_File;
use POSIX qw(strftime mktime);
+require "find.pl";
+
my @metalist;
-$simplestatus='';
+my $simplestatus='';
my %countext=();
+# ----------------------------------------------------- write out simple status
sub writesimple {
open(SMP,'>/home/httpd/html/lon-status/mysql.txt');
print SMP $simplestatus."\n";
@@ -94,6 +99,7 @@ sub writecount {
close(RSMP);
}
+# -------------------------------------- counts files with different extensions
sub count {
my $file=shift;
$file=~/\.(\w+)$/;
@@ -120,97 +126,91 @@ sub escape {
return $str;
}
-
# ------------------------------------------- Code to evaluate dynamic metadata
sub dynamicmeta {
-
my $url=&declutter(shift);
$url=~s/\.meta$//;
my %returnhash=();
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
my $prodir=&propath($adomain,$aauthor);
- if ((tie(%evaldata,'GDBM_File',
- $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
- (tie(%newevaldata,'GDBM_File',
- $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
- my %sum=();
- my %cnt=();
- my %listitems=('count' => 'add',
- 'course' => 'add',
- 'goto' => 'add',
- 'comefrom' => 'add',
- 'avetries' => 'avg',
- 'stdno' => 'add',
- 'difficulty' => 'avg',
- 'clear' => 'avg',
- 'technical' => 'avg',
- 'helpful' => 'avg',
- 'correct' => 'avg',
- 'depth' => 'avg',
- 'comments' => 'app',
- 'usage' => 'cnt'
- );
- my $regexp=$url;
- $regexp=~s/(\W)/\\$1/g;
- $regexp='___'.$regexp.'___([a-z]+)$';
- foreach (keys %evaldata) {
- my $key=&unescape($_);
- if ($key=~/$regexp/) {
- my $ctype=$1;
- if (defined($cnt{$ctype})) {
- $cnt{$ctype}++;
- } else {
- $cnt{$ctype}=1;
- }
- unless ($listitems{$ctype} eq 'app') {
- if (defined($sum{$ctype})) {
- $sum{$ctype}+=$evaldata{$_};
- } else {
- $sum{$ctype}=$evaldata{$_};
- }
- } else {
- if (defined($sum{$ctype})) {
- if ($evaldata{$_}) {
- $sum{$ctype}.='
'.$evaldata{$_};
- }
- } else {
- $sum{$ctype}=''.$evaldata{$_};
- }
+
+# Get metadata except counts
+ if (tie(my %evaldata,'GDBM_File',
+ $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
+ my %sum=();
+ my %cnt=();
+ my %concat=();
+ my %listitems=(
+ 'course' => 'add',
+ 'goto' => 'add',
+ 'comefrom' => 'add',
+ 'avetries' => 'avg',
+ 'stdno' => 'add',
+ 'difficulty' => 'avg',
+ 'clear' => 'avg',
+ 'technical' => 'avg',
+ 'helpful' => 'avg',
+ 'correct' => 'avg',
+ 'depth' => 'avg',
+ 'comments' => 'app',
+ 'usage' => 'cnt'
+ );
+
+ my $regexp=$url;
+ $regexp=~s/(\W)/\\$1/g;
+ $regexp='___'.$regexp.'___([a-z]+)$';
+ while (my ($esckey,$value)=each %evaldata) {
+ my $key=&unescape($esckey);
+ if ($key=~/$regexp/) {
+ my ($item,$purl,$cat)=split(/___/,$key);
+ if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; }
+ unless ($listitems{$cat} eq 'app') {
+ if (defined($sum{$cat})) {
+ $sum{$cat}+=$evaldata{$esckey};
+ $concat{$cat}.=','.$item;
+ } else {
+ $sum{$cat}=$evaldata{$esckey};
+ $concat{$cat}=$item;
+ }
+ } else {
+ if (defined($sum{$cat})) {
+ if ($evaldata{$esckey}=~/\w/) {
+ $sum{$cat}.='
'.$evaldata{$esckey};
+ }
+ } else {
+ $sum{$cat}=''.$evaldata{$esckey};
+ }
+ }
+ }
+ }
+ untie(%evaldata);
+# transfer gathered data to returnhash, calculate averages where applicable
+ while (my $cat=each(%cnt)) {
+ if ($listitems{$cat} eq 'avg') {
+ $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0;
+ } elsif ($listitems{$cat} eq 'cnt') {
+ $returnhash{$cat}=$cnt{$cat};
+ } else {
+ $returnhash{$cat}=$sum{$cat};
}
- if ($ctype ne 'count') {
- $newevaldata{$_}=$evaldata{$_};
- }
- }
- }
- foreach (keys %cnt) {
- if ($listitems{$_} eq 'avg') {
- $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- } elsif ($listitems{$_} eq 'cnt') {
- $returnhash{$_}=$cnt{$_};
- } else {
- $returnhash{$_}=$sum{$_};
- }
- }
- if ($returnhash{'count'}) {
- my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
- $newevaldata{$newkey}=$returnhash{'count'};
- }
- untie(%evaldata);
- untie(%newevaldata);
- }
- return %returnhash;
+ $returnhash{$cat.'_list'}=$concat{$cat};
+ }
+ }
+# get count
+ if (tie(my %evaldata,'GDBM_File',
+ $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
+ my $escurl=&escape($url);
+ if (! exists($evaldata{$escurl})) {
+ $returnhash{'count'}=0;
+ } else {
+ $returnhash{'count'}=$evaldata{$escurl};
+ }
+ untie %evaldata;
+ }
+ return %returnhash;
}
-# ----------------- Code to enable 'find' subroutine listing of the .meta files
-require "find.pl";
-sub wanted {
- (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
- -f _ &&
- /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
- push(@metalist,"$dir/$_");
-}
-
# --------------- Read loncapa_apache.conf and loncapa.conf and get variables
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
@@ -224,8 +224,8 @@ exit unless $perlvar{'lonRole'} eq 'libr
my $wwwid=getpwnam('www');
if ($wwwid!=$<) {
- $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
- $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+ my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
mailto $emailto -s '$subj' > /dev/null");
exit 1;
@@ -249,6 +249,7 @@ my $dbh;
exit;
}
+# Create table for static metadata, unless exists
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
"title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
"version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
@@ -262,6 +263,21 @@ my $dbh;
"FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
# It would sure be nice to have some logging mechanism.
$dbh->do($make_metadata_table);
+
+# Create table for dynamic metadata, unless exists
+ my $make_dynmetadata_table = "CREATE TABLE IF NOT EXISTS dynmetadata (".
+ "url TEXT, count INTEGER UNSIGNED, ".
+ "course INTEGER UNSIGNED, course_list TEXT, ".
+ "goto INTEGER UNSIGNED, goto_list TEXT, ".
+ "comefrom INTEGER UNSIGNED, comefrom_list TEXT, ".
+ "usage INTEGER UNSIGNED, usage_list TEXT, ".
+ "stdno INTEGER UNSIGNED, stdno_list TEXT, ".
+ "avetries FLOAT, avetries_list TEXT, ".
+ "difficulty FLOAT, difficulty_list TEXT ".
+ "TYPE=MYISAM";
+ # It would sure be nice to have some logging mechanism.
+#### $dbh->do($make_dynmetadata_table);
+
}
# ------------------------------------------------------------- get .meta files
@@ -296,9 +312,8 @@ my $insert_sth = $dbh->prepare
foreach my $user (@homeusers) {
print LOG "\n=== User: ".$user."\n\n";
- # Remove left-over db-files from potentially crashed searchcat run
+
my $prodir=&propath($perlvar{'lonDefDomain'},$user);
- unlink($prodir.'/nohist_new_resevaldata.db');
# Use find.pl
undef @metalist;
@metalist=();
@@ -311,9 +326,9 @@ foreach my $user (@homeusers) {
my $ref=&metadata($m);
my $m2='/res/'.&declutter($m);
$m2=~s/\.meta$//;
- &dynamicmeta($m2);
if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; }
if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; }
+ &dynamicmeta($m2);
&count($m2);
$delete_sth->execute($m2);
$insert_sth->execute($ref->{'title'},
@@ -340,12 +355,6 @@ foreach my $user (@homeusers) {
# Need to, perhaps, remove stale SQL database records.
# ... not yet implemented
- # ------------------------------------------- Copy over the new db-files
- #
-
- system('mv '.$prodir.'/nohist_new_resevaldata.db '.
- $prodir.'/nohist_resevaldata.db');
-
}
# --------------------------------------------------- Close database connection
$dbh->disconnect;
@@ -477,3 +486,13 @@ sub unsqltime {
return $timestamp;
}
+# ----------------- Code to enable 'find' subroutine listing of the .meta files
+
+no strict "vars";
+
+sub wanted {
+ (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
+ -f _ &&
+ /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
+ push(@metalist,"$dir/$_");
+}