Annotation of loncom/metadata_database/searchcat.pl, revision 1.31
1.1 harris41 1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
1.16 harris41 4: #
1.31 ! harris41 5: # $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $
1.16 harris41 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
1.29 albertel 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.16 harris41 10: #
1.29 albertel 11: # LON-CAPA is free software; you can redistribute it and/or modify
1.16 harris41 12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
1.29 albertel 16: # LON-CAPA is distributed in the hope that it will be useful,
1.16 harris41 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
1.29 albertel 22: # along with LON-CAPA; if not, write to the Free Software
1.16 harris41 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
1.29 albertel 27: # http://www.lon-capa.org/
1.16 harris41 28: #
29: ###
1.1 harris41 30:
1.31 ! harris41 31: # This script goes through a LON-CAPA resource
! 32: # directory and gathers metadata.
! 33: # The metadata is entered into a SQL database.
1.1 harris41 34:
1.17 harris41 35: use lib '/home/httpd/lib/perl/';
36: use LONCAPA::Configuration;
37:
1.1 harris41 38: use IO::File;
39: use HTML::TokeParser;
1.6 harris41 40: use DBI;
1.21 www 41: use GDBM_File;
1.24 www 42: use POSIX qw(strftime mktime);
1.1 harris41 43:
44: my @metalist;
1.21 www 45:
1.28 harris41 46:
1.31 ! harris41 47: # ----------------------------------------------------- Un-Escape Special Chars
1.28 harris41 48:
1.31 ! harris41 49: sub unescape {
! 50: my $str=shift;
1.21 www 51: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
1.31 ! harris41 52: return $str;
! 53: }
1.21 www 54:
1.31 ! harris41 55: # -------------------------------------------------------- Escape Special Chars
1.22 www 56:
1.31 ! harris41 57: sub escape {
! 58: my $str=shift;
1.22 www 59: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
1.31 ! harris41 60: return $str;
! 61: }
1.28 harris41 62:
63:
1.31 ! harris41 64: # ------------------------------------------- Code to evaluate dynamic metadata
1.28 harris41 65:
1.31 ! harris41 66: sub dynamicmeta {
1.28 harris41 67:
1.30 www 68: my $url=&declutter(shift);
69: $url=~s/\.meta$//;
70: my %returnhash=();
71: my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
1.31 ! harris41 72: my $prodir=&propath($adomain,$aauthor);
1.25 www 73: if ((tie(%evaldata,'GDBM_File',
1.31 ! harris41 74: $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
1.25 www 75: (tie(%newevaldata,'GDBM_File',
1.31 ! harris41 76: $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
! 77: my %sum=();
! 78: my %cnt=();
! 79: my %listitems=('count' => 'add',
! 80: 'course' => 'add',
! 81: 'avetries' => 'avg',
! 82: 'stdno' => 'add',
! 83: 'difficulty' => 'avg',
! 84: 'clear' => 'avg',
! 85: 'technical' => 'avg',
! 86: 'helpful' => 'avg',
! 87: 'correct' => 'avg',
! 88: 'depth' => 'avg',
! 89: 'comments' => 'app',
! 90: 'usage' => 'cnt'
! 91: );
! 92: my $regexp=$url;
! 93: $regexp=~s/(\W)/\\$1/g;
! 94: $regexp='___'.$regexp.'___([a-z]+)$';
! 95: foreach (keys %evaldata) {
! 96: my $key=&unescape($_);
! 97: if ($key=~/$regexp/) {
! 98: my $ctype=$1;
! 99: if (defined($cnt{$ctype})) {
! 100: $cnt{$ctype}++;
! 101: } else {
! 102: $cnt{$ctype}=1;
! 103: }
! 104: unless ($listitems{$ctype} eq 'app') {
! 105: if (defined($sum{$ctype})) {
! 106: $sum{$ctype}+=$evaldata{$_};
! 107: } else {
! 108: $sum{$ctype}=$evaldata{$_};
! 109: }
! 110: } else {
! 111: if (defined($sum{$ctype})) {
! 112: if ($evaldata{$_}) {
! 113: $sum{$ctype}.='<hr>'.$evaldata{$_};
! 114: }
! 115: } else {
! 116: $sum{$ctype}=''.$evaldata{$_};
! 117: }
1.30 www 118: }
1.31 ! harris41 119: if ($ctype ne 'count') {
! 120: $newevaldata{$_}=$evaldata{$_};
! 121: }
! 122: }
! 123: }
! 124: foreach (keys %cnt) {
! 125: if ($listitems{$_} eq 'avg') {
! 126: $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
! 127: } elsif ($listitems{$_} eq 'cnt') {
! 128: $returnhash{$_}=$cnt{$_};
! 129: } else {
! 130: $returnhash{$_}=$sum{$_};
! 131: }
! 132: }
! 133: if ($returnhash{'count'}) {
! 134: my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
! 135: $newevaldata{$newkey}=$returnhash{'count'};
! 136: }
! 137: untie(%evaldata);
! 138: untie(%newevaldata);
! 139: }
! 140: return %returnhash;
1.30 www 141: }
1.31 ! harris41 142:
! 143: # ----------------- Code to enable 'find' subroutine listing of the .meta files
! 144: require "find.pl";
! 145: sub wanted {
1.1 harris41 146: (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
1.31 ! harris41 147: -f _ &&
1.10 harris41 148: /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
1.31 ! harris41 149: push(@metalist,"$dir/$_");
! 150: }
1.28 harris41 151:
1.31 ! harris41 152: # --------------- Read loncapa_apache.conf and loncapa.conf and get variables
! 153: my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
! 154: my %perlvar=%{$perlvarref};
! 155: undef $perlvarref; # remove since sensitive and not needed
! 156: delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
1.28 harris41 157:
1.31 ! harris41 158: # ------------------------------------- Only run if machine is a library server
! 159: exit unless $perlvar{'lonRole'} eq 'library';
1.1 harris41 160:
1.31 ! harris41 161: # ----------------------------- Make sure this process is running from user=www
1.15 harris41 162:
1.31 ! harris41 163: my $wwwid=getpwnam('www');
! 164: if ($wwwid!=$<) {
! 165: $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
! 166: $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
! 167: system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
! 168: mailto $emailto -s '$subj' > /dev/null");
! 169: exit 1;
! 170: }
1.1 harris41 171:
1.27 www 172:
1.31 ! harris41 173: # ---------------------------------------------------------- We are in business
1.27 www 174:
1.31 ! harris41 175: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
! 176: print LOG '==== Searchcat Run '.localtime()."====\n\n";
! 177: my $dbh;
! 178: # ------------------------------------- Make sure that database can be accessed
! 179: {
! 180: unless (
! 181: $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
! 182: ) {
! 183: print LOG "Cannot connect to database!\n";
! 184: exit;
! 185: }
! 186: my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
! 187: "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
! 188: "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
! 189: "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
! 190: "copyright TEXT, FULLTEXT idx_title (title), ".
! 191: "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
! 192: "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
! 193: "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
! 194: "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
! 195: "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
! 196: "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
! 197: # It would sure be nice to have some logging mechanism.
! 198: $dbh->do($make_metadata_table);
! 199: }
1.27 www 200:
1.31 ! harris41 201: # ------------------------------------------------------------- get .meta files
! 202: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
! 203: my @homeusers=grep
! 204: {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
! 205: grep {!/^\.\.?$/} readdir(RESOURCES);
! 206: closedir RESOURCES;
! 207: foreach my $user (@homeusers) {
! 208: print LOG "\n=== User: ".$user."\n\n";
! 209: # Remove left-over db-files from potentially crashed searchcat run
! 210: my $prodir=&propath($perlvar{'lonDefDomain'},$user);
! 211: unlink($prodir.'/nohist_new_resevaldata.db');
! 212: # Use find.pl
! 213: undef @metalist;
! 214: @metalist=();
! 215: &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
! 216:
! 217: # -- process each file to get metadata and put into search catalog SQL database
! 218: # Also, check to see if already there.
! 219: # I could just delete (without searching first), but this works for now.
! 220: foreach my $m (@metalist) {
! 221: print LOG "- ".$m."\n";
! 222: my $ref=&metadata($m);
! 223: my $m2='/res/'.&declutter($m);
! 224: $m2=~s/\.meta$//;
! 225: &dynamicmeta($m2);
! 226: my $q2="select * from metadata where url like binary '$m2'";
! 227: my $sth = $dbh->prepare($q2);
! 228: $sth->execute();
! 229: my $r1=$sth->fetchall_arrayref;
! 230: if (@$r1) {
! 231: $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
! 232: $sth->execute();
! 233: }
! 234: $sth=$dbh->prepare('insert into metadata values ('.
! 235: '"'.delete($ref->{'title'}).'"'.','.
! 236: '"'.delete($ref->{'author'}).'"'.','.
! 237: '"'.delete($ref->{'subject'}).'"'.','.
! 238: '"'.$m2.'"'.','.
! 239: '"'.delete($ref->{'keywords'}).'"'.','.
! 240: '"'.'current'.'"'.','.
! 241: '"'.delete($ref->{'notes'}).'"'.','.
! 242: '"'.delete($ref->{'abstract'}).'"'.','.
! 243: '"'.delete($ref->{'mime'}).'"'.','.
! 244: '"'.delete($ref->{'language'}).'"'.','.
! 245: '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
! 246: '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
! 247: '"'.delete($ref->{'owner'}).'"'.','.
! 248: '"'.delete($ref->{'copyright'}).'"'.')');
! 249: $sth->execute();
! 250: }
1.25 www 251:
1.31 ! harris41 252: # ----------------------------------------------------------- Clean up database
! 253: # Need to, perhaps, remove stale SQL database records.
! 254: # ... not yet implemented
1.1 harris41 255:
256:
1.31 ! harris41 257: # -------------------------------------------------- Copy over the new db-files
! 258: system('mv '.$prodir.'/nohist_new_resevaldata.db '.
! 259: $prodir.'/nohist_resevaldata.db');
! 260: }
! 261: # --------------------------------------------------- Close database connection
! 262: $dbh->disconnect;
! 263: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
! 264: close(LOG);
! 265: exit 0;
! 266: # =============================================================================
1.1 harris41 267:
1.31 ! harris41 268: # ---------------------------------------------------------------- Get metadata
! 269: # significantly altered from subroutine present in lonnet
! 270: sub metadata {
! 271: my ($uri,$what)=@_;
! 272: my %metacache;
! 273: $uri=&declutter($uri);
! 274: my $filename=$uri;
! 275: $uri=~s/\.meta$//;
! 276: $uri='';
! 277: unless ($metacache{$uri.'keys'}) {
! 278: unless ($filename=~/\.meta$/) { $filename.='.meta'; }
! 279: my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
! 280: my $parser=HTML::TokeParser->new(\$metastring);
! 281: my $token;
! 282: while ($token=$parser->get_token) {
! 283: if ($token->[0] eq 'S') {
! 284: my $entry=$token->[1];
! 285: my $unikey=$entry;
! 286: if (defined($token->[2]->{'part'})) {
! 287: $unikey.='_'.$token->[2]->{'part'};
1.28 harris41 288: }
1.31 ! harris41 289: if (defined($token->[2]->{'name'})) {
! 290: $unikey.='_'.$token->[2]->{'name'};
1.28 harris41 291: }
1.31 ! harris41 292: if ($metacache{$uri.'keys'}) {
! 293: $metacache{$uri.'keys'}.=','.$unikey;
! 294: } else {
! 295: $metacache{$uri.'keys'}=$unikey;
1.1 harris41 296: }
1.31 ! harris41 297: map {
! 298: $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
! 299: } @{$token->[3]};
! 300: unless (
! 301: $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
! 302: ) { $metacache{$uri.''.$unikey}=
! 303: $metacache{$uri.''.$unikey.'.default'};
! 304: }
! 305: }
! 306: }
! 307: }
! 308: return \%metacache;
! 309: }
1.28 harris41 310:
1.31 ! harris41 311: # ------------------------------------------------------------ Serves up a file
! 312: # returns either the contents of the file or a -1
! 313: sub getfile {
! 314: my $file=shift;
! 315: if (! -e $file ) { return -1; };
! 316: my $fh=IO::File->new($file);
! 317: my $a='';
! 318: while (<$fh>) { $a .=$_; }
! 319: return $a
! 320: }
1.28 harris41 321:
1.31 ! harris41 322: # ------------------------------------------------------------- Declutters URLs
! 323: sub declutter {
! 324: my $thisfn=shift;
! 325: $thisfn=~s/^$perlvar{'lonDocRoot'}//;
! 326: $thisfn=~s/^\///;
! 327: $thisfn=~s/^res\///;
! 328: return $thisfn;
! 329: }
1.28 harris41 330:
1.31 ! harris41 331: # --------------------------------------- Is this the home server of an author?
! 332: # (copied from lond, modification of the return value)
! 333: sub ishome {
! 334: my $author=shift;
! 335: $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
! 336: my ($udom,$uname)=split(/\//,$author);
! 337: my $proname=propath($udom,$uname);
! 338: if (-e $proname) {
! 339: return 1;
! 340: } else {
! 341: return 0;
! 342: }
! 343: }
1.28 harris41 344:
1.31 ! harris41 345: # -------------------------------------------- Return path to profile directory
! 346: # (copied from lond)
! 347: sub propath {
! 348: my ($udom,$uname)=@_;
! 349: $udom=~s/\W//g;
! 350: $uname=~s/\W//g;
! 351: my $subdir=$uname.'__';
! 352: $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
! 353: my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
! 354: return $proname;
! 355: }
1.28 harris41 356:
1.31 ! harris41 357: # ---------------------------- convert 'time' format into a datetime sql format
! 358: sub sqltime {
1.13 harris41 359: my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
1.31 ! harris41 360: localtime(&unsqltime(@_[0]));
! 361: $mon++; $year+=1900;
! 362: return "$year-$mon-$mday $hour:$min:$sec";
! 363: }
1.28 harris41 364:
1.31 ! harris41 365: sub maketime {
! 366: my %th=@_;
! 367: return POSIX::mktime(
! 368: ($th{'seconds'},$th{'minutes'},$th{'hours'},
! 369: $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'}));
! 370: }
1.28 harris41 371:
372:
1.31 ! harris41 373: #########################################
! 374: #
! 375: # Retro-fixing of un-backward-compatible time format
1.28 harris41 376:
1.31 ! harris41 377: sub unsqltime {
! 378: my $timestamp=shift;
! 379: if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
! 380: $timestamp=&maketime(
! 381: 'year'=>$1,'month'=>$2,'day'=>$3,
! 382: 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
! 383: }
! 384: return $timestamp;
! 385: }
1.28 harris41 386:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>