Annotation of loncom/metadata_database/searchcat.pl, revision 1.85
1.1 harris41 1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
1.16 harris41 4: #
1.85 ! raeburn 5: # $Id: searchcat.pl,v 1.84 2016/01/31 21:25:49 raeburn Exp $
1.16 harris41 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
1.29 albertel 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.16 harris41 10: #
1.29 albertel 11: # LON-CAPA is free software; you can redistribute it and/or modify
1.16 harris41 12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
1.29 albertel 16: # LON-CAPA is distributed in the hope that it will be useful,
1.16 harris41 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
1.29 albertel 22: # along with LON-CAPA; if not, write to the Free Software
1.16 harris41 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
1.29 albertel 27: # http://www.lon-capa.org/
1.16 harris41 28: #
29: ###
1.33 matthew 30:
1.32 www 31: =pod
1.1 harris41 32:
1.32 www 33: =head1 NAME
34:
35: B<searchcat.pl> - put authoritative filesystem data into sql database.
36:
37: =head1 SYNOPSIS
38:
39: Ordinarily this script is to be called from a loncapa cron job
40: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
41: filesystem installation location: F</etc/cron.d/loncapa>).
42:
43: Here is the cron job entry.
44:
45: C<# Repopulate and refresh the metadata database used for the search catalog.>
46: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
47:
48: This script only allows itself to be run as the user C<www>.
49:
50: =head1 DESCRIPTION
51:
52: This script goes through a loncapa resource directory and gathers metadata.
53: The metadata is entered into a SQL database.
54:
55: This script also does general database maintenance such as reformatting
56: the C<loncapa:metadata> table if it is deprecated.
57:
58: This script evaluates dynamic metadata from the authors'
1.48 www 59: F<nohist_resevaldata.db> database file in order to store it in MySQL.
1.32 www 60:
61: This script is playing an increasingly important role for a loncapa
62: library server. The proper operation of this script is critical for a smooth
63: and correct user experience.
64:
65: =cut
1.1 harris41 66:
1.45 www 67: use strict;
1.55 matthew 68: use DBI;
1.17 harris41 69: use lib '/home/httpd/lib/perl/';
1.55 matthew 70: use LONCAPA::lonmetadata;
1.76 albertel 71: use LONCAPA;
1.56 matthew 72: use Getopt::Long;
1.1 harris41 73: use IO::File;
74: use HTML::TokeParser;
1.21 www 75: use GDBM_File;
1.24 www 76: use POSIX qw(strftime mktime);
1.80 raeburn 77: use Mail::Send;
1.81 bisitz 78: use Apache::loncommon();
1.56 matthew 79:
1.63 matthew 80: use Apache::lonnet();
1.62 matthew 81:
1.55 matthew 82: use File::Find;
1.1 harris41 83:
#
# Set up configuration options
#
# $simulate - true when the database must not be modified
# $oneuser  - restrict the run to this single user (forces $simulate)
# $verbose  - numeric logging level compared against the level given to &log()
# $debug    - true to turn on the extra debugging log calls
# $logfile is declared here but no option in this file assigns it.
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
my $options_ok = GetOptions (
                             'help'      => \$help,
                             'simulate'  => \$simulate,
                             'only=s'    => \$oneuser,
                             'verbose=s' => \$verbose,
                             'debug'     => \$debug,
                             );

#
# GetOptions() returns false when it meets an option it does not understand
# (it has already complained on STDERR by then).  Stop here in that case, so
# that a mistyped "-simulate" cannot turn into a run which rewrites the
# database.
if (! $options_ok) {
    print STDERR "$0: invalid command line options; use -help for usage.\n";
    exit 1;
}

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
    -help          Print this help
    -simulate      Do not modify the database.
    -only=user     Only compute for the given user.  Implies -simulate
    -verbose=val   Sets logging level, val must be a number
    -debug         Turns on debugging output
ENDHELP
    exit 0;
}

# Give the numeric switches a defined value so later comparisons are clean.
if (! defined($debug)) {
    $debug = 0;
}

if (! defined($verbose)) {
    $verbose = 0;
}

# Restricting the run to one user never touches the live tables.
if (defined($oneuser)) {
    $simulate=1;
}
120:
##
## Use variables for table names so we can test this routine a little easier.
## %oldnames maps a short key to the name of the live table.
my %oldnames = (
                'metadata'    => 'metadata',
                'portfolio'   => 'portfolio_metadata',
                'access'      => 'portfolio_access',
                'addedfields' => 'portfolio_addedfields',
                'allusers'    => 'allusers',
                );

# %newnames maps the same keys to the temporary tables filled during this
# run; the process id ($$) is appended to have unique temporary tables.
my %newnames = map { $_ => 'new'.$oldnames{$_}.$$ } keys(%oldnames);
1.45 www 136:
#
# Only run if machine is a library server
exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
my $hostid = $Apache::lonnet::perlvar{'lonHostID'};

#
# Make sure this process is running from user=www
#
# getpwnam() in scalar context gives the uid, or undef when there is no such
# account.  undef compares numerically equal to 0, so without the defined()
# test a root process (uid 0) would pass this check on a host that has no
# www account.
my $wwwid=getpwnam('www');
if (! defined($wwwid) || $wwwid!=$<) {
    # Tell the administrators by mail, then give up.
    my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
    my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
 mail -s '$subj' $emailto > /dev/null");
    exit 1;
}
#
# Let people know we are running
#
# LOG deliberately stays a bareword (package) filehandle: &log() prints to it
# and it is closed at the end of the run.
my $searchcat_log = $Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log';
if (! open(LOG,'>>',$searchcat_log)) {
    # Carry on without a log file, but do not hide the fact.
    warn "searchcat.pl: cannot append to $searchcat_log: $!\n";
}
&log(0,'==== Searchcat Run '.localtime()."====");


# With -debug, record the effective settings at the top of the run.
if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}
#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                          { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# Remove any temporary tables that already carry this run's names.
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}

#
# Create the new metadata, portfolio and allusers tables
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        my $request =
            &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
        $dbh->do($request);
        if ($dbh->err) {
            # Save the message first: DBI clears err/errstr at the start of
            # the next method call, so it would be gone after disconnect().
            my $errstr = $dbh->errstr;
            $dbh->disconnect();
            &log(0,"MySQL Error Create: ".$errstr);
            die $errstr;
        }
    }
}
1.69 raeburn 193:
#
# find out which users we need to examine
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');

#
# Everything below is done once per domain:
#   1. store the metadata of every author's published resources,
#   2. store access and metadata information for portfolio files,
#   3. refresh the allusers table and reconcile the domain's ids.db and
#      clickers.db files with the IDs found in the user environments.
foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    #
    # Authors are the entries of <lonDocRoot>/res/<dom> for which &ishome()
    # is true.  If the directory cannot be read the list is simply empty.
    my @resentries;
    if (opendir(my $resdir,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom")) {
        @resentries = readdir($resdir);
        closedir($resdir);
    }
    my @homeusers =
        grep {
            &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_");
        } grep {
            !/^\.\.?$/;
        } @resentries;
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    # %courses is used below only through exists($courses{"<dom>_<name>"}),
    # to tell courses apart from real users.
    my %courses;
    if ($oneuser) {
        # courseiddump lives in Apache::lonnet; it has to be called with its
        # package name because nothing defines it in this script.
        %courses = &Apache::lonnet::courseiddump($dom,'.',1,'.','.',$oneuser,undef,
                                                 undef,'.');
        @homeusers=($oneuser);
    } else {
        # get courseIDs for domain on current machine
        %courses=&Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,[$hostid],'.');
    }

    #
    # Loop through the users
    foreach my $user (@homeusers) {
        next if (exists($courses{$dom.'_'.$user}));
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              no_chdir   => 1,
             }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
    # Search for all users and public portfolio files
    my (%allusers,%portusers);
    if ($oneuser) {
        %portusers = (
                      $oneuser => '',
                      );
        %allusers = (
                      $oneuser => '',
                      );
    } else {
        my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom;
        &descend_tree($dom,$dir,0,\%portusers,\%allusers);
    }
    #
    # Portfolio files: one pass per user (or course) owning a
    # file_permissions.db
    foreach my $uname (keys(%portusers)) {
        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = '';
        if (exists($courses{$dom.'_'.$uname})) {
            $is_course = 1;
        }
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            my ($group,$url,$fullpath);
            if ($is_course) {
                # For a course the key is split into a leading word (the
                # group) and the rest of the path.
                ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/);
                $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path;
                $url = $urlstart.'/groups/'.$group.'/portfolio'.$path;
            } else {
                $fullpath = $pathstart.'/portfolio'.$file;
                $url = $urlstart.'/portfolio'.$file;
            }
            if (ref($access{$file}) eq 'HASH') {
                my %portaccesslog =
                    &LONCAPA::lonmetadata::process_portfolio_access_data($dbh,
                        $simulate,\%newnames,$url,$fullpath,$access{$file});
                &portfolio_logging(%portaccesslog);
            }
            my %portmetalog = &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,$simulate,\%newnames,$url,$fullpath,$is_course,$dom,$uname,$group);
            &portfolio_logging(%portmetalog);
        }
    }
    #
    # Bookkeeping for the ID reconciliation.  Each hash has one sub-hash for
    # 'id' (employee/student IDs) and one for 'clickers'.
    #   %names_by_id  - ID => username string currently in the .db file
    #   %ids_by_name  - username => list of IDs currently in the .db file
    #   %idstodelete  - ID => username(s) to remove from the .db file
    #   %idstoadd     - ID => username(s) to add to the .db file
    #   %duplicates   - student ID => further users claiming an ID that is
    #                   already queued in %idstoadd
    my %duplicates;
    my %names_by_id = (
                         id       => {},
                         clickers => {},
                      );
    my %ids_by_name = (
                         id       => {},
                         clickers => {},
                      );
    my %idstodelete = (
                         id       => {},
                         clickers => {},
                      );
    my %idstoadd = (
                         id       => {},
                         clickers => {},
                   );
    # name of the domain-level database (without .db) for each kind of ID
    my %namespace = (
                         id       => 'ids',
                         clickers => 'clickers',
                    );
    # wording used in log messages for each kind of ID
    my %idtext = (
                         id       => 'employee/student IDs',
                         clickers => 'clicker IDs',
                 );
    #
    # Read what is currently stored in ids.db and clickers.db
    unless ($simulate || $oneuser) {
        foreach my $key ('id','clickers') {
            my $hashref = &tie_domain_hash($dom,$namespace{$key},&GDBM_WRCREAT());
            if (ref($hashref) eq 'HASH') {
                while (my ($id,$unamestr) = each(%{$hashref}) ) {
                    $id = &unescape($id);
                    $unamestr = &unescape($unamestr);
                    if ($key eq 'clickers') {
                        # a clicker record holds a comma separated list of
                        # usernames
                        my @unames = split(/,/,$unamestr);
                        foreach my $uname (@unames) {
                            push(@{$ids_by_name{$key}{$uname}},$id);
                        }
                        $names_by_id{$key}{$id} = $unamestr;
                    } else {
                        $names_by_id{$key}{$id} = $unamestr;
                        push(@{$ids_by_name{$key}{$unamestr}},$id);
                    }
                }
                &untie_domain_hash($hashref);
            }
        }
    }
    # Update allusers
    foreach my $uname (keys(%allusers)) {
        next if (exists($courses{$dom.'_'.$uname}));
        my %userdata =
            &Apache::lonnet::get('environment',['firstname','lastname',
                'middlename','generation','id','permanentemail','clickers'],
                $dom,$uname);
        unless ($simulate || $oneuser) {
            foreach my $key ('id','clickers') {
                # %addid: IDs in this user's environment which still need a
                # record in the .db file
                my %addid = ();
                if ($userdata{$key} ne '') {
                    my $idfromenv = $userdata{$key};
                    if ($key eq 'id') {
                        # student IDs are compared in lower case
                        $idfromenv=~tr/A-Z/a-z/;
                        $addid{$idfromenv} = 1;
                    } else {
                        $idfromenv =~ s/^\s+//;
                        $idfromenv =~ s/\s+$//;
                        foreach my $clicker (split(/,/,$idfromenv)) {
                            $addid{$clicker} = 1;
                        }
                    }
                }
                if (ref($ids_by_name{$key}{$uname}) eq 'ARRAY') {
                    if (scalar(@{$ids_by_name{$key}{$uname}}) > 1) {
                        &log(0,"Multiple $idtext{$key} found in $namespace{$key}.db for $uname:$dom -- ".
                             join(', ',@{$ids_by_name{$key}{$uname}}));
                    }
                    foreach my $id (@{$ids_by_name{$key}{$uname}}) {
                        if ($addid{$id}) {
                            # already on file, nothing to add
                            delete($addid{$id});
                        } else {
                            # on file, but no longer in the environment
                            if ($key eq 'id') {
                                $idstodelete{$key}{$id} = $uname;
                            } else {
                                $idstodelete{$key}{$id} .= $uname.',';
                            }
                        }
                    }
                }
                if (keys(%addid)) {
                    foreach my $id (keys(%addid)) {
                        if ($key eq 'id') {
                            if (exists($idstoadd{$key}{$id})) {
                                push(@{$duplicates{$id}},$uname);
                            } else {
                                $idstoadd{$key}{$id} = $uname;
                            }
                        } else {
                            $idstoadd{$key}{$id} .= $uname.',';
                        }
                    }
                }
            }
        }

        $userdata{'username'} = $uname;
        $userdata{'domain'} = $dom;
        my %alluserslog =
            &LONCAPA::lonmetadata::process_allusers_data($dbh,$simulate,
                \%newnames,$uname,$dom,\%userdata);
        foreach my $item (keys(%alluserslog)) {
            &log(0,$alluserslog{$item});
        }
    }
    #
    # Apply the collected deletions and additions to ids.db / clickers.db
    unless ($simulate || $oneuser) {
        foreach my $key ('id','clickers') {
            if (keys(%{$idstodelete{$key}}) > 0) {
                if ($key ne 'id') {
                    # clicker user lists were built with a trailing comma
                    foreach my $delid (sort(keys(%{$idstodelete{$key}}))) {
                        $idstodelete{$key}{$delid} =~ s/,$//;
                    }
                }
                my %resulthash = &Apache::lonnet::iddel($dom,$idstodelete{$key},$hostid,$namespace{$key});
                if ($resulthash{$hostid} eq 'ok') {
                    foreach my $id (sort(keys(%{$idstodelete{$key}}))) {
                        &log(0,"Record deleted from $namespace{$key}.db for $dom -- $id => ".$idstodelete{$key}{$id});
                    }
                } else {
                    &log(0,"Error: '$resulthash{$hostid}' occurred when attempting to delete records from $namespace{$key}.db for $dom");
                }
            }
            if (keys(%{$idstoadd{$key}}) > 0) {
                my $idmessage = '';
                my %newids;
                if ($key eq 'id') {
                    foreach my $addid (sort(keys(%{$idstoadd{$key}}))) {
                        # An ID which is on file for a different user, and is
                        # not about to be deleted, is a conflict: report it
                        # instead of overwriting the record.
                        if ((exists($names_by_id{$key}{$addid})) && ($names_by_id{$key}{$addid} ne $idstoadd{$key}{$addid})  && !($idstodelete{$key}{$addid})) {
                            &log(0,"Two usernames associated with a single ID $addid in domain: $dom: $names_by_id{$key}{$addid} (current) and $idstoadd{$key}{$addid}\n");
                            $idmessage .= "$addid,$names_by_id{$key}{$addid},$idstoadd{$key}{$addid}\n";
                        } else {
                            $newids{$addid} = $idstoadd{$key}{$addid};
                        }
                    }
                } else {
                    foreach my $addid (sort(keys(%{$idstoadd{$key}}))) {
                        $idstoadd{$key}{$addid} =~ s/,$//;
                        $newids{$addid} = $idstoadd{$key}{$addid};
                    }
                }
                if (keys(%newids) > 0) {
                    my $putresult;
                    if ($key eq 'clickers') {
                        $putresult = &Apache::lonnet::updateclickers($dom,'add',\%newids,$hostid);
                    } else {
                        $putresult = &Apache::lonnet::put_dom($namespace{$key},\%newids,$dom,$hostid);
                    }
                    if ($putresult eq 'ok') {
                        foreach my $id (sort(keys(%newids))) {
                            &log(0,"Record added to $namespace{$key}.db for $dom -- $id => ".$newids{$id});
                        }
                    } else {
                        &log(0,"Error: '$putresult' occurred when attempting to add records to $namespace{$key}.db for $dom");
                    }
                }
                if ($idmessage) {
                    # Mail the list of conflicts to the recipients set up
                    # for 'idconflictsmail' in this domain, if there are any.
                    my $to = &Apache::loncommon::build_recipient_list(undef,'idconflictsmail',$dom);
                    if ($to ne '') {
                        my $msg = Mail::Send->new();
                        $msg->to($to);
                        $msg->subject('LON-CAPA studentIDs conflict');
                        my $lonhost = $Apache::lonnet::perlvar{'lonHostID'};
                        my $hostname = &Apache::lonnet::hostname($lonhost);
                        my $replytoaddress = 'do-not-reply@'.$hostname;
                        $msg->add('Reply-to',$replytoaddress);
                        $msg->add('From','www@'.$hostname);
                        $msg->add('Content-type','text/plain; charset=UTF-8');
                        if (my $fh = $msg->open()) {
                            print $fh
                                'The following IDs are used for more than one user in your domain:'."\n".
                                'Each row contains: Student/Employee ID, Current username in ids.db file, '.
                                'Additional username'."\n\n".
                                $idmessage;
                            $fh->close;
                        }
                    }
                }
            }
        }
        if (keys(%duplicates) > 0) {
            foreach my $id (sort(keys(%duplicates))) {
                if (ref($duplicates{$id}) eq 'ARRAY') {
                    &log(0,"Duplicate IDs found for entries to add to ids.db in $dom -- $id => ".join(',',@{$duplicates{$id}}));
                }
            }
        }
    }
}
1.69 raeburn 475:
#
# Rename the tables
if (! $simulate) {
    # Replace each live table by the freshly filled temporary one.
    foreach my $key (keys(%oldnames)) {
        if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) {
            $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
            if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
                &log(0,"MySQL Error Rename: ".$dbh->errstr);
                die $dbh->errstr;
            } else {
                &log(1,"MySQL table rename successful for $key.");
            }
        }
    }
} else {
    # A simulated run never promotes its temporary tables.  Drop them, or
    # every -simulate / -only run leaves a set of new<table><pid> tables
    # behind in the database.
    foreach my $key (keys(%newnames)) {
        next if ($newnames{$key} eq '');
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}
if (! $dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

# Write out the totals collected by &count_type() and &count_copyright().
&write_type_count();
&write_copyright_count();

exit 0;
1.28 harris41 504:
##
## &log($level,$message)
##
## Status logging routine.  Prints $message, followed by $/, to the LOG
## filehandle when the -verbose setting is at least $level.
##
## $level 0 should be used for normal output and error messages; an
## undefined $level is treated as 0.
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
sub log {
    my ($level,$message)=@_;
    defined($level) or $level = 0;
    print LOG $message.$/ if ($verbose >= $level);
}
521:
##
## &portfolio_logging(%portlog)
##
## Takes a hash whose values are hash references and writes every value
## found in those inner hashes to the log at level 0.  Values of %portlog
## which are not hash references are ignored.
sub portfolio_logging {
    my (%portlog) = @_;
    foreach my $entries (values(%portlog)) {
        next unless (ref($entries) eq 'HASH');
        foreach my $message (values(%{$entries})) {
            &log(0,$message);
        }
    }
}
532:
##
## &descend_tree($dom,$dir,$depth,$allportusers,$alldomusers)
##
## Walks the directory tree below $dir and records the names found in it.
##
## Inputs: $dom           domain (only handed on to the recursive calls)
##         $dir           directory to read
##         $depth         number of levels already descended; callers start
##                        with 0
##         $allportusers  hash ref; gets a key for every entry whose
##                        directory holds a file_permissions.db
##         $alldomusers   hash ref; gets a key for every entry whose
##                        directory holds a passwd file
##
## Entries with a one character name are taken to be intermediate
## directories and are descended into, as long as fewer than four levels
## have been entered.  Everything else is treated as a user directory.
## Entries starting with a dot are skipped.
sub descend_tree {
    my ($dom,$dir,$depth,$allportusers,$alldomusers) = @_;
    return unless (-d $dir);
    #
    # A lexical handle keeps nested calls from sharing one global handle,
    # and an unreadable directory is reported instead of being read as empty.
    my $dirhandle;
    if (! opendir($dirhandle,$dir)) {
        warn "searchcat.pl: unable to read directory $dir: $!\n";
        return;
    }
    my @contents = grep(!/^\./,readdir($dirhandle));
    closedir($dirhandle);
    $depth ++;
    foreach my $item (@contents) {
        if (($depth < 4) && (length($item) == 1)) {
            &descend_tree($dom,$dir.'/'.$item,$depth,$allportusers,$alldomusers);
        } else {
            if (-e $dir.'/'.$item.'/file_permissions.db') {
                $$allportusers{$item} = '';
            }
            if (-e $dir.'/'.$item.'/passwd') {
                $$alldomusers{$item} = '';
            }
        }
    }
    return;
}
554:
1.55 matthew 555: ########################################################
556: ########################################################
557: ### ###
558: ### File::Find support routines ###
559: ### ###
560: ########################################################
561: ########################################################
##
## &only_meta_files(@names)
##
## Called by File::Find as its 'preprocess' routine.
## Takes the list of entries of the directory $File::Find::dir and returns,
## in the same order, the ones worth visiting:
##   - names ending in .meta, unless they look like the metadata of a prior
##     version (name.<digits>.<ext>.meta),
##   - directories, so the search can go on below them.
## No special handling of '.' and '..' is done here.
sub only_meta_files {
    my @candidates = @_;
    my @kept = grep {
        (/\.meta$/ && !/\.\d+\.[^\.]+\.meta$/)
            || (-d $File::Find::dir."/".$_);
    } @candidates;
    return @kept;
}
581:
##
##
## Debugging routines, use these for 'wanted' in the File::Find call
##
## &print_filename
## With -debug on, logs (level 5) whether the current entry ($_) is a
## directory or a file, together with $File::Find::name.  $_ is left as it
## was found.
sub print_filename {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    if ($debug) {
        my $kind = (-d $file) ? 'directory' : 'file';
        &log(5," Got ".$kind." ".$fullfilename);
    }
    $_=$file;
}
1.28 harris41 598:
##
## &log_metadata
## Debugging 'wanted' routine for File::Find.  With -debug on, logs
## (level 6) the name of the current file and every key/value pair
## &metadata() returns for it, then counts its copyright.  Directories are
## ignored.
sub log_metadata {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    # No need to do anything here for directories
    if (-d $fullfilename) {
        return;
    }
    if ($debug) {
        &log(6,$fullfilename);
        my $ref = &metadata($fullfilename);
        unless (defined($ref)) {
            &log(6," No data");
            return;
        }
        foreach my $key (keys(%$ref)) {
            &log(6," ".$key." => ".$ref->{$key});
        }
        &count_copyright($ref->{'copyright'});
    }
    $_=$file;
}
1.21 www 617:
##
## &process_meta_file
## Called by File::Find as its 'wanted' routine.
## Only input is the filename in $_ (the full name is taken from
## $File::Find::name).
##
## Reads the .meta file, merges in the dynamic metadata of the resource it
## describes and, unless the run is simulated, stores the record in the new
## metadata table.  Resources marked obsolete and resources with copyright
## 'private' are not stored; the copyright of obsolete ones is not counted
## either.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name;    # full filename
    # No need to do anything here for directories
    if (-d $filename) {
        return;
    }
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    #
    # $url is the original file url, not the metadata file
    (my $target = $filename) =~ s/\.meta$//;
    my $url = '/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Find the dynamic metadata.  A url ending in /default has that part
    # cut back to the bare '/' and gets no dynamic data (and is not counted
    # by type).
    my %dyn;
    if ($url =~ s{/default$}{/}) {
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn = &get_dynamic_metadata($url);
        &count_type($url);
    }
    &LONCAPA::lonmetadata::getfiledates($ref,$target);
    #
    # Later entries win: dynamic data overrides the static metadata, and
    # url/version override both.
    my %Data = (
                %$ref,
                %dyn,
                'url'     => $url,
                'version' => 'current',
                );
    unless ($simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                  'metadata',\%Data);
        &log(0,"MySQL Error Insert: ".$err) if ($err);
        if ($count < 1) {
            &log(0,"Unable to insert record into MySQL database for $url");
        }
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}
680:
681: ########################################################
682: ########################################################
683: ### ###
684: ### &metadata($uri) ###
685: ### Retrieve metadata for the given file ###
686: ### ###
687: ########################################################
688: ########################################################
##
## &metadata($uri)
##
## Input: the name of a resource or of its .meta file; it is passed through
##        &declutter() and '.meta' is appended when missing.
## Returns: undef when &LONCAPA::lonmetadata::getfile() yields nothing for
##          the file, otherwise a reference to a hash with
##            'keys'               comma separated list of all entries found
##            '<entry>'            text of the entry, or the value of its
##                                 'default' attribute when the text is false
##            '<entry>.<attr>'     each attribute of the start tag
##          where <entry> is the tag name, extended by _<part> and _<name>
##          when the tag carries those attributes.
sub metadata {
    my ($uri) = @_;
    my %metacache=();
    my $filename = &declutter($uri);
    $filename .= '.meta' unless ($filename =~ /\.meta$/);
    my $metastring =
        &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my $parser=HTML::TokeParser->new(\$metastring);
    while (my $token=$parser->get_token) {
        # only start tags open an entry
        next unless ($token->[0] eq 'S');
        my ($entry,$attrs,$attrseq) = @{$token}[1,2,3];
        my $unikey = $entry;
        foreach my $qualifier ('part','name') {
            if (defined($attrs->{$qualifier})) {
                $unikey .= '_'.$attrs->{$qualifier};
            }
        }
        if ($metacache{'keys'}) {
            $metacache{'keys'} .= ','.$unikey;
        } else {
            $metacache{'keys'} = $unikey;
        }
        foreach my $attr (@{$attrseq}) {
            $metacache{$unikey.'.'.$attr} = $attrs->{$attr};
        }
        # fall back on the 'default' attribute when the tag has no text
        if (! ($metacache{$unikey} = $parser->get_text('/'.$entry))) {
            $metacache{$unikey} = $metacache{$unikey.'.default'};
        }
    }
    return \%metacache;
}
1.28 harris41 730:
1.55 matthew 731: ########################################################
732: ########################################################
733: ### ###
734: ### Dynamic Metadata ###
735: ### ###
736: ########################################################
737: ########################################################
1.56 matthew 738: ##
1.58 www 739: ## Dynamic metadata description (incomplete)
740: ##
741: ## For a full description of all fields,
742: ## see LONCAPA::lonmetadata
1.56 matthew 743: ##
744: ## Field Type
745: ##-----------------------------------------------------------
746: ## count integer
747: ## course integer
1.58 www 748: ## course_list comma separated list of course ids
1.56 matthew 749: ## avetries real
1.58 www 750: ## avetries_list comma separated list of real numbers
1.56 matthew 751: ## stdno real
1.58 www 752: ## stdno_list comma separated list of real numbers
1.56 matthew 753: ## usage integer
1.58 www 754: ## usage_list comma separated list of resources
1.56 matthew 755: ## goto scalar
1.58 www 756: ## goto_list comma separated list of resources
1.56 matthew 757: ## comefrom scalar
1.58 www 758: ## comefrom_list comma separated list of resources
1.56 matthew 759: ## difficulty real
1.58 www 760: ## difficulty_list comma separated list of real numbers
1.56 matthew 761: ## sequsage scalar
1.58 www 762: ## sequsage_list comma separated list of resources
1.56 matthew 763: ## clear real
764: ## technical real
765: ## correct real
766: ## helpful real
767: ## depth real
768: ## comments html of all the comments made
769: ##
{

# Data of the user most recently given to &process_dynamic_metadata():
my %DynamicData;   # result of LONCAPA::lonmetadata::process_reseval_data()
my %Counts;        # unescaped nohist_accesscount.db key => access count

##
## &process_dynamic_metadata($user,$dom)
##
## Loads the dynamic metadata (nohist_resevaldata.db) and the access counts
## (nohist_accesscount.db) of the given user into %DynamicData and %Counts,
## replacing whatever was held for the previous user.
## Returns 1 on success and 0 as soon as one of the two files cannot be
## tied; in that case the data read so far is kept.
sub process_dynamic_metadata {
    my ($user,$dom) = @_;
    undef(%DynamicData);
    undef(%Counts);
    #
    my $prodir = &propath($dom,$user);
    #
    # Read in the dynamic metadata
    my %evaldata;
    if (! tie(%evaldata,'GDBM_File',
              $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
        return 0;
    }
    #
    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
    untie(%evaldata);
    $DynamicData{'domain'} = $dom;
    #print('user = '.$user.' domain = '.$dom.$/);
    #
    # Read in the access count data
    &log(7,'Reading access count data') if ($debug);
    my %countdata;
    if (! tie(%countdata,'GDBM_File',
              $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
        return 0;
    }
    while (my ($key,$count) = each(%countdata)) {
        # Keys are stored escaped, so unescape before looking at the domain
        # prefix; \Q..\E keeps characters such as '.' in a domain name from
        # acting as regular expression metacharacters.
        $key = &unescape($key);
        next if ($key !~ /^\Q$dom\E/);
        &log(8,' Count '.$key.' = '.$count) if ($debug);
        $Counts{$key}=$count;
    }
    untie(%countdata);
    if ($debug) {
        &log(7,scalar(keys(%Counts)).
             " Counts read for ".$user."@".$dom);
        &log(7,scalar(keys(%DynamicData)).
             " Dynamic metadata read for ".$user."@".$dom);
    }
    #
    return 1;
}

##
## &get_dynamic_metadata($url)
##
## Returns the dynamic metadata of one resource as a hash: whatever
## LONCAPA::lonmetadata::process_dynamic_metadata() extracts from
## %DynamicData, plus 'count', the access count held in %Counts (undef when
## there is none).  A leading /res/ is removed from $url first.
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    # find the count
    $data{'count'} = $Counts{$url};
    #
    # Log the dynamic metadata
    if ($debug) {
        while (my($k,$v)=each(%data)) {
            &log(8," ".$k." => ".$v);
        }
    }
    return %data;
}

} # End of %DynamicData and %Counts scope
836:
1.55 matthew 837: ########################################################
838: ########################################################
839: ### ###
840: ### Counts ###
841: ### ###
842: ########################################################
843: ########################################################
{

my %countext;   # lower-cased file extension => number of resources seen

##
## &count_type($file)
## Adds one to the tally for the extension of $file (the word characters
## after its last dot, lower-cased).  Names without such an extension are
## tallied under the empty string.
sub count_type {
    my $file=shift;
    # Take the capture from this match only; reading $1 after a failed match
    # would pick up whatever an earlier, unrelated match left behind.
    my ($ext) = ($file =~ /\.(\w+)$/);
    $ext = defined($ext) ? lc($ext) : '';
    $countext{$ext}++;
}

##
## &write_type_count($outfile)
## Writes the tallies as extension=count& pairs followed by time=<epoch> on
## a single line.  $outfile is optional and defaults to
## /home/httpd/html/lon-status/rescount.txt.
## Returns 1 when the file was written, 0 when it could not be opened.
sub write_type_count {
    my ($outfile) = @_;
    if (! defined($outfile)) {
        $outfile = '/home/httpd/html/lon-status/rescount.txt';
    }
    my $rescount;
    if (! open($rescount,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $rescount $extension.'='.$count.'&';
    }
    print $rescount 'time='.time."\n";
    close($rescount);
    return 1;
}

} # end of scope for %countext
1.34 matthew 865:
{

my %copyrights;   # copyright value => number of resources seen

##
## &count_copyright($copyright)
## Adds one to the tally for the given copyright value.  An undefined value
## is tallied under the empty string.
sub count_copyright {
    my ($copyright) = @_;
    $copyright = '' if (! defined($copyright));
    $copyrights{$copyright}++;
}

##
## &write_copyright_count($outfile)
## Writes the tallies as copyright=count& pairs followed by time=<epoch> on
## a single line.  $outfile is optional and defaults to
## /home/httpd/html/lon-status/copyrightcount.txt.
## Returns 1 when the file was written, 0 when it could not be opened.
sub write_copyright_count {
    my ($outfile) = @_;
    if (! defined($outfile)) {
        $outfile = '/home/httpd/html/lon-status/copyrightcount.txt';
    }
    my $copycount;
    if (! open($copycount,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $copycount $copyright.'='.$count.'&';
    }
    print $copycount 'time='.time."\n";
    close($copycount);
    return 1;
}

} # end of scope for %copyrights
1.28 harris41 884:
1.55 matthew 885: ########################################################
886: ########################################################
887: ### ###
888: ### Miscellaneous Utility Routines ###
889: ### ###
890: ########################################################
891: ########################################################
##
## &ishome($path)
## Input: the path of an author's resource directory,
##        <lonDocRoot>/res/<domain>/<username>[/...]
## Returns 1 if the directory &propath() gives for that domain and username
## exists, 0 otherwise.
## (copied from lond, modification of the return value)
sub ishome {
    my $author=shift;
    # Strip the configured document root, as the callers and &declutter()
    # do, instead of assuming /home/httpd/html; that path stays the
    # fallback when lonDocRoot is not set.
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (! defined($docroot) || $docroot eq '') {
        $docroot = '/home/httpd/html';
    }
    $author=~s{\Q$docroot\E/res/([^/]*)/([^/]*).*}{$1/$2};
    my ($udom,$uname)=split(/\//,$author);
    my $proname=&propath($udom,$uname);
    if (-e $proname) {
        return 1;
    } else {
        return 0;
    }
}
1.28 harris41 907:
##
## &declutter($filename)
## Given a filename, returns a url for the filename: a leading lonDocRoot,
## then a leading '/', then a leading 'res/' are removed, each only when
## present.
sub declutter {
    my $thisfn=shift;
    # \Q..\E: the document root is a literal path, not a pattern, so a '.'
    # in it must only match a dot.  An unset lonDocRoot removes nothing.
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    $docroot = '' if (! defined($docroot));
    $thisfn=~s/^\Q$docroot\E//;
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}
1.28 harris41 918:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>