Annotation of loncom/metadata_database/searchcat.pl, revision 1.30
1.1 harris41 1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
1.16 harris41 4: #
1.30 ! www 5: # $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $
1.16 harris41 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
1.29 albertel 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.16 harris41 10: #
1.29 albertel 11: # LON-CAPA is free software; you can redistribute it and/or modify
1.16 harris41 12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
1.29 albertel 16: # LON-CAPA is distributed in the hope that it will be useful,
1.16 harris41 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
1.29 albertel 22: # along with LON-CAPA; if not, write to the Free Software
1.16 harris41 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
1.29 albertel 27: # http://www.lon-capa.org/
1.16 harris41 28: #
29: # YEAR=2001
1.17 harris41 30: # 04/14/2001, 04/16/2001 Scott Harrison
1.16 harris41 31: #
1.17 harris41 32: # YEAR=2002
33: # 05/11/2002 Scott Harrison
1.16 harris41 34: #
1.28 harris41 35: # YEAR=2003
36: # Scott Harrison
37: #
1.16 harris41 38: ###
1.1 harris41 39:
1.28 harris41 40: =pod
41:
42: =head1 NAME
43:
44: B<searchcat.pl> - put authoritative filesystem data into sql database.
45:
46: =head1 SYNOPSIS
47:
48: Ordinarily this script is to be called from a loncapa cron job
49: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
50: filesystem installation location: F</etc/cron.d/loncapa>).
51:
52: Here is the cron job entry.
53:
54: C<# Repopulate and refresh the metadata database used for the search catalog.>
55:
56: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
57:
58: This script only allows itself to be run as the user C<www>.
59:
60: =head1 DESCRIPTION
61:
62: This script goes through a loncapa resource directory and gathers metadata.
63: The metadata is entered into a SQL database.
64:
65: This script also does general database maintenance such as reformatting
66: the C<loncapa:metadata> table if it is deprecated.
67:
68: This script also builds dynamic temporal metadata and stores this inside
69: a F<nohist_resevaldata.db> database file.
70:
71: This script is playing an increasingly important role for a loncapa
72: library server. The proper operation of this script is critical for a smooth
73: and correct user experience.
74:
75: =cut
76:
# ========================================================== Setting things up.

# ------------------------------------------------------ Use external modules.

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use HTML::TokeParser;
use DBI;
use GDBM_File;
use POSIX qw(strftime mktime);

# ----------------- Code to enable 'find' subroutine listing of the .meta files
use File::Find;

# List of .meta files (used on a per-user basis).
my @metalist;

# --------------- Read loncapa_apache.conf and loncapa.conf and get variables.
my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar = %{$perlvarref};
undef($perlvarref); # Remove since sensitive and not needed.
delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.

# ------------------------------------- Only run if machine is a library server
if ($perlvar{'lonRole'} ne 'library')
{
    exit(0);
}

# ------------------------------ Make sure this process is running as user=www.
my $wwwid = getpwnam('www');
if ($wwwid != $<)
{
    # NOTE(review): $emailto and $subj are undeclared package globals;
    # this script does not run under "use strict".
    $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
           "mailto $emailto -s '$subj' > /dev/null");
    exit(1);
}

# ------------------------------------------------------ Initialize log output.
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");

my $dbh; # Database object reference handle.

# ----------------------------- Verify connection to loncapa:metadata database.
unless (
        $dbh = DBI->connect('DBI:mysql:loncapa','www',
                            $perlvar{'lonSqlAccess'},
                            { RaiseError => 0,PrintError => 0})
        )
{
    print(LOG '**** ERROR **** Cannot connect to database!'."\n");
    exit(0);
}

# ------------------------------ Create loncapa:metadata table if non-existent.
my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
    'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
    'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
    'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
    'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
    'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
    'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
    'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
    'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
    'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
    'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';

$dbh->do($make_metadata_table); # Generate the table.

# ----------------------------- Verify format of the loncapa:metadata database.
# (delete and recreate database if necessary).

# Make a positive control for verifying table structure: a throwaway
# CONTROL_metadata table created from the same DDL, whose "describe"
# output is the expected structure.
my $make_metadata_table_CONTROL = $make_metadata_table;
$make_metadata_table_CONTROL =~
    s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
$dbh->do($make_metadata_table_CONTROL);

my $table_description; # selectall reference to the table description.

my $CONTROL_table_string; # What the table description should look like.
my $table_string; # What the table description does look like.

# Calculate the CONTROL table description (what it should be).
$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
foreach my $table_row (@{$table_description})
{
    $CONTROL_table_string .= join(',',@{$table_row})."\n";
}

# Calculate the current table description (what it currently looks like).
$table_description = $dbh->selectall_arrayref('describe metadata');
foreach my $table_row (@{$table_description})
{
    $table_string .= join(',',@{$table_row})."\n";
}

if ($table_string ne $CONTROL_table_string)
{
    # Log this incident.
    print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
          '.'."\n");
    # Delete the table.
    $dbh->do('DROP TABLE IF EXISTS metadata');
    # Generate the table.
    $dbh->do($make_metadata_table);
}

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.

# ----------------------------------------------- Set utilitysemaphore to zero.
# Rows still at zero after the run are stale and get deleted below.
$dbh->do('UPDATE metadata SET utilitysemaphore = 0');

# ========================================================= Main functionality.

# - Determine home authors on this server based on resources dir and user tree.

# RESOURCES: the resources directory (subdirs correspond to author usernames).
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
    (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
     and exit(0));

# query_home_server_status will look for user home directories on this machine.
my @homeusers =
    grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
                                    $perlvar{'lonDefDomain'}.'/'.$_)
          } grep {!/^\.\.?$/} readdir(RESOURCES);
closedir(RESOURCES);

unless (@homeusers)
{
    print(LOG '=== No home users found on this server.'."\n");
}

# Consider each author individually.
foreach my $user (@homeusers)
{
    # Make a log entry.
    print(LOG "\n".'=== User: '.$user."\n\n");

    # Get filesystem path to this user's directory.
    my $user_directory =
        &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);

    # Remove left-over db-files from a potentially crashed searchcat run.
    unlink($user_directory.'/nohist_new_resevaldata.db');

    # Cleanup the metalist array.
    undef(@metalist);
    @metalist = ();

    # This will add entries to the @metalist array.
    &File::Find::find(\&wanted,
                      $perlvar{'lonDocRoot'}.'/res/'.
                      $perlvar{'lonDefDomain'}.'/'.$user);

    # -- process file to get metadata and put into search catalog SQL database
    # Also, build and store dynamic metadata.
    # Also, delete record entries before refreshing.
    foreach my $m (@metalist)
    {
        # Log this action.
        print(LOG "- ".$m."\n");

        # Get metadata from the file.
        my $ref = get_metadata_from_file($m);

        # Make a datarecord identifier for this resource.
        my $m2 = '/res/'.declutter($m);
        $m2 =~ s/\.meta$//;

        # Build and store dynamic metadata inside nohist_resevaldata.db.
        # (Return value is currently discarded; see that sub's POD.)
        build_on_the_fly_dynamic_metadata($m2);

        # Delete record if it already exists.
        my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
        my $sth = $dbh->prepare($q2);
        $sth->execute();
        my $r1 = $sth->fetchall_arrayref;
        if (@$r1)
        {
            $sth =
                $dbh->prepare('delete from metadata where url like binary '.
                              "'".$m2."'");
            $sth->execute();
        }

        # Add new/replacement record into the loncapa:metadata table.
        # NOTE(review): metadata values are interpolated directly into the
        # SQL string; a value containing a double quote breaks the statement
        # (and is an injection vector).  Consider $dbh->quote() or
        # placeholders.
        $sth = $dbh->prepare('insert into metadata values ('.
                             '"'.delete($ref->{'title'}).'"'.','.
                             '"'.delete($ref->{'author'}).'"'.','.
                             '"'.delete($ref->{'subject'}).'"'.','.
                             '"'.$m2.'"'.','.
                             '"'.delete($ref->{'keywords'}).'"'.','.
                             '"'.'current'.'"'.','.
                             '"'.delete($ref->{'notes'}).'"'.','.
                             '"'.delete($ref->{'abstract'}).'"'.','.
                             '"'.delete($ref->{'mime'}).'"'.','.
                             '"'.delete($ref->{'language'}).'"'.','.
                             '"'.sql_formatted_time(
                                 delete($ref->{'creationdate'})).'"'.','.
                             '"'.sql_formatted_time(
                                 delete($ref->{'lastrevisiondate'})).'"'.','.
                             '"'.delete($ref->{'owner'}).'"'.','.
                             '"'.delete($ref->{'copyright'}).'"'.','.
                             '1'.')');
        $sth->execute();
    }

# ----------------------- Clean up database, remove stale SQL database records.
    # NOTE(review): this DELETE runs once per author inside the loop, so
    # not-yet-refreshed records of later authors are deleted and then
    # re-inserted; probably intended to run once after the loop.
    $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');

# -------------------------------------------------- Copy over the new db-files
    # Only after successful completion for one author does the scratch db
    # replace the live nohist_resevaldata.db.
    system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
           $user_directory.'/nohist_resevaldata.db');
}

# --------------------------------------------------- Close database connection
$dbh->disconnect;
print LOG "\n==== Searchcat completed ".localtime()." ====\n";
close(LOG);
exit(0);
306:
307: # ================================================================ Subroutines.
308:
309: =pod
310:
311: =head1 SUBROUTINES
312:
313: =cut
314:
315: =pod
316:
B<unescape> - translate escaped syntax back into the original (strange) characters.
318:
319: =over 4
320:
321: Parameters:
322:
323: =item I<$str> - string with unweird characters.
324:
325: =back
326:
327: =over 4
328:
329: Returns:
330:
331: =item C<string> - string with potentially weird characters.
332:
333: =back
334:
335: =cut
336:
# Decode %XX hex escape sequences back into their literal characters.
# $str - string containing %-encoded bytes.
# Returns the decoded string.
sub unescape ($)
{
    my ($encoded) = @_;
    $encoded =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/eg;
    return $encoded;
}
343:
344: =pod
345:
346: B<escape> - translate strange characters to unstrange escaped syntax.
347:
348: =over 4
349:
350: Parameters:
1.21 www 351:
1.28 harris41 352: =item I<$str> - string with potentially weird characters to unweird-ify.
1.22 www 353:
1.28 harris41 354: =back
355:
356: =over 4
357:
358: Returns:
359:
360: =item C<string> - unweird-ified string.
361:
362: =back
363:
364: =cut
365:
# Encode every non-word character as a %XX hex escape sequence.
# $str - string possibly containing characters unsafe for use as db keys.
# Returns the %-encoded string (word characters pass through untouched).
sub escape ($)
{
    my ($raw) = @_;
    $raw =~ s/(\W)/'%' . unpack('H2', $1)/eg;
    return $raw;
}
372:
373: =pod
374:
375: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
376:
1.30 ! www 377: Returns the dynamic metadata for an author, which will later be added to the
! 378: MySQL database (not yet implemented).
! 379:
! 380: The vast majority of entries in F<nohist_resevaldata.db>, which contains
! 381: the dynamic metadata for an author's resources, are "count", which make
! 382: the file really large and evaluation really slow.
! 383:
! 384: While computing the current value of all dynamic metadata
! 385: for later insertion into the MySQL metadata cache (not yet implemented),
! 386: this routine also simply adds up all "count" type fields and replaces them by
! 387: one new field with the to-date count.
! 388:
! 389: Only after successful completion of working with one author, copy new file to
! 390: original file. Copy to tmp-"new"-db-file was necessary since db-file size
! 391: would not shrink after "delete" of key.
1.28 harris41 392:
393: =over 4
394:
395: Parameters:
396:
397: =item I<$url> - the filesystem path (url may be a misnomer...)
398:
399: =back
400:
401: =over 4
402:
403: Returns:
1.22 www 404:
1.28 harris41 405: =item C<hash> - key-value table of dynamically evaluated metadata.
1.21 www 406:
1.28 harris41 407: =back
1.21 www 408:
1.28 harris41 409: =cut
1.25 www 410:
1.30 ! www 411: sub build_on_the_fly_dynamic_metadata {
1.29 albertel 412:
413: # Need to compute the user's directory.
1.30 ! www 414: my $url=&declutter(shift);
! 415: $url=~s/\.meta$//;
! 416: my %returnhash=();
! 417: my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
! 418: my $user_directory=&construct_path_to_user_directory($adomain,$aauthor);
1.28 harris41 419:
420: # Attempt a GDBM database instantiation inside users directory and proceed.
1.25 www 421: if ((tie(%evaldata,'GDBM_File',
1.28 harris41 422: $user_directory.
423: '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
1.25 www 424: (tie(%newevaldata,'GDBM_File',
1.28 harris41 425: $user_directory.
1.30 ! www 426: '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
1.28 harris41 427: # For different variables, track the running sum and counts.
1.30 ! www 428: my %sum=();
! 429: my %cnt=();
1.28 harris41 430:
431: # Define computed items as a sum (add) or an average (avg) or a raw
1.30 ! www 432: # count (cnt) or append (app)?
1.28 harris41 433: my %listitems=('count' => 'add',
434: 'course' => 'add',
435: 'avetries' => 'avg',
436: 'stdno' => 'add',
437: 'difficulty' => 'avg',
438: 'clear' => 'avg',
439: 'technical' => 'avg',
440: 'helpful' => 'avg',
441: 'correct' => 'avg',
442: 'depth' => 'avg',
443: 'comments' => 'app',
444: 'usage' => 'cnt'
445: );
446:
447: # Untaint the url and use as part of a regular expression.
1.30 ! www 448: my $regexp=$url;
! 449: $regexp=~s/(\W)/\\$1/g;
! 450: $regexp='___'.$regexp.'___([a-z]+)$'; #' emacs
! 451:
! 452: # Check existing database for this author.
! 453: # this is modifying the 'count' entries
! 454: # and copying all other entries over
! 455:
! 456: foreach (keys %evaldata) {
! 457: my $key=&unescape($_);
! 458: if ($key=~/$regexp/) { # If url-based entry exists.
! 459: my $ctype=$1; # Set to specific category type.
1.28 harris41 460:
461: # Do an increment for this category type.
1.30 ! www 462: if (defined($cnt{$ctype})) {
1.28 harris41 463: $cnt{$ctype}++;
1.30 ! www 464: } else {
! 465: $cnt{$ctype}=1;
! 466: }
! 467: unless ($listitems{$ctype} eq 'app') { # append comments
1.28 harris41 468: # Increment the sum based on the evaluated data in the db.
1.30 ! www 469: if (defined($sum{$ctype})) {
! 470: $sum{$ctype}+=$evaldata{$_};
! 471: } else {
! 472: $sum{$ctype}=$evaldata{$_};
! 473: }
! 474: } else { # 'app' mode, means to use '<hr />' as a separator
! 475: if (defined($sum{$ctype})) {
! 476: if ($evaldata{$_}) {
! 477: $sum{$ctype}.='<hr />'.$evaldata{$_};
! 478: }
! 479: } else {
! 480: $sum{$ctype}=''.$evaldata{$_};
! 481: }
! 482: }
! 483: if ($ctype ne 'count') {
1.29 albertel 484: # this is copying all data except 'count' attributes
1.30 ! www 485: $newevaldata{$_}=$evaldata{$_};
! 486: }
! 487: }
! 488: }
! 489:
! 490: # these values will be returned (currently still unused)
! 491: foreach (keys %cnt) {
! 492: if ($listitems{$_} eq 'avg') {
! 493: $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
! 494: } elsif ($listitems{$_} eq 'cnt') {
! 495: $returnhash{$_}=$cnt{$_};
! 496: } else {
! 497: $returnhash{$_}=$sum{$_};
! 498: }
! 499: }
! 500:
! 501: # generate new count key in resevaldata, insert sum
! 502: if ($returnhash{'count'}) {
! 503: my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
! 504: $newevaldata{$newkey}=$returnhash{'count'};
! 505: }
1.28 harris41 506:
507: untie(%evaldata); # Close/release the original nohist database.
508: untie(%newevaldata); # Close/release the new nohist database.
1.30 ! www 509: }
! 510: return %returnhash;
! 511: }
1.28 harris41 512:
513: =pod
514:
515: B<wanted> - used by B<File::Find::find> subroutine.
516:
517: This evaluates whether a file is wanted, and pushes it onto the
518: I<@metalist> array. This subroutine was, for the most part, auto-generated
519: by the B<find2perl> command.
520:
521: =over 4
522:
523: Parameters:
524:
525: =item I<$file> - a path to the file.
526:
527: =back
528:
529: =over 4
530:
531: Returns:
532:
533: =item C<boolean> - true or false based on logical statement.
534:
535: =back
536:
537: =cut
538:
# File::Find callback: collect qualifying .meta files into @metalist.
#
# A file is pushed onto the file-level @metalist array when it:
#   * lstat()s successfully and is a plain file,
#   * ends in ".meta",
#   * is not a versioned copy (name.NN.ext.meta), which is skipped.
#
# Fixes over the previous version: the thirteen lstat fields are no
# longer assigned to undeclared package globals ($dev, $ino, ...) just
# to test for lstat success, and the useless "($)" prototype is gone
# (prototypes are ignored anyway when the sub is passed by reference
# to File::Find::find).
sub wanted
{
    # lstat in list context returns the empty list on failure.
    my @stat_info = lstat($_);
    @stat_info &&
        -f $_ &&
        /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
        push(@metalist, $File::Find::dir.'/'.$_);
}
546:
547: =pod
548:
549: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
1.1 harris41 550:
1.28 harris41 551: I<Note that this is significantly altered from a subroutine present in lonnet.>
1.15 harris41 552:
1.28 harris41 553: =over 4
1.1 harris41 554:
1.28 harris41 555: Parameters:
1.27 www 556:
1.28 harris41 557: =item I<$file> - a path.to the file.
1.27 www 558:
1.28 harris41 559: =back
1.27 www 560:
1.28 harris41 561: =over 4
1.25 www 562:
1.28 harris41 563: Returns:
1.1 harris41 564:
1.28 harris41 565: =item C<hash reference> - a hash array (keys and values).
1.1 harris41 566:
1.28 harris41 567: =back
1.25 www 568:
1.28 harris41 569: =cut
1.1 harris41 570:
1.28 harris41 571: sub get_metadata_from_file ($)
572: {
573: my ($filename) = @_;
574: my %metatable; # Used to store return value of hash-tabled metadata.
575: $filename = &declutter($filename); # Remove non-identifying filesystem info
576: my $uri = ''; # The URI is not relevant in this scenario.
577: unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
578: {
579: $filename .= '.meta'; # Append a .meta suffix.
580: }
581: # Get the file contents.
582: my $metadata_string =
583: &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
584:
585: # Parse the file based on its XML tags.
586: my $parser = HTML::TokeParser->new(\$metadata_string);
587: my $token;
588: while ($token = $parser->get_token) # Loop through tokens.
589: {
590: if ($token->[0] eq 'S') # If it is a start token.
591: {
592: my $entry = $token->[1];
593: my $unikey = $entry; # A unique identifier for this xml tag key.
594: if (defined($token->[2]->{'part'}))
595: {
596: $unikey .= '_'.$token->[2]->{'part'};
597: }
598: if (defined($token->[2]->{'name'}))
599: {
600: $unikey .= '_'.$token->[2]->{'name'};
601: }
602: # Append $unikey to metatable's keys entry.
603: if ($metatable{$uri.'keys'})
604: {
605: $metatable{$uri.'keys'} .= ','.$unikey;
1.1 harris41 606: }
1.28 harris41 607: else
608: {
609: $metatable{$uri.'keys'} = $unikey;
1.1 harris41 610: }
1.28 harris41 611: # Insert contents into metatable entry for the unikey.
612: foreach my $t3 (@{$token->[3]})
613: {
614: $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
1.1 harris41 615: }
1.28 harris41 616: # If there was no text contained inside the tags, set = default.
617: unless
618: (
619: $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
620: )
621: {
622: $metatable{$uri.''.$unikey} =
623: $metatable{$uri.''.$unikey.'.default'};
624: }
625: }
626: }
627: # Return with a key-value table of XML tags and their tag contents.
628: return(\%metatable);
629: }
630:
631: =pod
632:
633: B<get_file_contents> - returns either the contents of the file or a -1.
634:
635: =over 4
636:
637: Parameters:
638:
=item I<$file> - a complete filesystem path to the file.
640:
641: =back
642:
643: =over 4
644:
645: Returns:
646:
647: =item C<string> - file contents or a -1.
648:
649: =back
650:
651: =cut
652:
# Return the entire contents of a file as one string, or -1 when the
# file does not exist.
# $file - complete filesystem path to the file.
sub get_file_contents ($)
{
    my ($path) = @_;

    # Missing file is signalled with the sentinel value -1.
    return(-1) unless (-e $path);

    # Slurp the whole file in a single read; an unreadable or empty
    # file yields the empty string.
    my $contents = '';
    if (open(my $in, '<', $path))
    {
        local $/ = undef;  # Disable the input record separator.
        my $slurp = <$in>;
        $contents = defined($slurp) ? $slurp : '';
        close($in);
    }
    return $contents;
}
674:
675: =pod
676:
677: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
678:
679: =over 4
680:
681: Parameters:
682:
683: =item I<$filesystem_path> - a complete filesystem path.
684:
685: =back
686:
687: =over 4
688:
689: Returns:
690:
691: =item C<string> - remnants of the filesystem path (beginning portion removed).
692:
693: =back
694:
695: =cut
696:
# Strip the local document-root prefix (and the leading "res/"
# component) from a filesystem path, leaving only the portion that
# identifies the resource.
# $filesystem_path - a complete filesystem path.
sub declutter
{
    my ($path) = @_;

    # Peel away, in order: the document root, any leading slash, and a
    # leading "res/" component.
    $path =~ s/^$perlvar{'lonDocRoot'}//;
    $path =~ s{^/}{};
    $path =~ s{^res/}{};

    return $path;
}
709:
710: =pod
711:
712: B<query_home_server_status> - Is this the home server of an author's directory?
713:
714: =over 4
715:
716: Parameters:
717:
718: =item I<$author_filesystem_path> - directory path for a user.
719:
720: =back
721:
722: =over 4
723:
724: Returns:
725:
726: =item C<boolean> - 1 if true; 0 if false.
727:
728: =back
729:
730: =cut
731:
# Decide whether this machine is the home server for an author.
# $author_filesystem_path - resource directory path for the author,
#     e.g. /home/httpd/html/res/<domain>/<username>.
# Returns 1 when the corresponding user home directory exists on this
# filesystem, 0 otherwise.
sub query_home_server_status ($)
{
    my ($author_filesystem_path) = @_;

    # Reduce the resource path down to "domain/username".
    $author_filesystem_path =~
        s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;

    # Build the path where that user's home directory would live.
    my ($user_domain, $username) = split(m!/!, $author_filesystem_path);
    my $user_directory_path =
        construct_path_to_user_directory($user_domain, $username);

    # True exactly when the directory exists locally.
    return((-e $user_directory_path) ? 1 : 0);
}
754:
755: =pod
756:
757: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
758:
759: =over 4
760:
761: Parameters:
762:
763: =item I<$user_domain> - the loncapa domain of the user.
764:
765: =item I<$username> - the unique username (user id) of the user.
766:
767: =back
768:
769: =over 4
770:
771: Returns:
772:
773: =item C<string> - representing the path on the filesystem.
774:
775: =back
776:
777: =cut
778:
# Build the filesystem path to a user's home directory.
#
# loncapa shards user directories three levels deep on the first three
# characters of the username ('_'-padded for short names), e.g. user
# "smith" in domain "msu" lives at <lonUsersDir>/msu/s/m/i/smith.
#
# $user_domain - the loncapa domain of the user.
# $username   - the unique username (user id) of the user.
# Returns the complete filesystem path as a string.
sub construct_path_to_user_directory ($$)
{
    my ($user_domain, $username) = @_;

    # Untaint both components: keep word characters only.
    $user_domain =~ s/\W//g;
    $username =~ s/\W//g;

    # Derive the three sharding subdirectories from the (padded)
    # username's first three characters.
    my $shard = $username.'__';
    $shard =~ s!(.)(.)(.).*!$1/$2/$3/!;

    # Assemble and return the full path.
    return join('/', $perlvar{'lonUsersDir'},
                     $user_domain,
                     $shard,
                     $username);
}
803:
804: =pod
805:
806: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
807:
808: =over 4
809:
810: Parameters:
811:
812: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
813:
814: =back
815:
816: =over 4
817:
818: Returns:
819:
820: =item C<string> - datetime sql formatted string.
821:
822: =back
823:
824: =cut
1.13 harris41 825:
1.28 harris41 826: sub sql_formatted_time ($)
827: {
828: # Sanitize the time argument and convert to localtime array.
1.13 harris41 829: my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
1.28 harris41 830: localtime(&sanitize_time(shift(@_)));
831:
832: # Convert month from (0..11) to (1..12).
833: $mon += 1;
834:
835: # Make the year compatible with A.D. specification.
836: $year += 1900;
837:
838: # Return a date which is compatible with MySQL's "DATETIME" format.
839: return(join('-',($year,$mon,$mday)).
840: ' '.
841: join(':',($hour,$min,$sec))
842: );
843: }
844:
845:
846: # ==================================== The following two subroutines are needed
847: # for accommodating incorrect time formats inside the metadata.
848:
849: =pod
850:
851: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
852:
853: =over 4
854:
855: Parameters:
856:
857: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
858:
859: =back
860:
861: =over 4
862:
863: Returns:
864:
865: =item C<integer> - seconds since epoch.
866:
867: =back
868:
869: =cut
870:
# Convert a key-value description of a point in time into seconds since
# the Unix epoch (January 1, 1970, 00:00:00 UTC).
#
# %time_metadata - keys 'year', 'month' (1..12), 'day', 'hours',
#     'minutes', 'seconds', and 'dlsav' (daylight-saving flag).
# Returns an integer number of seconds since the epoch.
sub make_seconds_since_epoch (@)
{
    my %when = @_;

    # POSIX::mktime wants months 0..11 and years relative to 1900;
    # the weekday/yearday slots are ignored on input, so pass zeroes.
    return POSIX::mktime($when{'seconds'},
                         $when{'minutes'},
                         $when{'hours'},
                         $when{'day'},
                         $when{'month'} - 1,
                         $when{'year'} - 1900,
                         0,
                         0,
                         $when{'dlsav'});
}
890:
891: =pod
892:
893: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
894:
895: Somebody described this subroutine as
896: "retro-fixing of un-backward-compatible time format".
897:
898: What this means, is that a part of this code expects to get UTC seconds
899: since the epoch (beginning of 1970). Yet, some of the .meta files have
900: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
901: integers (e.g. 1044147435). These time strings do not encode the timezone
902: and, in this sense, can be considered "un-backwards-compatible".
903:
904: =over 4
905:
906: Parameters:
907:
908: =item I<$potentially_badformat_string> - string to "retro-fix".
909:
910: =back
911:
912: =over 4
913:
914: Returns:
915:
916: =item C<integer> - seconds since epoch.
917:
918: =back
919:
920: =cut
921:
# Retro-fix un-backward-compatible time formats: if the value looks like
# an sql-formatted datetime string ("YYYY-MM-DD HH:MM:SS"), convert it
# to seconds since the epoch; otherwise pass it through unchanged
# (it is assumed to already be seconds since the epoch).
sub sanitize_time ($)
{
    my $timestamp = shift(@_);

    # Detect the unexpected (sql datetime) format.
    if ($timestamp =~ /^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)$/)
    {
        # Convert into seconds since epoch (the expected format).
        $timestamp = &make_seconds_since_epoch(
            'year'    => $1,
            'month'   => $2,
            'day'     => $3,
            'hours'   => $4,
            'minutes' => $5,
            'seconds' => $6,
        );
    }
    return $timestamp;
}
941:
942: =pod
943:
944: =head1 AUTHOR
945:
946: Written to help the loncapa project.
947:
948: Scott Harrison, sharrison@users.sourceforge.net
949:
950: This is distributed under the same terms as loncapa (i.e. "freeware").
1.24 www 951:
1.28 harris41 952: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>