Annotation of loncom/metadata_database/searchcat.pl, revision 1.29
1.1 harris41 1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
1.16 harris41 4: #
1.29 ! albertel 5: # $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $
1.16 harris41 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
1.29 ! albertel 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.16 harris41 10: #
1.29 ! albertel 11: # LON-CAPA is free software; you can redistribute it and/or modify
1.16 harris41 12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
1.29 ! albertel 16: # LON-CAPA is distributed in the hope that it will be useful,
1.16 harris41 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
1.29 ! albertel 22: # along with LON-CAPA; if not, write to the Free Software
1.16 harris41 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
1.29 ! albertel 27: # http://www.lon-capa.org/
1.16 harris41 28: #
29: # YEAR=2001
1.17 harris41 30: # 04/14/2001, 04/16/2001 Scott Harrison
1.16 harris41 31: #
1.17 harris41 32: # YEAR=2002
33: # 05/11/2002 Scott Harrison
1.16 harris41 34: #
1.28 harris41 35: # YEAR=2003
36: # Scott Harrison
37: #
1.16 harris41 38: ###
1.1 harris41 39:
1.28 harris41 40: =pod
41:
42: =head1 NAME
43:
44: B<searchcat.pl> - put authoritative filesystem data into sql database.
45:
46: =head1 SYNOPSIS
47:
48: Ordinarily this script is to be called from a loncapa cron job
49: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
50: filesystem installation location: F</etc/cron.d/loncapa>).
51:
52: Here is the cron job entry.
53:
54: C<# Repopulate and refresh the metadata database used for the search catalog.>
55:
56: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
57:
58: This script only allows itself to be run as the user C<www>.
59:
60: =head1 DESCRIPTION
61:
62: This script goes through a loncapa resource directory and gathers metadata.
63: The metadata is entered into a SQL database.
64:
65: This script also does general database maintenance such as reformatting
66: the C<loncapa:metadata> table if it is deprecated.
67:
68: This script also builds dynamic temporal metadata and stores this inside
69: a F<nohist_resevaldata.db> database file.
70:
71: This script is playing an increasingly important role for a loncapa
72: library server. The proper operation of this script is critical for a smooth
73: and correct user experience.
74:
75: =cut
76:
77: # ========================================================== Setting things up.
78:
79: # ------------------------------------------------------ Use external modules.
1.1 harris41 80:
1.17 harris41 81: use lib '/home/httpd/lib/perl/';
82: use LONCAPA::Configuration;
83:
1.1 harris41 84: use IO::File;
85: use HTML::TokeParser;
1.6 harris41 86: use DBI;
1.21 www 87: use GDBM_File;
1.24 www 88: use POSIX qw(strftime mktime);
1.1 harris41 89:
1.28 harris41 90: # ----------------- Code to enable 'find' subroutine listing of the .meta files
91: use File::Find;
92:
# List of .meta files (used on a per-user basis).
my @metalist;

# --------------- Read loncapa_apache.conf and loncapa.conf and get variables.
my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar = %{$perlvarref};
undef($perlvarref); # Remove since sensitive and not needed.
delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.

# ------------------------------------- Only run if machine is a library server
# (only library servers hold authoritative resource metadata).
if ($perlvar{'lonRole'} ne 'library')
{
    exit(0);
}

# ------------------------------ Make sure this process is running as user=www.
# If not, mail the admin/sysadmin addresses from the config and abort.
my $wwwid = getpwnam('www');
if ($wwwid != $<)
{
    $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
           "mailto $emailto -s '$subj' > /dev/null");
    exit(1);
}

# ------------------------------------------------------ Initialize log output.
# NOTE(review): open() result is unchecked; a failure here silently
# discards all log output.
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");

my $dbh; # Database object reference handle.

# ----------------------------- Verify connection to loncapa:metadata database.
# Errors are handled manually (RaiseError/PrintError off); a failed connect
# is logged and the script exits cleanly.
unless (
        $dbh = DBI->connect('DBI:mysql:loncapa','www',
                            $perlvar{'lonSqlAccess'},
                            { RaiseError => 0,PrintError => 0})
        )
{
    print(LOG '**** ERROR **** Cannot connect to database!'."\n");
    exit(0);
}

# ------------------------------ Create loncapa:metadata table if non-existent.
# One TEXT column per metadata field plus a BOOL 'utilitysemaphore' used to
# mark rows touched during this run (stale rows are purged later).
my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
    'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
    'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
    'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
    'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
    'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
    'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
    'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
    'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
    'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
    'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';

$dbh->do($make_metadata_table); # Generate the table.

# ----------------------------- Verify format of the loncapa:metadata database.
# (delete and recreate database if necessary).

# Make a positive control for verifying table structure: build a throwaway
# CONTROL_metadata table from the same DDL and diff the 'describe' output.
my $make_metadata_table_CONTROL = $make_metadata_table;
$make_metadata_table_CONTROL =~
    s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
$dbh->do($make_metadata_table_CONTROL);

my $table_description; # selectall reference to the table description.

my $CONTROL_table_string; # What the table description should look like.
my $table_string; # What the table description does look like.

# Calculate the CONTROL table description (what it should be).
$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
foreach my $table_row (@{$table_description})
{
    $CONTROL_table_string .= join(',',@{$table_row})."\n";
}

# Calculate the current table description (what it currently looks like).
$table_description = $dbh->selectall_arrayref('describe metadata');
foreach my $table_row (@{$table_description})
{
    $table_string .= join(',',@{$table_row})."\n";
}

if ($table_string ne $CONTROL_table_string)
{
    # Log this incident.
    print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
          '.'."\n");
    # Delete the table (deprecated layout) and regenerate from current DDL.
    $dbh->do('DROP TABLE IF EXISTS metadata');
    # Generate the table.
    $dbh->do($make_metadata_table);
}

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.

# ----------------------------------------------- Set utilitysemaphore to zero.
# Every record refreshed during this run flips its semaphore back to 1;
# anything still 0 afterwards is stale and will be deleted.
$dbh->do('UPDATE metadata SET utilitysemaphore = 0');
196:
197: # ========================================================= Main functionality.
198:
# - Determine home authors on this server based on resources dir and user tree.

# RESOURCES: the resources directory (subdirs correspond to author usernames).
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
    (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
     and exit(0));

# query_home_server_status will look for user home directories on this machine.
my @homeusers =
    grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
                                    $perlvar{'lonDefDomain'}.'/'.$_)
          } grep {!/^\.\.?$/} readdir(RESOURCES);
closedir(RESOURCES);

unless (@homeusers)
{
    print(LOG '=== No home users found on this server.'."\n");
}

# Prepare the SQL statements once, outside the loops.  Placeholders (?) let
# DBI quote the values itself; the previous string-concatenated statements
# produced malformed SQL whenever a title/abstract/etc. contained a quote
# character (an SQL-injection-shaped bug).
my $delete_sth = $dbh->prepare('DELETE FROM metadata '.
                               'WHERE url LIKE BINARY ?');
my $insert_sth = $dbh->prepare('INSERT INTO metadata VALUES ('.
                               join(',',('?') x 15).')');

# Consider each author individually.
foreach my $user (@homeusers)
{
    # Make a log entry.
    print(LOG "\n".'=== User: '.$user."\n\n");

    # Get filesystem path to this user's directory.
    my $user_directory =
        &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);

    # Remove left-over db-files from a potentially crashed searchcat run.
    unlink($user_directory.'/nohist_new_resevaldata.db');

    # Cleanup the metalist array.
    undef(@metalist);
    @metalist = ();

    # This will add entries to the @metalist array (via the wanted callback).
    &File::Find::find(\&wanted,
                      $perlvar{'lonDocRoot'}.'/res/'.
                      $perlvar{'lonDefDomain'}.'/'.$user);

    # -- process file to get metadata and put into search catalog SQL database
    # Also, build and store dynamic metadata.
    # Also, delete record entries before refreshing.
    foreach my $m (@metalist)
    {
        # Log this action.
        print(LOG "- ".$m."\n");

        # Get metadata from the file.
        my $ref = get_metadata_from_file($m);

        # Make a datarecord identifier for this resource.
        my $m2 = '/res/'.declutter($m);
        $m2 =~ s/\.meta$//;

        # Build and store dynamic metadata inside nohist_resevaldata.db.
        build_on_the_fly_dynamic_metadata($m2);

        # Delete any pre-existing record for this url (BINARY makes the
        # comparison case-sensitive).  An unconditional DELETE replaces the
        # old select-then-delete dance with identical effect.
        $delete_sth->execute($m2);

        # Add new/replacement record into the loncapa:metadata table.
        # Column order: title, author, subject, url, keywords, version,
        # notes, abstract, mime, language, creationdate, lastrevisiondate,
        # owner, copyright, utilitysemaphore.
        $insert_sth->execute(delete($ref->{'title'}),
                             delete($ref->{'author'}),
                             delete($ref->{'subject'}),
                             $m2,
                             delete($ref->{'keywords'}),
                             'current',
                             delete($ref->{'notes'}),
                             delete($ref->{'abstract'}),
                             delete($ref->{'mime'}),
                             delete($ref->{'language'}),
                             sql_formatted_time(
                                 delete($ref->{'creationdate'})),
                             sql_formatted_time(
                                 delete($ref->{'lastrevisiondate'})),
                             delete($ref->{'owner'}),
                             delete($ref->{'copyright'}),
                             1);
    }

    # ----------------------- Clean up database, remove stale SQL database records.
    $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');

    # -------------------------------------------------- Copy over the new db-files
    system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
           $user_directory.'/nohist_resevaldata.db');
}

# --------------------------------------------------- Close database connection
$dbh->disconnect;
print LOG "\n==== Searchcat completed ".localtime()." ====\n";
close(LOG);
exit(0);
306:
307: # ================================================================ Subroutines.
308:
309: =pod
310:
311: =head1 SUBROUTINES
312:
313: =cut
314:
315: =pod
316:
317: B<unescape> - translate %XX-escaped syntax back into the original (possibly strange) characters.
318:
319: =over 4
320:
321: Parameters:
322:
323: =item I<$str> - string containing %XX escape sequences.
324:
325: =back
326:
327: =over 4
328:
329: Returns:
330:
331: =item C<string> - string with potentially weird characters.
332:
333: =back
334:
335: =cut
336:
# Decode %XX hexadecimal escapes back into their original characters.
sub unescape ($)
{
    my $text = shift(@_);
    $text =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/eg;
    return($text);
}
343:
344: =pod
345:
346: B<escape> - translate strange characters to unstrange escaped syntax.
347:
348: =over 4
349:
350: Parameters:
1.21 www 351:
1.28 harris41 352: =item I<$str> - string with potentially weird characters to unweird-ify.
1.22 www 353:
1.28 harris41 354: =back
355:
356: =over 4
357:
358: Returns:
359:
360: =item C<string> - unweird-ified string.
361:
362: =back
363:
364: =cut
365:
# Encode every non-word character as a %XX (lowercase hex) escape.
sub escape ($)
{
    my $text = shift(@_);
    $text =~ s/(\W)/sprintf('%%%02x',ord($1))/eg;
    return($text);
}
372:
373: =pod
374:
375: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
376:
377: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
378: The only thing that this subroutine really makes happen is adjusting
379: a 'count' value inside the F<nohist_new_resevaldata.db> as well
380: as updating F<nohist_new_resevaldata.db> with information from
381: F<nohist_resevaldata.db>.
382:
1.29 ! albertel 383: It may need optimization, but since it only gets called once a week. . .
1.28 harris41 384: =over 4
385:
386: Parameters:
387:
388: =item I<$url> - the filesystem path (url may be a misnomer...)
389:
390: =back
391:
392: =over 4
393:
394: Returns:
1.22 www 395:
1.28 harris41 396: =item C<hash> - key-value table of dynamically evaluated metadata.
1.21 www 397:
1.28 harris41 398: =back
1.21 www 399:
1.28 harris41 400: =cut
1.25 www 401:
sub build_on_the_fly_dynamic_metadata ($)
{
    # Derive the author's (domain, username) from the decluttered url and
    # locate that author's home directory on this filesystem.
    my $url = &declutter(shift(@_));
    $url =~ s/\.meta$//;
    my %returnhash = ();
    my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
    my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);

    # Attempt a GDBM database instantiation inside users directory and
    # proceed only if BOTH ties succeed: the existing nohist_resevaldata.db
    # (read-only) and the fresh nohist_new_resevaldata.db (write/create).
    # NOTE(review): %evaldata and %newevaldata are package globals.
    if ((tie(%evaldata,'GDBM_File',
             $user_directory.
             '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
        (tie(%newevaldata,'GDBM_File',
             $user_directory.
             '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
    {
        # For different variables, track the running sum and counts.
        my %sum = ();
        my %cnt = ();

        # Define how each category is aggregated: summed (add), averaged
        # (avg), counted (cnt), or appended as '<hr />'-separated text (app).
        my %listitems=('count'      => 'add',
                       'course'     => 'add',
                       'avetries'   => 'avg',
                       'stdno'      => 'add',
                       'difficulty' => 'avg',
                       'clear'      => 'avg',
                       'technical'  => 'avg',
                       'helpful'    => 'avg',
                       'correct'    => 'avg',
                       'depth'      => 'avg',
                       'comments'   => 'app',
                       'usage'      => 'cnt'
                       );

        # Untaint the url and use as part of a regular expression.
        # Stored keys look like ...___<escaped url>___<category>.
        my $regexp = $url;
        $regexp =~ s/(\W)/\\$1/g;
        $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs

        # Check existing nohist database for this url.  This pass
        # accumulates per-category sums/counts and copies every entry
        # except the 'count' entries into the new database.
        foreach (keys %evaldata)
        {
            my $key = &unescape($_);
            if ($key =~ /$regexp/) # If url-based entry exists.
            {
                my $ctype = $1; # Category captured by the regexp above.

                # Do an increment for this category type.
                if (defined($cnt{$ctype}))
                {
                    $cnt{$ctype}++;
                }
                else
                {
                    $cnt{$ctype} = 1;
                }
                unless ($listitems{$ctype} eq 'app')
                {
                    # Numeric category: accumulate the running sum.
                    if (defined($sum{$ctype}))
                    {
                        $sum{$ctype} += $evaldata{$_};
                    }
                    else
                    {
                        $sum{$ctype} = $evaldata{$_};
                    }
                }
                else # 'app' mode: concatenate with '<hr />' as a separator.
                {
                    if (defined($sum{$ctype}))
                    {
                        if ($evaldata{$_})
                        {
                            $sum{$ctype} .= '<hr />'.$evaldata{$_};
                        }
                    }
                    else
                    {
                        $sum{$ctype} = ''.$evaldata{$_};
                    }
                }
                if ($ctype ne 'count')
                {
                    # Copy all data except 'count' attributes over to the
                    # new database ('count' gets one consolidated entry
                    # written below instead).
                    $newevaldata{$_} = $evaldata{$_};
                }
            }
        }

        # Fold the accumulated sums/counts into the return hash according
        # to each category's aggregation mode (average rounded to 2 dp).
        foreach (keys %cnt)
        {
            if ($listitems{$_} eq 'avg')
            {
                $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
            }
            elsif ($listitems{$_} eq 'cnt')
            {
                $returnhash{$_} = $cnt{$_};
            }
            else
            {
                $returnhash{$_} = $sum{$_};
            }
        }

        # Write one consolidated 'count' entry (keyed by pid+time so it is
        # unique) into the new database.
        if ($returnhash{'count'})
        {
            my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
            $newevaldata{$newkey} = $returnhash{'count'};
        }

        untie(%evaldata); # Close/release the original nohist database.
        untie(%newevaldata); # Close/release the new nohist database.
    }
    return(%returnhash);
}
529:
530: =pod
531:
532: B<wanted> - used by B<File::Find::find> subroutine.
533:
534: This evaluates whether a file is wanted, and pushes it onto the
535: I<@metalist> array. This subroutine was, for the most part, auto-generated
536: by the B<find2perl> command.
537:
538: =over 4
539:
540: Parameters:
541:
542: =item I<$file> - a path to the file.
543:
544: =back
545:
546: =over 4
547:
548: Returns:
549:
550: =item C<boolean> - true or false based on logical statement.
551:
552: =back
553:
554: =cut
555:
sub wanted ($)
{
    # File::Find callback: $_ is the basename, $File::Find::dir the
    # directory.  lstat fills the find2perl-style globals and weeds out
    # broken entries; only plain files ending in '.meta' are collected,
    # while versioned copies (name.NN.ext.meta) are skipped.
    # NOTE(review): the ($) prototype is ignored when called via \&wanted.
    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f $_ &&
    /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
    push(@metalist,$File::Find::dir.'/'.$_);
}
563:
564: =pod
565:
566: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
1.1 harris41 567:
1.28 harris41 568: I<Note that this is significantly altered from a subroutine present in lonnet.>
1.15 harris41 569:
1.28 harris41 570: =over 4
1.1 harris41 571:
1.28 harris41 572: Parameters:
1.27 www 573:
1.28 harris41 574: =item I<$file> - a path to the file.
1.27 www 575:
1.28 harris41 576: =back
1.27 www 577:
1.28 harris41 578: =over 4
1.25 www 579:
1.28 harris41 580: Returns:
1.1 harris41 581:
1.28 harris41 582: =item C<hash reference> - a hash array (keys and values).
1.1 harris41 583:
1.28 harris41 584: =back
1.25 www 585:
1.28 harris41 586: =cut
1.1 harris41 587:
# Read an xml-tagged .meta file and return a hash reference of parsed
# metadata: one entry per tag (suffixed with _part/_name when those
# attributes are present), one '.attrname' entry per tag attribute, and a
# 'keys' entry listing all tag keys comma-separated.
sub get_metadata_from_file ($)
{
    my ($filename) = @_;
    my %metatable; # Used to store return value of hash-tabled metadata.
    $filename = &declutter($filename); # Remove non-identifying filesystem info
    my $uri = ''; # The URI is not relevant in this scenario.
    unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
    {
        $filename .= '.meta'; # Append a .meta suffix.
    }

    # Get the file contents.
    my $metadata_string =
        &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);

    # Parse the file based on its XML tags.
    my $parser = HTML::TokeParser->new(\$metadata_string);
    my $token;
    while ($token = $parser->get_token) # Loop through tokens.
    {
        if ($token->[0] eq 'S') # If it is a start token.
        {
            my $entry = $token->[1];
            my $unikey = $entry; # A unique identifier for this xml tag key.
            if (defined($token->[2]->{'part'}))
            {
                $unikey .= '_'.$token->[2]->{'part'};
            }
            if (defined($token->[2]->{'name'}))
            {
                $unikey .= '_'.$token->[2]->{'name'};
            }

            # Append $unikey to metatable's keys entry.
            if ($metatable{$uri.'keys'})
            {
                $metatable{$uri.'keys'} .= ','.$unikey;
            }
            else
            {
                $metatable{$uri.'keys'} = $unikey;
            }

            # Insert each tag attribute into a metatable entry for the
            # unikey.  BUG FIX: the key formerly used $_ (a stale value
            # left over from the surrounding File::Find run) instead of
            # the loop variable $t3, storing attributes under bogus keys.
            foreach my $t3 (@{$token->[3]})
            {
                $metatable{$uri.''.$unikey.'.'.$t3} = $token->[2]->{$t3};
            }

            # If there was no text contained inside the tags, fall back to
            # the tag's 'default' attribute (assignment-in-condition is
            # intentional: empty/'0' text triggers the fallback).
            unless
                (
                 $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
                )
            {
                $metatable{$uri.''.$unikey} =
                    $metatable{$uri.''.$unikey.'.default'};
            }
        }
    }
    # Return with a key-value table of XML tags and their tag contents.
    return(\%metatable);
}
647:
648: =pod
649:
650: B<get_file_contents> - returns either the contents of the file or a -1.
651:
652: =over 4
653:
654: Parameters:
655:
656: =item I<$file> - a complete filesystem path to the file.
657:
658: =back
659:
660: =over 4
661:
662: Returns:
663:
664: =item C<string> - file contents or a -1.
665:
666: =back
667:
668: =cut
669:
# Return the entire contents of a file as one string, or -1 when the file
# does not exist.
sub get_file_contents ($)
{
    my $path = shift(@_);

    # Nonexistent file: signal with -1.
    return(-1) unless (-e $path);

    # Slurp the whole file in one read.
    my $handle = IO::File->new($path);
    my $contents = do { local $/ = undef; <$handle> };
    $contents = '' unless defined($contents); # Empty file yields ''.
    return($contents);
}
691:
692: =pod
693:
694: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
695:
696: =over 4
697:
698: Parameters:
699:
700: =item I<$filesystem_path> - a complete filesystem path.
701:
702: =back
703:
704: =over 4
705:
706: Returns:
707:
708: =item C<string> - remnants of the filesystem path (beginning portion removed).
709:
710: =back
711:
712: =cut
713:
# Declutter URLs: strip the document root, a leading slash, and the res/
# prefix from a filesystem path, leaving the domain/author/resource tail.
sub declutter
{
    my $filesystem_path = shift(@_);

    # Remove beginning portions of the filesystem path.
    # BUG FIX: the document root is now \Q...\E-quoted; previously it was
    # interpolated raw into the regex, so any regex metacharacter in the
    # configured path would mis-match.
    $filesystem_path =~ s/^\Q$perlvar{'lonDocRoot'}\E//;
    $filesystem_path =~ s!^/!!;
    $filesystem_path =~ s!^res/!!;

    # Return what is remaining for the filesystem path.
    return($filesystem_path);
}
726:
727: =pod
728:
729: B<query_home_server_status> - Is this the home server of an author's directory?
730:
731: =over 4
732:
733: Parameters:
734:
735: =item I<$author_filesystem_path> - directory path for a user.
736:
737: =back
738:
739: =over 4
740:
741: Returns:
742:
743: =item C<boolean> - 1 if true; 0 if false.
744:
745: =back
746:
747: =cut
748:
# Report whether this machine is the home server for the author owning the
# given resource directory: 1 if the author's user directory exists here,
# 0 otherwise.
sub query_home_server_status ($)
{
    my $resource_path = shift(@_);

    # Reduce the resource path to 'domain/username'.
    $resource_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
    my ($user_domain,$username) = split(m!/!,$resource_path);

    # The author is home here exactly when their user directory exists.
    my $home_dir = construct_path_to_user_directory($user_domain,$username);
    return((-e $home_dir) ? 1 : 0);
}
771:
772: =pod
773:
774: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
775:
776: =over 4
777:
778: Parameters:
779:
780: =item I<$user_domain> - the loncapa domain of the user.
781:
782: =item I<$username> - the unique username (user id) of the user.
783:
784: =back
785:
786: =over 4
787:
788: Returns:
789:
790: =item C<string> - representing the path on the filesystem.
791:
792: =back
793:
794: =cut
795:
# Build the filesystem path to a user's directory:
# lonUsersDir/<domain>/<c1>/<c2>/<c3>//<username>, where c1..c3 are the
# first three characters of the username (padded with '_').
sub construct_path_to_user_directory ($$)
{
    my ($user_domain,$username) = @_;

    # Untaint both components.
    $user_domain =~ s/\W//g;
    $username =~ s/\W//g;

    # Three levels of hash directories from the (padded) username.
    my $hashed_levels = $username.'__';
    $hashed_levels =~ s!(.)(.)(.).*!$1/$2/$3/!;

    # Assemble and return the complete path.
    return(join('/',($perlvar{'lonUsersDir'},
                     $user_domain,
                     $hashed_levels,
                     $username)));
}
820:
821: =pod
822:
823: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
824:
825: =over 4
826:
827: Parameters:
828:
829: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
830:
831: =back
832:
833: =over 4
834:
835: Returns:
836:
837: =item C<string> - datetime sql formatted string.
838:
839: =back
840:
841: =cut
1.13 harris41 842:
# Turn seconds-since-epoch (or a legacy sql-formatted string, via
# sanitize_time) into a MySQL DATETIME string.
sub sql_formatted_time ($)
{
    # Sanitize the time argument into epoch seconds first.
    my $epoch = &sanitize_time(shift(@_));

    # FIX: use strftime (already imported from POSIX at the top of this
    # file) so fields are zero-padded ('2003-02-03 05:09:07'); the old
    # hand-rolled join produced '2003-2-3 5:9:7', which MySQL tolerates
    # but which breaks string comparison/sorting of DATETIME text.
    return(POSIX::strftime('%Y-%m-%d %H:%M:%S',localtime($epoch)));
}
861:
862:
863: # ==================================== The following two subroutines are needed
864: # for accommodating incorrect time formats inside the metadata.
865:
866: =pod
867:
868: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
869:
870: =over 4
871:
872: Parameters:
873:
874: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
875:
876: =back
877:
878: =over 4
879:
880: Returns:
881:
882: =item C<integer> - seconds since epoch.
883:
884: =back
885:
886: =cut
887:
# Convert a key-value time description (year/month/day/hours/minutes/
# seconds/dlsav) into seconds since the epoch (January 1, 1970 UTC).
sub make_seconds_since_epoch (@)
{
    my %when = @_;

    # POSIX::mktime wants 0-based months and years counted from 1900.
    return(POSIX::mktime($when{'seconds'},
                         $when{'minutes'},
                         $when{'hours'},
                         $when{'day'},
                         $when{'month'}-1,
                         $when{'year'}-1900,
                         0,
                         0,
                         $when{'dlsav'}));
}
907:
908: =pod
909:
910: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
911:
912: Somebody described this subroutine as
913: "retro-fixing of un-backward-compatible time format".
914:
915: What this means, is that a part of this code expects to get UTC seconds
916: since the epoch (beginning of 1970). Yet, some of the .meta files have
917: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
918: integers (e.g. 1044147435). These time strings do not encode the timezone
919: and, in this sense, can be considered "un-backwards-compatible".
920:
921: =over 4
922:
923: Parameters:
924:
925: =item I<$potentially_badformat_string> - string to "retro-fix".
926:
927: =back
928:
929: =over 4
930:
931: Returns:
932:
933: =item C<integer> - seconds since epoch.
934:
935: =back
936:
937: =cut
938:
# Retro-fix legacy time formats: a 'YYYY-MM-DD HH:MM:SS' sql-style string
# is converted to seconds since epoch; anything else passes through.
sub sanitize_time ($)
{
    my $stamp = shift(@_);

    # Only rewrite values that look sql-formatted.
    if (my ($y,$mo,$d,$h,$mi,$s) =
        ($stamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/))
    {
        $stamp = &make_seconds_since_epoch('year'    => $y,
                                           'month'   => $mo,
                                           'day'     => $d,
                                           'hours'   => $h,
                                           'minutes' => $mi,
                                           'seconds' => $s);
    }
    return($stamp);
}
958:
959: =pod
960:
961: =head1 AUTHOR
962:
963: Written to help the loncapa project.
964:
965: Scott Harrison, sharrison@users.sourceforge.net
966:
967: This is distributed under the same terms as loncapa (i.e. "freeware").
1.24 www 968:
1.28 harris41 969: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>