Annotation of loncom/metadata_database/searchcat.pl, revision 1.28
1.1 harris41 1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
1.16 harris41 4: #
1.28 ! harris41 5: # $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $
1.16 harris41 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
1.28 ! harris41 9: # This file is part of the LearningOnline Network with a
! 10: # Computer assisted personalized approach (loncapa).
1.16 harris41 11: #
1.28 ! harris41 12: # Loncapa is free software; you can redistribute it and/or modify
1.16 harris41 13: # it under the terms of the GNU General Public License as published by
14: # the Free Software Foundation; either version 2 of the License, or
15: # (at your option) any later version.
16: #
1.28 ! harris41 17: # Loncapa is distributed in the hope that it will be useful,
1.16 harris41 18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: # GNU General Public License for more details.
21: #
22: # You should have received a copy of the GNU General Public License
1.28 ! harris41 23: # along with loncapa; if not, write to the Free Software
1.16 harris41 24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: #
26: # /home/httpd/html/adm/gpl.txt
27: #
1.28 ! harris41 28: # http://www.loncapa.org/
1.16 harris41 29: #
30: # YEAR=2001
1.17 harris41 31: # 04/14/2001, 04/16/2001 Scott Harrison
1.16 harris41 32: #
1.17 harris41 33: # YEAR=2002
34: # 05/11/2002 Scott Harrison
1.16 harris41 35: #
1.28 ! harris41 36: # YEAR=2003
! 37: # Scott Harrison
! 38: #
1.16 harris41 39: ###
1.1 harris41 40:
1.28 ! harris41 41: =pod
! 42:
! 43: =head1 NAME
! 44:
! 45: B<searchcat.pl> - put authoritative filesystem data into sql database.
! 46:
! 47: =head1 SYNOPSIS
! 48:
! 49: Ordinarily this script is to be called from a loncapa cron job
! 50: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
! 51: filesystem installation location: F</etc/cron.d/loncapa>).
! 52:
! 53: Here is the cron job entry.
! 54:
! 55: C<# Repopulate and refresh the metadata database used for the search catalog.>
! 56:
! 57: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
! 58:
! 59: This script only allows itself to be run as the user C<www>.
! 60:
! 61: =head1 DESCRIPTION
! 62:
! 63: This script goes through a loncapa resource directory and gathers metadata.
! 64: The metadata is entered into a SQL database.
! 65:
! 66: This script also does general database maintenance such as reformatting
! 67: the C<loncapa:metadata> table if it is deprecated.
! 68:
! 69: This script also builds dynamic temporal metadata and stores this inside
! 70: a F<nohist_resevaldata.db> database file.
! 71:
! 72: This script is playing an increasingly important role for a loncapa
! 73: library server. The proper operation of this script is critical for a smooth
! 74: and correct user experience.
! 75:
! 76: =cut
! 77:
! 78: # ========================================================== Setting things up.
! 79:
! 80: # ------------------------------------------------------ Use external modules.
1.1 harris41 81:
1.17 harris41 82: use lib '/home/httpd/lib/perl/';
83: use LONCAPA::Configuration;
84:
1.1 harris41 85: use IO::File;
86: use HTML::TokeParser;
1.6 harris41 87: use DBI;
1.21 www 88: use GDBM_File;
1.24 www 89: use POSIX qw(strftime mktime);
1.1 harris41 90:
1.28 ! harris41 91: # ----------------- Code to enable 'find' subroutine listing of the .meta files
! 92: use File::Find;
! 93:
! 94: # List of .meta files (used on a per-user basis).
1.1 harris41 95: my @metalist;
1.21 www 96:
1.28 ! harris41 97: # --------------- Read loncapa_apache.conf and loncapa.conf and get variables.
! 98: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
! 99: my %perlvar = %{$perlvarref};
! 100: undef($perlvarref); # Remove since sensitive and not needed.
! 101: delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
! 102:
! 103: # ------------------------------------- Only run if machine is a library server
! 104: if ($perlvar{'lonRole'} ne 'library')
! 105: {
! 106: exit(0);
! 107: }
! 108:
! 109: # ------------------------------ Make sure this process is running as user=www.
! 110: my $wwwid = getpwnam('www');
! 111: if ($wwwid != $<)
! 112: {
! 113: $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
! 114: $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
! 115: system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
! 116: "mailto $emailto -s '$subj' > /dev/null");
! 117: exit(1);
! 118: }
! 119:
! 120: # ------------------------------------------------------ Initialize log output.
! 121: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
! 122: print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
! 123:
! 124: my $dbh; # Database object reference handle.
! 125:
! 126: # ----------------------------- Verify connection to loncapa:metadata database.
! 127: unless (
! 128: $dbh = DBI->connect('DBI:mysql:loncapa','www',
! 129: $perlvar{'lonSqlAccess'},
! 130: { RaiseError => 0,PrintError => 0})
! 131: )
! 132: {
! 133: print(LOG '**** ERROR **** Cannot connect to database!'."\n");
! 134: exit(0);
! 135: }
! 136:
! 137: # ------------------------------ Create loncapa:metadata table if non-existent.
! 138: my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
! 139: 'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
! 140: 'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
! 141: 'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
! 142: 'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
! 143: 'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
! 144: 'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
! 145: 'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
! 146: 'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
! 147: 'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
! 148: 'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
! 149:
! 150: $dbh->do($make_metadata_table); # Generate the table.
! 151:
! 152: # ----------------------------- Verify format of the loncapa:metadata database.
! 153: # (delete and recreate database if necessary).
! 154:
! 155: # Make a positive control for verifying table structure.
! 156: my $make_metadata_table_CONTROL = $make_metadata_table;
! 157: $make_metadata_table_CONTROL =~
! 158: s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
! 159:
! 160: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
! 161: $dbh->do($make_metadata_table_CONTROL);
! 162:
! 163: my $table_description; # selectall reference to the table description.
! 164:
! 165: my $CONTROL_table_string; # What the table description should look like.
! 166: my $table_string; # What the table description does look like.
! 167:
! 168: # Calculate the CONTROL table description (what it should be).
! 169: $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
! 170: foreach my $table_row (@{$table_description})
! 171: {
! 172: $CONTROL_table_string .= join(',',@{$table_row})."\n";
! 173: }
! 174:
! 175: # Calculate the current table description (what it currently looks like).
! 176: $table_description = $dbh->selectall_arrayref('describe metadata');
! 177: foreach my $table_row (@{$table_description})
! 178: {
! 179: $table_string .= join(',',@{$table_row})."\n";
! 180: }
! 181:
! 182: if ($table_string ne $CONTROL_table_string)
! 183: {
! 184: # Log this incident.
! 185: print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
! 186: '.'."\n");
! 187: # Delete the table.
! 188: $dbh->do('DROP TABLE IF EXISTS metadata');
! 189: # Generate the table.
! 190: $dbh->do($make_metadata_table);
! 191: }
1.21 www 192:
1.28 ! harris41 193: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control.
1.21 www 194:
1.28 ! harris41 195: # ----------------------------------------------- Set utilitysemaphore to zero.
! 196: $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
! 197:
! 198: # ========================================================= Main functionality.
! 199:
! 200: # - Determine home authors on this server based on resources dir and user tree.
! 201:
! 202: # RESOURCES: the resources directory (subdirs correspond to author usernames).
! 203: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
! 204: (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
! 205: and exit(0));
! 206:
! 207: # query_home_server_status will look for user home directories on this machine.
! 208: my @homeusers =
! 209: grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
! 210: $perlvar{'lonDefDomain'}.'/'.$_)
! 211: } grep {!/^\.\.?$/} readdir(RESOURCES);
! 212: closedir(RESOURCES);
! 213:
! 214: unless (@homeusers)
! 215: {
! 216: print(LOG '=== No home users found on this server.'."\n");
! 217: }
! 218:
! 219: # Consider each author individually.
! 220: foreach my $user (@homeusers)
! 221: {
! 222: # Make a log entry.
! 223: print(LOG "\n".'=== User: '.$user."\n\n");
! 224:
! 225: # Get filesystem path to this user's directory.
! 226: my $user_directory =
! 227: &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
! 228:
! 229: # Remove left-over db-files from a potentially crashed searchcat run.
! 230: unlink($user_directory.'/nohist_new_resevaldata.db');
! 231:
! 232: # Cleanup the metalist array.
! 233: undef(@metalist);
! 234: @metalist = ();
! 235:
! 236: # This will add entries to the @metalist array.
! 237: &File::Find::find(\&wanted,
! 238: $perlvar{'lonDocRoot'}.'/res/'.
! 239: $perlvar{'lonDefDomain'}.'/'.$user);
! 240:
! 241: # -- process file to get metadata and put into search catalog SQL database
! 242: # Also, build and store dynamic metadata.
! 243: # Also, delete record entries before refreshing.
! 244: foreach my $m (@metalist)
! 245: {
! 246: # Log this action.
! 247: print(LOG "- ".$m."\n");
! 248:
! 249: # Get metadata from the file.
! 250: my $ref = get_metadata_from_file($m);
! 251:
! 252: # Make a datarecord identifier for this resource.
! 253: my $m2 = '/res/'.declutter($m);
! 254: $m2 =~ s/\.meta$//;
! 255:
! 256: # Build and store dynamic metadata inside nohist_resevaldata.db.
! 257: build_on_the_fly_dynamic_metadata($m2);
! 258:
! 259: # Delete record if it already exists.
! 260: my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
! 261: my $sth = $dbh->prepare($q2);
! 262: $sth->execute();
! 263: my $r1 = $sth->fetchall_arrayref;
! 264: if (@$r1)
! 265: {
! 266: $sth =
! 267: $dbh->prepare('delete from metadata where url like binary '.
! 268: "'".$m2."'");
! 269: $sth->execute();
! 270: }
! 271:
! 272: # Add new/replacement record into the loncapa:metadata table.
! 273: $sth = $dbh->prepare('insert into metadata values ('.
! 274: '"'.delete($ref->{'title'}).'"'.','.
! 275: '"'.delete($ref->{'author'}).'"'.','.
! 276: '"'.delete($ref->{'subject'}).'"'.','.
! 277: '"'.$m2.'"'.','.
! 278: '"'.delete($ref->{'keywords'}).'"'.','.
! 279: '"'.'current'.'"'.','.
! 280: '"'.delete($ref->{'notes'}).'"'.','.
! 281: '"'.delete($ref->{'abstract'}).'"'.','.
! 282: '"'.delete($ref->{'mime'}).'"'.','.
! 283: '"'.delete($ref->{'language'}).'"'.','.
! 284: '"'.sql_formatted_time(
! 285: delete($ref->{'creationdate'})).'"'.','.
! 286: '"'.sql_formatted_time(
! 287: delete($ref->{'lastrevisiondate'})).'"'.','.
! 288: '"'.delete($ref->{'owner'}).'"'.','.
! 289: '"'.delete($ref->{'copyright'}).'"'.','.
! 290: '1'.')');
! 291: $sth->execute();
! 292: }
! 293:
! 294: # ----------------------- Clean up database, remove stale SQL database records.
! 295: $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
! 296:
! 297: # -------------------------------------------------- Copy over the new db-files
! 298: system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
! 299: $user_directory.'/nohist_resevaldata.db');
! 300: }
! 301:
! 302: # --------------------------------------------------- Close database connection
! 303: $dbh->disconnect;
! 304: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
! 305: close(LOG);
! 306: exit(0);
! 307:
! 308: # ================================================================ Subroutines.
! 309:
! 310: =pod
! 311:
! 312: =head1 SUBROUTINES
! 313:
! 314: =cut
! 315:
! 316: =pod
! 317:
! 318: B<unescape> - translate to unstrange escaped syntax to strange characters.
! 319:
! 320: =over 4
! 321:
! 322: Parameters:
! 323:
! 324: =item I<$str> - string with unweird characters.
! 325:
! 326: =back
! 327:
! 328: =over 4
! 329:
! 330: Returns:
! 331:
! 332: =item C<string> - string with potentially weird characters.
! 333:
! 334: =back
! 335:
! 336: =cut
! 337:
! 338: sub unescape ($)
! 339: {
! 340: my $str = shift(@_);
1.21 www 341: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
1.28 ! harris41 342: return($str);
! 343: }
! 344:
! 345: =pod
! 346:
! 347: B<escape> - translate strange characters to unstrange escaped syntax.
! 348:
! 349: =over 4
! 350:
! 351: Parameters:
1.21 www 352:
1.28 ! harris41 353: =item I<$str> - string with potentially weird characters to unweird-ify.
1.22 www 354:
1.28 ! harris41 355: =back
! 356:
! 357: =over 4
! 358:
! 359: Returns:
! 360:
! 361: =item C<string> - unweird-ified string.
! 362:
! 363: =back
! 364:
! 365: =cut
! 366:
! 367: sub escape ($)
! 368: {
! 369: my $str = shift(@_);
1.22 www 370: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
1.28 ! harris41 371: return($str);
! 372: }
! 373:
! 374: =pod
! 375:
! 376: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
! 377:
! 378: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
! 379: Most of the calculations in this subroutine are totally pointless
! 380: and not useful for anything that this subroutine does.
! 381: (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
! 382: The only thing that this subroutine really makes happen is adjusting
! 383: a 'count' value inside the F<nohist_new_resevaldata.db> as well
! 384: as updating F<nohist_new_resevaldata.db> with information from
! 385: F<nohist_resevaldata.db>.
! 386:
! 387: =over 4
! 388:
! 389: Parameters:
! 390:
! 391: =item I<$url> - the filesystem path (url may be a misnomer...)
! 392:
! 393: =back
! 394:
! 395: =over 4
! 396:
! 397: Returns:
1.22 www 398:
1.28 ! harris41 399: =item C<hash> - key-value table of dynamically evaluated metadata.
1.21 www 400:
1.28 ! harris41 401: =back
1.21 www 402:
1.28 ! harris41 403: =cut
1.25 www 404:
1.28 ! harris41 405: sub build_on_the_fly_dynamic_metadata ($)
! 406: {
! 407: # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.
! 408:
! 409: # Do all sorts of mumbo-jumbo to compute the user's directory.
! 410: my $url = &declutter(shift(@_));
! 411: $url =~ s/\.meta$//;
! 412: my %returnhash = ();
! 413: my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
! 414: my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
! 415:
! 416: # Attempt a GDBM database instantiation inside users directory and proceed.
1.25 www 417: if ((tie(%evaldata,'GDBM_File',
1.28 ! harris41 418: $user_directory.
! 419: '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
1.25 www 420: (tie(%newevaldata,'GDBM_File',
1.28 ! harris41 421: $user_directory.
! 422: '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
! 423: {
! 424: # For different variables, track the running sum and counts.
! 425: my %sum = ();
! 426: my %cnt = ();
! 427:
! 428: # Define computed items as a sum (add) or an average (avg) or a raw
! 429: # count (cnt) or 'app'?
! 430: my %listitems=('count' => 'add',
! 431: 'course' => 'add',
! 432: 'avetries' => 'avg',
! 433: 'stdno' => 'add',
! 434: 'difficulty' => 'avg',
! 435: 'clear' => 'avg',
! 436: 'technical' => 'avg',
! 437: 'helpful' => 'avg',
! 438: 'correct' => 'avg',
! 439: 'depth' => 'avg',
! 440: 'comments' => 'app',
! 441: 'usage' => 'cnt'
! 442: );
! 443:
! 444: # Untaint the url and use as part of a regular expression.
! 445: my $regexp = $url;
! 446: $regexp =~ s/(\W)/\\$1/g;
! 447: $regexp = '___'.$regexp.'___([a-z]+)$';
! 448:
! 449: # Check existing nohist database for this url.
! 450: # THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE
! 451: # IS FOR 'count' ENTRIES
! 452: # AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.
! 453: foreach (keys %evaldata)
! 454: {
! 455: my $key = &unescape($_);
! 456: if ($key =~ /$regexp/) # If url-based entry exists.
! 457: {
! 458: my $ctype = $1; # Set to specific category type.
! 459:
! 460: # Do an increment for this category type.
! 461: if (defined($cnt{$ctype}))
! 462: {
! 463: $cnt{$ctype}++;
! 464: }
! 465: else
! 466: {
! 467: $cnt{$ctype} = 1;
! 468: }
! 469: unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
! 470: {
! 471: # Increment the sum based on the evaluated data in the db.
! 472: if (defined($sum{$ctype}))
! 473: {
! 474: $sum{$ctype} += $evaldata{$_};
! 475: }
! 476: else
! 477: {
! 478: $sum{$ctype} = $evaldata{$_};
! 479: }
! 480: }
! 481: else # 'app' mode, means to use '<hr />' as a separator
! 482: {
! 483: if (defined($sum{$ctype}))
! 484: {
! 485: if ($evaldata{$_})
! 486: {
! 487: $sum{$ctype} .= '<hr />'.$evaldata{$_};
! 488: }
! 489: }
! 490: else
! 491: {
! 492: $sum{$ctype} = ''.$evaldata{$_};
! 493: }
! 494: }
! 495: if ($ctype ne 'count')
! 496: {
! 497: # ALERT! THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING
! 498: # USEFUL!
! 499: $newevaldata{$_} = $evaldata{$_};
! 500: }
! 501: }
! 502: }
! 503:
! 504: # THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH
! 505: # ELEMENT.
! 506: foreach (keys %cnt)
! 507: {
! 508: if ($listitems{$_} eq 'avg')
! 509: {
! 510: $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
! 511: }
! 512: elsif ($listitems{$_} eq 'cnt')
! 513: {
! 514: $returnhash{$_} = $cnt{$_};
! 515: }
! 516: else
! 517: {
! 518: $returnhash{$_} = $sum{$_};
! 519: }
! 520: }
! 521:
! 522: # A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS
! 523: # BLEEPING SUBROUTINE.
! 524: if ($returnhash{'count'})
! 525: {
! 526: my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
! 527: $newevaldata{$newkey} = $returnhash{'count'};
! 528: }
! 529:
! 530: untie(%evaldata); # Close/release the original nohist database.
! 531: untie(%newevaldata); # Close/release the new nohist database.
1.22 www 532: }
1.28 ! harris41 533: return(%returnhash);
! 534: # Celebrate! We have now accomplished some simple calculations using
! 535: # 1000% bloated functionality in our subroutine. Go wash your eyeballs
! 536: # out now.
! 537: }
! 538:
! 539: =pod
! 540:
! 541: B<wanted> - used by B<File::Find::find> subroutine.
! 542:
! 543: This evaluates whether a file is wanted, and pushes it onto the
! 544: I<@metalist> array. This subroutine was, for the most part, auto-generated
! 545: by the B<find2perl> command.
! 546:
! 547: =over 4
! 548:
! 549: Parameters:
! 550:
! 551: =item I<$file> - a path to the file.
! 552:
! 553: =back
! 554:
! 555: =over 4
! 556:
! 557: Returns:
! 558:
! 559: =item C<boolean> - true or false based on logical statement.
! 560:
! 561: =back
! 562:
! 563: =cut
! 564:
! 565: sub wanted ($)
! 566: {
1.1 harris41 567: (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
1.28 ! harris41 568: -f $_ &&
1.10 harris41 569: /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
1.28 ! harris41 570: push(@metalist,$File::Find::dir.'/'.$_);
! 571: }
! 572:
! 573: =pod
! 574:
! 575: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
1.1 harris41 576:
1.28 ! harris41 577: I<Note that this is significantly altered from a subroutine present in lonnet.>
1.15 harris41 578:
1.28 ! harris41 579: =over 4
1.1 harris41 580:
1.28 ! harris41 581: Parameters:
1.27 www 582:
1.28 ! harris41 583: =item I<$file> - a path.to the file.
1.27 www 584:
1.28 ! harris41 585: =back
1.27 www 586:
1.28 ! harris41 587: =over 4
1.25 www 588:
1.28 ! harris41 589: Returns:
1.1 harris41 590:
1.28 ! harris41 591: =item C<hash reference> - a hash array (keys and values).
1.1 harris41 592:
1.28 ! harris41 593: =back
1.25 www 594:
1.28 ! harris41 595: =cut
1.1 harris41 596:
1.28 ! harris41 597: sub get_metadata_from_file ($)
! 598: {
! 599: my ($filename) = @_;
! 600: my %metatable; # Used to store return value of hash-tabled metadata.
! 601: $filename = &declutter($filename); # Remove non-identifying filesystem info
! 602: my $uri = ''; # The URI is not relevant in this scenario.
! 603: unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
! 604: {
! 605: $filename .= '.meta'; # Append a .meta suffix.
! 606: }
! 607: # Get the file contents.
! 608: my $metadata_string =
! 609: &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
! 610:
! 611: # Parse the file based on its XML tags.
! 612: my $parser = HTML::TokeParser->new(\$metadata_string);
! 613: my $token;
! 614: while ($token = $parser->get_token) # Loop through tokens.
! 615: {
! 616: if ($token->[0] eq 'S') # If it is a start token.
! 617: {
! 618: my $entry = $token->[1];
! 619: my $unikey = $entry; # A unique identifier for this xml tag key.
! 620: if (defined($token->[2]->{'part'}))
! 621: {
! 622: $unikey .= '_'.$token->[2]->{'part'};
! 623: }
! 624: if (defined($token->[2]->{'name'}))
! 625: {
! 626: $unikey .= '_'.$token->[2]->{'name'};
! 627: }
! 628: # Append $unikey to metatable's keys entry.
! 629: if ($metatable{$uri.'keys'})
! 630: {
! 631: $metatable{$uri.'keys'} .= ','.$unikey;
1.1 harris41 632: }
1.28 ! harris41 633: else
! 634: {
! 635: $metatable{$uri.'keys'} = $unikey;
1.1 harris41 636: }
1.28 ! harris41 637: # Insert contents into metatable entry for the unikey.
! 638: foreach my $t3 (@{$token->[3]})
! 639: {
! 640: $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
1.1 harris41 641: }
1.28 ! harris41 642: # If there was no text contained inside the tags, set = default.
! 643: unless
! 644: (
! 645: $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
! 646: )
! 647: {
! 648: $metatable{$uri.''.$unikey} =
! 649: $metatable{$uri.''.$unikey.'.default'};
! 650: }
! 651: }
! 652: }
! 653: # Return with a key-value table of XML tags and their tag contents.
! 654: return(\%metatable);
! 655: }
! 656:
! 657: =pod
! 658:
! 659: B<get_file_contents> - returns either the contents of the file or a -1.
! 660:
! 661: =over 4
! 662:
! 663: Parameters:
! 664:
! 665: =item I<$file> - a complete filesystem path.to the file.
! 666:
! 667: =back
! 668:
! 669: =over 4
! 670:
! 671: Returns:
! 672:
! 673: =item C<string> - file contents or a -1.
! 674:
! 675: =back
! 676:
! 677: =cut
! 678:
! 679: sub get_file_contents ($)
! 680: {
! 681: my $file = shift(@_);
! 682:
! 683: # If file does not exist, then return a -1 value.
! 684: unless (-e $file)
! 685: {
! 686: return(-1);
! 687: }
! 688:
! 689: # Read in file contents.
! 690: my $file_handle = IO::File->new($file);
! 691: my $file_contents = '';
! 692: while (<$file_handle>)
! 693: {
! 694: $file_contents .= $_;
! 695: }
! 696:
! 697: # Return file contents.
! 698: return($file_contents);
! 699: }
! 700:
! 701: =pod
! 702:
! 703: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
! 704:
! 705: =over 4
! 706:
! 707: Parameters:
! 708:
! 709: =item I<$filesystem_path> - a complete filesystem path.
! 710:
! 711: =back
! 712:
! 713: =over 4
! 714:
! 715: Returns:
! 716:
! 717: =item C<string> - remnants of the filesystem path (beginning portion removed).
! 718:
! 719: =back
! 720:
! 721: =cut
! 722:
! 723: sub declutter
! 724: {
! 725: my $filesystem_path = shift(@_);
! 726:
! 727: # Remove beginning portions of the filesystem path.
! 728: $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
! 729: $filesystem_path =~ s!^/!!;
! 730: $filesystem_path =~ s!^res/!!;
! 731:
! 732: # Return what is remaining for the filesystem path.
! 733: return($filesystem_path);
! 734: }
! 735:
! 736: =pod
! 737:
! 738: B<query_home_server_status> - Is this the home server of an author's directory?
! 739:
! 740: =over 4
! 741:
! 742: Parameters:
! 743:
! 744: =item I<$author_filesystem_path> - directory path for a user.
! 745:
! 746: =back
! 747:
! 748: =over 4
! 749:
! 750: Returns:
! 751:
! 752: =item C<boolean> - 1 if true; 0 if false.
! 753:
! 754: =back
! 755:
! 756: =cut
! 757:
! 758: sub query_home_server_status ($)
! 759: {
! 760: my $author_filesystem_path = shift(@_);
! 761:
! 762: # Remove beginning portion of this filesystem path.
! 763: $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
! 764:
! 765: # Construct path to the author's ordinary user directory.
! 766: my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
! 767: my $user_directory_path = construct_path_to_user_directory($user_domain,
! 768: $username);
! 769:
! 770: # Return status of whether the user directory path is defined.
! 771: if (-e $user_directory_path)
! 772: {
! 773: return(1); # True.
! 774: }
! 775: else
! 776: {
! 777: return(0); # False.
! 778: }
! 779: }
! 780:
! 781: =pod
! 782:
! 783: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
! 784:
! 785: =over 4
! 786:
! 787: Parameters:
! 788:
! 789: =item I<$user_domain> - the loncapa domain of the user.
! 790:
! 791: =item I<$username> - the unique username (user id) of the user.
! 792:
! 793: =back
! 794:
! 795: =over 4
! 796:
! 797: Returns:
! 798:
! 799: =item C<string> - representing the path on the filesystem.
! 800:
! 801: =back
! 802:
! 803: =cut
! 804:
! 805: sub construct_path_to_user_directory ($$)
! 806: {
! 807: my ($user_domain,$username) = @_;
! 808:
! 809: # Untaint.
! 810: $user_domain =~ s/\W//g;
! 811: $username =~ s/\W//g;
! 812:
! 813: # Create three levels of sub-directoried filesystem path
! 814: # based on the first three characters of the username.
! 815: my $sub_filesystem_path = $username.'__';
! 816: $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
! 817:
! 818: # Use the sub-directoried levels and other variables to generate
! 819: # the complete filesystem path.
! 820: my $complete_filesystem_path =
! 821: join('/',($perlvar{'lonUsersDir'},
! 822: $user_domain,
! 823: $sub_filesystem_path,
! 824: $username));
! 825:
! 826: # Return the complete filesystem path.
! 827: return($complete_filesystem_path);
! 828: }
! 829:
! 830: =pod
! 831:
! 832: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
! 833:
! 834: =over 4
! 835:
! 836: Parameters:
! 837:
! 838: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
! 839:
! 840: =back
! 841:
! 842: =over 4
! 843:
! 844: Returns:
! 845:
! 846: =item C<string> - datetime sql formatted string.
! 847:
! 848: =back
! 849:
! 850: =cut
1.13 harris41 851:
1.28 ! harris41 852: sub sql_formatted_time ($)
! 853: {
! 854: # Sanitize the time argument and convert to localtime array.
1.13 harris41 855: my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
1.28 ! harris41 856: localtime(&sanitize_time(shift(@_)));
! 857:
! 858: # Convert month from (0..11) to (1..12).
! 859: $mon += 1;
! 860:
! 861: # Make the year compatible with A.D. specification.
! 862: $year += 1900;
! 863:
! 864: # Return a date which is compatible with MySQL's "DATETIME" format.
! 865: return(join('-',($year,$mon,$mday)).
! 866: ' '.
! 867: join(':',($hour,$min,$sec))
! 868: );
! 869: }
! 870:
! 871:
! 872: # ==================================== The following two subroutines are needed
! 873: # for accommodating incorrect time formats inside the metadata.
! 874:
! 875: =pod
! 876:
! 877: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
! 878:
! 879: =over 4
! 880:
! 881: Parameters:
! 882:
! 883: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
! 884:
! 885: =back
! 886:
! 887: =over 4
! 888:
! 889: Returns:
! 890:
! 891: =item C<integer> - seconds since epoch.
! 892:
! 893: =back
! 894:
! 895: =cut
! 896:
! 897: sub make_seconds_since_epoch (@)
! 898: {
! 899: # Keytable of time metadata.
! 900: my %time_metadata = @_;
! 901:
! 902: # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
! 903: return(POSIX::mktime(
! 904: ($time_metadata{'seconds'},
! 905: $time_metadata{'minutes'},
! 906: $time_metadata{'hours'},
! 907: $time_metadata{'day'},
! 908: $time_metadata{'month'}-1,
! 909: $time_metadata{'year'}-1900,
! 910: 0,
! 911: 0,
! 912: $time_metadata{'dlsav'})
! 913: )
! 914: );
! 915: }
! 916:
! 917: =pod
! 918:
! 919: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
! 920:
! 921: Somebody described this subroutine as
! 922: "retro-fixing of un-backward-compatible time format".
! 923:
! 924: What this means, is that a part of this code expects to get UTC seconds
! 925: since the epoch (beginning of 1970). Yet, some of the .meta files have
! 926: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
! 927: integers (e.g. 1044147435). These time strings do not encode the timezone
! 928: and, in this sense, can be considered "un-backwards-compatible".
! 929:
! 930: =over 4
! 931:
! 932: Parameters:
! 933:
! 934: =item I<$potentially_badformat_string> - string to "retro-fix".
! 935:
! 936: =back
! 937:
! 938: =over 4
! 939:
! 940: Returns:
! 941:
! 942: =item C<integer> - seconds since epoch.
! 943:
! 944: =back
! 945:
! 946: =cut
! 947:
! 948: sub sanitize_time ($)
! 949: {
! 950: my $timestamp = shift(@_);
! 951: # If timestamp is in this unexpected format....
! 952: if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
! 953: {
! 954: # then convert into seconds since epoch (the expected format).
! 955: $timestamp = &make_seconds_since_epoch(
! 956: 'year' => $1,
! 957: 'month' => $2,
! 958: 'day' => $3,
! 959: 'hours' => $4,
! 960: 'minutes' => $5,
! 961: 'seconds' => $6
! 962: );
! 963: }
! 964: # Otherwise we assume timestamp to be as expected.
! 965: return($timestamp);
! 966: }
! 967:
! 968: =pod
! 969:
! 970: =head1 AUTHOR
! 971:
! 972: Written to help the loncapa project.
! 973:
! 974: Scott Harrison, sharrison@users.sourceforge.net
! 975:
! 976: This is distributed under the same terms as loncapa (i.e. "freeware").
1.24 www 977:
1.28 ! harris41 978: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>