Annotation of nsdl/harvestsmete/cstc.pl, revision 1.1
1.1 ! www 1: #!/usr/local/bin/perl -w
! 2:
! 3: #
! 4: # cstc.pl - Use OAI MHP to harvest metadata from CSTC in oai_ims format
! 5: #
! 6: # Written by Andy Dong <adong@smete.org> 11/01/2001
! 7: #
! 8:
! 9: use strict;
! 10: use Getopt::Std;
! 11: use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
! 12:
! 13: use HTTP::Request;
! 14: use LWP::UserAgent;
! 15:
! 16: use XML::Element;
! 17: use XML::Parser;
! 18: use XML::TreeBuilder;
! 19:
! 20: use DBI;
! 21: use DBD::ODBC;
! 22:
! 23: require OAIvocabulary_v2;
! 24: require OAIcataloging_v2;
! 25:
! 26: # -u flag specifies [u]pdate database; otherwise output to STDOUT
! 27:
! 28: my $usage = << "EOT";
! 29: Usage: cstc.pl -u
! 30:
! 31: -u (U)pdate the database
! 32:
! 33: Without -u it simply prints to STDOUT
! 34: EOT
! 35:
! 36: my %args;
! 37: getopts('u', \%args) or die $usage; # unknown switches abort with the usage text
! 38: # Running totals reported in the summary line at the end of the script.
! 39: my $inserted = 0;
! 40: my $updated = 0;
! 41: # Explicit boolean: "my $x = 1 if COND" is documented in perlsyn as having undefined behaviour.
! 42: my $useDatabase = $args{'u'} ? 1 : 0;
! 43: my $dbh; # stays undef unless -u was given
! 44: # Database Configuration. FIXME: credentials are hard-coded in the source; move them to protected configuration.
! 45: if ( $useDatabase ) {
! 46: print "Updating the database\n";
! 47: my $DBI_DSN='dbi:ODBC:mel.odbc';
! 48: my $DBI_USER='autocataloger';
! 49: my $DBI_PWD='regolatacotua';
! 50: $dbh = DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 }) or die "Unable to connect to database $DBI_DSN as $DBI_USER\n"; # AutoCommit is off: nothing is saved until the explicit commit
! 51: }
! 52:
! 53: my $content; # only mentioned in the disabled HTTP fetch below
! 54: my @cstc; # not referenced anywhere else in this listing
! 55:
! 56: # LOM record fields, file-scoped; the record loop fills them in (it re-declares its own @logeneralKeywords and $publisher)
! 57: my $logeneralDescription = '';
! 58: my $logeneralTitle1 = '';
! 59: my $logeneralCreation_Date = '';
! 60: my $logeneralPub_Year = '';
! 61: my $logeneralPub_Month = '';
! 62: my $logeneralPub_Day = '';
! 63: my $logeneralLanguage1 = '';
! 64: my @logeneralKeywords;
! 65: my $publisher = '';
! 66: my $personLastname = '';
! 67: my $personFirstname = '';
! 68: my $personEmail = '';
! 69: my $personCompany = '';
! 70: my $platformVersion = '';
! 71: my $platformType = '';
! 72: my $platformOS = '';
! 73: my @pedagogyEndUserRole;
! 74: my $pedagogyLContext = '';
! 75: my $platformFormat = '';
! 76: my $platformLocation_URL = '';
! 77: # The live OAI request below is disabled; the script parses a local XML file instead.
! 78: #my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
! 79:
! 80: #my $ua = new LWP::UserAgent;
! 81: #my $request = HTTP::Request->new('GET', $url);
! 82: #my $response = $ua->request( $request );
! 83:
! 84: #if ( $response->is_success ) {
! 85: # $content = $response->content;
! 86: #} else {
! 87: # warn 'OAI request failed: ' . $response->message;
! 88: # exit 1;
! 89: #}
! 90: # Build the element tree from a file in the current directory (presumably a saved ListRecords response - confirm).
! 91: my $tree = XML::TreeBuilder->new;
! 92: $tree->parse_file('cstc-111.xml');
! 93: # The timer starts after parsing, so the reported duration covers record processing only.
! 94: my $t0 = [ gettimeofday() ];
! 95: # Collect every <record> element in the document.
! 96: my @records = $tree->find_by_tag_name('record');
! 97: foreach my $record (@records){
! 98: # Extract information from <header> tag; a record without a <header> is skipped
! 99: my $header = $record->find_by_tag_name('header');
! 100: next if ! $header;
! 101: $logeneralCreation_Date = $header->find_by_tag_name('datestamp')->as_text;
! 102: ($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day) = ($logeneralCreation_Date =~ /^(\d{4})-(\d{2})-(\d{2})$/);
! 103: # Extract information from <general> tag (NOTE: from here on a missing element dies on the chained ->as_text call)
! 104: my $general = $record->find_by_tag_name('general');
! 105: $logeneralTitle1= $general->find_by_tag_name('title')->as_text;
! 106: $logeneralDescription = $general->find_by_tag_name('description')->as_text;
! 107: $logeneralDescription =~ s/\n/ /g;
! 108: my $keywordsElement = $general->find_by_tag_name('keywords');
! 109: my @keywordsLangstringElement = $keywordsElement->find_by_tag_name('langstring');
! 110: my @logeneralKeywords = (); # loop-local list; shadows the file-level array of the same name
! 111: foreach my $kw (@keywordsLangstringElement) {
! 112: my $unpaddedword = $kw->as_text();
! 113: $unpaddedword =~ s/^\s+|\s+$//g; # real trim: the old capture pattern needed whitespace on both sides and lost unpadded keywords
! 114: push(@logeneralKeywords,$unpaddedword) if $unpaddedword ne ''; # skip blank entries
! 115: }
! 116: # Extract information from <lifecycle> tag
! 117: my $lifecycle = $record->find_by_tag_name('lifecycle');
! 118: $platformVersion = $lifecycle->find_by_tag_name('status')->as_text;
! 119: # Extract information from <metametadata> tag
! 120: my $metametadata = $record->find_by_tag_name('metametadata');
! 121: my $role = $metametadata->find_by_tag_name('role')->as_text;
! 122: ($personLastname, $personFirstname, $personEmail, $personCompany) = ("", "", "", ""); # reset so a record without a Contributor does not inherit the previous record's author
! 123: if ( $role eq "Contributor" ) { # We will only take the Contributor information
! 124: my $entity = $metametadata->find_by_tag_name('centity')->as_text;
! 125: ($personLastname, $personFirstname, $personEmail, $personCompany) = OAIv_parseVcard($entity);
! 126: }
! 127: $logeneralLanguage1 = $metametadata->find_by_tag_name('language')->as_text;
! 128: # Extract information from <technical> tag
! 129: my $technical = $record->find_by_tag_name('technical');
! 130: $platformFormat = $technical->find_by_tag_name('format')->as_text;
! 131: # Convert text/html to format 65; every other format becomes 1
! 132: if ($platformFormat eq 'text/html') {
! 133: $platformFormat = 65;
! 134: } else {
! 135: $platformFormat = 1;
! 136: }
! 137: $platformLocation_URL = $technical->find_by_tag_name('location')->as_text;
! 138: $platformLocation_URL =~ tr/ //d;
! 139: $platformLocation_URL =~ s/^\n(.*)$/$1/;
! 140: chomp($platformLocation_URL);
! 141: ($platformType, $platformOS) = ("", ""); # Platform Type is sometimes null and must be mapped to standard platforms; reset so values never carry over between records
! 142: my @requirementsElement = $technical->find_by_tag_name('requirements');
! 143: foreach my $req ( @requirementsElement ) {
! 144: if ( defined (my $typeElement = $req->find_by_tag_name('type')) ) {
! 145: if ( $typeElement->as_text eq "Platform" ) {
! 146: if ( defined (my $nameElement = $req->find_by_tag_name('name')) ) {
! 147: $platformType = OAIv_findPlatform($nameElement->as_text);
! 148: }
! 149: } elsif ( $typeElement->as_text eq "Operating System" ) {
! 150: if ( defined (my $nameElement = $req->find_by_tag_name('name')) ) {
! 151: $platformOS = $nameElement->as_text;
! 152: }
! 153: }
! 154: }
! 155: }
! 156: # Extract information from <educational> tag
! 157: my $educational = $record->find_by_tag_name('educational');
! 158: my @intendedenduserroleElement = $educational->find_by_tag_name('intendedenduserrole');
! 159: @pedagogyEndUserRole=();
! 160: foreach my $ieur (@intendedenduserroleElement) {
! 161: my $intendedenduserroleLangstringElement = $ieur->find_by_tag_name('langstring');
! 162: push(@pedagogyEndUserRole,$intendedenduserroleLangstringElement->content_list());
! 163: }
! 164: # Use Learner (end_user_type = 2)
! 165: my $pedagogyEndUserType = '2';
! 166: # Learning context must be mapped to grade levels
! 167: my @learningcontextElement = $educational->find_by_tag_name('learningcontext');
! 168: my @learningcontext = ();
! 169: foreach my $lc (@learningcontextElement) {
! 170: push(@learningcontext,$lc->find_by_tag_name('langstring')->as_text);
! 171: }
! 172: $pedagogyLContext = OAIv_findLContext(@learningcontext);
! 173: # Fixed placeholder values; they are only passed to OAIc_insert_lo below
! 174: my $difficulty_id = 0;
! 175: my $pedagogy_description = '';
! 176: my $interactivity_level_id = 0;
! 177: my $resource_type_id = 0;
! 178:
! 179: if ( $useDatabase ) {
! 180: # Some specific configuration information for CSTC
! 181: # Logo
! 182: my $image = "http://www.smete.org/images/affiliation/cstc.gif";
! 183: my $submitter_key = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
! 184: my $collection = 'Computer Science Teaching Center';
! 185: my $collection_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
! 186: my $publisher = 'Computer Science Teaching Center';
! 187: my $publisher_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
! 188: # Determine if this author already exists in the database (person and entity tables)
! 189: my $author_reg_key;
! 190: if ( ! ($author_reg_key = OAIc_personexists($dbh,$personEmail)) ) {
! 191: printf("Inserting person email=%s\n",$personEmail);
! 192: my $success = OAIc_insert_person($dbh,$publisher_reg_key,$submitter_key,$personLastname,$personFirstname,$personEmail,$personCompany);
! 193: $author_reg_key = OAIc_personexists($dbh,$personEmail);
! 194: }
! 195: if ( my $general_key = OAIc_loexists($dbh,$logeneralTitle1) ) {
! 196: my $success = OAIc_update_lo($dbh, $general_key, $logeneralTitle1, $logeneralLanguage1, $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year, join(";",@logeneralKeywords), $submitter_key, join(" ", $personFirstname, $personLastname), $publisher, $collection, $platformFormat, $platformType, $platformOS, $platformLocation_URL, $pedagogyLContext, $pedagogyEndUserType, $author_reg_key, $publisher_reg_key);
! 197: $updated = $updated + 1;
! 198: } else {
! 199: printf("Inserting new record for %s\n",$logeneralTitle1);
! 200: my $success = OAIc_insert_lo($dbh, $logeneralTitle1, $logeneralLanguage1, $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year, join(";",@logeneralKeywords), $submitter_key, join(" ", $personFirstname, $personLastname), $publisher, $collection, $platformFormat, $platformType, $platformOS, $platformLocation_URL, $pedagogyLContext, $pedagogyEndUserType, $author_reg_key, $publisher_reg_key, $collection_reg_key,$difficulty_id, $interactivity_level_id, $pedagogy_description, $resource_type_id);
! 201: $inserted = $inserted + 1;
! 202: }
! 203: } else {
! 204: # Print Results
! 205: printf("Title: %s\tDescription: %s\tKeywords: %s\n", $logeneralTitle1,$logeneralDescription,join(";",@logeneralKeywords));
! 206: printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n", $logeneralCreation_Date, $logeneralPub_Year, $logeneralPub_Month);
! 207: printf("Role: %s\n", $role);
! 208: printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n", $personFirstname, $personLastname, $personEmail, $personCompany);
! 209: printf("Language: %s\n", $logeneralLanguage1);
! 210: printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n", $platformFormat, $platformLocation_URL,$platformType,$platformOS);
! 211: printf("IntendedEndUserRole: %s\tLearningContext: %s\n", join(";",@pedagogyEndUserRole), $pedagogyLContext);
! 212: }
! 213: } # end for loop
! 214: $tree->delete(); # release the parsed element tree
! 215: # The connection was opened with AutoCommit => 0; this is the script's only explicit commit.
! 216: if ($useDatabase) {
! 217: $dbh->commit();
! 218: $dbh->disconnect;
! 219: }
! 220: # Summary line: counters from the database branch plus the time elapsed since $t0.
! 221: my $elapsed = tv_interval($t0); printf("Inserted %d records and Updated %d records in %f seconds.\n", $inserted, $updated, $elapsed);
! 222:
! 223: exit(0);
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>