#!/usr/local/bin/perl -w
#
# cstc.pl - Use OAI MHP to harvest metadata from CSTC in oai_ims format
#
# Written by Andy Dong <adong@smete.org> 11/01/2001
#
use strict;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;
use DBI;
use DBD::ODBC;
require OAIvocabulary_v2;
require OAIcataloging_v2;
# Command-line handling.
# -u flag specifies [u]pdate database; otherwise output to STDOUT
my $usage = << "EOT";
Usage: cstc.pl -u
-u (U)pdate the database
Without -u it simply prints to STDOUT
EOT
my %args;
getopts('u', \%args) || die $usage;

# Running totals reported at the end of the run.
my $inserted = 0;
my $updated = 0;

# Always give the flag a defined value.  The former "my $x = 1 if COND"
# form is documented in perlsyn as having undefined behaviour, so the
# declaration is now unconditional and the value is a plain 0/1.
my $useDatabase = $args{'u'} ? 1 : 0;

# Database handle; stays undefined unless -u was given.
my $dbh;

# Database Configuration
if ( $useDatabase ) {
    print "Updating the database\n";
    # NOTE(review): the credentials are hard-coded in the source; consider
    # moving them to the environment or a protected configuration file.
    my $DBI_DSN  = 'dbi:ODBC:mel.odbc';
    my $DBI_USER = 'autocataloger';
    my $DBI_PWD  = 'regolatacotua';
    # AutoCommit is off: nothing is persisted until the explicit commit at
    # the end of the script.  RaiseError makes DBI die on any failure.
    $dbh = DBI->connect(
        $DBI_DSN, $DBI_USER, $DBI_PWD,
        { RaiseError => 1, AutoCommit => 0 }
    ) or die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
}
# Scratch variables.  Nothing in the active code reads them; $content is
# only filled by the disabled live-fetch code further down.
my ($content, @cstc);

# All possible LOM record variables.  They are file-scoped and are
# overwritten for each <record> handled by the main loop.
my ($logeneralDescription, $logeneralTitle1, $logeneralCreation_Date) = ('') x 3;
my ($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day)       = ('') x 3;
my $logeneralLanguage1 = '';
my @logeneralKeywords;
my $publisher = '';
my ($personLastname, $personFirstname, $personEmail, $personCompany) = ('') x 4;
my ($platformVersion, $platformType, $platformOS)                    = ('') x 3;
my @pedagogyEndUserRole;
my ($pedagogyLContext, $platformFormat, $platformLocation_URL) = ('') x 3;

# Live harvesting over HTTP is currently disabled; the script reads a
# previously saved response from disk instead.  The original fetch code is
# kept here for reference.
#my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
#my $ua = new LWP::UserAgent;
#my $request = HTTP::Request->new('GET', $url);
#my $response = $ua->request( $request );
#if ( $response->is_success ) {
# $content = $response->content;
#} else {
# warn 'OAI request failed: ' . $response->message;
# exit 1;
#}

# Build the element tree from the saved XML file.
my $tree = XML::TreeBuilder->new;
$tree->parse_file('cstc-111.xml');

# Start the stopwatch once parsing is done; the elapsed time reported at
# the end therefore excludes the parse itself.
my $t0 = [ gettimeofday() ];

# List context: every <record> element found in the tree.
my @records = $tree->find_by_tag_name('record');
# Return the text of the first element named $tag found at or below $parent.
# Returns the empty string when $parent is undefined or no such element
# exists, so callers never invoke a method on an undefined value.
sub _cstc_child_text {
    my ($parent, $tag) = @_;
    return '' unless defined $parent;
    my $element = $parent->find_by_tag_name($tag);
    return defined $element ? $element->as_text : '';
}

# Main loop: turn every <record> into either a database insert/update
# (when $useDatabase is set) or a block of text on STDOUT.
RECORD:
foreach my $record (@records) {
    # These file-scoped fields are only assigned conditionally below, so
    # clear them first; otherwise values from an earlier record would be
    # attributed to this one.
    ($personLastname, $personFirstname, $personEmail, $personCompany) = ('') x 4;
    ($platformType, $platformOS) = ('') x 2;

    # Extract information from <header> tag
    my $header = $record->find_by_tag_name('header');
    next RECORD unless $header;
    $logeneralCreation_Date = _cstc_child_text($header, 'datestamp');
    # Accept a plain date as well as a datestamp followed by a time part.
    # The three fields are undef when the datestamp cannot be parsed.
    ($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day)
        = ($logeneralCreation_Date =~ /^\s*(\d{4})-(\d{2})-(\d{2})/);

    # Extract information from <general> tag.  A record may carry a header
    # but no metadata (e.g. a deleted record); skip it instead of dying,
    # because a die here would abandon the whole uncommitted transaction.
    my $general = $record->find_by_tag_name('general');
    if ( !$general ) {
        warn sprintf("Skipping record '%s': no <general> metadata\n",
            _cstc_child_text($header, 'identifier'));
        next RECORD;
    }
    $logeneralTitle1      = _cstc_child_text($general, 'title');
    $logeneralDescription = _cstc_child_text($general, 'description');
    $logeneralDescription =~ s/\n/ /g;

    # Keywords: strip surrounding whitespace from each <langstring> and
    # drop entries that are empty afterwards.
    my $keywordsElement = $general->find_by_tag_name('keywords');
    my @keywordLangstrings = $keywordsElement
        ? $keywordsElement->find_by_tag_name('langstring')
        : ();
    my @logeneralKeywords = ();
    foreach my $kw (@keywordLangstrings) {
        my $word = $kw->as_text();
        $word =~ s/^\s+//;
        $word =~ s/\s+$//;
        push(@logeneralKeywords, $word) if length $word;
    }

    # Extract information from <lifecycle> tag
    my $lifecycle = $record->find_by_tag_name('lifecycle');
    $platformVersion = _cstc_child_text($lifecycle, 'status');

    # Extract information from <metametadata> tag
    my $metametadata = $record->find_by_tag_name('metametadata');
    my $role = _cstc_child_text($metametadata, 'role');
    # We will only take the Contributor information
    if ( $role eq "Contributor" ) {
        my $entity = _cstc_child_text($metametadata, 'centity');
        ($personLastname, $personFirstname, $personEmail, $personCompany) = OAIv_parseVcard($entity);
    }
    $logeneralLanguage1 = _cstc_child_text($metametadata, 'language');

    # Extract information from <technical> tag
    my $technical = $record->find_by_tag_name('technical');
    $platformFormat = _cstc_child_text($technical, 'format');
    # Convert text/html to format 65; everything else becomes format 1
    $platformFormat = ( $platformFormat eq 'text/html' ) ? 65 : 1;
    $platformLocation_URL = _cstc_child_text($technical, 'location');
    # Remove blanks and the newlines surrounding the URL text.
    $platformLocation_URL =~ tr/ //d;
    $platformLocation_URL =~ s/^\n(.*)$/$1/;
    chomp($platformLocation_URL);

    # Platform Type is sometimes null and must be mapped to standard platforms
    my @requirementsElement = $technical
        ? $technical->find_by_tag_name('requirements')
        : ();
    foreach my $req (@requirementsElement) {
        my $typeElement = $req->find_by_tag_name('type');
        my $nameElement = $req->find_by_tag_name('name');
        # A requirement is only usable when it has both a type and a name.
        next unless defined $typeElement && defined $nameElement;
        my $type = $typeElement->as_text;
        if ( $type eq "Platform" ) {
            $platformType = OAIv_findPlatform($nameElement->as_text);
        }
        elsif ( $type eq "Operating System" ) {
            $platformOS = $nameElement->as_text;
        }
    }

    # Extract information from <educational> tag
    my $educational = $record->find_by_tag_name('educational');
    my @intendedenduserroleElement = $educational
        ? $educational->find_by_tag_name('intendedenduserrole')
        : ();
    @pedagogyEndUserRole = ();
    foreach my $ieur (@intendedenduserroleElement) {
        my $langstring = $ieur->find_by_tag_name('langstring');
        next unless defined $langstring;
        push(@pedagogyEndUserRole, $langstring->content_list());
    }
    # Use Learner (end_user_type = 2)
    my $pedagogyEndUserType = '2';

    # Learning context must be mapped to grade levels
    my @learningcontextElement = $educational
        ? $educational->find_by_tag_name('learningcontext')
        : ();
    my @learningcontext = ();
    foreach my $lc (@learningcontextElement) {
        my $langstring = $lc->find_by_tag_name('langstring');
        next unless defined $langstring;
        push(@learningcontext, $langstring->as_text);
    }
    $pedagogyLContext = OAIv_findLContext(@learningcontext);

    # Fixed values passed through to the insert call.
    my $difficulty_id          = 0;
    my $pedagogy_description   = '';
    my $interactivity_level_id = 0;
    my $resource_type_id       = 0;

    if ( $useDatabase ) {
        # Some specific configuration information for CSTC
        # Logo
        my $image = "http://www.smete.org/images/affiliation/cstc.gif";
        my $submitter_key = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
        my $collection = 'Computer Science Teaching Center';
        my $collection_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
        # Shadows the file-scoped $publisher for the database calls below.
        my $publisher = 'Computer Science Teaching Center';
        my $publisher_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';

        # Determine if this author already exists in the database (person and entity tables)
        my $author_reg_key = OAIc_personexists($dbh, $personEmail);
        if ( !$author_reg_key ) {
            printf("Inserting person email=%s\n",$personEmail);
            # NOTE(review): the return values of the OAIc_* write calls are
            # not inspected here (they never were); confirm whether they
            # signal failure and should affect the counters.
            OAIc_insert_person($dbh, $publisher_reg_key, $submitter_key,
                $personLastname, $personFirstname, $personEmail, $personCompany);
            $author_reg_key = OAIc_personexists($dbh, $personEmail);
        }

        my $keyword_list = join(";", @logeneralKeywords);
        my $author_name  = join(" ", $personFirstname, $personLastname);

        # Learning objects are matched on their title.
        if ( my $general_key = OAIc_loexists($dbh, $logeneralTitle1) ) {
            OAIc_update_lo($dbh, $general_key, $logeneralTitle1, $logeneralLanguage1,
                $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year,
                $keyword_list, $submitter_key, $author_name, $publisher, $collection,
                $platformFormat, $platformType, $platformOS, $platformLocation_URL,
                $pedagogyLContext, $pedagogyEndUserType, $author_reg_key,
                $publisher_reg_key);
            $updated++;
        }
        else {
            printf("Inserting new record for %s\n",$logeneralTitle1);
            OAIc_insert_lo($dbh, $logeneralTitle1, $logeneralLanguage1,
                $logeneralDescription, $image, $logeneralPub_Month, $logeneralPub_Year,
                $keyword_list, $submitter_key, $author_name, $publisher, $collection,
                $platformFormat, $platformType, $platformOS, $platformLocation_URL,
                $pedagogyLContext, $pedagogyEndUserType, $author_reg_key,
                $publisher_reg_key, $collection_reg_key, $difficulty_id,
                $interactivity_level_id, $pedagogy_description, $resource_type_id);
            $inserted++;
        }
    }
    else {
        # Print Results
        printf("Title: %s\tDescription: %s\tKeywords: %s\n", $logeneralTitle1,$logeneralDescription,join(";",@logeneralKeywords));
        # "|| 0" keeps the numeric conversions quiet when the datestamp
        # could not be parsed; the printed value is 0 in that case.
        printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n", $logeneralCreation_Date, $logeneralPub_Year || 0, $logeneralPub_Month || 0);
        printf("Role: %s\n", $role);
        printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n", $personFirstname, $personLastname, $personEmail, $personCompany);
        printf("Language: %s\n", $logeneralLanguage1);
        printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n", $platformFormat, $platformLocation_URL,$platformType,$platformOS);
        printf("IntendedEndUserRole: %s\tLearningContext: %s\n", join(";",@pedagogyEndUserRole), $pedagogyLContext);
    }
} # end for loop
# Release the parsed element tree explicitly.
$tree->delete();

# Nothing was persisted so far (AutoCommit is off on the handle), so make
# the work permanent and close the connection.
if ($useDatabase) {
    $dbh->commit();
    $dbh->disconnect;
}

# Report the totals and the wall-clock time elapsed since $t0 was taken.
my $elapsed = tv_interval($t0);
printf("Inserted %d records and Updated %d records in %f seconds.\n", $inserted, $updated, $elapsed);
exit(0);
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>