File:  [LON-CAPA] / nsdl / harvestsmete / lon-capa.pl
Revision 1.1: download - view: text, annotated - select for diffs
Thu May 8 16:37:32 2003 UTC (21 years, 7 months ago) by www
Branches: MAIN
CVS tags: HEAD
SMETE side harvest code for LON-CAPA

#!/usr/local/bin/perl

#
# lon-capa.pl
# Parse the LON-CAPA metadata
#
# Andy Dong <adong@smete.org> 10/23/2002
#
# Contact Gerd Kortemeyer (korte@lite.msu.edu)

use strict;
use LWP::UserAgent;
use Getopt::Std;

use DBI;
use DBD::ODBC;

require OAIcataloging_v2;

# -u flag specifies [u]pdate database; otherwise output to STDOUT

# Usage text shown (via die) when option parsing fails.
my $usage = <<"EOT";
Usage: lon-capa.pl -u

    -u (U)pdate the database

    Without -u it simply prints the parsed metadata to STDOUT
EOT

# Only -u is recognised; getopts() returns false on an unknown switch.
my %args;
getopts('u', \%args) or die $usage;

# Always give the flag a defined 0/1 value.  The previous form,
# "my \$x = 1 if (...)", mixes a my declaration with a statement modifier,
# which perlsyn documents as undefined behaviour.
my $useDatabase = $args{'u'} ? 1 : 0;

# --- Database connection settings ------------------------------------------
# NOTE(review): the DSN, user name and password are hard-coded in the source.
# Alternate DSN kept for reference:
#   dbi:ODBC:needs2_mel_needs_3_1_dev.odbc
my $DBI_DSN  = 'dbi:ODBC:needs2_mel_needs_3_1.odbc';
my $DBI_USER = 'autocataloger';
my $DBI_PWD  = 'regolatacotua';
my $dbh;                        # database handle, assigned when -u is in effect

# File-scope publication date slots; the record loop declares its own
# lexicals with the same names.
my ($pub_month, $pub_year);

# One entry per line of harvested metadata.
my @loncapa;

# --- HTTP requests ---------------------------------------------------------
# Only referenced by the commented-out LWP download code further down.
my $content;
my $content_regex = 'File Not Found';

# --- Configuration ---------------------------------------------------------
my $debug = 0;
my $url   = 'http://data.lite.msu.edu/cgi-bin/metadata_harvest.pl';

# The list of servers is from the LON-CAPA CVS repository in
# /loncapa/loncom/production_hosts.tab
my @servers = qw(
    newscience.westshore.cc.mi.us
    s10.lite.msu.edu
    s12.lite.msu.edu
    lon-capa.chem.sunysb.edu
    schubert.tmcc.edu
    dalton.chem.sfu.ca
    capa2.phy.ohiou.edu
    pollux.physics.fsu.edu
    loncapa.physics.sc.edu
    loncapa.math.ucf.edu
    zappa.ags.udel.edu
    loncapa.gwu.edu
);

# End Configuration

#my $ua = new LWP::UserAgent;
#$ua->timeout(600);

#my $request = new HTTP::Request GET => $url;
#$request->authorization_basic('reaper', 'cat4u');

#my $response = $ua->request( $request );

#if ( $response->is_success ) {
#	$content = $response->content;
# Delete all blank lines
#	$content =~ s/(?<!.)\n//g;
# Replace all ^M (carriage returns) with spaces
#	$content =~ s/\r/ /g;
# Push the content into an array
#	@loncapa = split /\n/, $content;
#} else {
#	die 'LON-CAPA request failed: ' . $response->message;
#}

# Load the harvested metadata from a local dump file (path is relative to the
# current working directory), storing one chomped line per list element.

# Start from a genuinely empty list.  Assigning undef to an array creates a
# one-element list containing undef, i.e. a bogus leading record.
@loncapa = ();

my $harvest_file = 'metadata_harvest.txt';
open(my $lon_fh, '<', $harvest_file)
    or die "Cannot open $harvest_file: $!\n";

while (my $line = <$lon_fh>) {
    chomp $line;
    push @loncapa, $line;
}

close($lon_fh);

# Main processing loop.
#
# Each element of @loncapa is one pipe-delimited record with the fields
#   0 title         1 author         2 subject       3 URL path
#   4 keywords      5 version        6 notes         7 abstract
#   8 type          9 language      10 creation date 11 revision date
#  12 owner (not used)              13 copyright
#
# Records are skipped when title, author or abstract is empty, when the
# subject is 'Sample' or 'Something', when the URL contains /demo/, when the
# language is not 'seniso', or when the copyright is 'private'.
#
# With -u each remaining record is looked up in the database by title and
# inserted when missing (its author is inserted first when unknown).
# Without -u the parsed fields are printed to STDOUT.

# Record type => learning resource type code; unknown types map to 0.
# NOTE(review): the numeric codes presumably match catalog lookup tables that
# are not visible in this file - confirm before changing them.
my %resource_type_for = (
    problem     => 114,
    exam        => 114,
    quiz        => 114,
    assess      => 114,
    survey      => 114,
    form        => 114,
    spreadsheet => 114,
    library     => 107,
    page        => 104,
    sequence    => 104,
);

# Record types that get media format code 70; everything else gets 0.
my %has_media_format_70 = map { $_ => 1 } qw(htm gif mov xml);

# Copyright value => rights text.  Any other value that reaches the lookup
# falls back to $default_rights ('private' records are skipped before it).
my %rights_for = (
    public => 'LON-CAPA Public Resource. No login required.',
    domain => 'Restricted to certain LON-CAPA domains.',
);
my $default_rights = 'LON-CAPA Default Use Restriction. Login required.';

# Values that are the same for every record.
my $primary_language = 'en-US';
my $platform = "5";     # HTML Browser (not specified but construed from metadata)
my $image    = 'http://www.lite.msu.edu/liteani.gif';
my $cost     = 1;       # version.purchase_license_type_id

# Database state shared by all records.  The connection and the two id
# lookups are made once, on the first record that needs them, instead of
# once per record.
my ($collection_id, $submitter_id);

# Running totals.  These used to be re-declared inside the loop, so they
# never accumulated and were never reported.
my $records_existing = 0;
my $records_inserted = 0;

RECORD:
foreach my $metadata (@loncapa) {
    next RECORD unless defined $metadata;
    chomp $metadata;

    my ($title, $author, $subject, $path, $keywords, $version, $notes,
        $abstract, $type, $language, $creation_date, $revision_date,
        $owner_unused, $copyright) = split(/\|/, $metadata);

    # Mandatory fields and exclusions.
    next RECORD if !defined $title    || $title    eq '';
    next RECORD if !defined $author   || $author   eq '';
    next RECORD if defined $subject && ($subject eq 'Sample' || $subject eq 'Something');
    next RECORD if !defined $abstract || $abstract eq '';
    next RECORD unless defined $language && $language eq 'seniso';   # Look only for seniso
    # Private means open only to author of material
    next RECORD if defined $copyright && $copyright eq 'private';

    my $resourceurl = 'http://lon-capa.smete.org' . (defined $path ? $path : '');
    next RECORD if $resourceurl =~ m{/demo/};

    # Only the first two whitespace-separated words of the author are used.
    my ($author_fname, $author_lname) = split(' ', $author);

    # We have to make an exception for Multimedia Physics which is an
    # organization not a person
    my $object_type = (defined $author_lname && $author_lname eq 'Physics')
                    ? 'organization'
                    : 'person';

    my $type_key = defined $type ? $type : '';
    my $learning_resource_type = exists $resource_type_for{$type_key}
                               ? $resource_type_for{$type_key}
                               : 0;
    my $media_format = $has_media_format_70{$type_key} ? 70 : 0;

    # Both values stay undefined when the date is not "YYYY-MM-DD HH:MM:SS".
    my ($pub_year, $pub_month) =
        (defined $creation_date ? $creation_date : '')
            =~ /^(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})$/;

    # Public means no login required
    # Domain means restricted to a particular LON-CAPA domain
    # Defaults mean access open to any registered LON-CAPA user
    my $rights_key = defined $copyright ? $copyright : '';
    my $rights_description = exists $rights_for{$rights_key}
                           ? $rights_for{$rights_key}
                           : $default_rights;

    if ( $useDatabase ) {
        # Connect lazily so a run with no usable records never opens a
        # connection.
        if ( !$dbh ) {
            $dbh = DBI->connect($DBI_DSN, $DBI_USER, $DBI_PWD, { RaiseError => 1, AutoCommit => 0 })
                or die "Unable to connect to database $DBI_DSN as $DBI_USER: ($DBI::err) $DBI::errstr\n";
            # Configuration information for LON-CAPA
            $collection_id = OAIc_orgexists($dbh, 'LearningOnline Network with CAPA');
            $submitter_id  = OAIc_personexists($dbh, 'adong@smete.org');
        }

        # LON-CAPA has single authors
        my $author_name = join(' ', $author_fname, $author_lname);
        my $reg_key;
        if ( $object_type eq 'organization' ) {
            $reg_key = OAIc_orgexists($dbh, $author_name);
            if ( !$reg_key ) {
                printf("Inserting new organization %s\n", $author_name);
                OAIc_insert_org($dbh,$collection_id,$submitter_id,'',$author_name,'','','','','','','','');
                $reg_key = OAIc_orgexists($dbh, $author_name);
            }
        }
        else {
            $reg_key = OAIc_personexists_name($dbh, $author_name);
            if ( !$reg_key ) {
                printf("Inserting new person(author) %s\n", $author_name);
                OAIc_insert_person($dbh,$collection_id,$submitter_id,$author_lname,$author_fname,'','');
                $reg_key = OAIc_personexists_name($dbh, $author_name);
            }
        }

        if ( OAIc_loexists($dbh, $title) ) {
            # Already catalogued: nothing is changed.
            $records_existing++;
        }
        else {
            printf("Inserting new record for %s\n", $title);
            # NOTE(review): the original call had a doubled comma after
            # $platform.  Perl ignores an empty list element, so no argument
            # was passed there and the argument list below is identical.
            # Check against OAIc_insert_lo whether a value was intended.
            OAIc_insert_lo($dbh, $title, $primary_language, $abstract, $image, $pub_month, $pub_year, $keywords, $submitter_id, $reg_key, $collection_id, $collection_id, $media_format, $platform, '', $resourceurl, '', 1, $reg_key, $collection_id, $collection_id, '', '', '', $learning_resource_type, $rights_description, $cost);
            $records_inserted++;
        }

        # Commit per record, as before, so earlier records survive a later
        # failure.
        $dbh->commit;
    }
    else { # Print information if no database updates requested
        printf("Title: %s\n", $title);
        printf("Author First Name: %s\n", $author_fname);
        printf("Author Last Name: %s\n", $author_lname);
        printf("Subject: %s\n", $subject);
        printf("URL: %s\n", $resourceurl);
        printf("Keywords: %s\n", $keywords);
        printf("Version: %s\n", $version);
        printf("Notes: %s\n", $notes);
        printf("Abstract: %s\n", $abstract);
        printf("Learning Resource Type: %d\n", $learning_resource_type);
        printf("Media Format: %d\n", $media_format);
        printf("Primary Language: %s\n", $primary_language);
        printf("Creation Date: %s\n", $creation_date);
        printf("Revision Date: %s\n", $revision_date);
        printf("Copyright: %s\n", $copyright);
        printf("Publication Year: %4d\tPublication Month: %02d\n", $pub_year, $pub_month);
    }
}

if ( $useDatabase ) {
    printf("%d record(s) already present, %d record(s) inserted\n",
        $records_existing, $records_inserted);
    $dbh->disconnect if $dbh;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>