version 1.3, 2003/07/29 14:13:36
|
version 1.5, 2003/07/29 15:10:31
|
Line 12 use strict;
|
Line 12 use strict;
|
use LWP::UserAgent; |
use LWP::UserAgent; |
use Getopt::Std; |
use Getopt::Std; |
use Digest::MD5 qw(md5_hex); |
use Digest::MD5 qw(md5_hex); |
|
use IO::File; |
|
|
|
my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data'; |
|
|
my $pub_month; |
my $pub_month; |
my $pub_year; |
my $pub_year; |
Line 25 my $content_regex = 'File Not Found';
|
Line 28 my $content_regex = 'File Not Found';
|
# Configuration |
# Configuration |
|
|
my $debug = 0; |
my $debug = 0; |
my $url = 'http://s10.lite.msu.edu/cgi-bin/metadata_harvest.pl'; |
|
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab |
my @servers = ( 'newscience.westshore.cc.mi.us', 's10.lite.msu.edu', 's12.lite.msu.edu', 'lon-capa.chem.sunysb.edu', 'schubert.tmcc.edu', 'dalton.chem.sfu.ca', 'capa2.phy.ohiou.edu', 'pollux.physics.fsu.edu', 'loncapa.physics.sc.edu', 'loncapa.math.ucf.edu', 'zappa.ags.udel.edu', 'loncapa.gwu.edu'); |
my @servers = ( |
|
'newscience.westshore.cc.mi.us', |
|
's10.lite.msu.edu', |
|
's12.lite.msu.edu', |
|
'lon-capa.chem.sunysb.edu', |
|
'schubert.tmcc.edu', |
|
'dalton.chem.sfu.ca', |
|
'capa2.phy.ohiou.edu', |
|
'pollux.physics.fsu.edu', |
|
'loncapa.physics.sc.edu', |
|
'loncapa.math.ucf.edu', |
|
'zappa.ags.udel.edu', |
|
'loncapa.gwu.edu', |
|
'neptune.physics.ndsu.nodak.edu', |
|
'capa1.uwsp.edu'); |
|
|
|
foreach (@servers) { |
|
my $url='http://'.$_.'/cgi-bin/metadata_harvest.pl'; |
# End Configuration |
# End Configuration |
|
|
my $ua = new LWP::UserAgent; |
my $ua = new LWP::UserAgent; |
Line 40 $request->authorization_basic('reaper',
|
Line 59 $request->authorization_basic('reaper',
|
my $response = $ua->request( $request ); |
my $response = $ua->request( $request ); |
|
|
if ( $response->is_success ) { |
if ( $response->is_success ) { |
|
print 'SUCCESS: ' . $response->message.' for '.$url."\n\n"; |
$content = $response->content; |
$content = $response->content; |
# Delete all blank lines |
# Delete all blank lines |
$content =~ s/(?<!.)\n//g; |
$content =~ s/(?<!.)\n//g; |
Line 48 if ( $response->is_success ) {
|
Line 68 if ( $response->is_success ) {
|
# Push the content into an array |
# Push the content into an array |
@loncapa = split /\n/, $content; |
@loncapa = split /\n/, $content; |
} else { |
} else { |
die 'LON-CAPA request failed: ' . $response->message; |
print 'LON-CAPA request failed: ' . $response->message.' for '.$url."\n\n"; |
|
next; |
} |
} |
|
|
#@loncapa=undef; |
#@loncapa=undef; |
Line 60 if ( $response->is_success ) {
|
Line 81 if ( $response->is_success ) {
|
#} |
#} |
|
|
my %records = ();; |
my %records = ();; |
print '<?xml version="1.0" encoding="UTF-8"?>'."\n\n"; |
|
|
|
foreach my $metadata (@loncapa) { |
foreach my $metadata (@loncapa) { |
chomp $metadata; |
chomp $metadata; |
Line 84 foreach my $metadata (@loncapa) {
|
Line 104 foreach my $metadata (@loncapa) {
|
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
next if ( ($subject eq 'Sample') || ($subject eq 'Something') ); |
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; |
my $resourceurl = 'http://nsdl.lon-capa.org' . $tkline[3]; |
my $baseid=$tkline[3]; |
my $baseid=$tkline[3]; |
|
my ($adom,$auname)=($baseid=~/^\/res\/(\w+)\/(\w+)\//); |
$baseid=~s/\W/\_/g; |
$baseid=~s/\W/\_/g; |
$baseid=~s/^\_res\_//g; |
$baseid=~s/^\_res\_//g; |
|
my $fileid=md5_hex($baseid); |
|
|
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
next if ( $resourceurl =~ /(.*)\/demo\/(.*)/ ); |
my $keywords = $tkline[4]; |
my $keywords = $tkline[4]; |
Line 150 foreach my $metadata (@loncapa) {
|
Line 172 foreach my $metadata (@loncapa) {
|
# Private means open only to author of material |
# Private means open only to author of material |
next if ( $copyright eq 'private'); |
next if ( $copyright eq 'private'); |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
my $platform = "5"; # HTML Browser (not specified but construed from metadata) |
print (<<ENDMETA); |
# |
|
# Create path |
|
# |
|
unless (-e $basepath.'/'.$adom) { mkdir($basepath.'/'.$adom); } |
|
unless (-e $basepath.'/'.$adom.'/'.$auname) { |
|
mkdir($basepath.'/'.$adom.'/'.$auname) || die 'Could not create '.$basepath.'/'.$adom.'/'.$auname; |
|
} |
|
open(XML,'>'.$basepath.'/'.$adom.'/'.$auname.'/'.$baseid.'.xml'); |
|
print XML (<<ENDMETA); |
|
<?xml version="1.0" encoding="UTF-8"?> |
|
|
<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/" |
<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/" |
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/" |
xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/" |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
Line 166 foreach my $metadata (@loncapa) {
|
Line 198 foreach my $metadata (@loncapa) {
|
<description>$abstract</description> |
<description>$abstract</description> |
<date>$rev_year-$rev_month-$rev_day</date> |
<date>$rev_year-$rev_month-$rev_day</date> |
</oaidc:dc> |
</oaidc:dc> |
|
|
ENDMETA |
ENDMETA |
|
close (XML); |
|
} |
} |
} |