#!/usr/local/bin/perl -w
#
# dlese-getrecord.pl - Use OAI-PMH to harvest metadata from DLESE in dlese_ims format
# This program reads the file dlese-identifiers.xml, then requests each record and stores it in a sub-directory called dlese
#
# Written by Andy Dong <adong@smete.org> 11/01/2001
#
use strict;
use Getopt::Std;
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
use HTTP::Request;
use LWP::UserAgent;
use XML::Element;
use XML::Parser;
use XML::TreeBuilder;
# Harvest every record listed in dlese-identifiers.xml: request each one from
# the DLESE OAI provider in dlese_ims format and save the raw response body in
# a file named after the identifier inside the output directory.

# GetRecord request prefix; the percent-encoded identifier is appended to it.
my $base_url = 'http://oai.dlese.org/provider?verb=GetRecord&metadataPrefix=dlese_ims&identifier=';
# Directory that receives one file per harvested record.
my $output_dir = 'dlese';
# Seconds to pause after each request.
my $delay = 10;

# Percent-encode a value so it can be placed safely in a URL query string.
# Takes the raw value and returns the encoded copy; the argument is untouched.
sub _escape_query_value {
    my ($value) = @_;
    utf8::encode($value);    # escape the UTF-8 bytes, not wide characters
    $value =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;
    return $value;
}

# First parse the dlese-identifiers.xml file to get the list of available
# records. Only the trimmed text of each <identifier> element is kept, and
# empty identifiers are dropped because they cannot name a record or a file.
my $tree = XML::TreeBuilder->new();
$tree->parse_file('dlese-identifiers.xml');
my @identifiers;
foreach my $element ($tree->find_by_tag_name('identifier')) {
    my $text = $element->as_text();
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    push @identifiers, $text if length $text;
}
# The element tree is no longer needed once the identifier strings are copied.
$tree->delete();

# Make sure there is somewhere to put the records before any request is sent.
unless (-d $output_dir) {
    mkdir($output_dir, 0777)
        or die "Cannot create directory $output_dir: $!\n";
}

# One user agent is enough for the whole run.
my $ua = LWP::UserAgent->new();

# Now go grab them and save them to a file
foreach my $record (@identifiers) {
    my $url = $base_url . _escape_query_value($record);
    printf("Going to retrieve %s\n", $url);

    my $response = $ua->request(HTTP::Request->new('GET', $url));
    if ($response->is_success) {
        # Path separators and NUL bytes in an identifier must not leak into
        # the path, otherwise the file could land outside the output directory.
        (my $file_name = $record) =~ s{[/\\\0]}{_}g;
        my $path = "$output_dir/$file_name";

        if (open(my $out, '>', $path)) {
            binmode($out);    # the response body is raw bytes
            print {$out} $response->content;
            close($out) or warn "Cannot finish writing $path: $!\n";
        }
        else {
            warn "Cannot write $path: $!\n";
        }
    }
    else {
        warn "OAI request failed for $record: " . $response->status_line . "\n";
    }

    # sleep else DLESE may crash
    sleep($delay);
}
exit 0;
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>