1: #!/usr/local/bin/perl -w
2:
3: #
4: # cstc.pl - Use OAI MHP to harvest metadata from CSTC in oai_ims format
5: #
6: # Written by Andy Dong <adong@smete.org> 11/01/2001
7: #
8:
9: use strict;
10: use Getopt::Std;
11: use Time::HiRes qw(usleep ualarm gettimeofday tv_interval);
12:
13: use HTTP::Request;
14: use LWP::UserAgent;
15:
16: use XML::Element;
17: use XML::Parser;
18: use XML::TreeBuilder;
19:
20: use DBI;
21: use DBD::ODBC;
22:
23: require OAIvocabulary_v2;
24: require OAIcataloging_v2;
25:
26: # -u flag specifies [u]pdate database; otherwise output to STDOUT
27:
# Usage text printed when the command line cannot be parsed.
my $usage = <<"EOT";
Usage: cstc.pl -u

-u (U)pdate the database

Without -u it simply prints to STDOUT
EOT

# Parse the command line; -u is the only recognised switch.
my %args;
getopts('u', \%args) or die $usage;

# Running totals, reported once the harvest has finished.
my $inserted = 0;
my $updated  = 0;

# True when harvested records are written to the database rather than
# printed.  This is a plain assignment on purpose: combining "my" with a
# statement modifier ("my \$x = 1 if COND") has undefined behaviour
# according to perlsyn.
my $useDatabase = $args{'u'} ? 1 : 0;

# Database handle; stays undefined unless -u was given.
my $dbh;

# Database Configuration
if ($useDatabase) {
    print "Updating the database\n";

    # NOTE(review): the credentials are hard-coded in the source; consider
    # moving them to a protected configuration file or the environment.
    my $DBI_DSN  = 'dbi:ODBC:mel.odbc';
    my $DBI_USER = 'autocataloger';
    my $DBI_PWD  = 'regolatacotua';

    # AutoCommit is off, so nothing is persisted until an explicit commit.
    # RaiseError makes DBI die on errors; the "or die" remains as a
    # fallback should connect ever return false without dying.
    $dbh = DBI->connect(
        $DBI_DSN, $DBI_USER, $DBI_PWD,
        { RaiseError => 1, AutoCommit => 0 }
    ) or die "Unable to connect to database $DBI_DSN as $DBI_USER\n";
}

# Only referenced by the commented-out HTTP fetch in this file; kept so
# that re-enabling that code still compiles under "use strict".
my $content;
my @cstc;

# All possible LOM record variables.  These are file-scoped and are
# assigned by the per-record extraction code further down.
my $logeneralDescription   = "";
my $logeneralTitle1        = "";
my $logeneralCreation_Date = "";
my $logeneralPub_Year      = "";
my $logeneralPub_Month     = "";
my $logeneralPub_Day       = "";
my $logeneralLanguage1     = "";
my @logeneralKeywords      = ();
my $publisher              = "";
my $personLastname         = "";
my $personFirstname        = "";
my $personEmail            = "";
my $personCompany          = "";
my $platformVersion        = "";
my $platformType           = "";
my $platformOS             = "";
my @pedagogyEndUserRole    = ();
my $pedagogyLContext       = "";
my $platformFormat         = "";
my $platformLocation_URL   = "";
77:
78: #my $url = 'http://www.cstc.org/cgi-bin/OAI/CSTC.pl?verb=ListRecords&metadataPrefix=ims1_1';
79:
80: #my $ua = new LWP::UserAgent;
81: #my $request = HTTP::Request->new('GET', $url);
82: #my $response = $ua->request( $request );
83:
84: #if ( $response->is_success ) {
85: # $content = $response->content;
86: #} else {
87: # warn 'OAI request failed: ' . $response->message;
88: # exit 1;
89: #}
90:
# Return the text of the first <$tag> element found at or below $parent,
# or the empty string when $parent is undefined or no such element exists.
# This keeps one incomplete record from aborting the whole run.
sub cstc_child_text {
    my ($parent, $tag) = @_;
    return '' unless defined $parent;
    my $child = $parent->find_by_tag_name($tag);
    return defined $child ? $child->as_text : '';
}

# Return $text with leading and trailing whitespace removed ('' for undef).
sub cstc_trim {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

# Parse the previously saved harvest response into an element tree.
my $tree = XML::TreeBuilder->new();
$tree->parse_file('cstc-111.xml');

# Start of the timed section; the elapsed time is reported at the end.
my $t0 = [gettimeofday];

my @records = $tree->find_by_tag_name('record');
foreach my $record (@records) {

    # Reset the values that are only assigned conditionally below.  They
    # live at file scope, so without this a record lacking a Contributor
    # or a platform requirement would silently reuse the previous
    # record's author and platform.
    ($personLastname, $personFirstname, $personEmail, $personCompany) = ('') x 4;
    ($platformType, $platformOS) = ('') x 2;

    # Extract information from <header> tag
    my $header = $record->find_by_tag_name('header');
    next if !defined $header;
    $logeneralCreation_Date = cstc_child_text($header, 'datestamp');

    # Only the leading YYYY-MM-DD is required, so a datestamp that also
    # carries a time part still yields year/month/day.  All three stay
    # undefined when the datestamp does not start with a date.
    ($logeneralPub_Year, $logeneralPub_Month, $logeneralPub_Day) =
        ($logeneralCreation_Date =~ /^(\d{4})-(\d{2})-(\d{2})/);

    # Extract information from <general> tag
    my $general = $record->find_by_tag_name('general');
    if (!defined $general) {
        # Nothing can be catalogued without the descriptive metadata
        # (presumably a deleted or otherwise incomplete record).
        warn "Skipping record without <general> metadata (datestamp $logeneralCreation_Date)\n";
        next;
    }
    $logeneralTitle1      = cstc_child_text($general, 'title');
    $logeneralDescription = cstc_child_text($general, 'description');
    $logeneralDescription =~ s/\n/ /g;

    # Collect the keywords, trimmed of surrounding whitespace.  This
    # per-record array deliberately shadows the file-level one of the
    # same name, as the original code did.
    my @logeneralKeywords = ();
    my $keywordsElement = $general->find_by_tag_name('keywords');
    if (defined $keywordsElement) {
        foreach my $kw ($keywordsElement->find_by_tag_name('langstring')) {
            my $word = cstc_trim($kw->as_text);
            push(@logeneralKeywords, $word) if length $word;
        }
    }

    # Extract information from <lifecycle> tag
    my $lifecycle = $record->find_by_tag_name('lifecycle');
    $platformVersion = cstc_child_text($lifecycle, 'status');

    # Extract information from <metametadata> tag
    my $metametadata = $record->find_by_tag_name('metametadata');
    my $role = cstc_child_text($metametadata, 'role');

    # We will only take the Contributor information
    if ($role eq "Contributor") {
        my $entity = cstc_child_text($metametadata, 'centity');
        ($personLastname, $personFirstname, $personEmail, $personCompany) =
            OAIv_parseVcard($entity);
    }
    $logeneralLanguage1 = cstc_child_text($metametadata, 'language');

    # Extract information from <technical> tag
    my $technical = $record->find_by_tag_name('technical');

    # Convert text/html to format 65; every other format becomes 1
    $platformFormat =
        cstc_child_text($technical, 'format') eq 'text/html' ? 65 : 1;

    # The location text may be wrapped in whitespace and newlines; a URL
    # contains no literal whitespace, so all of it is removed.
    $platformLocation_URL = cstc_child_text($technical, 'location');
    $platformLocation_URL =~ s/\s+//g;

    # Platform Type is sometimes null and must be mapped to standard platforms
    my @requirementsElement =
        defined $technical ? $technical->find_by_tag_name('requirements') : ();
    foreach my $req (@requirementsElement) {
        my $typeElement = $req->find_by_tag_name('type');
        my $nameElement = $req->find_by_tag_name('name');
        next unless defined $typeElement && defined $nameElement;

        my $requirementType = $typeElement->as_text;
        if ($requirementType eq "Platform") {
            $platformType = OAIv_findPlatform($nameElement->as_text);
        }
        elsif ($requirementType eq "Operating System") {
            $platformOS = $nameElement->as_text;
        }
    }

    # Extract information from <educational> tag
    my $educational = $record->find_by_tag_name('educational');
    my @intendedenduserroleElement =
        defined $educational
        ? $educational->find_by_tag_name('intendedenduserrole')
        : ();
    @pedagogyEndUserRole = ();
    foreach my $ieur (@intendedenduserroleElement) {
        my $intendedenduserroleLangstringElement =
            $ieur->find_by_tag_name('langstring');
        next unless defined $intendedenduserroleLangstringElement;
        push(@pedagogyEndUserRole,
            $intendedenduserroleLangstringElement->content_list());
    }

    # Use Learner (end_user_type = 2)
    my $pedagogyEndUserType = '2';

    # Learning context must be mapped to grade levels
    my @learningcontextElement =
        defined $educational
        ? $educational->find_by_tag_name('learningcontext')
        : ();
    my @learningcontext = ();
    foreach my $lc (@learningcontextElement) {
        push(@learningcontext, cstc_child_text($lc, 'langstring'));
    }
    $pedagogyLContext = OAIv_findLContext(@learningcontext);

    # Fixed values passed to the insert call; nothing in the record is
    # used to derive them.
    my $difficulty_id          = 0;
    my $pedagogy_description   = '';
    my $interactivity_level_id = 0;
    my $resource_type_id       = 0;

    if ($useDatabase) {
        # Some specific configuration information for CSTC
        # Logo
        my $image = "http://www.smete.org/images/affiliation/cstc.gif";
        my $submitter_key      = '{710FE693-46E9-4002-BA94-1BE2E6218CD6}'; # Andy Dong
        my $collection         = 'Computer Science Teaching Center';
        my $collection_reg_key = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';
        # Shadows the file-level $publisher for the database calls below.
        my $publisher          = 'Computer Science Teaching Center';
        my $publisher_reg_key  = '{9FF74842-6C5B-4A80-A86C-B27859C996DE}';

        my $keywords   = join(";", @logeneralKeywords);
        my $authorName = join(" ", $personFirstname, $personLastname);

        # Determine if this author already exists in the database
        # (person and entity tables); insert and look up again if not.
        # NOTE(review): when the record has no Contributor the e-mail is
        # empty here -- confirm how OAIc_personexists treats that.
        my $author_reg_key = OAIc_personexists($dbh, $personEmail);
        if (!$author_reg_key) {
            printf("Inserting person email=%s\n", $personEmail);
            OAIc_insert_person($dbh, $publisher_reg_key, $submitter_key,
                $personLastname, $personFirstname, $personEmail,
                $personCompany);
            $author_reg_key = OAIc_personexists($dbh, $personEmail);
        }

        # Learning objects are matched by title: update on a hit,
        # insert otherwise.
        if (my $general_key = OAIc_loexists($dbh, $logeneralTitle1)) {
            OAIc_update_lo($dbh, $general_key, $logeneralTitle1,
                $logeneralLanguage1, $logeneralDescription, $image,
                $logeneralPub_Month, $logeneralPub_Year, $keywords,
                $submitter_key, $authorName, $publisher, $collection,
                $platformFormat, $platformType, $platformOS,
                $platformLocation_URL, $pedagogyLContext,
                $pedagogyEndUserType, $author_reg_key, $publisher_reg_key);
            $updated++;
        }
        else {
            printf("Inserting new record for %s\n", $logeneralTitle1);
            OAIc_insert_lo($dbh, $logeneralTitle1, $logeneralLanguage1,
                $logeneralDescription, $image, $logeneralPub_Month,
                $logeneralPub_Year, $keywords, $submitter_key, $authorName,
                $publisher, $collection, $platformFormat, $platformType,
                $platformOS, $platformLocation_URL, $pedagogyLContext,
                $pedagogyEndUserType, $author_reg_key, $publisher_reg_key,
                $collection_reg_key, $difficulty_id,
                $interactivity_level_id, $pedagogy_description,
                $resource_type_id);
            $inserted++;
        }
    }
    else {
        # Print Results
        printf("Title: %s\tDescription: %s\tKeywords: %s\n",
            $logeneralTitle1, $logeneralDescription,
            join(";", @logeneralKeywords));
        # "|| 0" keeps the numeric formats free of "uninitialized"
        # warnings when the datestamp could not be parsed.
        printf("Creation Date: %s\tPublication Year: %4d\tPublication Month: %02d\n",
            $logeneralCreation_Date, $logeneralPub_Year || 0,
            $logeneralPub_Month || 0);
        printf("Role: %s\n", $role);
        printf("Firstname: %s\tLastname: %s\tEmail: %s\tOrganization: %s\n",
            $personFirstname, $personLastname, $personEmail, $personCompany);
        printf("Language: %s\n", $logeneralLanguage1);
        printf("Format: %s\tURL: %s\tPlatform: %s\tOS: %s\n",
            $platformFormat, $platformLocation_URL, $platformType,
            $platformOS);
        printf("IntendedEndUserRole: %s\tLearningContext: %s\n",
            join(";", @pedagogyEndUserRole), $pedagogyLContext);
    }
} # end for loop

# Release the parse tree explicitly.
$tree->delete;

# The connection was opened with AutoCommit off, so nothing is persisted
# until this commit.
if ($useDatabase) {
    $dbh->commit;
    $dbh->disconnect();
}

printf("Inserted %d records and Updated %d records in %f seconds.\n",
    $inserted, $updated, tv_interval($t0));

exit 0;
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>