--- loncom/publisher/lonpublisher.pm 2002/08/09 17:59:14 1.90 +++ loncom/publisher/lonpublisher.pm 2024/12/27 04:01:41 1.306 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Publication Handler # -# $Id: lonpublisher.pm,v 1.90 2002/08/09 17:59:14 matthew Exp $ +# $Id: lonpublisher.pm,v 1.306 2024/12/27 04:01:41 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -25,28 +25,6 @@ # # http://www.lon-capa.org/ # -# -# (TeX Content Handler -# -# 05/29/00,05/30,10/11 Gerd Kortemeyer) -# -# 11/28,11/29,11/30,12/01,12/02,12/04,12/23 Gerd Kortemeyer -# 03/23 Guy Albertelli -# 03/24,03/29,04/03 Gerd Kortemeyer -# 04/16/2001 Scott Harrison -# 05/03,05/05,05/07 Gerd Kortemeyer -# 05/28/2001 Scott Harrison -# 06/23,08/07,08/11,8/13,8/17,8/18,8/24,9/26,10/16 Gerd Kortemeyer -# 12/04,12/05 Guy Albertelli -# 12/05 Gerd Kortemeyer -# 12/05 Guy Albertelli -# 12/06,12/07 Gerd Kortemeyer -# 12/15,12/16 Scott Harrison -# 12/25 Gerd Kortemeyer -# YEAR=2002 -# 1/16,1/17 Scott Harrison -# 1/17 Gerd Kortemeyer -# ### ############################################################################### @@ -67,30 +45,62 @@ =pod -=head1 Name +=head1 NAME lonpublisher - LON-CAPA publishing handler -=head1 Synopsis +=head1 SYNOPSIS + +B is used by B inside B. This is the +invocation by F: -lonpublisher takes the proper steps to add resources to the LON-CAPA + + PerlAccessHandler Apache::lonacc + SetHandler perl-script + PerlHandler Apache::lonpublisher + ErrorDocument 403 /adm/login + ErrorDocument 404 /adm/notfound.html + ErrorDocument 406 /adm/unauthorized.html + ErrorDocument 500 /adm/errorhandler + + +=head1 OVERVIEW + +Authors can only write-access the C space. +They can copy resources into the resource area through the +publication step, and move them back through a recover step. +Authors do not have direct write-access to their resource space. + +During the publication step, several events will be +triggered. Metadata is gathered, where a wizard manages default +entries on a hierarchical per-directory base: The wizard imports the +metadata (including access privileges and royalty information) from +the most recent published resource in the current directory, and if +that is not available, from the next directory above, etc. The Network +keeps all previous versions of a resource and makes them available by +an explicit version number, which is inserted between the file name +and extension, for example C, while the most recent +version does not carry a version number (C). Servers +subscribing to a changed resource are notified that a new version is +available. + +=head1 DESCRIPTION + +B takes the proper steps to add resources to the LON-CAPA digital library. This includes updating the metadata table in the LON-CAPA database. -=head1 Description - -lonpublisher is many things to many people. -To all people it is woefully documented. -This documentation conforms to this standard. +B is many things to many people. This module publishes a file. This involves gathering metadata, versioning the file, copying file from construction space to publication space, and copying metadata from construction space to publication space. -=head2 Internal Functions +=head2 SUBROUTINES -=over 4 +Many of the undocumented subroutines implement various magical +parsing shortcuts. =cut @@ -106,78 +116,108 @@ use Apache::File; use File::Copy; use Apache::Constants qw(:common :http :methods); use HTML::LCParser; +use HTML::Entities; +use Encode::Encoder; use Apache::lonxml; -use Apache::lonhomework; -use Apache::loncacc; use DBI; -use Apache::lonnet(); +use Apache::lonnet; use Apache::loncommon(); +use Apache::lonhtmlcommon; use Apache::lonmysql; - -my %addid; -my %nokey; - -my %metadatafields; -my %metadatakeys; - +use Apache::lonlocal; +use Apache::loncfile; +use LONCAPA::lonmetadata; +use Apache::lonmsg; +use vars qw(%metadatafields %metadatakeys %addid $readit); +use LONCAPA qw(:DEFAULT :match); + my $docroot; my $cuname; my $cudom; -######################################### -######################################### +my $registered_cleanup; +my $modified_urls; + +my $lock; =pod -=item metaeval +=over 4 + +=item B + +Evaluates a string that contains metadata. This subroutine +stores values inside I<%metadatafields> and I<%metadatakeys>. +The hash key is a I<$unikey> corresponding to a unique id +that is descriptive of the parser location inside the XML tree. + +Parameters: -Evaluate string with metadata +=over 4 + +=item I<$metastring> + +A string that contains metadata. + +=back + +Returns: + +nothing =cut ######################################### ######################################### +# +# Modifies global %metadatafields %metadatakeys +# + sub metaeval { - my $metastring=shift; + my ($metastring,$prefix)=@_; - my $parser=HTML::LCParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'package'})) { - $unikey.='_package_'.$token->[2]->{'package'}; - } - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'id'})) { - $unikey.='_'.$token->[2]->{'id'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - foreach (@{$token->[3]}) { - $metadatafields{$unikey.'.'.$_}=$token->[2]->{$_}; - if ($metadatakeys{$unikey}) { - $metadatakeys{$unikey}.=','.$_; - } else { - $metadatakeys{$unikey}=$_; - } - } - if ($metadatafields{$unikey}) { - my $newentry=$parser->get_text('/'.$entry); - unless (($metadatafields{$unikey}=~/$newentry/) || - ($newentry eq '')) { - $metadatafields{$unikey}.=', '.$newentry; - } - } else { - $metadatafields{$unikey}=$parser->get_text('/'.$entry); - } - } - } + my $parser=HTML::LCParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + next if ($entry =~ m/^(?:parameter|stores)_/); + if (defined($token->[2]->{'package'})) { + $unikey.="\0package\0".$token->[2]->{'package'}; + } + if (defined($token->[2]->{'part'})) { + $unikey.="\0".$token->[2]->{'part'}; + } + if (defined($token->[2]->{'id'})) { + $unikey.="\0".$token->[2]->{'id'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.="\0".$token->[2]->{'name'}; + } + foreach my $item (@{$token->[3]}) { + $metadatafields{$unikey.'.'.$item}=$token->[2]->{$item}; + if ($metadatakeys{$unikey}) { + $metadatakeys{$unikey}.=','.$item; + } else { + $metadatakeys{$unikey}=$item; + } + } + my $newentry=$parser->get_text('/'.$entry); + if (($entry eq 'customdistributionfile') || + ($entry eq 'sourcerights')) { + $newentry=~s/^\s*//; + if ($newentry !~m|^/res|) { $newentry=$prefix.$newentry; } + } +# actually store + if ( $entry eq 'rule' && exists($metadatafields{$unikey})) { + $metadatafields{$unikey}.=','.$newentry; + } else { + $metadatafields{$unikey}=$newentry; + } + } + } } ######################################### @@ -185,66 +225,102 @@ sub metaeval { =pod -=item metaread +=item B Read a metadata file +Parameters: + +=over + +=item I<$logfile> + +File output stream to output errors and warnings to. + +=item I<$fn> + +File name (including path). + +=back + +Returns: + +=over 4 + +=item Scalar string (if successful) + +XHTML text that indicates successful reading of the metadata. + +=back + =cut ######################################### ######################################### sub metaread { - my ($logfile,$fn)=@_; + my ($logfile,$fn,$prefix)=@_; unless (-e $fn) { - print $logfile 'No file '.$fn."\n"; - return '
No file: '.$fn.''; + print($logfile 'No file '.$fn."\n"); + return '

' + .&mt('No file: [_1]',&Apache::loncfile::display($fn)) + .'

'; } - print $logfile 'Processing '.$fn."\n"; + print($logfile 'Processing '.$fn."\n"); my $metastring; { - my $metafh=Apache::File->new($fn); - $metastring=join('',<$metafh>); + my $metafh=Apache::File->new($fn); + $metastring=join('',<$metafh>); } - &metaeval($metastring); - return '
Processed file: '.$fn.''; + &metaeval($metastring,$prefix); + return '

' + .&mt('Processed file: [_1]',&Apache::loncfile::display($fn)) + .'

'; } ######################################### ######################################### -=pod - -=item sqltime - -Convert 'time' format into a datetime sql format - -=cut - -######################################### -######################################### -sub sqltime { - my $timef=shift @_; - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime($timef); - $mon++; $year+=1900; - return "$year-$mon-$mday $hour:$min:$sec"; +sub coursedependencies { + my $url=&Apache::lonnet::declutter(shift); + $url=~s/\.meta$//; + my ($adomain,$aauthor)=($url=~ m{^($match_domain)/($match_username)/}); + my $regexp=quotemeta($url); + $regexp='___'.$regexp.'___course'; + my %evaldata=&Apache::lonnet::dump('nohist_resevaldata',$adomain, + $aauthor,$regexp); + my %courses=(); + foreach my $item (keys(%evaldata)) { + if ($item=~/^([a-zA-Z0-9]+_[a-zA-Z0-9]+)___.+___course$/) { + $courses{$1}=1; + } + } + return %courses; } - - ######################################### ######################################### + =pod -=item Form field generating functions +=item Form-field-generating subroutines. + +For input parameters, these subroutines take in values +such as I<$name>, I<$value> and other form field metadata. +The output (scalar string that is returned) is an XHTML +string which presents the form field (foreseeably inside +
tags). =over 4 -=item textfield +=item B + +=item B -=item hiddenfield +=item B -=item selectbox +=item B + +=item B =back @@ -253,29 +329,138 @@ sub sqltime { ######################################### ######################################### sub textfield { - my ($title,$name,$value)=@_; - return "\n

$title:
". - ''; + my ($title,$name,$value,$noline,$readonly)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + return "\n".&Apache::lonhtmlcommon::row_title($title) + .'' + .&Apache::lonhtmlcommon::row_closure($noline); +} + +sub text_with_browse_field { + my ($title,$name,$value,$restriction,$noline,$readonly)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + my $disabled; + if ($readonly) { + $disabled = ' disabled="disabled"'; + } + my $output = + "\n".&Apache::lonhtmlcommon::row_title($title) + .''; + unless ($readonly) { + $output .= + '
' + .'' + .&mt('Select') + .' ' + .'' + .&mt('Search') + .''; + } + $output .= &Apache::lonhtmlcommon::row_closure($noline); + return $output; } sub hiddenfield { my ($name,$value)=@_; - return "\n".''; + $env{'form.'.$name}=$value; + return "\n".''; +} + +sub checkbox { + my ($name,$text)=@_; + return "\n"; } sub selectbox { - my ($title,$name,$value,$functionref,@idlist)=@_; - my $uctitle=uc($title); - my $selout="\n

$uctitle:". - "
".''; + foreach my $id (@idlist) { + $selout.='

+'; } ######################################### @@ -283,7 +468,7 @@ sub selectbox { =pod -=item urlfixup +=item B Fix up a url? First step of publication @@ -299,15 +484,14 @@ sub urlfixup { if ($url =~ /^mailto:/i) { return $url; } #internal document links need no fixing if ($url =~ /^\#/) { return $url; } - my ($host)=($url=~/(?:http\:\/\/)*([^\/]+)/); - foreach (values %Apache::lonnet::hostname) { - if ($_ eq $host) { - $url=~s/^http\:\/\///; - $url=~s/^$host//; - } + my ($host)=($url=~m{(?:(?:http|https|ftp)://)*([^/]+)}); + my @lonids = &Apache::lonnet::machine_ids($host); + if (@lonids) { + $url=~s{^(?:http|https|ftp)://}{}; + $url=~s/^\Q$host\E//; } - if ($url=~/^http\:\/\//) { return $url; } - $url=~s/\~$cuname/res\/$cudom\/$cuname/; + if ($url=~m{^(?:http|https|ftp)://}) { return $url; } + $url=~s{\Q~$cuname\E}{res/$cudom/$cuname}; return $url; } @@ -316,9 +500,9 @@ sub urlfixup { =pod -=item absoluteurl +=item B -Currently undocumented +Currently undocumented. =cut @@ -339,7 +523,7 @@ sub absoluteurl { =pod -=item set_allow +=item B Currently undocumented @@ -348,7 +532,7 @@ Currently undocumented ######################################### ######################################### sub set_allow { - my ($allow,$logfile,$target,$tag,$oldurl)=@_; + my ($allow,$logfile,$target,$tag,$oldurl,$type)=@_; my $newurl=&urlfixup($oldurl,$target); my $return_url=$oldurl; print $logfile 'GUYURL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; @@ -358,11 +542,16 @@ sub set_allow { } if (($newurl !~ /^javascript:/i) && ($newurl !~ /^mailto:/i) && - ($newurl !~ /^http:/i) && + ($newurl !~ /^(?:http|https|ftp):/i) && ($newurl !~ /^\#/)) { + if (($type eq 'src') || ($type eq 'href')) { + if ($newurl =~ /^([^?]+)\?[^?]*$/) { + $newurl = $1; + } + } $$allow{&absoluteurl($newurl,$target)}=1; } - return $return_url + return $return_url; } ######################################### @@ -370,7 +559,7 @@ sub set_allow { =pod -=item get_subscribed_hosts +=item B Currently undocumented @@ -385,10 +574,16 @@ sub get_subscribed_hosts { $target=~/(.*)\/([^\/]+)$/; my $srcf=$2; opendir(DIR,$1); + # cycle through listed files, subscriptions used to exist + # as "filename.lonid" while ($filename=readdir(DIR)) { - if ($filename=~/$srcf\.(\w+)$/) { + if ($filename=~/\Q$srcf\E\.($match_lonid)$/) { my $subhost=$1; - if ($subhost ne 'meta' && $subhost ne 'subscription') { + if (($subhost ne 'meta' + && $subhost ne 'subscription' + && $subhost ne 'meta.subscription' + && $subhost ne 'tmp') && + ($subhost ne $Apache::lonnet::perlvar{'lonHostID'})) { push(@subscribed,$subhost); } } @@ -396,17 +591,14 @@ sub get_subscribed_hosts { closedir(DIR); my $sh; if ( $sh=Apache::File->new("$target.subscription") ) { - &Apache::lonnet::logthis("opened $target.subscription"); while (my $subline=<$sh>) { - &Apache::lonnet::logthis("Trying $subline"); - if ($subline =~ /(^\w+):/) { push(@subscribed,$1); } else { - &Apache::lonnet::logthis("No Match for $subline"); + if ($subline =~ /^($match_lonid):/) { + if ($1 ne $Apache::lonnet::perlvar{'lonHostID'}) { + push(@subscribed,$1); + } } } - } else { - &Apache::lonnet::logthis("Un able to open $target.subscription"); } - &Apache::lonnet::logthis("Got list of ".join(':',@subscribed)); return @subscribed; } @@ -416,7 +608,7 @@ sub get_subscribed_hosts { =pod -=item get_max_ids_indices +=item B Currently undocumented @@ -429,21 +621,34 @@ sub get_max_ids_indices { my $maxindex=10; my $maxid=10; my $needsfixup=0; + my $duplicateids=0; + + my %allids; + my %duplicatedids; my $parser=HTML::LCParser->new($content); + $parser->xml_mode(1); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { my $counter; if ($counter=$addid{$token->[1]}) { if ($counter eq 'id') { - if (defined($token->[2]->{'id'})) { + if (defined($token->[2]->{'id'}) && + $token->[2]->{'id'} !~ /^\s*$/) { $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid; + if (exists($allids{$token->[2]->{'id'}})) { + $duplicateids=1; + $duplicatedids{$token->[2]->{'id'}}=1; + } else { + $allids{$token->[2]->{'id'}}=1; + } } else { $needsfixup=1; } } else { - if (defined($token->[2]->{'index'})) { + if (defined($token->[2]->{'index'}) && + $token->[2]->{'index'} !~ /^\s*$/) { $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex; } else { $needsfixup=1; @@ -452,7 +657,8 @@ sub get_max_ids_indices { } } } - return ($needsfixup,$maxid,$maxindex); + return ($needsfixup,$maxid,$maxindex,$duplicateids, + (keys(%duplicatedids))); } ######################################### @@ -460,7 +666,7 @@ sub get_max_ids_indices { =pod -=item get_all_text_unbalanced +=item B Currently undocumented @@ -484,11 +690,11 @@ sub get_all_text_unbalanced { } elsif ($token->[0] eq 'E') { $result.=$token->[2]; } - if ($result =~ /(.*)$tag(.*)/) { + if ($result =~ /\Q$tag\E/s) { + ($result,my $redo)=$result =~ /(.*)\Q$tag\E(.*)/is; #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2); #&Apache::lonnet::logthis('Result is :'.$1); - $result=$1; - my $redo=$tag.$2; + $redo=$tag.$redo; push (@$pars,HTML::LCParser->new(\$redo)); $$pars[-1]->xml_mode('1'); last; @@ -502,7 +708,7 @@ sub get_all_text_unbalanced { =pod -=item fix_ids_and_indices +=item B Currently undocumented @@ -521,14 +727,23 @@ sub fix_ids_and_indices { $content=join('',<$org>); } - my ($needsfixup,$maxid,$maxindex)=&get_max_ids_indices(\$content); + my ($needsfixup,$maxid,$maxindex,$duplicateids,@duplicatedids)= + &get_max_ids_indices(\$content); + print $logfile ("Got $needsfixup,$maxid,$maxindex,$duplicateids--". + join(', ',@duplicatedids)); + if ($duplicateids) { + print $logfile "Duplicate ID(s) exist, ".join(', ',@duplicatedids)."\n"; + my $outstring=''.&mt('Unable to publish file, it contains duplicated ID(s), ID(s) need to be unique. The duplicated ID(s) are').': '.join(', ',@duplicatedids).''; + return ($outstring,1); + } if ($needsfixup) { print $logfile "Needs ID and/or index fixup\n". "Max ID : $maxid (min 10)\n". "Max Index: $maxindex (min 10)\n"; } my $outstring=''; + my $responsecounter=1; my @parser; $parser[0]=HTML::LCParser->new(\$content); $parser[-1]->xml_mode(1); @@ -543,37 +758,55 @@ sub fix_ids_and_indices { $allow{$token->[2]->{'src'}}=1; next; } + if ($lctag eq 'base') { next; } + if (($lctag eq 'part') || ($lctag eq 'problem')) { + $responsecounter=0; + } + if ($lctag=~/response$/) { $responsecounter++; } + if ($lctag eq 'import') { $responsecounter++; } my %parms=%{$token->[2]}; $counter=$addid{$tag}; if (!$counter) { $counter=$addid{$lctag}; } if ($counter) { if ($counter eq 'id') { - unless (defined($parms{'id'})) { + unless (defined($parms{'id'}) && + $parms{'id'}!~/^\s*$/) { $maxid++; $parms{'id'}=$maxid; - print $logfile 'ID: '.$tag.':'.$maxid."\n"; + print $logfile 'ID(new) : '.$tag.':'.$maxid."\n"; + } else { + print $logfile 'ID(kept): '.$tag.':'.$parms{'id'}."\n"; } } elsif ($counter eq 'index') { - unless (defined($parms{'index'})) { + unless (defined($parms{'index'}) && + $parms{'index'}!~/^\s*$/) { $maxindex++; $parms{'index'}=$maxindex; print $logfile 'Index: '.$tag.':'.$maxindex."\n"; } } } - foreach my $type ('src','href','background','bgimg') { - foreach my $key (keys(%parms)) { - if ($key =~ /^$type$/i) { - $parms{$key}=&set_allow(\%allow,$logfile, - $target,$tag, - $parms{$key}); + unless ($parms{'type'} eq 'zombie') { + foreach my $type ('src','href','background','bgimg') { + foreach my $key (keys(%parms)) { + if ($key =~ /^$type$/i) { + next if (($lctag eq 'img') && ($type eq 'src') && + ($parms{$key} =~ m{^data\:image/gif;base64,})); + $parms{$key}=&set_allow(\%allow,$logfile, + $target,$tag, + $parms{$key},$type); + } } } } # probably a image type