--- loncom/publisher/lonpublisher.pm 2000/11/30 16:22:13 1.6
+++ loncom/publisher/lonpublisher.pm 2008/08/14 13:39:02 1.245
@@ -1,309 +1,2240 @@
# The LearningOnline Network with CAPA
# Publication Handler
-#
-# (TeX Content Handler
#
-# 05/29/00,05/30,10/11 Gerd Kortemeyer)
+# $Id: lonpublisher.pm,v 1.245 2008/08/14 13:39:02 onken Exp $
#
-# 11/28,11/29,11/30 Gerd Kortemeyer
+# Copyright Michigan State University Board of Trustees
+#
+# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+#
+# LON-CAPA is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# LON-CAPA is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with LON-CAPA; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# /home/httpd/html/adm/gpl.txt
+#
+# http://www.lon-capa.org/
+#
+###
+
+###############################################################################
+## ##
+## ORGANIZATION OF THIS PERL MODULE ##
+## ##
+## 1. Modules used by this module ##
+## 2. Various subroutines ##
+## 3. Publication Step One ##
+## 4. Phase Two ##
+## 5. Main Handler ##
+## ##
+###############################################################################
+
+
+######################################################################
+######################################################################
+
+=pod
+
+=head1 NAME
+
+lonpublisher - LON-CAPA publishing handler
+
+=head1 SYNOPSIS
+
+B is used by B inside B. This is the
+invocation by F:
+
+
+ PerlAccessHandler Apache::lonacc
+ SetHandler perl-script
+ PerlHandler Apache::lonpublisher
+ ErrorDocument 403 /adm/login
+ ErrorDocument 404 /adm/notfound.html
+ ErrorDocument 406 /adm/unauthorized.html
+ ErrorDocument 500 /adm/errorhandler
+
+
+=head1 OVERVIEW
+
+Authors can only write-access the C~authorname/> space. They can
+copy resources into the resource area through the publication step,
+and move them back through a recover step. Authors do not have direct
+write-access to their resource space.
+
+During the publication step, several events will be
+triggered. Metadata is gathered, where a wizard manages default
+entries on a hierarchical per-directory base: The wizard imports the
+metadata (including access privileges and royalty information) from
+the most recent published resource in the current directory, and if
+that is not available, from the next directory above, etc. The Network
+keeps all previous versions of a resource and makes them available by
+an explicit version number, which is inserted between the file name
+and extension, for example C, while the most recent
+version does not carry a version number (C). Servers
+subscribing to a changed resource are notified that a new version is
+available.
+
+=head1 DESCRIPTION
+
+B takes the proper steps to add resources to the LON-CAPA
+digital library. This includes updating the metadata table in the
+LON-CAPA database.
+
+B is many things to many people.
+
+This module publishes a file. This involves gathering metadata,
+versioning the file, copying file from construction space to
+publication space, and copying metadata from construction space
+to publication space.
+
+=head2 SUBROUTINES
+
+Many of the undocumented subroutines implement various magical
+parsing shortcuts.
+
+=over 4
+
+=cut
+
+######################################################################
+######################################################################
+
package Apache::lonpublisher;
+# ------------------------------------------------- modules used by this module
use strict;
use Apache::File;
+use File::Copy;
use Apache::Constants qw(:common :http :methods);
-use HTML::TokeParser;
+use HTML::LCParser;
+use HTML::Entities;
+use Encode::Encoder;
use Apache::lonxml;
-use Apache::lonhomework;
+use Apache::loncacc;
+use DBI;
+use Apache::lonnet;
+use Apache::loncommon();
+use Apache::lonhtmlcommon;
+use Apache::lonmysql;
+use Apache::lonlocal;
+use Apache::loncfile;
+use LONCAPA::lonmetadata;
+use Apache::lonmsg;
+use vars qw(%metadatafields %metadatakeys);
+use LONCAPA qw(:DEFAULT :match);
+
my %addid;
my %nokey;
+my $docroot;
+
+my $cuname;
+my $cudom;
+
+my $registered_cleanup;
+my $modified_urls;
+
+my $lock;
+
+=pod
+
+=item B
+
+Evaluates a string that contains metadata. This subroutine
+stores values inside I<%metadatafields> and I<%metadatakeys>.
+The hash key is a I<$unikey> corresponding to a unique id
+that is descriptive of the parser location inside the XML tree.
+
+Parameters:
+
+=over 4
+
+=item I<$metastring>
+
+A string that contains metadata.
+
+=back
+
+Returns:
+
+nothing
+
+=cut
+
+#########################################
+#########################################
+#
+# Modifies global %metadatafields %metadatakeys
+#
+
+sub metaeval {
+ my ($metastring,$prefix)=@_;
+
+ my $parser=HTML::LCParser->new(\$metastring);
+ my $token;
+ while ($token=$parser->get_token) {
+ if ($token->[0] eq 'S') {
+ my $entry=$token->[1];
+ my $unikey=$entry;
+ next if ($entry =~ m/^(?:parameter|stores)_/);
+ if (defined($token->[2]->{'package'})) {
+ $unikey.="\0package\0".$token->[2]->{'package'};
+ }
+ if (defined($token->[2]->{'part'})) {
+ $unikey.="\0".$token->[2]->{'part'};
+ }
+ if (defined($token->[2]->{'id'})) {
+ $unikey.="\0".$token->[2]->{'id'};
+ }
+ if (defined($token->[2]->{'name'})) {
+ $unikey.="\0".$token->[2]->{'name'};
+ }
+ foreach (@{$token->[3]}) {
+ $metadatafields{$unikey.'.'.$_}=$token->[2]->{$_};
+ if ($metadatakeys{$unikey}) {
+ $metadatakeys{$unikey}.=','.$_;
+ } else {
+ $metadatakeys{$unikey}=$_;
+ }
+ }
+ my $newentry=$parser->get_text('/'.$entry);
+ if (($entry eq 'customdistributionfile') ||
+ ($entry eq 'sourcerights')) {
+ $newentry=~s/^\s*//;
+ if ($newentry !~m|^/res|) { $newentry=$prefix.$newentry; }
+ }
+# actually store
+ if ( $entry eq 'rule' && exists($metadatafields{$unikey})) {
+ $metadatafields{$unikey}.=','.$newentry;
+ } else {
+ $metadatafields{$unikey}=$newentry;
+ }
+ }
+ }
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Read a metadata file
+
+Parameters:
+
+=over
+
+=item I<$logfile>
+
+File output stream to output errors and warnings to.
+
+=item I<$fn>
+
+File name (including path).
+
+=back
+
+Returns:
+
+=over 4
+
+=item Scalar string (if successful)
+
+XHTML text that indicates successful reading of the metadata.
+
+=back
+
+=cut
+
+#########################################
+#########################################
+sub metaread {
+ my ($logfile,$fn,$prefix)=@_;
+ unless (-e $fn) {
+ print($logfile 'No file '.$fn."\n");
+ return ' '.&mt('No file').':'.
+ &Apache::loncfile::display($fn).'';
+ }
+ print($logfile 'Processing '.$fn."\n");
+ my $metastring;
+ {
+ my $metafh=Apache::File->new($fn);
+ $metastring=join('',<$metafh>);
+ }
+ &metaeval($metastring,$prefix);
+ return ' '.&mt('Processed file').':'.
+ &Apache::loncfile::display($fn).' ';
+}
+
+#########################################
+#########################################
+
+sub coursedependencies {
+ my $url=&Apache::lonnet::declutter(shift);
+ $url=~s/\.meta$//;
+ my ($adomain,$aauthor)=($url=~ m{^($match_domain)/($match_username)/});
+ my $regexp=quotemeta($url);
+ $regexp='___'.$regexp.'___course';
+ my %evaldata=&Apache::lonnet::dump('nohist_resevaldata',$adomain,
+ $aauthor,$regexp);
+ my %courses=();
+ foreach (keys %evaldata) {
+ if ($_=~/^([a-zA-Z0-9]+_[a-zA-Z0-9]+)___.+___course$/) {
+ $courses{$1}=1;
+ }
+ }
+ return %courses;
+}
+#########################################
+#########################################
+
+
+=pod
+
+=item Form-field-generating subroutines.
+
+For input parameters, these subroutines take in values
+such as I<$name>, I<$value> and other form field metadata.
+The output (scalar string that is returned) is an XHTML
+string which presents the form field (foreseeably inside
+ tags).
+
+=over 4
+
+=item B
+
+=item B
+
+=item B
+
+=back
+
+=cut
+
+#########################################
+#########################################
+sub textfield {
+ my ($title,$name,$value,$noline)=@_;
+ $value=~s/^\s+//gs;
+ $value=~s/\s+$//gs;
+ $value=~s/\s+/ /gs;
+ $title=&mt($title);
+ $env{'form.'.$name}=$value;
+ return "\n".&Apache::lonhtmlcommon::row_title($title)
+ .''
+ .&Apache::lonhtmlcommon::row_closure($noline);
+}
+
+sub text_with_browse_field {
+ my ($title,$name,$value,$restriction,$noline)=@_;
+ $value=~s/^\s+//gs;
+ $value=~s/\s+$//gs;
+ $value=~s/\s+/ /gs;
+ $title=&mt($title);
+ $env{'form.'.$name}=$value;
+ return "\n".&Apache::lonhtmlcommon::row_title($title)
+ .''
+ .' '
+ .''
+ .&mt('Select')
+ .' '
+ .''
+ .&mt('Search')
+ .''
+ .&Apache::lonhtmlcommon::row_closure($noline);
+}
+
+sub hiddenfield {
+ my ($name,$value)=@_;
+ $env{'form.'.$name}=$value;
+ return "\n".'';
+}
+
+sub checkbox {
+ my ($name,$text)=@_;
+ return "\n ";
+}
+
+sub selectbox {
+ my ($title,$name,$value,$functionref,@idlist)=@_;
+ $title=&mt($title);
+ $value=(split(/\s*,\s*/,$value))[-1];
+ if (defined($value)) {
+ $env{'form.'.$name}=$value;
+ } else {
+ $env{'form.'.$name}=$idlist[0];
+ }
+ my $selout="\n".&Apache::lonhtmlcommon::row_title($title)
+ .''.&Apache::lonhtmlcommon::row_closure();
+ return $selout;
+}
+
+sub select_level_form {
+ my ($value,$name)=@_;
+ $env{'form.'.$name}=$value;
+ if (!defined($value)) { $env{'form.'.$name}=0; }
+ return &Apache::loncommon::select_level_form($value,$name);
+}
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Fix up a url? First step of publication
+
+=cut
+
+#########################################
+#########################################
+sub urlfixup {
+ my ($url,$target)=@_;
+ unless ($url) { return ''; }
+ #javascript code needs no fixing
+ if ($url =~ /^javascript:/i) { return $url; }
+ if ($url =~ /^mailto:/i) { return $url; }
+ #internal document links need no fixing
+ if ($url =~ /^\#/) { return $url; }
+ my ($host)=($url=~m{(?:(?:http|https|ftp)://)*([^/]+)});
+ my @lonids = &Apache::lonnet::machine_ids($host);
+ if (@lonids) {
+ $url=~s{^(?:http|https|ftp)://}{};
+ $url=~s/^\Q$host\E//;
+ }
+ if ($url=~m{^(?:http|https|ftp)://}) { return $url; }
+ $url=~s{\Q~$cuname\E}{res/$cudom/$cuname};
+ return $url;
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented.
+
+=cut
+
+#########################################
+#########################################
+sub absoluteurl {
+ my ($url,$target)=@_;
+ unless ($url) { return ''; }
+ if ($target) {
+ $target=~s/\/[^\/]+$//;
+ $url=&Apache::lonnet::hreflocation($target,$url);
+ }
+ return $url;
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented
+
+=cut
+
+#########################################
+#########################################
+sub set_allow {
+ my ($allow,$logfile,$target,$tag,$oldurl)=@_;
+ my $newurl=&urlfixup($oldurl,$target);
+ my $return_url=$oldurl;
+ print $logfile 'GUYURL: '.$tag.':'.$oldurl.' - '.$newurl."\n";
+ if ($newurl ne $oldurl) {
+ $return_url=$newurl;
+ print $logfile 'URL: '.$tag.':'.$oldurl.' - '.$newurl."\n";
+ }
+ if (($newurl !~ /^javascript:/i) &&
+ ($newurl !~ /^mailto:/i) &&
+ ($newurl !~ /^(?:http|https|ftp):/i) &&
+ ($newurl !~ /^\#/)) {
+ $$allow{&absoluteurl($newurl,$target)}=1;
+ }
+ return $return_url;
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented
+
+=cut
+
+#########################################
+#########################################
+sub get_subscribed_hosts {
+ my ($target)=@_;
+ my @subscribed;
+ my $filename;
+ $target=~/(.*)\/([^\/]+)$/;
+ my $srcf=$2;
+ opendir(DIR,$1);
+ # cycle through listed files, subscriptions used to exist
+ # as "filename.lonid"
+ while ($filename=readdir(DIR)) {
+ if ($filename=~/\Q$srcf\E\.($match_lonid)$/) {
+ my $subhost=$1;
+ if (($subhost ne 'meta'
+ && $subhost ne 'subscription'
+ && $subhost ne 'meta.subscription'
+ && $subhost ne 'tmp') &&
+ ($subhost ne $Apache::lonnet::perlvar{'lonHostID'})) {
+ push(@subscribed,$subhost);
+ }
+ }
+ }
+ closedir(DIR);
+ my $sh;
+ if ( $sh=Apache::File->new("$target.subscription") ) {
+ while (my $subline=<$sh>) {
+ if ($subline =~ /^($match_lonid):/) {
+ if ($1 ne $Apache::lonnet::perlvar{'lonHostID'}) {
+ push(@subscribed,$1);
+ }
+ }
+ }
+ }
+ return @subscribed;
+}
+
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented
+
+=cut
+
+#########################################
+#########################################
+sub get_max_ids_indices {
+ my ($content)=@_;
+ my $maxindex=10;
+ my $maxid=10;
+ my $needsfixup=0;
+ my $duplicateids=0;
+
+ my %allids;
+ my %duplicatedids;
+
+ my $parser=HTML::LCParser->new($content);
+ $parser->xml_mode(1);
+ my $token;
+ while ($token=$parser->get_token) {
+ if ($token->[0] eq 'S') {
+ my $counter;
+ if ($counter=$addid{$token->[1]}) {
+ if ($counter eq 'id') {
+ if (defined($token->[2]->{'id'}) &&
+ $token->[2]->{'id'} !~ /^\s*$/) {
+ $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid;
+ if (exists($allids{$token->[2]->{'id'}})) {
+ $duplicateids=1;
+ $duplicatedids{$token->[2]->{'id'}}=1;
+ } else {
+ $allids{$token->[2]->{'id'}}=1;
+ }
+ } else {
+ $needsfixup=1;
+ }
+ } else {
+ if (defined($token->[2]->{'index'}) &&
+ $token->[2]->{'index'} !~ /^\s*$/) {
+ $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex;
+ } else {
+ $needsfixup=1;
+ }
+ }
+ }
+ }
+ }
+ return ($needsfixup,$maxid,$maxindex,$duplicateids,
+ (keys(%duplicatedids)));
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented
+
+=cut
+
+#########################################
+#########################################
+sub get_all_text_unbalanced {
+ #there is a copy of this in lonxml.pm
+ my($tag,$pars)= @_;
+ my $token;
+ my $result='';
+ $tag='<'.$tag.'>';
+ while ($token = $$pars[-1]->get_token) {
+ if (($token->[0] eq 'T')||($token->[0] eq 'C')||($token->[0] eq 'D')) {
+ $result.=$token->[1];
+ } elsif ($token->[0] eq 'PI') {
+ $result.=$token->[2];
+ } elsif ($token->[0] eq 'S') {
+ $result.=$token->[4];
+ } elsif ($token->[0] eq 'E') {
+ $result.=$token->[2];
+ }
+ if ($result =~ /\Q$tag\E/s) {
+ ($result,my $redo)=$result =~ /(.*)\Q$tag\E(.*)/is;
+ #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2);
+ #&Apache::lonnet::logthis('Result is :'.$1);
+ $redo=$tag.$redo;
+ push (@$pars,HTML::LCParser->new(\$redo));
+ $$pars[-1]->xml_mode('1');
+ last;
+ }
+ }
+ return $result
+}
+
+#########################################
+#########################################
+
+=pod
+
+=item B
+
+Currently undocumented
+
+=cut
+
+#########################################
+#########################################
+#Arguably this should all be done as a lonnet::ssi instead
+sub fix_ids_and_indices {
+ my ($logfile,$source,$target)=@_;
+
+ my %allow;
+ my $content;
+ {
+ my $org=Apache::File->new($source);
+ $content=join('',<$org>);
+ }
+
+ my ($needsfixup,$maxid,$maxindex,$duplicateids,@duplicatedids)=
+ &get_max_ids_indices(\$content);
+
+ print $logfile ("Got $needsfixup,$maxid,$maxindex,$duplicateids--".
+ join(', ',@duplicatedids));
+ if ($duplicateids) {
+ print $logfile "Duplicate ID(s) exist, ".join(', ',@duplicatedids)."\n";
+ my $outstring=''.&mt('Unable to publish file, it contains duplicated ID(s), ID(s) need to be unique. The duplicated ID(s) are').': '.join(', ',@duplicatedids).'';
+ return ($outstring,1);
+ }
+ if ($needsfixup) {
+ print $logfile "Needs ID and/or index fixup\n".
+ "Max ID : $maxid (min 10)\n".
+ "Max Index: $maxindex (min 10)\n";
+ }
+ my $outstring='';
+ my $responsecounter=1;
+ my @parser;
+ $parser[0]=HTML::LCParser->new(\$content);
+ $parser[-1]->xml_mode(1);
+ my $token;
+ while (@parser) {
+ while ($token=$parser[-1]->get_token) {
+ if ($token->[0] eq 'S') {
+ my $counter;
+ my $tag=$token->[1];
+ my $lctag=lc($tag);
+ if ($lctag eq 'allow') {
+ $allow{$token->[2]->{'src'}}=1;
+ next;
+ }
+ if ($lctag eq 'base') { next; }
+ if (($lctag eq 'part') || ($lctag eq 'problem')) {
+ $responsecounter=0;
+ }
+ if ($lctag=~/response$/) { $responsecounter++; }
+ my %parms=%{$token->[2]};
+ $counter=$addid{$tag};
+ if (!$counter) { $counter=$addid{$lctag}; }
+ if ($counter) {
+ if ($counter eq 'id') {
+ unless (defined($parms{'id'}) &&
+ $parms{'id'}!~/^\s*$/) {
+ $maxid++;
+ $parms{'id'}=$maxid;
+ print $logfile 'ID(new) : '.$tag.':'.$maxid."\n";
+ } else {
+ print $logfile 'ID(kept): '.$tag.':'.$parms{'id'}."\n";
+ }
+ } elsif ($counter eq 'index') {
+ unless (defined($parms{'index'}) &&
+ $parms{'index'}!~/^\s*$/) {
+ $maxindex++;
+ $parms{'index'}=$maxindex;
+ print $logfile 'Index: '.$tag.':'.$maxindex."\n";
+ }
+ }
+ }
+ unless ($parms{'type'} eq 'zombie') {
+ foreach my $type ('src','href','background','bgimg') {
+ foreach my $key (keys(%parms)) {
+ if ($key =~ /^$type$/i) {
+ $parms{$key}=&set_allow(\%allow,$logfile,
+ $target,$tag,
+ $parms{$key});
+ }
+ }
+ }
+ }
+ # probably a image type
'.&mt('Warning!').
+ '
'.
+ &mt('Copyright/distribution option "Private" is no longer supported. Select another option from below. Consider "Custom Rights" for maximum control over the usage of your resource.').'
';
+ }
+
+# ------------------------------------------------------- Now have all metadata
+
+ my %keywords=();
+
+ if (length($content)<500000) {
+ my $textonly=$content;
+ $textonly=~s/\
+END
+ $keywordout.="\n".&Apache::lonhtmlcommon::row_title(&mt('Keywords'))
+ .$keywords_help
+ .''
+ .''
+ .' '
+ .&Apache::loncommon::start_data_table()
+ .&Apache::loncommon::start_data_table_row();
+
+ my $colcount=0;
+
+ foreach (sort keys %keywords) {
+ $colcount++;
+ $keywordout.='
';
+ if ($colcount>=10) {
+ $keywordout.=&Apache::loncommon::end_data_table_row()
+ .&Apache::loncommon::start_data_table_row();
+ $colcount=0;
+ }
+ }
+ # Proper end of started table row:
+ if ($colcount != 0) { # has still to be improved: if amount of keywords mod 10 == 0, then do not start another table row
+ while ($colcount<10) {
+ $keywordout.='
');
+ print $logfile "\nRemoving error messages: $delresult";
+# ----------------------------------------------------------- Copy old versions
+
+ if (-e $target) {
+ my $filename;
+ my $maxversion=0;
+ $target=~/(.*)\/([^\/]+)\.(\w+)$/;
+ my $srcf=$2;
+ my $srct=$3;
+ my $srcd=$1;
+ unless ($srcd=~/^\/home\/httpd\/html\/res/) {
+ print $logfile "\nPANIC: Target dir is ".$srcd;
+ $r->print(
+ "".&mt('Invalid target directory, FAIL')."");
+ return 0;
+ }
+ opendir(DIR,$srcd);
+ while ($filename=readdir(DIR)) {
+ if (-l $srcd.'/'.$filename) {
+ unlink($srcd.'/'.$filename);
+ unlink($srcd.'/'.$filename.'.meta');
+ } else {
+ if ($filename=~/\Q$srcf\E\.(\d+)\.\Q$srct\E$/) {
+ $maxversion=($1>$maxversion)?$1:$maxversion;
+ }
+ }
+ }
+ closedir(DIR);
+ $maxversion++;
+ $r->print('
Creating old version '.$maxversion.'
');
+ print $logfile "\nCreating old version ".$maxversion."\n";
+
+ my $copyfile=$srcd.'/'.$srcf.'.'.$maxversion.'.'.$srct;
+
+ if (copy($target,$copyfile)) {
+ print $logfile "Copied old target to ".$copyfile."\n";
+ $r->print('
'.&mt('Copied old target file').'
');
+ } else {
+ print $logfile "Unable to write ".$copyfile.':'.$!."\n";
+ $r->print("".&mt('Failed to copy old target').
+ ", $!, ".&mt('FAIL')."");
+ return 0;
+ }
+
+# --------------------------------------------------------------- Copy Metadata
- $scrout=$allmeta;
+ $copyfile=$copyfile.'.meta';
+
+ if (copy($target.'.meta',$copyfile)) {
+ print $logfile "Copied old target metadata to ".$copyfile."\n";
+ $r->print('
');
-# ------------ We are publishing from $thisfn to $thistarget with $thisembstyle
+ my $thisfn=$fn;
- $r->print('Result: '.&publish($thisfn,$thistarget,$thisembstyle));
+ my $thistarget=$thisfn;
- }
+ $thistarget=~s/^\/home/$targetdir/;
+ $thistarget=~s/\/public\_html//;
+
+ my $thisdistarget=$thistarget;
+ $thisdistarget=~s/^\Q$docroot\E//;
- $r->print('