--- loncom/publisher/loncleanup.pm 2005/05/28 01:32:33 1.1 +++ loncom/publisher/loncleanup.pm 2011/11/03 00:30:32 1.15 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Handler to cleanup XML files # -# $Id: loncleanup.pm,v 1.1 2005/05/28 01:32:33 www Exp $ +# $Id: loncleanup.pm,v 1.15 2011/11/03 00:30:32 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -36,8 +36,12 @@ use File::Copy; use Apache::Constants qw(:common :http :methods); use Apache::loncacc; use Apache::loncommon(); +use Apache::lonhtmlcommon(); use Apache::lonlocal; use Apache::lonnet; +use lib '/home/httpd/lib/perl/'; +use LONCAPA; + sub latextrans { my $symbolfont=shift; @@ -208,95 +212,255 @@ sub symbolfontreplace { } sub htmlclean { - my ($raw,$full)=@_; + my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_; # Take care of CRLF etc - - $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs; - $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs; - $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs; - $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs; - + unless ($blocklinefeed) { + $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs; + $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs; + $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs; + $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs; + } # Generate empty tags, remove wrong end tags - $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis; - $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis; - unless ($full) { - $raw=~s/\<[\/]*(body|head|html)\>//gis; + unless ($blockemptytags) { + $raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis; + $raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis; + $raw=~s/\/ \/\>/\/\>/gs; + unless ($full) { + $raw=~s/\<[\/]*(body|head|html)\>//gis; + } } # Make standard tags lowercase - foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m', - 'table','tr','td','th','p','br','hr','img','embed','font', - 'a','strong','center','title','basefont','li','ol','ul', - 'input','select','form','option','script','pre') { - $raw=~s/\<$_\s*\>/\<$_\>/gis; - $raw=~s/\<\/$_\s*\>/<\/$_\>/gis; - $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis; + unless ($blocklowercasing) { + foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m', + 'table','tr','td','th','p','br','hr','img','embed','font', + 'a','strong','center','title','basefont','li','ol','ul', + 'input','select','form','option','script','pre') { + $raw=~s/\<$_\s*\>/\<$_\>/gis; + $raw=~s/\<\/$_\s*\>/<\/$_\>/gis; + $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis; + } + } +# Replace + unless ($blockdesymboling) { + $raw=&symbolfontreplace($raw); } return $raw; } sub phaseone { + my ($r,$fn,$uname,$udom)=@_; + $r->print( + &Apache::lonhtmlcommon::start_pick_box() + .&Apache::lonhtmlcommon::row_title(&mt('Select actions to attempt')) + .' ' + .&mt('Linefeeds, formfeeds, and carriage returns') + .'
' + .' ' + .&mt('Empty tags') + .'
' + .' ' + .&mt('Lower casing') + .'
' + .' ' + .&mt('Symbol font') + .&Apache::lonhtmlcommon::row_closure(1) + .&Apache::lonhtmlcommon::end_pick_box() + ); + + $r->print( + '' + .'

' + .'' + .'

' + ); } sub phasetwo { + # Check original file + my ($r,$fn,$uname,$udom)=@_; + my $text=''; + my $londocroot = $r->dir_config('lonDocRoot'); + if (open(IN,"<$londocroot/priv/$udom/$uname".$fn)) { + while (my $line=) { + $text.=$line; + } + close(IN); + } + my $uri="/priv/$udom/$uname".$fn; + my $result=&Apache::lonnet::ssi_body($uri, + ('grade_target'=>'web', + 'return_only_error_and_warning_counts' => 1)); + my ($errorcount,$warningcount)=split(':',$result); + + # Display results for original file + $r->print( + &Apache::lonhtmlcommon::start_pick_box() + .&Apache::lonhtmlcommon::row_title(&mt('Original file')) + .&Apache::lonhtmlcommon::confirm_success( + &mt('[quant,_1,error]',$errorcount), $errorcount) + .'
' + .&Apache::lonhtmlcommon::confirm_success( + &mt('[quant,_1,warning]',$warningcount), $warningcount) + .&Apache::lonhtmlcommon::row_closure() + ); + + # Clean up file + $text=&htmlclean($text,1, + ($env{'form.linefeed'} ne 'on'), + ($env{'form.empty'} ne 'on'), + ($env{'form.lower'} ne 'on'), + ($env{'form.symbol'} ne 'on')); + my ($main,$ext)=($fn=~/^(.*)\.(\w+)/); + my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; + if (open(OUT,">$londocroot/priv/$udom/$uname".$newfn)) { + print OUT $text; + close(OUT); + } + my $newuri="/priv/$udom/$uname".$newfn; + $result=&Apache::lonnet::ssi_body($newuri, + ('grade_target'=>'web', + 'return_only_error_and_warning_counts' => 1)); + ($errorcount,$warningcount)=split(':',$result); + + # Display results for cleaned up file + $r->print( + &Apache::lonhtmlcommon::row_title(&mt('Cleaned up file')) + .&Apache::lonhtmlcommon::confirm_success( + &mt('[quant,_1,error]',$errorcount), $errorcount) + .'
' + .&Apache::lonhtmlcommon::confirm_success( + &mt('[quant,_1,warning]',$warningcount), $warningcount) + .&Apache::lonhtmlcommon::row_closure() + ); + + # Display actions + $r->print( + &Apache::lonhtmlcommon::row_title(&mt('Actions')) + .'' + .&Apache::lonhtmlcommon::row_closure(1) + .&Apache::lonhtmlcommon::end_pick_box() + .'

' + .'' + .'' + .' ' + .'

' + ); +} + +sub phasethree { + my ($r,$fn,$uname,$udom)=@_; + my $old=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$fn; + my ($main,$ext)=($fn=~/^(.*)\.(\w+)/); + my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; + my $new=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$newfn; + if ($env{'form.accept'}) { + $r->print( + '

' + .&mt('Accepting changes') + .'

' + ); + move($new,$old); + } else { + $r->print( + '

' + .&mt('Rejecting changes') + .'

' + ); + unlink($new); + } } # ---------------------------------------------------------------- Main Handler sub handler { - my $r=shift; - + my $r=shift; + my $fn=''; # Get query string for limited number of parameters - &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'}, - ['filename']); + &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'}, + ['filename']); + + if ($env{'form.filename'}) { + $fn=$env{'form.filename'}; + $fn=~s{^https?\://[^/]+}{}; + } else { + $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}. + ' unspecified filename for cleanup', $r->filename); + return HTTP_NOT_FOUND; + } - if ($env{'form.filename'}) { - $fn=$env{'form.filename'}; - $fn=~s/^http\:\/\/[^\/]+//; - } else { - $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}. - ' unspecified filename for cleanup', $r->filename); - return HTTP_NOT_FOUND; - } - - unless ($fn) { - $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}. - ' trying to cleanup non-existing file', $r->filename); - return HTTP_NOT_FOUND; - } + unless ($fn) { + $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}. + ' trying to cleanup non-existing file', $r->filename); + return HTTP_NOT_FOUND; + } # ----------------------------------------------------------- Start page output - my $uname; - my $udom; + my $uname; + my $udom; + + ($uname,$udom)=&Apache::loncacc::constructaccess($fn); + unless (($uname) && ($udom)) { + $r->log_reason($uname.' at '.$udom. + ' trying to cleanup file '.$env{'form.filename'}. + ' ('.$fn.') - not authorized', + $r->filename); + return HTTP_NOT_ACCEPTABLE; + } - ($uname,$udom)= - &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain')); - unless (($uname) && ($udom)) { - $r->log_reason($uname.' at '.$udom. - ' trying to cleanup file '.$env{'form.filename'}. - ' ('.$fn.') - not authorized', - $r->filename); - return HTTP_NOT_ACCEPTABLE; - } - - $fn=~s/\/\~(\w+)//; - - &Apache::loncommon::content_type($r,'text/html'); - $r->send_http_header; - - $r->print('LON-CAPA Construction Space'); - - $r->print(&Apache::loncommon::bodytag('Cleanup XML Document')); - - if ($env{'form.phase'} eq 'two') { - &phasetwo($r,$fn,$uname,$udom); - } else { - &phaseone($r,$fn,$uname,$udom); - } + $fn=~s{^/priv/$LONCAPA::domain_re/$LONCAPA::username_re}{}; + + &Apache::loncommon::content_type($r,'text/html'); + $r->send_http_header; + + # Breadcrumbs + my $brcrum = [{'href' => &Apache::loncommon::authorspace(), + 'text' => 'Construction Space'}, + {'href' => '', + 'text' => 'Cleanup XML Document'}]; + + $r->print(&Apache::loncommon::start_page('Cleanup XML Document', + undef, + {'bread_crumbs' => $brcrum,})); + $r->print('

'.$fn.'

'. + '
'. + ''); + unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) { + $r->print(&mt('Cannot cleanup this filetype')); + } else { + if ($env{'form.phase'} eq 'three') { + &phasethree($r,$fn,$uname,$udom); + } elsif ($env{'form.phase'} eq 'two') { + &phasetwo($r,$fn,$uname,$udom); + } else { + &phaseone($r,$fn,$uname,$udom); + } + } + my $dir=$fn; + $dir=~s{[^/]+$}{}; + $r->print( + '
' + .&Apache::lonhtmlcommon::start_funclist() + .&Apache::lonhtmlcommon::add_item_funclist( + ''.&mt('Back to Source File').'') + .&Apache::lonhtmlcommon::add_item_funclist( + ''.&mt('Back to Source Directory').'') + .&Apache::lonhtmlcommon::end_funclist() + .&Apache::loncommon::end_page() + ); - $r->print(''); - return OK; + return OK; } 1;