# The LearningOnline Network with CAPA
# Handler to cleanup XML files
#
# $Id: loncleanup.pm,v 1.22 2023/07/23 13:16:29 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###
package Apache::loncleanup;
use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncommon();
use Apache::lonhtmlcommon();
use Apache::lonlocal;
use Apache::lonnet;
use lib '/home/httpd/lib/perl/';
use LONCAPA;
use HTML::Entities();
sub latextrans {
my $symbolfont=shift;
my %latexsymb=(
'±' => '\pm',
'´' => '\times',
'¸' => '\div',
'Ò' => '(R)',
'Ó' => '\copy',
'Ø' => '\neg',
'â' => '(R)',
'ã' => '\copy',
'¦' => 'f',
'A' => '\Alpha',
'B' => '\Beta',
'G' => '\Gamma',
'D' => '\Delta',
'E' => '\Epsilon',
'Z' => '\Zeta',
'H' => '\Eta',
'Q' => '\Theta',
'I' => '\Iota',
'K' => '\Kappa',
'L' => '\Lambda',
'M' => '\Mu',
'N' => '\Nu',
'X' => '\Xi',
'O' => '\Omicron',
'P' => '\Pi',
'R' => '\Rho',
'S' => '\Sigma',
'T' => '\Tau',
'U' => 'Y',
'F' => '\Phi',
'C' => '\Chi',
'Y' => '\Psi',
'W' => '\Omega',
'a' => '\alpha',
'b' => '\beta',
'g' => '\gamma',
'd' => '\delta',
'e' => '\epsilon',
'z' => '\zeta',
'h' => '\eta',
'q' => '\theta',
'i' => '\iota',
'k' => '\kappa',
'l' => '\lambda',
'm' => '\mu',
'n' => '\nu',
'x' => '\xi',
'o' => '\omicron',
'p' => '\pi',
'r' => '\rho',
'V' => '\sigmaf',
's' => '\sigma',
't' => '\tau',
'u' => '\upsilon',
'f' => '\phi',
'c' => '\chi',
'y' => '\psi',
'w' => '\omega',
'J' => '\vartheta',
'j' => '\varphi',
'v' => '\varpi',
'¡' => '\Upsilon',
'¢' => "'",
'¤' => '/',
'²' => '"',
'¼' => '\ldots',
'À' => '\aleph',
'Á' => '\Im',
'Â' => '\Re',
'Ã' => '\wp',
'Ô' => '^{TM}',
'ä' => '^{TM}',
'ð' => 'EUR',
'«' => '\leftrightarrow',
'¬' => '\leftarrow',
'­' => '\uparrow',
'®' => '\rightarrow',
'¯' => '\downarraw',
'¿' => '\hookleftarrow',
'Û' => '\Leftrightarrow',
'Ü' => '\Leftarrow',
'Ý' => '\Uparrow',
'Þ' => '\Rightarrow',
'ß' => '\Downarrow',
'"' => '\forall',
'$' => '\exists',
''' => '\ni',
'*' => '\ast',
'-' => '-',
'@' => '\cong',
'\' => '\therefore',
'^' => '\perp',
'~' => '\sim',
'£' => '\leq',
'¥' => '\infty',
'³' => '\geq',
'µ' => '\propto',
'¶' => '\partial',
'·' => '\cdot',
'¹' => '\not=',
'º' => '\equiv',
'»' => '\approx',
'Ä' => '\otimes',
'Å' => '\oplus',
'Æ' => '\emptyset',
'Ç' => '\cap',
'È' => '\cup',
'É' => '\supset',
'Ê' => '\supseteq',
'Ë' => '\not\subset',
'Ì' => '\subset',
'Í' => '\subseteq',
'Î' => '\in',
'Ï' => '\not\in',
'Ð' => '\angle',
'Ñ' => '\nabla',
'Õ' => '\prod',
'Ö' => '\surd',
'×' => '\cdot',
'Ù' => '\wedge',
'Ú' => '\wee',
'å' => '\sum',
'ò' => '\int',
'á' => '\langle',
'ñ' => '\rangle',
'à' => '\diamondsuit',
'§' => '\clubsuit',
'¨' => '\diamondsuit',
'©' => '\heartsuit',
'ª' => '\spadesuit'
);
my $output='';
my $char='';
my $entitymode=0;
for (my $i=0; $i<length($symbolfont); $i++) {
my $newchar=substr($symbolfont,$i,1);
$char.=$newchar;
if ($newchar eq '&') { $entitymode=1; }
if (($entitymode) && ($newchar ne ';')) { next; }
my $latex=$latexsymb{$char};
if ($latex) {
$output.=$latex;
} else {
$output.=$char;
}
$char='';
$entitymode=0;
}
return $output;
}
sub insidetrans {
my @args=@_;
return '<font'.$args[0].$args[1].'><m>$'.&latextrans($args[2]).'$</m>';
}
sub symbolfontreplace {
my $text=shift;
my @fragments=split(/\<\/font\>/si,$text);
for (my $i=0; $i<=$#fragments;$i++) {
$fragments[$i]=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
}
return join('</font>',@fragments);
}
sub htmlclean {
my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
# Take care of CRLF etc
unless ($blocklinefeed) {
$raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
$raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
$raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
$raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
}
# Generate empty tags, remove wrong end tags
unless ($blockemptytags) {
$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis;
$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
$raw=~s/\/ \/\>/\/\>/gs;
unless ($full) {
$raw=~s/\<[\/]*(body|head|html)\>//gis;
}
}
# Make standard tags lowercase
unless ($blocklowercasing) {
foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
'table','tr','td','th','p','br','hr','img','embed','font',
'a','strong','center','title','basefont','li','ol','ul',
'input','select','form','option','script','pre') {
$raw=~s/\<$_\s*\>/\<$_\>/gis;
$raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
$raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
}
}
# Replace <font face="symbol">
unless ($blockdesymboling) {
$raw=&symbolfontreplace($raw);
}
return $raw;
}
sub phaseone {
my ($r,$fn,$uname,$udom)=@_;
$r->print(
&Apache::lonhtmlcommon::start_pick_box()
.&Apache::lonhtmlcommon::row_title(&mt('Select actions to attempt'))
.'<label>'
.'<input type="checkbox" name="linefeed" checked="checked" /> '
.&mt('Linefeeds, formfeeds, and carriage returns')
.'</label><br />'
.'<label>'
.'<input type="checkbox" name="empty" checked="checked" /> '
.&mt('Empty tags')
.'</label><br />'
.'<label>'
.'<input type="checkbox" name="lower" checked="checked" /> '
.&mt('Lower casing')
.'</label><br />'
.'<label>'
.'<input type="checkbox" name="symbol" checked="checked" /> '
.&mt('Symbol font')
.'</label>'
.&Apache::lonhtmlcommon::row_closure(1)
.&Apache::lonhtmlcommon::end_pick_box()
);
$r->print(
'<input type="hidden" name="phase" value="two" />'
.'<p>'
.'<input type="submit" value="'.&mt('Next').'" />'
.'</p>'
);
}
sub phasetwo {
# Check original file
my ($r,$fn,$uname,$udom)=@_;
my $text='';
my $londocroot = $r->dir_config('lonDocRoot');
if (open(IN,"<$londocroot/priv/$udom/$uname".$fn)) {
while (my $line=<IN>) {
$text.=$line;
}
close(IN);
}
# Check if any selection was made
if ($env{'form.linefeed'} ne 'on' &&
$env{'form.empty'} ne 'on' &&
$env{'form.lower'} ne 'on' &&
$env{'form.symbol'} ne 'on') {
$r->print(
'<p class="LC_warning">'
.&mt('Please select at least one option.')
.'</p>'
.'<p><a href="javascript:history.back();">'.&mt('Back').'</p>'
);
return;
}
my $uri="/priv/$udom/$uname".$fn;
my $result=&Apache::lonnet::ssi_body($uri,
('grade_target'=>'web',
'return_only_error_and_warning_counts' => 1));
my ($errorcount,$warningcount)=split(':',$result);
# Display results for original file
$r->print(
&Apache::lonhtmlcommon::start_pick_box()
.&Apache::lonhtmlcommon::row_title(&mt('Original file'))
.&Apache::lonhtmlcommon::confirm_success(
&mt('[quant,_1,error]',$errorcount), $errorcount)
.'<br />'
.&Apache::lonhtmlcommon::confirm_success(
&mt('[quant,_1,warning]',$warningcount), $warningcount)
.&Apache::lonhtmlcommon::row_closure()
);
# Clean up file
$text=&htmlclean($text,1,
($env{'form.linefeed'} ne 'on'),
($env{'form.empty'} ne 'on'),
($env{'form.lower'} ne 'on'),
($env{'form.symbol'} ne 'on'));
my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
if (open(OUT,">$londocroot/priv/$udom/$uname".$newfn)) {
print OUT $text;
close(OUT);
}
my $newuri="/priv/$udom/$uname".$newfn;
$result=&Apache::lonnet::ssi_body($newuri,
('grade_target'=>'web',
'return_only_error_and_warning_counts' => 1));
($errorcount,$warningcount)=split(':',$result);
# Display results for cleaned up file
$r->print(
&Apache::lonhtmlcommon::row_title(&mt('Cleaned up file'))
.&Apache::lonhtmlcommon::confirm_success(
&mt('[quant,_1,error]',$errorcount), $errorcount)
.'<br />'
.&Apache::lonhtmlcommon::confirm_success(
&mt('[quant,_1,warning]',$warningcount), $warningcount)
.&Apache::lonhtmlcommon::row_closure()
);
# Display actions
$r->print(
&Apache::lonhtmlcommon::row_title(&mt('Actions'))
.'<ul>'
.'<li><a href="'.$newuri.'" target="prev">'
.&mt('Open (and edit) cleaned up file in new window')
.'</a></li>'
.'<li><a href="'
.&HTML::Entities::encode(
'/adm/diff?filename='.&escape($uri)
.'&versionone=priv&filetwo='.&escape($newuri))
.'" target="prev">'
.&mt('Show diffs in new window')
.'</a></li>'
.'</ul>'
.&Apache::lonhtmlcommon::row_closure(1)
.&Apache::lonhtmlcommon::end_pick_box()
.'<p>'
.'<input type="hidden" name="phase" value="three" />'
.'<input type="submit" name="accept" value="'
.&mt('Clean Up').'" />'
.' <input type="submit" name="reject" value="'
.&mt('Cancel').'" />'
.'</p>'
);
}
sub phasethree {
my ($r,$fn,$uname,$udom)=@_;
my $old=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$fn;
my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
my $new=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$newfn;
if ($env{'form.accept'}) {
$r->print(
'<p class="LC_info">'
.&mt('Accepting changes...')
.'</p>'
);
move($new,$old);
} else {
$r->print(
'<p class="LC_info">'
.&mt('Rejecting changes...')
.'</p>'
);
unlink($new);
}
$r->print(
'<p>'
.&Apache::lonhtmlcommon::confirm_success(&mt('Done')));
'</p>'
}
# ---------------------------------------------------------------- Main Handler
sub handler {
my $r=shift;
my $fn='';
# Get query string for limited number of parameters
&Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
['filename']);
if ($env{'form.filename'}) {
$fn=$env{'form.filename'};
$fn=~s{^https?\://[^/]+}{};
} else {
$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
' unspecified filename for cleanup', $r->filename);
return HTTP_NOT_FOUND;
}
unless ($fn) {
$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
' trying to cleanup non-existing file', $r->filename);
return HTTP_NOT_FOUND;
}
# ----------------------------------------------------------- Start page output
my $uname;
my $udom;
($uname,$udom)=&Apache::lonnet::constructaccess($fn);
unless (($uname) && ($udom)) {
$r->log_reason($uname.' at '.$udom.
' trying to cleanup file '.$env{'form.filename'}.
' ('.$fn.') - not authorized',
$r->filename);
return HTTP_NOT_ACCEPTABLE;
}
&Apache::loncommon::content_type($r,'text/html');
$r->send_http_header;
# Breadcrumbs
my $text = 'Authoring Space';
my $href = &Apache::loncommon::authorspace($fn);
if ($env{'request.course.id'}) {
my $cnum = $env{'course.'.$env{'request.course.id'}.'.num'};
my $cdom = $env{'course.'.$env{'request.course.id'}.'.domain'};
if ($href eq "/priv/$cdom/$cnum/") {
$text = 'Course Authoring Space';
}
}
my $brcrum = [{'href' => $href,
'text' => $text},
{'href' => '',
'text' => 'Cleanup XML Document'}];
$fn=~s{^/priv/$LONCAPA::domain_re/$LONCAPA::username_re}{};
$r->print(&Apache::loncommon::start_page('Cleanup XML Document',
undef,
{'bread_crumbs' => $brcrum,}));
$r->print('<h2>'.$fn.'</h2>'.
'<form action="/adm/cleanup" method="post">'.
'<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />');
unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
$r->print('<p class="LC_warning">'.&mt('Cannot cleanup this filetype').'</p>');
} else {
if ($env{'form.phase'} eq 'three') {
&phasethree($r,$fn,$uname,$udom);
} elsif ($env{'form.phase'} eq 'two') {
&phasetwo($r,$fn,$uname,$udom);
} else {
&phaseone($r,$fn,$uname,$udom);
}
}
my $dir=$fn;
$dir=~s{[^/]+$}{};
$r->print(
'</form>'
.&Apache::lonhtmlcommon::actionbox(
['<a href="/priv/'.$udom.'/'.$uname.$fn.'">'.
&mt('Back to Source File').'</a>',
'<a href="/priv/'.$udom.'/'.$uname.$dir.'">'.
&mt('Back to Source Directory').'</a>'])
.&Apache::loncommon::end_page()
);
return OK;
}
1;
__END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>