File:  [LON-CAPA] / loncom / publisher / loncleanup.pm
Revision 1.22: download - view: text, annotated - select for diffs
Sun Jul 23 13:16:29 2023 UTC (17 months ago) by raeburn
Branches: MAIN
CVS tags: version_2_12_X, HEAD
- Fix typo in rev 1.21

# The LearningOnline Network with CAPA
# Handler to cleanup XML files
#
# $Id: loncleanup.pm,v 1.22 2023/07/23 13:16:29 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#
###

package Apache::loncleanup;

use strict;
use Apache::File;
use File::Copy;
use Apache::Constants qw(:common :http :methods);
use Apache::loncommon();
use Apache::lonhtmlcommon();
use Apache::lonlocal;
use Apache::lonnet;
use lib '/home/httpd/lib/perl/';
use LONCAPA;
use HTML::Entities();
 

sub latextrans {
    my $symbolfont=shift;
    my %latexsymb=(
		   '±' => '\pm',
		   '´' => '\times',
		   '¸' => '\div',
		   'Ò' => '(R)',
		   'Ó' => '\copy',
		   'Ø' => '\neg',
		   'â' => '(R)',
		   'ã' => '\copy',
		   '¦' => 'f',
		   'A' => '\Alpha',
		   'B' => '\Beta',
		   'G' => '\Gamma',
		   'D' => '\Delta',
		   'E' => '\Epsilon',
		   'Z' => '\Zeta',
		   'H' => '\Eta',
		   'Q' => '\Theta',
		   'I' => '\Iota',
		   'K' => '\Kappa',
		   'L' => '\Lambda',
		   'M' => '\Mu',
		   'N' => '\Nu',
		   'X' => '\Xi',
		   'O' => '\Omicron',
		   'P' => '\Pi',
		   'R' => '\Rho',
		   'S' => '\Sigma',
		   'T' => '\Tau',
		   'U' => 'Y',
		   'F' => '\Phi',
		   'C' => '\Chi',
		   'Y' => '\Psi',
		   'W' => '\Omega',
		   'a' => '\alpha',
		   'b' => '\beta',
		   'g' => '\gamma',
		   'd' => '\delta',
		   'e' => '\epsilon',
		   'z' => '\zeta',
		   'h' => '\eta',
		   'q' => '\theta',
		   'i' => '\iota',
		   'k' => '\kappa',
		   'l' => '\lambda',
		   'm' => '\mu',
		   'n' => '\nu',
		   'x' => '\xi',
		   'o' => '\omicron',
		   'p' => '\pi',
		   'r' => '\rho',
		   'V' => '\sigmaf',
		   's' => '\sigma',
		   't' => '\tau',
		   'u' => '\upsilon',
		   'f' => '\phi',
		   'c' => '\chi',
		   'y' => '\psi',
		   'w' => '\omega',
		   'J' => '\vartheta',
		   'j' => '\varphi',
		   'v' => '\varpi',
		   '¡' => '\Upsilon',
		   '¢' => "'",
		   '¤' => '/',
		   '²' => '"',
		   '¼' => '\ldots',
		   'À' => '\aleph',
		   'Á' => '\Im',
		   'Â' => '\Re',
		   'Ã' => '\wp',
		   'Ô' => '^{TM}',
		   'ä' => '^{TM}',
		   'ð' => 'EUR',
		   '«' => '\leftrightarrow',
		   '¬' => '\leftarrow',
		   '­' => '\uparrow',
		   '®' => '\rightarrow',
		   '¯' => '\downarraw',
		   '¿' => '\hookleftarrow',
		   'Û' => '\Leftrightarrow',
		   'Ü' => '\Leftarrow',
		   'Ý' => '\Uparrow',
		   'Þ' => '\Rightarrow',
		   'ß' => '\Downarrow',
		   '"' => '\forall',
		   '$' => '\exists',
		   ''' => '\ni',
		   '*' => '\ast',
		   '-' => '-',
		   '@' => '\cong',
		   '\' => '\therefore',
		   '^' => '\perp',
		   '~' => '\sim',
		   '£' => '\leq',
		   '¥' => '\infty',
		   '³' => '\geq',
		   'µ' => '\propto',
		   '¶' => '\partial',
		   '·' => '\cdot',
		   '¹' => '\not=',
		   'º' => '\equiv',
		   '»' => '\approx',
		   'Ä' => '\otimes',
		   'Å' => '\oplus',
		   'Æ' => '\emptyset',
		   'Ç' => '\cap',
		   'È' => '\cup',
		   'É' => '\supset',
		   'Ê' => '\supseteq',
		   'Ë' => '\not\subset',
		   'Ì' => '\subset',
		   'Í' => '\subseteq',
		   'Î' => '\in',
		   'Ï' => '\not\in',
		   'Ð' => '\angle',
		   'Ñ' => '\nabla',
		   'Õ' => '\prod',
		   'Ö' => '\surd',
		   '×' => '\cdot',
		   'Ù' => '\wedge',
		   'Ú' => '\wee',
		   'å' => '\sum',
		   'ò' => '\int',
		   'á' => '\langle',
		   'ñ' => '\rangle',
		   'à' => '\diamondsuit',
		   '§' => '\clubsuit',
		   '¨' => '\diamondsuit',
		   '©' => '\heartsuit',
		   'ª' => '\spadesuit'
		   );
    my $output='';
    my $char='';
    my $entitymode=0;
    for (my $i=0; $i<length($symbolfont); $i++) {
        my $newchar=substr($symbolfont,$i,1);
        $char.=$newchar;
        if ($newchar eq '&') { $entitymode=1; }
        if (($entitymode) && ($newchar ne ';')) { next; }
        my $latex=$latexsymb{$char};
	if ($latex) {
	    $output.=$latex;
	} else {
	    $output.=$char;
	}
        $char='';
        $entitymode=0;
    }
    return $output;
}

sub insidetrans {
    my @args=@_;
    return '<font'.$args[0].$args[1].'><m>$'.&latextrans($args[2]).'$</m>';
}

sub symbolfontreplace {
    my $text=shift;
    my @fragments=split(/\<\/font\>/si,$text);
    for (my $i=0; $i<=$#fragments;$i++) {
	$fragments[$i]=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>',@fragments);
}

sub htmlclean {
    my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
# Take care of CRLF etc
    unless ($blocklinefeed) {
	$raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
	$raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
	$raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
	$raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
    }
# Generate empty tags, remove wrong end tags
    unless ($blockemptytags) {
	$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis;
	$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
	$raw=~s/\/ \/\>/\/\>/gs;
	unless ($full) {
	    $raw=~s/\<[\/]*(body|head|html)\>//gis;
	}
    }
# Make standard tags lowercase
    unless ($blocklowercasing) {
	foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
		 'table','tr','td','th','p','br','hr','img','embed','font',
		 'a','strong','center','title','basefont','li','ol','ul',
		 'input','select','form','option','script','pre') {
	    $raw=~s/\<$_\s*\>/\<$_\>/gis;
	    $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
	    $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
	}
    }
# Replace <font face="symbol">
    unless ($blockdesymboling) {
	$raw=&symbolfontreplace($raw);
    }
    return $raw;
}

sub phaseone {
    my ($r,$fn,$uname,$udom)=@_;
    $r->print(
        &Apache::lonhtmlcommon::start_pick_box()
       .&Apache::lonhtmlcommon::row_title(&mt('Select actions to attempt'))
       .'<label>'
       .'<input type="checkbox" name="linefeed" checked="checked" /> '
       .&mt('Linefeeds, formfeeds, and carriage returns')
       .'</label><br />'
       .'<label>'
       .'<input type="checkbox" name="empty" checked="checked" /> '
       .&mt('Empty tags')
       .'</label><br />'
       .'<label>'
       .'<input type="checkbox" name="lower" checked="checked" /> '
       .&mt('Lower casing')
       .'</label><br />'
       .'<label>'
       .'<input type="checkbox" name="symbol" checked="checked" /> '
       .&mt('Symbol font')
       .'</label>'
       .&Apache::lonhtmlcommon::row_closure(1)
       .&Apache::lonhtmlcommon::end_pick_box()
    );

    $r->print(
        '<input type="hidden" name="phase" value="two" />'
       .'<p>'
       .'<input type="submit" value="'.&mt('Next').'" />'
       .'</p>'
    );
}

sub phasetwo {
    # Check original file
    my ($r,$fn,$uname,$udom)=@_;
    my $text='';
    my $londocroot = $r->dir_config('lonDocRoot');
    if (open(IN,"<$londocroot/priv/$udom/$uname".$fn)) {
        while (my $line=<IN>) {
	    $text.=$line;
        }
        close(IN);
    }
    # Check if any selection was made
    if ($env{'form.linefeed'} ne 'on' &&
        $env{'form.empty'} ne 'on' &&
        $env{'form.lower'} ne 'on' &&
        $env{'form.symbol'} ne 'on') {
        $r->print(
            '<p class="LC_warning">'
           .&mt('Please select at least one option.')
           .'</p>'
           .'<p><a href="javascript:history.back();">'.&mt('Back').'</p>'
        );
        return;
    }

    my $uri="/priv/$udom/$uname".$fn;
    my $result=&Apache::lonnet::ssi_body($uri,
					 ('grade_target'=>'web',
					  'return_only_error_and_warning_counts' => 1));
    my ($errorcount,$warningcount)=split(':',$result);

    # Display results for original file
    $r->print(
        &Apache::lonhtmlcommon::start_pick_box()
       .&Apache::lonhtmlcommon::row_title(&mt('Original file'))
       .&Apache::lonhtmlcommon::confirm_success(
           &mt('[quant,_1,error]',$errorcount), $errorcount)
       .'<br />'
       .&Apache::lonhtmlcommon::confirm_success(
           &mt('[quant,_1,warning]',$warningcount), $warningcount)
       .&Apache::lonhtmlcommon::row_closure()
    );

    # Clean up file
    $text=&htmlclean($text,1,
               ($env{'form.linefeed'} ne 'on'),
               ($env{'form.empty'} ne 'on'),
               ($env{'form.lower'} ne 'on'),
               ($env{'form.symbol'} ne 'on'));
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
    if (open(OUT,">$londocroot/priv/$udom/$uname".$newfn)) {
        print OUT $text;
        close(OUT);
    }
    my $newuri="/priv/$udom/$uname".$newfn;
    $result=&Apache::lonnet::ssi_body($newuri,
                                         ('grade_target'=>'web',
                                          'return_only_error_and_warning_counts' => 1));
    ($errorcount,$warningcount)=split(':',$result);

    # Display results for cleaned up file
    $r->print(
        &Apache::lonhtmlcommon::row_title(&mt('Cleaned up file'))
       .&Apache::lonhtmlcommon::confirm_success(
           &mt('[quant,_1,error]',$errorcount), $errorcount)
       .'<br />'
       .&Apache::lonhtmlcommon::confirm_success(
           &mt('[quant,_1,warning]',$warningcount), $warningcount)
       .&Apache::lonhtmlcommon::row_closure()
    );

    # Display actions
    $r->print(
        &Apache::lonhtmlcommon::row_title(&mt('Actions'))
       .'<ul>'
       .'<li><a href="'.$newuri.'" target="prev">'
       .&mt('Open (and edit) cleaned up file in new window')
       .'</a></li>'
       .'<li><a href="'
       .&HTML::Entities::encode(
            '/adm/diff?filename='.&escape($uri)
           .'&versionone=priv&filetwo='.&escape($newuri))
       .'" target="prev">'
       .&mt('Show diffs in new window')
       .'</a></li>'
       .'</ul>'
       .&Apache::lonhtmlcommon::row_closure(1)
       .&Apache::lonhtmlcommon::end_pick_box()
       .'<p>'
       .'<input type="hidden" name="phase" value="three" />'
       .'<input type="submit" name="accept" value="'
       .&mt('Clean Up').'" />'
       .' <input type="submit" name="reject" value="'
       .&mt('Cancel').'" />'
       .'</p>'
    );
}

sub phasethree {
    my ($r,$fn,$uname,$udom)=@_;
    my $old=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$fn;
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;
    my $new=$r->dir_config('lonDocRoot')."/priv/$udom/$uname".$newfn;
    if ($env{'form.accept'}) {
        $r->print(
        '<p class="LC_info">'
       .&mt('Accepting changes...')
       .'</p>'
    );
        move($new,$old);
    } else {
        $r->print(
        '<p class="LC_info">'
       .&mt('Rejecting changes...')
       .'</p>'
        );
        unlink($new);
    }
    $r->print(
        '<p>'
       .&Apache::lonhtmlcommon::confirm_success(&mt('Done')));
        '</p>'
}

# ---------------------------------------------------------------- Main Handler
sub handler {

    my $r=shift;
    my $fn='';

# Get query string for limited number of parameters

    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
					    ['filename']);

    if ($env{'form.filename'}) {
	$fn=$env{'form.filename'};
	$fn=~s{^https?\://[^/]+}{};
    } else {
	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
		       ' unspecified filename for cleanup', $r->filename); 
	return HTTP_NOT_FOUND;
    }

    unless ($fn) { 
	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
		       ' trying to cleanup non-existing file', $r->filename); 
	return HTTP_NOT_FOUND;
    } 

# ----------------------------------------------------------- Start page output
    my $uname;
    my $udom;

    ($uname,$udom)=&Apache::lonnet::constructaccess($fn);
    unless (($uname) && ($udom)) {
	$r->log_reason($uname.' at '.$udom.
		       ' trying to cleanup file '.$env{'form.filename'}.
		       ' ('.$fn.') - not authorized', 
		       $r->filename); 
	return HTTP_NOT_ACCEPTABLE;
    }

    &Apache::loncommon::content_type($r,'text/html');
    $r->send_http_header;

    # Breadcrumbs
    my $text = 'Authoring Space';
    my $href = &Apache::loncommon::authorspace($fn);
    if ($env{'request.course.id'}) {
        my $cnum = $env{'course.'.$env{'request.course.id'}.'.num'};
        my $cdom = $env{'course.'.$env{'request.course.id'}.'.domain'};
        if ($href eq "/priv/$cdom/$cnum/") {
            $text = 'Course Authoring Space';
        }
    }
    my $brcrum = [{'href' => $href,
                   'text' => $text},
                  {'href' => '',
                   'text' => 'Cleanup XML Document'}];

    $fn=~s{^/priv/$LONCAPA::domain_re/$LONCAPA::username_re}{};

    $r->print(&Apache::loncommon::start_page('Cleanup XML Document',
                                             undef,
                                             {'bread_crumbs' => $brcrum,}));
    $r->print('<h2>'.$fn.'</h2>'.
              '<form action="/adm/cleanup" method="post">'.
              '<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />');
    unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
	$r->print('<p class="LC_warning">'.&mt('Cannot cleanup this filetype').'</p>');
    } else {
	if ($env{'form.phase'} eq 'three') {
	    &phasethree($r,$fn,$uname,$udom);
	} elsif ($env{'form.phase'} eq 'two') {
	    &phasetwo($r,$fn,$uname,$udom);
	} else {
	    &phaseone($r,$fn,$uname,$udom);
	}
    }
    my $dir=$fn;
    $dir=~s{[^/]+$}{};
    $r->print(
        '</form>'
       .&Apache::lonhtmlcommon::actionbox(
            ['<a href="/priv/'.$udom.'/'.$uname.$fn.'">'.
                 &mt('Back to Source File').'</a>',
            '<a href="/priv/'.$udom.'/'.$uname.$dir.'">'.
                &mt('Back to Source Directory').'</a>'])
       .&Apache::loncommon::end_page()
    );

    return OK;  
}

1;
__END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>