--- loncom/publisher/loncleanup.pm	2005/05/28 01:32:33	1.1
+++ loncom/publisher/loncleanup.pm	2006/12/20 22:41:08	1.9
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Handler to cleanup XML files
 #
-# $Id: loncleanup.pm,v 1.1 2005/05/28 01:32:33 www Exp $
+# $Id: loncleanup.pm,v 1.9 2006/12/20 22:41:08 albertel Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -38,6 +38,9 @@ use Apache::loncacc;
 use Apache::loncommon();
 use Apache::lonlocal;
 use Apache::lonnet;
+use lib '/home/httpd/lib/perl/';
+use LONCAPA;
+ 
 
 sub latextrans {
     my $symbolfont=shift;
@@ -208,95 +211,184 @@ sub symbolfontreplace {
 }
 
 sub htmlclean {
-    my ($raw,$full)=@_;
+    my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_; # $raw is the text to clean; each true $block* flag skips one step below
 # Take care of CRLF etc
-
-    $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
-    $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
-    $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
-    $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
-
+    unless ($blocklinefeed) {
+	$raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs; # two-character pairs first, so each pair yields one newline
+	$raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
+	$raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs; # then any remaining lone formfeed or carriage return
+	$raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs; # the character references &#10; and &#13; also become newlines
+    }
 # Generate empty tags, remove wrong end tags
-    $raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
-    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
-    unless ($full) {
-       $raw=~s/\<[\/]*(body|head|html)\>//gis;
+    unless ($blockemptytags) {
+	$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis; # append " />" to every listed start tag
+	$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis; # drop end tags of the listed elements
+	$raw=~s/\/ \/\>/\/\>/gs; # a tag that already ended in "/" became "/ />" above; collapse it to "/>"
+	unless ($full) { # when $full is false, attribute-less body/head/html start and end tags are removed
+	    $raw=~s/\<[\/]*(body|head|html)\>//gis;
+	}
     }
 # Make standard tags lowercase
-    foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
-             'table','tr','td','th','p','br','hr','img','embed','font',
-             'a','strong','center','title','basefont','li','ol','ul',
-             'input','select','form','option','script','pre') {
-	$raw=~s/\<$_\s*\>/\<$_\>/gis;
-        $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
-        $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
+    unless ($blocklowercasing) { # tag names match case-insensitively and are rewritten as spelled in the list
+	foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
+		 'table','tr','td','th','p','br','hr','img','embed','font',
+		 'a','strong','center','title','basefont','li','ol','ul',
+		 'input','select','form','option','script','pre') {
+	    $raw=~s/\<$_\s*\>/\<$_\>/gis; # start tag without attributes; inner whitespace is dropped
+	    $raw=~s/\<\/$_\s*\>/<\/$_\>/gis; # end tag
+	    $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis; # start tag with attributes; the attribute text itself is kept as is
+	}
+    }
+# Replace <font face="symbol">
+    unless ($blockdesymboling) {
+	$raw=&symbolfontreplace($raw); # sibling sub in this file; its body is outside this hunk
     }
     return $raw;
 }
 
 sub phaseone {
+    my ($r,$fn,$uname,$udom)=@_; # prints the option form; only $r (used for output) is read here
+    $r->print(&mt('Select actions to attempt:').
+	      '<br /><input type="checkbox" name="linefeed" checked="checked" /> '. # each box is read back in phasetwo as $env{'form.<name>'}
+	      &mt('Linefeeds, formfeeds, and carriage returns').
+	      '<br /><input type="checkbox" name="empty" checked="checked" /> '.
+	      &mt('Empty tags').
+	      '<br /><input type="checkbox" name="lower" checked="checked" /> '.
+	      &mt('Lower casing').
+	      '<br /><input type="checkbox" name="symbol" checked="checked" /> '. # attributes must be separated by whitespace
+	      &mt('Symbol font').
+	      '<input type="hidden" name="phase" value="two" />'. # makes the handler dispatch the next submit to phasetwo
+	      '<p><input type="submit" value="'.&mt('Cleanup').'" /></p>');
 }
 
 sub phasetwo {
+    my ($r,$fn,$uname,$udom)=@_; # $r: used for output; $fn: path below public_html; $uname: owner of that directory; $udom is unused here
+    open(my $in,'<','/home/'.$uname.'/public_html/'.$fn) or do { $r->print(&mt('Could not read file')); return; }; # stop here rather than "clean" an empty text
+    my $text='';
+    while (my $line=<$in>) {
+	$text.=$line;
+    }
+    close($in);
+    my $uri='/~'.$uname.$fn;
+    my $result=&Apache::lonnet::ssi_body($uri,
+					 ('grade_target'=>'web',
+					  'return_only_error_and_warning_counts' => 1));
+    my ($errorcount,$warningcount)=split(':',$result); # the result is consumed as "errors:warnings"
+    $r->print(&mt('Original file').': '.
+	      $errorcount.' '.&mt('error(s)').', '.
+	      $warningcount.' '.&mt('warning(s)'));
+    $text=&htmlclean($text,1, # second argument is htmlclean's $full flag; the rest are its $block* flags
+               ($env{'form.linefeed'} ne 'on'),
+               ($env{'form.empty'} ne 'on'),
+               ($env{'form.lower'} ne 'on'),
+               ($env{'form.symbol'} ne 'on'));
+    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
+    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; # the cleaned copy is written next to the original
+    open(my $out,'>','/home/'.$uname.'/public_html'.$newfn) or do { $r->print('<br />'.&mt('Could not write cleaned up file')); return; };
+    print $out $text;
+    close($out);
+    my $newuri='/~'.$uname.$newfn;
+    $result=&Apache::lonnet::ssi_body($newuri,
+					 ('grade_target'=>'web',
+					  'return_only_error_and_warning_counts' => 1));
+    ($errorcount,$warningcount)=split(':',$result);
+    $r->print('<br />'.&mt('Cleaned up file').': '.
+	      $errorcount.' '.&mt('error(s)').', '.
+	      $warningcount.' '.&mt('warning(s)').
+              '<br /><a href="'.$newuri.'" target="prev">'.
+	      &mt('Open (and edit) cleaned up file in new window').'</a>'.
+              '<br /><a href="/adm/diff?filename='.&escape($uri).
+	      '&versionone=priv&filetwo='.
+	      &escape($newuri).'" target="prev">'.
+	      &mt('Show diffs in new window').'</a><br />'.
+	      '<input type="hidden" name="phase" value="three" />'. # the next submit is dispatched to phasethree
+	      '<input type="submit" name="accept" value="'.&mt('Accept Result').'" />'.
+	      '<input type="submit" name="reject" value="'.&mt('Reject Result').'" />'
+	      );
+}
+
+sub phasethree {
+    my ($r,$fn,$uname,$udom)=@_; # applies or discards the cleaned copy; $udom is unused here
+    my $old='/home/'.$uname.'/public_html/'.$fn;
+    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
+    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; # same name phasetwo builds for the cleaned copy
+    my $new='/home/'.$uname.'/public_html'.$newfn;
+    if ($env{'form.accept'}) { # set when the "Accept Result" button was pressed; anything else rejects
+	$r->print(&mt('Accepting changes'));
+        move($new,$old); # NOTE(review): presumably File::Copy::move; its import is not visible in this hunk - confirm
+    } else {
+	$r->print(&mt('Rejecting changes'));
+        unlink($new);
+    }
 }
 
 # ---------------------------------------------------------------- Main Handler
 sub handler {
 
-  my $r=shift;
-
+    my $r=shift; # first argument; used below for logging, config lookup and output
+    my $fn='';
 
 # Get query string for limited number of parameters
 
-  &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
-					  ['filename']);
+    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
+					    ['filename']);
+
+    if ($env{'form.filename'}) {
+	$fn=$env{'form.filename'};
+	$fn=~s/^http\:\/\/[^\/]+//; # strip a leading http://host prefix
+    } else {
+	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
+		       ' unspecified filename for cleanup', $r->filename); 
+	return HTTP_NOT_FOUND;
+    }
 
-  if ($env{'form.filename'}) {
-      $fn=$env{'form.filename'};
-      $fn=~s/^http\:\/\/[^\/]+//;
-  } else {
-     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
-         ' unspecified filename for cleanup', $r->filename); 
-     return HTTP_NOT_FOUND;
-  }
-
-  unless ($fn) { 
-     $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
-         ' trying to cleanup non-existing file', $r->filename); 
-     return HTTP_NOT_FOUND;
-  } 
+    unless ($fn) { 
+	$r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}. # reached when nothing is left after the prefix strip above
+		       ' trying to cleanup non-existing file', $r->filename); 
+	return HTTP_NOT_FOUND;
+    } 
 
 # ----------------------------------------------------------- Start page output
-  my $uname;
-  my $udom;
+    my $uname;
+    my $udom;
+
+    ($uname,$udom)=
+	&Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain')); # presumably returns owner name and domain only when access is allowed - confirm
+    unless (($uname) && ($udom)) { # both values must be true or the request is refused
+	$r->log_reason($uname.' at '.$udom.
+		       ' trying to cleanup file '.$env{'form.filename'}.
+		       ' ('.$fn.') - not authorized', 
+		       $r->filename); 
+	return HTTP_NOT_ACCEPTABLE;
+    }
 
-  ($uname,$udom)=
-    &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
-  unless (($uname) && ($udom)) {
-     $r->log_reason($uname.' at '.$udom.
-         ' trying to cleanup file '.$env{'form.filename'}.
-         ' ('.$fn.') - not authorized', 
-         $r->filename); 
-     return HTTP_NOT_ACCEPTABLE;
-  }
-
-  $fn=~s/\/\~(\w+)//;
-
-  &Apache::loncommon::content_type($r,'text/html');
-  $r->send_http_header;
-
-  $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');
-
-  $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));
-
-  if ($env{'form.phase'} eq 'two') {
-      &phasetwo($r,$fn,$uname,$udom);
-  } else {
-      &phaseone($r,$fn,$uname,$udom);
-  }
+    $fn=~s{/~($LONCAPA::username_re)}{}; # remove the first /~<username> from $fn
 
-  $r->print('</body></html>');
-  return OK;  
+    &Apache::loncommon::content_type($r,'text/html');
+    $r->send_http_header;
+
+    $r->print(&Apache::loncommon::start_page('Cleanup XML Document'));
+    $r->print('<h2>'.$fn.'</h2>'. # NOTE(review): $fn is written into the page unescaped - confirm it cannot carry markup
+              '<form action="/adm/cleanup" method="post">'. # the phase subs print their fields inside this form; it is closed below
+              '<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />'); # NOTE(review): request value placed in an attribute unescaped - confirm
+    unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) { # only these extensions are processed
+	$r->print(&mt('Cannot cleanup this filetype'));
+    } else {
+	if ($env{'form.phase'} eq 'three') { # phase comes from the hidden field printed by the previous step
+	    &phasethree($r,$fn,$uname,$udom);
+	} elsif ($env{'form.phase'} eq 'two') {
+	    &phasetwo($r,$fn,$uname,$udom);
+	} else { # no or unknown phase: show the option form
+	    &phaseone($r,$fn,$uname,$udom);
+	}
+    }
+    my $dir=$fn;
+    $dir=~s/\/[^\/]+$/\//; # drop the final path component, keeping the trailing slash
+    $r->print('</form>'.
+	      '<br /><a href="/priv/'.$uname.'/'.$fn.'">'.&mt('Back to Source File').'</a>'.
+              '<br /><a href="/priv/'.$uname.'/'.$dir.'">'.&mt('Back to Source Directory').'</a>'.
+	      &Apache::loncommon::end_page());
+    return OK;  
 }
 
 1;