--- loncom/publisher/loncleanup.pm	2005/05/28 18:53:50	1.3
+++ loncom/publisher/loncleanup.pm	2005/07/13 21:43:02	1.6
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Handler to cleanup XML files
 #
-# $Id: loncleanup.pm,v 1.3 2005/05/28 18:53:50 albertel Exp $
+# $Id: loncleanup.pm,v 1.6 2005/07/13 21:43:02 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -218,8 +218,9 @@ sub htmlclean {
     }
 # Generate empty tags, remove wrong end tags
     unless ($blockemptytags) {
-	$raw=~s/\<(br|hr|img|meta|allow|basefont)([^\>\/]*?)\>/\<$1$2 \/\>/gis;
-	$raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
+	$raw=~s/\<(br|hr|img|meta|embed|allow|basefont)([^\>]*?)\>/\<$1$2 \/\>/gis;
+	$raw=~s/\<\/(br|hr|img|meta|embed|allow|basefont)\>//gis;
+	$raw=~s/\/ \/\>/\/\>/gs;
 	unless ($full) {
 	    $raw=~s/\<[\/]*(body|head|html)\>//gis;
 	}
@@ -244,21 +245,85 @@ sub htmlclean {
 
 sub phaseone {
     my ($r,$fn,$uname,$udom)=@_;
+    $r->print(&mt('Select actions to attempt:'). # phase one: print the option form; every cleanup action starts out checked
+	      '<br /><input type="checkbox" name="linefeed" checked="checked" /> '.
+	      &mt('Linefeeds, formfeeds, and carriage returns').
+	      '<br /><input type="checkbox" name="empty" checked="checked" /> '.
+	      &mt('Empty tags').
+	      '<br /><input type="checkbox" name="lower" checked="checked" /> '.
+	      &mt('Lower casing').
+	      '<br /><input type="checkbox" name="symbol" checked="checked" /> '. # attributes must be separated by whitespace
+	      &mt('Symbol font').
+	      '<input type="hidden" name="phase" value="two" />'. # submitted as "phase"; the handler dispatches on form.phase
+	      '<p><input type="submit" value="'.&mt('Cleanup').'" /></p>');
 }
 
 sub phasetwo {
     my ($r,$fn,$uname,$udom)=@_;
+    open(IN,'<','/home/'.$uname.'/public_html/'.$fn) or return $r->print(&mt('Could not read file')); # 3-arg open; stop rather than "clean" an empty text
+    my $text='';
+    while (my $line=<IN>) {
+	$text.=$line;
+    }
+    close(IN);
+    my $uri='/~'.$uname.$fn;
+    my $result=&Apache::lonnet::ssi_body($uri, # render the original file; the result is split below as "errors:warnings"
+					 ('grade_target'=>'web',
+					  'return_only_error_and_warning_counts' => 1));
+    my ($errorcount,$warningcount)=split(':',$result);
+    $r->print(&mt('Original file').': '.
+	      $errorcount.' '.&mt('error(s)').', '.
+	      $warningcount.' '.&mt('warning(s)'));
+    $text=&htmlclean($text,1, # each flag below is true when the matching phase-one checkbox was NOT ticked
+               ($env{'form.linefeed'} ne 'on'),
+               ($env{'form.empty'} ne 'on'),
+               ($env{'form.lower'} ne 'on'),
+               ($env{'form.symbol'} ne 'on'));
+    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
+    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; # cleaned copy is written next to the original; phasethree rebuilds this same name
+    open(OUT,'>','/home/'.$uname.'/public_html'.$newfn) or return $r->print('<br />'.&mt('Could not write cleaned up file')); # never report on a file that was not written
+    print OUT $text;
+    close(OUT);
+    my $newuri='/~'.$uname.$newfn;
+    $result=&Apache::lonnet::ssi_body($newuri, # same check on the cleaned copy so both counts can be compared
+					 ('grade_target'=>'web',
+					  'return_only_error_and_warning_counts' => 1));
+    ($errorcount,$warningcount)=split(':',$result);
+    $r->print('<br />'.&mt('Cleaned up file').': '.
+	      $errorcount.' '.&mt('error(s)').', '.
+	      $warningcount.' '.&mt('warning(s)').
+              '<br /><a href="'.$newuri.'" target="prev">'.
+	      &mt('Open (and edit) cleaned up file in new window').'</a>'.
+              '<br /><a href="/adm/diff?filename='.&Apache::lonnet::escape($uri).
+	      '&amp;versionone=priv&amp;filetwo='. # "&" must be written as "&amp;" inside an attribute value
+	      &Apache::lonnet::escape($newuri).'" target="prev">'.
+	      &mt('Show diffs in new window').'</a><br />'.
+	      '<input type="hidden" name="phase" value="three" />'. # the next submit is dispatched as phase three
+	      '<input type="submit" name="accept" value="'.&mt('Accept Result').'" />'.
+	      '<input type="submit" name="reject" value="'.&mt('Reject Result').'" />'
+	      );
 }
 
 sub phasethree {
     my ($r,$fn,$uname,$udom)=@_;
+    my $old='/home/'.$uname.'/public_html/'.$fn;
+    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
+    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext; # must match the name phasetwo wrote the cleaned copy under
+    my $new='/home/'.$uname.'/public_html'.$newfn;
+    if ($env{'form.accept'}) { # presumably set only when the "accept" submit button was pressed
+	$r->print(&mt('Accepting changes'));
+        move($new,$old) or $r->print('<br />'.&mt('Could not replace original file')); # report the failure instead of ignoring it
+    } else {
+	$r->print(&mt('Rejecting changes'));
+        unlink($new) or $r->print('<br />'.&mt('Could not remove cleaned up file')); # unlink returns the number of files removed
+    }
 }
 
 # ---------------------------------------------------------------- Main Handler
 sub handler {
 
     my $r=shift;
-
+    my $fn='';
 
 # Get query string for limited number of parameters
 
@@ -299,19 +364,30 @@ sub handler {
     &Apache::loncommon::content_type($r,'text/html');
     $r->send_http_header;
 
-    $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');
+    my $html=&Apache::lonxml::xmlbegin();
+    $r->print($html.'<head><title>LON-CAPA Construction Space</title></head>');
 
     $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));
-
-    if ($env{'form.phase'} eq 'three') {
-	&phasethree($r,$fn,$uname,$udom);
-    } elsif ($env{'form.phase'} eq 'two') {
-	&phasetwo($r,$fn,$uname,$udom);
+    $r->print('<h2>'.$fn.'</h2>'.
+              '<form action="/adm/cleanup" method="post">'.
+              '<input type="hidden" name="filename" value="'.$env{'form.filename'}.'" />');
+    unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
+	$r->print(&mt('Cannot cleanup this filetype'));
     } else {
-	&phaseone($r,$fn,$uname,$udom);
+	if ($env{'form.phase'} eq 'three') {
+	    &phasethree($r,$fn,$uname,$udom);
+	} elsif ($env{'form.phase'} eq 'two') {
+	    &phasetwo($r,$fn,$uname,$udom);
+	} else {
+	    &phaseone($r,$fn,$uname,$udom);
+	}
     }
-
-    $r->print('</body></html>');
+    my $dir=$fn;
+    $dir=~s/\/[^\/]+$/\//;
+    $r->print('</form>'.
+	      '<br /><a href="/priv/'.$uname.'/'.$fn.'">'.&mt('Back to Source File').'</a>'.
+              '<br /><a href="/priv/'.$uname.'/'.$dir.'">'.&mt('Back to Source Directory').'</a>'.
+	      '</body></html>');
     return OK;  
 }