--- loncom/xml/lonxml.pm	2005/01/24 22:36:03	1.351
+++ loncom/xml/lonxml.pm	2005/02/17 08:34:56	1.356
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # XML Parser Module 
 #
-# $Id: lonxml.pm,v 1.351 2005/01/24 22:36:03 albertel Exp $
+# $Id: lonxml.pm,v 1.356 2005/02/17 08:34:56 albertel Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -149,20 +149,23 @@ $Apache::lonxml::post_evaluate=1;
 $Apache::lonxml::warnings_error_header='';
 
 sub xmlbegin {
-  my $output='';
-  @htmlareafields=();
-  if ($ENV{'browser.mathml'}) {
-      $output='<?xml version="1.0"?>'
+    my ($style)=@_;                    # optional; 'encode' => return the header entity-encoded
+    my $output='';
+    @htmlareafields=();                # empty this list (declared outside this sub)
+    if ($ENV{'browser.mathml'}) {      # browser.mathml set: XML prolog, MathML DTD, namespaced <html>
+	$output='<?xml version="1.0"?>'
             .'<?xml-stylesheet type="text/css" href="/adm/MathML/mathml.css"?>'
             .'<!DOCTYPE html SYSTEM "/adm/MathML/mathml.dtd" '
             .'[<!ENTITY mathns "http://www.w3.org/1998/Math/MathML">]>'
             .'<html xmlns:math="http://www.w3.org/1998/Math/MathML" ' 
-		.'xmlns="http://www.w3.org/TR/REC-html40">';
-  } else {
-      $output='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html>';
-  }
-  return $output;
+	    .'xmlns="http://www.w3.org/TR/REC-html40">';
+    } else {                           # otherwise: XHTML 1.0 Transitional doctype plus <html>
+	$output='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>';
+    }
+    if (defined($style) && $style eq 'encode') {   # callers passing no argument leave $style undef
+	$output=&HTML::Entities::encode($output,'<>&"');   # encode only <, >, & and "
+    }
+    return $output;
 }
 
 sub xmlend {
@@ -284,7 +287,7 @@ sub printtokenheader {
     }
 }
 
-sub fontsettings() {
+sub fontsettings {
     my $headerstring='';
     if (($ENV{'browser.os'} eq 'mac') && (!$ENV{'browser.mathml'})) { 
 	$headerstring.=
@@ -368,23 +371,29 @@ sub xmlparse {
 
 sub htmlclean {
     my ($raw,$full)=@_;
+# Regex-based cleanup of the HTML in $raw. First: turn CR, FF and &#10;/&#13; into \n
 
-    my $tree = HTML::TreeBuilder->new;
-    $tree->ignore_unknown(0);
-
-    $tree->parse($raw);
-
-    my $output= $tree->as_HTML(undef,' ');
-
-    $output=~s/\<(br|hr|img|meta|allow)(.*?)\>/\<$1$2 \/\>/gis;
-    $output=~s/\<\/(br|hr|img|meta|allow)\>//gis;
+    $raw=~s/\r\f/\n/gs; $raw=~s/\f\r/\n/gs;
+    $raw=~s/\r\n/\n/gs; $raw=~s/\n\r/\n/gs;
+    $raw=~s/\f/\n/gs; $raw=~s/\r/\n/gs;
+    $raw=~s/\&\#10\;/\n/gs; $raw=~s/\&\#13\;/\n/gs;
+
+# Generate empty tags (exact tag names only; attributes may contain '/'), remove wrong end tags
+    $raw=~s/\<(br|hr|img|meta|allow|basefont)(?=[\s\/\>])([^\>]*?)\s*\/?\>/\<$1$2 \/\>/gis;
+    $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
     unless ($full) {
-       $output=~s/\<[\/]*(body|head|html)\>//gis;
+       $raw=~s/\<[\/]*(body|head|html)\>//gis;   # drop attribute-less html/head/body tags
     }
-
-    $tree = $tree->delete;
-
-    return $output;
+# Lowercase the names of the standard tags listed below; attribute text is kept as written
+    foreach ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
+             'table','tr','td','th','p','br','hr','img','embed','font',
+             'a','strong','center','title','basefont','li','ol','ul',
+             'input','select','form','option','script','pre') {
+	$raw=~s/\<$_\s*\>/\<$_\>/gis;
+        $raw=~s/\<\/$_\s*\>/<\/$_\>/gis;
+        $raw=~s/\<$_\s([^\>]*)\>/<$_ $1\>/gis;
+    }
+    return $raw;
 }
 
 sub latex_special_symbols {
@@ -1261,6 +1270,8 @@ sub handler {
 	&Apache::loncommon::content_type($request,'text/html');
     }
     &Apache::loncommon::no_cache($request);
+    $request->set_last_modified(&Apache::lonnet::metadata($request->uri,
+							  'lastrevisiondate'));
     $request->send_http_header;
     
     return OK if $request->header_only;