Annotation of loncom/publisher/loncleanup.pm, revision 1.3
1.1 www 1: # The LearningOnline Network with CAPA
2: # Handler to cleanup XML files
3: #
1.3 ! albertel 4: # $Id: loncleanup.pm,v 1.2 2005/05/28 02:18:03 www Exp $
1.1 www 5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: #
29: ###
30:
31: package Apache::loncleanup;
32:
33: use strict;
34: use Apache::File;
35: use File::Copy;
36: use Apache::Constants qw(:common :http :methods);
37: use Apache::loncacc;
38: use Apache::loncommon();
39: use Apache::lonlocal;
40: use Apache::lonnet;
41:
# Translate text that was written in the Symbol font into LaTeX markup.
#
# Parameter:
#   $symbolfont - character data taken from inside a Symbol-font <font> tag.
# Returns:
#   The text with every character that has an entry in the table below
#   replaced by its LaTeX equivalent.  Characters without an entry are
#   copied unchanged, and so is everything from an '&' up to and including
#   the next ';' (an HTML entity).  An '&' that is never closed by a ';' is
#   copied unchanged up to the end of the string.  An undefined argument
#   yields the empty string.
sub latextrans {
    my $symbolfont = shift;
    return '' unless defined $symbolfont;

    # Keys are Symbol-font character codes.  Codes above 0x7F are written as
    # \x escapes so the lookup does not depend on the encoding in which this
    # source file is stored.
    my %latexsymb = (
        "\xB1" => '\pm',
        "\xB4" => '\times',
        "\xB8" => '\div',
        "\xD2" => '(R)',
        # NOTE(review): \copy is TeX's box-copy primitive; \copyright may
        # have been intended here - confirm against the renderer in use.
        "\xD3" => '\copy',
        "\xD8" => '\neg',
        "\xE2" => '(R)',
        "\xE3" => '\copy',
        "\xA6" => 'f',
        # Upper-case Greek
        'A' => '\Alpha',
        'B' => '\Beta',
        'G' => '\Gamma',
        'D' => '\Delta',
        'E' => '\Epsilon',
        'Z' => '\Zeta',
        'H' => '\Eta',
        'Q' => '\Theta',
        'I' => '\Iota',
        'K' => '\Kappa',
        'L' => '\Lambda',
        'M' => '\Mu',
        'N' => '\Nu',
        'X' => '\Xi',
        'O' => '\Omicron',
        'P' => '\Pi',
        'R' => '\Rho',
        'S' => '\Sigma',
        'T' => '\Tau',
        'U' => 'Y',
        'F' => '\Phi',
        'C' => '\Chi',
        'Y' => '\Psi',
        'W' => '\Omega',
        # Lower-case Greek
        'a' => '\alpha',
        'b' => '\beta',
        'g' => '\gamma',
        'd' => '\delta',
        'e' => '\epsilon',
        'z' => '\zeta',
        'h' => '\eta',
        'q' => '\theta',
        'i' => '\iota',
        'k' => '\kappa',
        'l' => '\lambda',
        'm' => '\mu',
        'n' => '\nu',
        'x' => '\xi',
        'o' => '\omicron',
        'p' => '\pi',
        'r' => '\rho',
        'V' => '\sigmaf',
        's' => '\sigma',
        't' => '\tau',
        'u' => '\upsilon',
        'f' => '\phi',
        'c' => '\chi',
        'y' => '\psi',
        'w' => '\omega',
        'J' => '\vartheta',
        'j' => '\varphi',
        'v' => '\varpi',
        # Miscellaneous symbols
        "\xA1" => '\Upsilon',
        "\xA2" => "'",
        "\xA4" => '/',
        "\xB2" => '"',
        "\xBC" => '\ldots',
        "\xC0" => '\aleph',
        "\xC1" => '\Im',
        "\xC2" => '\Re',
        "\xC3" => '\wp',
        "\xD4" => '^{TM}',
        "\xE4" => '^{TM}',
        "\xF0" => 'EUR',
        # Arrows
        "\xAB" => '\leftrightarrow',
        "\xAC" => '\leftarrow',
        "\xAD" => '\uparrow',
        "\xAE" => '\rightarrow',
        "\xAF" => '\downarrow',
        "\xBF" => '\hookleftarrow',
        "\xDB" => '\Leftrightarrow',
        "\xDC" => '\Leftarrow',
        "\xDD" => '\Uparrow',
        "\xDE" => '\Rightarrow',
        "\xDF" => '\Downarrow',
        # Operators and relations
        '"'    => '\forall',
        '$'    => '\exists',
        q{'}   => '\ni',
        '*'    => '\ast',
        '-'    => '-',
        '@'    => '\cong',
        '\\'   => '\therefore',
        '^'    => '\perp',
        '~'    => '\sim',
        "\xA3" => '\leq',
        "\xA5" => '\infty',
        "\xB3" => '\geq',
        "\xB5" => '\propto',
        "\xB6" => '\partial',
        "\xB7" => '\cdot',
        "\xB9" => '\not=',
        "\xBA" => '\equiv',
        "\xBB" => '\approx',
        "\xC4" => '\otimes',
        "\xC5" => '\oplus',
        "\xC6" => '\emptyset',
        "\xC7" => '\cap',
        "\xC8" => '\cup',
        "\xC9" => '\supset',
        "\xCA" => '\supseteq',
        "\xCB" => '\not\subset',
        "\xCC" => '\subset',
        "\xCD" => '\subseteq',
        "\xCE" => '\in',
        "\xCF" => '\not\in',
        "\xD0" => '\angle',
        "\xD1" => '\nabla',
        "\xD5" => '\prod',
        "\xD6" => '\surd',
        "\xD7" => '\cdot',
        "\xD9" => '\wedge',
        "\xDA" => '\vee',
        "\xE5" => '\sum',
        "\xF2" => '\int',
        "\xE1" => '\langle',
        "\xF1" => '\rangle',
        # Card suits
        "\xE0" => '\diamondsuit',
        "\xA7" => '\clubsuit',
        "\xA8" => '\diamondsuit',
        "\xA9" => '\heartsuit',
        "\xAA" => '\spadesuit',
    );

    my $output = '';
    # Walk the string token by token.  A token is either an entity
    # ('&' through the next ';', or through the end of the string when no
    # ';' follows) or one single character; /s lets '.' match newlines so
    # nothing is skipped.
    while ($symbolfont =~ m/\G(&[^;]*;?|.)/gs) {
        my $token = $1;
        $output .= exists($latexsymb{$token}) ? $latexsymb{$token} : $token;
    }
    return $output;
}
195:
# Build the replacement for one Symbol-font <font ...> opening tag and the
# text that follows it.
#
# Parameters (in order):
#   1. the attribute text that preceded the face attribute in the tag
#   2. the attribute text that followed the face attribute in the tag
#   3. the text following the tag, which is run through latextrans
# Returns:
#   A <font> opening tag carrying the two attribute pieces (the face
#   attribute itself is left out), followed by the translated text wrapped
#   as <m>$...$</m>.
sub insidetrans {
    my ($attrs_before, $attrs_after, $content) = @_;
    my $latex = &latextrans($content);
    my $opening_tag = '<font' . $attrs_before . $attrs_after . '>';
    return $opening_tag . '<m>$' . $latex . '$</m>';
}
200:
# Replace the contents of Symbol-font <font> elements by LaTeX markup.
#
# Parameter:
#   $text - the document text.
# Returns:
#   The text in which, for every piece that ends at a </font> tag (or at the
#   end of the text), a <font ... face=symbol ...> opening tag and everything
#   after it in that piece has been rewritten by insidetrans.  Closing tags
#   are written back as lower-case '</font>'.  An undefined argument yields
#   the empty string.
sub symbolfontreplace {
    my $text = shift;
    return '' unless defined $text;

    # The negative limit keeps trailing empty fields; without it every
    # </font> at the very end of the text would be lost in the split/join
    # round trip.
    my @fragments = split(/\<\/font\>/si, $text, -1);
    foreach my $fragment (@fragments) {
        $fragment =~ s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>', @fragments);
}
209:
# Clean up HTML/XML source text.
#
# Parameters (in order):
#   $raw              - the text to clean
#   $full             - if true, bare <html>, <head> and <body> tags are kept;
#                       otherwise they are removed
#   $blocklinefeed    - if true, skip line-ending normalisation
#   $blockemptytags   - if true, skip the empty-tag step (which also covers
#                       the html/head/body removal)
#   $blocklowercasing - if true, skip lower-casing of standard tag names
#   $blockdesymboling - if true, skip the Symbol-font replacement
# Returns:
#   The cleaned text.
sub htmlclean {
    my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;

    # Take care of CRLF etc.  The substitutions run one after the other, in
    # this order, so that two-character line endings are collapsed before the
    # single-character ones are handled.
    unless ($blocklinefeed) {
        foreach my $eol (qr/\r\f/, qr/\f\r/, qr/\r\n/, qr/\n\r/,
                         qr/\f/, qr/\r/, qr/\&\#10\;/, qr/\&\#13\;/) {
            $raw =~ s/$eol/\n/gs;
        }
    }

    # Generate empty tags, remove wrong end tags
    unless ($blockemptytags) {
        # \b keeps longer tag names that merely start with one of these
        # (e.g. <break>) untouched.  Attributes may contain '/', as in
        # src="dir/file.gif"; a '/' directly before the closing '>' is
        # absorbed so that tags which are already empty are not doubled up.
        $raw =~ s/\<(br|hr|img|meta|allow|basefont)\b([^\>]*?)\s*\/?\>/\<$1$2 \/\>/gis;
        $raw =~ s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
        unless ($full) {
            $raw =~ s/\<[\/]*(body|head|html)\>//gis;
        }
    }

    # Make standard tags lowercase
    unless ($blocklowercasing) {
        my @standard_tags = (
            'html','body','head','meta','h1','h2','h3','h4','b','i','m',
            'table','tr','td','th','p','br','hr','img','embed','font',
            'a','strong','center','title','basefont','li','ol','ul',
            'input','select','form','option','script','pre',
        );
        foreach my $tag (@standard_tags) {
            # opening tag without attributes, closing tag, and opening tag
            # with attributes; the attribute text itself is left alone
            $raw =~ s/\<$tag\s*\>/\<$tag\>/gis;
            $raw =~ s/\<\/$tag\s*\>/<\/$tag\>/gis;
            $raw =~ s/\<$tag\s([^\>]*)\>/<$tag $1\>/gis;
        }
    }

    # Replace <font face="symbol">
    unless ($blockdesymboling) {
        $raw = &symbolfontreplace($raw);
    }
    return $raw;
}
244:
# First phase of the cleanup dialogue.  Placeholder: the arguments are
# unpacked but nothing is done with them yet.
#
# Parameters (in order): request object, file name, user name, user domain,
# as passed by handler.
sub phaseone {
    my (
        $r,
        $fn,
        $uname,
        $udom,
    ) = @_;
}
248:
# Second phase of the cleanup dialogue.  Placeholder: the arguments are
# unpacked but nothing is done with them yet.
#
# Parameters (in order): request object, file name, user name, user domain,
# as passed by handler.
sub phasetwo {
    my (
        $r,
        $fn,
        $uname,
        $udom,
    ) = @_;
}
252:
# Third phase of the cleanup dialogue.  Placeholder: the arguments are
# unpacked but nothing is done with them yet.
#
# Parameters (in order): request object, file name, user name, user domain,
# as passed by handler.
sub phasethree {
    my (
        $r,
        $fn,
        $uname,
        $udom,
    ) = @_;
}
256:
257: # ---------------------------------------------------------------- Main Handler
# Request handler: checks the 'filename' form parameter and the caller's
# access to that file, prints the page frame and dispatches to one of the
# three phase routines according to the 'phase' form parameter.
#
# Parameter:
#   $r - the request object.
# Returns:
#   HTTP_NOT_FOUND      if no file name was given, or nothing is left of it
#                       once a leading http://host part has been removed
#   HTTP_NOT_ACCEPTABLE if constructaccess does not return both a user name
#                       and a domain for the file
#   OK                  after the page has been printed
sub handler {

    my $r=shift;

# Get query string for limited number of parameters

    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
                                            ['filename']);

    unless ($env{'form.filename'}) {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' unspecified filename for cleanup', $r->filename);
        return HTTP_NOT_FOUND;
    }

    # Declared here: under "use strict" an undeclared $fn does not compile.
    my $fn=$env{'form.filename'};
    # Drop a leading scheme and host so only the path part remains.
    $fn=~s/^http\:\/\/[^\/]+//;

    unless ($fn) {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' trying to cleanup non-existing file', $r->filename);
        return HTTP_NOT_FOUND;
    }

# ----------------------------------------------------------- Start page output

    my ($uname,$udom)=
        &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
    unless (($uname) && ($udom)) {
        $r->log_reason($uname.' at '.$udom.
                       ' trying to cleanup file '.$env{'form.filename'}.
                       ' ('.$fn.') - not authorized',
                       $r->filename);
        return HTTP_NOT_ACCEPTABLE;
    }

    # Remove the first "/~name" component from the path.
    $fn=~s/\/\~(\w+)//;

    &Apache::loncommon::content_type($r,'text/html');
    $r->send_http_header;

    $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');

    $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));

    # Anything other than phase 'two' or 'three' starts at phase one.
    if ($env{'form.phase'} eq 'three') {
        &phasethree($r,$fn,$uname,$udom);
    } elsif ($env{'form.phase'} eq 'two') {
        &phasetwo($r,$fn,$uname,$udom);
    } else {
        &phaseone($r,$fn,$uname,$udom);
    }

    $r->print('</body></html>');
    return OK;
}
317:
318: 1;
319: __END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>