Annotation of loncom/publisher/loncleanup.pm, revision 1.12
1.1 www 1: # The LearningOnline Network with CAPA
2: # Handler to cleanup XML files
3: #
1.12 ! www 4: # $Id: loncleanup.pm,v 1.11 2009/09/09 17:58:37 bisitz Exp $
1.1 www 5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: #
26: # http://www.lon-capa.org/
27: #
28: #
29: ###
30:
31: package Apache::loncleanup;
32:
33: use strict;
34: use Apache::File;
35: use File::Copy;
36: use Apache::Constants qw(:common :http :methods);
37: use Apache::loncacc;
38: use Apache::loncommon();
1.11 bisitz 39: use Apache::lonhtmlcommon();
1.1 www 40: use Apache::lonlocal;
41: use Apache::lonnet;
1.8 www 42: use lib '/home/httpd/lib/perl/';
43: use LONCAPA;
44:
1.1 www 45:
# Translation table for text that was set in the "Symbol" font: each key is
# one character as it appears in the document source, each value is the
# markup emitted in its place.  Characters outside ASCII are written as \x
# escapes so the table does not depend on the encoding of this source file.
my %latex_for_symbol = (
    # Miscellaneous signs
    "\xB1" => '\pm',
    "\xB4" => '\times',
    "\xB8" => '\div',
    "\xD2" => '(R)',
    "\xD3" => '\copy',
    "\xD8" => '\neg',
    "\xE2" => '(R)',
    "\xE3" => '\copy',
    "\xA6" => 'f',
    # Upper-case Greek
    'A' => '\Alpha',
    'B' => '\Beta',
    'G' => '\Gamma',
    'D' => '\Delta',
    'E' => '\Epsilon',
    'Z' => '\Zeta',
    'H' => '\Eta',
    'Q' => '\Theta',
    'I' => '\Iota',
    'K' => '\Kappa',
    'L' => '\Lambda',
    'M' => '\Mu',
    'N' => '\Nu',
    'X' => '\Xi',
    'O' => '\Omicron',
    'P' => '\Pi',
    'R' => '\Rho',
    'S' => '\Sigma',
    'T' => '\Tau',
    'U' => 'Y',
    'F' => '\Phi',
    'C' => '\Chi',
    'Y' => '\Psi',
    'W' => '\Omega',
    # Lower-case Greek
    'a' => '\alpha',
    'b' => '\beta',
    'g' => '\gamma',
    'd' => '\delta',
    'e' => '\epsilon',
    'z' => '\zeta',
    'h' => '\eta',
    'q' => '\theta',
    'i' => '\iota',
    'k' => '\kappa',
    'l' => '\lambda',
    'm' => '\mu',
    'n' => '\nu',
    'x' => '\xi',
    'o' => '\omicron',
    'p' => '\pi',
    'r' => '\rho',
    'V' => '\sigmaf',
    's' => '\sigma',
    't' => '\tau',
    'u' => '\upsilon',
    'f' => '\phi',
    'c' => '\chi',
    'y' => '\psi',
    'w' => '\omega',
    'J' => '\vartheta',
    'j' => '\varphi',
    'v' => '\varpi',
    # Letter-like symbols and punctuation
    "\xA1" => '\Upsilon',
    "\xA2" => "'",
    "\xA4" => '/',
    "\xB2" => '"',
    "\xBC" => '\ldots',
    "\xC0" => '\aleph',
    "\xC1" => '\Im',
    "\xC2" => '\Re',
    "\xC3" => '\wp',
    "\xD4" => '^{TM}',
    "\xE4" => '^{TM}',
    "\xF0" => 'EUR',
    # Arrows
    "\xAB" => '\leftrightarrow',
    "\xAC" => '\leftarrow',
    "\xAD" => '\uparrow',
    "\xAE" => '\rightarrow',
    "\xAF" => '\downarrow',
    "\xBF" => '\hookleftarrow',
    "\xDB" => '\Leftrightarrow',
    "\xDC" => '\Leftarrow',
    "\xDD" => '\Uparrow',
    "\xDE" => '\Rightarrow',
    "\xDF" => '\Downarrow',
    # Operators and relations
    '"'    => '\forall',
    '$'    => '\exists',
    "'"    => '\ni',
    '*'    => '\ast',
    '-'    => '-',
    '@'    => '\cong',
    '\\'   => '\therefore',
    '^'    => '\perp',
    '~'    => '\sim',
    "\xA3" => '\leq',
    "\xA5" => '\infty',
    "\xB3" => '\geq',
    "\xB5" => '\propto',
    "\xB6" => '\partial',
    "\xB7" => '\cdot',
    "\xB9" => '\not=',
    "\xBA" => '\equiv',
    "\xBB" => '\approx',
    "\xC4" => '\otimes',
    "\xC5" => '\oplus',
    "\xC6" => '\emptyset',
    "\xC7" => '\cap',
    "\xC8" => '\cup',
    "\xC9" => '\supset',
    "\xCA" => '\supseteq',
    "\xCB" => '\not\subset',
    "\xCC" => '\subset',
    "\xCD" => '\subseteq',
    "\xCE" => '\in',
    "\xCF" => '\not\in',
    "\xD0" => '\angle',
    "\xD1" => '\nabla',
    "\xD5" => '\prod',
    "\xD6" => '\surd',
    "\xD7" => '\cdot',
    "\xD9" => '\wedge',
    "\xDA" => '\vee',
    "\xE5" => '\sum',
    "\xF2" => '\int',
    "\xE1" => '\langle',
    "\xF1" => '\rangle',
    # Card suits
    "\xE0" => '\diamondsuit',
    "\xA7" => '\clubsuit',
    "\xA8" => '\diamondsuit',
    "\xA9" => '\heartsuit',
    "\xAA" => '\spadesuit',
);

# Translate a run of Symbol-font text into LaTeX markup.
#
# Parameter: the text found inside a <font face="symbol"> element.
# Returns:   the text with every character listed in %latex_for_symbol
#            replaced by its LaTeX equivalent; anything else is copied
#            through unchanged.
#
# A complete character entity (&name; or &#NNN;) is treated as one unit.
# Numeric entities in the range 0-255 are decoded before the lookup, so
# "&#177;" is translated exactly like the literal character it stands for.
# An '&' that does not start a complete entity is an ordinary character, so
# no input is lost when a document contains a stray ampersand.
sub latextrans {
    my $symbolfont = shift;
    my $output = '';
    return $output unless defined($symbolfont);

    while ($symbolfont =~ /(&#?\w+;|.)/gs) {
        my $token = $1;
        my $key   = $token;

        if ($token =~ /^&#([0-9]{1,3});$/ && $1 < 256) {
            $key = chr($1);
        }
        elsif ($token =~ /^&#[xX]([0-9A-Fa-f]{1,2});$/) {
            $key = chr(hex($1));
        }

        # Untranslatable input keeps its original spelling (entity included).
        $output .= exists($latex_for_symbol{$key}) ? $latex_for_symbol{$key}
                                                   : $token;
    }
    return $output;
}
199:
# Build the replacement for one <font face="symbol"> opening tag and the text
# that follows it.
#
# Arguments (as passed by symbolfontreplace):
#   1. the tag's attribute text found before the face attribute
#   2. the tag's attribute text found after the face attribute
#   3. the text following the tag, which is translated with latextrans
#
# Returns a <font> tag carrying only the remaining attributes, followed by the
# translated text wrapped in <m>$...$</m>.
sub insidetrans {
    my ($attrs_before, $attrs_after, $symbol_text) = @_;
    my $math = &latextrans($symbol_text);
    return join('', '<font', $attrs_before, $attrs_after, '><m>$', $math, '$</m>');
}
204:
# Replace every <font face="symbol">...</font> run in a document by a <font>
# element (minus the face attribute) whose content is LaTeX math markup.
#
# Parameter: the document text.
# Returns:   the converted text.  Closing font tags are written back as
#            lower-case '</font>' whatever their original case was.
sub symbolfontreplace {
    my $text = shift;

    # Work on the pieces between closing font tags so that each substitution
    # sees at most up to the end of one font element.  The negative limit
    # keeps trailing empty pieces; without it a document that ends in
    # </font> would lose that closing tag when the pieces are re-joined.
    my @fragments = split(/\<\/font\>/si, $text, -1);

    foreach my $fragment (@fragments) {
        $fragment =~ s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
    }
    return join('</font>', @fragments);
}
213:
# Normalise HTML/XML source text.
#
# Parameters:
#   $raw               text to clean
#   $full              true: keep <html>, <head> and <body> tags;
#                      false: strip them (attribute-less forms only)
#   $blocklinefeed     true: skip line-ending normalisation
#   $blockemptytags    true: skip empty-tag handling (this also skips the
#                      html/head/body stripping controlled by $full)
#   $blocklowercasing  true: skip lower-casing of standard tag names
#   $blockdesymboling  true: skip Symbol-font replacement
#
# Returns the cleaned text.
sub htmlclean {
    my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;

    # Take care of CRLF etc.  The order matters: two-character combinations
    # must be collapsed before the single characters are handled.
    unless ($blocklinefeed) {
        foreach my $eol ("\r\f", "\f\r", "\r\n", "\n\r", "\f", "\r",
                         '&#10;', '&#13;') {
            $raw =~ s/\Q$eol\E/\n/g;
        }
    }

    # Generate empty tags, remove wrong end tags
    unless ($blockemptytags) {
        my $empty_tag = qr/br|hr|img|meta|embed|allow|basefont/i;

        # The look-ahead makes sure the tag name ends here, so that longer
        # names which merely start with one of these (e.g. <break>) are left
        # alone.
        $raw =~ s/<($empty_tag)(?=[\s\/>])([^>]*?)>/<$1$2 \/>/gs;
        $raw =~ s/<\/(?:$empty_tag)>//g;
        # Tags that were already self-closing now end in "/ />": collapse.
        $raw =~ s/\/ \/>/\/>/g;

        unless ($full) {
            $raw =~ s/<[\/]*(body|head|html)>//gis;
        }
    }

    # Make standard tags lowercase
    unless ($blocklowercasing) {
        foreach my $tag ('html','body','head','meta','h1','h2','h3','h4','b','i','m',
                         'table','tr','td','th','p','br','hr','img','embed','font',
                         'a','strong','center','title','basefont','li','ol','ul',
                         'input','select','form','option','script','pre') {
            $raw =~ s/<$tag\s*>/<$tag>/gis;               # <TAG>, <TAG  >
            $raw =~ s/<\/$tag\s*>/<\/$tag>/gis;           # </TAG>
            $raw =~ s/<$tag\s([^>]*)>/<$tag $1>/gis;      # <TAG attributes>
        }
    }

    # Replace <font face="symbol">
    unless ($blockdesymboling) {
        $raw = &symbolfontreplace($raw);
    }
    return $raw;
}
249:
# Phase one: print the form body that lets the author choose which cleanup
# actions to attempt.  All four actions are pre-selected.  The hidden "phase"
# field set here is what the handler uses to dispatch to phase two.
#
# Parameters: ($r,$fn,$uname,$udom) - only the request object $r is used.
sub phaseone {
    my ($r,$fn,$uname,$udom)=@_;

    # One pre-checked checkbox followed by its (already translated) label.
    my $checkbox = sub {
        my ($name,$label) = @_;
        return '<input type="checkbox" name="'.$name.'" checked="checked" /> '.$label;
    };

    my $picker = &Apache::lonhtmlcommon::start_pick_box();
    $picker .= &Apache::lonhtmlcommon::row_title(&mt('Select actions to attempt'));
    $picker .= join('<br />',
        $checkbox->('linefeed', &mt('Linefeeds, formfeeds, and carriage returns')),
        $checkbox->('empty',    &mt('Empty tags')),
        $checkbox->('lower',    &mt('Lower casing')),
        $checkbox->('symbol',   &mt('Symbol font')),
    );
    $picker .= &Apache::lonhtmlcommon::row_closure(1);
    $picker .= &Apache::lonhtmlcommon::end_pick_box();
    $r->print($picker);

    my $submit = '<input type="hidden" name="phase" value="two" />';
    $submit .= '<p>';
    $submit .= '<input type="submit" value="'.&mt('Next').'" />';
    $submit .= '</p>';
    $r->print($submit);
}
277:
# Phase two: clean the file, store the result next to the original as
# <name>_Auto_Cleaned_Up.<ext>, and show error/warning counts for both
# versions together with links to inspect the result and buttons to accept
# or cancel.
#
# Parameters:
#   $r      request object used for output
#   $fn     file name below the author's directory
#   $uname  author user name
#   $udom   author domain
#
# If the original cannot be read or the cleaned copy cannot be written, an
# error is shown and logged, and the accept/cancel buttons are not offered.
sub phasetwo {
    my ($r,$fn,$uname,$udom)=@_;
    my $userdir='/home/httpd/html/priv/'.$udom.'/'.$uname.'/';

    # Check original file
    my $text='';
    my $in;
    unless (open($in,'<',$userdir.$fn)) {
        $r->log_reason('cleanup: cannot read '.$userdir.$fn.': '.$!,
                       $r->filename);
        $r->print(
            '<p class="LC_error">'
           .&mt('Could not read the original file.')
           .'</p>'
        );
        return;
    }
    {
        local $/;                       # slurp the whole file
        my $content=<$in>;
        $text=$content if (defined($content));
    }
    close($in);

    my $uri='/~'.$uname.$fn;
    my $result=&Apache::lonnet::ssi_body($uri,
                   ('grade_target'=>'web',
                    'return_only_error_and_warning_counts' => 1));
    my ($errorcount,$warningcount)=split(':',$result);

    # Display results for original file
    $r->print(
        &Apache::lonhtmlcommon::start_pick_box()
       .&Apache::lonhtmlcommon::row_title(&mt('Original file'))
       .&Apache::lonhtmlcommon::confirm_success(
            &mt('[quant,_1,error]',$errorcount), $errorcount)
       .'<br />'
       .&Apache::lonhtmlcommon::confirm_success(
            &mt('[quant,_1,warning]',$warningcount), $warningcount)
       .&Apache::lonhtmlcommon::row_closure()
    );

    # Clean up file; a step is blocked unless its checkbox was submitted
    $text=&htmlclean($text,1,
                     ($env{'form.linefeed'} ne 'on'),
                     ($env{'form.empty'} ne 'on'),
                     ($env{'form.lower'} ne 'on'),
                     ($env{'form.symbol'} ne 'on'));
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $newfn=$main.'_Auto_Cleaned_Up.'.$ext;

    my $written=0;
    if (open(my $out,'>',$userdir.$newfn)) {
        my $printed=print {$out} $text;
        # Buffered write errors only surface at close, so check both.
        $written=(close($out) && $printed) ? 1 : 0;
    }
    unless ($written) {
        $r->log_reason('cleanup: cannot write '.$userdir.$newfn.': '.$!,
                       $r->filename);
        # The pick box is already open: finish it properly before leaving.
        $r->print(
            &Apache::lonhtmlcommon::row_title(&mt('Cleaned up file'))
           .'<span class="LC_error">'
           .&mt('Could not write the cleaned up file.')
           .'</span>'
           .&Apache::lonhtmlcommon::row_closure(1)
           .&Apache::lonhtmlcommon::end_pick_box()
        );
        return;
    }

    my $newuri='/~'.$uname.$newfn;
    $result=&Apache::lonnet::ssi_body($newuri,
                ('grade_target'=>'web',
                 'return_only_error_and_warning_counts' => 1));
    ($errorcount,$warningcount)=split(':',$result);

    # Display results for cleaned up file
    $r->print(
        &Apache::lonhtmlcommon::row_title(&mt('Cleaned up file'))
       .&Apache::lonhtmlcommon::confirm_success(
            &mt('[quant,_1,error]',$errorcount), $errorcount)
       .'<br />'
       .&Apache::lonhtmlcommon::confirm_success(
            &mt('[quant,_1,warning]',$warningcount), $warningcount)
       .&Apache::lonhtmlcommon::row_closure()
    );

    # Display actions
    $r->print(
        &Apache::lonhtmlcommon::row_title(&mt('Actions'))
       .'<ul>'
       .'<li><a href="'.$newuri.'" target="prev">'
       .&mt('Open (and edit) cleaned up file in new window')
       .'</a></li>'
       .'<li><a href="/adm/diff?filename='.&escape($uri)
       .'&versionone=priv&filetwo='.&escape($newuri).'" target="prev">'
       .&mt('Show diffs in new window')
       .'</a></li>'
       .'</ul>'
       .&Apache::lonhtmlcommon::row_closure(1)
       .&Apache::lonhtmlcommon::end_pick_box()
       .'<p>'
       .'<input type="hidden" name="phase" value="three" />'
       .'<input type="submit" name="accept" value="'
       .&mt('Clean Up').'" />'
       .' <input type="submit" name="reject" value="'
       .&mt('Cancel').'" />'
       .'</p>'
    );
}
356:
# Phase three: finish the cleanup.  When the "accept" button was pressed the
# cleaned copy (<name>_Auto_Cleaned_Up.<ext>) replaces the original file;
# otherwise the cleaned copy is deleted.
#
# Parameters:
#   $r      request object used for output
#   $fn     file name below the author's directory
#   $uname  author user name
#   $udom   author domain
#
# A failing move or delete is logged and reported to the user instead of
# being passed over silently.
sub phasethree {
    my ($r,$fn,$uname,$udom)=@_;
    my $userdir='/home/httpd/html/priv/'.$udom.'/'.$uname.'/';
    my $old=$userdir.$fn;
    my ($main,$ext)=($fn=~/^(.*)\.(\w+)/);
    my $new=$userdir.$main.'_Auto_Cleaned_Up.'.$ext;

    if ($env{'form.accept'}) {
        $r->print(
            '<p class="LC_info">'
           .&mt('Accepting changes')
           .'</p>'
        );
        unless (move($new,$old)) {
            $r->log_reason('cleanup: cannot move '.$new.' to '.$old.': '.$!,
                           $r->filename);
            $r->print(
                '<p class="LC_error">'
               .&mt('Could not replace the original file.')
               .'</p>'
            );
        }
    } else {
        $r->print(
            '<p class="LC_info">'
           .&mt('Rejecting changes')
           .'</p>'
        );
        # A copy that is already gone is not an error.
        if ((-e $new) && (!unlink($new))) {
            $r->log_reason('cleanup: cannot remove '.$new.': '.$!,
                           $r->filename);
            $r->print(
                '<p class="LC_error">'
               .&mt('Could not remove the cleaned up file.')
               .'</p>'
            );
        }
    }
}
379:
380: # ---------------------------------------------------------------- Main Handler
# Request handler for the cleanup page.
#
# Takes the file to clean from the "filename" form/query parameter, checks
# via constructaccess that the requester may work on it, prints the page
# frame and dispatches on the "phase" form field to phaseone, phasetwo or
# phasethree.
#
# Returns:
#   HTTP_NOT_FOUND       no (usable) filename was given
#   HTTP_NOT_ACCEPTABLE  constructaccess did not return a user and domain
#   OK                   the page was produced (including the "cannot clean
#                        up this filetype" case)
sub handler {

    my $r=shift;
    my $fn='';

    # The filename comes straight from the request, so it must be escaped
    # whenever it is written into HTML text or an attribute value.
    my $html_escape=sub {
        my ($value)=@_;
        $value='' unless (defined($value));
        $value=~s/&/&amp;/g;
        $value=~s/</&lt;/g;
        $value=~s/>/&gt;/g;
        $value=~s/"/&quot;/g;
        $value=~s/'/&#39;/g;
        return $value;
    };

# Get query string for limited number of parameters

    &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
                                            ['filename']);

    if ($env{'form.filename'}) {
        $fn=$env{'form.filename'};
        # Accept a full URL: drop scheme and host
        $fn=~s/^https?\:\/\/[^\/]+//;
    } else {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' unspecified filename for cleanup', $r->filename);
        return HTTP_NOT_FOUND;
    }

    unless ($fn) {
        $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                       ' trying to cleanup non-existing file', $r->filename);
        return HTTP_NOT_FOUND;
    }

# ----------------------------------------------------------- Start page output
    my $uname;
    my $udom;

    ($uname,$udom)=
        &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
    unless (($uname) && ($udom)) {
        $r->log_reason($uname.' at '.$udom.
                       ' trying to cleanup file '.$env{'form.filename'}.
                       ' ('.$fn.') - not authorized',
                       $r->filename);
        return HTTP_NOT_ACCEPTABLE;
    }

    # Reduce the name to the path below the author's directory
    $fn=~s{/~($LONCAPA::username_re)}{};

    &Apache::loncommon::content_type($r,'text/html');
    $r->send_http_header;

    # Breadcrumbs
    my $brcrum = [{'href' => &Apache::loncommon::authorspace(),
                   'text' => 'Construction Space'},
                  {'href' => '',
                   'text' => 'Cleanup XML Document'}];

    $r->print(&Apache::loncommon::start_page('Cleanup XML Document',
                                             undef,
                                             {'bread_crumbs' => $brcrum,}));
    $r->print('<h2>'.$html_escape->($fn).'</h2>'.
              '<form action="/adm/cleanup" method="post">'.
              '<input type="hidden" name="filename" value="'.
              $html_escape->($env{'form.filename'}).'" />');
    unless ($fn=~/\.(problem|exam|quiz|assess|survey|form|library|xml|html|htm|xhtml|xhtm|sty)$/) {
        $r->print(&mt('Cannot cleanup this filetype'));
    } else {
        if ($env{'form.phase'} eq 'three') {
            &phasethree($r,$fn,$uname,$udom);
        } elsif ($env{'form.phase'} eq 'two') {
            &phasetwo($r,$fn,$uname,$udom);
        } else {
            &phaseone($r,$fn,$uname,$udom);
        }
    }

    # Directory part of the file name, with trailing slash
    my $dir=$fn;
    $dir=~s/\/[^\/]+$/\//;

    # NOTE(review): the phase subs build file system paths as
    # priv/<domain>/<user>/..., while these links use /priv/<user>/... -
    # confirm that this URL form is still mapped correctly.
    $r->print(
        '</form>'
       .&Apache::lonhtmlcommon::start_funclist()
       .&Apache::lonhtmlcommon::add_item_funclist(
            '<a href="/priv/'.$uname.'/'.$html_escape->($fn).'">'
           .&mt('Back to Source File').'</a>')
       .&Apache::lonhtmlcommon::add_item_funclist(
            '<a href="/priv/'.$uname.'/'.$html_escape->($dir).'">'
           .&mt('Back to Source Directory').'</a>')
       .&Apache::lonhtmlcommon::end_funclist()
       .&Apache::loncommon::end_page()
    );

    return OK;
}
463:
464: 1;
465: __END__
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>