File:
[LON-CAPA] /
loncom /
interface /
entities.pm
Revision
1.1:
download - view:
text,
annotated -
select for diffs
Mon Feb 11 11:35:46 2008 UTC (16 years, 5 months ago) by
foxr
Branches:
MAIN
CVS tags:
HEAD
Building up hash of entity -> latex translations to sanitize
lonprintout.pm's character_table sub and to
1. Make it easy to add new entities.
2. Add some entities that are not in the table.
3. I think this version will also run faster.
1: # The LearningOnline Network
2: # entity -> tex.
3: #
4: # $Id:
5: #
6: # Copyright Michigan State University Board of Trustees
7: #
8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
9: #
10: # LON-CAPA is free software; you can redistribute it and/or modify
11: # it under the terms of the GNU General Public License as published by
12: # the Free Software Foundation; either version 2 of the License, or
13: # (at your option) any later version.
14: #
15: # LON-CAPA is distributed in the hope that it will be useful,
16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18: # GNU General Public License for more details.
19: #
20: # You should have received a copy of the GNU General Public License
21: # along with LON-CAPA; if not, write to the Free Software
22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23: #
24: # /home/httpd/html/adm/gpl.txt
25: # http://www.lon-capa.org/
26: #
27: #
28: package Apache::entities;
29: use strict;
30: #
31: # This file contains a table driven entity-->latex converter.
32: #
33: # Assumptions:
34: # The number of entities in a resource is small compared with the
35: # number of possible entities that might be translated.
36: # Therefore the strategy is to match a general entity pattern
37: # &.+; over and over, pull out the match look it up in an entity -> tex hash
38: # and do the replacement.
39: #
40: # In order to simplify the hash, the following reductions are done:
41: # &#\d+; has the &# and ; stripped and is converted to an int.
42: # &#x.+; has the &#x and ; stripped and is converted to an int as a hex
43: # value.
44: # All others have the & and ; stripped.
45:
46:
47: # The hash: Add new conversions here; leave off the leading & and the trailing ;
48: # All numeric entities need only appear as their decimal versions
49: # (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
50: #
51: # This entity table is mercilessly cribbed from the HTML pocket reference
52: # table starting at pg 82. In most cases the LaTeX equivalent codes come from
53: # the original massive regular expression replacements originally by
54: # A. Sakharuk in lonprintout.pm
55: #
56: # Note numerical entities are essentially unicode character codes.
57: #
# Lookup table: entity identifier => LaTeX replacement text.
#
# Keys are either the decimal character code of a numeric entity or the
# name of a named entity, in both cases without the leading & (or &#) and
# without the trailing ;.
#
# The values are single-quoted strings.  Perl only processes \\ and \' in
# single quotes, so every LaTeX backslash is written as \\ and braces,
# double quotes and punctuation are written literally, with no backslash of
# their own.
#
# NOTE: the initialiser must be a parenthesised list.  Curly braces would
# build an anonymous hash reference and leave the hash with one junk key.
#
# Control words that produce a glyph are either wrapped in braces or
# followed by a space so that they cannot run into following letters
# (e.g. {\ae}on rather than \aeon).
my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7        => '',
    9        => '',
    10       => '',
    13       => '',

    # Translations to simple characters:

    32       => ' ',
    33       => '!',
    34       => '"',
    'quot'   => '"',
    35       => '\\#',
    36       => '\\$',
    37       => '\\%',
    38       => '\\&',
    'amp'    => '\\&',
    39       => '\'',                       # Apostrophe
    40       => '(',
    41       => ')',
    42       => '*',
    43       => '+',
    44       => ',',                        # comma
    45       => '-',
    46       => '.',
    47       => '/',
    48       => '0',
    49       => '1',
    50       => '2',
    51       => '3',
    52       => '4',
    53       => '5',
    54       => '6',
    55       => '7',
    56       => '8',
    57       => '9',
    58       => ':',
    59       => ';',
    60       => '\\ensuremath{<}',
    'lt'     => '\\ensuremath{<}',
    61       => '\\ensuremath{=}',
    62       => '\\ensuremath{>}',
    'gt'     => '\\ensuremath{>}',
    63       => '?',
    64       => '@',
    65       => 'A',
    66       => 'B',
    67       => 'C',
    68       => 'D',
    69       => 'E',
    70       => 'F',
    71       => 'G',
    72       => 'H',
    73       => 'I',
    74       => 'J',
    75       => 'K',
    76       => 'L',
    77       => 'M',
    78       => 'N',
    79       => 'O',
    80       => 'P',
    81       => 'Q',
    82       => 'R',
    83       => 'S',
    84       => 'T',
    85       => 'U',
    86       => 'V',
    87       => 'W',
    88       => 'X',
    89       => 'Y',
    90       => 'Z',
    91       => '[',
    92       => '\\ensuremath{\\setminus}',  # \setminus is \ with special spacing.
    93       => ']',
    94       => '\\ensuremath{\\wedge}',
    95       => '\\underline{\\makebox[2mm]{\\strut}}', # Underline 2mm of space for _
    96       => '`',
    97       => 'a',
    98       => 'b',
    99       => 'c',
    100      => 'd',
    101      => 'e',
    102      => 'f',
    103      => 'g',
    104      => 'h',
    105      => 'i',
    106      => 'j',
    107      => 'k',
    108      => 'l',
    109      => 'm',
    110      => 'n',
    111      => 'o',
    112      => 'p',
    113      => 'q',
    114      => 'r',
    115      => 's',
    116      => 't',
    117      => 'u',
    118      => 'v',
    119      => 'w',
    120      => 'x',
    121      => 'y',
    122      => 'z',
    123      => '\\{',
    124      => '|',
    125      => '\\}',
    126      => '\\~{}',                    # Same form as the 732/'tilde' entry.

    # Controls and Latin-1 supplement.  Note that some entities that have
    # a visible effect are not printing unicode characters; this applies
    # to the entries in the 130-159 range below.

    130      => ',',
    131      => '\\textflorin ',
    132      => ',,',                       # Low double left quotes.
    133      => '\\ensuremath{\\ldots}',
    134      => '\\ensuremath{\\dagger}',
    135      => '\\ensuremath{\\ddagger}',
    136      => '\\ensuremath{\\wedge}',
    137      => '\\textperthousand ',
    138      => '\\v{S}',
    139      => '\\ensuremath{<}',
    140      => '{\\OE}',

    # There's a gap here in my entity table

    145      => '`',
    146      => '\'',
    147      => '``',
    148      => '\'\'',
    149      => '\\ensuremath{\\bullet}',
    150      => '--',
    151      => '---',
    152      => '\\ensuremath{\\sim}',
    153      => '\\texttrademark ',
    154      => '\\v{s}',
    155      => '\\ensuremath{>}',
    156      => '\\oe ',

    # Another short gap:

    159      => '\\"{Y}',
    160      => '~',
    'nbsp'   => '~',
    161      => '\\textexclamdown ',
    'iexcl'  => '\\textexclamdown ',
    162      => '\\textcent ',
    'cent'   => '\\textcent ',
    163      => '\\pounds ',
    'pound'  => '\\pounds ',
    164      => '\\textcurrency ',
    'curren' => '\\textcurrency ',
    165      => '\\textyen ',
    'yen'    => '\\textyen ',
    166      => '\\textbrokenbar ',
    'brvbar' => '\\textbrokenbar ',
    167      => '\\textsection ',
    'sect'   => '\\textsection ',
    168      => '\\texthighdieresis ',
    'uml'    => '\\texthighdieresis ',
    169      => '\\copyright ',
    'copy'   => '\\copyright ',
    170      => '\\textordfeminine ',
    'ordf'   => '\\textordfeminine ',
    171      => '\\ensuremath{\\ll}',       # approximation of left angle quote.
    'laquo'  => '\\ensuremath{\\ll}',       #   ""
    172      => '\\ensuremath{\\neg}',
    'not'    => '\\ensuremath{\\neg}',
    173      => ' - ',
    'shy'    => ' - ',
    174      => '\\textregistered ',
    'reg'    => '\\textregistered ',
    175      => '\\ensuremath{^{-}}',
    'macr'   => '\\ensuremath{^{-}}',
    176      => '\\ensuremath{^{\\circ}}',
    'deg'    => '\\ensuremath{^{\\circ}}',
    177      => '\\ensuremath{\\pm}',
    'plusmn' => '\\ensuremath{\\pm}',
    178      => '\\ensuremath{^2}',
    'sup2'   => '\\ensuremath{^2}',
    179      => '\\ensuremath{^3}',
    'sup3'   => '\\ensuremath{^3}',
    180      => '\\textacute ',
    'acute'  => '\\textacute ',
    181      => '\\ensuremath{\\mu}',
    'micro'  => '\\ensuremath{\\mu}',
    182      => '\\P ',
    'para'   => '\\P ',
    183      => '\\ensuremath{\\cdot}',
    'middot' => '\\ensuremath{\\cdot}',
    184      => '\\c{\\strut}',
    'cedil'  => '\\c{\\strut}',
    185      => '\\ensuremath{^1}',
    'sup1'   => '\\ensuremath{^1}',
    186      => '\\textordmasculine ',
    'ordm'   => '\\textordmasculine ',
    187      => '\\ensuremath{\\gg}',
    'raquo'  => '\\ensuremath{\\gg}',
    188      => '\\textonequarter ',
    'frac14' => '\\textonequarter ',
    189      => '\\textonehalf ',
    'frac12' => '\\textonehalf ',
    190      => '\\textthreequarters ',
    'frac34' => '\\textthreequarters ',
    191      => '\\textquestiondown ',
    'iquest' => '\\textquestiondown ',
    192      => '\\`{A}',
    'Agrave' => '\\`{A}',
    193      => '\\\'{A}',
    'Aacute' => '\\\'{A}',
    194      => '\\^{A}',
    'Acirc'  => '\\^{A}',
    195      => '\\~{A}',
    'Atilde' => '\\~{A}',
    196      => '\\"{A}',
    'Auml'   => '\\"{A}',
    197      => '{\\AA}',
    'Aring'  => '{\\AA}',
    198      => '{\\AE}',
    'AElig'  => '{\\AE}',
    199      => '\\c{C}',                   # Upper case C with cedilla.
    'Ccedil' => '\\c{C}',
    200      => '\\`{E}',
    'Egrave' => '\\`{E}',
    201      => '\\\'{E}',
    'Eacute' => '\\\'{E}',
    202      => '\\^{E}',
    'Ecirc'  => '\\^{E}',
    203      => '\\"{E}',
    'Euml'   => '\\"{E}',
    204      => '\\`{I}',
    'Igrave' => '\\`{I}',
    205      => '\\\'{I}',
    'Iacute' => '\\\'{I}',
    206      => '\\^{I}',
    'Icirc'  => '\\^{I}',
    207      => '\\"{I}',
    'Iuml'   => '\\"{I}',
    208      => '{\\DH}',                   # Capital eth (was \OE, the OE ligature).
    'ETH'    => '{\\DH}',
    209      => '\\~{N}',
    'Ntilde' => '\\~{N}',
    210      => '\\`{O}',
    'Ograve' => '\\`{O}',
    211      => '\\\'{O}',
    'Oacute' => '\\\'{O}',
    212      => '\\^{O}',
    'Ocirc'  => '\\^{O}',
    213      => '\\~{O}',
    'Otilde' => '\\~{O}',
    214      => '\\"{O}',
    'Ouml'   => '\\"{O}',
    215      => '\\ensuremath{\\times}',
    'times'  => '\\ensuremath{\\times}',
    216      => '{\\O}',
    'Oslash' => '{\\O}',
    217      => '\\`{U}',
    'Ugrave' => '\\`{U}',
    218      => '\\\'{U}',
    'Uacute' => '\\\'{U}',
    219      => '\\^{U}',
    'Ucirc'  => '\\^{U}',
    220      => '\\"{U}',
    'Uuml'   => '\\"{U}',
    221      => '\\\'{Y}',
    'Yacute' => '\\\'{Y}',
    222      => '{\\TH}',
    'THORN'  => '{\\TH}',
    223      => '{\\ss}',                   # Sharp s; \ss is the standard LaTeX name.
    'szlig'  => '{\\ss}',
    224      => '\\`{a}',
    'agrave' => '\\`{a}',
    225      => '\\\'{a}',
    'aacute' => '\\\'{a}',
    226      => '\\^{a}',
    'acirc'  => '\\^{a}',
    227      => '\\~{a}',
    'atilde' => '\\~{a}',
    228      => '\\"{a}',
    'auml'   => '\\"{a}',
    229      => '{\\aa}',
    'aring'  => '{\\aa}',
    230      => '{\\ae}',
    'aelig'  => '{\\ae}',
    231      => '\\c{c}',
    'ccedil' => '\\c{c}',
    232      => '\\`{e}',
    'egrave' => '\\`{e}',
    233      => '\\\'{e}',
    'eacute' => '\\\'{e}',
    234      => '\\^{e}',
    'ecirc'  => '\\^{e}',
    235      => '\\"{e}',
    'euml'   => '\\"{e}',
    236      => '\\`{i}',
    'igrave' => '\\`{i}',
    237      => '\\\'{i}',
    'iacute' => '\\\'{i}',
    238      => '\\^{i}',
    'icirc'  => '\\^{i}',
    239      => '\\"{i}',
    'iuml'   => '\\"{i}',
    240      => '{\\dh}',
    'eth'    => '{\\dh}',
    241      => '\\~{n}',
    'ntilde' => '\\~{n}',
    242      => '\\`{o}',
    'ograve' => '\\`{o}',
    243      => '\\\'{o}',
    'oacute' => '\\\'{o}',
    244      => '\\^{o}',
    'ocirc'  => '\\^{o}',
    245      => '\\~{o}',
    'otilde' => '\\~{o}',
    246      => '\\"{o}',
    'ouml'   => '\\"{o}',
    247      => '\\ensuremath{\\div}',
    'divide' => '\\ensuremath{\\div}',
    248      => '{\\o}',
    'oslash' => '{\\o}',
    249      => '\\`{u}',
    'ugrave' => '\\`{u}',
    250      => '\\\'{u}',
    'uacute' => '\\\'{u}',
    251      => '\\^{u}',
    'ucirc'  => '\\^{u}',
    252      => '\\"{u}',
    'uuml'   => '\\"{u}',
    253      => '\\\'{y}',
    'yacute' => '\\\'{y}',
    254      => '{\\th}',
    'thorn'  => '{\\th}',
    255      => '\\"{y}',
    'yuml'   => '\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.  The misspelt 'plank' key is
    # kept so that anything already relying on it continues to work.

    295      => '\\ensuremath{\\hbar}',
    'planck' => '\\ensuremath{\\hbar}',
    'plank'  => '\\ensuremath{\\hbar}',

    # Latin extended-A HTML 4.01 entities:

    338      => '{\\OE}',
    'OElig'  => '{\\OE}',
    339      => '{\\oe}',
    'oelig'  => '{\\oe}',
    352      => '\\v{S}',
    'Scaron' => '\\v{S}',
    353      => '\\v{s}',
    'scaron' => '\\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}',

    # Latin extended B HTML 4.01 entities

    402      => '\\ensuremath{f}',
    'fnof'   => '\\ensuremath{f}',

    # Spacing modifier letters:

    710      => '\\^{}',
    'circ'   => '\\^{}',
    732      => '\\~{}',
    'tilde'  => '\\~{}',

    # Greek uppercase:

    913       => '\\ensuremath{\\mathrm{A}}',
    'Alpha'   => '\\ensuremath{\\mathrm{A}}',
    914       => '\\ensuremath{\\mathrm{B}}',
    'Beta'    => '\\ensuremath{\\mathrm{B}}',
    915       => '\\ensuremath{\\Gamma}',
    'Gamma'   => '\\ensuremath{\\Gamma}',
    916       => '\\ensuremath{\\Delta}',
    'Delta'   => '\\ensuremath{\\Delta}',
    917       => '\\ensuremath{\\mathrm{E}}',
    'Epsilon' => '\\ensuremath{\\mathrm{E}}',
    918       => '\\ensuremath{\\mathrm{Z}}',
    'Zeta'    => '\\ensuremath{\\mathrm{Z}}',
    919       => '\\ensuremath{\\mathrm{H}}',
    'Eta'     => '\\ensuremath{\\mathrm{H}}',
    920       => '\\ensuremath{\\Theta}',
    'Theta'   => '\\ensuremath{\\Theta}',
    921       => '\\ensuremath{\\mathrm{I}}',
    'Iota'    => '\\ensuremath{\\mathrm{I}}',
    922       => '\\ensuremath{\\mathrm{K}}',
    'Kappa'   => '\\ensuremath{\\mathrm{K}}',
    923       => '\\ensuremath{\\Lambda}',
    'Lambda'  => '\\ensuremath{\\Lambda}',
    924       => '\\ensuremath{\\mathrm{M}}',
    'Mu'      => '\\ensuremath{\\mathrm{M}}',
    925       => '\\ensuremath{\\mathrm{N}}',
    'Nu'      => '\\ensuremath{\\mathrm{N}}',
    926       => '\\ensuremath{\\Xi}',      # \Xi is its own symbol; no \mathrm wrapper.
    'Xi'      => '\\ensuremath{\\Xi}',
    927       => '\\ensuremath{\\mathrm{O}}',
    'Omicron' => '\\ensuremath{\\mathrm{O}}',
    928       => '\\ensuremath{\\Pi}',
    'Pi'      => '\\ensuremath{\\Pi}',
    929       => '\\ensuremath{\\mathrm{P}}',
    'Rho'     => '\\ensuremath{\\mathrm{P}}',

    # Skips 930

    931       => '\\ensuremath{\\Sigma}',
    'Sigma'   => '\\ensuremath{\\Sigma}',
    932       => '\\ensuremath{\\mathrm{T}}',
    'Tau'     => '\\ensuremath{\\mathrm{T}}',
    933       => '\\ensuremath{\\Upsilon}',
    'Upsilon' => '\\ensuremath{\\Upsilon}',
    934       => '\\ensuremath{\\Phi}',
    'Phi'     => '\\ensuremath{\\Phi}',
    935       => '\\ensuremath{\\mathrm{X}}',
    'Chi'     => '\\ensuremath{\\mathrm{X}}',
    936       => '\\ensuremath{\\Psi}',
    'Psi'     => '\\ensuremath{\\Psi}',
    937       => '\\ensuremath{\\Omega}',
    'Omega'   => '\\ensuremath{\\Omega}',


    # Greek lowercase:

    945       => '\\ensuremath{\\alpha}',
    'alpha'   => '\\ensuremath{\\alpha}',
    946       => '\\ensuremath{\\beta}',
    'beta'    => '\\ensuremath{\\beta}',
    947       => '\\ensuremath{\\gamma}',
    'gamma'   => '\\ensuremath{\\gamma}',
    948       => '\\ensuremath{\\delta}',
    'delta'   => '\\ensuremath{\\delta}',
    949       => '\\ensuremath{\\epsilon}',
    'epsilon' => '\\ensuremath{\\epsilon}',
    950       => '\\ensuremath{\\zeta}',
    'zeta'    => '\\ensuremath{\\zeta}',
    951       => '\\ensuremath{\\eta}',
    'eta'     => '\\ensuremath{\\eta}',

);
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>