Annotation of loncom/interface/entities.pm, revision 1.1
1.1 ! foxr 1: # The LearningOnline Network
! 2: # entity -> tex.
! 3: #
! 4: # $Id:
! 5: #
! 6: # Copyright Michigan State University Board of Trustees
! 7: #
! 8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 9: #
! 10: # LON-CAPA is free software; you can redistribute it and/or modify
! 11: # it under the terms of the GNU General Public License as published by
! 12: # the Free Software Foundation; either version 2 of the License, or
! 13: # (at your option) any later version.
! 14: #
! 15: # LON-CAPA is distributed in the hope that it will be useful,
! 16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 18: # GNU General Public License for more details.
! 19: #
! 20: # You should have received a copy of the GNU General Public License
! 21: # along with LON-CAPA; if not, write to the Free Software
! 22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 23: #
! 24: # /home/httpd/html/adm/gpl.txt
! 25: # http://www.lon-capa.org/
! 26: #
! 27: #
! 28: package Apache::entities;
! 29: use strict;
! 30: #
! 31: # This file contains a table driven entity-->latex converter.
! 32: #
! 33: # Assumptions:
! 34: # The number of entities in a resource is small compared with the
! 35: # number of possible entities that might be translated.
! 36: # Therefore the strategy is to match a general entity pattern
! 37: # &.+; over and over, pull out the match look it up in an entity -> tex hash
! 38: # and do the replacement.
! 39: #
! 40: # In order to simplify the hash, the following reductions are done:
! 41: # &#\d+; has the &# and ; stripped and is converted to an int.
! 42: # &#x.+; has the &#x and ; stripped and is converted to an int as a hex
! 43: # value.
! 44: # All others have the & and ; stripped.
! 45:
! 46:
! 47: # The hash: Add new conversions here; leave off the leading & and the trailing ;
! 48: # All numeric entities need only appear as their decimal versions
! 49: # (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
! 50: #
! 51: # This entity table is mercilessly cribbed from the HTML pocket reference
! 52: # table starting at pg 82. In most cases the LaTeX equivalent codes come from
! 53: # the original massive regular expression replacements originally by
! 54: # A. Sakharuk in lonprintout.pm
! 55: #
! 56: # Note numerical entities are essentially unicode character codes.
! 57: #
# Entity -> LaTeX lookup table.
#
# Keys are either the decimal character code of a numeric entity (e.g. 38)
# or the bare entity name without the leading '&' and trailing ';'
# (e.g. 'amp').  Values are the LaTeX text that replaces the entity.
#
# Quoting convention for the values: every value is a single-quoted Perl
# string, so
#   - '\\' produces ONE backslash in the LaTeX output,
#   - '\'' produces an apostrophe,
#   - every other character (including { } ` ^ ~ " # $) is literal and must
#     NOT be preceded by a backslash unless that backslash is meant to
#     appear in the LaTeX output (written as '\\').
# Control words that could be followed by a letter are terminated either by
# a trailing space or by wrapping them in braces so that they cannot swallow
# the text that follows the entity.
#
# The table must be initialised from a parenthesised list; using { ... }
# would store a single hash reference instead of the key/value pairs.
#
my %entities = (

    # ---- ASCII code page: ----------------

    # Translation to empty strings:

    7  => '',
    9  => '',
    10 => '',
    13 => '',

    # Translations to simple characters:

    32     => ' ',
    33     => '!',
    34     => '"',
    'quot' => '"',
    35     => '\\#',
    36     => '\\$',
    37     => '\\%',
    38     => '\\&',
    'amp'  => '\\&',
    39     => '\'',     # Apostrophe
    40     => '(',
    41     => ')',
    42     => '*',
    43     => '+',
    44     => ',',      # comma
    45     => '-',
    46     => '.',
    47     => '/',
    58     => ':',
    59     => ';',
    60     => '\\ensuremath{<}',
    'lt'   => '\\ensuremath{<}',
    61     => '\\ensuremath{=}',
    62     => '\\ensuremath{>}',
    'gt'   => '\\ensuremath{>}',
    63     => '?',
    64     => '@',
    91     => '[',
    92     => '\\ensuremath{\\setminus}',    # \setminus is \ with special spacing.
    93     => ']',
    94     => '\\ensuremath{\\wedge}',
    95     => '\\underline{\\makebox[2mm]{\\strut}}',    # Underline 2mm of space for _
    96     => '`',
    123    => '\\{',
    124    => '|',
    125    => '\\}',
    126    => '\\~{}',    # Empty argument so the accent cannot land on the next character.

    # Digits (48-57), A-Z (65-90) and a-z (97-122) translate to themselves.

    ( map { ( $_ => chr($_) ) } 48 .. 57, 65 .. 90, 97 .. 122 ),

    # Controls and Latin-1 supplement.  Note that some entities that have
    # a visible effect are not printing unicode characters.  Specifically,
    # codes 130-159 are control codes in unicode, but are commonly rendered
    # with the glyphs translated below.

    130 => ',',
    131 => '\\textflorin ',
    132 => ',,',          # Low double left quotes.
    133 => '\\ensuremath{\\ldots}',
    134 => '\\ensuremath{\\dagger}',
    135 => '\\ensuremath{\\ddagger}',
    136 => '\\ensuremath{\\wedge}',
    137 => '\\textperthousand ',
    138 => '\\v{S}',
    139 => '\\ensuremath{<}',
    140 => '{\\OE}',

    # There's a gap here in my entity table

    145 => '`',
    146 => '\'',
    147 => '``',
    148 => '\'\'',
    149 => '\\ensuremath{\\bullet}',
    150 => '--',
    151 => '---',
    152 => '\\ensuremath{\\sim}',
    153 => '\\texttrademark ',
    154 => '\\v{s}',
    155 => '\\ensuremath{>}',
    156 => '\\oe ',

    # Another short gap:

    159      => '\\"Y',
    160      => '~',
    'nbsp'   => '~',
    161      => '\\textexclamdown ',
    'iexcl'  => '\\textexclamdown ',
    162      => '\\textcent ',
    'cent'   => '\\textcent ',
    163      => '\\pounds ',
    'pound'  => '\\pounds ',
    164      => '\\textcurrency ',
    'curren' => '\\textcurrency ',
    165      => '\\textyen ',
    'yen'    => '\\textyen ',
    166      => '\\textbrokenbar ',
    'brvbar' => '\\textbrokenbar ',
    167      => '\\textsection ',
    'sect'   => '\\textsection ',
    168      => '\\texthighdieresis ',
    'uml'    => '\\texthighdieresis ',
    169      => '\\copyright ',
    'copy'   => '\\copyright ',
    170      => '\\textordfeminine ',
    'ordf'   => '\\textordfeminine ',
    171      => '\\ensuremath{\\ll}',    # approximation of left angle quote.
    'laquo'  => '\\ensuremath{\\ll}',    # ""
    172      => '\\ensuremath{\\neg}',
    'not'    => '\\ensuremath{\\neg}',
    173      => ' - ',
    'shy'    => ' - ',
    174      => '\\textregistered ',
    'reg'    => '\\textregistered ',
    175      => '\\ensuremath{^{-}}',
    'macr'   => '\\ensuremath{^{-}}',
    176      => '\\ensuremath{^{\\circ}}',
    'deg'    => '\\ensuremath{^{\\circ}}',
    177      => '\\ensuremath{\\pm}',
    'plusmn' => '\\ensuremath{\\pm}',
    178      => '\\ensuremath{^2}',
    'sup2'   => '\\ensuremath{^2}',
    179      => '\\ensuremath{^3}',
    'sup3'   => '\\ensuremath{^3}',
    180      => '\\textacute ',
    'acute'  => '\\textacute ',
    181      => '\\ensuremath{\\mu}',
    'micro'  => '\\ensuremath{\\mu}',
    182      => '\\P ',
    'para'   => '\\P ',
    183      => '\\ensuremath{\\cdot}',
    'middot' => '\\ensuremath{\\cdot}',
    184      => '\\c{\\strut}',
    'cedil'  => '\\c{\\strut}',
    185      => '\\ensuremath{^1}',
    'sup1'   => '\\ensuremath{^1}',
    186      => '\\textordmasculine ',
    'ordm'   => '\\textordmasculine ',
    187      => '\\ensuremath{\\gg}',
    'raquo'  => '\\ensuremath{\\gg}',
    188      => '\\textonequarter ',
    'frac14' => '\\textonequarter ',
    189      => '\\textonehalf ',
    'frac12' => '\\textonehalf ',
    190      => '\\textthreequarters ',
    'frac34' => '\\textthreequarters ',
    191      => '\\textquestiondown ',
    'iquest' => '\\textquestiondown ',
    192      => '\\`{A}',
    'Agrave' => '\\`{A}',
    193      => '\\\'{A}',
    'Aacute' => '\\\'{A}',
    194      => '\\^{A}',
    'Acirc'  => '\\^{A}',
    195      => '\\~{A}',
    'Atilde' => '\\~{A}',
    196      => '\\"{A}',
    'Auml'   => '\\"{A}',
    197      => '{\\AA}',
    'Aring'  => '{\\AA}',
    198      => '{\\AE}',
    'AElig'  => '{\\AE}',
    199      => '\\c{C}',
    'Ccedil' => '\\c{C}',
    200      => '\\`{E}',
    'Egrave' => '\\`{E}',
    201      => '\\\'{E}',
    'Eacute' => '\\\'{E}',
    202      => '\\^{E}',
    'Ecirc'  => '\\^{E}',
    203      => '\\"{E}',
    'Euml'   => '\\"{E}',
    204      => '\\`{I}',
    'Igrave' => '\\`{I}',
    205      => '\\\'{I}',
    'Iacute' => '\\\'{I}',
    206      => '\\^{I}',
    'Icirc'  => '\\^{I}',
    207      => '\\"{I}',
    'Iuml'   => '\\"{I}',
    208      => '{\\DH}',     # Capital eth; partner of \dh used for 240.
    'ETH'    => '{\\DH}',
    209      => '\\~{N}',
    'Ntilde' => '\\~{N}',
    210      => '\\`{O}',
    'Ograve' => '\\`{O}',
    211      => '\\\'{O}',
    'Oacute' => '\\\'{O}',
    212      => '\\^{O}',
    'Ocirc'  => '\\^{O}',
    213      => '\\~{O}',
    'Otilde' => '\\~{O}',
    214      => '\\"{O}',
    'Ouml'   => '\\"{O}',
    215      => '\\ensuremath{\\times}',
    'times'  => '\\ensuremath{\\times}',
    216      => '{\\O}',
    'Oslash' => '{\\O}',
    217      => '\\`{U}',
    'Ugrave' => '\\`{U}',
    218      => '\\\'{U}',
    'Uacute' => '\\\'{U}',
    219      => '\\^{U}',
    'Ucirc'  => '\\^{U}',
    220      => '\\"{U}',
    'Uuml'   => '\\"{U}',
    221      => '\\\'{Y}',
    'Yacute' => '\\\'{Y}',
    222      => '{\\TH}',
    'THORN'  => '{\\TH}',
    223      => '{\\ss}',
    'szlig'  => '{\\ss}',
    224      => '\\`{a}',
    'agrave' => '\\`{a}',
    225      => '\\\'{a}',
    'aacute' => '\\\'{a}',
    226      => '\\^{a}',
    'acirc'  => '\\^{a}',
    227      => '\\~{a}',
    'atilde' => '\\~{a}',
    228      => '\\"{a}',
    'auml'   => '\\"{a}',
    229      => '{\\aa}',
    'aring'  => '{\\aa}',
    230      => '{\\ae}',
    'aelig'  => '{\\ae}',
    231      => '\\c{c}',
    'ccedil' => '\\c{c}',
    232      => '\\`{e}',
    'egrave' => '\\`{e}',
    233      => '\\\'{e}',
    'eacute' => '\\\'{e}',
    234      => '\\^{e}',
    'ecirc'  => '\\^{e}',
    235      => '\\"{e}',
    'euml'   => '\\"{e}',
    236      => '\\`{i}',
    'igrave' => '\\`{i}',
    237      => '\\\'{i}',
    'iacute' => '\\\'{i}',
    238      => '\\^{i}',
    'icirc'  => '\\^{i}',
    239      => '\\"{i}',
    'iuml'   => '\\"{i}',
    240      => '{\\dh}',
    'eth'    => '{\\dh}',
    241      => '\\~{n}',
    'ntilde' => '\\~{n}',
    242      => '\\`{o}',
    'ograve' => '\\`{o}',
    243      => '\\\'{o}',
    'oacute' => '\\\'{o}',
    244      => '\\^{o}',
    'ocirc'  => '\\^{o}',
    245      => '\\~{o}',
    'otilde' => '\\~{o}',
    246      => '\\"{o}',
    'ouml'   => '\\"{o}',
    247      => '\\ensuremath{\\div}',
    'divide' => '\\ensuremath{\\div}',
    248      => '{\\o}',
    'oslash' => '{\\o}',
    249      => '\\`{u}',
    'ugrave' => '\\`{u}',
    250      => '\\\'{u}',
    'uacute' => '\\\'{u}',
    251      => '\\^{u}',
    'ucirc'  => '\\^{u}',
    252      => '\\"{u}',
    'uuml'   => '\\"{u}',
    253      => '\\\'{y}',
    'yacute' => '\\\'{y}',
    254      => '{\\th}',
    'thorn'  => '{\\th}',
    255      => '\\"{y}',
    'yuml'   => '\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.  The misspelled 'plank' key is
    # kept so that any existing lookups of it continue to work.

    295      => '\\ensuremath{\\hbar}',
    'planck' => '\\ensuremath{\\hbar}',
    'plank'  => '\\ensuremath{\\hbar}',

    # Latin extended-A HTML 4.01 entities:

    338      => '{\\OE}',
    'OElig'  => '{\\OE}',
    339      => '{\\oe}',
    'oelig'  => '{\\oe}',
    352      => '\\v{S}',
    'Scaron' => '\\v{S}',
    353      => '\\v{s}',
    'scaron' => '\\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}',

    # Latin extended B HTML 4.01 entities

    402    => '\\ensuremath{f}',
    'fnof' => '\\ensuremath{f}',

    # Spacing modifier letters:

    710     => '\\^{}',
    'circ'  => '\\^{}',
    732     => '\\~{}',
    'tilde' => '\\~{}',

    # Greek uppercase:

    913       => '\\ensuremath{\\mathrm{A}}',
    'Alpha'   => '\\ensuremath{\\mathrm{A}}',
    914       => '\\ensuremath{\\mathrm{B}}',
    'Beta'    => '\\ensuremath{\\mathrm{B}}',
    915       => '\\ensuremath{\\Gamma}',
    'Gamma'   => '\\ensuremath{\\Gamma}',
    916       => '\\ensuremath{\\Delta}',
    'Delta'   => '\\ensuremath{\\Delta}',
    917       => '\\ensuremath{\\mathrm{E}}',
    'Epsilon' => '\\ensuremath{\\mathrm{E}}',
    918       => '\\ensuremath{\\mathrm{Z}}',
    'Zeta'    => '\\ensuremath{\\mathrm{Z}}',
    919       => '\\ensuremath{\\mathrm{H}}',
    'Eta'     => '\\ensuremath{\\mathrm{H}}',
    920       => '\\ensuremath{\\Theta}',
    'Theta'   => '\\ensuremath{\\Theta}',
    921       => '\\ensuremath{\\mathrm{I}}',
    'Iota'    => '\\ensuremath{\\mathrm{I}}',
    922       => '\\ensuremath{\\mathrm{K}}',
    'Kappa'   => '\\ensuremath{\\mathrm{K}}',
    923       => '\\ensuremath{\\Lambda}',
    'Lambda'  => '\\ensuremath{\\Lambda}',
    924       => '\\ensuremath{\\mathrm{M}}',
    'Mu'      => '\\ensuremath{\\mathrm{M}}',
    925       => '\\ensuremath{\\mathrm{N}}',
    'Nu'      => '\\ensuremath{\\mathrm{N}}',
    926       => '\\ensuremath{\\Xi}',
    'Xi'      => '\\ensuremath{\\Xi}',
    927       => '\\ensuremath{\\mathrm{O}}',
    'Omicron' => '\\ensuremath{\\mathrm{O}}',
    928       => '\\ensuremath{\\Pi}',
    'Pi'      => '\\ensuremath{\\Pi}',
    929       => '\\ensuremath{\\mathrm{P}}',
    'Rho'     => '\\ensuremath{\\mathrm{P}}',

    # Skips 930

    931       => '\\ensuremath{\\Sigma}',
    'Sigma'   => '\\ensuremath{\\Sigma}',
    932       => '\\ensuremath{\\mathrm{T}}',
    'Tau'     => '\\ensuremath{\\mathrm{T}}',
    933       => '\\ensuremath{\\Upsilon}',
    'Upsilon' => '\\ensuremath{\\Upsilon}',
    934       => '\\ensuremath{\\Phi}',
    'Phi'     => '\\ensuremath{\\Phi}',
    935       => '\\ensuremath{\\mathrm{X}}',
    'Chi'     => '\\ensuremath{\\mathrm{X}}',
    936       => '\\ensuremath{\\Psi}',
    'Psi'     => '\\ensuremath{\\Psi}',
    937       => '\\ensuremath{\\Omega}',
    'Omega'   => '\\ensuremath{\\Omega}',

    # Greek lowercase:

    945       => '\\ensuremath{\\alpha}',
    'alpha'   => '\\ensuremath{\\alpha}',
    946       => '\\ensuremath{\\beta}',
    'beta'    => '\\ensuremath{\\beta}',
    947       => '\\ensuremath{\\gamma}',
    'gamma'   => '\\ensuremath{\\gamma}',
    948       => '\\ensuremath{\\delta}',
    'delta'   => '\\ensuremath{\\delta}',
    949       => '\\ensuremath{\\epsilon}',
    'epsilon' => '\\ensuremath{\\epsilon}',
    950       => '\\ensuremath{\\zeta}',
    'zeta'    => '\\ensuremath{\\zeta}',
    951       => '\\ensuremath{\\eta}',
    'eta'     => '\\ensuremath{\\eta}',

);
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>