diff libinterp/corefcn/txt-eng.cc @ 17327:fe6518a1d87c

Move TeX symbol decoding into the lexer (bug #39831). * libinterp/Makefile.am (BUILT_SOURCES): Add corefcn/oct-tex-symbols.cc. (BUILT_DISTFILES): Add corefcn/oct-tex-lexer.ll and corefcn/oct-tex-symbols.cc. (EXTRA_DIST): Add corefcn/oct-tex-lexer.in.ll and corefcn/oct-tex-symbols.in. (ULT_DIST_SRC): Filter out corefcn/oct-tex-lexer.ll from DIST_SRC and add corefcn/oct-tex-lexer.in.ll instead. (CLEAN_FILES): Add corefcn/oct-tex-parser.output. * libinterp/corefcn/modules.mk (corefcn/oct-tex-lexer.ll, corefcn/oct-tex-symbols.cc): New rules to build the TeX lexer. (corefcn/txt-eng.cc): Add dependency on corefcn/oct-tex-symbols.cc. * libinterp/corefcn/oct-tex-lexer.in.ll: Renamed from oct-tex-lexer.ll. Remove COMMAND state. Remove ID regex. Replace rules for symbols with tag @SYMBOL_RULES@. * libinterp/corefcn/oct-tex-parser.yy (ID, CMD, identifier): Remove tokens. (SYM, sym): New token and value. (symbol_element): Build from SYM. * libinterp/corefcn/oct-tex-symbols.in: New file with supported TeX symbols and corresponding codes (unicode and MS symbols). * libinterp/corefcn/txt-eng.h (class text_element_symbol): Make it inherit from text_element. (text_element_symbol::code): Removed member. (text_element_symbol::symbol): New member. (text_element_symbol::text_element_symbol): Adapt constructor. (text_element_symbol::get_symbol): New method. (text_element_symbol::get_symbol_code): Make const. * libinterp/corefcn/txt-eng.cc (symbol_names, symbol_codes): Remove static variables, now auto-generated from oct-tex-symbols.in. (oct-tex-symbols.cc): New include. (text_element_symbol::get_symbol_code): Change implementation to simply index into auto-generated symbol_codes array. * libinterp/corefcn/txt-eng-ft.cc (ft_render::visit(text_element_symbol)): Don't use text_element_symbol::string_value(), use text_element_symbol::get_symbol() instead.
author Michael Goffioul <michael.goffioul@gmail.com>
date Sat, 24 Aug 2013 14:27:09 -0400
parents 199444fc2c6b
children 4197fc428c7d
line wrap: on
line diff
--- a/libinterp/corefcn/txt-eng.cc	Sat Aug 24 12:44:23 2013 -0400
+++ b/libinterp/corefcn/txt-eng.cc	Sat Aug 24 14:27:09 2013 -0400
@@ -25,248 +25,15 @@
 #endif
 
 #include "txt-eng.h"
-
-static const char* symbol_names[] = {
-  "alpha",
-  "angle",
-  "ast",
-  "beta",
-  "gamma",
-  "delta",
-  "epsilon",
-  "zeta",
-  "eta",
-  "theta",
-  "vartheta",
-  "iota",
-  "kappa",
-  "lambda",
-  "mu",
-  "nu",
-  "xi",
-  "pi",
-  "rho",
-  "sigma",
-  "varsigma",
-  "tau",
-  "equiv",
-  "Im",
-  "otimes",
-  "cap",
-  "supset",
-  "int",
-  "rfloor",
-  "lfloor",
-  "perp",
-  "wedge",
-  "rceil",
-  "vee",
-  "langle",
-
-  "upsilon",
-  "phi",
-  "chi",
-  "psi",
-  "omega",
-  "Gamma",
-  "Delta",
-  "Theta",
-  "Lambda",
-  "Xi",
-  "Pi",
-  "Sigma",
-  "Upsilon",
-  "Phi",
-  "Psi",
-  "Omega",
-  "forall",
-  "exists",
-  "ni",
-  "cong",
-  "approx",
-  "Re",
-  "oplus",
-  "cup",
-  "subseteq",
-  "in",
-  "lceil",
-  "cdot",
-  "neg",
-  "times",
-  "surd",
-  "varpi",
-  "rangle",
-
-  "sim",
-  "leq",
-  "infty",
-  "clubsuit",
-  "diamondsuit",
-  "heartsuit",
-  "spadesuit",
-  "leftrightarrow",
-  "leftarrow",
-  "Leftarrow",
-  "uparrow",
-  "rightarrow",
-  "Rightarrow",
-  "downarrow",
-  "circ",
-  "pm",
-  "geq",
-  "propto",
-  "partial",
-  "bullet",
-  "div",
-  "neq",
-  "aleph",
-  "wp",
-  "oslash",
-  "supseteq",
-  "subset",
-  "o",
-  "nabla",
-  "ldots",
-  "prime",
-  "0",
-  "mid",
-  "copyright",
-
-  0
-};
+#include "oct-tex-symbols.cc"
 
-// Maps the symbol names (using index from symbol_names array) to
-// character codes, using 2 mapping:
-// - Unicode
-// - MS symbol (using Private Use Area)
-static uint32_t symbol_codes[][2] = {
-  { 0x03B1, 0xF061 },   // alpha
-  { 0x2220, 0xF0D0 },   // angle
-  { 0x2217, 0xF02A },   // ast
-  { 0x03B2, 0xF062 },   // beta
-  { 0x03B3, 0xF067 },   // gamma
-  { 0x03B4, 0xF064 },   // delta
-  { 0x03B5, 0xF065 },   // epsilon
-  { 0x03B6, 0xF07A },   // zeta
-  { 0x03B7, 0xF068 },   // eta
-  { 0x03B8, 0xF071 },   // theta
-  { 0x03D1, 0xF04A },   // vartheta
-  { 0x03B9, 0xF069 },   // iota
-  { 0x03BA, 0xF06B },   // kappa
-  { 0x03BB, 0xF06C },   // lambda
-  { 0x03BC, 0xF06D },   // mu
-  { 0x03BD, 0xF06E },   // nu
-  { 0x03BE, 0xF078 },   // xi
-  { 0x03C0, 0xF070 },   // pi
-  { 0x03C1, 0xF072 },   // rho
-  { 0x03C3, 0xF073 },   // sigma
-  { 0x03C2, 0xF056 },   // varsigma
-  { 0x03C4, 0xF074 },   // tau
-  { 0x2261, 0xF0BA },   // equiv
-  { 0x2111, 0xF0C1 },   // Im
-  { 0x2297, 0xF0C4 },   // otimes
-  { 0x2229, 0xF0C7 },   // cap
-  { 0x2283, 0xF0C9 },   // supset
-  { 0x222B, 0xF0F2 },   // int
-  { 0x230B, 0xF0FB },   // rfloor
-  { 0x230A, 0xF0EB },   // lfloor
-  { 0x22A5, 0xF05E },   // perp
-  { 0x2227, 0xF0D9 },   // wedge
-  { 0x2309, 0xF0F9 },   // rceil
-  { 0x2228, 0xF0DA },   // vee
-  { 0x27E8, 0xF0E1 },   // langle
+uint32_t  // Code looked up for 'symbol'; invalid_code if 'symbol' is out of range.
+text_element_symbol::get_symbol_code (void) const
+{
+  uint32_t code = invalid_code;  // Returned unchanged when the range check below fails.
 
-  { 0x03C5, 0xF075 },   // upsilon
-  { 0x03C6, 0xF066 },   // phi
-  { 0x03C7, 0xF063 },   // chi
-  { 0x03C8, 0xF079 },   // psi
-  { 0x03C9, 0xF077 },   // omega
-  { 0x0393, 0xF047 },   // Gamma
-  { 0x0394, 0xF044 },   // Delta
-  { 0x0398, 0xF051 },   // Theta
-  { 0x039B, 0xF04C },   // Lambda
-  { 0x039E, 0xF058 },   // Xi
-  { 0x03A0, 0xF050 },   // Pi
-  { 0x03A3, 0xF053 },   // Sigma
-  { 0x03D2, 0xF055 },   // Upsilon
-  { 0x03A6, 0xF046 },   // Phi
-  { 0x03A8, 0xF059 },   // Psi
-  { 0x03A9, 0xF057 },   // Omega
-  { 0x2200, 0xF022 },   // forall
-  { 0x2203, 0xF024 },   // exists
-  { 0x220B, 0xF027 },   // ni
-  { 0x2245, 0xF040 },   // cong
-  { 0x2248, 0xF0BB },   // approx
-  { 0x211C, 0xF0C2 },   // Re
-  { 0x2295, 0xF0C5 },   // oplus
-  { 0x222A, 0xF0C8 },   // cup
-  { 0x2286, 0xF0CD },   // subseteq
-  { 0x2208, 0xF0CE },   // in
-  { 0x2308, 0xF0E9 },   // lceil
-  { 0x22C5, 0xF0D7 },   // cdot
-  { 0x00AC, 0xF0D8 },   // neg
-  { 0x00D7, 0xF0B4 },   // times
-  { 0x221A, 0xF0D6 },   // surd
-  { 0x03D6, 0xF076 },   // varpi
-  { 0x27E9, 0xF0F1 },   // rangle
-
-  { 0x223C, 0xF07E },   // sim
-  { 0x2264, 0xF0A3 },   // leq
-  { 0x221E, 0xF0A5 },   // infty
-  { 0x2663, 0xF0A7 },   // clubsuit
-  { 0x2666, 0xF0A8 },   // diamondsuit
-  { 0x2665, 0xF0A9 },   // heartsuit
-  { 0x2660, 0xF0AA },   // spadesuit
-  { 0x2194, 0xF0AB },   // leftrightarrow
-  { 0x2190, 0xF0AC },   // leftarrow
-  { 0x21D0, 0xF0DC },   // Leftarrow
-  { 0x2191, 0xF0AD },   // uparrow
-  { 0x2192, 0xF0AE },   // rightarrow
-  { 0x21D2, 0xF0DE },   // Rightarrow
-  { 0x2193, 0xF0AF },   // downarrow
-  { 0x25CB, 0xF0B0 },   // circ
-  { 0x00B1, 0xF0B1 },   // pm
-  { 0x2265, 0xF0B3 },   // geq
-  { 0x221D, 0xF0B5 },   // propto
-  { 0x2202, 0xF0B6 },   // partial
-  { 0x2022, 0xF0B7 },   // bullet
-  { 0x00F7, 0xF0B8 },   // div
-  { 0x2260, 0xF0B9 },   // neq
-  { 0x2135, 0xF0C0 },   // aleph
-  { 0x2118, 0xF0C3 },   // wp
-  { 0x2298, 0xF0C6 },   // oslash
-  { 0x2287, 0xF0CA },   // supseteq
-  { 0x2282, 0xF0CC },   // subset
-  { 0x03BF, 0xF0B0 },   // o
-  { 0x2207, 0xF0D1 },   // nabla
-  { 0x2026, 0xF0BC },   // ldots
-  { 0x2032, 0xF0A2 },   // prime
-  { 0x2205, 0xF0C6 },   // 0 (empty set)
-  { 0x2223, 0xF0BD },   // mid
-  { 0x00A9, 0xF0E3 },   // copyright
-
-  { 0, 0 }
-};
-
-// FIXME: May want to replace lookup with a map from STL
-//        Number of symbols is ~100 which means linear search
-//        is bordering on inefficient.
-uint32_t
-text_element_symbol::get_symbol_code (void)
-{
-  if (code == invalid_code)
-    {
-      std::string sym = string_value ();
-
-      for (int i = 0; symbol_names[i]; i++)
-        {
-          if (symbol_names[i] == sym)
-            {
-              code = symbol_codes[i][0];
-              break;
-            }
-        }
-    }
+  if (0 <= symbol && symbol < num_symbol_codes)  // Only index the table with an in-range 'symbol'.
+    code = symbol_codes[symbol][0];  // [0]: Unicode column in the table this patch removes; generated table assumed to match -- TODO confirm.
 
   return code;
 }