diff libinterp/corefcn/oct-tex-lexer.in.ll @ 17327:fe6518a1d87c

Move TeX symbol decoding into the lexer (bug #39831). * libinterp/Makefile.am (BUILT_SOURCES): Add corefcn/oct-tex-symbols.cc. (BUILT_DISTFILES): Add corefcn/oct-tex-lexer.ll and corefcn/oct-tex-symbols.cc. (EXTRA_DIST): Add corefcn/oct-tex-lexer.in.ll and corefcn/oct-tex-symbols.in. (ULT_DIST_SRC): Filter out corefcn/oct-tex-lexer.ll from DIST_SRC and add corefcn/oct-tex-lexer.in.ll instead. (CLEAN_FILES): Add corefcn/oct-tex-parser.output. * libinterp/corefcn/modules.mk (corefcn/oct-tex-lexer.ll, corefcn/oct-tex-symbols.cc): New rules to build the TeX lexer. (corefcn/txt-eng.cc): Add dependency on corefcn/oct-tex-symbols.cc. * libinterp/corefcn/oct-tex-lexer.in.ll: Renamed from oct-tex.lexer.ll. Remove COMMAND state. Remove ID regex. Replace rules for symbols with tag @SYMBOL_RULES@. * libinterp/corefcn/oct-tex-parser.yy (ID, CMD, identifier): Remove tokens. (SYM, sym): New token and value. (symbol_element): Build from SYM. * libinterp/corefcn/oct-tex-symbols.in: New file with supported TeX symbols and corresponding codes (unicode and MS symbols). * libinterp/corefcn/txt-eng.h (class text_element_symbol): Make it inherit from text_element. (text_element_symbol::code): Removed member. (text_element_symbol::symbol): New member. (text_element_symbol::text_element_symbol): Adapt constructor. (text_element_symbol::get_symbol): New method. (text_element_symbol::get_symbol_code): Make const. * libinterp/corefcn/txt-eng.cc (symbol_names, symbol_codes): Remove static variables, now auto-generated from oct-tex-symbols.in. (oct-tex-symbols.cc): New include. (text_element_symbol::get_symbol_code): Change implementation to simply index into auto-generated symbol_codes array. * libinterp/corefcn/txt-eng-ft.cc (ft_render::visit(text_element_symbol)): Don't use text_element_symbol::string_value(), use text_element_symbol::get_symbol() instead.
author Michael Goffioul <michael.goffioul@gmail.com>
date Sat, 24 Aug 2013 14:27:09 -0400
parents libinterp/corefcn/oct-tex-lexer.ll@ea23eb07f8ed
children bdbf91c0a1a4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libinterp/corefcn/oct-tex-lexer.in.ll	Sat Aug 24 14:27:09 2013 -0400
@@ -0,0 +1,146 @@
+/*
+
+Copyright (C) 2013 Michael Goffioul
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+%option prefix = "octave_tex_"
+%option noyywrap
+%option reentrant
+%option bison-bridge
+
+%top {
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "txt-eng.h"
+#include "oct-tex-parser.h"
+}
+
+%x	NUM_MODE
+%x	MAYBE_NUM_MODE
+
+D       [0-9]
+NUM	(({D}+\.?{D}*)|(\.{D}+))
+
+%%
+
+%{
+// Numeric values
+%}
+
+<NUM_MODE>{NUM}		{
+    int nread;
+
+    nread = sscanf (yytext, "%lf", &(yylval->num));
+    if (nread == 1)
+      return NUM;
+  }
+<NUM_MODE>[ \t]+	{ }
+<NUM_MODE>"\n"|.	{ yyless (0); BEGIN (INITIAL); }
+
+<MAYBE_NUM_MODE>"{"	{ BEGIN (NUM_MODE); return START; }
+<MAYBE_NUM_MODE>"\n"|.	{ yyless (0); BEGIN (INITIAL); }
+
+%{
+// Simple commands
+%}
+
+"\\bf"		{ return BF; }
+"\\it"		{ return IT; }
+"\\sl"		{ return SL; }
+"\\rm"		{ return RM; }
+
+%{
+// Generic font commands
+%}
+
+"\\fontname"	{ return FONTNAME; }
+"\\fontsize"	{ BEGIN (MAYBE_NUM_MODE); return FONTSIZE; }
+"\\color[rgb]"	{ BEGIN (MAYBE_NUM_MODE); return COLOR_RGB; }
+"\\color"	{ return COLOR; }
+
+%{
+// Special characters
+%}
+
+"{"	{ return START; }
+"}"	{ return END; }
+"^"	{ return SUPER; }
+"_"	{ return SUB; }
+
+"\\{"	|
+"\\}"	|
+"\\^"	|
+"\\_"	|
+"\\\\"	{ yylval->ch = yytext[1]; return CH; }
+
+%{
+// Symbols
+%}
+
+@SYMBOL_RULES@
+
+%{
+// Generic character
+%}
+
+"\n"	|
+.	{ yylval->ch = yytext[0]; return CH; }
+
+%%
+
+bool
+text_parser_tex::init_lexer (const std::string& s)
+{
+  if (! scanner)
+    octave_tex_lex_init (&scanner);
+
+  if (scanner)
+    {
+      if (buffer_state)
+        {
+          octave_tex__delete_buffer (reinterpret_cast<YY_BUFFER_STATE> (buffer_state),
+                                     scanner);
+          buffer_state = 0;
+        }
+
+      buffer_state = octave_tex__scan_bytes (s.data (), s.length (), scanner);
+    }
+
+  return (scanner && buffer_state);
+}
+
+void
+text_parser_tex::destroy_lexer (void)
+{
+  if (buffer_state)
+    {
+      octave_tex__delete_buffer (reinterpret_cast<YY_BUFFER_STATE> (buffer_state),
+                                 scanner);
+      buffer_state = 0;
+    }
+
+  if (scanner)
+    {
+      octave_tex_lex_destroy (scanner);
+      scanner = 0;
+    }
+}