Skip to content

Commit

Permalink
Merge pull request #3239 from techee/latex
Browse files Browse the repository at this point in the history
Tex: parse some more latex macros
  • Loading branch information
masatake authored Dec 30, 2021
2 parents bacba8d + c6cbaf9 commit de1f594
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 9 deletions.
5 changes: 5 additions & 0 deletions Units/parser-tex.r/newcommand.d/expected.tags
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
\\bar input.tex /^\\providecommand{\\bar}{\\section{#1}}$/;" C
\\baz input.tex /^\\def\\baz{\\section{#1}}$/;" C
\\foo input.tex /^\\renewcommand{\\foo}{\\section{#1}}$/;" C
\\mysection0 input.tex /^\\newcommand{\\mysection0}{\\section{#1}}$/;" C
\\mysection1 input.tex /^\\newcommand{\\mysection1}[1]{\\section{#1}}$/;" C
\\mysection2 input.tex /^\\newcommand{\\mysection2}[1][1]{\\section{#1}}$/;" C
\\mysection3 input.tex /^\\newcommand\\mysection3[1][1]{\\section{#1}}$/;" C
\\op input.tex /^\\DeclareMathOperator{\\op}{foo}$/;" o
5 changes: 5 additions & 0 deletions Units/parser-tex.r/newcommand.d/input.tex
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
\newcommand{\mysection0}{\section{#1}}
\newcommand{\mysection1}[1]{\section{#1}}
\newcommand{\mysection2}[1][1]{\section{#1}}
\newcommand\mysection3[1][1]{\section{#1}}
\renewcommand{\foo}{\section{#1}}
\providecommand{\bar}{\section{#1}}
\def\baz{\section{#1}}
\DeclareMathOperator{\op}{foo}
\begin{document}
\mysection0{ABC}
\mysection1{EFG}
Expand Down
4 changes: 4 additions & 0 deletions Units/parser-tex.r/newenvironment.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
boxed input.tex /^\\newenvironment{boxed}$/;" e
boxedA input.tex /^\\renewenvironment{boxedA}$/;" e
theorem input.tex /^\\newtheorem{theorem}{Theorem}$/;" t
theoremA input.tex /^\\newtheorem{theoremA}{Theorem A}$/;" t
34 changes: 34 additions & 0 deletions Units/parser-tex.r/newenvironment.d/input.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
\documentclass{article}
\newtheorem{theoremA}{Theorem A}
\newenvironment{boxed}
{\begin{center}
\begin{tabular}{|p{0.9\textwidth}|}
\hline\\
}
{
\\\\\hline
\end{tabular}
\end{center}
}
\renewenvironment{boxedA}
{\begin{center}
\begin{tabular}{|p{0.9\textwidth}|}
\hline\\
}
{
\\\\\hline
\end{tabular}
\end{center}
}
\newtheorem{theorem}{Theorem}
\begin{document}
\begin{boxed}
foo
\end{boxed}
\begin{boxedA}
foo
\end{boxedA}
\begin{theorem}
bar
\end{theorem}
\end{document}
152 changes: 145 additions & 7 deletions parsers/tex.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ enum eKeywordId {
KEYWORD_bibitem,
KEYWORD_bibliography,
KEYWORD_newcommand,
KEYWORD_renewcommand,
KEYWORD_providecommand,
KEYWORD_def,
KEYWORD_declaremathoperator,
KEYWORD_newenvironment,
KEYWORD_renewenvironment,
KEYWORD_newtheorem,
KEYWORD_newcounter,
};
typedef int keywordId; /* to allow KEYWORD_NONE */
Expand Down Expand Up @@ -117,6 +124,9 @@ typedef enum {
TEXTAG_XINPUT,
TEXTAG_BIBITEM,
TEXTAG_COMMAND,
TEXTAG_OPERATOR,
TEXTAG_ENVIRONMENT,
TEXTAG_THEOREM,
TEXTAG_COUNTER,
TEXTAG_COUNT
} texKind;
Expand Down Expand Up @@ -149,6 +159,9 @@ static kindDefinition TexKinds [] = {
.referenceOnly = true, ATTACH_ROLES(TexInputRoles) },
{ true, 'B', "bibitem", "bibliography items" },
{ true, 'C', "command", "command created with \\newcommand" },
{ true, 'o', "operator", "math operator created with \\DeclareMathOperator" },
{ true, 'e', "environment", "environment created with \\newenvironment" },
{ true, 't', "theorem", "theorem created with \\newtheorem" },
{ true, 'N', "counter", "counter created with \\newcounter" },
};

Expand All @@ -169,6 +182,13 @@ static const keywordTable TexKeywordTable [] = {
{ "bibitem", KEYWORD_bibitem },
{ "bibliography", KEYWORD_bibliography },
{ "newcommand", KEYWORD_newcommand },
{ "renewcommand", KEYWORD_renewcommand },
{ "providecommand", KEYWORD_providecommand },
{ "def", KEYWORD_def },
{ "DeclareMathOperator", KEYWORD_declaremathoperator },
{ "newenvironment", KEYWORD_newenvironment },
{ "renewenvironment", KEYWORD_renewenvironment},
{ "newtheorem", KEYWORD_newtheorem },
{ "newcounter", KEYWORD_newcounter },
};

Expand Down Expand Up @@ -602,13 +622,15 @@ static bool parseWithStrategy (tokenInfo *token,
}
else if (s->type == '*' && isType (token, '*'))
next_token = true;
else if (s->type == '{' && isType (token, '{'))
else if (((s->type == '{' || s->type == '\\') && isType (token, '{')) ||
(s->type == '\\' && isType (token, TOKEN_IDENTIFIER)))
{
int depth = 1;
bool missing_parens = isType (token, TOKEN_IDENTIFIER);

next_token = true;

if (!readToken (token))
if (!missing_parens && !readToken (token))
{
eof = true;
break;
Expand All @@ -618,6 +640,11 @@ static bool parseWithStrategy (tokenInfo *token,
copyToken (name, token);
vStringClear (name->string);
}
if (missing_parens)
{
vStringCat (name->string, token->string);
depth = 0;
}

/* Handle the case the code like \section{} */
if (isType (token, '}'))
Expand Down Expand Up @@ -668,6 +695,10 @@ static bool parseWithStrategy (tokenInfo *token,
}
}

/* The last token is optional and not present - let the caller know */
if (!next_token)
*tokenUnprocessed = true;

if (name)
deleteToken (name);

Expand Down Expand Up @@ -798,16 +829,65 @@ static bool parseEnv (tokenInfo *const token, bool begin, bool *tokenUnprocessed

}

static bool parseNewcommand (tokenInfo *const token, bool *tokenUnprocessed)
static bool parseNewcommandFull (tokenInfo *const token, bool *tokenUnprocessed, texKind kind)
{
bool eof = false;

/* \newcommand {cmd}[args][opt]{def} */
/* \newcommand{cmd}[args][opt]{def} */
/* \newcommand\cmd[args][opt]{def} */
/* \def\cmd{replacement} */
struct TexParseStrategy strategy [] = {
{
.type = '\\',
.flags = 0,
.kindIndex = kind,
.roleIndex = ROLE_DEFINITION_INDEX,
.name = NULL,
.unique = false,
},
{
.type = '[',
.flags = TEX_NAME_FLAG_OPTIONAL,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = '[',
.flags = TEX_NAME_FLAG_OPTIONAL,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = '{',
.flags = 0,
.kindIndex = TEXTAG_COMMAND,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = 0
}
};

if (parseWithStrategy (token, strategy, tokenUnprocessed))
eof = true;

return eof;
}

/*
 * Handle the keywords that define an ordinary macro (\newcommand,
 * \renewcommand, \providecommand and \def are dispatched here): the
 * defined name is tagged as TEXTAG_COMMAND.
 *
 * Returns whatever parseNewcommandFull () returns.
 */
static bool parseNewcommand (tokenInfo *const token, bool *tokenUnprocessed)
{
	bool eof = parseNewcommandFull (token, tokenUnprocessed, TEXTAG_COMMAND);

	return eof;
}

static bool parseNewEnvironment (tokenInfo *const token, bool *tokenUnprocessed)
{
bool eof = false;
/* \newenvironment{name}[args]{begdef}{enddef} */
struct TexParseStrategy strategy [] = {
{
.type = '{',
.flags = 0,
.kindIndex = TEXTAG_ENVIRONMENT,
.roleIndex = ROLE_DEFINITION_INDEX,
.name = NULL,
.unique = false,
Expand All @@ -818,6 +898,44 @@ static bool parseNewcommand (tokenInfo *const token, bool *tokenUnprocessed)
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = '{',
.flags = 0,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = '{',
.flags = 0,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = 0
}
};

if (parseWithStrategy (token, strategy, tokenUnprocessed))
eof = true;

return eof;
}

static bool parseNewTheorem (tokenInfo *const token, bool *tokenUnprocessed)
{
bool eof = false;
/* \newtheorem{name}{title}
\newtheorem{name}{title}[numbered_within]
\newtheorem{name}[numbered_like]{title} */
struct TexParseStrategy strategy [] = {
{
.type = '{',
.flags = 0,
.kindIndex = TEXTAG_THEOREM,
.roleIndex = ROLE_DEFINITION_INDEX,
.name = NULL,
.unique = false,
},
{
.type = '[',
.flags = TEX_NAME_FLAG_OPTIONAL,
Expand All @@ -830,6 +948,12 @@ static bool parseNewcommand (tokenInfo *const token, bool *tokenUnprocessed)
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = '[',
.flags = TEX_NAME_FLAG_OPTIONAL,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
{
.type = 0
}
Expand All @@ -844,7 +968,7 @@ static bool parseNewcommand (tokenInfo *const token, bool *tokenUnprocessed)
static bool parseNewcounter (tokenInfo *const token, bool *tokenUnprocessed)
{
bool eof = false;
/* \newccounter {counter}[parentCounter] */
/* \newcounter {counter}[parentCounter] */
struct TexParseStrategy strategy [] = {
{
.type = '{',
Expand All @@ -856,7 +980,7 @@ static bool parseNewcounter (tokenInfo *const token, bool *tokenUnprocessed)
},
{
.type = '[',
.flags = 0,
.flags = TEX_NAME_FLAG_OPTIONAL,
.kindIndex = KIND_GHOST_INDEX,
.name = NULL,
},
Expand All @@ -870,6 +994,7 @@ static bool parseNewcounter (tokenInfo *const token, bool *tokenUnprocessed)

return eof;
}

static void parseTexFile (tokenInfo *const token)
{
bool eof = false;
Expand Down Expand Up @@ -934,8 +1059,21 @@ static void parseTexFile (tokenInfo *const token)
false, &tokenUnprocessed);
break;
case KEYWORD_newcommand:
case KEYWORD_renewcommand:
case KEYWORD_providecommand:
case KEYWORD_def:
eof = parseNewcommand (token, &tokenUnprocessed);
break;
case KEYWORD_declaremathoperator:
eof = parseNewcommandFull (token, &tokenUnprocessed, TEXTAG_OPERATOR);
break;
case KEYWORD_newenvironment:
case KEYWORD_renewenvironment:
eof = parseNewEnvironment (token, &tokenUnprocessed);
break;
case KEYWORD_newtheorem:
eof = parseNewTheorem (token, &tokenUnprocessed);
break;
case KEYWORD_newcounter:
eof = parseNewcounter (token, &tokenUnprocessed);
break;
Expand Down
4 changes: 2 additions & 2 deletions parsers/tex.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ enum TexNameFlag {
};

struct TexParseStrategy {
/* Expected token type '<', '[', '*', and '{' are supported.
* 0 means the end of strategies.
/* Expected token type: '<', '[', '*', '{', and '\\' are supported.
* 0 means the end of strategies. '\\' means the {} pair may be omitted.
*
* A string between <>, [], or {} (pairs) can be tagged or stored in
* a vString. See the kindIndex and name fields of this structure.
Expand Down

0 comments on commit de1f594

Please sign in to comment.