From a8715a2d6e66eb3add6e98b56a40931056cef7d3 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 28 Sep 2025 21:44:54 +0300 Subject: [PATCH] libexpr: Switch parser.y to %skeleton lalr1.cc Since the parser is now LALR we can easily switch over to the less ugly skeleton than the default C one. This would allow us to switch from %union to %define api.value.type variant in the future to avoid the need for trivial POD types. --- src/libexpr/include/nix/expr/parser-state.hh | 1 - src/libexpr/lexer-helpers.cc | 4 +- src/libexpr/lexer-helpers.hh | 10 ++-- src/libexpr/lexer.l | 4 ++ src/libexpr/parser-scanner-decls.hh | 17 +++++++ src/libexpr/parser.y | 48 ++++++++++---------- 6 files changed, 51 insertions(+), 33 deletions(-) create mode 100644 src/libexpr/parser-scanner-decls.hh diff --git a/src/libexpr/include/nix/expr/parser-state.hh b/src/libexpr/include/nix/expr/parser-state.hh index 193d955c2..32e9f5db0 100644 --- a/src/libexpr/include/nix/expr/parser-state.hh +++ b/src/libexpr/include/nix/expr/parser-state.hh @@ -24,7 +24,6 @@ struct StringToken } }; -// This type must be trivially copyable; see YYLTYPE_IS_TRIVIAL in parser.y. 
struct ParserLocation { int beginOffset; diff --git a/src/libexpr/lexer-helpers.cc b/src/libexpr/lexer-helpers.cc index 927e3cc73..59f6f6f70 100644 --- a/src/libexpr/lexer-helpers.cc +++ b/src/libexpr/lexer-helpers.cc @@ -1,11 +1,11 @@ #include "lexer-helpers.hh" -void nix::lexer::internal::initLoc(YYLTYPE * loc) +void nix::lexer::internal::initLoc(Parser::location_type * loc) { loc->beginOffset = loc->endOffset = 0; } -void nix::lexer::internal::adjustLoc(yyscan_t yyscanner, YYLTYPE * loc, const char * s, size_t len) +void nix::lexer::internal::adjustLoc(yyscan_t yyscanner, Parser::location_type * loc, const char * s, size_t len) { loc->stash(); diff --git a/src/libexpr/lexer-helpers.hh b/src/libexpr/lexer-helpers.hh index 49865f794..b60fb9e7d 100644 --- a/src/libexpr/lexer-helpers.hh +++ b/src/libexpr/lexer-helpers.hh @@ -2,16 +2,12 @@ #include -// including the generated headers twice leads to errors -#ifndef BISON_HEADER -# include "lexer-tab.hh" -# include "parser-tab.hh" -#endif +#include "parser-scanner-decls.hh" namespace nix::lexer::internal { -void initLoc(YYLTYPE * loc); +void initLoc(Parser::location_type * loc); -void adjustLoc(yyscan_t yyscanner, YYLTYPE * loc, const char * s, size_t len); +void adjustLoc(yyscan_t yyscanner, Parser::location_type * loc, const char * s, size_t len); } // namespace nix::lexer::internal diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index 1005f9f7e..f420fc13f 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -82,6 +82,10 @@ static void requireExperimentalFeature(const ExperimentalFeature & feature, cons } +using enum nix::Parser::token::token_kind_type; +using YYSTYPE = nix::Parser::value_type; +using YYLTYPE = nix::Parser::location_type; + // yacc generates code that uses unannotated fallthrough. 
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" diff --git a/src/libexpr/parser-scanner-decls.hh b/src/libexpr/parser-scanner-decls.hh new file mode 100644 index 000000000..e4e061883 --- /dev/null +++ b/src/libexpr/parser-scanner-decls.hh @@ -0,0 +1,17 @@ +#pragma once + +#ifndef BISON_HEADER +# include "parser-tab.hh" +using YYSTYPE = nix::parser::BisonParser::value_type; +using YYLTYPE = nix::parser::BisonParser::location_type; +# include "lexer-tab.hh" // IWYU pragma: export +#endif + +namespace nix { + +class Parser : public parser::BisonParser +{ + using BisonParser::BisonParser; +}; + +} // namespace nix diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index 89da001ef..8f77b4b0a 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -1,5 +1,7 @@ +%skeleton "lalr1.cc" %define api.location.type { ::nix::ParserLocation } -%define api.pure +%define api.namespace { ::nix::parser } +%define api.parser.class { BisonParser } %locations %define parse.error verbose %defines @@ -26,19 +28,12 @@ #include "nix/expr/eval-settings.hh" #include "nix/expr/parser-state.hh" -// Bison seems to have difficulty growing the parser stack when using C++ with -// a custom location type. This undocumented macro tells Bison that our -// location type is "trivially copyable" in C++-ese, so it is safe to use the -// same memcpy macro it uses to grow the stack that it uses with its own -// default location type. Without this, we get "error: memory exhausted" when -// parsing some large Nix files. Our other options are to increase the initial -// stack size (200 by default) to be as large as we ever want to support (so -// that growing the stack is unnecessary), or redefine the stack-relocation -// macro ourselves (which is also undocumented). 
-#define YYLTYPE_IS_TRIVIAL 1 - -#define YY_DECL int yylex \ - (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParserState * state) +#define YY_DECL \ + int yylex( \ + nix::Parser::value_type * yylval_param, \ + nix::Parser::location_type * yylloc_param, \ + yyscan_t yyscanner, \ + nix::ParserState * state) // For efficiency, we only track offsets; not line,column coordinates # define YYLLOC_DEFAULT(Current, Rhs, N) \ @@ -78,24 +73,30 @@ Expr * parseExprFromBuf( %{ -#include "parser-tab.hh" -#include "lexer-tab.hh" +/* The parser is very performance sensitive and loses out on a lot + of performance even with basic stdlib assertions. Since those don't + affect ABI we can disable those just for this file. */ +#if defined(_GLIBCXX_ASSERTIONS) && !defined(_GLIBCXX_DEBUG) +#undef _GLIBCXX_ASSERTIONS +#endif + +#include "parser-scanner-decls.hh" YY_DECL; using namespace nix; -#define CUR_POS state->at(yyloc) +#define CUR_POS state->at(yylhs.location) - -void yyerror(YYLTYPE * loc, yyscan_t scanner, ParserState * state, const char * error) +void parser::BisonParser::error(const location_type &loc_, const std::string &error) { + auto loc = loc_; if (std::string_view(error).starts_with("syntax error, unexpected end of file")) { - loc->beginOffset = loc->endOffset; + loc.beginOffset = loc.endOffset; } throw ParseError({ .msg = HintFmt(error), - .pos = state->positions[state->at(*loc)] + .pos = state->positions[state->at(loc)] }); } @@ -182,7 +183,7 @@ start: expr { state->result = $1; // This parser does not use yynerrs; suppress the warning. - (void) yynerrs; + (void) yynerrs_; }; expr: expr_function; @@ -563,7 +564,8 @@ Expr * parseExprFromBuf( Finally _destroy([&] { yylex_destroy(scanner); }); yy_scan_buffer(text, length, scanner); - yyparse(scanner, &state); + Parser parser(scanner, &state); + parser.parse(); return state.result; }