From e78398554d27be2dc1a2c366571ee011989b5ce0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 1 Nov 2023 14:28:01 -0400 Subject: [PATCH] Handle invalid constants in the constant pool It's possible for us to parse a constant that is invalid in the current encoding. To fix this, we wrap the intern function in an rb_protect to ensure we always put a valid symbol into the AST. --- Makefile | 6 ++++++ src/prism.c | 7 +++++-- templates/ext/prism/api_node.c.erb | 12 +++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index dd08ce3b616..4a0fbc659a2 100644 --- a/Makefile +++ b/Makefile @@ -88,3 +88,9 @@ clean: all-no-debug: DEBUG_FLAGS := -DNDEBUG=1 all-no-debug: OPTFLAGS := -O3 all-no-debug: all + +run: Makefile $(STATIC_OBJECTS) $(HEADERS) test.c + $(ECHO) "compiling test.c" + $(Q) $(CC) $(CPPFLAGS) $(CFLAGS) $(STATIC_OBJECTS) test.c + $(ECHO) "running test.c" + $(Q) ./a.out diff --git a/src/prism.c b/src/prism.c index 42d97120634..de32ccae63e 100644 --- a/src/prism.c +++ b/src/prism.c @@ -8633,13 +8633,16 @@ parser_lex(pm_parser_t *parser) { if (!lex_state_beg_p(parser) && match(parser, '=')) { lex_state_set(parser, PM_LEX_STATE_BEG); LEX(PM_TOKEN_PERCENT_EQUAL); - } - else if( + } else if ( lex_state_beg_p(parser) || (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) || lex_state_spcarg_p(parser, space_seen) ) { if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) { + if (*parser->current.end >= 0x80) { + pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT); + } + lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); size_t eol_length = match_eol(parser); diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index 6da58321855..1ffbf6c9f84 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -83,7 +83,17 @@ pm_ast_new(pm_parser_t *parser, pm_node_t *node, rb_encoding *encoding) { for (uint32_t index = 0; index < parser->constant_pool.size; index++) { pm_constant_t *constant = &parser->constant_pool.constants[index]; - constants[index] = rb_intern3((const char *) constant->start, constant->length, encoding); + int state = 0; + + VALUE string = rb_enc_str_new((const char *) constant->start, constant->length, encoding); + ID value = rb_protect(rb_intern_str, string, &state); + + if (state != 0) { + value = rb_intern_const("?"); + rb_set_errinfo(Qnil); + } + + constants[index] = value; } pm_node_stack_node_t *node_stack = NULL;