From 99e81619dee4af4459851238136d23846de5378d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 11:15:37 -0400 Subject: [PATCH 01/10] Create an options struct for passing all of the possible options --- ext/prism/extension.c | 92 +++++++++++---- include/prism.h | 5 +- include/prism/options.h | 161 +++++++++++++++++++++++++++ include/prism/util/pm_newline_list.h | 12 ++ prism.gemspec | 2 + src/options.c | 96 ++++++++++++++++ src/prism.c | 58 ++++++++-- src/util/pm_newline_list.c | 27 +++++ templates/src/serialize.c.erb | 6 +- 9 files changed, 427 insertions(+), 32 deletions(-) create mode 100644 include/prism/options.h create mode 100644 src/options.c diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 42268c85382..ed166a51763 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -61,14 +61,14 @@ input_load_string(pm_string_t *input, VALUE string) { * Dump the AST corresponding to the given input to a string. */ static VALUE -dump_input(pm_string_t *input, const char *filepath) { +dump_input(pm_string_t *input, const pm_options_t *options) { pm_buffer_t buffer; if (!pm_buffer_init(&buffer)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } pm_parser_t parser; - pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath); + pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); pm_serialize(&parser, node, &buffer); @@ -103,7 +103,11 @@ dump(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - VALUE value = dump_input(&input, check_string(filepath)); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, check_string(filepath)); + + VALUE value = dump_input(&input, &options); + pm_options_free(&options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); @@ -125,7 +129,12 @@ dump_file(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if 
(!pm_string_mapped_init(&input, checked)) return Qnil; - VALUE value = dump_input(&input, checked); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + + VALUE value = dump_input(&input, &options); + + pm_options_free(&options); pm_string_free(&input); return value; @@ -316,9 +325,9 @@ parse_lex_encoding_changed_callback(pm_parser_t *parser) { * the nodes and tokens. */ static VALUE -parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) { +parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { pm_parser_t parser; - pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath); + pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); VALUE offsets = rb_ary_new(); @@ -385,7 +394,13 @@ lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; input_load_string(&input, string); - return parse_lex_input(&input, check_string(filepath), false); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, check_string(filepath)); + + VALUE result = parse_lex_input(&input, &options, false); + pm_options_free(&options); + + return result; } /** @@ -401,7 +416,12 @@ lex_file(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if (!pm_string_mapped_init(&input, checked)) return Qnil; - VALUE value = parse_lex_input(&input, checked, false); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + + VALUE value = parse_lex_input(&input, &options, false); + + pm_options_free(&options); pm_string_free(&input); return value; @@ -415,9 +435,9 @@ lex_file(VALUE self, VALUE filepath) { * Parse the given input and return a ParseResult instance. 
*/ static VALUE -parse_input(pm_string_t *input, const char *filepath) { +parse_input(pm_string_t *input, const pm_options_t *options) { pm_parser_t parser; - pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath); + pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding.name); @@ -462,7 +482,11 @@ parse(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - VALUE value = parse_input(&input, check_string(filepath)); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, check_string(filepath)); + + VALUE value = parse_input(&input, &options); + pm_options_free(&options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); @@ -484,7 +508,11 @@ parse_file(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if (!pm_string_mapped_init(&input, checked)) return Qnil; - VALUE value = parse_input(&input, checked); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + + VALUE value = parse_input(&input, &options); + pm_options_free(&options); pm_string_free(&input); return value; @@ -494,9 +522,9 @@ parse_file(VALUE self, VALUE filepath) { * Parse the given input and return an array of Comment objects. 
*/ static VALUE -parse_input_comments(pm_string_t *input, const char *filepath) { +parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_parser_t parser; - pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), filepath); + pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding.name); @@ -525,7 +553,13 @@ parse_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; input_load_string(&input, string); - return parse_input_comments(&input, check_string(filepath)); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, check_string(filepath)); + + VALUE result = parse_input_comments(&input, &options); + pm_options_free(&options); + + return result; } /** @@ -541,7 +575,12 @@ parse_file_comments(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if (!pm_string_mapped_init(&input, checked)) return Qnil; - VALUE value = parse_input_comments(&input, checked); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + + VALUE value = parse_input_comments(&input, &options); + + pm_options_free(&options); pm_string_free(&input); return value; @@ -568,7 +607,12 @@ parse_lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; input_load_string(&input, string); - VALUE value = parse_lex_input(&input, check_string(filepath), true); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, check_string(filepath)); + + VALUE value = parse_lex_input(&input, &options, true); + + pm_options_free(&options); pm_string_free(&input); return value; @@ -593,7 +637,12 @@ parse_lex_file(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if (!pm_string_mapped_init(&input, checked)) return Qnil; - VALUE value = parse_lex_input(&input, checked, true); + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + + 
VALUE value = parse_lex_input(&input, &options, true); + + pm_options_free(&options); pm_string_free(&input); return value; @@ -670,13 +719,16 @@ profile_file(VALUE self, VALUE filepath) { const char *checked = check_string(filepath); if (!pm_string_mapped_init(&input, checked)) return Qnil; + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, checked); + pm_parser_t parser; - pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), checked); + pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options); pm_node_t *node = pm_parse(&parser); pm_node_destroy(&parser, node); pm_parser_free(&parser); - + pm_options_free(&options); pm_string_free(&input); return Qnil; diff --git a/include/prism.h b/include/prism.h index d6b18a44a80..84bd3f84a7f 100644 --- a/include/prism.h +++ b/include/prism.h @@ -15,6 +15,7 @@ #include "prism/ast.h" #include "prism/diagnostic.h" #include "prism/node.h" +#include "prism/options.h" #include "prism/pack.h" #include "prism/parser.h" #include "prism/prettyprint.h" @@ -47,9 +48,9 @@ PRISM_EXPORTED_FUNCTION const char * pm_version(void); * @param parser The parser to initialize. * @param source The source to parse. * @param size The size of the source. - * @param filepath The optional filepath to pass to the parser. + * @param options The optional options to use when parsing. */ -PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath); +PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options); /** * Register a callback that will be called whenever prism changes the encoding diff --git a/include/prism/options.h b/include/prism/options.h new file mode 100644 index 00000000000..5abf16a6013 --- /dev/null +++ b/include/prism/options.h @@ -0,0 +1,161 @@ +/** + * @file options.h + * + * The options that can be passed to parsing. 
+ */ +#ifndef PRISM_OPTIONS_H +#define PRISM_OPTIONS_H + +#include "prism/defines.h" +#include "prism/util/pm_string.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +/** + * A scope of locals surrounding the code that is being parsed. + */ +typedef struct pm_options_scope { + /** The number of locals in the scope. */ + size_t locals_count; + + /** The names of the locals in the scope. */ + pm_string_t *locals; +} pm_options_scope_t; + +/** + * The options that can be passed to the parser. + */ +typedef struct { + /** The name of the file that is currently being parsed. */ + const char *filepath; + + /** + * The name of the encoding that the source file is in. Note that this must + * correspond to a name that can be found with Encoding.find in Ruby. + */ + const char *encoding; + + /** + * The line within the file that the parse starts on. This value is + * 0-indexed. + */ + uint32_t line; + + /** + * The number of scopes surrounding the code that is being parsed. + */ + size_t scopes_count; + + /** + * The scopes surrounding the code that is being parsed. For most parses + * this will be NULL, but for evals it will be the locals that are in scope + * surrounding the eval. + */ + pm_options_scope_t *scopes; + + /** Whether or not the frozen string literal option has been set. */ + bool frozen_string_literal; + + /** + * Whether or not we should suppress warnings. This is purposefully negated + * so that the default is to not suppress warnings, which allows us to still + * create an options struct with zeroed memory. + */ + bool suppress_warnings; +} pm_options_t; + +/** + * Set the filepath option on the given options struct. + * + * @param options The options struct to set the filepath on. + * @param filepath The filepath to set. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_filepath_set(pm_options_t *options, const char *filepath); + +/** + * Set the encoding option on the given options struct. + * + * @param options The options struct to set the encoding on. 
+ * @param encoding The encoding to set. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_encoding_set(pm_options_t *options, const char *encoding); + +/** + * Set the line option on the given options struct. + * + * @param options The options struct to set the line on. + * @param line The line to set. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_line_set(pm_options_t *options, uint32_t line); + +/** + * Set the frozen string literal option on the given options struct. + * + * @param options The options struct to set the frozen string literal value on. + * @param frozen_string_literal The frozen string literal value to set. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal); + +/** + * Set the suppress warnings option on the given options struct. + * + * @param options The options struct to set the suppress warnings value on. + * @param suppress_warnings The suppress warnings value to set. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings); + +/** + * Allocate and zero out the scopes array on the given options struct. + * + * @param options The options struct to initialize the scopes array on. + * @param scopes_count The number of scopes to allocate. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_scopes_init(pm_options_t *options, size_t scopes_count); + +/** + * Return a pointer to the scope at the given index within the given options. + * + * @param options The options struct to get the scope from. + * @param index The index of the scope to get. + * @return A pointer to the scope at the given index. + */ +PRISM_EXPORTED_FUNCTION const pm_options_scope_t * +pm_options_scope_get(const pm_options_t *options, size_t index); + +/** + * Create a new options scope struct. This will hold a set of locals that are in + * scope surrounding the code that is being parsed. + * + * @param scope The scope struct to initialize. 
+ * @param locals_count The number of locals to allocate. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); + +/** + * Return a pointer to the local at the given index within the given scope. + * + * @param scope The scope struct to get the local from. + * @param index The index of the local to get. + * @return A pointer to the local at the given index. + */ +PRISM_EXPORTED_FUNCTION const pm_string_t * +pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index); + +/** + * Free the internal memory associated with the options. + * + * @param options The options struct whose internal memory should be freed. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_free(pm_options_t *options); + +#endif diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h index a31051f4e0a..93816b06561 100644 --- a/include/prism/util/pm_newline_list.h +++ b/include/prism/util/pm_newline_list.h @@ -61,6 +61,18 @@ typedef struct { */ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); +/** + * Set up the newline list such that it believes it is starting on a specific + * line in the source. Basically this entails pushing on pointers to the start + * of the string until we hit the desired line. + * + * @param list The list to set up. + * @param count The number of lines to push onto the list. + * @return True if no reallocation was needed or the reallocation of the offsets + * succeeds (if one was necessary), otherwise false. + */ +bool pm_newline_list_force(pm_newline_list_t *list, size_t count); + /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. 
diff --git a/prism.gemspec b/prism.gemspec index 16c21cfc140..e402736c724 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -45,6 +45,7 @@ Gem::Specification.new do |spec| "include/prism/diagnostic.h", "include/prism/enc/pm_encoding.h", "include/prism/node.h", + "include/prism/options.h", "include/prism/pack.h", "include/prism/parser.h", "include/prism/prettyprint.h", @@ -106,6 +107,7 @@ Gem::Specification.new do |spec| "src/util/pm_string_list.c", "src/util/pm_strncasecmp.c", "src/util/pm_strpbrk.c", + "src/options.c", "src/prism.c", "prism.gemspec", "sig/prism.rbs", diff --git a/src/options.c b/src/options.c new file mode 100644 index 00000000000..7f45c2026c7 --- /dev/null +++ b/src/options.c @@ -0,0 +1,96 @@ +#include "prism/options.h" + +/** + * Set the filepath option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_filepath_set(pm_options_t *options, const char *filepath) { + options->filepath = filepath; +} + +/** + * Set the encoding option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_encoding_set(pm_options_t *options, const char *encoding) { + options->encoding = encoding; +} + +/** + * Set the line option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_line_set(pm_options_t *options, uint32_t line) { + options->line = line; +} + +/** + * Set the frozen string literal option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal) { + options->frozen_string_literal = frozen_string_literal; +} + +/** + * Set the suppress warnings option on the given options struct. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings) { + options->suppress_warnings = suppress_warnings; +} + +/** + * Allocate and zero out the scopes array on the given options struct. 
+ */ +PRISM_EXPORTED_FUNCTION void +pm_options_scopes_init(pm_options_t *options, size_t scopes_count) { + options->scopes_count = scopes_count; + options->scopes = calloc(scopes_count, sizeof(pm_options_scope_t)); + if (options->scopes == NULL) abort(); +} + +/** + * Return a pointer to the scope at the given index within the given options. + */ +PRISM_EXPORTED_FUNCTION const pm_options_scope_t * +pm_options_scope_get(const pm_options_t *options, size_t index) { + return &options->scopes[index]; +} + +/** + * Create a new options scope struct. This will hold a set of locals that are in + * scope surrounding the code that is being parsed. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count) { + scope->locals_count = locals_count; + scope->locals = calloc(locals_count, sizeof(pm_string_t)); + if (scope->locals == NULL) abort(); +} + +/** + * Return a pointer to the local at the given index within the given scope. + */ +PRISM_EXPORTED_FUNCTION const pm_string_t * +pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { + return &scope->locals[index]; +} + +/** + * Free the internal memory associated with the options. + */ +PRISM_EXPORTED_FUNCTION void +pm_options_free(pm_options_t *options) { + for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { + pm_options_scope_t *scope = &options->scopes[scope_index]; + + for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { + pm_string_free(&scope->locals[local_index]); + } + + free(scope->locals); + } + + free(options->scopes); +} diff --git a/src/prism.c b/src/prism.c index de32ccae63e..fc836a956d7 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16319,14 +16319,9 @@ pm_parser_metadata(pm_parser_t *parser, const char *metadata) { * Initialize a parser with the given start and end pointers. 
*/ PRISM_EXPORTED_FUNCTION void -pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) { +pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) { assert(source != NULL); - // Set filepath to the file that was passed - if (!filepath) filepath = ""; - pm_string_t filepath_string; - pm_string_constant_init(&filepath_string, filepath, strlen(filepath)); - *parser = (pm_parser_t) { .lex_state = PM_LEX_STATE_BEG, .enclosure_nesting = 0, @@ -16356,7 +16351,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch .encoding_decode_callback = NULL, .encoding_comment_start = source, .lex_callback = NULL, - .filepath_string = filepath_string, + .filepath_string = { 0 }, .constant_pool = { 0 }, .newline_list = { 0 }, .integer_base = 0, @@ -16370,8 +16365,6 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch .frozen_string_literal = false }; - pm_accepts_block_stack_push(parser, true); - // Initialize the constant pool. We're going to completely guess as to the // number of constants that we'll need based on the size of the input. The // ratio we chose here is actually less arbitrary than you might think. @@ -16395,6 +16388,53 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch size_t newline_size = size / 22; pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size); + // If options were provided to this parse, establish them here. 
+ if (options != NULL) { + // filepath option + if (options->filepath == NULL) { + pm_string_constant_init(&parser->filepath_string, "", 0); + } else { + pm_string_constant_init(&parser->filepath_string, options->filepath, strlen(options->filepath)); + } + + // line option + if (options->line > 0) { + pm_newline_list_force(&parser->newline_list, options->line); + } + + // encoding option + // if (options->encoding != NULL) {} + + // frozen_string_literal option + if (options->frozen_string_literal) { + parser->frozen_string_literal = true; + } + + // suppress_warnings option + // if (options->suppress_warnings) {} + + // scopes option + for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { + const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index); + pm_parser_scope_push(parser, scope_index == 0); + + for (size_t local_index = 0; local_index < scope->locals_count; local_index++) { + const pm_string_t *local = pm_options_scope_local_get(scope, local_index); + + const uint8_t *source = pm_string_source(local); + size_t length = pm_string_length(local); + + uint8_t *allocated = malloc(length); + if (allocated == NULL) continue; + + memcpy((void *) allocated, source, length); + pm_parser_local_add_owned(parser, allocated, length); + } + } + } + + pm_accepts_block_stack_push(parser, true); + // Skip past the UTF-8 BOM if it exists. if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) { parser->current.end += 3; diff --git a/src/util/pm_newline_list.c b/src/util/pm_newline_list.c index f27bb75b63a..978ebf3d0e1 100644 --- a/src/util/pm_newline_list.c +++ b/src/util/pm_newline_list.c @@ -19,6 +19,33 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac return true; } +/** + * Set up the newline list such that it believes it is starting on a specific + * line in the source. 
Basically this entails pushing on pointers to the start + * of the string until we hit the desired line. + */ +bool +pm_newline_list_force(pm_newline_list_t *list, size_t count) { + size_t next_capacity = list->capacity == 0 ? 1 : list->capacity; + while (count > next_capacity) { + next_capacity *= 2; + } + + size_t *offsets = list->offsets; + list->offsets = (size_t *) calloc(next_capacity, sizeof(size_t)); + if (list->offsets == NULL) return false; + + if (offsets != NULL) { + memcpy(list->offsets, offsets, list->size * sizeof(size_t)); + free(offsets); + } + + memset(list->offsets + list->size, 0, count * sizeof(size_t)); + list->size += count; + + return true; +} + /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 5e166f1eef2..db10fab6de7 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -288,8 +288,11 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { */ PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) { + pm_options_t options = { 0 }; + pm_options_filepath_set(&options, filepath); + pm_parser_t parser; - pm_parser_init(&parser, source, size, filepath); + pm_parser_init(&parser, source, size, &options); pm_lex_callback_t lex_callback = (pm_lex_callback_t) { .data = (void *) buffer, @@ -310,6 +313,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu pm_node_destroy(&parser, node); pm_parser_free(&parser); + pm_options_free(&options); } /** From 8582d372a34f98be6fe8b4858ef6fd523fa59c56 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 12:48:19 -0400 Subject: [PATCH 02/10] Wire up options through the Ruby API --- bin/lex | 2 +- ext/prism/extension.c | 368 +++++++++++++++++++++++++++--------- 
lib/prism/lex_compat.rb | 2 +- test/prism/parse_test.rb | 10 +- test/prism/ruby_api_test.rb | 8 +- 5 files changed, 289 insertions(+), 101 deletions(-) diff --git a/bin/lex b/bin/lex index 970efbe94f0..afff791c8d9 100755 --- a/bin/lex +++ b/bin/lex @@ -30,7 +30,7 @@ ripper = end prism = Prism.lex_compat(source, filepath) -prism_new = Prism.lex(source, filepath) +prism_new = Prism.lex(source, filepath: filepath) if prism.errors.any? puts "Errors lexing:" prism.errors.map do |error| diff --git a/ext/prism/extension.c b/ext/prism/extension.c index ed166a51763..679d83c1beb 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -15,6 +15,13 @@ VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; +ID id_filepath; +ID id_encoding; +ID id_line; +ID id_frozen_string_literal; +ID id_suppress_warnings; +ID id_scopes; + /******************************************************************************/ /* IO of Ruby code */ /******************************************************************************/ @@ -53,6 +60,133 @@ input_load_string(pm_string_t *input, VALUE string) { pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); } +/******************************************************************************/ +/* Building C options from Ruby options */ +/******************************************************************************/ + +/** + * Build the scopes associated with the provided Ruby keyword value. + */ +static void +build_options_scopes(pm_options_t *options, VALUE scopes) { + // Check if the value is an array. If it's not, then raise a type error. + if (!RB_TYPE_P(scopes, T_ARRAY)) { + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes)); + } + + // Initialize the scopes array. + size_t scopes_count = RARRAY_LEN(scopes); + pm_options_scopes_init(options, scopes_count); + + // Iterate over the scopes and add them to the options. 
+ for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) { + VALUE scope = rb_ary_entry(scopes, scope_index); + + // Check that the scope is an array. If it's not, then raise a type + // error. + if (!RB_TYPE_P(scope, T_ARRAY)) { + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope)); + } + + // Initialize the scope array. + size_t locals_count = RARRAY_LEN(scope); + pm_options_scope_t *options_scope = &options->scopes[scope_index]; + pm_options_scope_init(options_scope, locals_count); + + // Iterate over the locals and add them to the scope. + for (size_t local_index = 0; local_index < locals_count; local_index++) { + VALUE local = rb_ary_entry(scope, local_index); + + // Check that the local is a symbol. If it's not, then raise a + // type error. + if (!RB_TYPE_P(local, T_SYMBOL)) { + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local)); + } + + // Add the local to the scope. + pm_string_t *scope_local = &options_scope->locals[local_index]; + const char *name = rb_id2name(SYM2ID(local)); + pm_string_constant_init(scope_local, name, strlen(name)); + } + } +} + +/** + * An iterator function that is called for each key-value in the keywords hash. 
+ */ +static int +build_options_i(VALUE key, VALUE value, VALUE argument) { + pm_options_t *options = (pm_options_t *) argument; + ID key_id = SYM2ID(key); + + if (key_id == id_filepath) { + if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value)); + } else if (key_id == id_encoding) { + if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value))); + } else if (key_id == id_line) { + if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value)); + } else if (key_id == id_frozen_string_literal) { + if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue); + } else if (key_id == id_suppress_warnings) { + if (!NIL_P(value)) pm_options_suppress_warnings_set(options, value == Qtrue); + } else if (key_id == id_scopes) { + if (!NIL_P(value)) build_options_scopes(options, value); + } else { + rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key); + } + + return ST_CONTINUE; +} + +/** + * We need a struct here to pass through rb_protect and it has to be a single + * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this + * through as an opaque pointer and cast it on both sides. + */ +struct build_options_data { + pm_options_t *options; + VALUE keywords; +}; + +/** + * Build the set of options from the given keywords. Note that this can raise a + * Ruby error if the options are not valid. + */ +static VALUE +build_options(VALUE argument) { + struct build_options_data *data = (struct build_options_data *) argument; + rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options); + return Qnil; +} + +/** + * Extract the options from the given keyword arguments. 
+ */ +static void +extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { + if (!NIL_P(keywords)) { + struct build_options_data data = { .options = options, .keywords = keywords }; + struct build_options_data *argument = &data; + + int state = 0; + rb_protect(build_options, (VALUE) argument, &state); + + if (state != 0) { + pm_options_free(options); + rb_jump_tag(state); + } + } + + if (!NIL_P(filepath)) { + if (!RB_TYPE_P(filepath, T_STRING)) { + pm_options_free(options); + rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath)); + } + + pm_options_filepath_set(options, RSTRING_PTR(filepath)); + } +} + /******************************************************************************/ /* Serializing the AST */ /******************************************************************************/ @@ -83,15 +217,19 @@ dump_input(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::dump(source, filepath = nil) -> dumped + * Prism::dump(source, **options) -> String * - * Dump the AST corresponding to the given string to a string. + * Dump the AST corresponding to the given string to a string. For supported + * options, see Prism::parse. 
*/ static VALUE dump(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); @@ -103,9 +241,6 @@ dump(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = dump_input(&input, &options); pm_options_free(&options); @@ -118,24 +253,29 @@ dump(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::dump_file(filepath) -> dumped + * Prism::dump_file(filepath, **options) -> String * - * Dump the AST corresponding to the given file to a string. + * Dump the AST corresponding to the given file to a string. For supported + * options, see Prism::parse. */ static VALUE -dump_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +dump_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); - VALUE value = dump_input(&input, &options); + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } - pm_options_free(&options); + VALUE value = dump_input(&input, &options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -381,22 +521,23 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod /** * call-seq: - * Prism::lex(source, filepath = nil) -> Array + * Prism::lex(source, **options) -> Array * - * Return an array of Token instances 
corresponding to the given string. + * Return an array of Token instances corresponding to the given string. For + * supported options, see Prism::parse. */ static VALUE lex(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE result = parse_lex_input(&input, &options, false); pm_options_free(&options); @@ -405,24 +546,29 @@ lex(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::lex_file(filepath) -> Array + * Prism::lex_file(filepath, **options) -> Array * - * Return an array of Token instances corresponding to the given file. + * Return an array of Token instances corresponding to the given file. For + * supported options, see Prism::parse. 
*/ static VALUE -lex_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +lex_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); - VALUE value = parse_lex_input(&input, &options, false); + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } - pm_options_free(&options); + VALUE value = parse_lex_input(&input, &options, false); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -462,15 +608,32 @@ parse_input(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::parse(source, filepath = nil) -> ParseResult + * Prism::parse(source, **options) -> ParseResult + * + * Parse the given string and return a ParseResult instance. The options that + * are supported are: * - * Parse the given string and return a ParseResult instance. + * * `filepath` - the filepath of the source being parsed. This should be a + * string or nil + * * `encoding` - the encoding of the source being parsed. This should be an + * encoding or nil + * * `line` - the line number that the parse starts on. This should be an + * integer or nil. Note that this is 1-indexed. + * * `frozen_string_literal` - whether or not the frozen string literal pragma + * has been set. This should be a boolean or nil. + * * `suppress_warnings` - whether or not warnings should be suppressed. This + * should be a boolean or nil. + * * `scopes` - the locals that are in scope surrounding the code that is being + * parsed. This should be an array of arrays of symbols or nil. 
*/ static VALUE parse(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); @@ -482,38 +645,41 @@ parse(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = parse_input(&input, &options); - pm_options_free(&options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); #endif + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_file(filepath) -> ParseResult + * Prism::parse_file(filepath, **options) -> ParseResult * - * Parse the given file and return a ParseResult instance. + * Parse the given file and return a ParseResult instance. For supported + * options, see Prism::parse. 
*/ static VALUE -parse_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = parse_input(&input, &options); - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -540,22 +706,23 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::parse_comments(source, filepath = nil) -> Array + * Prism::parse_comments(source, **options) -> Array * - * Parse the given string and return an array of Comment objects. + * Parse the given string and return an array of Comment objects. For supported + * options, see Prism::parse. */ static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE result = parse_input_comments(&input, &options); pm_options_free(&options); @@ -564,31 +731,36 @@ parse_comments(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::parse_file_comments(filepath) -> Array + * Prism::parse_file_comments(filepath, **options) -> Array * - * Parse the given file and return an array of Comment objects. + * Parse the given file and return an array of Comment objects. 
For supported + * options, see Prism::parse. */ static VALUE -parse_file_comments(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_file_comments(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); - VALUE value = parse_input_comments(&input, &options); + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } - pm_options_free(&options); + VALUE value = parse_input_comments(&input, &options); pm_string_free(&input); + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_lex(source, filepath = nil) -> ParseResult + * Prism::parse_lex(source, **options) -> ParseResult * * Parse the given string and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is @@ -597,30 +769,31 @@ parse_file_comments(VALUE self, VALUE filepath) { * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse or * Prism::lex. + * + * For supported options, see Prism::parse. 
*/ static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = parse_lex_input(&input, &options, true); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_lex_file(filepath) -> ParseResult + * Prism::parse_lex_file(filepath, **options) -> ParseResult * * Parse the given file and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is @@ -629,21 +802,27 @@ parse_lex(int argc, VALUE *argv, VALUE self) { * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse_file * or Prism::lex_file. + * + * For supported options, see Prism::parse. 
*/ static VALUE -parse_lex_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_lex_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); - VALUE value = parse_lex_input(&input, &options, true); + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } - pm_options_free(&options); + VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -821,6 +1000,15 @@ Init_prism(void) { rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject); rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject); + // Intern all of the options that we support so that we don't have to do it + // every time we parse. + id_filepath = rb_intern_const("filepath"); + id_encoding = rb_intern_const("encoding"); + id_line = rb_intern_const("line"); + id_frozen_string_literal = rb_intern_const("frozen_string_literal"); + id_suppress_warnings = rb_intern_const("suppress_warnings"); + id_scopes = rb_intern_const("scopes"); + /** * The version of the prism library. */ @@ -835,15 +1023,15 @@ Init_prism(void) { // First, the functions that have to do with lexing and parsing. 
rb_define_singleton_method(rb_cPrism, "dump", dump, -1); - rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1); + rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1); rb_define_singleton_method(rb_cPrism, "lex", lex, -1); - rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1); + rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1); rb_define_singleton_method(rb_cPrism, "parse", parse, -1); - rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1); + rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1); rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1); - rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1); + rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1); rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1); - rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1); + rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1); // Next, the functions that will be called by the parser to perform various // internal tasks. We expose these to make them easier to test. 
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 7556576f30d..c1f5cfe944e 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -607,7 +607,7 @@ def result state = :default heredoc_stack = [[]] - result = Prism.lex(source, @filepath) + result = Prism.lex(source, filepath: @filepath) result_value = result.value previous_state = nil diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index eada2952dff..6bd7a5d2a1f 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -41,7 +41,7 @@ def test_empty_string def test_parse_takes_file_path filepath = "filepath.rb" - result = Prism.parse("def foo; __FILE__; end", filepath) + result = Prism.parse("def foo; __FILE__; end", filepath: filepath) assert_equal filepath, find_source_file_node(result.value).filepath end @@ -122,7 +122,7 @@ def test_parse_lex_file end # Next, assert that there were no errors during parsing. - result = Prism.parse(source, relative) + result = Prism.parse(source, filepath: relative) assert_empty result.errors # Next, pretty print the source. @@ -149,7 +149,7 @@ def test_parse_lex_file # Next, assert that the value can be serialized and deserialized without # changing the shape of the tree. - assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, relative)).value) + assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value) # Next, check that the location ranges of each node in the tree are a # superset of their respective child nodes. 
@@ -203,10 +203,10 @@ def test_parse_lex_file file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet| snippet = snippet.rstrip - result = Prism.parse(snippet, relative) + result = Prism.parse(snippet, filepath: relative) assert_empty result.errors - assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, relative)).value) + assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value) end end end diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index a6ce976a85e..efe8bc1c1b6 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -8,12 +8,12 @@ def test_ruby_api filepath = __FILE__ source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - assert_equal Prism.lex(source, filepath).value, Prism.lex_file(filepath).value - assert_equal Prism.dump(source, filepath), Prism.dump_file(filepath) + assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value + assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath) - serialized = Prism.dump(source, filepath) + serialized = Prism.dump(source, filepath: filepath) ast1 = Prism.load(source, serialized).value - ast2 = Prism.parse(source, filepath).value + ast2 = Prism.parse(source, filepath: filepath).value ast3 = Prism.parse_file(filepath).value assert_equal_nodes ast1, ast2 From f0aa8ad93b9991c8eba87f672b4f84ad0cdaa403 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 14:01:20 -0400 Subject: [PATCH 03/10] Wire up options through the FFI API --- bin/lex | 2 +- ext/prism/extension.c | 35 +---- include/prism.h | 23 +--- include/prism/options.h | 109 +++++++++++----- lib/prism.rb | 8 +- lib/prism/debug.rb | 8 -- lib/prism/ffi.rb | 198 ++++++++++++++++++----------- lib/prism/lex_compat.rb | 8 +- src/options.c | 78 +++++++++++- src/prism.c | 24 ++-- templates/src/serialize.c.erb | 13 +- test/prism/parse_serialize_test.rb | 38 ------ 
test/prism/ruby_api_test.rb | 13 ++ 13 files changed, 329 insertions(+), 228 deletions(-) delete mode 100644 test/prism/parse_serialize_test.rb diff --git a/bin/lex b/bin/lex index afff791c8d9..2c764a9c8f7 100755 --- a/bin/lex +++ b/bin/lex @@ -29,7 +29,7 @@ ripper = [] end -prism = Prism.lex_compat(source, filepath) +prism = Prism.lex_compat(source, filepath: filepath) prism_new = Prism.lex(source, filepath: filepath) if prism.errors.any? puts "Errors lexing:" diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 679d83c1beb..86221a7bc5b 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -268,7 +268,7 @@ dump_file(int argc, VALUE *argv, VALUE self) { extract_options(&options, filepath, keywords); pm_string_t input; - if (!pm_string_mapped_init(&input, options.filepath)) { + if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { pm_options_free(&options); return Qnil; } @@ -561,7 +561,7 @@ lex_file(int argc, VALUE *argv, VALUE self) { extract_options(&options, filepath, keywords); pm_string_t input; - if (!pm_string_mapped_init(&input, options.filepath)) { + if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { pm_options_free(&options); return Qnil; } @@ -672,7 +672,7 @@ parse_file(int argc, VALUE *argv, VALUE self) { extract_options(&options, filepath, keywords); pm_string_t input; - if (!pm_string_mapped_init(&input, options.filepath)) { + if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { pm_options_free(&options); return Qnil; } @@ -746,7 +746,7 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { extract_options(&options, filepath, keywords); pm_string_t input; - if (!pm_string_mapped_init(&input, options.filepath)) { + if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { pm_options_free(&options); return Qnil; } @@ -815,7 +815,7 @@ parse_lex_file(int argc, VALUE *argv, VALUE self) 
{ extract_options(&options, filepath, keywords); pm_string_t input; - if (!pm_string_mapped_init(&input, options.filepath)) { + if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { pm_options_free(&options); return Qnil; } @@ -913,30 +913,6 @@ profile_file(VALUE self, VALUE filepath) { return Qnil; } -/** - * call-seq: - * Debug::parse_serialize_file_metadata(filepath, metadata) -> dumped - * - * Parse the file and serialize the result. This is mostly used to test this - * path since it is used by client libraries. - */ -static VALUE -parse_serialize_file_metadata(VALUE self, VALUE filepath, VALUE metadata) { - pm_string_t input; - pm_buffer_t buffer; - pm_buffer_init(&buffer); - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; - - pm_parse_serialize(pm_string_source(&input), pm_string_length(&input), &buffer, check_string(metadata)); - VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer)); - - pm_string_free(&input); - pm_buffer_free(&buffer); - return result; -} - /** * call-seq: * Debug::inspect_node(source) -> inspected @@ -1039,7 +1015,6 @@ Init_prism(void) { rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1); rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1); rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1); - rb_define_singleton_method(rb_cPrismDebug, "parse_serialize_file_metadata", parse_serialize_file_metadata, 2); rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1); // Next, initialize the other APIs. 
diff --git a/include/prism.h b/include/prism.h index 84bd3f84a7f..e2c5d064180 100644 --- a/include/prism.h +++ b/include/prism.h @@ -123,30 +123,15 @@ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buf */ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); -/** - * Process any additional metadata being passed into a call to the parser via - * the pm_parse_serialize function. Since the source of these calls will be from - * Ruby implementation internals we assume it is from a trusted source. - * - * Currently, this is only passing in variable scoping surrounding an eval, but - * eventually it will be extended to hold any additional metadata. This data - * is serialized to reduce the calling complexity for a foreign function call - * vs a foreign runtime making a bindable in-memory version of a C structure. - * - * @param parser The parser to process the metadata for. - * @param metadata The metadata to process. - */ -void pm_parser_metadata(pm_parser_t *parser, const char *metadata); - /** * Parse the given source to the AST and serialize the AST to the given buffer. * * @param source The source to parse. * @param size The size of the source. * @param buffer The buffer to serialize to. - * @param metadata The optional metadata to pass to the parser. + * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); +PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data); /** * Parse and serialize the comments in the given source to the given buffer. @@ -154,9 +139,9 @@ PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t si * @param source The source to parse. * @param size The size of the source. * @param buffer The buffer to serialize to. 
- * @param metadata The optional metadata to pass to the parser. + * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); +PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data); /** * Lex the given source and serialize to the given buffer. diff --git a/include/prism/options.h b/include/prism/options.h index 5abf16a6013..6faadc2c47a 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -29,13 +29,7 @@ typedef struct pm_options_scope { */ typedef struct { /** The name of the file that is currently being parsed. */ - const char *filepath; - - /** - * The name of the encoding that the source file is in. Note that this must - * correspond to a name that can be found with Encoding.find in Ruby. - */ - const char *encoding; + pm_string_t filepath; /** * The line within the file that the parse starts on. This value is @@ -43,6 +37,12 @@ typedef struct { */ uint32_t line; + /** + * The name of the encoding that the source file is in. Note that this must + * correspond to a name that can be found with Encoding.find in Ruby. + */ + pm_string_t encoding; + /** * The number of scopes surrounding the code that is being parsed. */ @@ -72,26 +72,23 @@ typedef struct { * @param options The options struct to set the filepath on. * @param filepath The filepath to set. */ -PRISM_EXPORTED_FUNCTION void -pm_options_filepath_set(pm_options_t *options, const char *filepath); +PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath); /** - * Set the encoding option on the given options struct. + * Set the line option on the given options struct. * - * @param options The options struct to set the encoding on. - * @param encoding The encoding to set. + * @param options The options struct to set the line on. 
+ * @param line The line to set. */ -PRISM_EXPORTED_FUNCTION void -pm_options_encoding_set(pm_options_t *options, const char *encoding); +PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, uint32_t line); /** - * Set the line option on the given options struct. + * Set the encoding option on the given options struct. * - * @param options The options struct to set the line on. - * @param line The line to set. + * @param options The options struct to set the encoding on. + * @param encoding The encoding to set. */ -PRISM_EXPORTED_FUNCTION void -pm_options_line_set(pm_options_t *options, uint32_t line); +PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding); /** * Set the frozen string literal option on the given options struct. @@ -99,8 +96,7 @@ pm_options_line_set(pm_options_t *options, uint32_t line); * @param options The options struct to set the frozen string literal value on. * @param frozen_string_literal The frozen string literal value to set. */ -PRISM_EXPORTED_FUNCTION void -pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal); +PRISM_EXPORTED_FUNCTION void pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_literal); /** * Set the suppress warnings option on the given options struct. @@ -108,8 +104,7 @@ pm_options_frozen_string_literal_set(pm_options_t *options, bool frozen_string_l * @param options The options struct to set the suppress warnings value on. * @param suppress_warnings The suppress warnings value to set. */ -PRISM_EXPORTED_FUNCTION void -pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings); +PRISM_EXPORTED_FUNCTION void pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings); /** * Allocate and zero out the scopes array on the given options struct. 
@@ -117,8 +112,7 @@ pm_options_suppress_warnings_set(pm_options_t *options, bool suppress_warnings); * @param options The options struct to initialize the scopes array on. * @param scopes_count The number of scopes to allocate. */ -PRISM_EXPORTED_FUNCTION void -pm_options_scopes_init(pm_options_t *options, size_t scopes_count); +PRISM_EXPORTED_FUNCTION void pm_options_scopes_init(pm_options_t *options, size_t scopes_count); /** * Return a pointer to the scope at the given index within the given options. @@ -127,8 +121,7 @@ pm_options_scopes_init(pm_options_t *options, size_t scopes_count); * @param index The index of the scope to get. * @return A pointer to the scope at the given index. */ -PRISM_EXPORTED_FUNCTION const pm_options_scope_t * -pm_options_scope_get(const pm_options_t *options, size_t index); +PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index); /** * Create a new options scope struct. This will hold a set of locals that are in @@ -137,8 +130,7 @@ pm_options_scope_get(const pm_options_t *options, size_t index); * @param scope The scope struct to initialize. * @param locals_count The number of locals to allocate. */ -PRISM_EXPORTED_FUNCTION void -pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); +PRISM_EXPORTED_FUNCTION void pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); /** * Return a pointer to the local at the given index within the given scope. @@ -147,15 +139,66 @@ pm_options_scope_init(pm_options_scope_t *scope, size_t locals_count); * @param index The index of the local to get. * @return A pointer to the local at the given index. */ -PRISM_EXPORTED_FUNCTION const pm_string_t * -pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index); +PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index); /** * Free the internal memory associated with the options. 
* * @param options The options struct whose internal memory should be freed. */ -PRISM_EXPORTED_FUNCTION void -pm_options_free(pm_options_t *options); +PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); + +/** + * Deserialize an options struct from the given binary string. This is used to + * pass options to the parser from an FFI call so that consumers of the library + * from an FFI perspective don't have to worry about the structure of our + * options structs. Since the source of these calls will be from Ruby + * implementation internals we assume it is from a trusted source. + * + * `data` is assumed to be a valid pointer pointing to well-formed data. The + * layout of this data should be the same every time, and is described below: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | 4 | the length of the filepath | + * | ... | the filepath bytes | + * | 4 | the line number | + * | 4 | the length the encoding | + * | ... | the encoding bytes | + * | 1 | frozen string literal | + * | 1 | suppress warnings | + * | 4 | the number of scopes | + * | ... | the scopes | + * + * Each scope is layed out as follows: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | 4 | the number of locals | + * | ... | the locals | + * + * Each local is layed out as follows: + * + * | # bytes | field | + * | ------- | -------------------------- | + * | 4 | the length of the local | + * | ... | the local bytes | + * + * Some additional things to note about this layout: + * + * * The filepath can have a length of 0, in which case we'll consider it an + * empty string. + * * The line number should be 0-indexed. + * * The encoding can have a length of 0, in which case we'll use the default + * encoding (UTF-8). If it's not 0, it should correspond to a name of an + * encoding that can be passed to `Encoding.find` in Ruby. 
+ * * The frozen string literal and suppress warnings fields are booleans, so + * their values should be either 0 or 1. + * * The number of scopes can be 0. + * + * @param options The options struct to deserialize into. + * @param data The binary string to deserialize from. + */ +void pm_options_read(pm_options_t *options, const char *data); #endif diff --git a/lib/prism.rb b/lib/prism.rb index 86cdadcdadf..350febcaa88 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -35,13 +35,15 @@ module Prism private_constant :LexRipper # :call-seq: - # Prism::lex_compat(source, filepath = "") -> Array + # Prism::lex_compat(source, **options) -> Array # # Returns an array of tokens that closely resembles that of the Ripper lexer. # The only difference is that since we don't keep track of lexer state in the # same way, it's going to always return the NONE state. - def self.lex_compat(source, filepath = "") - LexCompat.new(source, filepath).result + # + # For supported options, see Prism::parse. + def self.lex_compat(source, **options) + LexCompat.new(source, **options).result end # :call-seq: diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb index f573d0958d6..f5d19dc3df7 100644 --- a/lib/prism/debug.rb +++ b/lib/prism/debug.rb @@ -187,13 +187,5 @@ def self.prism_locals(source) def self.newlines(source) Prism.parse(source).source.offsets end - - # :call-seq: - # Debug::parse_serialize_file(filepath) -> dumped - # - # For the given file, parse the AST and dump it to a string. - def self.parse_serialize_file(filepath) - parse_serialize_file_metadata(filepath, [filepath.bytesize, filepath.b, 0].pack("LA*L")) - end end end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 170ca8b8d73..61ece6a6413 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -167,15 +167,6 @@ def self.with(filepath, &block) end end end - - # Dump the given source into a serialized format. 
- def self.dump_internal(source, source_size, filepath) - PrismBuffer.with do |buffer| - metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath - pm_parse_serialize(source, source_size, buffer.pointer, metadata) - buffer.read - end - end end # Mark the LibRubyParser module as private as it should only be called through @@ -185,93 +176,152 @@ def self.dump_internal(source, source_size, filepath) # The version constant is set by reading the result of calling pm_version. VERSION = LibRubyParser.pm_version.read_string - # Mirror the Prism.dump API by using the serialization API. - def self.dump(code, filepath = nil) - LibRubyParser.dump_internal(code, code.bytesize, filepath) - end + class << self + # Mirror the Prism.dump API by using the serialization API. + def dump(code, **options) + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_parse_serialize(code, code.bytesize, buffer.pointer, dump_options(options)) + buffer.read + end + end - # Mirror the Prism.dump_file API by using the serialization API. - def self.dump_file(filepath) - LibRubyParser::PrismString.with(filepath) do |string| - LibRubyParser.dump_internal(string.source, string.length, filepath) + # Mirror the Prism.dump_file API by using the serialization API. + def dump_file(filepath, **options) + LibRubyParser::PrismString.with(filepath) do |string| + dump(string.read, **options, filepath: filepath) + end end - end - # Mirror the Prism.lex API by using the serialization API. - def self.lex(code, filepath = nil) - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_lex_serialize(code, code.bytesize, filepath, buffer.pointer) - Serialize.load_tokens(Source.new(code), buffer.read) + # Mirror the Prism.lex API by using the serialization API. 
+ def lex(code, **options) + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_lex_serialize(code, code.bytesize, dump_options(options), buffer.pointer) + Serialize.load_tokens(Source.new(code), buffer.read) + end end - end - # Mirror the Prism.lex_file API by using the serialization API. - def self.lex_file(filepath) - LibRubyParser::PrismString.with(filepath) do |string| - lex(string.read, filepath) + # Mirror the Prism.lex_file API by using the serialization API. + def lex_file(filepath, **options) + LibRubyParser::PrismString.with(filepath) do |string| + lex(string.read, **options, filepath: filepath) + end end - end - # Mirror the Prism.parse API by using the serialization API. - def self.parse(code, filepath = nil) - Prism.load(code, dump(code, filepath)) - end + # Mirror the Prism.parse API by using the serialization API. + def parse(code, **options) + Prism.load(code, dump(code, **options)) + end - # Mirror the Prism.parse_file API by using the serialization API. This uses - # native strings instead of Ruby strings because it allows us to use mmap when - # it is available. - def self.parse_file(filepath) - LibRubyParser::PrismString.with(filepath) do |string| - parse(string.read, filepath) + # Mirror the Prism.parse_file API by using the serialization API. This uses + # native strings instead of Ruby strings because it allows us to use mmap when + # it is available. + def parse_file(filepath, **options) + LibRubyParser::PrismString.with(filepath) do |string| + parse(string.read, **options, filepath: filepath) + end end - end - # Mirror the Prism.parse_comments API by using the serialization API. - def self.parse_comments(code, filepath = nil) - LibRubyParser::PrismBuffer.with do |buffer| - metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath - LibRubyParser.pm_parse_serialize_comments(code, code.bytesize, buffer.pointer, metadata) + # Mirror the Prism.parse_comments API by using the serialization API. 
+ def parse_comments(code, **options) + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_parse_serialize_comments(code, code.bytesize, buffer.pointer, dump_options(options)) - source = Source.new(code) - loader = Serialize::Loader.new(source, buffer.read) + source = Source.new(code) + loader = Serialize::Loader.new(source, buffer.read) - loader.load_header - loader.load_force_encoding - loader.load_comments + loader.load_header + loader.load_force_encoding + loader.load_comments + end end - end - # Mirror the Prism.parse_file_comments API by using the serialization - # API. This uses native strings instead of Ruby strings because it allows us - # to use mmap when it is available. - def self.parse_file_comments(filepath) - LibRubyParser::PrismString.with(filepath) do |string| - parse_comments(string.read, filepath) + # Mirror the Prism.parse_file_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. + def parse_file_comments(filepath, **options) + LibRubyParser::PrismString.with(filepath) do |string| + parse_comments(string.read, **options, filepath: filepath) + end end - end - # Mirror the Prism.parse_lex API by using the serialization API. - def self.parse_lex(code, filepath = nil) - LibRubyParser::PrismBuffer.with do |buffer| - metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath - LibRubyParser.pm_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata) + # Mirror the Prism.parse_lex API by using the serialization API. 
+ def parse_lex(code, **options) + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_parse_lex_serialize(code, code.bytesize, buffer.pointer, dump_options(options)) - source = Source.new(code) - loader = Serialize::Loader.new(source, buffer.read) + source = Source.new(code) + loader = Serialize::Loader.new(source, buffer.read) - tokens = loader.load_tokens - node, comments, magic_comments, errors, warnings = loader.load_nodes + tokens = loader.load_tokens + node, comments, magic_comments, errors, warnings = loader.load_nodes - tokens.each { |token,| token.value.force_encoding(loader.encoding) } + tokens.each { |token,| token.value.force_encoding(loader.encoding) } + ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source) + end + end - ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source) + # Mirror the Prism.parse_lex_file API by using the serialization API. + def parse_lex_file(filepath, **options) + LibRubyParser::PrismString.with(filepath) do |string| + parse_lex(string.read, **options, filepath: filepath) + end end - end - # Mirror the Prism.parse_lex_file API by using the serialization API. - def self.parse_lex_file(filepath) - LibRubyParser::PrismString.with(filepath) do |string| - parse_lex(string.read, filepath) + private + + # Convert the given options into a serialized options string. + def dump_options(options) + template = +"" + values = [] + + template << "L" + if (filepath = options[:filepath]) + values.push(filepath.bytesize, filepath.b) + template << "A*" + else + values << 0 + end + + template << "L" + values << options.fetch(:line, 0) + + template << "L" + if (encoding = options[:encoding]) + name = encoding.name + values.push(name.bytesize, name.b) + template << "A*" + else + values << 0 + end + + template << "C" + values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:suppress_warnings, false) ? 
1 : 0) + + template << "L" + if (scopes = options[:scopes]) + values << scopes.length + + scopes.each do |scope| + template << "L" + values << scope.length + + scope.each do |local| + name = local.name + template << "L" + values << name.bytesize + + template << "A*" + values << name.b + end + end + else + values << 0 + end + + values.pack(template) end end end diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index c1f5cfe944e..b6d12053a0f 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -594,11 +594,11 @@ def self.build(opening) private_constant :Heredoc - attr_reader :source, :filepath + attr_reader :source, :options - def initialize(source, filepath = "") + def initialize(source, **options) @source = source - @filepath = filepath || "" + @options = options end def result @@ -607,7 +607,7 @@ def result state = :default heredoc_stack = [[]] - result = Prism.lex(source, filepath: @filepath) + result = Prism.lex(source, **options) result_value = result.value previous_state = nil diff --git a/src/options.c b/src/options.c index 7f45c2026c7..84c1fcbb397 100644 --- a/src/options.c +++ b/src/options.c @@ -5,7 +5,7 @@ */ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, const char *filepath) { - options->filepath = filepath; + pm_string_constant_init(&options->filepath, filepath, strlen(filepath)); } /** @@ -13,7 +13,7 @@ pm_options_filepath_set(pm_options_t *options, const char *filepath) { */ PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding) { - options->encoding = encoding; + pm_string_constant_init(&options->encoding, encoding, strlen(encoding)); } /** @@ -82,6 +82,9 @@ pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index) { */ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options) { + pm_string_free(&options->filepath); + pm_string_free(&options->encoding); + for (size_t scope_index = 0; scope_index < options->scopes_count; 
scope_index++) { pm_options_scope_t *scope = &options->scopes[scope_index]; @@ -94,3 +97,74 @@ pm_options_free(pm_options_t *options) { free(options->scopes); } + +/** + * Read a 32-bit unsigned integer from a pointer. This function is used to read + * the options that are passed into the parser from the Ruby implementation. It + * handles aligned and unaligned reads. + */ +static uint32_t +pm_options_read_u32(const char *data) { + if (((uintptr_t) data) % sizeof(uint32_t) == 0) { + return *((uint32_t *) data); + } else { + uint32_t value; + memcpy(&value, data, sizeof(uint32_t)); + return value; + } +} + +/** + * Deserialize an options struct from the given binary string. This is used to + * pass options to the parser from an FFI call so that consumers of the library + * from an FFI perspective don't have to worry about the structure of our + * options structs. Since the source of these calls will be from Ruby + * implementation internals we assume it is from a trusted source. + */ +void +pm_options_read(pm_options_t *options, const char *data) { + uint32_t filepath_length = pm_options_read_u32(data); + data += 4; + + if (filepath_length > 0) { + pm_string_constant_init(&options->filepath, data, filepath_length); + data += filepath_length; + } + + options->line = pm_options_read_u32(data); + data += 4; + + uint32_t encoding_length = pm_options_read_u32(data); + data += 4; + + if (encoding_length > 0) { + pm_string_constant_init(&options->encoding, data, encoding_length); + data += encoding_length; + } + + options->frozen_string_literal = *data++; + options->suppress_warnings = *data++; + + uint32_t scopes_count = pm_options_read_u32(data); + data += 4; + + if (scopes_count > 0) { + pm_options_scopes_init(options, scopes_count); + + for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) { + uint32_t locals_count = pm_options_read_u32(data); + data += 4; + + pm_options_scope_t *scope = &options->scopes[scope_index]; + pm_options_scope_init(scope, 
locals_count); + + for (size_t local_index = 0; local_index < locals_count; local_index++) { + uint32_t local_length = pm_options_read_u32(data); + data += 4; + + pm_string_constant_init(&scope->locals[local_index], data, local_length); + data += local_length; + } + } + } +} diff --git a/src/prism.c b/src/prism.c index fc836a956d7..2c04860de1d 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16391,11 +16391,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // If options were provided to this parse, establish them here. if (options != NULL) { // filepath option - if (options->filepath == NULL) { - pm_string_constant_init(&parser->filepath_string, "", 0); - } else { - pm_string_constant_init(&parser->filepath_string, options->filepath, strlen(options->filepath)); - } + parser->filepath_string = options->filepath; // line option if (options->line > 0) { @@ -16561,10 +16557,12 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { * buffer. */ PRISM_EXPORTED_FUNCTION void -pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { +pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { + pm_options_t options = { 0 }; + if (data != NULL) pm_options_read(&options, data); + pm_parser_t parser; - pm_parser_init(&parser, source, size, NULL); - if (metadata) pm_parser_metadata(&parser, metadata); + pm_parser_init(&parser, source, size, &options); pm_node_t *node = pm_parse(&parser); @@ -16574,16 +16572,19 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons pm_node_destroy(&parser, node); pm_parser_free(&parser); + pm_options_free(&options); } /** * Parse and serialize the comments in the given source to the given buffer. 
*/ PRISM_EXPORTED_FUNCTION void -pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { +pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { + pm_options_t options = { 0 }; + if (data != NULL) pm_options_read(&options, data); + pm_parser_t parser; - pm_parser_init(&parser, source, size, NULL); - if (metadata) pm_parser_metadata(&parser, metadata); + pm_parser_init(&parser, source, size, &options); pm_node_t *node = pm_parse(&parser); pm_serialize_header(buffer); @@ -16592,6 +16593,7 @@ pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buf pm_node_destroy(&parser, node); pm_parser_free(&parser); + pm_options_free(&options); } #undef PM_CASE_KEYWORD diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index db10fab6de7..b8e93df6fb2 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -287,9 +287,9 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { * Lex the given source and serialize to the given buffer. */ PRISM_EXPORTED_FUNCTION void -pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) { +pm_lex_serialize(const uint8_t *source, size_t size, const char *data, pm_buffer_t *buffer) { pm_options_t options = { 0 }; - pm_options_filepath_set(&options, filepath); + if (data != NULL) pm_options_read(&options, data); pm_parser_t parser; pm_parser_init(&parser, source, size, &options); @@ -321,10 +321,12 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu * source to the given buffer. 
*/ PRISM_EXPORTED_FUNCTION void -pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { +pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { + pm_options_t options = { 0 }; + if (data != NULL) pm_options_read(&options, data); + pm_parser_t parser; - pm_parser_init(&parser, source, size, NULL); - if (metadata) pm_parser_metadata(&parser, metadata); + pm_parser_init(&parser, source, size, &options); pm_lex_callback_t lex_callback = (pm_lex_callback_t) { .data = (void *) buffer, @@ -339,4 +341,5 @@ pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, pm_node_destroy(&parser, node); pm_parser_free(&parser); + pm_options_free(&options); } diff --git a/test/prism/parse_serialize_test.rb b/test/prism/parse_serialize_test.rb deleted file mode 100644 index 001518c14d5..00000000000 --- a/test/prism/parse_serialize_test.rb +++ /dev/null @@ -1,38 +0,0 @@ -# frozen_string_literal: true - -require_relative "test_helper" - -return if Prism::BACKEND == :FFI - -module Prism - class ParseSerializeTest < TestCase - def test_parse_serialize - dumped = Debug.parse_serialize_file(__FILE__) - result = Prism.load(File.read(__FILE__), dumped) - - assert_kind_of ParseResult, result, "Expected the return value to be a ParseResult" - assert_equal __FILE__, find_file_node(result)&.filepath, "Expected the filepath to be set correctly" - end - - def test_parse_serialize_with_locals - filepath = __FILE__ - metadata = [filepath.bytesize, filepath.b, 1, 1, 1, "foo".b].pack("LA*LLLA*") - - dumped = Debug.parse_serialize_file_metadata(filepath, metadata) - result = Prism.load(File.read(__FILE__), dumped) - - assert_kind_of ParseResult, result, "Expected the return value to be a ParseResult" - end - - private - - def find_file_node(result) - queue = [result.value] - - while (node = queue.shift) - return node if node.is_a?(SourceFileNode) - queue.concat(node.compact_child_nodes) - 
end - end - end -end diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index efe8bc1c1b6..844a7796a3c 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -20,6 +20,19 @@ def test_ruby_api assert_equal_nodes ast2, ast3 end + def test_options + assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath + assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath + + refute Prism.parse("\"foo\"").value.statements.body[0].frozen? + assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen? + refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen? + + assert_kind_of Prism::CallNode, Prism.parse("foo").value.statements.body[0] + assert_kind_of Prism::LocalVariableReadNode, Prism.parse("foo", scopes: [[:foo]]).value.statements.body[0] + assert_equal 2, Prism.parse("foo", scopes: [[:foo], []]).value.statements.body[0].depth + end + def test_literal_value_method assert_equal 123, parse_expression("123").value assert_equal 3.14, parse_expression("3.14").value From 5a2252e3ac76e37a69414fbdd82c9872edf31793 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 14:13:28 -0400 Subject: [PATCH 04/10] Rename serialization APIs for consistency --- include/prism.h | 18 +++++++++--------- lib/prism/ffi.rb | 18 +++++++++--------- src/prism.c | 4 ++-- templates/src/serialize.c.erb | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/prism.h b/include/prism.h index e2c5d064180..2f7208929bb 100644 --- a/include/prism.h +++ b/include/prism.h @@ -124,24 +124,24 @@ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buf PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); /** - * Parse the given source to the AST and serialize the AST to the given buffer. 
+ * Parse the given source to the AST and dump the AST to the given buffer. * + * @param buffer The buffer to serialize to. * @param source The source to parse. * @param size The size of the source. - * @param buffer The buffer to serialize to. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); /** * Parse and serialize the comments in the given source to the given buffer. * + * @param buffer The buffer to serialize to. * @param source The source to parse. * @param size The size of the source. - * @param buffer The buffer to serialize to. * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); /** * Lex the given source and serialize to the given buffer. @@ -151,18 +151,18 @@ PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, * @param filepath The optional filepath to pass to the lexer. * @param buffer The buffer to serialize to. */ -PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer); +PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); /** * Parse and serialize both the AST and the tokens represented by the given * source to the given buffer. * + * @param buffer The buffer to serialize to. * @param source The source to parse. * @param size The size of the source. - * @param buffer The buffer to serialize to. - * @param metadata The optional metadata to pass to the parser. 
+ * @param data The optional data to pass to the parser. */ -PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); +PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); /** * Returns a string representation of the given token type. diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 61ece6a6413..cc84d00f145 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -69,10 +69,10 @@ def self.load_exported_functions_from(header, *functions) load_exported_functions_from( "prism.h", "pm_version", - "pm_parse_serialize", - "pm_parse_serialize_comments", - "pm_lex_serialize", - "pm_parse_lex_serialize" + "pm_serialize_parse", + "pm_serialize_parse_comments", + "pm_serialize_lex", + "pm_serialize_parse_lex" ) load_exported_functions_from( @@ -180,7 +180,7 @@ class << self # Mirror the Prism.dump API by using the serialization API. def dump(code, **options) LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_parse_serialize(code, code.bytesize, buffer.pointer, dump_options(options)) + LibRubyParser.pm_serialize_parse(buffer.pointer, code, code.bytesize, dump_options(options)) buffer.read end end @@ -195,7 +195,7 @@ def dump_file(filepath, **options) # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_lex_serialize(code, code.bytesize, dump_options(options), buffer.pointer) + LibRubyParser.pm_serialize_lex(buffer.pointer, code, code.bytesize, dump_options(options)) Serialize.load_tokens(Source.new(code), buffer.read) end end @@ -224,7 +224,7 @@ def parse_file(filepath, **options) # Mirror the Prism.parse_comments API by using the serialization API. 
def parse_comments(code, **options) LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_parse_serialize_comments(code, code.bytesize, buffer.pointer, dump_options(options)) + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, code, code.bytesize, dump_options(options)) source = Source.new(code) loader = Serialize::Loader.new(source, buffer.read) @@ -247,15 +247,15 @@ def parse_file_comments(filepath, **options) # Mirror the Prism.parse_lex API by using the serialization API. def parse_lex(code, **options) LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_parse_lex_serialize(code, code.bytesize, buffer.pointer, dump_options(options)) + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, code, code.bytesize, dump_options(options)) source = Source.new(code) loader = Serialize::Loader.new(source, buffer.read) tokens = loader.load_tokens node, comments, magic_comments, errors, warnings = loader.load_nodes - tokens.each { |token,| token.value.force_encoding(loader.encoding) } + ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source) end end diff --git a/src/prism.c b/src/prism.c index 2c04860de1d..9d2fd6d6f78 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16557,7 +16557,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { * buffer. */ PRISM_EXPORTED_FUNCTION void -pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { +pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; if (data != NULL) pm_options_read(&options, data); @@ -16579,7 +16579,7 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons * Parse and serialize the comments in the given source to the given buffer. 
*/ PRISM_EXPORTED_FUNCTION void -pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { +pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; if (data != NULL) pm_options_read(&options, data); diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index b8e93df6fb2..9b0722c2571 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -287,7 +287,7 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { * Lex the given source and serialize to the given buffer. */ PRISM_EXPORTED_FUNCTION void -pm_lex_serialize(const uint8_t *source, size_t size, const char *data, pm_buffer_t *buffer) { +pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; if (data != NULL) pm_options_read(&options, data); @@ -321,7 +321,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *data, pm_buffer * source to the given buffer. 
*/ PRISM_EXPORTED_FUNCTION void -pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *data) { +pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) { pm_options_t options = { 0 }; if (data != NULL) pm_options_read(&options, data); From 4b538af20f3a0bf12a201c203ed676869d8646c5 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 14:20:28 -0400 Subject: [PATCH 05/10] Properly support the encoding option --- src/prism.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/prism.c b/src/prism.c index 9d2fd6d6f78..cfb32ded66e 100644 --- a/src/prism.c +++ b/src/prism.c @@ -5847,7 +5847,7 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) { * Here we're going to check if this is a "magic" comment, and perform whatever * actions are necessary for it here. */ -static void +static bool parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) { size_t width = (size_t) (end - start); @@ -5859,7 +5859,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star if (encoding != NULL) { parser->encoding = *encoding; - return; + return true; } } @@ -5870,7 +5870,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) { // We don't need to do anything here because the default encoding is // already UTF-8. We'll just return. - return; + return true; } // Next, we're going to loop through each of the encodings that we handle @@ -5880,7 +5880,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star parser->encoding = prebuilt; \ parser->encoding_changed |= true; \ if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \ - return; \ + return true; \ } // Check most common first. 
(This is pretty arbitrary.) @@ -5921,11 +5921,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star #undef ENCODING - // If nothing was returned by this point, then we've got an issue because we - // didn't understand the encoding that the user was trying to use. In this - // case we'll keep using the default encoding but add an error to the - // parser to indicate an unsuccessful parse. - pm_parser_err(parser, start, end, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); + return false; } /** @@ -5975,7 +5971,13 @@ parser_lex_magic_comment_encoding(pm_parser_t *parser) { const uint8_t *value_start = cursor; while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end); - parser_lex_magic_comment_encoding_value(parser, value_start, cursor); + if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) { + // If we were unable to parse the encoding value, then we've got an + // issue because we didn't understand the encoding that the user was + // trying to use. In this case we'll keep using the default encoding but + // add an error to the parser to indicate an unsuccessful parse. 
+ pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT); + } } /** @@ -16399,7 +16401,11 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm } // encoding option - // if (options->encoding != NULL) {} + size_t encoding_length = pm_string_length(&options->encoding); + if (encoding_length > 0) { + const uint8_t *encoding_source = pm_string_source(&options->encoding); + parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length); + } // frozen_string_literal option if (options->frozen_string_literal) { From 84229529d744ba2da6bc5bc6376951c39b24745c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 14:28:25 -0400 Subject: [PATCH 06/10] Properly support the suppress_warnings option --- include/prism/parser.h | 7 +++++++ src/prism.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/prism/parser.h b/include/prism/parser.h index f4d0153e17b..a30e61c9bfa 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -672,6 +672,13 @@ struct pm_parser { * a true value. */ bool frozen_string_literal; + + /** + * Whether or not we should emit warnings. This will be set to false if the + * consumer of the library specified it, usually because they are parsing + * when $VERBOSE is nil. 
+ */ + bool suppress_warnings; }; #endif diff --git a/src/prism.c b/src/prism.c index cfb32ded66e..0e135db5cbb 100644 --- a/src/prism.c +++ b/src/prism.c @@ -508,7 +508,9 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_ */ static inline void pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) { - pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id); + if (!parser->suppress_warnings) { + pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id); + } } /** @@ -16364,7 +16366,8 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .pattern_matching_newlines = false, .in_keyword_arg = false, .semantic_token_seen = false, - .frozen_string_literal = false + .frozen_string_literal = false, + .suppress_warnings = false }; // Initialize the constant pool. We're going to completely guess as to the @@ -16413,7 +16416,9 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm } // suppress_warnings option - // if (options->suppress_warnings) {} + if (options->suppress_warnings) { + parser->suppress_warnings = true; + } // scopes option for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) { From 33cc75a4b74d29d70c2542573c72d7263dc56614 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 15:06:50 -0400 Subject: [PATCH 07/10] Properly support the start line option --- ext/prism/extension.c | 4 +-- include/prism/parser.h | 6 ++++ include/prism/util/pm_newline_list.h | 12 -------- lib/prism/parse_result.rb | 42 +++++++++++++++++----------- rbi/prism_static.rbi | 4 +-- sig/prism_static.rbs | 4 ++- src/prism.c | 3 +- src/util/pm_newline_list.c | 27 ------------------ templates/ext/prism/api_node.c.erb | 4 +-- test/prism/newline_test.rb | 2 +- test/prism/ruby_api_test.rb | 3 ++ 11 files changed, 47 insertions(+), 64 deletions(-) diff --git a/ext/prism/extension.c 
b/ext/prism/extension.c index 86221a7bc5b..dfd8e76d5a6 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -471,8 +471,8 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); VALUE offsets = rb_ary_new(); - VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), offsets }; - VALUE source = rb_class_new_instance(2, source_argv, rb_cPrismSource); + VALUE source_argv[] = { rb_str_new((const char *) pm_string_source(input), pm_string_length(input)), ULONG2NUM(parser.start_line), offsets }; + VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource); parse_lex_data_t parse_lex_data = { .source = source, diff --git a/include/prism/parser.h b/include/prism/parser.h index a30e61c9bfa..edefe70f253 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -639,6 +639,12 @@ struct pm_parser { */ pm_string_t current_string; + /** + * The line number at the start of the parse. This will be used to offset + * the line numbers of all of the locations. + */ + uint32_t start_line; + /** Whether or not we're at the beginning of a command. */ bool command_start; diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h index 93816b06561..a31051f4e0a 100644 --- a/include/prism/util/pm_newline_list.h +++ b/include/prism/util/pm_newline_list.h @@ -61,18 +61,6 @@ typedef struct { */ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); -/** - * Set up the newline list such that it believes it is starting on a specific - * line in the source. Basically this entails pushing on pointers to the start - * of the string until we hit the desired line. - * - * @param list The list to set up. - * @param count The number of lines to push onto the list. 
- * @return True if no reallocation was needed or the reallocation of the offsets - * succeeds (if one was necessary), otherwise false. - */ -bool pm_newline_list_force(pm_newline_list_t *list, size_t count); - /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index cf94232df2a..92651cf7660 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -8,14 +8,18 @@ class Source # The source code that this source object represents. attr_reader :source + # The line number where this source starts. + attr_reader :start_line + # The list of newline byte offsets in the source code. attr_reader :offsets # Create a new source object with the given source code and newline byte # offsets. If no newline byte offsets are given, they will be computed from # the source code. - def initialize(source, offsets = compute_offsets(source)) + def initialize(source, start_line = 1, offsets = compute_offsets(source)) @source = source + @start_line = start_line @offsets = offsets end @@ -28,6 +32,25 @@ def slice(offset, length) # Binary search through the offsets to find the line number for the given # byte offset. def line(value) + start_line + find_line(value) + end + + # Return the byte offset of the start of the line corresponding to the given + # byte offset. + def line_offset(value) + offsets[find_line(value)] + end + + # Return the column number for the given byte offset. + def column(value) + value - offsets[find_line(value)] + end + + private + + # Binary search through the offsets to find the line number for the given + # byte offset. + def find_line(value) left = 0 right = offsets.length - 1 @@ -45,19 +68,6 @@ def line(value) left - 1 end - # Return the byte offset of the start of the line corresponding to the given - # byte offset. 
- def line_offset(value) - offsets[line(value)] - end - - # Return the column number for the given byte offset. - def column(value) - value - offsets[line(value)] - end - - private - # Find all of the newlines in the source code and return their byte offsets # from the start of the string an array. def compute_offsets(code) @@ -118,7 +128,7 @@ def end_offset # The line number where this location starts. def start_line - source.line(start_offset) + 1 + source.line(start_offset) end # The content of the line where this location starts before this location. @@ -129,7 +139,7 @@ def start_line_slice # The line number where this location ends. def end_line - source.line(end_offset) + 1 + source.line(end_offset) end # The column number in bytes where this location starts from the start of diff --git a/rbi/prism_static.rbi b/rbi/prism_static.rbi index 502a7cd8db3..3860f0ab3bf 100644 --- a/rbi/prism_static.rbi +++ b/rbi/prism_static.rbi @@ -83,8 +83,8 @@ module Prism end class Source - sig { params(source: String, offsets: T::Array[Integer]).void } - def initialize(source, offsets); end + sig { params(source: String, start_line: Integer, offsets: T::Array[Integer]).void } + def initialize(source, start_line, offsets); end sig { params(offset: Integer, length: Integer).returns(String) } def slice(offset, length); end diff --git a/sig/prism_static.rbs b/sig/prism_static.rbs index 59e22b309d7..1497ca46595 100644 --- a/sig/prism_static.rbs +++ b/sig/prism_static.rbs @@ -42,12 +42,14 @@ module Prism class Source attr_reader source: String + attr_reader start_line: Integer attr_reader offsets: Array[Integer] @source: String + @start_line: Integer @offsets: Array[Integer] - def initialize: (source: String, offsets: Array[Integer]) -> void + def initialize: (source: String, start_line: Integer, offsets: Array[Integer]) -> void def slice: (offset: Integer, length: Integer) -> String def line: (value: Integer) -> Integer def line_offset: (value: Integer) -> Integer diff --git 
a/src/prism.c b/src/prism.c index 0e135db5cbb..be8dd38ccf6 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16360,6 +16360,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm .newline_list = { 0 }, .integer_base = 0, .current_string = PM_STRING_EMPTY, + .start_line = 1, .command_start = true, .recovering = false, .encoding_changed = false, @@ -16400,7 +16401,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm // line option if (options->line > 0) { - pm_newline_list_force(&parser->newline_list, options->line); + parser->start_line = options->line; } // encoding option diff --git a/src/util/pm_newline_list.c b/src/util/pm_newline_list.c index 978ebf3d0e1..f27bb75b63a 100644 --- a/src/util/pm_newline_list.c +++ b/src/util/pm_newline_list.c @@ -19,33 +19,6 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac return true; } -/** - * Set up the newline list such that it believes it is starting on a specific - * line in the source. Basically this entails pushing on pointers to the start - * of the string until we hit the desired line. - */ -bool -pm_newline_list_force(pm_newline_list_t *list, size_t count) { - size_t next_capacity = list->capacity == 0 ? 1 : list->capacity; - while (count > next_capacity) { - next_capacity *= 2; - } - - size_t *offsets = list->offsets; - list->offsets = (size_t *) calloc(next_capacity, sizeof(size_t)); - if (list->offsets == NULL) return false; - - if (offsets != NULL) { - memcpy(list->offsets, offsets, list->size * sizeof(size_t)); - free(offsets); - } - - memset(list->offsets + list->size, 0, count * sizeof(size_t)); - list->size += count; - - return true; -} - /** * Append a new offset to the newline list. Returns true if the reallocation of * the offsets succeeds (if one was necessary), otherwise returns false. 
diff --git a/templates/ext/prism/api_node.c.erb b/templates/ext/prism/api_node.c.erb index 1ffbf6c9f84..7bc52c1120b 100644 --- a/templates/ext/prism/api_node.c.erb +++ b/templates/ext/prism/api_node.c.erb @@ -46,8 +46,8 @@ pm_source_new(pm_parser_t *parser, rb_encoding *encoding) { rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index])); } - VALUE source_argv[] = { source, offsets }; - return rb_class_new_instance(2, source_argv, rb_cPrismSource); + VALUE source_argv[] = { source, ULONG2NUM(parser->start_line), offsets }; + return rb_class_new_instance(3, source_argv, rb_cPrismSource); } typedef struct pm_node_stack_node { diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb index 3a2892b9706..c20a99a398e 100644 --- a/test/prism/newline_test.rb +++ b/test/prism/newline_test.rb @@ -84,7 +84,7 @@ def prism_lines(result) while node = queue.shift queue.concat(node.compact_child_nodes) - newlines << (result.source.line(node.location.start_offset) + 1) if node&.newline? + newlines << result.source.line(node.location.start_offset) if node&.newline? end newlines.sort diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index 844a7796a3c..a61282cca18 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -24,6 +24,9 @@ def test_options assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath + assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line + assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line + refute Prism.parse("\"foo\"").value.statements.body[0].frozen? assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen? refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen? 
From 13fa262669a9ffca39a87180bc69a7742963beca Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 15:42:18 -0400 Subject: [PATCH 08/10] Wire up options through the Java parser --- docs/fuzzing.md | 2 +- docs/serialization.md | 57 +++++++++-------- fuzz/parse.c | 2 +- include/prism.h | 4 +- include/prism/options.h | 16 ++--- javascript/src/parsePrism.js | 2 +- lib/prism/ffi.rb | 3 +- lib/prism/parse_result.rb | 2 +- src/prism.c | 79 +----------------------- templates/java/org/prism/Loader.java.erb | 2 + templates/java/org/prism/Nodes.java.erb | 13 +++- templates/lib/prism/serialize.rb.erb | 9 ++- 12 files changed, 68 insertions(+), 123 deletions(-) diff --git a/docs/fuzzing.md b/docs/fuzzing.md index fdfa88dca46..8565af24056 100644 --- a/docs/fuzzing.md +++ b/docs/fuzzing.md @@ -25,7 +25,7 @@ fuzz There are currently three fuzzing targets -- `pm_parse_serialize` (parse) +- `pm_serialize_parse` (parse) - `pm_regexp_named_capture_group_names` (regexp) Respectively, fuzzing can be performed with diff --git a/docs/serialization.md b/docs/serialization.md index 9a0b6cf3847..9c38897463a 100644 --- a/docs/serialization.md +++ b/docs/serialization.md @@ -72,6 +72,7 @@ The header is structured like the following table: | `1` | patch version number | | `1` | 1 indicates only semantics fields were serialized, 0 indicates all fields were serialized (including location fields) | | string | the encoding name | +| varint | the start line | | varint | number of comments | | comment* | comments | | varint | number of magic comments | @@ -136,56 +137,54 @@ typedef struct { size_t capacity; } pm_buffer_t; -// Initialize a pm_buffer_t with its default values. -bool pm_buffer_init(pm_buffer_t *); - // Free the memory associated with the buffer. void pm_buffer_free(pm_buffer_t *); // Parse and serialize the AST represented by the given source to the given // buffer. 
-void pm_parse_serialize(const uint8_t *source, size_t length, pm_buffer_t *buffer, const char *metadata); +void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t length, const char *data); ``` -Typically you would use a stack-allocated `pm_buffer_t` and call `pm_parse_serialize`, as in: +Typically you would use a stack-allocated `pm_buffer_t` and call `pm_serialize_parse`, as in: ```c void serialize(const uint8_t *source, size_t length) { - pm_buffer_t buffer; - if (!pm_buffer_init(&buffer)) return; + pm_buffer_t buffer = { 0 }; + pm_serialize_parse(&buffer, source, length, NULL); - pm_parse_serialize(source, length, &buffer, NULL); // Do something with the serialized string. pm_buffer_free(&buffer); } ``` -The final argument to `pm_parse_serialize` controls the metadata of the source. -This includes the filepath that the source is associated with, and any nested local variables scopes that are necessary to properly parse the file (in the case of parsing an `eval`). -Note that no `varint` are used here to make it easier to produce the metadata for the caller, and also serialized size is less important here. -The metadata is a serialized format itself, and is structured as follows: +The final argument to `pm_serialize_parse` is an optional string that controls the options to the parse function. This includes all of the normal options that could be passed to `pm_parser_init` through a `pm_options_t` struct, but serialized as a string to make it easier for callers through FFI. Note that no `varint` are used here to make it easier to produce the data for the caller, and also serialized size is less important here. The format of the data is structured as follows: -| # bytes | field | -| --- | --- | -| `4` | the size of the filepath string | -| | the filepath string | -| `4` | the number of local variable scopes | +| # bytes | field | +| ------- | -------------------------- | +| `4` | the length of the filepath | +| ... 
| the filepath bytes | +| `4` | the line number | +| `4` | the length of the encoding | +| ... | the encoding bytes | +| `1` | frozen string literal | +| `1` | suppress warnings | +| `4` | the number of scopes | +| ... | the scopes | -Then, each local variable scope is encoded as: +Each scope is laid out as follows: -| # bytes | field | -| --- | --- | -| `4` | the number of local variables in the scope | -| | the local variables | +| # bytes | field | +| ------- | -------------------------- | +| `4` | the number of locals | +| ... | the locals | -Each local variable within each scope is encoded as: +Each local is laid out as follows: -| # bytes | field | -| --- | --- | -| `4` | the size of the local variable name | -| | the local variable name | +| # bytes | field | +| ------- | -------------------------- | +| `4` | the length of the local | +| ... | the local bytes | -The metadata can be `NULL` (as seen in the example above). -If it is not null, then a minimal metadata string would be `"\0\0\0\0\0\0\0\0"` which would use 4 bytes to indicate an empty filepath string and 4 bytes to indicate that there were no local variable scopes. +The data can be `NULL` (as seen in the example above). 
diff --git a/fuzz/parse.c b/fuzz/parse.c index 500d1795cf9..8c389bb33b9 100644 --- a/fuzz/parse.c +++ b/fuzz/parse.c @@ -4,6 +4,6 @@ void harness(const uint8_t *input, size_t size) { pm_buffer_t buffer; pm_buffer_init(&buffer); - pm_parse_serialize(input, size, &buffer, NULL); + pm_serialize_parse(&buffer, input, size, NULL); pm_buffer_free(&buffer); } diff --git a/include/prism.h b/include/prism.h index 2f7208929bb..211d2957f24 100644 --- a/include/prism.h +++ b/include/prism.h @@ -233,7 +233,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_ * * `pm_buffer_t` - a small buffer object that will hold the serialized AST * * `pm_buffer_free` - free the memory associated with the buffer * * `pm_serialize` - serialize the AST into a buffer - * * `pm_parse_serialize` - parse and serialize the AST into a buffer + * * `pm_serialize_parse` - parse and serialize the AST into a buffer * * Putting all of this together would look something like: * @@ -241,7 +241,7 @@ PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_ * void serialize(const uint8_t *source, size_t length) { * pm_buffer_t buffer = { 0 }; * - * pm_parse_serialize(source, length, &buffer, NULL); + * pm_serialize_parse(&buffer, source, length, NULL); * printf("SERIALIZED!\n"); * * pm_buffer_free(&buffer); diff --git a/include/prism/options.h b/include/prism/options.h index 6faadc2c47a..f1b0254ffff 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -160,28 +160,28 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * * | # bytes | field | * | ------- | -------------------------- | - * | 4 | the length of the filepath | + * | `4` | the length of the filepath | * | ... | the filepath bytes | - * | 4 | the line number | - * | 4 | the length the encoding | + * | `4` | the line number | + * | `4` | the length the encoding | * | ... 
| the encoding bytes | - * | 1 | frozen string literal | - * | 1 | suppress warnings | - * | 4 | the number of scopes | + * | `1` | frozen string literal | + * | `1` | suppress warnings | + * | `4` | the number of scopes | * | ... | the scopes | * * Each scope is layed out as follows: * * | # bytes | field | * | ------- | -------------------------- | - * | 4 | the number of locals | + * | `4` | the number of locals | * | ... | the locals | * * Each local is layed out as follows: * * | # bytes | field | * | ------- | -------------------------- | - * | 4 | the length of the local | + * | `4` | the length of the local | * | ... | the local bytes | * * Some additional things to note about this layout: diff --git a/javascript/src/parsePrism.js b/javascript/src/parsePrism.js index d546fb87d1f..4a72baa30d7 100644 --- a/javascript/src/parsePrism.js +++ b/javascript/src/parsePrism.js @@ -17,7 +17,7 @@ export function parsePrism(prism, source) { const sourceView = new Uint8Array(prism.memory.buffer, sourcePointer, sourceArray.length); sourceView.set(sourceArray); - prism.pm_parse_serialize(sourcePointer, sourceArray.length, bufferPointer); + prism.pm_serialize_parse(bufferPointer, sourcePointer, sourceArray.length); const serializedView = new Uint8Array(prism.memory.buffer, prism.pm_buffer_value(bufferPointer), prism.pm_buffer_length(bufferPointer)); const result = deserialize(sourceArray, serializedView); diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index cc84d00f145..931e277ef58 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -231,6 +231,7 @@ def parse_comments(code, **options) loader.load_header loader.load_force_encoding + loader.load_start_line loader.load_comments end end @@ -283,7 +284,7 @@ def dump_options(options) end template << "L" - values << options.fetch(:line, 0) + values << options.fetch(:line, 1) template << "L" if (encoding = options[:encoding]) diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 92651cf7660..c7f6cdcd16c 
100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -9,7 +9,7 @@ class Source attr_reader :source # The line number where this source starts. - attr_reader :start_line + attr_accessor :start_line # The list of newline byte offsets in the source code. attr_reader :offsets diff --git a/src/prism.c b/src/prism.c index be8dd38ccf6..a362961a1f8 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16239,82 +16239,6 @@ parse_program(pm_parser_t *parser) { return (pm_node_t *) pm_program_node_create(parser, &locals, statements); } -/** - * Read a 32-bit unsigned integer from a pointer. This function is used to read - * the metadata that is passed into the parser from the Ruby implementation. It - * handles aligned and unaligned reads. - */ -static uint32_t -pm_metadata_read_u32(const char *ptr) { - if (((uintptr_t) ptr) % sizeof(uint32_t) == 0) { - return *((uint32_t *) ptr); - } else { - uint32_t value; - memcpy(&value, ptr, sizeof(uint32_t)); - return value; - } -} - -/** - * Process any additional metadata being passed into a call to the parser via - * the pm_parse_serialize function. Since the source of these calls will be from - * Ruby implementation internals we assume it is from a trusted source. - * - * Currently, this is only passing in variable scoping surrounding an eval, but - * eventually it will be extended to hold any additional metadata. This data - * is serialized to reduce the calling complexity for a foreign function call - * vs a foreign runtime making a bindable in-memory version of a C structure. - * - * metadata is assumed to be a valid pointer pointing to well-formed data. 
The - * format is described below: - * - * ```text - * [ - * filepath_size: uint32_t, - * filepath: char*, - * scopes_count: uint32_t, - * [ - * locals_count: uint32_t, - * [local_size: uint32_t, local: char*]* - * ]* - * ] - * ``` - */ -void -pm_parser_metadata(pm_parser_t *parser, const char *metadata) { - uint32_t filepath_size = pm_metadata_read_u32(metadata); - metadata += 4; - - if (filepath_size) { - pm_string_t filepath_string; - pm_string_constant_init(&filepath_string, metadata, filepath_size); - - parser->filepath_string = filepath_string; - metadata += filepath_size; - } - - uint32_t scopes_count = pm_metadata_read_u32(metadata); - metadata += 4; - - for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) { - uint32_t locals_count = pm_metadata_read_u32(metadata); - metadata += 4; - - pm_parser_scope_push(parser, scope_index == 0); - - for (size_t local_index = 0; local_index < locals_count; local_index++) { - uint32_t local_size = pm_metadata_read_u32(metadata); - metadata += 4; - - uint8_t *constant = malloc(local_size); - memcpy(constant, metadata, local_size); - - pm_parser_local_add_owned(parser, constant, (size_t) local_size); - metadata += local_size; - } - } -} - /******************************************************************************/ /* External functions */ /******************************************************************************/ @@ -16561,7 +16485,7 @@ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_header(buffer); pm_serialize_content(parser, node, buffer); - pm_buffer_append_string(buffer, "\0", 1); + pm_buffer_append_byte(buffer, '\0'); } /** @@ -16601,6 +16525,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s pm_node_t *node = pm_parse(&parser); pm_serialize_header(buffer); pm_serialize_encoding(&parser.encoding, buffer); + pm_buffer_append_varint(buffer, parser.start_line); 
pm_serialize_comment_list(&parser, &parser.comment_list, buffer); pm_node_destroy(&parser, node); diff --git a/templates/java/org/prism/Loader.java.erb b/templates/java/org/prism/Loader.java.erb index d15c38b3a1f..c8a15296010 100644 --- a/templates/java/org/prism/Loader.java.erb +++ b/templates/java/org/prism/Loader.java.erb @@ -108,6 +108,8 @@ public class Loader { this.encodingName = new String(encodingNameBytes, StandardCharsets.US_ASCII); this.encodingCharset = getEncodingCharset(this.encodingName); + source.setStartLine(loadVarInt()); + ParseResult.MagicComment[] magicComments = loadMagicComments(); ParseResult.Error[] errors = loadSyntaxErrors(); ParseResult.Warning[] warnings = loadWarnings(); diff --git a/templates/java/org/prism/Nodes.java.erb b/templates/java/org/prism/Nodes.java.erb index 3fb7a775c40..138e8efa807 100644 --- a/templates/java/org/prism/Nodes.java.erb +++ b/templates/java/org/prism/Nodes.java.erb @@ -34,9 +34,11 @@ public abstract class Nodes { public static final class Source { public final byte[] bytes; + private final int startLine; private final int[] lineOffsets; public Source(byte[] bytes) { + this.startLine = 1; this(bytes, computeLineOffsets(bytes)); } @@ -46,6 +48,11 @@ public abstract class Nodes { this.lineOffsets = lineOffsets; } + public void setStartLine(int startLine) { + assert startLine >= 1; + this.startLine = startLine; + } + public static int[] computeLineOffsets(byte[] bytes) { int[] lineOffsets = new int[8]; int lineOffsetsSize = 0; @@ -63,6 +70,10 @@ public abstract class Nodes { } public int line(int byteOffset) { + return startLine + findLine(byteOffset); + } + + public int findLine(int byteOffset) { assert byteOffset >= 0 && byteOffset < bytes.length : byteOffset; int index = Arrays.binarySearch(lineOffsets, byteOffset); int line; @@ -102,7 +113,7 @@ public abstract class Nodes { } public void setNewLineFlag(Source source, boolean[] newlineMarked) { - int line = source.line(this.startOffset); + int line = 
source.findLine(this.startOffset); if (!newlineMarked[line]) { newlineMarked[line] = true; this.newLineFlag = true; diff --git a/templates/lib/prism/serialize.rb.erb b/templates/lib/prism/serialize.rb.erb index a8a6a2dd474..ef8ebbbae8f 100644 --- a/templates/lib/prism/serialize.rb.erb +++ b/templates/lib/prism/serialize.rb.erb @@ -46,6 +46,7 @@ module Prism class Loader # :nodoc: attr_reader :encoding, :input, :serialized, :io attr_reader :constant_pool_offset, :constant_pool, :source + attr_reader :start_line def initialize(source, serialized) @encoding = Encoding::UTF_8 @@ -59,7 +60,7 @@ module Prism @constant_pool = nil @source = source - define_load_node_lambdas unless RUBY_ENGINE == 'ruby' + define_load_node_lambdas unless RUBY_ENGINE == "ruby" end def load_header @@ -80,6 +81,10 @@ module Prism @input = input.force_encoding(@encoding).freeze end + def load_start_line + source.start_line = load_varint + end + def load_comments load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } end @@ -108,6 +113,7 @@ module Prism def load_tokens_result tokens = load_tokens encoding = load_encoding + load_start_line comments, magic_comments, errors, warnings = load_metadata if encoding != @encoding @@ -121,6 +127,7 @@ module Prism def load_nodes load_header load_force_encoding + load_start_line comments, magic_comments, errors, warnings = load_metadata From 81a9b2817f19e6bd95d26d0be8af2a913a039f10 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 15:43:41 -0400 Subject: [PATCH 09/10] Wire up the options through JavaScript --- ext/prism/extension.c | 154 ++++++++------------ templates/javascript/src/deserialize.js.erb | 4 + 2 files changed, 62 insertions(+), 96 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index dfd8e76d5a6..10bd141f937 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -187,6 +187,38 @@ extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { } } +/** + 
* Read options for methods that look like (source, **options). + */ +static void +string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { + VALUE string; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + extract_options(options, Qnil, keywords); + input_load_string(input, string); +} + +/** + * Read options for methods that look like (filepath, **options). + */ +static bool +file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); + + extract_options(options, filepath, keywords); + + if (!pm_string_mapped_init(input, (const char *) pm_string_source(&options->filepath))) { + pm_options_free(options); + return false; + } + + return true; +} + /******************************************************************************/ /* Serializing the AST */ /******************************************************************************/ @@ -224,15 +256,9 @@ dump_input(pm_string_t *input, const pm_options_t *options) { */ static VALUE dump(int argc, VALUE *argv, VALUE self) { - VALUE string; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &string, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); - pm_string_t input; - input_load_string(&input, string); + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); #ifdef PRISM_DEBUG_MODE_BUILD size_t length = pm_string_length(&input); @@ -242,12 +268,14 @@ dump(int argc, VALUE *argv, VALUE self) { #endif VALUE value = dump_input(&input, &options); - pm_options_free(&options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); #endif + pm_string_free(&input); + pm_options_free(&options); + return value; } @@ -260,18 +288,9 @@ dump(int argc, VALUE *argv, VALUE self) { */ static VALUE dump_file(int argc, VALUE *argv, VALUE self) { - VALUE filepath; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &filepath, 
&keywords); - - pm_options_t options = { 0 }; - extract_options(&options, filepath, keywords); - pm_string_t input; - if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { - pm_options_free(&options); - return Qnil; - } + pm_options_t options = { 0 }; + if (!file_options(argc, argv, &input, &options)) return Qnil; VALUE value = dump_input(&input, &options); pm_string_free(&input); @@ -528,17 +547,12 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod */ static VALUE lex(int argc, VALUE *argv, VALUE self) { - VALUE string; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &string, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); - pm_string_t input; - input_load_string(&input, string); + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); VALUE result = parse_lex_input(&input, &options, false); + pm_string_free(&input); pm_options_free(&options); return result; @@ -553,18 +567,9 @@ lex(int argc, VALUE *argv, VALUE self) { */ static VALUE lex_file(int argc, VALUE *argv, VALUE self) { - VALUE filepath; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &filepath, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, filepath, keywords); - pm_string_t input; - if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { - pm_options_free(&options); - return Qnil; - } + pm_options_t options = { 0 }; + if (!file_options(argc, argv, &input, &options)) return Qnil; VALUE value = parse_lex_input(&input, &options, false); pm_string_free(&input); @@ -628,15 +633,9 @@ parse_input(pm_string_t *input, const pm_options_t *options) { */ static VALUE parse(int argc, VALUE *argv, VALUE self) { - VALUE string; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &string, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); - pm_string_t input; - 
input_load_string(&input, string); + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); #ifdef PRISM_DEBUG_MODE_BUILD size_t length = pm_string_length(&input); @@ -651,6 +650,7 @@ parse(int argc, VALUE *argv, VALUE self) { free(dup); #endif + pm_string_free(&input); pm_options_free(&options); return value; } @@ -664,18 +664,9 @@ parse(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_file(int argc, VALUE *argv, VALUE self) { - VALUE filepath; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &filepath, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, filepath, keywords); - pm_string_t input; - if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { - pm_options_free(&options); - return Qnil; - } + pm_options_t options = { 0 }; + if (!file_options(argc, argv, &input, &options)) return Qnil; VALUE value = parse_input(&input, &options); pm_string_free(&input); @@ -713,17 +704,12 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { */ static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { - VALUE string; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &string, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); - pm_string_t input; - input_load_string(&input, string); + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); VALUE result = parse_input_comments(&input, &options); + pm_string_free(&input); pm_options_free(&options); return result; @@ -738,18 +724,9 @@ parse_comments(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_file_comments(int argc, VALUE *argv, VALUE self) { - VALUE filepath; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &filepath, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, filepath, keywords); - pm_string_t input; - if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { - 
pm_options_free(&options); - return Qnil; - } + pm_options_t options = { 0 }; + if (!file_options(argc, argv, &input, &options)) return Qnil; VALUE value = parse_input_comments(&input, &options); pm_string_free(&input); @@ -774,15 +751,9 @@ parse_file_comments(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { - VALUE string; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &string, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, Qnil, keywords); - pm_string_t input; - input_load_string(&input, string); + pm_options_t options = { 0 }; + string_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); @@ -807,18 +778,9 @@ parse_lex(int argc, VALUE *argv, VALUE self) { */ static VALUE parse_lex_file(int argc, VALUE *argv, VALUE self) { - VALUE filepath; - VALUE keywords; - rb_scan_args(argc, argv, "1:", &filepath, &keywords); - - pm_options_t options = { 0 }; - extract_options(&options, filepath, keywords); - pm_string_t input; - if (!pm_string_mapped_init(&input, (const char *) pm_string_source(&options.filepath))) { - pm_options_free(&options); - return Qnil; - } + pm_options_t options = { 0 }; + if (!file_options(argc, argv, &input, &options)) return Qnil; VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); diff --git a/templates/javascript/src/deserialize.js.erb b/templates/javascript/src/deserialize.js.erb index 610659d8a18..12b4ac58b63 100644 --- a/templates/javascript/src/deserialize.js.erb +++ b/templates/javascript/src/deserialize.js.erb @@ -206,6 +206,10 @@ export function deserialize(source, array) { // Skip past the encoding, it means nothing to us in JavaScript. buffer.readString(buffer.readVarInt()); + // Skip past the start line, as we don't support that option yet in + // JavaScript. 
+ buffer.readVarInt(); + const comments = Array.from({ length: buffer.readVarInt() }, () => ({ type: buffer.readVarInt(), location: buffer.readLocation() From ed481b9f2eea056fe74943bff3484c953379117e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 2 Nov 2023 18:12:57 -0400 Subject: [PATCH 10/10] Fix up lint --- ext/prism/extension.c | 36 ++++++++++++------------- include/prism.h | 2 +- include/prism/options.h | 2 +- templates/java/org/prism/Nodes.java.erb | 8 +++--- templates/src/serialize.c.erb | 2 ++ 5 files changed, 26 insertions(+), 24 deletions(-) diff --git a/ext/prism/extension.c b/ext/prism/extension.c index 10bd141f937..839a5cda38a 100644 --- a/ext/prism/extension.c +++ b/ext/prism/extension.c @@ -15,12 +15,12 @@ VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; -ID id_filepath; -ID id_encoding; -ID id_line; -ID id_frozen_string_literal; -ID id_suppress_warnings; -ID id_scopes; +ID rb_option_id_filepath; +ID rb_option_id_encoding; +ID rb_option_id_line; +ID rb_option_id_frozen_string_literal; +ID rb_option_id_suppress_warnings; +ID rb_option_id_scopes; /******************************************************************************/ /* IO of Ruby code */ @@ -119,17 +119,17 @@ build_options_i(VALUE key, VALUE value, VALUE argument) { pm_options_t *options = (pm_options_t *) argument; ID key_id = SYM2ID(key); - if (key_id == id_filepath) { + if (key_id == rb_option_id_filepath) { if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value)); - } else if (key_id == id_encoding) { + } else if (key_id == rb_option_id_encoding) { if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value))); - } else if (key_id == id_line) { + } else if (key_id == rb_option_id_line) { if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value)); - } else if (key_id == id_frozen_string_literal) { + } else if (key_id == rb_option_id_frozen_string_literal) { if (!NIL_P(value)) 
pm_options_frozen_string_literal_set(options, value == Qtrue); - } else if (key_id == id_suppress_warnings) { + } else if (key_id == rb_option_id_suppress_warnings) { if (!NIL_P(value)) pm_options_suppress_warnings_set(options, value == Qtrue); - } else if (key_id == id_scopes) { + } else if (key_id == rb_option_id_scopes) { if (!NIL_P(value)) build_options_scopes(options, value); } else { rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key); @@ -940,12 +940,12 @@ Init_prism(void) { // Intern all of the options that we support so that we don't have to do it // every time we parse. - id_filepath = rb_intern_const("filepath"); - id_encoding = rb_intern_const("encoding"); - id_line = rb_intern_const("line"); - id_frozen_string_literal = rb_intern_const("frozen_string_literal"); - id_suppress_warnings = rb_intern_const("suppress_warnings"); - id_scopes = rb_intern_const("scopes"); + rb_option_id_filepath = rb_intern_const("filepath"); + rb_option_id_encoding = rb_intern_const("encoding"); + rb_option_id_line = rb_intern_const("line"); + rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal"); + rb_option_id_suppress_warnings = rb_intern_const("suppress_warnings"); + rb_option_id_scopes = rb_intern_const("scopes"); /** * The version of the prism library. diff --git a/include/prism.h b/include/prism.h index 211d2957f24..5eec5f49ec9 100644 --- a/include/prism.h +++ b/include/prism.h @@ -148,8 +148,8 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, co * * @param source The source to lex. * @param size The size of the source. - * @param filepath The optional filepath to pass to the lexer. * @param buffer The buffer to serialize to. + * @param data The optional data to pass to the lexer. 
*/ PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data); diff --git a/include/prism/options.h b/include/prism/options.h index f1b0254ffff..2ea85c838ce 100644 --- a/include/prism/options.h +++ b/include/prism/options.h @@ -171,7 +171,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options); * | ... | the scopes | * * Each scope is layed out as follows: - * + * * | # bytes | field | * | ------- | -------------------------- | * | `4` | the number of locals | diff --git a/templates/java/org/prism/Nodes.java.erb b/templates/java/org/prism/Nodes.java.erb index 138e8efa807..c1a0b02e2ab 100644 --- a/templates/java/org/prism/Nodes.java.erb +++ b/templates/java/org/prism/Nodes.java.erb @@ -34,17 +34,17 @@ public abstract class Nodes { public static final class Source { public final byte[] bytes; - private final int startLine; + private int startLine; private final int[] lineOffsets; public Source(byte[] bytes) { - this.startLine = 1; - this(bytes, computeLineOffsets(bytes)); + this(bytes, 1, computeLineOffsets(bytes)); } - public Source(byte[] bytes, int[] lineOffsets) { + public Source(byte[] bytes, int startLine, int[] lineOffsets) { assert lineOffsets[0] == 0; this.bytes = bytes; + this.startLine = startLine; this.lineOffsets = lineOffsets; } diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 9b0722c2571..db4c91e0cd8 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -209,6 +209,7 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_encoding(&parser->encoding, buffer); + pm_buffer_append_varint(buffer, parser->start_line); <%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> pm_serialize_comment_list(parser, &parser->comment_list, buffer); <%- end -%> @@ -306,6 +307,7 @@ pm_serialize_lex(pm_buffer_t *buffer, 
const uint8_t *source, size_t size, const pm_buffer_append_byte(buffer, 0); pm_serialize_encoding(&parser.encoding, buffer); + pm_buffer_append_varint(buffer, parser.start_line); pm_serialize_comment_list(&parser, &parser.comment_list, buffer); pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer); pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);