diff --git a/Doxyfile b/Doxyfile index c7dc26c6dd8..fe4d0e1b8f2 100644 --- a/Doxyfile +++ b/Doxyfile @@ -24,16 +24,15 @@ PROJECT_NAME = "Prism" OUTPUT_DIRECTORY = doc JAVADOC_AUTOBRIEF = YES OPTIMIZE_OUTPUT_FOR_C = YES -EXTRACT_ALL = YES INPUT = src src/enc src/util include include/prism include/prism/enc include/prism/util HTML_OUTPUT = c +WARN_IF_UNDOCUMENTED = YES +WARN_NO_PARAMDOC = YES +WARN_AS_ERROR = FAIL_ON_WARNINGS +WARN_IF_UNDOC_ENUM_VAL = YES +SORT_MEMBER_DOCS = NO # Default options we might want to edit in the future HTML_STYLESHEET = HTML_COLORSTYLE = AUTO_LIGHT GENERATE_LATEX = NO - -# Default options we definitely want to edit in the future -WARN_IF_UNDOCUMENTED = NO -WARN_NO_PARAMDOC = NO -WARN_AS_ERROR = NO diff --git a/config.yml b/config.yml index 2d8c18bea3a..f419f54ec8b 100644 --- a/config.yml +++ b/config.yml @@ -59,11 +59,11 @@ tokens: - name: CONSTANT comment: "a constant" - name: DOT - comment: "." + comment: "the . call operator" - name: DOT_DOT - comment: ".." + comment: "the .. range operator" - name: DOT_DOT_DOT - comment: "..." + comment: "the ... range operator or forwarding parameter" - name: EMBDOC_BEGIN comment: "=begin" - name: EMBDOC_END @@ -311,9 +311,9 @@ tokens: - name: UCOLON_COLON comment: "unary ::" - name: UDOT_DOT - comment: "unary .." + comment: "unary .. operator" - name: UDOT_DOT_DOT - comment: "unary ..." + comment: "unary ... operator" - name: UMINUS comment: "-@" - name: UMINUS_NUM diff --git a/include/prism.h b/include/prism.h index c68e9cbdf7d..4b8755a30d9 100644 --- a/include/prism.h +++ b/include/prism.h @@ -1,3 +1,8 @@ +/** + * @file prism.h + * + * The main header file for the prism parser. + */ #ifndef PRISM_H #define PRISM_H @@ -75,10 +80,10 @@ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_pars PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); /** - * Parse the Ruby source associated with the given parser and return the tree. 
+ * Parse the source associated with the given parser and return the tree. * * @param parser The parser to use. - * @return The AST representing the Ruby source. + * @return The AST representing the source. */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); @@ -181,4 +186,103 @@ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_ */ PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type); +/** + * @mainpage + * + * Prism is a parser for the Ruby programming language. It is designed to be + * portable, error tolerant, and maintainable. It is written in C99 and has no + * dependencies. It is currently being integrated into + * [CRuby](https://github.com/ruby/ruby), + * [JRuby](https://github.com/jruby/jruby), + * [TruffleRuby](https://github.com/oracle/truffleruby), + * [Sorbet](https://github.com/sorbet/sorbet), and + * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree). + * + * @section getting-started Getting started + * + * If you're vendoring this project and compiling it statically then as long as + * you have a C99 compiler you will be fine. If you're linking against it as + * a shared library, then you should compile with `-fvisibility=hidden` and + * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface + * visible.
+ * + * @section parsing Parsing + * + * In order to parse Ruby code, the structures and functions that you're going + * to want to use and be aware of are: + * + * * pm_parser_t - the main parser structure + * * pm_parser_init - initialize a parser + * * pm_parse - parse and return the root node + * * pm_node_destroy - deallocate the root node returned by `pm_parse` + * * pm_parser_free - free the internal memory of the parser + * + * Putting all of this together would look something like: + * + * ```c + * void parse(const uint8_t *source, size_t length) { + * pm_parser_t parser; + * pm_parser_init(&parser, source, length, NULL); + * + * pm_node_t *root = pm_parse(&parser); + * printf("PARSED!\n"); + * + * pm_node_destroy(root); + * pm_parser_free(&parser); + * } + * ``` + * + * All of the nodes "inherit" from `pm_node_t` by embedding those structures as + * their first member. This means you can downcast and upcast any node in the + * tree to a `pm_node_t`. + * + * @section serializing Serializing + * + * Prism provides the ability to serialize the AST and its related metadata into + * a binary format. This format is designed to be portable to different + * languages and runtimes so that you only need to make one FFI call in order to + * parse Ruby code. 
The structures and functions that you're going to want to + * use and be aware of are: + * + * * pm_buffer_t - a small buffer object that will hold the serialized AST + * * pm_buffer_free - free the memory associated with the buffer + * * pm_serialize - serialize the AST into a buffer + * * pm_parse_serialize - parse and serialize the AST into a buffer + * + * Putting all of this together would look something like: + * + * ```c + * void serialize(const uint8_t *source, size_t length) { + * pm_buffer_t buffer = { 0 }; + * + * pm_parse_serialize(source, length, &buffer, NULL); + * printf("SERIALIZED!\n"); + * + * pm_buffer_free(&buffer); + * } + * ``` + * + * @section inspecting Inspecting + * + * Prism provides the ability to inspect the AST by pretty-printing nodes. You + * can do this with the `pm_prettyprint` function, which you would use like: + * + * ```c + * void prettyprint(const uint8_t *source, size_t length) { + * pm_parser_t parser; + * pm_parser_init(&parser, source, length, NULL); + * + * pm_node_t *root = pm_parse(&parser); + * pm_buffer_t buffer = { 0 }; + * + * pm_prettyprint(&buffer, &parser, root); + * printf("%.*s\n", (int) buffer.length, buffer.value); + * + * pm_buffer_free(&buffer); + * pm_node_destroy(root); + * pm_parser_free(&parser); + * } + * ``` + */ + #endif diff --git a/include/prism/defines.h b/include/prism/defines.h index 457a8502f89..b10f8fa3e08 100644 --- a/include/prism/defines.h +++ b/include/prism/defines.h @@ -1,8 +1,14 @@ +/** + * @file defines.h + * + * Macro definitions used throughout the prism library. + * + * This file should be included first by any *.h or *.c in prism for consistency + * and to ensure that the macros are defined before they are used. + */ #ifndef PRISM_DEFINES_H #define PRISM_DEFINES_H -// This file should be included first by any *.h or *.c in prism. - #include #include #include @@ -10,7 +16,11 @@ #include #include -// PRISM_EXPORTED_FUNCTION +/** + * By default, we compile with -fvisibility=hidden.
When this is enabled, we + * need to mark certain functions as being publicly visible. This macro does + * that in a compiler-agnostic way. + */ #ifndef PRISM_EXPORTED_FUNCTION # ifdef PRISM_EXPORT_SYMBOLS # ifdef _WIN32 @@ -23,7 +33,12 @@ # endif #endif -// PRISM_ATTRIBUTE_FORMAT +/** + * Certain compilers support specifying that a function accepts variadic + * parameters that look like printf format strings to provide a better developer + * experience when someone is using the function. This macro does that in a + * compiler-agnostic way. + */ #if defined(__GNUC__) # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index))) #elif defined(__clang__) @@ -32,19 +47,29 @@ # define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) #endif -// PRISM_ATTRIBUTE_UNUSED +/** + * GCC will warn if you specify a function or parameter that is unused at + * runtime. This macro allows you to mark a function or parameter as unused in a + * compiler-agnostic way. + */ #if defined(__GNUC__) # define PRISM_ATTRIBUTE_UNUSED __attribute__((unused)) #else # define PRISM_ATTRIBUTE_UNUSED #endif -// inline +/** + * Old Visual Studio versions do not support the inline keyword, so we need to + * define it to be __inline. + */ #if defined(_MSC_VER) && !defined(inline) # define inline __inline #endif -// Windows versions before 2015 use _snprintf +/** + * Old Visual Studio versions before 2015 do not implement snprintf, but instead + * implement _snprintf. We standardize that here. + */ #if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900) # define snprintf _snprintf #endif diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h index 7d78a160002..97bd83fdf7d 100644 --- a/include/prism/diagnostic.h +++ b/include/prism/diagnostic.h @@ -1,3 +1,8 @@ +/** + * @file diagnostic.h + * + * A list of diagnostics generated during parsing.
+ */ #ifndef PRISM_DIAGNOSTIC_H #define PRISM_DIAGNOSTIC_H @@ -9,14 +14,21 @@ #include /** - * This struct represents a diagnostic found during parsing. + * This struct represents a diagnostic generated during parsing. * * @extends pm_list_node_t */ typedef struct { + /** The embedded base node. */ pm_list_node_t node; + + /** A pointer to the start of the source that generated the diagnostic. */ const uint8_t *start; + + /** A pointer to the end of the source that generated the diagnostic. */ const uint8_t *end; + + /** The message associated with the diagnostic. */ const char *message; } pm_diagnostic_t; diff --git a/include/prism/enc/pm_encoding.h b/include/prism/enc/pm_encoding.h index 28b9f02281b..f8e554e6178 100644 --- a/include/prism/enc/pm_encoding.h +++ b/include/prism/enc/pm_encoding.h @@ -1,3 +1,8 @@ +/** + * @file pm_encoding.h + * + * The encoding interface and implementations used by the parser. + */ #ifndef PRISM_ENCODING_H #define PRISM_ENCODING_H @@ -55,10 +60,22 @@ typedef struct { bool multibyte; } pm_encoding_t; -// These bits define the location of each bit of metadata within the various -// lookup tables that are used to determine the properties of a character. +/** + * All of the lookup tables use the first bit of each embedded byte to indicate + * whether the codepoint is alphabetical. + */ #define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0 + +/** + * All of the lookup tables use the second bit of each embedded byte to indicate + * whether the codepoint is alphanumeric. + */ #define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1 + +/** + * All of the lookup tables use the third bit of each embedded byte to indicate + * whether the codepoint is uppercase. + */ #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2 /** diff --git a/include/prism/node.h b/include/prism/node.h index 768ddec1b0e..3e15d18552b 100644 --- a/include/prism/node.h +++ b/include/prism/node.h @@ -1,3 +1,8 @@ +/** + * @file node.h + * + * Functions related to nodes in the AST. 
+ */ #ifndef PRISM_NODE_H #define PRISM_NODE_H diff --git a/include/prism/pack.h b/include/prism/pack.h index be52a7b4de8..e4948483897 100644 --- a/include/prism/pack.h +++ b/include/prism/pack.h @@ -1,3 +1,8 @@ +/** + * @file pack.h + * + * A pack template string parser. + */ #ifndef PRISM_PACK_H #define PRISM_PACK_H @@ -6,15 +11,18 @@ #include #include +/** The version of the pack template language that we are parsing. */ typedef enum pm_pack_version { PM_PACK_VERSION_3_2_0 } pm_pack_version; +/** The type of pack template we are parsing. */ typedef enum pm_pack_variant { PM_PACK_VARIANT_PACK, PM_PACK_VARIANT_UNPACK } pm_pack_variant; +/** A directive within the pack template. */ typedef enum pm_pack_type { PM_PACK_SPACE, PM_PACK_COMMENT, @@ -40,12 +48,14 @@ typedef enum pm_pack_type { PM_PACK_END } pm_pack_type; +/** The signedness of a pack directive. */ typedef enum pm_pack_signed { PM_PACK_UNSIGNED, PM_PACK_SIGNED, PM_PACK_SIGNED_NA } pm_pack_signed; +/** The endianness of a pack directive. */ typedef enum pm_pack_endian { PM_PACK_AGNOSTIC_ENDIAN, PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V' @@ -54,6 +64,7 @@ typedef enum pm_pack_endian { PM_PACK_ENDIAN_NA } pm_pack_endian; +/** The size of an integer pack directive. */ typedef enum pm_pack_size { PM_PACK_SIZE_SHORT, PM_PACK_SIZE_INT, @@ -67,6 +78,7 @@ typedef enum pm_pack_size { PM_PACK_SIZE_NA } pm_pack_size; +/** The type of length of a pack directive. */ typedef enum pm_pack_length_type { PM_PACK_LENGTH_FIXED, PM_PACK_LENGTH_MAX, @@ -74,6 +86,7 @@ typedef enum pm_pack_length_type { PM_PACK_LENGTH_NA } pm_pack_length_type; +/** The type of encoding for a pack template string. */ typedef enum pm_pack_encoding { PM_PACK_ENCODING_START, PM_PACK_ENCODING_ASCII_8BIT, @@ -81,6 +94,7 @@ typedef enum pm_pack_encoding { PM_PACK_ENCODING_UTF_8 } pm_pack_encoding; +/** The result of parsing a pack template.
*/ typedef enum pm_pack_result { PM_PACK_OK, PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE, @@ -90,39 +104,31 @@ typedef enum pm_pack_result { PM_PACK_ERROR_DOUBLE_ENDIAN } pm_pack_result; -// Parse a single directive from a pack or unpack format string. -// -// Parameters: -// - [in] pm_pack_version version the version of Ruby -// - [in] pm_pack_variant variant pack or unpack -// - [in out] const char **format the start of the next directive to parse -// on calling, and advanced beyond the parsed directive on return, or as -// much of it as was consumed until an error was encountered -// - [in] const char *format_end the end of the format string -// - [out] pm_pack_type *type the type of the directive -// - [out] pm_pack_signed *signed_type -// whether the value is signed -// - [out] pm_pack_endian *endian the endianness of the value -// - [out] pm_pack_size *size the size of the value -// - [out] pm_pack_length_type *length_type -// what kind of length is specified -// - [out] size_t *length the length of the directive -// - [in out] pm_pack_encoding *encoding -// takes the current encoding of the string -// which would result from parsing the whole format string, and returns a -// possibly changed directive - the encoding should be -// PM_PACK_ENCODING_START when pm_pack_parse is called for the first -// directive in a format string -// -// Return: -// - PM_PACK_OK on success -// - PM_PACK_ERROR_* on error -// -// Notes: -// Consult Ruby documentation for the meaning of directives. +/** + * Parse a single directive from a pack or unpack format string. 
+ * + * @param variant (in) pack or unpack + * @param format (in, out) the start of the next directive to parse on calling, + * and advanced beyond the parsed directive on return, or as much of it as + * was consumed until an error was encountered + * @param format_end (in) the end of the format string + * @param type (out) the type of the directive + * @param signed_type (out) whether the value is signed + * @param endian (out) the endianness of the value + * @param size (out) the size of the value + * @param length_type (out) what kind of length is specified + * @param length (out) the length of the directive + * @param encoding (in, out) takes the current encoding of the string which + * would result from parsing the whole format string, and returns a possibly + * changed directive - the encoding should be `PM_PACK_ENCODING_START` when + * pm_pack_parse is called for the first directive in a format string + * + * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error + * @note Consult Ruby documentation for the meaning of directives. + */ PRISM_EXPORTED_FUNCTION pm_pack_result pm_pack_parse( - pm_pack_variant variant_arg, + pm_pack_variant variant, const char **format, const char *format_end, pm_pack_type *type, @@ -134,8 +140,13 @@ pm_pack_parse( pm_pack_encoding *encoding ); -// prism abstracts sizes away from the native system - this converts an abstract -// size to a native size. +/** + * Prism abstracts sizes away from the native system - this converts an abstract + * size to a native size. + * + * @param size The abstract size to convert. + * @return The native size. + */ PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size); #endif diff --git a/include/prism/parser.h b/include/prism/parser.h index 92a8ce589de..f4d0153e17b 100644 --- a/include/prism/parser.h +++ b/include/prism/parser.h @@ -1,3 +1,8 @@ +/** + * @file parser.h + * + * The parser used to parse Ruby source. 
+ */ #ifndef PRISM_PARSER_H #define PRISM_PARSER_H @@ -84,6 +89,7 @@ typedef enum { * are found as part of a string. */ typedef struct pm_lex_mode { + /** The type of this lex mode. */ enum { /** This state is used when any given token is being lexed. */ PM_LEX_DEFAULT, @@ -122,6 +128,7 @@ typedef struct pm_lex_mode { PM_LEX_STRING } mode; + /** The data associated with this type of lex mode. */ union { struct { /** This keeps track of the nesting level of the list. */ @@ -240,8 +247,9 @@ typedef struct pm_lex_mode { */ #define PM_LEX_STACK_SIZE 4 -// A forward declaration since our error handler struct accepts a parser for -// each of its function calls. +/** + * The parser used to parse Ruby source. + */ typedef struct pm_parser pm_parser_t; /** @@ -343,7 +351,10 @@ typedef enum { /** This is a node in a linked list of contexts. */ typedef struct pm_context_node { + /** The context that this node represents. */ pm_context_t context; + + /** A pointer to the previous context in the linked list. */ struct pm_context_node *prev; } pm_context_node_t; @@ -360,9 +371,16 @@ typedef enum { * @extends pm_list_node_t */ typedef struct pm_comment { + /** The embedded base node. */ pm_list_node_t node; + + /** A pointer to the start of the comment in the source. */ const uint8_t *start; + + /** A pointer to the end of the comment in the source. */ const uint8_t *end; + + /** The type of comment that we've found. */ pm_comment_type_t type; } pm_comment_t; @@ -373,10 +391,19 @@ typedef struct pm_comment { * @extends pm_list_node_t */ typedef struct { + /** The embedded base node. */ pm_list_node_t node; + + /** A pointer to the start of the key in the source. */ const uint8_t *key_start; + + /** A pointer to the start of the value in the source. */ const uint8_t *value_start; + + /** The length of the key in the source. */ uint32_t key_length; + + /** The length of the value in the source. 
*/ uint32_t value_length; } pm_magic_comment_t; @@ -493,6 +520,7 @@ struct pm_parser { */ pm_state_stack_t accepts_block_stack; + /** A stack of lex modes. */ struct { /** The current mode of the lexer. */ pm_lex_mode_t *current; diff --git a/include/prism/prettyprint.h b/include/prism/prettyprint.h index 9ae2397e636..351b92df395 100644 --- a/include/prism/prettyprint.h +++ b/include/prism/prettyprint.h @@ -1,3 +1,8 @@ +/** + * @file prettyprint.h + * + * An AST node pretty-printer. + */ #ifndef PRISM_PRETTYPRINT_H #define PRISM_PRETTYPRINT_H diff --git a/include/prism/regexp.h b/include/prism/regexp.h index 9eae245d1e1..09bdaca89a9 100644 --- a/include/prism/regexp.h +++ b/include/prism/regexp.h @@ -1,3 +1,8 @@ +/** + * @file regexp.h + * + * A regular expression parser. + */ #ifndef PRISM_REGEXP_H #define PRISM_REGEXP_H diff --git a/include/prism/util/pm_buffer.h b/include/prism/util/pm_buffer.h index a2bccb3d001..3c3a6fb6881 100644 --- a/include/prism/util/pm_buffer.h +++ b/include/prism/util/pm_buffer.h @@ -1,3 +1,8 @@ +/** + * @file pm_buffer.h + * + * A wrapper around a contiguous block of allocated memory. + */ #ifndef PRISM_BUFFER_H #define PRISM_BUFFER_H diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h index 2bdc67de4a4..32f698a42b9 100644 --- a/include/prism/util/pm_char.h +++ b/include/prism/util/pm_char.h @@ -1,3 +1,8 @@ +/** + * @file pm_char.h + * + * Functions for working with characters and strings. + */ #ifndef PRISM_CHAR_H #define PRISM_CHAR_H diff --git a/include/prism/util/pm_constant_pool.h b/include/prism/util/pm_constant_pool.h index 19c3b619e12..238d0c4cae8 100644 --- a/include/prism/util/pm_constant_pool.h +++ b/include/prism/util/pm_constant_pool.h @@ -1,8 +1,12 @@ -// The constant pool is a data structure that stores a set of strings. Each -// string is assigned a unique id, which can be used to compare strings for -// equality. 
This comparison ends up being much faster than strcmp, since it -// only requires a single integer comparison. - +/** + * @file pm_constant_pool.h + * + * A data structure that stores a set of strings. + * + * Each string is assigned a unique id, which can be used to compare strings for + * equality. This comparison ends up being much faster than strcmp, since it + * only requires a single integer comparison. + */ #ifndef PRISM_CONSTANT_POOL_H #define PRISM_CONSTANT_POOL_H @@ -14,12 +18,23 @@ #include #include +/** + * A constant id is a unique identifier for a constant in the constant pool. + */ typedef uint32_t pm_constant_id_t; +/** + * A list of constant IDs. Usually used to represent a set of locals. + */ typedef struct { - pm_constant_id_t *ids; + /** The number of constant ids in the list. */ size_t size; + + /** The number of constant ids that have been allocated in the list. */ size_t capacity; + + /** The constant ids in the list. */ + pm_constant_id_t *ids; } pm_constant_id_list_t; /** diff --git a/include/prism/util/pm_list.h b/include/prism/util/pm_list.h index 53a5b9c3a18..b05ed0290a9 100644 --- a/include/prism/util/pm_list.h +++ b/include/prism/util/pm_list.h @@ -1,3 +1,8 @@ +/** + * @file pm_list.h + * + * An abstract linked list. + */ #ifndef PRISM_LIST_H #define PRISM_LIST_H diff --git a/include/prism/util/pm_memchr.h b/include/prism/util/pm_memchr.h index 6b817a55217..1eae6ab1bad 100644 --- a/include/prism/util/pm_memchr.h +++ b/include/prism/util/pm_memchr.h @@ -1,3 +1,8 @@ +/** + * @file pm_memchr.h + * + * A custom memchr implementation. + */ #ifndef PRISM_MEMCHR_H #define PRISM_MEMCHR_H diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h index 603a84c38cc..a31051f4e0a 100644 --- a/include/prism/util/pm_newline_list.h +++ b/include/prism/util/pm_newline_list.h @@ -1,11 +1,16 @@ -// When compiling the syntax tree, it's necessary to know the line and column -// of many nodes. 
This is necessary to support things like error messages, -// tracepoints, etc. -// -// It's possible that we could store the start line, start column, end line, and -// end column on every node in addition to the offsets that we already store, -// but that would be quite a lot of memory overhead. - +/** + * @file pm_newline_list.h + * + * A list of byte offsets of newlines in a string. + * + * When compiling the syntax tree, it's necessary to know the line and column + * of many nodes. This is necessary to support things like error messages, + * tracepoints, etc. + * + * It's possible that we could store the start line, start column, end line, and + * end column on every node in addition to the offsets that we already store, + * but that would be quite a lot of memory overhead. + */ #ifndef PRISM_NEWLINE_LIST_H #define PRISM_NEWLINE_LIST_H diff --git a/include/prism/util/pm_state_stack.h b/include/prism/util/pm_state_stack.h index 7268a3fd63b..1ce57a2209f 100644 --- a/include/prism/util/pm_state_stack.h +++ b/include/prism/util/pm_state_stack.h @@ -1,3 +1,8 @@ +/** + * @file pm_state_stack.h + * + * A stack of boolean values. + */ #ifndef PRISM_STATE_STACK_H #define PRISM_STATE_STACK_H @@ -30,6 +35,7 @@ void pm_state_stack_pop(pm_state_stack_t *stack); * Returns the value at the top of the stack. * * @param stack The stack to get the value from. + * @return The value at the top of the stack. */ bool pm_state_stack_p(pm_state_stack_t *stack); diff --git a/include/prism/util/pm_string.h b/include/prism/util/pm_string.h index b0b7c6bf2dc..ddb153784f4 100644 --- a/include/prism/util/pm_string.h +++ b/include/prism/util/pm_string.h @@ -1,3 +1,8 @@ +/** + * @file pm_string.h + * + * A generic string type that can have various ownership semantics. 
+ */ #ifndef PRISM_STRING_H #define PRISM_STRING_H diff --git a/include/prism/util/pm_string_list.h b/include/prism/util/pm_string_list.h index 1f460e5dc9c..0d406cc5d80 100644 --- a/include/prism/util/pm_string_list.h +++ b/include/prism/util/pm_string_list.h @@ -1,3 +1,8 @@ +/** + * @file pm_string_list.h + * + * A list of strings. + */ #ifndef PRISM_STRING_LIST_H #define PRISM_STRING_LIST_H diff --git a/include/prism/util/pm_strncasecmp.h b/include/prism/util/pm_strncasecmp.h index 6cf7aa80234..c381ea38f46 100644 --- a/include/prism/util/pm_strncasecmp.h +++ b/include/prism/util/pm_strncasecmp.h @@ -1,3 +1,8 @@ +/** + * @file pm_strncasecmp.h + * + * A custom strncasecmp implementation. + */ #ifndef PRISM_STRNCASECMP_H #define PRISM_STRNCASECMP_H diff --git a/include/prism/util/pm_strpbrk.h b/include/prism/util/pm_strpbrk.h index b589004abfd..61a443e51ad 100644 --- a/include/prism/util/pm_strpbrk.h +++ b/include/prism/util/pm_strpbrk.h @@ -1,3 +1,8 @@ +/** + * @file pm_strpbrk.h + * + * A custom strpbrk implementation. + */ #ifndef PRISM_STRPBRK_H #define PRISM_STRPBRK_H diff --git a/include/prism/version.h b/include/prism/version.h index 25ee409c74f..2e5e84cdf14 100644 --- a/include/prism/version.h +++ b/include/prism/version.h @@ -1,3 +1,11 @@ +/** + * @file version.h + * + * The version of the Prism library. + */ +#ifndef PRISM_VERSION_H +#define PRISM_VERSION_H + /** * The major version of the Prism library as an int. */ @@ -17,3 +25,5 @@ * The version of the Prism library as a constant string. */ #define PRISM_VERSION "0.16.0" + +#endif diff --git a/src/prism.c b/src/prism.c index b04f45854ad..08bebcfdee7 100644 --- a/src/prism.c +++ b/src/prism.c @@ -1,104 +1,5 @@ #include "prism.h" -/** - * @mainpage - * - * Prism is a parser for the Ruby programming language. It is designed to be - * portable, error tolerant, and maintainable. It is written in C99 and has no - * dependencies. 
It is currently being integrated into - * [CRuby](https://github.com/ruby/ruby), - * [JRuby](https://github.com/jruby/jruby), - * [TruffleRuby](https://github.com/oracle/truffleruby), - * [Sorbet](https://github.com/sorbet/sorbet), and - * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree). - * - * @section getting-started Getting started - * - * If you're vendoring this project and compiling it statically then as long as - * you have a C99 compiler you will be fine. If you're linking against it as - * shared library, then you should compile with `-fvisibility=hidden` and - * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface - * visible. - * - * @section parsing Parsing - * - * In order to parse Ruby code, the structures and functions that you're going - * to want to use and be aware of are: - * - * * @ref pm_parser_t - the main parser structure - * * @ref pm_parser_init - initialize a parser - * * @ref pm_parse - parse and return the root node - * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse` - * * @ref pm_parser_free - free the internal memory of the parser - * - * Putting all of this together would look something like: - * - * ```c - * void parse(const uint8_t *source, size_t length) { - * pm_parser_t parser; - * pm_parser_init(&parser, source, length, NULL); - * - * pm_node_t *root = pm_parse(&parser); - * printf("PARSED!\n"); - * - * pm_node_destroy(root); - * pm_parser_free(&parser); - * } - * ``` - * - * All of the nodes "inherit" from `pm_node_t` by embedding those structures as - * their first member. This means you can downcast and upcast any node in the - * tree to a `pm_node_t`. - * - * @section serializing Serializing - * - * Prism provides the ability to serialize the AST and its related metadata into - * a binary format. This format is designed to be portable to different - * languages and runtimes so that you only need to make one FFI call in order to - * parse Ruby code. 
The structures and functions that you're going to want to - * use and be aware of are: - * - * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST - * * @ref pm_buffer_free - free the memory associated with the buffer - * * @ref pm_serialize - serialize the AST into a buffer - * * @ref pm_parse_serialize - parse and serialize the AST into a buffer - * - * Putting all of this together would look something like: - * - * ```c - * void serialize(const uint8_t *source, size_t length) { - * pm_buffer_t buffer = { 0 }; - * - * pm_parse_serialize(source, length, &buffer, NULL); - * printf("SERIALIZED!\n"); - * - * pm_buffer_free(&buffer); - * } - * ``` - * - * @section inspecting Inspecting - * - * Prism provides the ability to inspect the AST by pretty-printing nodes. You - * can do this with the `pm_prettyprint` function, which you would use like: - * - * ```c - * void prettyprint(const uint8_t *source, size_t length) { - * pm_parser_t parser; - * pm_parser_init(&parser, source, length, NULL); - * - * pm_node_t *root = pm_parse(&parser); - * pm_buffer_t buffer = { 0 }; - * - * pm_prettyprint(&buffer, &parser, root); - * printf("*.s%\n", (int) buffer.length, buffer.value); - * - * pm_buffer_free(&buffer); - * pm_node_destroy(root); - * pm_parser_free(&parser); - * } - * ``` - */ - /** * The prism version and the serialization format. */ @@ -764,9 +665,16 @@ not_provided(pm_parser_t *parser) { * of the call node creation functions. */ typedef struct { + /** The optional location of the opening parenthesis or bracket. */ pm_location_t opening_loc; + + /** The lazily-allocated optional arguments node. */ pm_arguments_node_t *arguments; + + /** The optional location of the closing parenthesis or bracket. */ pm_location_t closing_loc; + + /** The optional block attached to the call. 
*/ pm_node_t *block; } pm_arguments_t; @@ -7668,7 +7576,16 @@ parser_flush_heredoc_end(pm_parser_t *parser) { * automatically attach the string content to the node that it belongs to. */ typedef struct { + /** + * The buffer that we're using to keep track of the string content. It will + * only be initialized if we receive an escape sequence. + */ pm_buffer_t buffer; + + /** + * The cursor into the source string that points to how far we have + * currently copied into the buffer. + */ const uint8_t *cursor; } pm_token_buffer_t; @@ -9835,8 +9752,13 @@ typedef enum { * are combined in this way to make it easier to represent associativity. */ typedef struct { + /** The left binding power. */ pm_binding_power_t left; + + /** The right binding power. */ pm_binding_power_t right; + + /** Whether or not this token can be used as a binary operator. */ bool binary; } pm_binding_powers_t; diff --git a/src/regexp.c b/src/regexp.c index fa2ea5cd20a..22833d177f3 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -4,11 +4,22 @@ * This is the parser that is going to handle parsing regular expressions. */ typedef struct { + /** A pointer to the start of the source that we are parsing. */ const uint8_t *start; + + /** A pointer to the current position in the source. */ const uint8_t *cursor; + + /** A pointer to the end of the source that we are parsing. */ const uint8_t *end; + + /** A list of named captures that we've found. */ pm_string_list_t *named_captures; + + /** Whether the encoding has changed from the default. */ bool encoding_changed; + + /** The encoding of the source. */ pm_encoding_t *encoding; } pm_regexp_parser_t; @@ -318,6 +329,7 @@ typedef enum { * This is the set of options that are configurable on the regular expression. */ typedef struct { + /** The current state of each option. 
*/ uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS]; } pm_regexp_options_t; diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index fadf461c40c..3f279d9779d 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -1,3 +1,8 @@ +/** + * @file ast.h + * + * The abstract syntax tree. + */ #ifndef PRISM_AST_H #define PRISM_AST_H @@ -78,18 +83,37 @@ enum pm_node_type { PM_SCOPE_NODE }; +/** + * This is the type of node embedded in the node struct. We explicitly control + * the size of it here to avoid having the variable-width enum. + */ typedef uint16_t pm_node_type_t; + +/** + * These are the flags embedded in the node struct. We explicitly control the + * size of it here to avoid having the variable-width enum. + */ typedef uint16_t pm_node_flags_t; -// We store the flags enum in every node in the tree. Some flags are common to -// all nodes (the ones listed below). Others are specific to certain node types. +/** + * We store the flags enum in every node in the tree. Some flags are common to + * all nodes (the ones listed below). Others are specific to certain node types. + */ #define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8) + static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1)); static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2)); static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2)); -// For easy access, we define some macros to check node type -#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type) +/** + * Cast the type to an enum to allow the compiler to provide exhaustiveness + * checking. + */ +#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type) + +/** + * Return true if the type of the given node matches the given type. 
+ */ #define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type)) /** @@ -132,8 +156,11 @@ typedef struct pm_node { * @extends pm_node_t */ typedef struct pm_<%= node.human %> { + /** The embedded base node. */ pm_node_t base; <%- node.fields.grep_v(Prism::FlagsField).each do |field| -%> + + /** <%= node.name %>#<%= field.name %> */ <%= case field when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}" when Prism::NodeListField then "struct pm_node_list #{field.name}" @@ -162,6 +189,12 @@ typedef enum pm_<%= flag.human %> { } pm_<%= flag.human %>_t; <%- end -%> +/** + * When we're serializing to Java, we want to skip serializing the location + * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us + * to specify that through the environment. It will never be true except for in + * those build systems. + */ #define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %> #endif