diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h index da6c565d6bc..603a84c38cc 100644 --- a/include/prism/util/pm_newline_list.h +++ b/include/prism/util/pm_newline_list.h @@ -16,46 +16,84 @@ #include #include -// A list of offsets of newlines in a string. The offsets are assumed to be -// sorted/inserted in ascending order. +/** + * A list of offsets of newlines in a string. The offsets are assumed to be + * sorted/inserted in ascending order. + */ typedef struct { + /** A pointer to the start of the source string. */ const uint8_t *start; - size_t *offsets; + /** The number of offsets in the list. */ size_t size; + + /** The capacity of the list that has been allocated. */ size_t capacity; - size_t last_offset; - size_t last_index; + /** The list of offsets. */ + size_t *offsets; } pm_newline_list_t; -// A line and column in a string. +/** + * A line and column in a string. + */ typedef struct { + /** The line number. */ size_t line; + + /** The column number. */ size_t column; } pm_line_column_t; -#define PM_NEWLINE_LIST_EMPTY ((pm_newline_list_t) { \ - .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \ -}) - -// Initialize a new newline list with the given capacity. Returns true if the -// allocation of the offsets succeeds, otherwise returns false. +/** + * Initialize a new newline list with the given capacity. Returns true if the + * allocation of the offsets succeeds, otherwise returns false. + * + * @param list The list to initialize. + * @param start A pointer to the start of the source string. + * @param capacity The initial capacity of the list. + * @return True if the allocation of the offsets succeeds, otherwise false. + */ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity); -// Append a new offset to the newline list. Returns true if the reallocation of -// the offsets succeeds (if one was necessary), otherwise returns false. +/** + * Append a new offset to the newline list. Returns true if the reallocation of + * the offsets succeeds (if one was necessary), otherwise returns false. + * + * @param list The list to append to. + * @param cursor A pointer to the offset to append. + * @return True if the reallocation of the offsets succeeds (if one was + * necessary), otherwise false. + */ bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor); -// Conditionally append a new offset to the newline list, if the value passed in is a newline. +/** + * Conditionally append a new offset to the newline list, if the value passed in + * is a newline. + * + * @param list The list to append to. + * @param cursor A pointer to the offset to append. + * @return True if the reallocation of the offsets succeeds (if one was + * necessary), otherwise false. + */ bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor); -// Returns the line and column of the given offset. If the offset is not in the -// list, the line and column of the closest offset less than the given offset -// are returned. +/** + * Returns the line and column of the given offset. If the offset is not in the + * list, the line and column of the closest offset less than the given offset + * are returned. + * + * @param list The list to search. + * @param cursor A pointer to the offset to search for. + * @return The line and column of the given offset. + */ pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor); -// Free the internal memory allocated for the newline list. +/** + * Free the internal memory allocated for the newline list. + * + * @param list The list to free. + */ void pm_newline_list_free(pm_newline_list_t *list); #endif diff --git a/include/prism/util/pm_strpbrk.h b/include/prism/util/pm_strpbrk.h index c9ea6c945ee..b589004abfd 100644 --- a/include/prism/util/pm_strpbrk.h +++ b/include/prism/util/pm_strpbrk.h @@ -7,23 +7,32 @@ #include #include -// Here we have rolled our own version of strpbrk. The standard library strpbrk -// has undefined behavior when the source string is not null-terminated. We want -// to support strings that are not null-terminated because pm_parse does not -// have the contract that the string is null-terminated. (This is desirable -// because it means the extension can call pm_parse with the result of a call to -// mmap). -// -// The standard library strpbrk also does not support passing a maximum length -// to search. We want to support this for the reason mentioned above, but we -// also don't want it to stop on null bytes. Ruby actually allows null bytes -// within strings, comments, regular expressions, etc. So we need to be able to -// skip past them. -// -// Finally, we want to support encodings wherein the charset could contain -// characters that are trailing bytes of multi-byte characters. For example, in -// Shift-JIS, the backslash character can be a trailing byte. In that case we -// need to take a slower path and iterate one multi-byte character at a time. +/** + * Here we have rolled our own version of strpbrk. The standard library strpbrk + * has undefined behavior when the source string is not null-terminated. We want + * to support strings that are not null-terminated because pm_parse does not + * have the contract that the string is null-terminated. (This is desirable + * because it means the extension can call pm_parse with the result of a call to + * mmap). + * + * The standard library strpbrk also does not support passing a maximum length + * to search. We want to support this for the reason mentioned above, but we + * also don't want it to stop on null bytes. Ruby actually allows null bytes + * within strings, comments, regular expressions, etc. So we need to be able to + * skip past them. + * + * Finally, we want to support encodings wherein the charset could contain + * characters that are trailing bytes of multi-byte characters. For example, in + * Shift-JIS, the backslash character can be a trailing byte. In that case we + * need to take a slower path and iterate one multi-byte character at a time. + * + * @param parser The parser. + * @param source The source string. + * @param charset The charset to search for. + * @param length The maximum length to search. + * @return A pointer to the first character in the source string that is in the + * charset, or NULL if no such character exists. + */ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length); #endif diff --git a/src/prism.c b/src/prism.c index 0a8c26ea185..d82d1cc0894 100644 --- a/src/prism.c +++ b/src/prism.c @@ -15691,7 +15691,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch .lex_callback = NULL, .filepath_string = filepath_string, .constant_pool = PM_CONSTANT_POOL_EMPTY, - .newline_list = PM_NEWLINE_LIST_EMPTY, + .newline_list = { 0 }, .integer_base = 0, .current_string = PM_STRING_EMPTY, .command_start = true, diff --git a/src/util/pm_newline_list.c b/src/util/pm_newline_list.c index 20a1a221cbe..f27bb75b63a 100644 --- a/src/util/pm_newline_list.c +++ b/src/util/pm_newline_list.c @@ -1,7 +1,9 @@ #include "prism/util/pm_newline_list.h" -// Initialize a new newline list with the given capacity. Returns true if the -// allocation of the offsets succeeds, otherwise returns false. +/** + * Initialize a new newline list with the given capacity. Returns true if the + * allocation of the offsets succeeds, otherwise returns false. + */ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) { list->offsets = (size_t *) calloc(capacity, sizeof(size_t)); @@ -14,14 +16,13 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac list->size = 1; list->capacity = capacity; - list->last_index = 0; - list->last_offset = 0; - return true; } -// Append a new offset to the newline list. Returns true if the reallocation of -// the offsets succeeds (if one was necessary), otherwise returns false. +/** + * Append a new offset to the newline list. Returns true if the reallocation of + * the offsets succeeds (if one was necessary), otherwise returns false. + */ bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { if (list->size == list->capacity) { @@ -44,7 +45,10 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) { return true; } -// Conditionally append a new offset to the newline list, if the value passed in is a newline. +/** + * Conditionally append a new offset to the newline list, if the value passed in + * is a newline. + */ bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) { if (*cursor != '\n') { @@ -53,9 +57,11 @@ pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) { return pm_newline_list_append(list, cursor); } -// Returns the line and column of the given offset. If the offset is not in the -// list, the line and column of the closest offset less than the given offset -// are returned. +/** + * Returns the line and column of the given offset. If the offset is not in the + * list, the line and column of the closest offset less than the given offset + * are returned. + */ pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) { assert(cursor >= list->start); @@ -81,7 +87,9 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] }); } -// Free the internal memory allocated for the newline list. +/** + * Free the internal memory allocated for the newline list. + */ void pm_newline_list_free(pm_newline_list_t *list) { free(list->offsets); diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c index 49bcd847b84..ce1f36910b0 100644 --- a/src/util/pm_strpbrk.c +++ b/src/util/pm_strpbrk.c @@ -1,6 +1,8 @@ #include "prism/util/pm_strpbrk.h" -// This is the slow path that does care about the encoding. +/** + * This is the slow path that does care about the encoding. + */ static inline const uint8_t * pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) { size_t index = 0; @@ -21,7 +23,9 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t return NULL; } -// This is the fast path that does not care about the encoding. +/** + * This is the fast path that does not care about the encoding. + */ static inline const uint8_t * pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) { size_t index = 0; @@ -37,23 +41,25 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max return NULL; } -// Here we have rolled our own version of strpbrk. The standard library strpbrk -// has undefined behavior when the source string is not null-terminated. We want -// to support strings that are not null-terminated because pm_parse does not -// have the contract that the string is null-terminated. (This is desirable -// because it means the extension can call pm_parse with the result of a call to -// mmap). -// -// The standard library strpbrk also does not support passing a maximum length -// to search. We want to support this for the reason mentioned above, but we -// also don't want it to stop on null bytes. Ruby actually allows null bytes -// within strings, comments, regular expressions, etc. So we need to be able to -// skip past them. -// -// Finally, we want to support encodings wherein the charset could contain -// characters that are trailing bytes of multi-byte characters. For example, in -// Shift-JIS, the backslash character can be a trailing byte. In that case we -// need to take a slower path and iterate one multi-byte character at a time. +/** + * Here we have rolled our own version of strpbrk. The standard library strpbrk + * has undefined behavior when the source string is not null-terminated. We want + * to support strings that are not null-terminated because pm_parse does not + * have the contract that the string is null-terminated. (This is desirable + * because it means the extension can call pm_parse with the result of a call to + * mmap). + * + * The standard library strpbrk also does not support passing a maximum length + * to search. We want to support this for the reason mentioned above, but we + * also don't want it to stop on null bytes. Ruby actually allows null bytes + * within strings, comments, regular expressions, etc. So we need to be able to + * skip past them. + * + * Finally, we want to support encodings wherein the charset could contain + * characters that are trailing bytes of multi-byte characters. For example, in + * Shift-JIS, the backslash character can be a trailing byte. In that case we + * need to take a slower path and iterate one multi-byte character at a time. + */ const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) { if (length <= 0) {