Documentation for pm_newline_list_t

ruby · Nov 1, 2023 · 97b3cc0 · 97b3cc0
1 parent ff1d2ec
commit 97b3cc0
Show file tree

Hide file tree

Showing 5 changed files with 129 additions and 68 deletions.
diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h
@@ -16,46 +16,84 @@
 #include <stddef.h>
 #include <stdlib.h>
 
-// A list of offsets of newlines in a string. The offsets are assumed to be
-// sorted/inserted in ascending order.
+/**
+ * A list of offsets of newlines in a string. The offsets are assumed to be
+ * sorted/inserted in ascending order.
+ */
 typedef struct {
+    /** A pointer to the start of the source string. */
     const uint8_t *start;
 
-    size_t *offsets;
+    /** The number of offsets in the list. */
     size_t size;
+
+    /** The capacity of the list that has been allocated. */
     size_t capacity;
 
-    size_t last_offset;
-    size_t last_index;
+    /** The list of offsets. */
+    size_t *offsets;
 } pm_newline_list_t;
 
-// A line and column in a string.
+/**
+ * A line and column in a string.
+ */
 typedef struct {
+    /** The line number. */
     size_t line;
+
+    /** The column number. */
     size_t column;
 } pm_line_column_t;
 
-#define PM_NEWLINE_LIST_EMPTY ((pm_newline_list_t) { \
-    .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
-})
-
-// Initialize a new newline list with the given capacity. Returns true if the
-// allocation of the offsets succeeds, otherwise returns false.
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ *
+ * @param list The list to initialize.
+ * @param start A pointer to the start of the source string.
+ * @param capacity The initial capacity of the list.
+ * @return True if the allocation of the offsets succeeds, otherwise false.
+ */
 bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
 
-// Append a new offset to the newline list. Returns true if the reallocation of
-// the offsets succeeds (if one was necessary), otherwise returns false.
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the byte in the source whose offset is appended.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ *     necessary), otherwise false.
+ */
 bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
 
-// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+/**
+ * Conditionally append a new offset to the newline list, if the value passed in
+ * is a newline.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the source byte to check for a newline.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ *     necessary), otherwise false.
+ */
 bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
 
-// Returns the line and column of the given offset. If the offset is not in the
-// list, the line and column of the closest offset less than the given offset
-// are returned.
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer into the source string to locate.
+ * @return The line and column of the given offset.
+ */
 pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor);
 
-// Free the internal memory allocated for the newline list.
+/**
+ * Free the internal memory allocated for the newline list.
+ *
+ * @param list The list to free.
+ */
 void pm_newline_list_free(pm_newline_list_t *list);
 
 #endif
diff --git a/include/prism/util/pm_strpbrk.h b/include/prism/util/pm_strpbrk.h
@@ -7,23 +7,32 @@
 #include <stddef.h>
 #include <string.h>
 
-// Here we have rolled our own version of strpbrk. The standard library strpbrk
-// has undefined behavior when the source string is not null-terminated. We want
-// to support strings that are not null-terminated because pm_parse does not
-// have the contract that the string is null-terminated. (This is desirable
-// because it means the extension can call pm_parse with the result of a call to
-// mmap).
-//
-// The standard library strpbrk also does not support passing a maximum length
-// to search. We want to support this for the reason mentioned above, but we
-// also don't want it to stop on null bytes. Ruby actually allows null bytes
-// within strings, comments, regular expressions, etc. So we need to be able to
-// skip past them.
-//
-// Finally, we want to support encodings wherein the charset could contain
-// characters that are trailing bytes of multi-byte characters. For example, in
-// Shift-JIS, the backslash character can be a trailing byte. In that case we
-// need to take a slower path and iterate one multi-byte character at a time.
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * @param parser The parser.
+ * @param source The source string.
+ * @param charset The charset to search for.
+ * @param length The maximum length to search.
+ * @return A pointer to the first character in the source string that is in the
+ *     charset, or NULL if no such character exists.
+ */
 const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
 
 #endif
diff --git a/src/prism.c b/src/prism.c
@@ -15691,7 +15691,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
         .lex_callback = NULL,
         .filepath_string = filepath_string,
         .constant_pool = PM_CONSTANT_POOL_EMPTY,
-        .newline_list = PM_NEWLINE_LIST_EMPTY,
+        .newline_list = { 0 },
         .integer_base = 0,
         .current_string = PM_STRING_EMPTY,
         .command_start = true,

diff --git a/src/util/pm_newline_list.c b/src/util/pm_newline_list.c
@@ -1,7 +1,9 @@
 #include "prism/util/pm_newline_list.h"
 
-// Initialize a new newline list with the given capacity. Returns true if the
-// allocation of the offsets succeeds, otherwise returns false.
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ */
 bool
 pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
     list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
@@ -14,14 +16,13 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
     list->size = 1;
     list->capacity = capacity;
 
-    list->last_index = 0;
-    list->last_offset = 0;
-
     return true;
 }
 
-// Append a new offset to the newline list. Returns true if the reallocation of
-// the offsets succeeds (if one was necessary), otherwise returns false.
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ */
 bool
 pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
     if (list->size == list->capacity) {
@@ -44,7 +45,10 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
     return true;
 }
 
-// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+/**
+ * Conditionally append a new offset to the newline list, if the value passed in
+ * is a newline.
+ */
 bool
 pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
     if (*cursor != '\n') {
@@ -53,9 +57,11 @@ pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
     return pm_newline_list_append(list, cursor);
 }
 
-// Returns the line and column of the given offset. If the offset is not in the
-// list, the line and column of the closest offset less than the given offset
-// are returned.
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ */
 pm_line_column_t
 pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
     assert(cursor >= list->start);
@@ -81,7 +87,9 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
     return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
 }
 
-// Free the internal memory allocated for the newline list.
+/**
+ * Free the internal memory allocated for the newline list.
+ */
 void
 pm_newline_list_free(pm_newline_list_t *list) {
     free(list->offsets);

diff --git a/src/util/pm_strpbrk.c b/src/util/pm_strpbrk.c
@@ -1,6 +1,8 @@
 #include "prism/util/pm_strpbrk.h"
 
-// This is the slow path that does care about the encoding.
+/**
+ * This is the slow path that does care about the encoding.
+ */
 static inline const uint8_t *
 pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
     size_t index = 0;
@@ -21,7 +23,9 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
     return NULL;
 }
 
-// This is the fast path that does not care about the encoding.
+/**
+ * This is the fast path that does not care about the encoding.
+ */
 static inline const uint8_t *
 pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
     size_t index = 0;
@@ -37,23 +41,25 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
     return NULL;
 }
 
-// Here we have rolled our own version of strpbrk. The standard library strpbrk
-// has undefined behavior when the source string is not null-terminated. We want
-// to support strings that are not null-terminated because pm_parse does not
-// have the contract that the string is null-terminated. (This is desirable
-// because it means the extension can call pm_parse with the result of a call to
-// mmap).
-//
-// The standard library strpbrk also does not support passing a maximum length
-// to search. We want to support this for the reason mentioned above, but we
-// also don't want it to stop on null bytes. Ruby actually allows null bytes
-// within strings, comments, regular expressions, etc. So we need to be able to
-// skip past them.
-//
-// Finally, we want to support encodings wherein the charset could contain
-// characters that are trailing bytes of multi-byte characters. For example, in
-// Shift-JIS, the backslash character can be a trailing byte. In that case we
-// need to take a slower path and iterate one multi-byte character at a time.
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ */
 const uint8_t *
 pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
     if (length <= 0) {