Documentation for diagnostics and regexp

ruby · Nov 1, 2023 · 16e0579 · 16e0579
1 parent 76af64e
commit 16e0579
Show file tree

Hide file tree

Showing 4 changed files with 225 additions and 152 deletions.
diff --git a/include/prism/diagnostic.h b/include/prism/diagnostic.h
@@ -20,6 +20,10 @@ typedef struct {
     const char *message;
 } pm_diagnostic_t;
 
+/**
+ * The diagnostic IDs of all of the diagnostics, used to communicate the types
+ * of errors between the parser and the user.
+ */
 typedef enum {
     PM_ERR_ALIAS_ARGUMENT,
     PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
@@ -223,14 +227,27 @@ typedef enum {
     PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
     PM_WARN_AMBIGUOUS_PREFIX_STAR,
     PM_WARN_AMBIGUOUS_SLASH,
+
     /* This must be the last member. */
     PM_DIAGNOSTIC_ID_LEN,
 } pm_diagnostic_id_t;
 
-// Append a diagnostic to the given list of diagnostics.
+/**
+ * Append a diagnostic to the given list of diagnostics.
+ *
+ * @param list The list to append to.
+ * @param start The start of the diagnostic.
+ * @param end The end of the diagnostic.
+ * @param diag_id The diagnostic ID.
+ * @return Whether the diagnostic was successfully appended.
+ */
 bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);
 
-// Deallocate the internal state of the given diagnostic list.
+/**
+ * Deallocate the internal state of the given diagnostic list.
+ *
+ * @param list The list to deallocate.
+ */
 void pm_diagnostic_list_free(pm_list_t *list);
 
 #endif
diff --git a/include/prism/regexp.h b/include/prism/regexp.h
@@ -12,8 +12,17 @@
 #include <stddef.h>
 #include <string.h>
 
-// Parse a regular expression and extract the names of all of the named capture
-// groups.
+/**
+ * Parse a regular expression and extract the names of all of the named capture
+ * groups.
+ *
+ * @param source The source code to parse.
+ * @param size The size of the source code.
+ * @param named_captures The list to add the names of the named capture groups to.
+ * @param encoding_changed Whether or not the encoding changed from the default.
+ * @param encoding The encoding of the source code.
+ * @return Whether or not the parsing was successful.
+ */
 PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);
 
 #endif
diff --git a/src/diagnostic.c b/src/diagnostic.c
@@ -1,56 +1,55 @@
 #include "prism/diagnostic.h"
 
-/*
-  ## Message composition
-
-  When composing an error message, use sentence fragments.
-
-  Try describing the property of the code that caused the error, rather than the rule that is being
-  violated. It may help to use a fragment that completes a sentence beginning, "The parser
-  encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
-  context) after a semicolon.
-
-  For example:, instead of "Control escape sequence cannot be doubled", prefer:
-
-  > "Invalid control escape sequence; control cannot be repeated"
-
-  In some cases, where the failure is more general or syntax expectations are violated, it may make
-  more sense to use a fragment that completes a sentence beginning, "The parser ...".
-
-  For example:
-
-  > "Expected an expression after `(`"
-  > "Cannot parse the expression"
-
-
-  ## Message style guide
-
-  - Use articles like "a", "an", and "the" when appropriate.
-    - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
-  - Use the common name for tokens and nodes.
-    - e.g., prefer "keyword splat" to "assoc splat"
-    - e.g., prefer "embedded document" to "embdoc"
-  - Capitalize the initial word of the message.
-  - Use back ticks around token literals
-    - e.g., "Expected a `=>` between the hash key and value"
-  - Do not use `.` or other punctuation at the end of the message.
-  - Do not use contractions like "can't". Prefer "cannot" to "can not".
-  - For tokens that can have multiple meanings, reference the token and its meaning.
-    - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
-
-
-  ## Error names (PM_ERR_*)
-
-  - When appropriate, prefer node name to token name.
-    - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
-  - Prefer token name to common name.
-    - e.g., prefer "STAR" to "ASTERISK".
-  - Try to order the words in the name from more general to more specific,
-    - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
-    - When in doubt, look for similar patterns and name them so that they are grouped when lexically
-      sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
-*/
-
+/**
+ * ## Message composition
+ *
+ * When composing an error message, use sentence fragments.
+ *
+ * Try describing the property of the code that caused the error, rather than the rule that is being
+ * violated. It may help to use a fragment that completes a sentence beginning, "The parser
+ * encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
+ * context) after a semicolon.
+ *
+ * For example, instead of "Control escape sequence cannot be doubled", prefer:
+ *
+ * > "Invalid control escape sequence; control cannot be repeated"
+ *
+ * In some cases, where the failure is more general or syntax expectations are violated, it may make
+ * more sense to use a fragment that completes a sentence beginning, "The parser ...".
+ *
+ * For example:
+ *
+ * > "Expected an expression after `(`"
+ * > "Cannot parse the expression"
+ *
+ *
+ * ## Message style guide
+ *
+ * - Use articles like "a", "an", and "the" when appropriate.
+ *   - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
+ * - Use the common name for tokens and nodes.
+ *   - e.g., prefer "keyword splat" to "assoc splat"
+ *   - e.g., prefer "embedded document" to "embdoc"
+ * - Capitalize the initial word of the message.
+ * - Use back ticks around token literals
+ *   - e.g., "Expected a `=>` between the hash key and value"
+ * - Do not use `.` or other punctuation at the end of the message.
+ * - Do not use contractions like "can't". Prefer "cannot" to "can not".
+ * - For tokens that can have multiple meanings, reference the token and its meaning.
+ *   - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
+ *
+ *
+ * ## Error names (PM_ERR_*)
+ *
+ * - When appropriate, prefer node name to token name.
+ *   - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
+ * - Prefer token name to common name.
+ *   - e.g., prefer "STAR" to "ASTERISK".
+ * - Try to order the words in the name from more general to more specific,
+ *   - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
+ *   - When in doubt, look for similar patterns and name them so that they are grouped when lexically
+ *     sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
+ */
 static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
     [PM_ERR_ALIAS_ARGUMENT]                     = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
     [PM_ERR_AMPAMPEQ_MULTI_ASSIGN]              = "Unexpected `&&=` in a multiple assignment",
@@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
     return message;
 }
 
-// Append an error to the given list of diagnostic.
+/**
+ * Append a diagnostic to the given list of diagnostics.
+ */
 bool
 pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
     pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
@@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
     return true;
 }
 
-// Deallocate the internal state of the given diagnostic list.
+/**
+ * Deallocate the internal state of the given diagnostic list.
+ */
 void
 pm_diagnostic_list_free(pm_list_t *list) {
     pm_list_node_t *node, *next;