Skip to content

Commit

Permalink
Documentation for diagnostics and regexp
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton committed Nov 1, 2023
1 parent 76af64e commit 16e0579
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 152 deletions.
21 changes: 19 additions & 2 deletions include/prism/diagnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ typedef struct {
const char *message;
} pm_diagnostic_t;

/**
* The diagnostic IDs of all of the diagnostics, used to communicate the types
* of errors between the parser and the user.
*/
typedef enum {
PM_ERR_ALIAS_ARGUMENT,
PM_ERR_AMPAMPEQ_MULTI_ASSIGN,
Expand Down Expand Up @@ -223,14 +227,27 @@ typedef enum {
PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS,
PM_WARN_AMBIGUOUS_PREFIX_STAR,
PM_WARN_AMBIGUOUS_SLASH,

/* This must be the last member. */
PM_DIAGNOSTIC_ID_LEN,
} pm_diagnostic_id_t;

// Append a diagnostic to the given list of diagnostics.
/**
* Append a diagnostic to the given list of diagnostics.
*
* @param list The list to append to.
* @param start The start of the diagnostic.
* @param end The end of the diagnostic.
* @param diag_id The diagnostic ID.
* @return Whether the diagnostic was successfully appended.
*/
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id);

// Deallocate the internal state of the given diagnostic list.
/**
* Deallocate the internal state of the given diagnostic list.
*
* @param list The list to deallocate.
*/
void pm_diagnostic_list_free(pm_list_t *list);

#endif
13 changes: 11 additions & 2 deletions include/prism/regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,17 @@
#include <stddef.h>
#include <string.h>

// Parse a regular expression and extract the names of all of the named capture
// groups.
/**
* Parse a regular expression and extract the names of all of the named capture
* groups.
*
* @param source The source code to parse.
* @param size The size of the source code.
* @param named_captures The list to which the names of the named capture groups are added.
* @param encoding_changed Whether or not the encoding changed from the default.
* @param encoding The encoding of the source code.
* @return Whether or not the parsing was successful.
*/
PRISM_EXPORTED_FUNCTION bool pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding);

#endif
109 changes: 56 additions & 53 deletions src/diagnostic.c
Original file line number Diff line number Diff line change
@@ -1,56 +1,55 @@
#include "prism/diagnostic.h"

/*
## Message composition
When composing an error message, use sentence fragments.
Try describing the property of the code that caused the error, rather than the rule that is being
violated. It may help to use a fragment that completes a sentence beginning, "The parser
encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
context) after a semicolon.
For example, instead of "Control escape sequence cannot be doubled", prefer:
> "Invalid control escape sequence; control cannot be repeated"
In some cases, where the failure is more general or syntax expectations are violated, it may make
more sense to use a fragment that completes a sentence beginning, "The parser ...".
For example:
> "Expected an expression after `(`"
> "Cannot parse the expression"
## Message style guide
- Use articles like "a", "an", and "the" when appropriate.
- e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
- Use the common name for tokens and nodes.
- e.g., prefer "keyword splat" to "assoc splat"
- e.g., prefer "embedded document" to "embdoc"
- Capitalize the initial word of the message.
- Use back ticks around token literals
- e.g., "Expected a `=>` between the hash key and value"
- Do not use `.` or other punctuation at the end of the message.
- Do not use contractions like "can't". Prefer "cannot" to "can not".
- For tokens that can have multiple meanings, reference the token and its meaning.
- e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
## Error names (PM_ERR_*)
- When appropriate, prefer node name to token name.
- e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
- Prefer token name to common name.
- e.g., prefer "STAR" to "ASTERISK".
- Try to order the words in the name from more general to more specific,
- e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
- When in doubt, look for similar patterns and name them so that they are grouped when lexically
sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
*/

/**
* ## Message composition
*
* When composing an error message, use sentence fragments.
*
* Try describing the property of the code that caused the error, rather than the rule that is being
* violated. It may help to use a fragment that completes a sentence beginning, "The parser
* encountered (a) ...". If appropriate, add a description of the rule violation (or other helpful
* context) after a semicolon.
*
* For example, instead of "Control escape sequence cannot be doubled", prefer:
*
* > "Invalid control escape sequence; control cannot be repeated"
*
* In some cases, where the failure is more general or syntax expectations are violated, it may make
* more sense to use a fragment that completes a sentence beginning, "The parser ...".
*
* For example:
*
* > "Expected an expression after `(`"
* > "Cannot parse the expression"
*
*
* ## Message style guide
*
* - Use articles like "a", "an", and "the" when appropriate.
* - e.g., prefer "Cannot parse the expression" to "Cannot parse expression".
* - Use the common name for tokens and nodes.
* - e.g., prefer "keyword splat" to "assoc splat"
* - e.g., prefer "embedded document" to "embdoc"
* - Capitalize the initial word of the message.
* - Use backticks around token literals.
* - e.g., "Expected a `=>` between the hash key and value"
* - Do not use `.` or other punctuation at the end of the message.
* - Do not use contractions like "can't". Prefer "cannot" to "can not".
* - For tokens that can have multiple meanings, reference the token and its meaning.
* - e.g., "`*` splat argument" is clearer and more complete than "splat argument" or "`*` argument"
*
*
* ## Error names (PM_ERR_*)
*
* - When appropriate, prefer node name to token name.
* - e.g., prefer "SPLAT" to "STAR" in the context of argument parsing.
* - Prefer token name to common name.
* - e.g., prefer "STAR" to "ASTERISK".
* - Try to order the words in the name from more general to more specific.
* - e.g., "INVALID_NUMBER_DECIMAL" is better than "DECIMAL_INVALID_NUMBER".
* - When in doubt, look for similar patterns and name them so that they are grouped when lexically
* sorted. See PM_ERR_ARGUMENT_NO_FORWARDING_* for an example.
*/
static const char* const diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_ERR_ALIAS_ARGUMENT] = "Invalid argument being passed to `alias`; expected a bare word, symbol, constant, or global variable",
[PM_ERR_AMPAMPEQ_MULTI_ASSIGN] = "Unexpected `&&=` in a multiple assignment",
Expand Down Expand Up @@ -263,7 +262,9 @@ pm_diagnostic_message(pm_diagnostic_id_t diag_id) {
return message;
}

// Append a diagnostic to the given list of diagnostics.
/**
* Append a diagnostic to the given list of diagnostics.
*/
bool
pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) calloc(sizeof(pm_diagnostic_t), 1);
Expand All @@ -274,7 +275,9 @@ pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *
return true;
}

// Deallocate the internal state of the given diagnostic list.
/**
* Deallocate the internal state of the given diagnostic list.
*/
void
pm_diagnostic_list_free(pm_list_t *list) {
pm_list_node_t *node, *next;
Expand Down
Loading

0 comments on commit 16e0579

Please sign in to comment.