diff --git a/include/prism/enc/pm_encoding.h b/include/prism/enc/pm_encoding.h index 232bc97dd4c..28b9f02281b 100644 --- a/include/prism/enc/pm_encoding.h +++ b/include/prism/enc/pm_encoding.h @@ -8,36 +8,50 @@ #include #include -// This struct defines the functions necessary to implement the encoding -// interface so we can determine how many bytes the subsequent character takes. -// Each callback should return the number of bytes, or 0 if the next bytes are -// invalid for the encoding and type. +/** + * This struct defines the functions necessary to implement the encoding + * interface so we can determine how many bytes the subsequent character takes. + * Each callback should return the number of bytes, or 0 if the next bytes are + * invalid for the encoding and type. + */ typedef struct { - // Return the number of bytes that the next character takes if it is valid - // in the encoding. Does not read more than n bytes. It is assumed that n is - // at least 1. + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding. Does not read more than n bytes. It is assumed that n is + * at least 1. + */ size_t (*char_width)(const uint8_t *b, ptrdiff_t n); - // Return the number of bytes that the next character takes if it is valid - // in the encoding and is alphabetical. Does not read more than n bytes. It - // is assumed that n is at least 1. + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphabetical. Does not read more than n bytes. It + * is assumed that n is at least 1. + */ size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n); - // Return the number of bytes that the next character takes if it is valid - // in the encoding and is alphanumeric. Does not read more than n bytes. It - // is assumed that n is at least 1. + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphanumeric. 
Does not read more than n bytes. It + * is assumed that n is at least 1. + */ size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n); - // Return true if the next character is valid in the encoding and is an - // uppercase character. Does not read more than n bytes. It is assumed that - // n is at least 1. + /** + * Return true if the next character is valid in the encoding and is an + * uppercase character. Does not read more than n bytes. It is assumed that + * n is at least 1. + */ bool (*isupper_char)(const uint8_t *b, ptrdiff_t n); - // The name of the encoding. This should correspond to a value that can be - // passed to Encoding.find in Ruby. + /** + * The name of the encoding. This should correspond to a value that can be + * passed to Encoding.find in Ruby. + */ const char *name; - // Return true if the encoding is a multibyte encoding. + /** + * Return true if the encoding is a multibyte encoding. + */ bool multibyte; } pm_encoding_t; @@ -47,50 +61,109 @@ typedef struct { #define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1 #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2 -// These functions are reused by some other encodings, so they are defined here -// so they can be shared. +/** + * Return the size of the next character in the ASCII encoding if it is an + * alphabetical character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. + */ size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n); + +/** + * Return the size of the next character in the ASCII encoding if it is an + * alphanumeric character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. 
+ */ size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n); + +/** + * Return true if the next character in the ASCII encoding is an uppercase + * character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns True if the next character is valid in the encoding and is an + * uppercase character, or false if it is not. + */ bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n); -// These functions are shared between the actual encoding and the fast path in -// the parser so they need to be internally visible. +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphabetical character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. + */ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n); + +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphanumeric character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns The number of bytes that the next character takes if it is valid in + * the encoding, or 0 if it is not. + */ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n); + +/** + * Return true if the next character in the UTF-8 encoding is an uppercase + * character. + * + * @param b The bytes to read. + * @param n The number of bytes that can be read. + * @returns True if the next character is valid in the encoding and is an + * uppercase character, or false if it is not. + */ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n); -// This lookup table is referenced in both the UTF-8 encoding file and the -// parser directly in order to speed up the default encoding processing. 
+/** + * This lookup table is referenced in both the UTF-8 encoding file and the + * parser directly in order to speed up the default encoding processing. It is + * used to indicate whether a character is alphabetical, alphanumeric, or + * uppercase in unicode mappings. + */ extern const uint8_t pm_encoding_unicode_table[256]; -// These are the encodings that are supported by the parser. They are defined in +// Below are the encodings that are supported by the parser. They are defined in // their own files in the src/enc directory. -extern pm_encoding_t pm_encoding_ascii; -extern pm_encoding_t pm_encoding_ascii_8bit; -extern pm_encoding_t pm_encoding_big5; -extern pm_encoding_t pm_encoding_euc_jp; -extern pm_encoding_t pm_encoding_gbk; -extern pm_encoding_t pm_encoding_iso_8859_1; -extern pm_encoding_t pm_encoding_iso_8859_2; -extern pm_encoding_t pm_encoding_iso_8859_3; -extern pm_encoding_t pm_encoding_iso_8859_4; -extern pm_encoding_t pm_encoding_iso_8859_5; -extern pm_encoding_t pm_encoding_iso_8859_6; -extern pm_encoding_t pm_encoding_iso_8859_7; -extern pm_encoding_t pm_encoding_iso_8859_8; -extern pm_encoding_t pm_encoding_iso_8859_9; -extern pm_encoding_t pm_encoding_iso_8859_10; -extern pm_encoding_t pm_encoding_iso_8859_11; -extern pm_encoding_t pm_encoding_iso_8859_13; -extern pm_encoding_t pm_encoding_iso_8859_14; -extern pm_encoding_t pm_encoding_iso_8859_15; -extern pm_encoding_t pm_encoding_iso_8859_16; -extern pm_encoding_t pm_encoding_koi8_r; -extern pm_encoding_t pm_encoding_shift_jis; -extern pm_encoding_t pm_encoding_utf_8; -extern pm_encoding_t pm_encoding_utf8_mac; -extern pm_encoding_t pm_encoding_windows_31j; -extern pm_encoding_t pm_encoding_windows_1251; -extern pm_encoding_t pm_encoding_windows_1252; + +const extern pm_encoding_t pm_encoding_ascii; +const extern pm_encoding_t pm_encoding_ascii_8bit; +const extern pm_encoding_t pm_encoding_big5; +const extern pm_encoding_t pm_encoding_euc_jp; +const extern pm_encoding_t pm_encoding_gbk; 
+const extern pm_encoding_t pm_encoding_iso_8859_1; +const extern pm_encoding_t pm_encoding_iso_8859_2; +const extern pm_encoding_t pm_encoding_iso_8859_3; +const extern pm_encoding_t pm_encoding_iso_8859_4; +const extern pm_encoding_t pm_encoding_iso_8859_5; +const extern pm_encoding_t pm_encoding_iso_8859_6; +const extern pm_encoding_t pm_encoding_iso_8859_7; +const extern pm_encoding_t pm_encoding_iso_8859_8; +const extern pm_encoding_t pm_encoding_iso_8859_9; +const extern pm_encoding_t pm_encoding_iso_8859_10; +const extern pm_encoding_t pm_encoding_iso_8859_11; +const extern pm_encoding_t pm_encoding_iso_8859_13; +const extern pm_encoding_t pm_encoding_iso_8859_14; +const extern pm_encoding_t pm_encoding_iso_8859_15; +const extern pm_encoding_t pm_encoding_iso_8859_16; +const extern pm_encoding_t pm_encoding_koi8_r; +const extern pm_encoding_t pm_encoding_shift_jis; +const extern pm_encoding_t pm_encoding_utf_8; +const extern pm_encoding_t pm_encoding_utf8_mac; +const extern pm_encoding_t pm_encoding_windows_31j; +const extern pm_encoding_t pm_encoding_windows_1251; +const extern pm_encoding_t pm_encoding_windows_1252; #endif diff --git a/src/enc/pm_big5.c b/src/enc/pm_big5.c index deaa3afb3fb..6d4fefcf2b7 100644 --- a/src/enc/pm_big5.c +++ b/src/enc/pm_big5.c @@ -42,7 +42,8 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) { } } -pm_encoding_t pm_encoding_big5 = { +/** Big5 encoding */ +const pm_encoding_t pm_encoding_big5 = { .name = "big5", .char_width = pm_encoding_big5_char_width, .alnum_char = pm_encoding_big5_alnum_char, diff --git a/src/enc/pm_euc_jp.c b/src/enc/pm_euc_jp.c index 13d36624559..cd0b1a79102 100644 --- a/src/enc/pm_euc_jp.c +++ b/src/enc/pm_euc_jp.c @@ -48,7 +48,8 @@ pm_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) { } } -pm_encoding_t pm_encoding_euc_jp = { +/** EUC-JP encoding */ +const pm_encoding_t pm_encoding_euc_jp = { .name = "euc-jp", .char_width = pm_encoding_euc_jp_char_width, .alnum_char = 
pm_encoding_euc_jp_alnum_char, diff --git a/src/enc/pm_gbk.c b/src/enc/pm_gbk.c index 2fc67b47a4d..3dcf41fb996 100644 --- a/src/enc/pm_gbk.c +++ b/src/enc/pm_gbk.c @@ -51,7 +51,8 @@ pm_encoding_gbk_isupper_char(const uint8_t *b, ptrdiff_t n) { } } -pm_encoding_t pm_encoding_gbk = { +/** GBK encoding */ +const pm_encoding_t pm_encoding_gbk = { .name = "gbk", .char_width = pm_encoding_gbk_char_width, .alnum_char = pm_encoding_gbk_alnum_char, diff --git a/src/enc/pm_shift_jis.c b/src/enc/pm_shift_jis.c index 3c93937efc3..ecc3d51b87b 100644 --- a/src/enc/pm_shift_jis.c +++ b/src/enc/pm_shift_jis.c @@ -46,7 +46,8 @@ pm_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) { } } -pm_encoding_t pm_encoding_shift_jis = { +/** Shift_JIS encoding */ +const pm_encoding_t pm_encoding_shift_jis = { .name = "shift_jis", .char_width = pm_encoding_shift_jis_char_width, .alnum_char = pm_encoding_shift_jis_alnum_char, diff --git a/src/enc/pm_tables.c b/src/enc/pm_tables.c index c6bb4dce651..c2133649a4e 100644 --- a/src/enc/pm_tables.c +++ b/src/enc/pm_tables.c @@ -1,7 +1,9 @@ #include "prism/enc/pm_encoding.h" -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ASCII character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ASCII character. + */ static uint8_t pm_encoding_ascii_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -22,8 +24,10 @@ static uint8_t pm_encoding_ascii_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-1 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-1 character. 
+ */ static uint8_t pm_encoding_iso_8859_1_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -44,8 +48,10 @@ static uint8_t pm_encoding_iso_8859_1_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-2 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-2 character. + */ static uint8_t pm_encoding_iso_8859_2_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -66,8 +72,10 @@ static uint8_t pm_encoding_iso_8859_2_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-3 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-3 character. + */ static uint8_t pm_encoding_iso_8859_3_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -88,8 +96,10 @@ static uint8_t pm_encoding_iso_8859_3_table[256] = { 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-4 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-4 character. 
+ */ static uint8_t pm_encoding_iso_8859_4_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -110,8 +120,10 @@ static uint8_t pm_encoding_iso_8859_4_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-5 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-5 character. + */ static uint8_t pm_encoding_iso_8859_5_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -132,8 +144,10 @@ static uint8_t pm_encoding_iso_8859_5_table[256] = { 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-6 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-6 character. + */ static uint8_t pm_encoding_iso_8859_6_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -154,8 +168,10 @@ static uint8_t pm_encoding_iso_8859_6_table[256] = { 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-7 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-7 character. 
+ */ static uint8_t pm_encoding_iso_8859_7_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -176,8 +192,10 @@ static uint8_t pm_encoding_iso_8859_7_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-8 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-8 character. + */ static uint8_t pm_encoding_iso_8859_8_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -198,8 +216,10 @@ static uint8_t pm_encoding_iso_8859_8_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-9 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-9 character. + */ static uint8_t pm_encoding_iso_8859_9_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -220,8 +240,10 @@ static uint8_t pm_encoding_iso_8859_9_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-10 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-10 character. 
+ */ static uint8_t pm_encoding_iso_8859_10_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -242,8 +264,10 @@ static uint8_t pm_encoding_iso_8859_10_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-11 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-11 character. + */ static uint8_t pm_encoding_iso_8859_11_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -264,8 +288,10 @@ static uint8_t pm_encoding_iso_8859_11_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-13 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-13 character. + */ static uint8_t pm_encoding_iso_8859_13_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -286,8 +312,10 @@ static uint8_t pm_encoding_iso_8859_13_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 0, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-14 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-14 character. 
+ */ static uint8_t pm_encoding_iso_8859_14_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -308,8 +336,10 @@ static uint8_t pm_encoding_iso_8859_14_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-15 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-15 character. + */ static uint8_t pm_encoding_iso_8859_15_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -330,8 +360,10 @@ static uint8_t pm_encoding_iso_8859_15_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding ISO-8859-16 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-16 character. + */ static uint8_t pm_encoding_iso_8859_16_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -352,8 +384,10 @@ static uint8_t pm_encoding_iso_8859_16_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding KOI8-R character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding KOI8-R character. 
+ */ static uint8_t pm_encoding_koi8_r_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -374,8 +408,10 @@ static uint8_t pm_encoding_koi8_r_table[256] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding windows-1251 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1251 character. + */ static uint8_t pm_encoding_windows_1251_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -396,8 +432,10 @@ static uint8_t pm_encoding_windows_1251_table[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding windows-1252 character. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1252 character. + */ static uint8_t pm_encoding_windows_1252_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -418,37 +456,94 @@ static uint8_t pm_encoding_windows_1252_table[256] = { 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, // Fx }; +/** + * Returns the size of the next character in the ASCII encoding. This basically + * means that if the top bit is not set, the character is 1 byte long. + */ static size_t pm_encoding_ascii_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { return *b < 0x80 ? 1 : 0; } +/** + * Return the size of the next character in the ASCII encoding if it is an + * alphabetical character. 
+ */ size_t pm_encoding_ascii_alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); } +/** + * Return the size of the next character in the ASCII encoding if it is an + * alphanumeric character. + */ size_t pm_encoding_ascii_alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; } +/** + * Return true if the next character in the ASCII encoding is an uppercase + * character. + */ bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { return (pm_encoding_ascii_table[*b] & PRISM_ENCODING_UPPERCASE_BIT); } +/** + * For a lot of encodings the default is that they are a single byte long no + * matter what the codepoint, so this function is shared between them. + */ static size_t -pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { - return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 1 : 0; +pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { + return 1; } +/** + * Returns the size of the next character in the KOI8-R encoding. This means + * checking if it's a valid codepoint in KOI8-R and, if it is, returning 1. + */ static size_t -pm_encoding_single_char_width(PRISM_ATTRIBUTE_UNUSED const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { - return 1; +pm_encoding_koi8_r_char_width(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { + return ((*b >= 0x20 && *b <= 0x7E) || (*b >= 0x80)) ? 
1 : 0; } -pm_encoding_t pm_encoding_ascii = { +#define PRISM_ENCODING_TABLE(name) \ + static size_t pm_encoding_ ##name ## _alpha_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ + return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \ + } \ + static size_t pm_encoding_ ##name ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ + return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \ + } \ + static bool pm_encoding_ ##name ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ + return (pm_encoding_ ##name ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \ + } + +PRISM_ENCODING_TABLE(iso_8859_1) +PRISM_ENCODING_TABLE(iso_8859_2) +PRISM_ENCODING_TABLE(iso_8859_3) +PRISM_ENCODING_TABLE(iso_8859_4) +PRISM_ENCODING_TABLE(iso_8859_5) +PRISM_ENCODING_TABLE(iso_8859_6) +PRISM_ENCODING_TABLE(iso_8859_7) +PRISM_ENCODING_TABLE(iso_8859_8) +PRISM_ENCODING_TABLE(iso_8859_9) +PRISM_ENCODING_TABLE(iso_8859_10) +PRISM_ENCODING_TABLE(iso_8859_11) +PRISM_ENCODING_TABLE(iso_8859_13) +PRISM_ENCODING_TABLE(iso_8859_14) +PRISM_ENCODING_TABLE(iso_8859_15) +PRISM_ENCODING_TABLE(iso_8859_16) +PRISM_ENCODING_TABLE(koi8_r) +PRISM_ENCODING_TABLE(windows_1251) +PRISM_ENCODING_TABLE(windows_1252) + +#undef PRISM_ENCODING_TABLE + +/** ASCII encoding */ +const pm_encoding_t pm_encoding_ascii = { .name = "ascii", .char_width = pm_encoding_ascii_char_width, .alnum_char = pm_encoding_ascii_alnum_char, @@ -457,7 +552,8 @@ pm_encoding_t pm_encoding_ascii = { .multibyte = false }; -pm_encoding_t pm_encoding_ascii_8bit = { +/** ASCII-8BIT encoding */ +const pm_encoding_t pm_encoding_ascii_8bit = { .name = "ascii-8bit", .char_width = pm_encoding_single_char_width, .alnum_char = pm_encoding_ascii_alnum_char, @@ -466,42 +562,182 @@ pm_encoding_t pm_encoding_ascii_8bit = { .multibyte = false }; -#define PRISM_ENCODING_TABLE(s, i, w) \ - static size_t pm_encoding_ ##i ## _alpha_char(const uint8_t *b, 
PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ - return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHABETIC_BIT); \ - } \ - static size_t pm_encoding_ ##i ## _alnum_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ - return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \ - } \ - static bool pm_encoding_ ##i ## _isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptrdiff_t n) { \ - return (pm_encoding_ ##i ## _table[*b] & PRISM_ENCODING_UPPERCASE_BIT); \ - } \ - pm_encoding_t pm_encoding_ ##i = { \ - .name = s, \ - .char_width = w, \ - .alnum_char = pm_encoding_ ##i ## _alnum_char, \ - .alpha_char = pm_encoding_ ##i ## _alpha_char, \ - .isupper_char = pm_encoding_ ##i ## _isupper_char, \ - .multibyte = false, \ - }; - -PRISM_ENCODING_TABLE("iso-8859-1", iso_8859_1, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-2", iso_8859_2, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-3", iso_8859_3, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-4", iso_8859_4, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-5", iso_8859_5, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-6", iso_8859_6, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-7", iso_8859_7, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-8", iso_8859_8, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-9", iso_8859_9, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-10", iso_8859_10, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-11", iso_8859_11, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-13", iso_8859_13, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-14", iso_8859_14, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-15", iso_8859_15, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("iso-8859-16", iso_8859_16, pm_encoding_single_char_width) 
-PRISM_ENCODING_TABLE("koi8-r", koi8_r, pm_encoding_koi8_r_char_width) -PRISM_ENCODING_TABLE("windows-1251", windows_1251, pm_encoding_single_char_width) -PRISM_ENCODING_TABLE("windows-1252", windows_1252, pm_encoding_single_char_width) +/** ISO-8859-1 */ +const pm_encoding_t pm_encoding_iso_8859_1 = { + .name = "iso-8859-1", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_1_alnum_char, + .alpha_char = pm_encoding_iso_8859_1_alpha_char, + .isupper_char = pm_encoding_iso_8859_1_isupper_char, + .multibyte = false +}; -#undef PRISM_ENCODING_TABLE +/** ISO-8859-2 */ +const pm_encoding_t pm_encoding_iso_8859_2 = { + .name = "iso-8859-2", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_2_alnum_char, + .alpha_char = pm_encoding_iso_8859_2_alpha_char, + .isupper_char = pm_encoding_iso_8859_2_isupper_char, + .multibyte = false +}; + +/** ISO-8859-3 */ +const pm_encoding_t pm_encoding_iso_8859_3 = { + .name = "iso-8859-3", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_3_alnum_char, + .alpha_char = pm_encoding_iso_8859_3_alpha_char, + .isupper_char = pm_encoding_iso_8859_3_isupper_char, + .multibyte = false +}; + +/** ISO-8859-4 */ +const pm_encoding_t pm_encoding_iso_8859_4 = { + .name = "iso-8859-4", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_4_alnum_char, + .alpha_char = pm_encoding_iso_8859_4_alpha_char, + .isupper_char = pm_encoding_iso_8859_4_isupper_char, + .multibyte = false +}; + +/** ISO-8859-5 */ +const pm_encoding_t pm_encoding_iso_8859_5 = { + .name = "iso-8859-5", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_5_alnum_char, + .alpha_char = pm_encoding_iso_8859_5_alpha_char, + .isupper_char = pm_encoding_iso_8859_5_isupper_char, + .multibyte = false +}; + +/** ISO-8859-6 */ +const pm_encoding_t pm_encoding_iso_8859_6 = { + .name = "iso-8859-6", + .char_width = 
pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_6_alnum_char, + .alpha_char = pm_encoding_iso_8859_6_alpha_char, + .isupper_char = pm_encoding_iso_8859_6_isupper_char, + .multibyte = false +}; + +/** ISO-8859-7 */ +const pm_encoding_t pm_encoding_iso_8859_7 = { + .name = "iso-8859-7", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_7_alnum_char, + .alpha_char = pm_encoding_iso_8859_7_alpha_char, + .isupper_char = pm_encoding_iso_8859_7_isupper_char, + .multibyte = false +}; + +/** ISO-8859-8 */ +const pm_encoding_t pm_encoding_iso_8859_8 = { + .name = "iso-8859-8", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_8_alnum_char, + .alpha_char = pm_encoding_iso_8859_8_alpha_char, + .isupper_char = pm_encoding_iso_8859_8_isupper_char, + .multibyte = false +}; + +/** ISO-8859-9 */ +const pm_encoding_t pm_encoding_iso_8859_9 = { + .name = "iso-8859-9", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_9_alnum_char, + .alpha_char = pm_encoding_iso_8859_9_alpha_char, + .isupper_char = pm_encoding_iso_8859_9_isupper_char, + .multibyte = false +}; + +/** ISO-8859-10 */ +const pm_encoding_t pm_encoding_iso_8859_10 = { + .name = "iso-8859-10", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_10_alnum_char, + .alpha_char = pm_encoding_iso_8859_10_alpha_char, + .isupper_char = pm_encoding_iso_8859_10_isupper_char, + .multibyte = false +}; + +/** ISO-8859-11 */ +const pm_encoding_t pm_encoding_iso_8859_11 = { + .name = "iso-8859-11", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_11_alnum_char, + .alpha_char = pm_encoding_iso_8859_11_alpha_char, + .isupper_char = pm_encoding_iso_8859_11_isupper_char, + .multibyte = false +}; + +/** ISO-8859-13 */ +const pm_encoding_t pm_encoding_iso_8859_13 = { + .name = "iso-8859-13", + .char_width = pm_encoding_single_char_width, + .alnum_char = 
pm_encoding_iso_8859_13_alnum_char, + .alpha_char = pm_encoding_iso_8859_13_alpha_char, + .isupper_char = pm_encoding_iso_8859_13_isupper_char, + .multibyte = false +}; + +/** ISO-8859-14 */ +const pm_encoding_t pm_encoding_iso_8859_14 = { + .name = "iso-8859-14", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_14_alnum_char, + .alpha_char = pm_encoding_iso_8859_14_alpha_char, + .isupper_char = pm_encoding_iso_8859_14_isupper_char, + .multibyte = false +}; + +/** ISO-8859-15 */ +const pm_encoding_t pm_encoding_iso_8859_15 = { + .name = "iso-8859-15", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_15_alnum_char, + .alpha_char = pm_encoding_iso_8859_15_alpha_char, + .isupper_char = pm_encoding_iso_8859_15_isupper_char, + .multibyte = false +}; + +/** ISO-8859-16 */ +const pm_encoding_t pm_encoding_iso_8859_16 = { + .name = "iso-8859-16", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_iso_8859_16_alnum_char, + .alpha_char = pm_encoding_iso_8859_16_alpha_char, + .isupper_char = pm_encoding_iso_8859_16_isupper_char, + .multibyte = false +}; + +/** KOI8-R */ +const pm_encoding_t pm_encoding_koi8_r = { + .name = "koi8-r", + .char_width = pm_encoding_koi8_r_char_width, + .alnum_char = pm_encoding_koi8_r_alnum_char, + .alpha_char = pm_encoding_koi8_r_alpha_char, + .isupper_char = pm_encoding_koi8_r_isupper_char, + .multibyte = false +}; + +/** Windows-1251 */ +const pm_encoding_t pm_encoding_windows_1251 = { + .name = "windows-1251", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_windows_1251_alnum_char, + .alpha_char = pm_encoding_windows_1251_alpha_char, + .isupper_char = pm_encoding_windows_1251_isupper_char, + .multibyte = false +}; + +/** Windows-1252 */ +const pm_encoding_t pm_encoding_windows_1252 = { + .name = "windows-1252", + .char_width = pm_encoding_single_char_width, + .alnum_char = pm_encoding_windows_1252_alnum_char, + .alpha_char = 
pm_encoding_windows_1252_alpha_char, + .isupper_char = pm_encoding_windows_1252_isupper_char, + .multibyte = false +}; diff --git a/src/enc/pm_unicode.c b/src/enc/pm_unicode.c index ee776fa2add..d021894c1e6 100644 --- a/src/enc/pm_unicode.c +++ b/src/enc/pm_unicode.c @@ -1,15 +1,14 @@ -// Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA -// decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - #include "prism/enc/pm_encoding.h" typedef uint32_t pm_unicode_codepoint_t; -// Each element of the following table contains a bitfield that indicates a -// piece of information about the corresponding unicode codepoint. Note that -// this table is different from other encodings where we used a lookup table -// because the indices of those tables are the byte representations, not the -// codepoints themselves. +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding unicode codepoint. Note that + * this table is different from other encodings where we used a lookup table + * because the indices of those tables are the byte representations, not the + * codepoints themselves. + */ const uint8_t pm_encoding_unicode_table[256] = { // 0 1 2 3 4 5 6 7 8 9 A B C D E F 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x @@ -2179,8 +2178,12 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C 0x1F170, 0x1F189, }; +/** + * Binary search through the given list of codepoints to see if the given + * codepoint is in the list. 
+ */ static bool -pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) { +pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, size_t size, const pm_unicode_codepoint_t codepoints[size]) { size_t start = 0; size_t end = size; @@ -2202,6 +2205,29 @@ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_co return false; } +/** + * A state transition table for decoding UTF-8. + * + * Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ static const uint8_t pm_utf_8_dfa[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f @@ -2219,6 +2245,11 @@ static const uint8_t pm_utf_8_dfa[] = { 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 }; +/** + * Given a pointer to a string and the number of bytes remaining in the string, + * decode the next UTF-8 codepoint and return it. The number of bytes consumed + * is returned in the width out parameter. + */ static pm_unicode_codepoint_t pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) { assert(n >= 1); @@ -2253,6 +2284,10 @@ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) { return width; } +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphabetical character. + */ size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) { if (*b < 0x80) { @@ -2265,10 +2300,14 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0; } else { - return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0; + return pm_unicode_codepoint_match(codepoint, UNICODE_ALPHA_CODEPOINTS_LENGTH, unicode_alpha_codepoints) ? width : 0; } } +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphanumeric character. + */ size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) { if (*b < 0x80) { @@ -2281,10 +2320,14 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0; } else { - return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? 
width : 0; + return pm_unicode_codepoint_match(codepoint, UNICODE_ALNUM_CODEPOINTS_LENGTH, unicode_alnum_codepoints) ? width : 0; } } +/** + * Return true if the next character in the UTF-8 encoding is an uppercase + * character. + */ bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { if (*b < 0x80) { @@ -2297,7 +2340,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false; } else { - return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false; + return pm_unicode_codepoint_match(codepoint, UNICODE_ISUPPER_CODEPOINTS_LENGTH, unicode_isupper_codepoints) ? true : false; } } @@ -2305,7 +2348,8 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { #undef UNICODE_ALNUM_CODEPOINTS_LENGTH #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH -pm_encoding_t pm_encoding_utf_8 = { +/** UTF-8 */ +const pm_encoding_t pm_encoding_utf_8 = { .name = "utf-8", .char_width = pm_encoding_utf_8_char_width, .alnum_char = pm_encoding_utf_8_alnum_char, @@ -2314,7 +2358,8 @@ pm_encoding_t pm_encoding_utf_8 = { .multibyte = true }; -pm_encoding_t pm_encoding_utf8_mac = { +/** UTF8-mac */ +const pm_encoding_t pm_encoding_utf8_mac = { .name = "utf8-mac", .char_width = pm_encoding_utf_8_char_width, .alnum_char = pm_encoding_utf_8_alnum_char, diff --git a/src/enc/pm_windows_31j.c b/src/enc/pm_windows_31j.c index cf7eb46864d..ce67cfb04e7 100644 --- a/src/enc/pm_windows_31j.c +++ b/src/enc/pm_windows_31j.c @@ -46,7 +46,8 @@ pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) { } } -pm_encoding_t pm_encoding_windows_31j = { +/** Windows-31J */ +const pm_encoding_t pm_encoding_windows_31j = { .name = "windows-31j", .char_width = pm_encoding_windows_31j_char_width, .alnum_char = pm_encoding_windows_31j_alnum_char,