diff --git a/.changeset/clean-kids-mate.md b/.changeset/clean-kids-mate.md new file mode 100644 index 000000000..93c55c2b5 --- /dev/null +++ b/.changeset/clean-kids-mate.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add support for `v` flag to `regexp/prefer-character-class` diff --git a/lib/rules/no-useless-character-class.ts b/lib/rules/no-useless-character-class.ts index 30c837466..af67957bd 100644 --- a/lib/rules/no-useless-character-class.ts +++ b/lib/rules/no-useless-character-class.ts @@ -7,17 +7,13 @@ import type { ExpressionCharacterClass, UnicodeSetsCharacterClass, } from "@eslint-community/regexpp/ast" +import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set" const ESCAPES_OUTSIDE_CHARACTER_CLASS = new Set("$()*+./?[{|") const ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U = new Set([ ...ESCAPES_OUTSIDE_CHARACTER_CLASS, "}", ]) -// A single character set of ClassSetReservedDoublePunctuator. -// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator -const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set( - "!#$%&*+,.:;<=>?@^`~", -) export default createRule("no-useless-character-class", { meta: { @@ -217,9 +213,7 @@ export default createRule("no-useless-character-class", { // Avoid [A&&[&]] => [A&&&] if ( - REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has( - char, - ) && + RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) && // The previous character is the same pattern[ccNode.start - 1] === char ) { @@ -263,9 +257,7 @@ export default createRule("no-useless-character-class", { // Avoid [A[&]&B] => [A&&B] return ( - REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has( - char, - ) && + RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) && // The next character is the same pattern[ccNode.end] === char ) diff --git a/lib/rules/no-useless-escape.ts b/lib/rules/no-useless-escape.ts index b47a2179a..64d2fc484 100644 --- a/lib/rules/no-useless-escape.ts +++ b/lib/rules/no-useless-escape.ts @@ -25,20 +25,8 @@ import { 
CP_PIPE, CP_MINUS, canUnwrapped, - CP_HASH, - CP_PERCENT, - CP_BAN, - CP_AMP, - CP_COMMA, - CP_COLON, - CP_SEMI, - CP_LT, - CP_EQ, - CP_GT, - CP_AT, - CP_TILDE, - CP_BACKTICK, } from "../utils" +import { RESERVED_DOUBLE_PUNCTUATOR_CP } from "../utils/unicode-set" const REGEX_CHAR_CLASS_ESCAPES = new Set([ CP_BACK_SLASH, // \\ @@ -80,29 +68,6 @@ const POTENTIAL_ESCAPE_SEQUENCE_FOR_CHAR_CLASS = new Set([ ...POTENTIAL_ESCAPE_SEQUENCE, "q", ]) -// A single character set of ClassSetReservedDoublePunctuator. -// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator -const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set([ - CP_BAN, // ! - CP_HASH, // # - CP_DOLLAR, // $ - CP_PERCENT, // % - CP_AMP, // & - CP_STAR, // * - CP_PLUS, // + - CP_COMMA, // , - CP_DOT, // . - CP_COLON, // : - CP_SEMI, // ; - CP_LT, // < - CP_EQ, // = - CP_GT, // > - CP_QUESTION, // ? - CP_AT, // @ - CP_CARET, // ^ - CP_BACKTICK, // ` - CP_TILDE, // ~ -]) export default createRule("no-useless-escape", { meta: { @@ -186,7 +151,7 @@ export default createRule("no-useless-escape", { } if (flags.unicodeSets) { if ( - REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has( + RESERVED_DOUBLE_PUNCTUATOR_CP.has( cNode.value, ) ) { diff --git a/lib/rules/prefer-character-class.ts b/lib/rules/prefer-character-class.ts index 89ccf54aa..957a0277f 100644 --- a/lib/rules/prefer-character-class.ts +++ b/lib/rules/prefer-character-class.ts @@ -6,7 +6,7 @@ import type { CharacterClass, CharacterClassElement, CharacterSet, - Element, + ExpressionCharacterClass, Group, LookaroundAssertion, Node, @@ -17,12 +17,13 @@ import { createRule, defineRegexpVisitor } from "../utils" import type { CharSet } from "refa" import type { FirstConsumedChar, ReadonlyFlags } from "regexp-ast-analysis" import { - toCharSet, getFirstConsumedChar, getMatchingDirection, + toUnicodeSet, } from "regexp-ast-analysis" import type { Position, SourceLocation } from "estree" import { assertNever } from 
"../utils/util" +import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set" /** * Find the first index of an element that satisfies the given condition. @@ -59,7 +60,12 @@ type RawAlternative = RawCharAlternative | RawNonCharAlternative interface RawCharAlternative { readonly isCharacter: true readonly alternative: Alternative - readonly element: Character | CharacterSet | CharacterClass + readonly char: CharSet + readonly element: + | Character + | CharacterSet + | CharacterClass + | ExpressionCharacterClass } interface RawNonCharAlternative { readonly isCharacter: false @@ -88,52 +94,54 @@ function elementsToCharacterClass(elements: CharElementArray): string { // Its ONLY job is to generate a valid character class from the given elements. // Optimizations can be done by another rule. - let result = "[" + const parts: string[] = [] - elements.forEach((e, i) => { + elements.forEach((e) => { switch (e.type) { case "Character": if (e.raw === "-") { - if (i === 0 || i === elements.length - 1) { - result += "-" - } else { - result += "\\-" - } - } else if (e.raw === "^") { - if (i === 0) { - result += "\\^" - } else { - result += "^" - } + parts.push("\\-") } else if (e.raw === "]") { - result += "\\]" + parts.push("\\]") } else { - result += e.raw + parts.push(e.raw) } break case "CharacterClassRange": - if (e.min.raw === "^" && i === 0) { - result += `\\^-${e.max.raw}` - } else { - result += `${e.min.raw}-${e.max.raw}` - } - break - case "CharacterSet": - result += e.raw + case "CharacterClass": + case "ClassStringDisjunction": + case "ExpressionCharacterClass": + parts.push(e.raw) break default: - // FIXME: TS Error - // @ts-expect-error -- FIXME throw assertNever(e) } }) - result += "]" + if (parts.length > 0 && parts[0].startsWith("^")) { + parts[0] = `\\${parts[0]}` + } + + // escape double punctuators for v flag + for (let i = 1; i < parts.length; i++) { + const prev = parts[i - 1] + const curr = parts[i] + + const pChar = prev.slice(-1) + const 
cChar = curr[0] + if ( + RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(cChar) && + cChar === pChar && + !prev.endsWith(`\\${pChar}`) + ) { + parts[i - 1] = `${prev.slice(0, -1)}\\${pChar}` + } + } - return result + return `[${parts.join("")}]` } /** @@ -144,21 +152,23 @@ function categorizeRawAlts( alternatives: readonly Alternative[], flags: ReadonlyFlags, ): RawAlternative[] { - return alternatives.map((alternative) => { + return alternatives.map((alternative): RawAlternative => { if (alternative.elements.length === 1) { const element = alternative.elements[0] if ( element.type === "Character" || element.type === "CharacterClass" || - element.type === "CharacterSet" + element.type === "CharacterSet" || + element.type === "ExpressionCharacterClass" ) { - return { - isCharacter: true, - alternative, - element, - // FIXME: TS Error - // @ts-expect-error -- FIXME - char: toCharSet(element, flags), + const set = toUnicodeSet(element, flags) + if (set.accept.isEmpty) { + return { + isCharacter: true, + alternative, + char: set.chars, + element, + } } } } @@ -189,23 +199,36 @@ function containsCharacterClass(alts: readonly RawAlternative[]): boolean { * * The returned array may be empty. 
*/ -function toCharacterClassElement(element: Element): CharElementArray | null { - if (element.type === "CharacterSet") { - // normal dot is not possible (it technically is but it's complicated) - if (element.kind === "any") { - return null - } - return [element] - } else if (element.type === "CharacterClass") { - if (element.negate) { - // we can't (easily) combine negated character classes - return null - } - return element.elements - } else if (element.type === "Character") { - return [element] +function toCharacterClassElement( + element: RawCharAlternative["element"], +): CharElementArray | null { + switch (element.type) { + case "Character": + return [element] + + case "CharacterSet": + if (element.kind === "any") { + // normal dot is not possible (it technically is but it's complicated) + return null + } + return [element] + + case "CharacterClass": + if (element.negate) { + if (element.unicodeSets) { + return [element] + } + // we can't (easily) combine negated character classes without the v flag + return null + } + return element.elements + + case "ExpressionCharacterClass": + return [element] + + default: + return assertNever(element) } - return null } /** @@ -215,16 +238,14 @@ function parseRawAlts( alternatives: readonly RawAlternative[], flags: ReadonlyFlags, ): ParsedAlternative[] { - return alternatives.map((a) => { + return alternatives.map((a): ParsedAlternative => { if (a.isCharacter) { const elements = toCharacterClassElement(a.element) if (elements) { return { isCharacter: true, elements, - // FIXME: TS Error - // @ts-expect-error -- FIXME - char: toCharSet(a.element, flags), + char: a.char, raw: a.alternative.raw, } } @@ -349,21 +370,14 @@ function findNonDisjointAlt( /** * Returns where the given alternative can accept any character. 
*/ -function totalIsAll( - alternatives: readonly RawAlternative[], - { flags }: RegExpContext, -): boolean { +function totalIsAll(alternatives: readonly RawAlternative[]): boolean { let total: CharSet | undefined = undefined for (const a of alternatives) { if (a.isCharacter) { if (total === undefined) { - // FIXME: TS Error - // @ts-expect-error -- FIXME - total = toCharSet(a.element, flags) + total = a.char } else { - // FIXME: TS Error - // @ts-expect-error -- FIXME - total = total.union(toCharSet(a.element, flags)) + total = total.union(a.char) } } } @@ -506,10 +520,7 @@ export default createRule("prefer-character-class", { return } - if ( - alts.every((a) => a.isCharacter) && - totalIsAll(alts, regexpContext) - ) { + if (alts.every((a) => a.isCharacter) && totalIsAll(alts)) { // This is the special case where: // 1) all alternatives are characters, // 2) there are at least 2 alternatives, and @@ -538,7 +549,7 @@ export default createRule("prefer-character-class", { if ( characterAltsCount >= minCharacterAlternatives || containsCharacterClass(alts) || - totalIsAll(alts, regexpContext) || + totalIsAll(alts) || findNonDisjointAlt(parsedAlts) ) { optimizeCharacterAlts(parsedAlts) diff --git a/lib/rules/require-unicode-sets-regexp.ts b/lib/rules/require-unicode-sets-regexp.ts index d8e33d12d..07a139872 100644 --- a/lib/rules/require-unicode-sets-regexp.ts +++ b/lib/rules/require-unicode-sets-regexp.ts @@ -3,29 +3,7 @@ import type { RegExpContext } from "../utils" import { createRule, defineRegexpVisitor } from "../utils" import { RegExpParser, visitRegExpAST } from "@eslint-community/regexpp" import { toUnicodeSet } from "regexp-ast-analysis" - -const CLASS_SET_RESERVED_DOUBLE_PUNCTUATORS = [ - "&&", - "!!", - "##", - "$$", - "%%", - "**", - "++", - ",,", - "..", - "::", - ";;", - "<<", - "==", - ">>", - "??", - "@@", - "^^", - "``", - "~~", - "--", -] +import { RESERVED_DOUBLE_PUNCTUATOR_PATTERN } from "../utils/unicode-set" /** * Returns whether the regex would 
keep its behavior if the v flag were to be @@ -48,11 +26,7 @@ function isCompatible(regexpContext: RegExpContext): boolean { if (!us.equals(vus)) { throw INCOMPATIBLE } - if ( - CLASS_SET_RESERVED_DOUBLE_PUNCTUATORS.some((punctuator) => - node.raw.includes(punctuator), - ) - ) { + if (RESERVED_DOUBLE_PUNCTUATOR_PATTERN.test(node.raw)) { throw INCOMPATIBLE } }, diff --git a/lib/utils/unicode-set.ts b/lib/utils/unicode-set.ts new file mode 100644 index 000000000..cc118515f --- /dev/null +++ b/lib/utils/unicode-set.ts @@ -0,0 +1,18 @@ +/** + * A single character set of ClassSetReservedDoublePunctuator. + * + * `&& !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ --` are ClassSetReservedDoublePunctuator + */ +export const RESERVED_DOUBLE_PUNCTUATOR_CHARS: ReadonlySet<string> = new Set( + "&!#$%*+,.:;<=>?@^`~-", +) + +/** + * Same as {@link RESERVED_DOUBLE_PUNCTUATOR_CHARS} but as code points. + */ +export const RESERVED_DOUBLE_PUNCTUATOR_CP: ReadonlySet<number> = new Set( + [...RESERVED_DOUBLE_PUNCTUATOR_CHARS].map((c) => c.codePointAt(0)!), +) + +export const RESERVED_DOUBLE_PUNCTUATOR_PATTERN = + /&&|!!|##|\$\$|%%|\*\*|\+\+|,,|\.\.|::|;;|<<|==|>>|\?\?|@@|\^\^|``|~~|--/u diff --git a/tests/lib/rules/prefer-character-class.ts b/tests/lib/rules/prefer-character-class.ts index 7bd33e947..fe54d0228 100644 --- a/tests/lib/rules/prefer-character-class.ts +++ b/tests/lib/rules/prefer-character-class.ts @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/prefer-character-class" const tester = new RuleTester({ parserOptions: { - ecmaVersion: 2020, + ecmaVersion: "latest", sourceType: "module", }, }) @@ -133,7 +133,7 @@ tester.run("prefer-character-class", rule as any, { { code: String.raw`/a|b|c/`, output: String.raw`/[abc]/`, errors: 1 }, { code: String.raw`/]|a|b/`, output: String.raw`/[\]ab]/`, errors: 1 }, - { code: String.raw`/-|a|c/`, output: String.raw`/[-ac]/`, errors: 1 }, + { code: String.raw`/-|a|c/`, output: String.raw`/[\-ac]/`, errors: 1 }, { code: String.raw`/a|-|c/`, 
output: String.raw`/[a\-c]/`, errors: 1 }, { code: String.raw`/a|[-]|c/`, @@ -271,6 +271,22 @@ tester.run("prefer-character-class", rule as any, { errors: 1, }, + { + code: String.raw`/1|2|3|[\w--\d]/v`, + output: String.raw`/[123[\w--\d]]/v`, + errors: 1, + }, + { + code: String.raw`/1|&|&|[\w--\d]/v`, + output: String.raw`/[1\&&[\w--\d]]/v`, + errors: 1, + }, + { + code: String.raw`/1|~|~|[\w--\d]|[\q{abc}]/v`, + output: String.raw`/[1\~~[\w--\d]]|[\q{abc}]/v`, + errors: 1, + }, + // only report affected alternatives { code: String.raw`/foo|bar|a|b|c|baz/`,