From e406bea7f0ac7fd057585827904233f329f93817 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 15 Aug 2023 12:27:00 +0200 Subject: [PATCH 1/2] Fix `isEqualNodes` --- lib/utils/regexp-ast/is-equals.ts | 157 +++++++++++------------------- tests/lib/utils/regexp-ast.ts | 5 + 2 files changed, 64 insertions(+), 98 deletions(-) diff --git a/lib/utils/regexp-ast/is-equals.ts b/lib/utils/regexp-ast/is-equals.ts index 31d29636d..80b1b4910 100644 --- a/lib/utils/regexp-ast/is-equals.ts +++ b/lib/utils/regexp-ast/is-equals.ts @@ -1,23 +1,6 @@ import type { ToCharSetElement, ReadonlyFlags } from "regexp-ast-analysis" import { toCharSet } from "regexp-ast-analysis" -import type { - Alternative, - Assertion, - CapturingGroup, - Quantifier, - Group, - CharacterClass, - CharacterSet, - Character, - Backreference, - CharacterClassRange, - Node, - RegExpLiteral, - Pattern, - Flags, - Element, - CharacterClassElement, -} from "@eslint-community/regexpp/ast" +import type { Node } from "@eslint-community/regexpp/ast" import type { ShortCircuit } from "./common" /** @@ -59,21 +42,19 @@ function isEqualChar( return toCharSet(a, flags).equals(toCharSet(b, flags)) } -const EQUALS_CHECKER = { - Alternative( - a: Alternative, - b: Alternative, +type OfType = Extract +const EQUALS_CHECKER: { + [T in Node["type"]]: ( + a: OfType, + b: OfType, flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { - return isEqualElements(a.elements, b.elements, flags, shortCircuit) + shortCircuit: ShortCircuit | undefined, + ) => boolean +} = { + Alternative(a, b, flags, shortCircuit) { + return isEqualConcatenation(a.elements, b.elements, flags, shortCircuit) }, - Assertion( - a: Assertion, - b: Assertion, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { + Assertion(a, b, flags, shortCircuit) { if (a.kind === "start" || a.kind === "end") { /* istanbul ignore next */ return a.kind === b.kind @@ -83,7 +64,7 @@ const EQUALS_CHECKER = { } if (a.kind === "lookahead" || a.kind === "lookbehind") { if (b.kind === a.kind && a.negate === b.negate) { - return isEqualAlternatives( + return isEqualSet( a.alternatives, b.alternatives, flags, @@ -95,43 +76,52 @@ const EQUALS_CHECKER = { /* istanbul ignore next */ return false }, - Backreference(a: Backreference, b: Backreference) { + Backreference(a, b) { return a.ref === b.ref }, - CapturingGroup( - a: CapturingGroup, - b: CapturingGroup, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { + CapturingGroup(a, b, flags, shortCircuit) { return ( a.name === b.name && - isEqualAlternatives( - a.alternatives, - b.alternatives, - flags, - shortCircuit, - ) + isEqualSet(a.alternatives, b.alternatives, flags, shortCircuit) ) }, - Character(a: Character, b: Character, flags: ReadonlyFlags) { + Character(a, b, flags) { return isEqualChar(a, b, flags) }, - CharacterClass(a: CharacterClass, b: CharacterClass, flags: ReadonlyFlags) { + CharacterClass(a, b, flags) { return isEqualChar(a, b, flags) }, - CharacterClassRange( - a: CharacterClassRange, - b: CharacterClassRange, - flags: ReadonlyFlags, - ) { + CharacterClassRange(a, b, flags) { return isEqualChar(a, b, flags) }, - CharacterSet(a: CharacterSet, b: CharacterSet, flags: ReadonlyFlags) { + CharacterSet(a, b, flags) { return isEqualChar(a, b, flags) }, + ClassIntersection(a, b, flags, shortCircuit) { + return isEqualSet( + [a.left, a.right], + [b.left, b.right], + flags, + shortCircuit, + ) + }, + ClassStringDisjunction(a, b, flags, shortCircuit) { + return isEqualSet(a.alternatives, b.alternatives, flags, shortCircuit) + }, + ClassSubtraction(a, b, flags, shortCircuit) { + return ( + isEqualNodes(a.left, b.left, flags, shortCircuit) && + isEqualNodes(a.right, b.right, flags, shortCircuit) + ) + }, + ExpressionCharacterClass(a, b, flags) { + return ( + a.negate === b.negate && + isEqualNodes(a.expression, b.expression, flags) + ) + }, /* istanbul ignore next */ - Flags(a: Flags, b: Flags) { + Flags(a, b) { /* istanbul ignore next */ return ( a.dotAll === b.dotAll && @@ -142,38 +132,13 @@ const EQUALS_CHECKER = { a.unicode === b.unicode ) }, - Group( - a: Group, - b: Group, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { - return isEqualAlternatives( - a.alternatives, - b.alternatives, - flags, - shortCircuit, - ) + Group(a, b, flags, shortCircuit) { + return isEqualSet(a.alternatives, b.alternatives, flags, shortCircuit) }, - Pattern( - a: Pattern, - b: Pattern, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { - return isEqualAlternatives( - a.alternatives, - b.alternatives, - flags, - shortCircuit, - ) + Pattern(a, b, flags, shortCircuit) { + return isEqualSet(a.alternatives, b.alternatives, flags, shortCircuit) }, - Quantifier( - a: Quantifier, - b: Quantifier, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { + Quantifier(a, b, flags, shortCircuit) { return ( a.min === b.min && a.max === b.max && @@ -181,17 +146,15 @@ const EQUALS_CHECKER = { isEqualNodes(a.element, b.element, flags, shortCircuit) ) }, - RegExpLiteral( - a: RegExpLiteral, - b: RegExpLiteral, - flags: ReadonlyFlags, - shortCircuit?: ShortCircuit, - ) { + RegExpLiteral(a, b, flags, shortCircuit) { return ( isEqualNodes(a.pattern, b.pattern, flags, shortCircuit) && isEqualNodes(a.flags, b.flags, flags, shortCircuit) ) }, + StringAlternative(a, b, flags, shortCircuit) { + return isEqualConcatenation(a.elements, b.elements, flags, shortCircuit) + }, } /** @@ -227,8 +190,6 @@ export function isEqualNodes( } } if (/[(*+?[\\{|]/u.test(a.raw) || /[(*+?[\\{|]/u.test(b.raw)) { - // FIXME: TS Error - // @ts-expect-error -- FIXME return EQUALS_CHECKER[a.type]( a as never, b as never, @@ -240,9 +201,9 @@ export function isEqualNodes( } /** Check whether given elements are equals or not. */ -function isEqualElements( - a: Element[], - b: Element[], +function isEqualConcatenation( + a: readonly N[], + b: readonly N[], flags: ReadonlyFlags, shortCircuit?: ShortCircuit, ) { @@ -260,9 +221,9 @@ function isEqualElements( } /** Check whether given alternatives are equals or not. */ -function isEqualAlternatives( - a: N[], - b: N[], +function isEqualSet( + a: readonly N[], + b: readonly N[], flags: ReadonlyFlags, shortCircuit?: ShortCircuit, ) { diff --git a/tests/lib/utils/regexp-ast.ts b/tests/lib/utils/regexp-ast.ts index 6d9d80e9c..3214b3601 100644 --- a/tests/lib/utils/regexp-ast.ts +++ b/tests/lib/utils/regexp-ast.ts @@ -202,6 +202,11 @@ const TESTCASES_FOR_IS_EQUAL_NODES: TestCase[] = [ b: /ya?ml/u, result: true, }, + { + a: String.raw`/[\q{foo|bar}]/v`, + b: String.raw`/[\q{bar|foo}]/v`, + result: true, + }, ] describe("regexp-ast isEqualNodes", () => { for (const testCase of TESTCASES_FOR_IS_EQUAL_NODES) { From 21c760d7a55d269c7979f3d241adce8e6fc9dfb3 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Thu, 14 Sep 2023 13:14:25 +0200 Subject: [PATCH 2/2] Fixed isCovered --- lib/utils/regexp-ast/is-covered.ts | 161 +++++++++++++++++------------ 1 file changed, 96 insertions(+), 65 deletions(-) diff --git a/lib/utils/regexp-ast/is-covered.ts b/lib/utils/regexp-ast/is-covered.ts index b5eee73be..3abc23e54 100644 --- a/lib/utils/regexp-ast/is-covered.ts +++ b/lib/utils/regexp-ast/is-covered.ts @@ -8,8 +8,12 @@ import type { LookaroundAssertion, } from "@eslint-community/regexpp/ast" import { isEqualNodes } from "./is-equals" -import type { ReadonlyFlags, ToCharSetElement } from "regexp-ast-analysis" -import { toCharSet } from "regexp-ast-analysis" +import type { + ReadonlyFlags, + ToCharSetElement, + ToUnicodeSetElement, +} from "regexp-ast-analysis" +import { toCharSet, toUnicodeSet } from "regexp-ast-analysis" import type { CharSet } from "refa" type Options = { @@ -54,6 +58,10 @@ class NormalizedCharacter implements NormalizedNodeBase { return new NormalizedCharacter(toCharSet(element, options.flags)) } + public static fromChars(charSet: CharSet) { + return new NormalizedCharacter(charSet) + } + private constructor(charSet: CharSet) { this.charSet = charSet } @@ -69,7 +77,7 @@ class NormalizedAlternative implements NormalizedNodeBase { public readonly raw: string - public readonly elements: NormalizedNode[] + public readonly elements: readonly NormalizedNode[] public static fromAlternative(node: Alternative, options: Options) { const normalizeElements = [ @@ -107,7 +115,7 @@ class NormalizedAlternative implements NormalizedNodeBase { public static fromElements( elements: NormalizedNode[], - node: Alternative | Quantifier, + node: Alternative | Quantifier | ToUnicodeSetElement, ) { const normalizeElements = [ ...NormalizedAlternative.normalizedElements(function* () { @@ -131,7 +139,7 @@ class NormalizedAlternative implements NormalizedNodeBase { private constructor( elements: NormalizedNode[], - node: Alternative | Quantifier, + node: Alternative | Quantifier | ToUnicodeSetElement, ) { this.raw = node.raw this.elements = elements @@ -148,11 +156,9 @@ class NormalizedDisjunctions implements NormalizedNodeBase { public readonly raw: string - public readonly node: CapturingGroup | Group | Pattern + private readonly getAlternatives: () => readonly NormalizedAlternative[] - private readonly options: Options - - public normalizedAlternatives?: NormalizedAlternative[] + private normalizedAlternatives?: readonly NormalizedAlternative[] public static fromNode( node: CapturingGroup | Group | Pattern, @@ -164,32 +170,35 @@ class NormalizedDisjunctions implements NormalizedNodeBase { options, ) } - return new NormalizedDisjunctions(node, options) + return new NormalizedDisjunctions(node, () => { + return node.alternatives.map((alt) => { + const n = normalizeNode(alt, options) + if (n.type === "NormalizedAlternative") { + return n + } + return NormalizedAlternative.fromElements([n], alt) + }) + }) + } + + public static fromAlternatives( + alternatives: readonly NormalizedAlternative[], + node: CapturingGroup | Group | Pattern | ToUnicodeSetElement, + ) { + return new NormalizedDisjunctions(node, () => alternatives) } private constructor( - node: CapturingGroup | Group | Pattern, - options: Options, + node: CapturingGroup | Group | Pattern | ToUnicodeSetElement, + getAlternatives: () => readonly NormalizedAlternative[], ) { this.raw = node.raw - this.node = node - this.options = options + this.getAlternatives = getAlternatives } - public get alternatives() { - if (this.normalizedAlternatives) { - return this.normalizedAlternatives - } - this.normalizedAlternatives = [] - for (const alt of this.node.alternatives) { - const node = normalizeNode(alt, this.options) - if (node.type === "NormalizedAlternative") { - this.normalizedAlternatives.push(node) - } else { - this.normalizedAlternatives.push( - NormalizedAlternative.fromElements([node], alt), - ) - } + public get alternatives(): readonly NormalizedAlternative[] { + if (!this.normalizedAlternatives) { + this.normalizedAlternatives = this.getAlternatives() } return this.normalizedAlternatives } @@ -208,7 +217,7 @@ class NormalizedLookaroundAssertion implements NormalizedNodeBase { private readonly options: Options - public normalizedAlternatives?: NormalizedAlternative[] + private normalizedAlternatives?: NormalizedAlternative[] public static fromNode(node: LookaroundAssertion, options: Options) { return new NormalizedLookaroundAssertion(node, options) @@ -220,7 +229,7 @@ class NormalizedLookaroundAssertion implements NormalizedNodeBase { this.options = options } - public get alternatives() { + public get alternatives(): readonly NormalizedAlternative[] { if (this.normalizedAlternatives) { return this.normalizedAlternatives } @@ -425,44 +434,66 @@ function normalizeNodeWithoutCache( node: Node, options: Options, ): NormalizedNode { - if ( - node.type === "CharacterSet" || - node.type === "CharacterClass" || - node.type === "Character" || - node.type === "CharacterClassRange" - ) { - // FIXME: TS Error - // @ts-expect-error -- FIXME - return NormalizedCharacter.fromElement(node, options) - } - if (node.type === "Alternative") { - return NormalizedAlternative.fromAlternative(node, options) - } - if (node.type === "Quantifier") { - return NormalizedOptional.fromQuantifier(node, options) - } - if ( - node.type === "CapturingGroup" || - node.type === "Group" || - node.type === "Pattern" - ) { - return NormalizedDisjunctions.fromNode(node, options) - } - if (node.type === "RegExpLiteral") { - return normalizeNode(node.pattern, options) - } - if (node.type === "Assertion") { - if (node.kind === "lookahead" || node.kind === "lookbehind") { - return NormalizedLookaroundAssertion.fromNode(node, options) + switch (node.type) { + case "CharacterSet": + case "CharacterClass": + case "Character": + case "CharacterClassRange": + case "ExpressionCharacterClass": + case "ClassIntersection": + case "ClassSubtraction": + case "ClassStringDisjunction": + case "StringAlternative": { + const set = toUnicodeSet(node, options.flags) + if (set.accept.isEmpty) { + return NormalizedCharacter.fromChars(set.chars) + } + + const alternatives = set.wordSets.map((wordSet) => { + return NormalizedAlternative.fromElements( + wordSet.map(NormalizedCharacter.fromChars), + node, + ) + }) + return NormalizedDisjunctions.fromAlternatives(alternatives, node) } - return NormalizedOther.fromNode(node) + + case "Alternative": + return NormalizedAlternative.fromAlternative(node, options) + + case "Quantifier": + return NormalizedOptional.fromQuantifier(node, options) + + case "CapturingGroup": + case "Group": + case "Pattern": + return NormalizedDisjunctions.fromNode(node, options) + + case "Assertion": + if (node.kind === "lookahead" || node.kind === "lookbehind") { + return NormalizedLookaroundAssertion.fromNode(node, options) + } + return NormalizedOther.fromNode(node) + + case "RegExpLiteral": + return normalizeNode(node.pattern, options) + + case "Backreference": + case "Flags": + return NormalizedOther.fromNode(node) + + default: + return assertNever(node) } - return NormalizedOther.fromNode(node) +} + +function assertNever(value: never): never { + throw new Error(`Invalid value: ${value}`) } /** Check whether the right node is covered by the left nodes. */ function isCoveredAnyNode( - left: NormalizedNode[], + left: readonly NormalizedNode[], right: NormalizedNode, options: Options, ) { @@ -476,8 +507,8 @@ function isCoveredAnyNode( /** Check whether the right nodes is covered by the left nodes. */ function isCoveredAltNodes( - leftNodes: NormalizedNode[], - rightNodes: NormalizedNode[], + leftNodes: readonly NormalizedNode[], + rightNodes: readonly NormalizedNode[], options: Options, ): boolean { const left = options.canOmitRight ? omitEnds(leftNodes) : [...leftNodes] @@ -561,7 +592,7 @@ function isCoveredAltNodes( /** * Exclude the end optionals. */ -function omitEnds(nodes: NormalizedNode[]): NormalizedNode[] { +function omitEnds(nodes: readonly NormalizedNode[]): NormalizedNode[] { for (let index = nodes.length - 1; index >= 0; index--) { const node = nodes[index] if (node.type !== "NormalizedOptional") {