Skip to content

Commit

Permalink
Add support for v flag to regexp/prefer-character-class (#619)
Browse files Browse the repository at this point in the history
* Add support for `v` flag to `regexp/prefer-character-class`

* Create clean-kids-mate.md
  • Loading branch information
RunDevelopment authored Oct 1, 2023
1 parent fbf590d commit 81f0153
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 152 deletions.
5 changes: 5 additions & 0 deletions .changeset/clean-kids-mate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"eslint-plugin-regexp": minor
---

Add support for `v` flag to `regexp/prefer-character-class`
14 changes: 3 additions & 11 deletions lib/rules/no-useless-character-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,13 @@ import type {
ExpressionCharacterClass,
UnicodeSetsCharacterClass,
} from "@eslint-community/regexpp/ast"
import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set"

const ESCAPES_OUTSIDE_CHARACTER_CLASS = new Set("$()*+./?[{|")
const ESCAPES_OUTSIDE_CHARACTER_CLASS_WITH_U = new Set([
...ESCAPES_OUTSIDE_CHARACTER_CLASS,
"}",
])
// A single character set of ClassSetReservedDoublePunctuator.
// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set(
"!#$%&*+,.:;<=>?@^`~",
)

export default createRule("no-useless-character-class", {
meta: {
Expand Down Expand Up @@ -217,9 +213,7 @@ export default createRule("no-useless-character-class", {

// Avoid [A&&[&]] => [A&&&]
if (
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
char,
) &&
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) &&
// The previous character is the same
pattern[ccNode.start - 1] === char
) {
Expand Down Expand Up @@ -263,9 +257,7 @@ export default createRule("no-useless-character-class", {

// Avoid [A[&]&B] => [A&&B]
return (
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
char,
) &&
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(char) &&
// The next character is the same
pattern[ccNode.end] === char
)
Expand Down
39 changes: 2 additions & 37 deletions lib/rules/no-useless-escape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,8 @@ import {
CP_PIPE,
CP_MINUS,
canUnwrapped,
CP_HASH,
CP_PERCENT,
CP_BAN,
CP_AMP,
CP_COMMA,
CP_COLON,
CP_SEMI,
CP_LT,
CP_EQ,
CP_GT,
CP_AT,
CP_TILDE,
CP_BACKTICK,
} from "../utils"
import { RESERVED_DOUBLE_PUNCTUATOR_CP } from "../utils/unicode-set"

const REGEX_CHAR_CLASS_ESCAPES = new Set([
CP_BACK_SLASH, // \\
Expand Down Expand Up @@ -80,29 +68,6 @@ const POTENTIAL_ESCAPE_SEQUENCE_FOR_CHAR_CLASS = new Set([
...POTENTIAL_ESCAPE_SEQUENCE,
"q",
])
// A single character set of ClassSetReservedDoublePunctuator.
// && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set([
CP_BAN, // !
CP_HASH, // #
CP_DOLLAR, // $
CP_PERCENT, // %
CP_AMP, // &
CP_STAR, // *
CP_PLUS, // +
CP_COMMA, // ,
CP_DOT, // .
CP_COLON, // :
CP_SEMI, // ;
CP_LT, // <
CP_EQ, // =
CP_GT, // >
CP_QUESTION, // ?
CP_AT, // @
CP_CARET, // ^
CP_BACKTICK, // `
CP_TILDE, // ~
])

export default createRule("no-useless-escape", {
meta: {
Expand Down Expand Up @@ -186,7 +151,7 @@ export default createRule("no-useless-escape", {
}
if (flags.unicodeSets) {
if (
REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(
RESERVED_DOUBLE_PUNCTUATOR_CP.has(
cNode.value,
)
) {
Expand Down
159 changes: 85 additions & 74 deletions lib/rules/prefer-character-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type {
CharacterClass,
CharacterClassElement,
CharacterSet,
Element,
ExpressionCharacterClass,
Group,
LookaroundAssertion,
Node,
Expand All @@ -17,12 +17,13 @@ import { createRule, defineRegexpVisitor } from "../utils"
import type { CharSet } from "refa"
import type { FirstConsumedChar, ReadonlyFlags } from "regexp-ast-analysis"
import {
toCharSet,
getFirstConsumedChar,
getMatchingDirection,
toUnicodeSet,
} from "regexp-ast-analysis"
import type { Position, SourceLocation } from "estree"
import { assertNever } from "../utils/util"
import { RESERVED_DOUBLE_PUNCTUATOR_CHARS } from "../utils/unicode-set"

/**
* Find the first index of an element that satisfies the given condition.
Expand Down Expand Up @@ -59,7 +60,12 @@ type RawAlternative = RawCharAlternative | RawNonCharAlternative
interface RawCharAlternative {
readonly isCharacter: true
readonly alternative: Alternative
readonly element: Character | CharacterSet | CharacterClass
readonly char: CharSet
readonly element:
| Character
| CharacterSet
| CharacterClass
| ExpressionCharacterClass
}
interface RawNonCharAlternative {
readonly isCharacter: false
Expand Down Expand Up @@ -88,52 +94,54 @@ function elementsToCharacterClass(elements: CharElementArray): string {
// Its ONLY job is to generate a valid character class from the given elements.
// Optimizations can be done by another rule.

let result = "["
const parts: string[] = []

elements.forEach((e, i) => {
elements.forEach((e) => {
switch (e.type) {
case "Character":
if (e.raw === "-") {
if (i === 0 || i === elements.length - 1) {
result += "-"
} else {
result += "\\-"
}
} else if (e.raw === "^") {
if (i === 0) {
result += "\\^"
} else {
result += "^"
}
parts.push("\\-")
} else if (e.raw === "]") {
result += "\\]"
parts.push("\\]")
} else {
result += e.raw
parts.push(e.raw)
}
break

case "CharacterClassRange":
if (e.min.raw === "^" && i === 0) {
result += `\\^-${e.max.raw}`
} else {
result += `${e.min.raw}-${e.max.raw}`
}
break

case "CharacterSet":
result += e.raw
case "CharacterClass":
case "ClassStringDisjunction":
case "ExpressionCharacterClass":
parts.push(e.raw)
break

default:
// FIXME: TS Error
// @ts-expect-error -- FIXME
throw assertNever(e)
}
})

result += "]"
if (parts.length > 0 && parts[0].startsWith("^")) {
parts[0] = `\\${parts[0]}`
}

// escape double punctuators for v flag
for (let i = 1; i < parts.length; i++) {
const prev = parts[i - 1]
const curr = parts[i]

const pChar = prev.slice(-1)
const cChar = curr[0]
if (
RESERVED_DOUBLE_PUNCTUATOR_CHARS.has(cChar) &&
cChar === pChar &&
!prev.endsWith(`\\${pChar}`)
) {
parts[i - 1] = `${prev.slice(0, -1)}\\${pChar}`
}
}

return result
return `[${parts.join("")}]`
}

/**
Expand All @@ -144,21 +152,23 @@ function categorizeRawAlts(
alternatives: readonly Alternative[],
flags: ReadonlyFlags,
): RawAlternative[] {
return alternatives.map<RawAlternative>((alternative) => {
return alternatives.map((alternative): RawAlternative => {
if (alternative.elements.length === 1) {
const element = alternative.elements[0]
if (
element.type === "Character" ||
element.type === "CharacterClass" ||
element.type === "CharacterSet"
element.type === "CharacterSet" ||
element.type === "ExpressionCharacterClass"
) {
return {
isCharacter: true,
alternative,
element,
// FIXME: TS Error
// @ts-expect-error -- FIXME
char: toCharSet(element, flags),
const set = toUnicodeSet(element, flags)
if (set.accept.isEmpty) {
return {
isCharacter: true,
alternative,
char: set.chars,
element,
}
}
}
}
Expand Down Expand Up @@ -189,23 +199,36 @@ function containsCharacterClass(alts: readonly RawAlternative[]): boolean {
*
* The returned array may be empty.
*/
function toCharacterClassElement(element: Element): CharElementArray | null {
if (element.type === "CharacterSet") {
// normal dot is not possible (it technically is but it's complicated)
if (element.kind === "any") {
return null
}
return [element]
} else if (element.type === "CharacterClass") {
if (element.negate) {
// we can't (easily) combine negated character classes
return null
}
return element.elements
} else if (element.type === "Character") {
return [element]
function toCharacterClassElement(
element: RawCharAlternative["element"],
): CharElementArray | null {
switch (element.type) {
case "Character":
return [element]

case "CharacterSet":
if (element.kind === "any") {
// normal dot is not possible (it technically is but it's complicated)
return null
}
return [element]

case "CharacterClass":
if (element.negate) {
if (element.unicodeSets) {
return [element]
}
// we can't (easily) combine negated character classes without the v flag
return null
}
return element.elements

case "ExpressionCharacterClass":
return [element]

default:
return assertNever(element)
}
return null
}

/**
Expand All @@ -215,16 +238,14 @@ function parseRawAlts(
alternatives: readonly RawAlternative[],
flags: ReadonlyFlags,
): ParsedAlternative[] {
return alternatives.map<ParsedAlternative>((a) => {
return alternatives.map((a): ParsedAlternative => {
if (a.isCharacter) {
const elements = toCharacterClassElement(a.element)
if (elements) {
return {
isCharacter: true,
elements,
// FIXME: TS Error
// @ts-expect-error -- FIXME
char: toCharSet(a.element, flags),
char: a.char,
raw: a.alternative.raw,
}
}
Expand Down Expand Up @@ -349,21 +370,14 @@ function findNonDisjointAlt(
/**
* Returns whether the given alternatives together can accept any character.
*/
function totalIsAll(
alternatives: readonly RawAlternative[],
{ flags }: RegExpContext,
): boolean {
function totalIsAll(alternatives: readonly RawAlternative[]): boolean {
let total: CharSet | undefined = undefined
for (const a of alternatives) {
if (a.isCharacter) {
if (total === undefined) {
// FIXME: TS Error
// @ts-expect-error -- FIXME
total = toCharSet(a.element, flags)
total = a.char
} else {
// FIXME: TS Error
// @ts-expect-error -- FIXME
total = total.union(toCharSet(a.element, flags))
total = total.union(a.char)
}
}
}
Expand Down Expand Up @@ -506,10 +520,7 @@ export default createRule("prefer-character-class", {
return
}

if (
alts.every((a) => a.isCharacter) &&
totalIsAll(alts, regexpContext)
) {
if (alts.every((a) => a.isCharacter) && totalIsAll(alts)) {
// This is the special case where:
// 1) all alternatives are characters,
// 2) there are at least 2 alternatives, and
Expand Down Expand Up @@ -538,7 +549,7 @@ export default createRule("prefer-character-class", {
if (
characterAltsCount >= minCharacterAlternatives ||
containsCharacterClass(alts) ||
totalIsAll(alts, regexpContext) ||
totalIsAll(alts) ||
findNonDisjointAlt(parsedAlts)
) {
optimizeCharacterAlts(parsedAlts)
Expand Down
Loading

0 comments on commit 81f0153

Please sign in to comment.