ota-meshi · RunDevelopment · Oct 2, 2023 · Sep 11, 2023 · Sep 11, 2023 · Sep 11, 2023
diff --git a/.changeset/early-islands-press.md b/.changeset/early-islands-press.md
@@ -0,0 +1,5 @@
+---
+"eslint-plugin-regexp": major
+---
+
+Add `regexp/simplify-set-operations` rule
diff --git a/.changeset/early-islands-press2.md b/.changeset/early-islands-press2.md
@@ -0,0 +1,5 @@
+---
+"eslint-plugin-regexp": minor
+---
+
+Improve `regexp/negation` rule to report nested negation character classes
diff --git a/README.md b/README.md
@@ -167,6 +167,7 @@ The `plugin:regexp/all` config enables all rules. It's meant for testing, not fo
 | [prefer-regexp-test](https://ota-meshi.github.io/eslint-plugin-regexp/rules/prefer-regexp-test.html)                                       | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec`             |    |    | 🔧 |    |
 | [require-unicode-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-regexp.html)                               | enforce the use of the `u` flag                                                            |    |    | 🔧 |    |
 | [require-unicode-sets-regexp](https://ota-meshi.github.io/eslint-plugin-regexp/rules/require-unicode-sets-regexp.html)                     | enforce the use of the `v` flag                                                            |    |    | 🔧 |    |
+| [simplify-set-operations](https://ota-meshi.github.io/eslint-plugin-regexp/rules/simplify-set-operations.html)                             | require simplify set operations                                                            | ✅  |    | 🔧 |    |
 | [sort-alternatives](https://ota-meshi.github.io/eslint-plugin-regexp/rules/sort-alternatives.html)                                         | sort alternatives if order doesn't matter                                                  |    |    | 🔧 |    |
 | [use-ignore-case](https://ota-meshi.github.io/eslint-plugin-regexp/rules/use-ignore-case.html)                                             | use the `i` flag if it simplifies the pattern                                              | ✅  |    | 🔧 |    |
 

diff --git a/docs/rules/index.md b/docs/rules/index.md
@@ -74,6 +74,7 @@ sidebarDepth: 0
 | [prefer-regexp-test](prefer-regexp-test.md)                                       | enforce that `RegExp#test` is used instead of `String#match` and `RegExp#exec`             |    |    | 🔧 |    |
 | [require-unicode-regexp](require-unicode-regexp.md)                               | enforce the use of the `u` flag                                                            |    |    | 🔧 |    |
 | [require-unicode-sets-regexp](require-unicode-sets-regexp.md)                     | enforce the use of the `v` flag                                                            |    |    | 🔧 |    |
+| [simplify-set-operations](simplify-set-operations.md)                             | require simplify set operations                                                            | ✅  |    | 🔧 |    |
 | [sort-alternatives](sort-alternatives.md)                                         | sort alternatives if order doesn't matter                                                  |    |    | 🔧 |    |
 | [use-ignore-case](use-ignore-case.md)                                             | use the `i` flag if it simplifies the pattern                                              | ✅  |    | 🔧 |    |
 

diff --git a/docs/rules/negation.md b/docs/rules/negation.md
@@ -53,6 +53,12 @@ var foo = /[^\P{ASCII}]/u
 
 Nothing.
 
+## :couple: Related rules
+
+- [regexp/simplify-set-operations]
+
+[regexp/simplify-set-operations]: ./simplify-set-operations.md
+
 ## :rocket: Version
 
 This rule was introduced in eslint-plugin-regexp v0.4.0

diff --git a/docs/rules/simplify-set-operations.md b/docs/rules/simplify-set-operations.md
@@ -0,0 +1,92 @@
+---
+pageClass: "rule-details"
+sidebarDepth: 0
+title: "regexp/simplify-set-operations"
+description: "require simplify set operations"
+---
+# regexp/simplify-set-operations
+
+💼 This rule is enabled in the ✅ `plugin:regexp/recommended` config.
+
+🔧 This rule is automatically fixable by the [`--fix` CLI option](https://eslint.org/docs/latest/user-guide/command-line-interface#--fix).
+
+<!-- end auto-generated rule header -->
+
+> require simplify set operations
+
+## :book: Rule Details
+
+This rule aims to optimize patterns by simplifying set operations on character classes (with `v` flag).
+
+This rule does not report simple nested negations (e.g. `/[^[^abc]]/v`).\
+If you want to report simple nested negations, use the [regexp/negation] rule together with this rule.
+
+<eslint-code-block fix>
+
+```js
+/* eslint regexp/simplify-set-operations: "error" */
+
+/* ✗ BAD */
+var re = /[a&&[^b]]/v; // -> /[a--b]/v
+var re = /[[^b]&&a]/v; // -> /[a--b]/v
+var re = /[a--[^b]]/v; // -> /[a&&b]/v
+var re = /[[^a]&&[^b]]/v; // -> /[^ab]/v
+var re = /[[^a][^b]]/v; // -> /[^a&&b]/v
+
+/* ✓ GOOD */
+var re = /[a--b]/v;
+var re = /[a&&b]/v;
+var re = /[^ab]/v;
+var re = /[^a&&b]/v;
+```
+
+</eslint-code-block>
+
+### How does this rule work?
+
+This rule attempts to simplify set operations in the ways listed below:
+
+#### De Morgan's laws
+
+This rule uses De Morgan's laws to find patterns in which multiple negations can be converted into a single negation, reports them, and auto-fixes them.\
+For example, `/[[^a]&&[^b]]/v` is equivalent to `/[^ab]/v`, `/[[^a][^b]]/v` is equivalent to `/[^a&&b]/v`.
+
+See <https://en.wikipedia.org/wiki/De_Morgan's_laws>.
+
+#### Conversion from the intersection to the subtraction
+
+Intersection sets with complement operands can be converted to difference sets.\
+The rule looks for character class intersections with negated operands, reports them, and auto-fixes them.\
+For example, `/[a&&[^b]]/v` is equivalent to `/[a--b]/v`, `/[[^a]&&b]/v` is equivalent to `/[b--a]/v`.
+
+#### Conversion from the subtraction to the intersection
+
+Difference sets with a complement operand on the right side can be converted to intersection sets.\
+The rule looks for character class subtractions with a negated operand on the right side, reports them, and auto-fixes them.\
+For example, `/[a--[^b]]/v` is equivalent to `/[a&&b]/v`.
+
+### Auto Fixes
+
+This rule's auto-fix does not remove unnecessary brackets. For example, `/[[^a]&&[^b]]/v` will be automatically fixed to `/[^[a][b]]/v`.\
+If you want to remove unnecessary brackets (e.g. auto-fixed to `/[^ab]/v`), use [regexp/no-useless-character-class] rule together.
+
+## :wrench: Options
+
+Nothing.
+
+## :couple: Related rules
+
+- [regexp/negation]
+- [regexp/no-useless-character-class]
+
+[regexp/negation]: ./negation.md
+[regexp/no-useless-character-class]: ./no-useless-character-class.md
+
+## :rocket: Version
+
+:exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
+
+## :mag: Implementation
+
+- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/simplify-set-operations.ts)
+- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/simplify-set-operations.ts)
diff --git a/lib/configs/recommended.ts b/lib/configs/recommended.ts
@@ -63,6 +63,7 @@ export const rules = {
     "regexp/prefer-star-quantifier": "error",
     "regexp/prefer-unicode-codepoint-escapes": "error",
     "regexp/prefer-w": "error",
+    "regexp/simplify-set-operations": "error",
     "regexp/sort-flags": "error",
     "regexp/strict": "error",
     "regexp/use-ignore-case": "error",

diff --git a/lib/rules/negation.ts b/lib/rules/negation.ts
@@ -1,11 +1,33 @@
-import { toCharSet, toUnicodeSet } from "regexp-ast-analysis"
+import { toUnicodeSet } from "regexp-ast-analysis"
 import type {
+    CharacterClass,
+    CharacterClassElement,
+    CharacterUnicodePropertyCharacterSet,
     EscapeCharacterSet,
-    UnicodePropertyCharacterSet,
+    ExpressionCharacterClass,
 } from "@eslint-community/regexpp/ast"
 import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
 import type { RegExpContext } from "../utils"
 import { createRule, defineRegexpVisitor } from "../utils"
+import { assertNever } from "../utils/util"
+
+type NegatableCharacterClassElement =
+    | CharacterClass
+    | ExpressionCharacterClass
+    | EscapeCharacterSet
+    | CharacterUnicodePropertyCharacterSet
+
+/** Checks whether the given character class element is negatable: a character class, an expression character class, or a character set that is not a property escape with `strings` set. */
+function isNegatableCharacterClassElement<N extends CharacterClassElement>(
+    node: N,
+): node is N & NegatableCharacterClassElement {
+    return (
+        node.type === "CharacterClass" ||
+        node.type === "ExpressionCharacterClass" ||
+        (node.type === "CharacterSet" &&
+            (node.kind !== "property" || !node.strings))
+    )
+}
 
 export default createRule("negation", {
     meta: {
@@ -36,19 +58,17 @@ export default createRule("negation", {
                     }
 
                     const element = ccNode.elements[0]
-                    if (element.type !== "CharacterSet") {
+                    if (!isNegatableCharacterClassElement(element)) {
                         return
                     }
-                    if (element.kind === "property" && element.strings) {
-                        // Unicode property escape with property of strings.
-                        // Actually the pattern passing through this branch is an invalid pattern,
-                        // but it has to be checked because of the type guards.
+                    if (element.type !== "CharacterSet" && !element.negate) {
                         return
                     }
 
                     if (
                         flags.ignoreCase &&
                         !flags.unicodeSets &&
+                        element.type === "CharacterSet" &&
                         element.kind === "property"
                     ) {
                         // The ignore case canonicalization affects negated
@@ -61,7 +81,7 @@ export default createRule("negation", {
                         // (/./, /\s/, /\d/) or inconsistent (/\w/).
                         const ccSet = toUnicodeSet(ccNode, flags)
 
-                        const negatedElementSet = toCharSet(
+                        const negatedElementSet = toUnicodeSet(
                             {
                                 ...element,
                                 negate: !element.negate,
@@ -96,17 +116,24 @@ export default createRule("negation", {
 /**
  * Gets the text that negation the CharacterSet.
  */
-function getNegationText(
-    node: EscapeCharacterSet | UnicodePropertyCharacterSet,
-) {
-    // they are all of the form: /\\[dswp](?:\{[^{}]+\})?/
-    let kind = node.raw[1]
+function getNegationText(node: NegatableCharacterClassElement) {
+    if (node.type === "CharacterSet") {
+        // Escapes all have the form /\\[dswp](?:\{[^{}]+\})?/, so flipping the case of the letter after the backslash yields the negated escape.
+        let kind = node.raw[1]
 
-    if (kind.toLowerCase() === kind) {
-        kind = kind.toUpperCase()
-    } else {
-        kind = kind.toLowerCase()
-    }
+        if (kind.toLowerCase() === kind) {
+            kind = kind.toUpperCase()
+        } else {
+            kind = kind.toLowerCase()
+        }
 
-    return `\\${kind}${node.raw.slice(2)}`
+        return `\\${kind}${node.raw.slice(2)}`
+    }
+    if (node.type === "CharacterClass") {
+        return `[${node.elements.map((e) => e.raw).join("")}]` // rebuilt from the elements' raw text, so no `^` is emitted
+    }
+    if (node.type === "ExpressionCharacterClass") {
+        return `[${node.raw.slice(2, -1)}]` // drops the first two characters and the last one; presumably the `[^` and `]` of a negated class (confirm against the caller)
+    }
+    return assertNever(node)
 }