From 17e826a7ffd9d3622702e64ebcb4db435ee83b37 Mon Sep 17 00:00:00 2001 From: mrmlnc Date: Sat, 13 May 2023 22:52:19 +0300 Subject: [PATCH] fix: expand patterns with brace expansion to avoid some matching issues --- __snapshots__/dot.e2e.js | 6 ---- __snapshots__/regular.e2e.js | 15 ++++++-- src/index.ts | 5 ++- src/managers/patterns.spec.ts | 51 --------------------------- src/managers/patterns.ts | 18 ---------- src/managers/tasks.spec.ts | 13 +++++++ src/managers/tasks.ts | 14 ++++++-- src/providers/matchers/matcher.ts | 8 +---- src/tests/e2e/patterns/regular.e2e.ts | 2 +- src/tests/e2e/patterns/root.e2e.ts | 4 ++- src/utils/pattern.spec.ts | 46 +++++++++++++++++++++++- src/utils/pattern.ts | 29 ++++++++++++--- 12 files changed, 114 insertions(+), 97 deletions(-) delete mode 100644 src/managers/patterns.spec.ts delete mode 100644 src/managers/patterns.ts diff --git a/__snapshots__/dot.e2e.js b/__snapshots__/dot.e2e.js index 14fdefd1..cbfc0ddb 100644 --- a/__snapshots__/dot.e2e.js +++ b/__snapshots__/dot.e2e.js @@ -176,7 +176,6 @@ exports['Options Dot {"pattern":"fixtures/**/{.,}*","options":{}} (stream) 1'] = ] exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (sync) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", @@ -191,7 +190,6 @@ exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (sync) 1'] = [ ] exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (async) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", @@ -206,7 +204,6 @@ exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (async) 1'] = ] exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (stream) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", @@ -221,7 +218,6 @@ exports['Options Dot {"pattern":"fixtures/{.**,**}","options":{}} (stream) 1'] = ] exports['Options Dot {"pattern":"fixtures/{**/.*,**}","options":{}} (sync) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", @@ -236,7 +232,6 @@ exports['Options Dot {"pattern":"fixtures/{**/.*,**}","options":{}} (sync) 1'] = ] exports['Options Dot {"pattern":"fixtures/{**/.*,**}","options":{}} (async) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", @@ -251,7 +246,6 @@ exports['Options Dot {"pattern":"fixtures/{**/.*,**}","options":{}} (async) 1'] ] exports['Options Dot {"pattern":"fixtures/{**/.*,**}","options":{}} (stream) 1'] = [ - "fixtures/.directory/file.md", "fixtures/.file", "fixtures/file.md", "fixtures/first/file.md", diff --git a/__snapshots__/regular.e2e.js b/__snapshots__/regular.e2e.js index a0ccc85e..142988ee 100644 --- a/__snapshots__/regular.e2e.js +++ b/__snapshots__/regular.e2e.js @@ -5464,11 +5464,20 @@ exports['Patterns Regular (negative group) {"pattern":"**/!(*.md)","options":{"c "nested/directory/file.json" ] -exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (sync) 1'] = [] +exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (sync) 1'] = [ + "library/a/book.md", + "library/b/book.md" +] -exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (async) 1'] = [] +exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (async) 1'] = [ + "library/a/book.md", + "library/b/book.md" +] -exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (stream) 1'] = [] +exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,**/library/*/book.md}","options":{"cwd":"fixtures/third"}} (stream) 1'] = [ + "library/a/book.md", + "library/b/book.md" +] exports['Patterns Regular (segmented lists) {"pattern":"{book.xml,library/**/a/book.md}","options":{"cwd":"fixtures/third"}} (sync) 1'] = [ "library/a/book.md" diff --git a/src/index.ts b/src/index.ts index 9f957bd7..b6ef0c2d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,4 @@ import * as taskManager from './managers/tasks'; -import * as patternManager from './managers/patterns'; import ProviderAsync from './providers/async'; import Provider from './providers/provider'; import ProviderStream from './providers/stream'; @@ -59,7 +58,7 @@ namespace FastGlob { export function generateTasks(source: PatternInternal | PatternInternal[], options?: OptionsInternal): Task[] { assertPatternsInput(source); - const patterns = patternManager.transform(([] as PatternInternal[]).concat(source)); + const patterns = ([] as PatternInternal[]).concat(source); const settings = new Settings(options); return taskManager.generate(patterns, settings); @@ -81,7 +80,7 @@ namespace FastGlob { } function getWorks(source: PatternInternal | PatternInternal[], _Provider: new (settings: Settings) => Provider, options?: OptionsInternal): T[] { - const patterns = patternManager.transform(([] as PatternInternal[]).concat(source)); + const patterns = ([] as PatternInternal[]).concat(source); const settings = new Settings(options); const tasks = taskManager.generate(patterns, settings); diff --git a/src/managers/patterns.spec.ts b/src/managers/patterns.spec.ts deleted file mode 100644 index 7093c990..00000000 --- a/src/managers/patterns.spec.ts +++ /dev/null @@ -1,51 +0,0 @@ -import * as assert from 'assert'; - -import * as manager from './patterns'; - -describe('Managers → Pattern', () => { - describe('.transform', () => { - it('should transform patterns with duplicate slashes', () => { - const expected = ['a/b', 'b/c']; - - const actual = manager.transform(['a/b', 'b//c']); - - assert.deepStrictEqual(actual, expected); - }); - }); - - describe('.removeDuplicateSlashes', () => { - it('should do not change patterns', () => { - const action = manager.removeDuplicateSlashes; - - assert.strictEqual(action('directory/file.md'), 'directory/file.md'); - assert.strictEqual(action('files{.txt,/file.md}'), 'files{.txt,/file.md}'); - }); - - it('should do not change the device path in patterns with UNC parts', () => { - const action = manager.removeDuplicateSlashes; - - assert.strictEqual(action('//?//D://'), '//?/D:/'); - assert.strictEqual(action('//.//D:///'), '//./D:/'); - assert.strictEqual(action('//LOCALHOST//d$//'), '//LOCALHOST/d$/'); - assert.strictEqual(action('//127.0.0.1///d$//'), '//127.0.0.1/d$/'); - assert.strictEqual(action('//./UNC////LOCALHOST///d$//'), '//./UNC/LOCALHOST/d$/'); - }); - - it('should remove duplicate slashes in the middle and the of the pattern', () => { - const action = manager.removeDuplicateSlashes; - - assert.strictEqual(action('a//b'), 'a/b'); - assert.strictEqual(action('b///c'), 'b/c'); - assert.strictEqual(action('c/d///'), 'c/d/'); - assert.strictEqual(action('//?//D://'), '//?/D:/'); - }); - - it('should form double slashes at the beginning of the pattern', () => { - const action = manager.removeDuplicateSlashes; - - assert.strictEqual(action('///*'), '//*'); - assert.strictEqual(action('////?'), '//?'); - assert.strictEqual(action('///?/D:/'), '//?/D:/'); - }); - }); -}); diff --git a/src/managers/patterns.ts b/src/managers/patterns.ts deleted file mode 100644 index 36a84fd1..00000000 --- a/src/managers/patterns.ts +++ /dev/null @@ -1,18 +0,0 @@ -/** - * Matches a sequence of two or more consecutive slashes, excluding the first two slashes at the beginning of the string. - * The latter is due to the presence of the device path at the beginning of the UNC path. - * @todo rewrite to negative lookbehind with the next major release. - */ -const DOUBLE_SLASH_RE = /(?!^)\/{2,}/g; - -export function transform(patterns: string[]): string[] { - return patterns.map((pattern) => removeDuplicateSlashes(pattern)); -} - -/** - * This package only works with forward slashes as a path separator. - * Because of this, we cannot use the standard `path.normalize` method, because on Windows platform it will use of backslashes. - */ -export function removeDuplicateSlashes(pattern: string): string { - return pattern.replace(DOUBLE_SLASH_RE, '/'); -} diff --git a/src/managers/tasks.spec.ts b/src/managers/tasks.spec.ts index bf54af63..77c034e6 100644 --- a/src/managers/tasks.spec.ts +++ b/src/managers/tasks.spec.ts @@ -44,6 +44,19 @@ describe('Managers → Task', () => { assert.deepStrictEqual(actual, expected); }); + + it('should expand patterns with brace expansion', () => { + const settings = new Settings(); + + const expected = [ + tests.task.builder().base('a').positive('a/*').build(), + tests.task.builder().base('a/b').positive('a/b/*').build(), + ]; + + const actual = manager.generate(['a/{b,}/*'], settings); + + assert.deepStrictEqual(actual, expected); + }); }); describe('.convertPatternsToTasks', () => { diff --git a/src/managers/tasks.ts b/src/managers/tasks.ts index 3cc4f9dc..34d12216 100644 --- a/src/managers/tasks.ts +++ b/src/managers/tasks.ts @@ -10,9 +10,19 @@ export type Task = { negative: Pattern[]; }; -export function generate(patterns: Pattern[], settings: Settings): Task[] { +export function generate(input: Pattern[], settings: Settings): Task[] { + /** + * The original pattern like `{,*,**,a/*}` can lead to problems checking the depth when matching entry + * and some problems with the micromatch package (see fast-glob issues: #365, #394). + * + * To solve this problem, we expand all patterns containing brace expansion. This can lead to a slight slowdown + * in matching in the case of a large set of patterns after expansion. + */ + const patterns = utils.pattern.expandPatternsWithBraceExpansion(input); + const ignore = utils.pattern.expandPatternsWithBraceExpansion(settings.ignore); + const positivePatterns = getPositivePatterns(patterns); - const negativePatterns = getNegativePatternsAsPositive(patterns, settings.ignore); + const negativePatterns = getNegativePatternsAsPositive(patterns, ignore); const staticPatterns = positivePatterns.filter((pattern) => utils.pattern.isStaticPattern(pattern, settings)); const dynamicPatterns = positivePatterns.filter((pattern) => utils.pattern.isDynamicPattern(pattern, settings)); diff --git a/src/providers/matchers/matcher.ts b/src/providers/matchers/matcher.ts index 75d2d991..57fecb38 100644 --- a/src/providers/matchers/matcher.ts +++ b/src/providers/matchers/matcher.ts @@ -35,13 +35,7 @@ export default abstract class Matcher { } private _fillStorage(): void { - /** - * The original pattern may include `{,*,**,a/*}`, which will lead to problems with matching (unresolved level). - * So, before expand patterns with brace expansion into separated patterns. - */ - const patterns = utils.pattern.expandPatternsWithBraceExpansion(this._patterns); - - for (const pattern of patterns) { + for (const pattern of this._patterns) { const segments = this._getPatternSegments(pattern); const sections = this._splitSegmentsIntoSections(segments); diff --git a/src/tests/e2e/patterns/regular.e2e.ts b/src/tests/e2e/patterns/regular.e2e.ts index 75be4fd7..419879f2 100644 --- a/src/tests/e2e/patterns/regular.e2e.ts +++ b/src/tests/e2e/patterns/regular.e2e.ts @@ -486,7 +486,7 @@ runner.suite('Patterns Regular (negative group)', { runner.suite('Patterns Regular (segmented lists)', { tests: [ - { pattern: '{book.xml,**/library/*/book.md}', options: { cwd: 'fixtures/third' }, issue: 365 }, + { pattern: '{book.xml,**/library/*/book.md}', options: { cwd: 'fixtures/third' } }, { pattern: '{book.xml,library/**/a/book.md}', options: { cwd: 'fixtures/third' } } ] }); diff --git a/src/tests/e2e/patterns/root.e2e.ts b/src/tests/e2e/patterns/root.e2e.ts index 3ca1460b..13a72eb6 100644 --- a/src/tests/e2e/patterns/root.e2e.ts +++ b/src/tests/e2e/patterns/root.e2e.ts @@ -16,6 +16,8 @@ function getRootEntries(root: string, withBase: boolean = false): string[] { result = getRootEntriesWithStatsCall(root); } + console.dir(result, { colors: true, compact: false, depth: null }); + if (withBase) { const separator = root.endsWith('/') ? '' : '/'; @@ -48,7 +50,7 @@ runner.suite('Patterns Root', { { pattern: '/tmp/*', condition: () => !utils.platform.isWindows(), - expected: () => getRootEntries('/tmp') + expected: () => getRootEntries('/tmp', /** withBase */ true) }, { pattern: '/*', diff --git a/src/utils/pattern.spec.ts b/src/utils/pattern.spec.ts index a50455c3..9d564406 100644 --- a/src/utils/pattern.spec.ts +++ b/src/utils/pattern.spec.ts @@ -408,12 +408,20 @@ describe('Utils → Pattern', () => { describe('.expandBraceExpansion', () => { it('should return an array of expanded patterns with brace expansion without dupes', () => { - const expected = ['a/b', 'a/c/d', 'a/c']; + const expected = ['a/b', 'a/c', 'a/c/d']; const actual = util.expandBraceExpansion('a/{b,c/d,{b,c}}'); assert.deepStrictEqual(actual, expected); }); + + it('should remove duplicate slashes', () => { + const expected = ['a/*', 'a/b/*', 'a/c/*', 'a/b/c/*']; + + const actual = util.expandBraceExpansion('a/{b,}/{c,}/*'); + + assert.deepStrictEqual(actual, expected); + }); }); describe('.getPatternParts', () => { @@ -479,4 +487,40 @@ describe('Utils → Pattern', () => { assert.ok(!actual); }); }); + + describe('.removeDuplicateSlashes', () => { + it('should do not change patterns', () => { + const action = util.removeDuplicateSlashes; + + assert.strictEqual(action('directory/file.md'), 'directory/file.md'); + assert.strictEqual(action('files{.txt,/file.md}'), 'files{.txt,/file.md}'); + }); + + it('should do not change the device path in patterns with UNC parts', () => { + const action = util.removeDuplicateSlashes; + + assert.strictEqual(action('//?//D://'), '//?/D:/'); + assert.strictEqual(action('//.//D:///'), '//./D:/'); + assert.strictEqual(action('//LOCALHOST//d$//'), '//LOCALHOST/d$/'); + assert.strictEqual(action('//127.0.0.1///d$//'), '//127.0.0.1/d$/'); + assert.strictEqual(action('//./UNC////LOCALHOST///d$//'), '//./UNC/LOCALHOST/d$/'); + }); + + it('should remove duplicate slashes in the middle and the of the pattern', () => { + const action = util.removeDuplicateSlashes; + + assert.strictEqual(action('a//b'), 'a/b'); + assert.strictEqual(action('b///c'), 'b/c'); + assert.strictEqual(action('c/d///'), 'c/d/'); + assert.strictEqual(action('//?//D://'), '//?/D:/'); + }); + + it('should form double slashes at the beginning of the pattern', () => { + const action = util.removeDuplicateSlashes; + + assert.strictEqual(action('///*'), '//*'); + assert.strictEqual(action('////?'), '//?'); + assert.strictEqual(action('///?/D:/'), '//?/D:/'); + }); + }); }); diff --git a/src/utils/pattern.ts b/src/utils/pattern.ts index 0575b7d3..72d7e622 100644 --- a/src/utils/pattern.ts +++ b/src/utils/pattern.ts @@ -14,6 +14,12 @@ const REGEX_GROUP_SYMBOLS_RE = /(?:^|[^!*+?@])\([^(]*\|[^|]*\)/; const GLOB_EXTENSION_SYMBOLS_RE = /[!*+?@]\([^(]*\)/; const BRACE_EXPANSION_SEPARATORS_RE = /,|\.\./; +/** + * Matches a sequence of two or more consecutive slashes, excluding the first two slashes at the beginning of the string. + * The latter is due to the presence of the device path at the beginning of the UNC path. + */ +const DOUBLE_SLASH_RE = /(?!^)\/{2,}/g; + type PatternTypeOptions = { braceExpansion?: boolean; caseSensitiveMatch?: boolean; @@ -150,10 +156,17 @@ export function expandPatternsWithBraceExpansion(patterns: Pattern[]): Pattern[] } export function expandBraceExpansion(pattern: Pattern): Pattern[] { - return micromatch.braces(pattern, { - expand: true, - nodupes: true - }); + const patterns = micromatch + .braces(pattern, { expand: true, nodupes: true }) + .map((pattern) => removeDuplicateSlashes(pattern)); + + /** + * Sort the patterns by length so that the same depth patterns are processed side by side. + * `a/{b,}/{c,}/*` – `['a/*', 'a/b/*', 'a/c/*', 'a/b/c/*']` + */ + patterns.sort((a, b) => a.length - b.length); + + return patterns; } export function getPatternParts(pattern: Pattern, options: MicromatchOptions): Pattern[] { @@ -193,3 +206,11 @@ export function convertPatternsToRe(patterns: Pattern[], options: MicromatchOpti export function matchAny(entry: string, patternsRe: PatternRe[]): boolean { return patternsRe.some((patternRe) => patternRe.test(entry)); } + +/** + * This package only works with forward slashes as a path separator. + * Because of this, we cannot use the standard `path.normalize` method, because on Windows platform it will use of backslashes. + */ +export function removeDuplicateSlashes(pattern: string): string { + return pattern.replace(DOUBLE_SLASH_RE, '/'); +}