From fe7a6abb4692a178d1488f43e2d09cc824bc64a0 Mon Sep 17 00:00:00 2001
From: Harlan Wilton
Date: Tue, 23 Jan 2024 19:51:31 +0800
Subject: [PATCH] fix: robots.txt `Allow` rule breaking filtering

---
 packages/core/src/discovery/robotsTxt.ts | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/packages/core/src/discovery/robotsTxt.ts b/packages/core/src/discovery/robotsTxt.ts
index 382862c6..6f8c53cb 100644
--- a/packages/core/src/discovery/robotsTxt.ts
+++ b/packages/core/src/discovery/robotsTxt.ts
@@ -72,11 +72,14 @@ export function mergeRobotsTxtConfig(config: ResolvedUserConfig, { groups, sitem
     ...(config.scanner.exclude || []),
     ...normalisedGroups.flatMap(group => group.disallow),
   ])].filter(isValidRegex)
-  config.scanner.include = [...new Set([
-    ...(config.scanner.include || []),
-    ...normalisedGroups.flatMap(group => group.allow),
-  ])].filter(isValidRegex)
-
+  config.scanner.include = config.scanner.include || []
+  const robotsAllows = normalisedGroups.flatMap(group => group.allow).filter(a => a.length)
+  if (!config.scanner.include.length && robotsAllows.length) {
+    config.scanner.include = [...new Set([
+      '/*',
+      ...normalisedGroups.flatMap(group => group.allow),
+    ])].filter(isValidRegex)
+  }
   if (config.scanner.sitemap !== false && sitemaps.length)
     config.scanner.sitemap = [...new Set([...(Array.isArray(config.scanner.sitemap) ? config.scanner.sitemap : []), ...sitemaps])]
 }
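
Note: the behavioural change, as a minimal standalone TypeScript sketch. This is
an illustration rather than the library's real API: `ScannerConfig`, `RobotsGroup`
and `mergeAllowRules` are hypothetical stand-ins for the types in the core package,
and the `isValidRegex` filter from the real code is omitted for brevity.

    // Previously, robots.txt `Allow` rules were always merged into
    // `scanner.include`. A non-empty include list acts as a whitelist, so a
    // single `Allow` rule silently filtered out every other path on the site.
    interface ScannerConfig {
      include?: string[]
      exclude?: string[]
    }

    interface RobotsGroup {
      allow: string[]
      disallow: string[]
    }

    function mergeAllowRules(scanner: ScannerConfig, groups: RobotsGroup[]): void {
      const userInclude = scanner.include ?? []
      const robotsAllows = groups.flatMap(group => group.allow).filter(a => a.length > 0)
      // Only seed the include list from `Allow` rules when the user has not
      // configured patterns of their own; '/*' keeps the rest of the site in
      // scope despite the whitelist semantics.
      scanner.include = userInclude.length === 0 && robotsAllows.length > 0
        ? [...new Set(['/*', ...robotsAllows])]
        : userInclude
    }

    // Before the fix: include became ['/admin'], skipping everything else.
    // After the fix: ['/*', '/admin'] keeps the whole site scannable.
    const scanner: ScannerConfig = {}
    mergeAllowRules(scanner, [{ allow: ['/admin'], disallow: [] }])
    console.log(scanner.include) // ['/*', '/admin']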