Skip to content

Commit

Permalink
fix: more accurate filtering for include, exclude
Browse files Browse the repository at this point in the history
  • Loading branch information
harlan-zw committed Nov 27, 2023
1 parent 9dacf52 commit ca5712c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 10 deletions.
17 changes: 7 additions & 10 deletions packages/core/src/puppeteer/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ UnlighthouseWorkerStats,
import { ReportArtifacts, asRegExp, createTaskReportFromRoute } from '../util'
import { useUnlighthouse } from '../unlighthouse'
import { useLogger } from '../logger'
import { createFilter } from '../util/filter'
import {
launchPuppeteerCluster,
} from './cluster'
Expand Down Expand Up @@ -91,16 +92,12 @@ export async function createUnlighthouseWorker(tasks: Record<UnlighthouseTask, T
if (ignoredRoutes.has(id))
return

if (resolvedConfig.scanner.include) {
// must match
if (resolvedConfig.scanner.include.filter(rule => asRegExp(rule).test(path)).length === 0)
return
}

if (resolvedConfig.scanner.exclude) {
// must not match
if (resolvedConfig.scanner.exclude.filter(rule => asRegExp(rule).test(path)).length > 0)
if (resolvedConfig.scanner.include || resolvedConfig.scanner.exclude) {
const filter = createFilter(resolvedConfig.scanner)
if (filter(path)) {
logger.debug('Skipping route based on include / exclude rules', { path })
return
}
}

/*
Expand All @@ -109,7 +106,7 @@ export async function createUnlighthouseWorker(tasks: Record<UnlighthouseTask, T
* Note: this is somewhat similar to the logic in discovery/routes.ts, that's because we need to sample the routes
* from the sitemap or as provided. This logic is for ensuring crawled URLs don't exceed the group limit.
*/
if (resolvedConfig.scanner.dynamicSampling > 0) {
if (resolvedConfig.scanner.dynamicSampling && resolvedConfig.scanner.dynamicSampling > 0) {
const routeGroup = get(route, resolvedConfig.client.groupRoutesKey.replace('route.', ''))
// group all urls by their route definition path name
const routesInGroup = [...routeReports.values()].filter(
Expand Down
39 changes: 39 additions & 0 deletions packages/core/src/util/filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { createRouter, toRouteMatcher } from 'radix3'

/**
 * Rule sets consumed by `createFilter`.
 *
 * Each rule is either a RegExp tested against the path, or a string that is
 * compared literally and then matched as a radix3 route pattern.
 */
interface CreateFilterOptions {
// Paths matching any of these rules are accepted (unless an exclude rule also matches).
// When omitted or empty, every path that is not excluded is accepted.
include?: (string | RegExp)[]
// Paths matching any of these rules are rejected; exclude rules are checked before include rules.
exclude?: (string | RegExp)[]
}

/**
 * Builds a predicate telling whether a single rule set matches a path.
 *
 * Rules are partitioned once up front; the radix3 matcher for string rules is
 * only built the first time it is actually needed and is then reused.
 */
function createRuleSetMatcher(rules: (string | RegExp)[]): (path: string) => boolean {
  const regexRules = rules.filter((r): r is RegExp => r instanceof RegExp)
  const stringRules = rules.filter((r): r is string => typeof r === 'string')
  let routeMatcher: { matchAll: (path: string) => unknown[] } | undefined

  return (path: string): boolean => {
    const regexHit = regexRules.some((r) => {
      // `test()` is stateful for global / sticky regexes: it resumes from `lastIndex`.
      // Reset it so the same path yields the same answer on every call.
      r.lastIndex = 0
      return r.test(path)
    })
    if (regexHit)
      return true

    if (stringRules.length === 0)
      return false

    // quick scan of literal string matches
    if (stringRules.includes(path))
      return true

    if (!routeMatcher) {
      // radix3 expects a `{ [pattern]: data }` map; the `true` value is arbitrary
      const routes: Record<string, true> = {}
      for (const r of stringRules)
        routes[r] = true
      routeMatcher = toRouteMatcher(createRouter({ routes, strictTrailingSlash: false }))
    }
    return routeMatcher.matchAll(path).length > 0
  }
}

/**
 * Creates a path filter from include / exclude rules.
 *
 * The returned function answers "should this path be kept?":
 * - a path matching any `exclude` rule returns `false` (exclude wins over include),
 * - otherwise a path matching any `include` rule returns `true`,
 * - otherwise the result is `true` only when no `include` rules were given.
 *
 * RegExp rules are tested directly against the path. String rules match when they
 * are equal to the path or when they match as a radix3 route pattern (trailing
 * slashes are not significant).
 *
 * @param options - The include / exclude rule sets; both are optional.
 * @returns A predicate returning `true` when the path passes the filter.
 */
export function createFilter(options: CreateFilterOptions = {}): (path: string) => boolean {
  const include = options.include || []
  const exclude = options.exclude || []
  if (include.length === 0 && exclude.length === 0)
    return () => true

  // order matters: exclude rules are evaluated first so they take precedence
  const ruleSets = [
    { matches: createRuleSetMatcher(exclude), result: false },
    { matches: createRuleSetMatcher(include), result: true },
  ]

  return function (path: string): boolean {
    for (const ruleSet of ruleSets) {
      if (ruleSet.matches(path))
        return ruleSet.result
    }
    // nothing matched: keep the path only if there was no include allow-list
    return include.length === 0
  }
}

0 comments on commit ca5712c

Please sign in to comment.