Skip to content

Commit

Permalink
fix: perf regression on hot string munging path
Browse files Browse the repository at this point in the history
This doesn't change any functionality, but it optimizes a few extremely
hot code paths for the input typically encountered during a large npm
project installation.

`String.normalize()` is cached, and trailing-slash removal is done with
a single `String.slice()`, rather than multiple slices and
`String.length` comparisons.

It is extremely rare that any code path is ever hot enough for this kind
of thing to be relevant enough to justify this sort of
microoptimization, but these two issues resulted in a 25-50% install
time increase in some cases, which is fairly dramatic.

Fix: npm/cli#3676

PR-URL: #286
Credit: @isaacs
Close: #286
Reviewed-by: @wraithgar
  • Loading branch information
isaacs committed Aug 26, 2021
1 parent a9d9b05 commit edb8e9a
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 28 deletions.
11 changes: 11 additions & 0 deletions lib/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
const normalizeCache = Object.create(null)
const {hasOwnProperty} = Object.prototype
module.exports = s => {
if (!hasOwnProperty.call(normalizeCache, s))
normalizeCache[s] = s.normalize('NFKD')
return normalizeCache[s]
}
9 changes: 4 additions & 5 deletions lib/path-reservations.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// while still allowing maximal safe parallelization.

const assert = require('assert')
const normPath = require('./normalize-windows-path.js')
const normalize = require('./normalize-unicode.js')
const stripSlashes = require('./strip-trailing-slashes.js')
const { join } = require('path')

Expand All @@ -28,7 +28,7 @@ module.exports = () => {
const getDirs = path => {
const dirs = path.split('/').slice(0, -1).reduce((set, path) => {
if (set.length)
path = normPath(join(set[set.length - 1], path))
path = join(set[set.length - 1], path)
set.push(path || '/')
return set
}, [])
Expand Down Expand Up @@ -116,9 +116,8 @@ module.exports = () => {
// So, we just pretend that every path matches every other path here,
// effectively removing all parallelization on windows.
paths = isWindows ? ['win32 parallelization disabled'] : paths.map(p => {
return stripSlashes(normPath(join(p)))
.normalize('NFKD')
.toLowerCase()
// don't need normPath, because we skip this entirely for windows
return normalize(stripSlashes(join(p))).toLowerCase()
})

const dirs = new Set(
Expand Down
31 changes: 10 additions & 21 deletions lib/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
// this is the only approach that was significantly faster than using
// str.replace(/\/+$/, '') for strings ending with a lot of / chars and
// containing multiple / chars.
const batchStrings = [
'/'.repeat(1024),
'/'.repeat(512),
'/'.repeat(256),
'/'.repeat(128),
'/'.repeat(64),
'/'.repeat(32),
'/'.repeat(16),
'/'.repeat(8),
'/'.repeat(4),
'/'.repeat(2),
'/',
]

// warning: extremely hot code path.
// This has been meticulously optimized for use
// within npm install on large package trees.
// Do not edit without careful benchmarking.
module.exports = str => {
for (const s of batchStrings) {
while (str.length >= s.length && str.slice(-1 * s.length) === s)
str = str.slice(0, -1 * s.length)
let i = str.length - 1
let slashesStart = -1
while (i > -1 && str.charAt(i) === '/') {
slashesStart = i
i--
}
return str
return slashesStart === -1 ? str : str.slice(0, slashesStart)
}
4 changes: 2 additions & 2 deletions lib/unpack.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const pathReservations = require('./path-reservations.js')
const stripAbsolutePath = require('./strip-absolute-path.js')
const normPath = require('./normalize-windows-path.js')
const stripSlash = require('./strip-trailing-slashes.js')
const normalize = require('./normalize-unicode.js')

const ONENTRY = Symbol('onEntry')
const CHECKFS = Symbol('checkFs')
Expand Down Expand Up @@ -101,8 +102,7 @@ const uint32 = (a, b, c) =>
// Note that on windows, we always drop the entire cache whenever a
// symbolic link is encountered, because 8.3 filenames are impossible
// to reason about, and collisions are hazards rather than just failures.
const cacheKeyNormalize = path => stripSlash(normPath(path))
.normalize('NFKD')
const cacheKeyNormalize = path => normalize(stripSlash(normPath(path)))
.toLowerCase()

const pruneCache = (cache, abs) => {
Expand Down
12 changes: 12 additions & 0 deletions test/normalize-unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
const t = require('tap')
const normalize = require('../lib/normalize-unicode.js')

// café
const cafe1 = Buffer.from([0x63, 0x61, 0x66, 0xc3, 0xa9]).toString()

// cafe with a `
const cafe2 = Buffer.from([0x63, 0x61, 0x66, 0x65, 0xcc, 0x81]).toString()

t.equal(normalize(cafe1), normalize(cafe2), 'matching unicodes')
t.equal(normalize(cafe1), normalize(cafe2), 'cached')
t.equal(normalize('foo'), 'foo', 'non-unicdoe string')

This comment has been minimized.

Copy link
@isaacs

isaacs Aug 26, 2021

Author Owner

d'oh, missed that typo. it's only a test name, at least 😬

1 change: 1 addition & 0 deletions test/strip-trailing-slashes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ const stripSlash = require('../lib/strip-trailing-slashes.js')
const short = '///a///b///c///'
const long = short.repeat(10) + '/'.repeat(1000000)

t.equal(stripSlash('no slash'), 'no slash')
t.equal(stripSlash(short), '///a///b///c')
t.equal(stripSlash(long), short.repeat(9) + '///a///b///c')

1 comment on commit edb8e9a

@fireAi-Cloud
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix to security

Please sign in to comment.