diff --git a/lib/normalize-unicode.js b/lib/normalize-unicode.js new file mode 100644 index 00000000..4aeb1d50 --- /dev/null +++ b/lib/normalize-unicode.js @@ -0,0 +1,11 @@ +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +const normalizeCache = Object.create(null) +const {hasOwnProperty} = Object.prototype +module.exports = s => { + if (!hasOwnProperty.call(normalizeCache, s)) + normalizeCache[s] = s.normalize('NFKD') + return normalizeCache[s] +} diff --git a/lib/path-reservations.js b/lib/path-reservations.js index 8d0ead9b..8183c45f 100644 --- a/lib/path-reservations.js +++ b/lib/path-reservations.js @@ -7,7 +7,7 @@ // while still allowing maximal safe parallelization. const assert = require('assert') -const normPath = require('./normalize-windows-path.js') +const normalize = require('./normalize-unicode.js') const stripSlashes = require('./strip-trailing-slashes.js') const { join } = require('path') @@ -28,7 +28,7 @@ module.exports = () => { const getDirs = path => { const dirs = path.split('/').slice(0, -1).reduce((set, path) => { if (set.length) - path = normPath(join(set[set.length - 1], path)) + path = join(set[set.length - 1], path) set.push(path || '/') return set }, []) @@ -116,9 +116,8 @@ module.exports = () => { // So, we just pretend that every path matches every other path here, // effectively removing all parallelization on windows. paths = isWindows ? ['win32 parallelization disabled'] : paths.map(p => { - return stripSlashes(normPath(join(p))) - .normalize('NFKD') - .toLowerCase() + // don't need normPath, because we skip this entirely for windows + return normalize(stripSlashes(join(p))).toLowerCase() }) const dirs = new Set( diff --git a/lib/strip-trailing-slashes.js b/lib/strip-trailing-slashes.js index f702ed5a..3e3ecec5 100644 --- a/lib/strip-trailing-slashes.js +++ b/lib/strip-trailing-slashes.js @@ -1,24 +1,13 @@ -// this is the only approach that was significantly faster than using -// str.replace(/\/+$/, '') for strings ending with a lot of / chars and -// containing multiple / chars. -const batchStrings = [ - '/'.repeat(1024), - '/'.repeat(512), - '/'.repeat(256), - '/'.repeat(128), - '/'.repeat(64), - '/'.repeat(32), - '/'.repeat(16), - '/'.repeat(8), - '/'.repeat(4), - '/'.repeat(2), - '/', -] - +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. module.exports = str => { - for (const s of batchStrings) { - while (str.length >= s.length && str.slice(-1 * s.length) === s) - str = str.slice(0, -1 * s.length) + let i = str.length - 1 + let slashesStart = -1 + while (i > -1 && str.charAt(i) === '/') { + slashesStart = i + i-- } - return str + return slashesStart === -1 ? str : str.slice(0, slashesStart) } diff --git a/lib/unpack.js b/lib/unpack.js index 7f397f10..7d39dc0f 100644 --- a/lib/unpack.js +++ b/lib/unpack.js @@ -17,6 +17,7 @@ const pathReservations = require('./path-reservations.js') const stripAbsolutePath = require('./strip-absolute-path.js') const normPath = require('./normalize-windows-path.js') const stripSlash = require('./strip-trailing-slashes.js') +const normalize = require('./normalize-unicode.js') const ONENTRY = Symbol('onEntry') const CHECKFS = Symbol('checkFs') @@ -101,8 +102,7 @@ const uint32 = (a, b, c) => // Note that on windows, we always drop the entire cache whenever a // symbolic link is encountered, because 8.3 filenames are impossible // to reason about, and collisions are hazards rather than just failures. -const cacheKeyNormalize = path => stripSlash(normPath(path)) - .normalize('NFKD') +const cacheKeyNormalize = path => normalize(stripSlash(normPath(path))) .toLowerCase() const pruneCache = (cache, abs) => { diff --git a/test/normalize-unicode.js b/test/normalize-unicode.js new file mode 100644 index 00000000..c5d8be08 --- /dev/null +++ b/test/normalize-unicode.js @@ -0,0 +1,12 @@ +const t = require('tap') +const normalize = require('../lib/normalize-unicode.js') + +// cafeĢ +const cafe1 = Buffer.from([0x63, 0x61, 0x66, 0xc3, 0xa9]).toString() + +// cafe with a ` +const cafe2 = Buffer.from([0x63, 0x61, 0x66, 0x65, 0xcc, 0x81]).toString() + +t.equal(normalize(cafe1), normalize(cafe2), 'matching unicodes') +t.equal(normalize(cafe1), normalize(cafe2), 'cached') +t.equal(normalize('foo'), 'foo', 'non-unicdoe string') diff --git a/test/strip-trailing-slashes.js b/test/strip-trailing-slashes.js index 198797bf..ce0695f8 100644 --- a/test/strip-trailing-slashes.js +++ b/test/strip-trailing-slashes.js @@ -3,5 +3,6 @@ const stripSlash = require('../lib/strip-trailing-slashes.js') const short = '///a///b///c///' const long = short.repeat(10) + '/'.repeat(1000000) +t.equal(stripSlash('no slash'), 'no slash') t.equal(stripSlash(short), '///a///b///c') t.equal(stripSlash(long), short.repeat(9) + '///a///b///c')