Skip to content

Commit

Permalink
buffer: use a branchless loop for atob
Browse files Browse the repository at this point in the history
  • Loading branch information
chjj committed Feb 7, 2024
1 parent 527e435 commit 14c636c
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 35 deletions.
101 changes: 66 additions & 35 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -1246,9 +1246,9 @@ if (internalBinding('config').hasIntl) {
}

function btoa(input) {
// The implementation here has not been performance optimized in any way and
// should not be.
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
// The implementation here has been slightly performance optimized,
// but still not nearly as much as it should be.
// Refs: https://github.com/nodejs/node/pull/51670
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('input');
}
Expand All @@ -1267,63 +1267,94 @@ function btoa(input) {
// Refs: https://infra.spec.whatwg.org/#forgiving-base64-decode
// https://infra.spec.whatwg.org/#ascii-whitespace
// Valid Characters: [\t\n\f\r +/0-9=A-Za-z]
// Lookup table (-1 = invalid, 0 = valid)
// Lookup table (-1 = invalid, 0 = whitespace, 1 = non-whitespace)
// Note that `=` is set to `-1` as it is handled elsewhere.
/* eslint-disable no-multi-spaces, indent */
const kForgivingBase64AllowedChars = [
-1, -1, -1, -1, -1, -1, -1, -1,
-1, 0, 0, -1, 0, 0, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
0, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 0, -1, -1, -1, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, -1, -1, -1, 0, -1, -1,
-1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, -1, -1, -1, -1, -1,
-1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, -1, -1, -1, -1, -1,
-1, -1, -1, 1, -1, -1, -1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, -1, -1, -1, -1, -1, -1,
-1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, -1, -1, -1, -1, -1,
-1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, -1, -1, -1, -1, -1,
];
/* eslint-enable no-multi-spaces, indent */

function atob(input) {
// The implementation here has not been performance optimized in any way and
// should not be.
// Refs: https://github.com/nodejs/node/pull/38433#issuecomment-828426932
// The implementation here has been slightly performance optimized,
// but still not nearly as much as it should be.
// Refs: https://github.com/nodejs/node/pull/51670
if (arguments.length === 0) {
throw new ERR_MISSING_ARGS('input');
}

input = `${input}`;

let nonAsciiWhitespaceCharCount = 0;
let equalCharCount = 0;
let length = input.length;

// We use an accumulator to track errors. If, at the end,
// any high bits are set in `acc`, an invalid character has
// been parsed.
//
// This works because invalid base64 characters in the lookup
// table are `-1` and any non-ascii character will be greater
// than 0x7f.
let acc = 0;

for (let n = 0; n < input.length; n++) {
const ch = StringPrototypeCharCodeAt(input, n);
const val = kForgivingBase64AllowedChars[ch & 0x7f];
// Right-trim whitespace and equal signs.
while (length > 0) {
const ch = StringPrototypeCharCodeAt(input, length - 1);

if ((ch | val) & ~0x7f) {
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
// Possibly-valid whitespace.
if (ch <= 0x20) {
acc |= kForgivingBase64AllowedChars[ch];
length--;
continue;
}

if (ch > 0x20) {
// Equals sign.
if (ch === 0x3d) {
nonAsciiWhitespaceCharCount++;
equalCharCount++;
length--;
continue;
}

if (ch === 0x3d) {
equalCharCount++;
} else if (equalCharCount) {
// The `=` char is only allowed at the end.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
break;
}

if (equalCharCount > 2) {
// Only one more `=` is permitted after the first equal sign.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}
}
// Parse optimistically. Check for errors after.
// Equal signs are considered errors at this point (value = -1).
for (let n = 0; n < length; n++) {
const ch = StringPrototypeCharCodeAt(input, n);
const value = kForgivingBase64AllowedChars[ch & 0x7f];

acc |= ch | value;

// Valid non-whitespace has a value of `1`.
nonAsciiWhitespaceCharCount += value;
}

if (acc & ~0x7f) {
// We parsed an invalid character at some point in one of the loops.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}

if (equalCharCount > 2) {
// Only two equal signs are permitted.
throw lazyDOMException('Invalid character', 'InvalidCharacterError');
}

let reminder = nonAsciiWhitespaceCharCount % 4;
Expand Down
11 changes: 11 additions & 0 deletions test/parallel/test-btoa-atob.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ for (let i = 0; i <= 0xffff; i++) {
throws(() => atob(ch + 'aaa'), invalidChar);
}

strictEqual(atob('YQ'), 'a');
strictEqual(atob('YQ '), 'a');
strictEqual(atob('Y Q'), 'a');
strictEqual(atob('Y Q\t'), 'a');
strictEqual(atob('Y Q=\t= '), 'a');
strictEqual(atob('Y Q = = '), 'a');
throws(() => atob('YQ='), invalidChar);
throws(() => atob('YQ==='), invalidChar);
throws(() => atob('Y=Q'), invalidChar);
throws(() => atob('YQ\v'), invalidChar);

throws(() => btoa('abcd\ufeffx'), invalidChar);

const charset =
Expand Down

0 comments on commit 14c636c

Please sign in to comment.