From d101804674e6ba42c28505fc8fdf605020ac319e Mon Sep 17 00:00:00 2001 From: Alex Potsides Date: Fri, 13 Oct 2023 08:54:58 +0300 Subject: [PATCH] fix: port protobuf reader/writer to ts (#60) Ports just the parts of protobufjs we use to typescript and integrates the changes from https://github.com/protobufjs/protobuf.js/pull/1557 to have native BigInt support. --- packages/protons-runtime/package.json | 7 +- packages/protons-runtime/src/decode.ts | 6 +- packages/protons-runtime/src/encode.ts | 4 +- packages/protons-runtime/src/index.ts | 39 +- packages/protons-runtime/src/utils.ts | 63 --- packages/protons-runtime/src/utils/float.ts | 109 +++++ .../protons-runtime/src/utils/longbits.ts | 155 +++++++ packages/protons-runtime/src/utils/pool.ts | 30 ++ packages/protons-runtime/src/utils/reader.ts | 311 +++++++++++++ packages/protons-runtime/src/utils/utf8.ts | 103 +++++ packages/protons-runtime/src/utils/writer.ts | 437 ++++++++++++++++++ packages/protons/package.json | 2 +- 12 files changed, 1173 insertions(+), 93 deletions(-) delete mode 100644 packages/protons-runtime/src/utils.ts create mode 100644 packages/protons-runtime/src/utils/float.ts create mode 100644 packages/protons-runtime/src/utils/longbits.ts create mode 100644 packages/protons-runtime/src/utils/pool.ts create mode 100644 packages/protons-runtime/src/utils/reader.ts create mode 100644 packages/protons-runtime/src/utils/utf8.ts create mode 100644 packages/protons-runtime/src/utils/writer.ts diff --git a/packages/protons-runtime/package.json b/packages/protons-runtime/package.json index 6740427..e19caa1 100644 --- a/packages/protons-runtime/package.json +++ b/packages/protons-runtime/package.json @@ -125,13 +125,10 @@ "release": "aegir release" }, "dependencies": { - "protobufjs": "^7.0.0", - "uint8arraylist": "^2.4.3" + "uint8arraylist": "^2.4.3", + "uint8arrays": "^4.0.6" }, "devDependencies": { "aegir": "^41.0.4" - }, - "peerDependencies": { - "uint8arraylist": "^2.3.2" } } diff --git 
a/packages/protons-runtime/src/decode.ts b/packages/protons-runtime/src/decode.ts index aa80e88..19360c2 100644 --- a/packages/protons-runtime/src/decode.ts +++ b/packages/protons-runtime/src/decode.ts @@ -1,9 +1,9 @@ -import { reader } from './utils.js' +import { createReader } from './utils/reader.js' import type { Codec } from './codec.js' import type { Uint8ArrayList } from 'uint8arraylist' export function decodeMessage (buf: Uint8Array | Uint8ArrayList, codec: Codec): T { - const r = reader(buf instanceof Uint8Array ? buf : buf.subarray()) + const reader = createReader(buf) - return codec.decode(r) + return codec.decode(reader) } diff --git a/packages/protons-runtime/src/encode.ts b/packages/protons-runtime/src/encode.ts index 3a6822e..9f23828 100644 --- a/packages/protons-runtime/src/encode.ts +++ b/packages/protons-runtime/src/encode.ts @@ -1,8 +1,8 @@ -import { writer } from './utils.js' +import { createWriter } from './utils/writer.js' import type { Codec } from './codec.js' export function encodeMessage (message: T, codec: Codec): Uint8Array { - const w = writer() + const w = createWriter() codec.encode(message, w, { lengthDelimited: false diff --git a/packages/protons-runtime/src/index.ts b/packages/protons-runtime/src/index.ts index 2b4b823..56a57ea 100644 --- a/packages/protons-runtime/src/index.ts +++ b/packages/protons-runtime/src/index.ts @@ -18,7 +18,8 @@ export { export { enumeration } from './codecs/enum.js' export { message } from './codecs/message.js' -export { reader, writer } from './utils.js' +export { createReader as reader } from './utils/reader.js' +export { createWriter as writer } from './utils/writer.js' export type { Codec, EncodeOptions } from './codec.js' export interface Writer { @@ -30,93 +31,93 @@ export interface Writer { /** * Writes an unsigned 32 bit value as a varint */ - uint32(value: number): Writer + uint32(value: number): this /** * Writes a signed 32 bit value as a varint` */ - int32(value: number): Writer + 
int32(value: number): this /** * Writes a 32 bit value as a varint, zig-zag encoded */ - sint32(value: number): Writer + sint32(value: number): this /** * Writes an unsigned 64 bit value as a varint */ - uint64(value: bigint): Writer + uint64(value: bigint): this /** * Writes a signed 64 bit value as a varint */ - int64(value: bigint): Writer + int64(value: bigint): this /** * Writes a signed 64 bit value as a varint, zig-zag encoded */ - sint64(value: bigint): Writer + sint64(value: bigint): this /** * Writes a boolish value as a varint */ - bool(value: boolean): Writer + bool(value: boolean): this /** * Writes an unsigned 32 bit value as fixed 32 bits */ - fixed32(value: number): Writer + fixed32(value: number): this /** * Writes a signed 32 bit value as fixed 32 bits */ - sfixed32(value: number): Writer + sfixed32(value: number): this /** * Writes an unsigned 64 bit value as fixed 64 bits */ - fixed64(value: bigint): Writer + fixed64(value: bigint): this /** * Writes a signed 64 bit value as fixed 64 bits */ - sfixed64(value: bigint): Writer + sfixed64(value: bigint): this /** * Writes a float (32 bit) */ - float(value: number): Writer + float(value: number): this /** * Writes a double (64 bit float) */ - double(value: number): Writer + double(value: number): this /** * Writes a sequence of bytes */ - bytes(value: Uint8Array): Writer + bytes(value: Uint8Array): this /** * Writes a string */ - string(value: string): Writer + string(value: string): this /** * Forks this writer's state by pushing it to a stack. * Calling {@link Writer#reset|reset} or {@link Writer#ldelim|ldelim} resets the writer to the previous state. */ - fork(): Writer + fork(): this /** * Resets this instance to the last state. */ - reset(): Writer + reset(): this /** * Resets to the last state and appends the fork state's current write length as a varint followed by its operations. 
*/ - ldelim(): Writer + ldelim(): this /** * Finishes the write operation diff --git a/packages/protons-runtime/src/utils.ts b/packages/protons-runtime/src/utils.ts deleted file mode 100644 index 7a746c6..0000000 --- a/packages/protons-runtime/src/utils.ts +++ /dev/null @@ -1,63 +0,0 @@ -// @ts-expect-error no types -import ReaderClass from 'protobufjs/src/reader.js' -// @ts-expect-error no types -import ReaderBufferClass from 'protobufjs/src/reader_buffer.js' -// @ts-expect-error no types -import util from 'protobufjs/src/util/minimal.js' -// @ts-expect-error no types -import WriterClass from 'protobufjs/src/writer.js' -// @ts-expect-error no types -import WriterBufferClass from 'protobufjs/src/writer_buffer.js' -import type { Reader, Writer } from './index.js' - -function configure (): void { - util._configure() - ReaderClass._configure(ReaderBufferClass) - WriterClass._configure(WriterBufferClass) -} - -// Set up buffer utility according to the environment -configure() - -// monkey patch the reader to add native bigint support -const methods = [ - 'uint64', 'int64', 'sint64', 'fixed64', 'sfixed64' -] - -function patchReader (obj: any): any { - for (const method of methods) { - if (obj[method] == null) { - continue - } - - const original = obj[method] - obj[method] = function (): bigint { - return BigInt(original.call(this).toString()) - } - } - - return obj -} - -export function reader (buf: Uint8Array): Reader { - return patchReader(new ReaderClass(buf)) -} - -function patchWriter (obj: any): any { - for (const method of methods) { - if (obj[method] == null) { - continue - } - - const original = obj[method] - obj[method] = function (val: bigint) { - return original.call(this, val.toString()) - } - } - - return obj -} - -export function writer (): Writer { - return patchWriter(WriterClass.create()) -} diff --git a/packages/protons-runtime/src/utils/float.ts b/packages/protons-runtime/src/utils/float.ts new file mode 100644 index 0000000..2ee66fb --- /dev/null 
+++ b/packages/protons-runtime/src/utils/float.ts @@ -0,0 +1,109 @@ +const f32 = new Float32Array([-0]) +const f8b = new Uint8Array(f32.buffer) + +/** + * Writes a 32 bit float to a buffer using little endian byte order + */ +export function writeFloatLE (val: number, buf: Uint8Array, pos: number): void { + f32[0] = val + buf[pos] = f8b[0] + buf[pos + 1] = f8b[1] + buf[pos + 2] = f8b[2] + buf[pos + 3] = f8b[3] +} + +/** + * Writes a 32 bit float to a buffer using big endian byte order + */ +export function writeFloatBE (val: number, buf: Uint8Array, pos: number): void { + f32[0] = val + buf[pos] = f8b[3] + buf[pos + 1] = f8b[2] + buf[pos + 2] = f8b[1] + buf[pos + 3] = f8b[0] +} + +/** + * Reads a 32 bit float from a buffer using little endian byte order + */ +export function readFloatLE (buf: Uint8Array, pos: number): number { + f8b[0] = buf[pos] + f8b[1] = buf[pos + 1] + f8b[2] = buf[pos + 2] + f8b[3] = buf[pos + 3] + return f32[0] +} + +/** + * Reads a 32 bit float from a buffer using big endian byte order + */ +export function readFloatBE (buf: Uint8Array, pos: number): number { + f8b[3] = buf[pos] + f8b[2] = buf[pos + 1] + f8b[1] = buf[pos + 2] + f8b[0] = buf[pos + 3] + return f32[0] +} + +const f64 = new Float64Array([-0]) +const d8b = new Uint8Array(f64.buffer) + +/** + * Writes a 64 bit double to a buffer using little endian byte order + */ +export function writeDoubleLE (val: number, buf: Uint8Array, pos: number): void { + f64[0] = val + buf[pos] = d8b[0] + buf[pos + 1] = d8b[1] + buf[pos + 2] = d8b[2] + buf[pos + 3] = d8b[3] + buf[pos + 4] = d8b[4] + buf[pos + 5] = d8b[5] + buf[pos + 6] = d8b[6] + buf[pos + 7] = d8b[7] +} + +/** + * Writes a 64 bit double to a buffer using big endian byte order + */ +export function writeDoubleBE (val: number, buf: Uint8Array, pos: number): void { + f64[0] = val + buf[pos] = d8b[7] + buf[pos + 1] = d8b[6] + buf[pos + 2] = d8b[5] + buf[pos + 3] = d8b[4] + buf[pos + 4] = d8b[3] + buf[pos + 5] = d8b[2] + buf[pos + 6] = d8b[1] 
+ buf[pos + 7] = d8b[0] +} + +/** + * Reads a 64 bit double from a buffer using little endian byte order + */ +export function readDoubleLE (buf: Uint8Array, pos: number): number { + d8b[0] = buf[pos] + d8b[1] = buf[pos + 1] + d8b[2] = buf[pos + 2] + d8b[3] = buf[pos + 3] + d8b[4] = buf[pos + 4] + d8b[5] = buf[pos + 5] + d8b[6] = buf[pos + 6] + d8b[7] = buf[pos + 7] + return f64[0] +} + +/** + * Reads a 64 bit double from a buffer using big endian byte order + */ +export function readDoubleBE (buf: Uint8Array, pos: number): number { + d8b[7] = buf[pos] + d8b[6] = buf[pos + 1] + d8b[5] = buf[pos + 2] + d8b[4] = buf[pos + 3] + d8b[3] = buf[pos + 4] + d8b[2] = buf[pos + 5] + d8b[1] = buf[pos + 6] + d8b[0] = buf[pos + 7] + return f64[0] +} diff --git a/packages/protons-runtime/src/utils/longbits.ts b/packages/protons-runtime/src/utils/longbits.ts new file mode 100644 index 0000000..a8e41ea --- /dev/null +++ b/packages/protons-runtime/src/utils/longbits.ts @@ -0,0 +1,155 @@ +/** + * Constructs new long bits. + * + * @classdesc Helper class for working with the low and high bits of a 64 bit value. + * @memberof util + * @function Object() { [native code] } + * @param {number} lo - Low 32 bits, unsigned + * @param {number} hi - High 32 bits, unsigned + */ +export class LongBits { + public lo: number + public hi: number + + constructor (lo: number, hi: number) { + // note that the casts below are theoretically unnecessary as of today, but older statically + // generated converter code might still call the ctor with signed 32bits. kept for compat. 
+ + /** + * Low bits + */ + this.lo = lo | 0 + + /** + * High bits + */ + this.hi = hi | 0 + } + + /** + * Converts this long bits to a bigint, treating the 64 bits as unsigned when `unsigned` is true and as a two's complement signed value otherwise + */ + toBigInt (unsigned: boolean = false): bigint { + if (unsigned) { + const result = BigInt(this.lo >>> 0) + (BigInt(this.hi >>> 0) << 32n) + return result + } + + if ((this.hi >>> 31) !== 0) { + const lo = ~this.lo + 1 >>> 0 + let hi = ~this.hi >>> 0 + if (lo === 0) { + hi = hi + 1 >>> 0 + } + return -(BigInt(lo) + (BigInt(hi) << 32n)) + } + + return BigInt(this.lo >>> 0) + (BigInt(this.hi >>> 0) << 32n) + } + + /** + * Zig-zag encodes this long bits + */ + zzEncode (): this { + const mask = this.hi >> 31 + this.hi = ((this.hi << 1 | this.lo >>> 31) ^ mask) >>> 0 + this.lo = (this.lo << 1 ^ mask) >>> 0 + return this + } + + /** + * Zig-zag decodes this long bits + */ + zzDecode (): this { + const mask = -(this.lo & 1) + this.lo = ((this.lo >>> 1 | this.hi << 31) ^ mask) >>> 0 + this.hi = (this.hi >>> 1 ^ mask) >>> 0 + return this + } + + /** + * Calculates the length of this longbits when encoded as a varint. + */ + length (): number { + const part0 = this.lo + const part1 = (this.lo >>> 28 | this.hi << 4) >>> 0 + const part2 = this.hi >>> 24 + return part2 === 0 + ? part1 === 0 + ? part0 < 16384 + ? part0 < 128 ? 1 : 2 + : part0 < 2097152 ? 3 : 4 + : part1 < 16384 + ? part1 < 128 ? 5 : 6 + : part1 < 2097152 ? 7 : 8 + : part2 < 128 ? 
9 : 10 + } + + /** + * Constructs new long bits from the specified bigint + */ + static fromBigInt (value: bigint): LongBits { + if (value === 0n) { return zero } + + const negative = value < 0 + if (negative) { + value = -value + } + let hi = Number(value >> 32n) + let lo = Number(value - (BigInt(hi) << 32n)) + + if (negative) { + hi = ~hi >>> 0 + lo = ~lo >>> 0 + if (++lo > TWO_32) { + lo = 0 + if (++hi > TWO_32) { hi = 0 } + } + } + + return new LongBits(lo, hi) + } + + /** + * Constructs new long bits from the specified number + */ + static fromNumber (value: number): LongBits { + if (value === 0) { return zero } + const sign = value < 0 + if (sign) { value = -value } + let lo = value >>> 0 + let hi = (value - lo) / 4294967296 >>> 0 + if (sign) { + hi = ~hi >>> 0 + lo = ~lo >>> 0 + if (++lo > 4294967295) { + lo = 0 + if (++hi > 4294967295) { hi = 0 } + } + } + return new LongBits(lo, hi) + } + + /** + * Constructs new long bits from a bigint, number, string or `{ low, high }` object + */ + static from (value: bigint | number | string | { low: number, high: number }): LongBits { + if (typeof value === 'number') { + return LongBits.fromNumber(value) + } + if (typeof value === 'bigint') { + return LongBits.fromBigInt(value) + } + if (typeof value === 'string') { + return LongBits.fromBigInt(BigInt(value)) + } + return value.low != null || value.high != null ? 
new LongBits(value.low >>> 0, value.high >>> 0) : zero + } +} + +const zero = new LongBits(0, 0) +zero.toBigInt = function () { return 0n } +zero.zzEncode = zero.zzDecode = function () { return this } +zero.length = function () { return 1 } + +const TWO_32 = 4294967296n diff --git a/packages/protons-runtime/src/utils/pool.ts b/packages/protons-runtime/src/utils/pool.ts new file mode 100644 index 0000000..b2ce623 --- /dev/null +++ b/packages/protons-runtime/src/utils/pool.ts @@ -0,0 +1,30 @@ +import { allocUnsafe } from 'uint8arrays/alloc' + +/** + * A general purpose buffer pool + */ +export default function pool (size?: number): (size: number) => Uint8Array { + const SIZE = size ?? 8192 + const MAX = SIZE >>> 1 + let slab: Uint8Array + let offset = SIZE + return function poolAlloc (size: number) { + if (size < 1 || size > MAX) { + return allocUnsafe(size) + } + + if (offset + size > SIZE) { + slab = allocUnsafe(SIZE) + offset = 0 + } + + const buf = slab.subarray(offset, offset += size) + + if ((offset & 7) !== 0) { + // align to 8 bytes + offset = (offset | 7) + 1 + } + + return buf + } +} diff --git a/packages/protons-runtime/src/utils/reader.ts b/packages/protons-runtime/src/utils/reader.ts new file mode 100644 index 0000000..a70c441 --- /dev/null +++ b/packages/protons-runtime/src/utils/reader.ts @@ -0,0 +1,311 @@ +import { readFloatLE, readDoubleLE } from './float.js' +import { LongBits } from './longbits.js' +import * as utf8 from './utf8.js' +import type { Reader } from '../index.js' +import type { Uint8ArrayList } from 'uint8arraylist' + +/* istanbul ignore next */ +function indexOutOfRange (reader: Reader, writeLength?: number): RangeError { + return RangeError(`index out of range: ${reader.pos} + ${writeLength ?? 
1} > ${reader.len}`) +} + +function readFixed32End (buf: Uint8Array, end: number): number { // note that this uses `end`, not `pos` + return (buf[end - 4] | + buf[end - 3] << 8 | + buf[end - 2] << 16 | + buf[end - 1] << 24) >>> 0 +} + +/** + * Constructs a new reader instance using the specified buffer. + */ +export class Uint8ArrayReader implements Reader { + public buf: Uint8Array + public pos: number + public len: number + + public _slice = Uint8Array.prototype.subarray + + constructor (buffer: Uint8Array) { + /** + * Read buffer + */ + this.buf = buffer + + /** + * Read buffer position + */ + this.pos = 0 + + /** + * Read buffer length + */ + this.len = buffer.length + } + + /** + * Reads a varint as an unsigned 32 bit value + */ + uint32 (): number { + let value = 4294967295 + + value = (this.buf[this.pos] & 127) >>> 0; if (this.buf[this.pos++] < 128) return value + value = (value | (this.buf[this.pos] & 127) << 7) >>> 0; if (this.buf[this.pos++] < 128) return value + value = (value | (this.buf[this.pos] & 127) << 14) >>> 0; if (this.buf[this.pos++] < 128) return value + value = (value | (this.buf[this.pos] & 127) << 21) >>> 0; if (this.buf[this.pos++] < 128) return value + value = (value | (this.buf[this.pos] & 15) << 28) >>> 0; if (this.buf[this.pos++] < 128) return value + + if ((this.pos += 5) > this.len) { + this.pos = this.len + throw indexOutOfRange(this, 10) + } + + return value + } + + /** + * Reads a varint as a signed 32 bit value + */ + int32 (): number { + return this.uint32() | 0 + } + + /** + * Reads a zig-zag encoded varint as a signed 32 bit value + */ + sint32 (): number { + const value = this.uint32() + return value >>> 1 ^ -(value & 1) | 0 + } + + /** + * Reads a varint as a boolean + */ + bool (): boolean { + return this.uint32() !== 0 + } + + /** + * Reads fixed 32 bits as an unsigned 32 bit integer + */ + fixed32 (): number { + if (this.pos + 4 > this.len) { throw indexOutOfRange(this, 4) } + + const res = readFixed32End(this.buf, 
this.pos += 4) + + return res + } + + /** + * Reads fixed 32 bits as a signed 32 bit integer + */ + sfixed32 (): number { + if (this.pos + 4 > this.len) { + throw indexOutOfRange(this, 4) + } + + const res = readFixed32End(this.buf, this.pos += 4) | 0 + + return res + } + + /** + * Reads a float (32 bit) as a number + */ + float (): number { + if (this.pos + 4 > this.len) { + throw indexOutOfRange(this, 4) + } + + const value = readFloatLE(this.buf, this.pos) + this.pos += 4 + return value + } + + /** + * Reads a double (64 bit float) as a number + */ + double (): number { + /* istanbul ignore if */ + if (this.pos + 8 > this.len) { throw indexOutOfRange(this, 4) } + + const value = readDoubleLE(this.buf, this.pos) + this.pos += 8 + return value + } + + /** + * Reads a sequence of bytes preceded by its length as a varint + */ + bytes (): Uint8Array { + const length = this.uint32() + const start = this.pos + const end = this.pos + length + + /* istanbul ignore if */ + if (end > this.len) { + throw indexOutOfRange(this, length) + } + + this.pos += length + + return start === end // fix for IE 10/Win8 and others' subarray returning array of size 1 + ? 
new Uint8Array(0) + : this.buf.subarray(start, end) + } + + /** + * Reads a string preceded by its byte length as a varint + */ + string (): string { + const bytes = this.bytes() + return utf8.read(bytes, 0, bytes.length) + } + + /** + * Skips the specified number of bytes if specified, otherwise skips a varint + */ + skip (length?: number): this { + if (typeof length === 'number') { + /* istanbul ignore if */ + if (this.pos + length > this.len) { throw indexOutOfRange(this, length) } + this.pos += length + } else { + do { + /* istanbul ignore if */ + if (this.pos >= this.len) { + throw indexOutOfRange(this) + } + } while ((this.buf[this.pos++] & 128) !== 0) + } + return this + } + + /** + * Skips the next element of the specified wire type + */ + skipType (wireType: number): this { + switch (wireType) { + case 0: + this.skip() + break + case 1: + this.skip(8) + break + case 2: + this.skip(this.uint32()) + break + case 3: + while ((wireType = this.uint32() & 7) !== 4) { + this.skipType(wireType) + } + break + case 5: + this.skip(4) + break + + /* istanbul ignore next */ + default: + throw Error(`invalid wire type ${wireType} at offset ${this.pos}`) + } + return this + } + + private readLongVarint (): LongBits { + // tends to deopt with local vars for octet etc. 
+ const bits = new LongBits(0, 0) + let i = 0 + if (this.len - this.pos > 4) { // fast route (lo) + for (; i < 4; ++i) { + // 1st..4th + bits.lo = (bits.lo | (this.buf[this.pos] & 127) << i * 7) >>> 0 + if (this.buf[this.pos++] < 128) { return bits } + } + // 5th + bits.lo = (bits.lo | (this.buf[this.pos] & 127) << 28) >>> 0 + bits.hi = (bits.hi | (this.buf[this.pos] & 127) >> 4) >>> 0 + if (this.buf[this.pos++] < 128) { return bits } + i = 0 + } else { + for (; i < 3; ++i) { + /* istanbul ignore if */ + if (this.pos >= this.len) { throw indexOutOfRange(this) } + // 1st..3th + bits.lo = (bits.lo | (this.buf[this.pos] & 127) << i * 7) >>> 0 + if (this.buf[this.pos++] < 128) { return bits } + } + // 4th + bits.lo = (bits.lo | (this.buf[this.pos++] & 127) << i * 7) >>> 0 + return bits + } + if (this.len - this.pos > 4) { // fast route (hi) + for (; i < 5; ++i) { + // 6th..10th + bits.hi = (bits.hi | (this.buf[this.pos] & 127) << i * 7 + 3) >>> 0 + if (this.buf[this.pos++] < 128) { return bits } + } + } else { + for (; i < 5; ++i) { + if (this.pos >= this.len) { + throw indexOutOfRange(this) + } + + // 6th..10th + bits.hi = (bits.hi | (this.buf[this.pos] & 127) << i * 7 + 3) >>> 0 + if (this.buf[this.pos++] < 128) { return bits } + } + } + + throw Error('invalid varint encoding') + } + + private readFixed64 (): LongBits { + if (this.pos + 8 > this.len) { + throw indexOutOfRange(this, 8) + } + + const lo = readFixed32End(this.buf, this.pos += 4) + const hi = readFixed32End(this.buf, this.pos += 4) + + return new LongBits(lo, hi) + } + + /** + * Reads a varint as a signed 64 bit value + */ + int64 (): bigint { + return this.readLongVarint().toBigInt() + } + + /** + * Reads a varint as an unsigned 64 bit value + */ + uint64 (): bigint { + return this.readLongVarint().toBigInt(true) + } + + /** + * Reads a zig-zag encoded varint as a signed 64 bit value + */ + sint64 (): bigint { + return this.readLongVarint().zzDecode().toBigInt() + } + + /** + * Reads fixed 64 bits + */ 
+ fixed64 (): bigint { + return this.readFixed64().toBigInt() + } + + /** + * Reads fixed 64 bits as a signed 64 bit value + */ + sfixed64 (): bigint { + return this.readFixed64().toBigInt() + } +} + +export function createReader (buf: Uint8Array | Uint8ArrayList): Reader { + return new Uint8ArrayReader(buf instanceof Uint8Array ? buf : buf.subarray()) +} diff --git a/packages/protons-runtime/src/utils/utf8.ts b/packages/protons-runtime/src/utils/utf8.ts new file mode 100644 index 0000000..2d097b7 --- /dev/null +++ b/packages/protons-runtime/src/utils/utf8.ts @@ -0,0 +1,103 @@ +/** + * Calculates the UTF8 byte length of a string + */ +export function length (string: string): number { + let len = 0 + let c = 0 + for (let i = 0; i < string.length; ++i) { + c = string.charCodeAt(i) + + if (c < 128) { + len += 1 + } else if (c < 2048) { + len += 2 + } else if ((c & 0xFC00) === 0xD800 && (string.charCodeAt(i + 1) & 0xFC00) === 0xDC00) { + ++i + len += 4 + } else { + len += 3 + } + } + + return len +} + +/** + * Reads UTF8 bytes as a string + */ +export function read (buffer: Uint8Array, start: number, end: number): string { + const len = end - start + + if (len < 1) { + return '' + } + + let parts: string[] | undefined + const chunk: number[] = [] + let i = 0 // char offset + let t: number // temporary + + while (start < end) { + t = buffer[start++] + + if (t < 128) { + chunk[i++] = t + } else if (t > 191 && t < 224) { + chunk[i++] = (t & 31) << 6 | buffer[start++] & 63 + } else if (t > 239 && t < 365) { + t = ((t & 7) << 18 | (buffer[start++] & 63) << 12 | (buffer[start++] & 63) << 6 | buffer[start++] & 63) - 0x10000 + chunk[i++] = 0xD800 + (t >> 10) + chunk[i++] = 0xDC00 + (t & 1023) + } else { + chunk[i++] = (t & 15) << 12 | (buffer[start++] & 63) << 6 | buffer[start++] & 63 + } + + if (i > 8191) { + (parts ?? 
(parts = [])).push(String.fromCharCode.apply(String, chunk)) + i = 0 + } + } + + if (parts != null) { + if (i > 0) { + parts.push(String.fromCharCode.apply(String, chunk.slice(0, i))) + } + + return parts.join('') + } + + return String.fromCharCode.apply(String, chunk.slice(0, i)) +} + +/** + * Writes a string as UTF8 bytes + */ +export function write (string: string, buffer: Uint8Array, offset: number): number { + const start = offset + let c1 // character 1 + let c2 // character 2 + + for (let i = 0; i < string.length; ++i) { + c1 = string.charCodeAt(i) + + if (c1 < 128) { + buffer[offset++] = c1 + } else if (c1 < 2048) { + buffer[offset++] = c1 >> 6 | 192 + buffer[offset++] = c1 & 63 | 128 + } else if ((c1 & 0xFC00) === 0xD800 && ((c2 = string.charCodeAt(i + 1)) & 0xFC00) === 0xDC00) { + c1 = 0x10000 + ((c1 & 0x03FF) << 10) + (c2 & 0x03FF) + ++i + buffer[offset++] = c1 >> 18 | 240 + buffer[offset++] = c1 >> 12 & 63 | 128 + buffer[offset++] = c1 >> 6 & 63 | 128 + buffer[offset++] = c1 & 63 | 128 + } else { + buffer[offset++] = c1 >> 12 | 224 + buffer[offset++] = c1 >> 6 & 63 | 128 + buffer[offset++] = c1 & 63 | 128 + } + } + + return offset - start +} diff --git a/packages/protons-runtime/src/utils/writer.ts b/packages/protons-runtime/src/utils/writer.ts new file mode 100644 index 0000000..a2f4cdd --- /dev/null +++ b/packages/protons-runtime/src/utils/writer.ts @@ -0,0 +1,437 @@ +import { allocUnsafe } from 'uint8arrays/alloc' +import { writeFloatLE, writeDoubleLE } from './float.js' +import { LongBits } from './longbits.js' +import pool from './pool.js' +import * as utf8 from './utf8.js' +import type { Writer } from '../index.js' + +interface WriterOperation { + (val: T, buf: Uint8Array, pos: number): any +} + +/** + * Constructs a new writer operation instance. 
+ * + * @classdesc Scheduled writer operation + */ +class Op { + /** + * Function to call + */ + public fn: WriterOperation + + /** + * Value byte length + */ + public len: number + + /** + * Next operation + */ + public next?: Op + + /** + * Value to write + */ + public val: T + + constructor (fn: WriterOperation, len: number, val: T) { + this.fn = fn + this.len = len + this.next = undefined + this.val = val // type varies + } +} + +/* istanbul ignore next */ +function noop (): void {} // eslint-disable-line no-empty-function + +/** + * Constructs a new writer state instance + */ +class State { + /** + * Current head + */ + public head: Op + + /** + * Current tail + */ + public tail: Op + + /** + * Current buffer length + */ + public len: number + + /** + * Next state + */ + public next?: State + + constructor (writer: Uint8ArrayWriter) { + this.head = writer.head + this.tail = writer.tail + this.len = writer.len + this.next = writer.states + } +} + +const bufferPool = pool() + +/** + * Allocates a buffer of the specified size + */ +function alloc (size: number): Uint8Array { + if (globalThis.Buffer != null) { + return allocUnsafe(size) + } + + return bufferPool(size) +} + +/** + * When a value is written, the writer calculates its byte length and puts it into a linked + * list of operations to perform when finish() is called. This both allows us to allocate + * buffers of the exact required size and reduces the amount of work we have to do compared + * to first calculating over objects and then encoding over objects. In our case, the encoding + * part is just a linked list walk calling operations with already prepared values. 
+ */ +class Uint8ArrayWriter implements Writer { + /** + * Current length + */ + public len: number + + /** + * Operations head + */ + public head: Op + + /** + * Operations tail + */ + public tail: Op + + /** + * Linked forked states + */ + public states?: any + + constructor () { + this.len = 0 + this.head = new Op(noop, 0, 0) + this.tail = this.head + this.states = null + } + + /** + * Pushes a new operation to the queue + */ + _push (fn: WriterOperation, len: number, val: any): this { + this.tail = this.tail.next = new Op(fn, len, val) + this.len += len + + return this + } + + /** + * Writes an unsigned 32 bit value as a varint + */ + uint32 (value: number): this { + // here, the call to this.push has been inlined and a varint specific Op subclass is used. + // uint32 is by far the most frequently used operation and benefits significantly from this. + this.len += (this.tail = this.tail.next = new VarintOp( + (value = value >>> 0) < + 128 + ? 1 + : value < 16384 + ? 2 + : value < 2097152 + ? 3 + : value < 268435456 + ? 4 + : 5, + value)).len + return this + } + + /** + * Writes a signed 32 bit value as a varint` + */ + int32 (value: number): this { + return value < 0 + ? 
this._push(writeVarint64, 10, LongBits.fromNumber(value)) // 10 bytes per spec + : this.uint32(value) + } + + /** + * Writes a 32 bit value as a varint, zig-zag encoded + */ + sint32 (value: number): this { + return this.uint32((value << 1 ^ value >> 31) >>> 0) + } + + /** + * Writes an unsigned 64 bit value as a varint + */ + uint64 (value: bigint): this { + const bits = LongBits.fromBigInt(value) + return this._push(writeVarint64, bits.length(), bits) + } + + /** + * Writes a signed 64 bit value as a varint + */ + int64 (value: bigint): this { + return this.uint64(value) + } + + /** + * Writes a signed 64 bit value as a varint, zig-zag encoded + */ + sint64 (value: bigint): this { + const bits = LongBits.fromBigInt(value).zzEncode() + return this._push(writeVarint64, bits.length(), bits) + } + + /** + * Writes a boolish value as a varint + */ + bool (value: boolean): this { + return this._push(writeByte, 1, value ? 1 : 0) + } + + /** + * Writes an unsigned 32 bit value as fixed 32 bits + */ + fixed32 (value: number): this { + return this._push(writeFixed32, 4, value >>> 0) + } + + /** + * Writes a signed 32 bit value as fixed 32 bits + */ + sfixed32 (value: number): this { + return this.fixed32(value) + } + + /** + * Writes an unsigned 64 bit value as fixed 64 bits + */ + fixed64 (value: bigint): this { + const bits = LongBits.fromBigInt(value) + + return this._push(writeFixed32, 4, bits.lo)._push(writeFixed32, 4, bits.hi) + } + + /** + * Writes a signed 64 bit value as fixed 64 bits + */ + sfixed64 (value: bigint): this { + return this.fixed64(value) + } + + /** + * Writes a float (32 bit) + */ + float (value: number): this { + return this._push(writeFloatLE, 4, value) + } + + /** + * Writes a double (64 bit float). 
+ * + * @function + * @param {number} value - Value to write + * @returns {Writer} `this` + */ + double (value: number): this { + return this._push(writeDoubleLE, 8, value) + } + + /** + * Writes a sequence of bytes + */ + bytes (value: Uint8Array): this { + const len = value.length >>> 0 + + if (len === 0) { + return this._push(writeByte, 1, 0) + } + + return this.uint32(len)._push(writeBytes, len, value) + } + + /** + * Writes a string + */ + string (value: string): this { + const len = utf8.length(value) + return len !== 0 + ? this.uint32(len)._push(utf8.write, len, value) + : this._push(writeByte, 1, 0) + } + + /** + * Forks this writer's state by pushing it to a stack. + * Calling {@link Writer#reset|reset} or {@link Writer#ldelim|ldelim} resets the writer to the previous state. + */ + fork (): this { + this.states = new State(this) + this.head = this.tail = new Op(noop, 0, 0) + this.len = 0 + return this + } + + /** + * Resets this instance to the last state + */ + reset (): this { + if (this.states != null) { + this.head = this.states.head + this.tail = this.states.tail + this.len = this.states.len + this.states = this.states.next + } else { + this.head = this.tail = new Op(noop, 0, 0) + this.len = 0 + } + return this + } + + /** + * Resets to the last state and appends the fork state's current write length as a varint followed by its operations. 
+ */ + ldelim (): this { + const head = this.head + const tail = this.tail + const len = this.len + this.reset().uint32(len) + if (len !== 0) { + this.tail.next = head.next // skip noop + this.tail = tail + this.len += len + } + return this + } + + /** + * Finishes the write operation + */ + finish (): Uint8Array { + let head = this.head.next // skip noop + const buf = alloc(this.len) + let pos = 0 + while (head != null) { + head.fn(head.val, buf, pos) + pos += head.len + head = head.next + } + // this.head = this.tail = null; + return buf + } +} + +function writeByte (val: number, buf: Uint8Array, pos: number): void { + buf[pos] = val & 255 +} + +function writeVarint32 (val: number, buf: Uint8Array, pos: number): void { + while (val > 127) { + buf[pos++] = val & 127 | 128 + val >>>= 7 + } + buf[pos] = val +} + +/** + * Constructs a new varint writer operation instance. + * + * @classdesc Scheduled varint writer operation + */ +class VarintOp extends Op { + public next?: Op + + constructor (len: number, val: number) { + super(writeVarint32, len, val) + this.next = undefined + } +} + +function writeVarint64 (val: LongBits, buf: Uint8Array, pos: number): void { + while (val.hi !== 0) { + buf[pos++] = val.lo & 127 | 128 + val.lo = (val.lo >>> 7 | val.hi << 25) >>> 0 + val.hi >>>= 7 + } + while (val.lo > 127) { + buf[pos++] = val.lo & 127 | 128 + val.lo = val.lo >>> 7 + } + buf[pos++] = val.lo +} + +function writeFixed32 (val: number, buf: Uint8Array, pos: number): void { + buf[pos] = val & 255 + buf[pos + 1] = val >>> 8 & 255 + buf[pos + 2] = val >>> 16 & 255 + buf[pos + 3] = val >>> 24 +} + +function writeBytes (val: Uint8Array, buf: Uint8Array, pos: number): void { + buf.set(val, pos) +} + +if (globalThis.Buffer != null) { + Uint8ArrayWriter.prototype.bytes = function (value: Uint8Array) { + const len = value.length >>> 0 + + this.uint32(len) + + if (len > 0) { + this._push(writeBytesBuffer, len, value) + } + + return this + } + + Uint8ArrayWriter.prototype.string = 
function (value: string) { + const len = globalThis.Buffer.byteLength(value) + + this.uint32(len) + + if (len > 0) { + this._push(writeStringBuffer, len, value) + } + + return this + } +} + +function writeBytesBuffer (val: Uint8Array, buf: Uint8Array, pos: number): void { + buf.set(val, pos) // faster than copy (requires node >= 4 where Buffers extend Uint8Array and set is properly inherited) + // also works for plain array values +} + +function writeStringBuffer (val: string, buf: Uint8Array, pos: number): void { + if (val.length < 40) { + // plain js is faster for short strings (probably due to redundant assertions) + utf8.write(val, buf, pos) + // @ts-expect-error buf isn't a Uint8Array? + } else if (buf.utf8Write != null) { + // @ts-expect-error buf isn't a Uint8Array? + buf.utf8Write(val, pos) + } else { + // @ts-expect-error .write is a function on node Buffers + buf.write(val, pos) + } +} + +/** + * Creates a new writer + */ +export function createWriter (): Writer { + return new Uint8ArrayWriter() +} diff --git a/packages/protons/package.json b/packages/protons/package.json index c1ef040..d369ba3 100644 --- a/packages/protons/package.json +++ b/packages/protons/package.json @@ -139,6 +139,6 @@ "pbjs": "^0.0.14", "protobufjs": "^7.0.0", "protons-runtime": "^5.0.0", - "uint8arraylist": "^2.3.2" + "uint8arraylist": "^2.4.3" } }