From 9c097d504a6acc193a5ce0a4cbf3551c948dcf90 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 17 Dec 2023 22:19:54 -0500 Subject: [PATCH] GH-39248: [JS] Unify code paths for utf8 and largeUtf8 (#39249) Reduce the code size by using common code paths. We only call `Number` a few times on numbers, which should be a noop. * Closes: #39248 --- js/.vscode/settings.json | 2 +- js/src/visitor/get.ts | 20 +++----------------- js/src/visitor/set.ts | 19 +++---------------- js/src/visitor/vectorassembler.ts | 24 +++--------------------- 4 files changed, 10 insertions(+), 55 deletions(-) diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json index 113a662180c3c..e52da54e544ec 100644 --- a/js/.vscode/settings.json +++ b/js/.vscode/settings.json @@ -2,7 +2,7 @@ "typescript.tsdk": "node_modules/typescript/lib", "editor.trimAutoWhitespace": true, "editor.codeActionsOnSave": { - "source.fixAll.eslint": false + "source.fixAll.eslint": "explicit" }, "[javascript]": { "editor.tabSize": 4, diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index a801c90047c89..112d2f2983e53 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -116,16 +116,7 @@ function wrapGet(fn: (data: Data, _1: any) => any) { /** @ignore */ const getNull = (_data: Data, _index: number): T['TValue'] => null; /** @ignore */ -const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => { - if (index + 1 >= valueOffsets.length) { - return null as any; - } - const x = valueOffsets[index]; - const y = valueOffsets[index + 1]; - return values.subarray(x, y); -}; -/** @ignore */ -const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: BigInt64Array, index: number) => { +const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array | BigInt64Array, index: number) => { if (index + 1 >= valueOffsets.length) { return null as any; } @@ -162,15 +153,10 @@ const getFixedSizeBinary = ({ stride, values }: Data< /** @ignore */ const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); /** @ignore */ -const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { +const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { const bytes = getVariableWidthBytes(values, valueOffsets, index); return bytes !== null ? decodeUtf8(bytes) : null as any; }; -/** @ignore */ -const getLargeUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { - const bytes = getLargeVariableWidthBytes(values, valueOffsets, index); - return bytes !== null ? decodeUtf8(bytes) : null as any; -}; /* istanbul ignore next */ /** @ignore */ @@ -344,7 +330,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16); GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric); GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); -GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8); +GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitBinary = wrapGet(getBinary); GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); GetVisitor.prototype.visitDate = wrapGet(getDate); diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index a439ec8311fd6..15b0721660f55 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -125,16 +125,7 @@ export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epo }; /** @ignore */ -export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { - if (index + 1 < valueOffsets.length) { - const x = valueOffsets[index]; - const y = valueOffsets[index + 1]; - values.set(value.subarray(0, y - x), x); - } -}; - -/** @ignore */ -export const setLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { +export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { if (index + 1 < valueOffsets.length) { const x = bigIntToNumber(valueOffsets[index]); const y = bigIntToNumber(valueOffsets[index + 1]); @@ -176,13 +167,9 @@ export const setFixedSizeBinary = ({ stride, values } /** @ignore */ const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); /** @ignore */ -const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { +const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); }; -/** @ignore */ -const setLargeUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { - setLargeVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); -}; /* istanbul ignore next */ export const setDate = (data: Data, index: number, value: T['TValue']): void => { @@ -381,7 +368,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16); SetVisitor.prototype.visitFloat32 = wrapSet(setFloat); SetVisitor.prototype.visitFloat64 = wrapSet(setFloat); SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8); -SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8); +SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitBinary = wrapSet(setBinary); SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary); SetVisitor.prototype.visitDate = wrapSet(setDate); diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts index 7a9d3bdd57b0d..df820e6f5e00c 100644 --- a/js/src/visitor/vectorassembler.ts +++ b/js/src/visitor/vectorassembler.ts @@ -42,6 +42,7 @@ export interface VectorAssembler extends Visitor { visitInt(data: Data): this; visitFloat(data: Data): this; visitUtf8(data: Data): this; + visitLargeUtf8(data: Data): this; visitBinary(data: Data): this; visitFixedSizeBinary(data: Data): this; visitDate(data: Data): this; @@ -202,29 +203,10 @@ function assembleFlatVector(this: VectorAssembler, data: Data) { - const { length, values, valueOffsets } = data; - const { [0]: begin, [length]: end } = valueOffsets; - return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); -} - -/** @ignore */ -function assembleLargeFlatListVector(this: VectorAssembler, data: Data) { +function assembleFlatListVector(this: VectorAssembler, data: Data) { const { length, values, valueOffsets } = data; const begin = bigIntToNumber(valueOffsets[0]); const end = bigIntToNumber(valueOffsets[length]); - return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); -} - -/** @ignore */ -function _assembleFlatListVector( - this: VectorAssembler, - length: number, - begin: number, - end: number, - values: T['TArray'], - valueOffsets: T['TOffsetArray'] -) { const byteLength = Math.min(end - begin, values.byteLength - begin); // Push in the order FlatList types read their buffers addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets as any)); // valueOffsets buffer first @@ -255,7 +237,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector; VectorAssembler.prototype.visitInt = assembleFlatVector; VectorAssembler.prototype.visitFloat = assembleFlatVector; VectorAssembler.prototype.visitUtf8 = assembleFlatListVector; -VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector; +VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitBinary = assembleFlatListVector; VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector; VectorAssembler.prototype.visitDate = assembleFlatVector;