Skip to content

Commit

Permalink
apacheGH-39248: [JS] Unify code paths for utf8 and largeUtf8 (apache#…
Browse files Browse the repository at this point in the history
…39249)

Reduce the code size by sharing common code paths between utf8 and largeUtf8.
The only extra cost is that `Number` is now called a few times on values that
are already numbers, which should be a no-op.

* Closes: apache#39248
  • Loading branch information
domoritz authored Dec 18, 2023
1 parent 50ace05 commit 9c097d5
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 55 deletions.
2 changes: 1 addition & 1 deletion js/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"typescript.tsdk": "node_modules/typescript/lib",
"editor.trimAutoWhitespace": true,
"editor.codeActionsOnSave": {
"source.fixAll.eslint": false
"source.fixAll.eslint": "explicit"
},
"[javascript]": {
"editor.tabSize": 4,
Expand Down
20 changes: 3 additions & 17 deletions js/src/visitor/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,7 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
/**
 * Getter for `Null` data: both arguments are ignored and the result is always `null`.
 * @ignore
 */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => {
    return null;
};
/**
 * Returns the bytes of the entry at `index` as a view over `values`.
 *
 * The range is delimited by `valueOffsets[index]` and `valueOffsets[index + 1]`.
 * When `index + 1` is not a valid position in `valueOffsets`, `null` is returned.
 * The result is produced by `subarray`, so it shares memory with `values`.
 * @ignore
 */
const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => {
    const next = index + 1;
    return next >= valueOffsets.length
        ? (null as any)
        : values.subarray(valueOffsets[index], valueOffsets[next]);
};
/** @ignore */
const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: BigInt64Array, index: number) => {
const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array | BigInt64Array, index: number) => {
if (index + 1 >= valueOffsets.length) {
return null as any;
}
Expand Down Expand Up @@ -162,15 +153,10 @@ const getFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<
/**
 * Reads the entry at `index` by delegating to `getVariableWidthBytes`
 * with the data's `values` and `valueOffsets` buffers.
 * @ignore
 */
const getBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
    return getVariableWidthBytes(values, valueOffsets, index);
};
/** @ignore */
const getUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
const getUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
    // Fetch the raw bytes first; a `null` result is passed through without decoding.
    const encoded = getVariableWidthBytes(values, valueOffsets, index);
    return encoded === null ? (null as any) : decodeUtf8(encoded);
};
/**
 * Reads the entry at `index` through `getLargeVariableWidthBytes` and decodes it
 * with `decodeUtf8`; a `null` lookup result is returned as-is.
 * @ignore
 */
const getLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>, index: number): T['TValue'] => {
    const encoded = getLargeVariableWidthBytes(values, valueOffsets, index);
    return encoded === null ? (null as any) : decodeUtf8(encoded);
};

/* istanbul ignore next */
/** @ignore */
Expand Down Expand Up @@ -344,7 +330,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16);
GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric);
GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric);
GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8);
GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8);
GetVisitor.prototype.visitBinary = wrapGet(getBinary);
GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary);
GetVisitor.prototype.visitDate = wrapGet(getDate);
Expand Down
19 changes: 3 additions & 16 deletions js/src/visitor/set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,7 @@ export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epo
};

/**
 * Copies `value` into `values` at the position recorded for entry `index`.
 *
 * The destination starts at `valueOffsets[index]`; at most
 * `valueOffsets[index + 1] - valueOffsets[index]` bytes of `value` are written.
 * Nothing is written when `index + 1` is not a valid position in `valueOffsets`.
 * @ignore
 */
export const setVariableWidthBytes = <T extends Int32Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
    const hasSlot = index + 1 < valueOffsets.length;
    if (!hasSlot) {
        return;
    }
    const start = valueOffsets[index];
    const capacity = valueOffsets[index + 1] - start;
    values.set(value.subarray(0, capacity), start);
};

/** @ignore */
export const setLargeVariableWidthBytes = <T extends BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
export const setVariableWidthBytes = <T extends Int32Array | BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
if (index + 1 < valueOffsets.length) {
const x = bigIntToNumber(valueOffsets[index]);
const y = bigIntToNumber(valueOffsets[index + 1]);
Expand Down Expand Up @@ -176,13 +167,9 @@ export const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }
/**
 * Writes `value` for entry `index` by delegating to `setVariableWidthBytes`
 * with the data's `values` and `valueOffsets` buffers.
 * @ignore
 */
const setBinary = <T extends Binary>({ values, valueOffsets }: Data<T>, index: number, value: T['TValue']) => {
    return setVariableWidthBytes(values, valueOffsets, index, value);
};
/** @ignore */
const setUtf8 = <T extends Utf8>({ values, valueOffsets }: Data<T>, index: number, value: T['TValue']) => {
const setUtf8 = <T extends Utf8 | LargeUtf8>({ values, valueOffsets }: Data<T>, index: number, value: T['TValue']) => {
    // Encode the value with `encodeUtf8` before handing the bytes to the shared writer.
    const encoded = encodeUtf8(value);
    setVariableWidthBytes(values, valueOffsets, index, encoded);
};
/**
 * Encodes `value` with `encodeUtf8` and writes the bytes for entry `index`
 * through `setLargeVariableWidthBytes`.
 * @ignore
 */
const setLargeUtf8 = <T extends LargeUtf8>({ values, valueOffsets }: Data<T>, index: number, value: T['TValue']) => {
    const encoded = encodeUtf8(value);
    setLargeVariableWidthBytes(values, valueOffsets, index, encoded);
};

/* istanbul ignore next */
export const setDate = <T extends Date_>(data: Data<T>, index: number, value: T['TValue']): void => {
Expand Down Expand Up @@ -381,7 +368,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16);
SetVisitor.prototype.visitFloat32 = wrapSet(setFloat);
SetVisitor.prototype.visitFloat64 = wrapSet(setFloat);
SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8);
SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8);
SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8);
SetVisitor.prototype.visitBinary = wrapSet(setBinary);
SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary);
SetVisitor.prototype.visitDate = wrapSet(setDate);
Expand Down
24 changes: 3 additions & 21 deletions js/src/visitor/vectorassembler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export interface VectorAssembler extends Visitor {
visitInt<T extends Int>(data: Data<T>): this;
visitFloat<T extends Float>(data: Data<T>): this;
visitUtf8<T extends Utf8>(data: Data<T>): this;
visitLargeUtf8<T extends LargeUtf8>(data: Data<T>): this;
visitBinary<T extends Binary>(data: Data<T>): this;
visitFixedSizeBinary<T extends FixedSizeBinary>(data: Data<T>): this;
visitDate<T extends Date_>(data: Data<T>): this;
Expand Down Expand Up @@ -202,29 +203,10 @@ function assembleFlatVector<T extends Int | Float | FixedSizeBinary | Date_ | Ti
}

/**
 * Reads the first offset (`valueOffsets[0]`) and the offset at position `length`
 * from `data`, then forwards them together with `length`, `values` and
 * `valueOffsets` to `_assembleFlatListVector`, preserving `this`.
 * @ignore
 */
function assembleFlatListVector<T extends Utf8 | Binary>(this: VectorAssembler, data: Data<T>) {
    const { length, values, valueOffsets } = data;
    const begin = valueOffsets[0];
    const end = valueOffsets[length];
    return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets);
}

/** @ignore */
function assembleLargeFlatListVector<T extends Utf8 | Binary | LargeUtf8>(this: VectorAssembler, data: Data<T>) {
function assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(this: VectorAssembler, data: Data<T>) {
const { length, values, valueOffsets } = data;
const begin = bigIntToNumber(valueOffsets[0]);
const end = bigIntToNumber(valueOffsets[length]);
return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets);
}

/** @ignore */
function _assembleFlatListVector<T extends Utf8 | Binary | LargeUtf8>(
this: VectorAssembler,
length: number,
begin: number,
end: number,
values: T['TArray'],
valueOffsets: T['TOffsetArray']
) {
const byteLength = Math.min(end - begin, values.byteLength - begin);
// Push in the order FlatList types read their buffers
addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets as any)); // valueOffsets buffer first
Expand Down Expand Up @@ -255,7 +237,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector;
VectorAssembler.prototype.visitInt = assembleFlatVector;
VectorAssembler.prototype.visitFloat = assembleFlatVector;
VectorAssembler.prototype.visitUtf8 = assembleFlatListVector;
VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector;
VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector;
VectorAssembler.prototype.visitBinary = assembleFlatListVector;
VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector;
VectorAssembler.prototype.visitDate = assembleFlatVector;
Expand Down

0 comments on commit 9c097d5

Please sign in to comment.