Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pass nullMask to setValuesFromUncompressed #3247

Merged
merged 1 commit into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/catalog/catalog_entry/table_catalog_entry.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "catalog/catalog_entry/table_catalog_entry.h"

#include <algorithm>

#include "catalog/catalog_entry/node_table_catalog_entry.h"
#include "catalog/catalog_entry/rdf_graph_catalog_entry.h"
#include "catalog/catalog_entry/rel_group_catalog_entry.h"
Expand Down
38 changes: 14 additions & 24 deletions src/common/arrow/arrow_null_mask_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,12 @@ bool ArrowNullMaskTree::copyFromBuffer(const void* buffer, uint64_t srcOffset, u
return true;
}

bool ArrowNullMaskTree::applyParentBitmap(const NullMask* parent, uint64_t count) {
bool ArrowNullMaskTree::applyParentBitmap(const NullMask* parent) {
if (parent == nullptr) {
return false;
}
const uint64_t* buffer = parent->data;
if (buffer != nullptr) {
for (uint64_t i = 0; i < (count >> NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2); i++) {
mask->buffer[i] |= buffer[i];
}
if (parent->getData() != nullptr) {
*mask |= *parent;
return true;
}
return false;
Expand All @@ -50,8 +47,7 @@ void ArrowNullMaskTree::scanListPushDown(const ArrowSchema* schema, const ArrowA
uint64_t srcOffset, uint64_t count) {
const offsetsT* offsets = ((const offsetsT*)array->buffers[1]) + srcOffset;
offsetsT auxiliaryLength = offsets[count] - offsets[0];
NullMask pushDownMask((auxiliaryLength + NullMask::NUM_BITS_PER_NULL_ENTRY - 1) >>
NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2);
NullMask pushDownMask(auxiliaryLength);
for (uint64_t i = 0; i < count; i++) {
pushDownMask.setNullFromRange(offsets[i] - offsets[0], offsets[i + 1] - offsets[i],
isNull(i));
Expand All @@ -70,13 +66,11 @@ void ArrowNullMaskTree::scanStructPushDown(const ArrowSchema* schema, const Arro

ArrowNullMaskTree::ArrowNullMaskTree(const ArrowSchema* schema, const ArrowArray* array,
uint64_t srcOffset, uint64_t count, const NullMask* parentBitmap)
: offset{0},
mask{std::make_shared<common::NullMask>((count + NullMask::NUM_BITS_PER_NULL_ENTRY - 1) >>
NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2)},
: offset{0}, mask{std::make_shared<common::NullMask>(count)},
children(std::make_shared<std::vector<ArrowNullMaskTree>>()) {
if (schema->dictionary != nullptr) {
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
dictionary = std::make_shared<ArrowNullMaskTree>(schema->dictionary, array->dictionary,
array->dictionary->offset, array->dictionary->length);
return;
Expand Down Expand Up @@ -108,34 +102,34 @@ ArrowNullMaskTree::ArrowNullMaskTree(const ArrowSchema* schema, const ArrowArray
case 'w':
case 't':
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
break;
case '+':
switch (arrowType[1]) {
case 'l':
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
scanListPushDown<int32_t>(schema, array, srcOffset, count);
break;
case 'L':
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
scanListPushDown<int64_t>(schema, array, srcOffset, count);
break;
case 'w':
// TODO manh: array null resolution
KU_UNREACHABLE;
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
break;
case 's':
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
scanStructPushDown(schema, array, srcOffset, count);
break;
case 'm':
copyFromBuffer(array->buffers[0], srcOffset, count);
applyParentBitmap(parentBitmap, count);
applyParentBitmap(parentBitmap);
scanListPushDown<int32_t>(schema, array, srcOffset, count);
break;
case 'u': {
Expand Down Expand Up @@ -176,9 +170,7 @@ ArrowNullMaskTree::ArrowNullMaskTree(const ArrowSchema* schema, const ArrowArray
}
}
if (parentBitmap != nullptr) {
for (uint64_t i = 0; i < count >> NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2; i++) {
mask->buffer[i] |= parentBitmap->buffer[i];
}
*mask |= *parentBitmap;
}
} break;
case 'v':
Expand All @@ -192,9 +184,7 @@ ArrowNullMaskTree::ArrowNullMaskTree(const ArrowSchema* schema, const ArrowArray
true);
}
if (parentBitmap != nullptr) {
for (uint64_t i = 0; i < count >> NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2; i++) {
mask->buffer[i] |= parentBitmap->buffer[i];
}
*mask |= *parentBitmap;
}
break;
case 'r':
Expand Down
58 changes: 50 additions & 8 deletions src/common/null_mask.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include "common/null_mask.h"

#include <algorithm>
#include <cstring>

#include "common/assert.h"

namespace kuzu {
namespace common {

Expand All @@ -17,10 +20,41 @@ void NullMask::setNull(uint64_t* nullEntries, uint32_t pos, bool isNull) {

bool NullMask::copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert) {
auto [srcNullEntryPos, srcNullBitPos] = getNullEntryAndBitPos(srcOffset);
auto [dstNullEntryPos, dstNullBitPos] = getNullEntryAndBitPos(dstOffset);
// If both offsets are aligned relative to each other then copy up to the first byte using the
// non-aligned method, then copy aligned, then copy the end unaligned again.
if (!invert && (srcOffset % 8 == dstOffset % 8) && numBitsToCopy >= 8 &&
numBitsToCopy - (srcOffset % 8) >= 8) {
auto numBitsInFirstByte = 8 - (srcOffset % 8);
bool hasNullInSrcNullMask = false;
if (copyUnaligned(srcNullEntries, srcOffset, dstNullEntries, dstOffset, numBitsInFirstByte,
false)) {
hasNullInSrcNullMask = true;
}
auto* src =
reinterpret_cast<const uint8_t*>(srcNullEntries) + (srcOffset + numBitsInFirstByte) / 8;
auto* dst =
reinterpret_cast<uint8_t*>(dstNullEntries) + (dstOffset + numBitsInFirstByte) / 8;
auto numBytesForAlignedCopy = (numBitsToCopy - numBitsInFirstByte) / 8;
memcpy(dst, src, numBytesForAlignedCopy);
if (std::any_of(src, src + numBytesForAlignedCopy, [&](uint8_t val) { return val != 0; })) {
hasNullInSrcNullMask = true;
}
auto lastByteStart = numBitsInFirstByte + numBytesForAlignedCopy * 8;
auto numBitsInLastByte = numBitsToCopy - numBitsInFirstByte - numBytesForAlignedCopy * 8;
return copyUnaligned(srcNullEntries, srcOffset + lastByteStart, dstNullEntries,
dstOffset + lastByteStart, numBitsInLastByte, false) ||
hasNullInSrcNullMask;
} else {
return copyUnaligned(srcNullEntries, srcOffset, dstNullEntries, dstOffset, numBitsToCopy,
invert);
}
}
bool NullMask::copyUnaligned(const uint64_t* srcNullEntries, uint64_t srcOffset,
uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert) {
uint64_t bitPos = 0;
bool hasNullInSrcNullMask = false;
auto [srcNullEntryPos, srcNullBitPos] = getNullEntryAndBitPos(srcOffset + bitPos);
auto [dstNullEntryPos, dstNullBitPos] = getNullEntryAndBitPos(dstOffset + bitPos);
while (bitPos < numBitsToCopy) {
auto curDstNullEntryPos = dstNullEntryPos;
auto curDstNullBitPos = dstNullBitPos;
Expand Down Expand Up @@ -80,16 +114,17 @@ bool NullMask::copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
}

void NullMask::resize(uint64_t capacity) {
auto resizedBuffer = std::make_unique<uint64_t[]>(capacity);
memcpy(resizedBuffer.get(), buffer.get(), numNullEntries);
auto numNullEntries = (capacity + NUM_BITS_PER_NULL_ENTRY - 1) / NUM_BITS_PER_NULL_ENTRY;
auto resizedBuffer = std::make_unique<uint64_t[]>(numNullEntries);
memcpy(resizedBuffer.get(), data.data(), data.size_bytes());
buffer = std::move(resizedBuffer);
data = buffer.get();
numNullEntries = capacity;
data = std::span(buffer.get(), numNullEntries);
}

bool NullMask::copyFromNullBits(const uint64_t* srcNullEntries, uint64_t srcOffset,
uint64_t dstOffset, uint64_t numBitsToCopy, bool invert) {
if (copyNullMask(srcNullEntries, srcOffset, this->data, dstOffset, numBitsToCopy, invert)) {
if (copyNullMask(srcNullEntries, srcOffset, this->data.data(), dstOffset, numBitsToCopy,
invert)) {
this->mayContainNulls = true;
return true;
}
Expand All @@ -100,7 +135,7 @@ void NullMask::setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isN
if (isNull) {
this->mayContainNulls = true;
}
setNullRange(data, offset, numBitsToSet, isNull);
setNullRange(data.data(), offset, numBitsToSet, isNull);
}

void NullMask::setNullRange(uint64_t* nullEntries, uint64_t offset, uint64_t numBitsToSet,
Expand Down Expand Up @@ -145,5 +180,12 @@ void NullMask::setNullRange(uint64_t* nullEntries, uint64_t offset, uint64_t num
}
}

void NullMask::operator|=(const NullMask& other) {
KU_ASSERT(other.data.size() == data.size());
for (size_t i = 0; i < data.size(); i++) {
data[i] |= other.getData()[i];
}
}

} // namespace common
} // namespace kuzu
3 changes: 1 addition & 2 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ void ListAuxiliaryBuffer::resizeDataVector(ValueVector* dataVector) {
auto buffer = std::make_unique<uint8_t[]>(capacity * dataVector->getNumBytesPerValue());
memcpy(buffer.get(), dataVector->valueBuffer.get(), size * dataVector->getNumBytesPerValue());
dataVector->valueBuffer = std::move(buffer);
dataVector->nullMask->resize((capacity + NullMask::NUM_BITS_PER_NULL_ENTRY - 1) >>
NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2);
dataVector->nullMask.resize(capacity);
// If the dataVector is a struct vector, we need to resize its field vectors.
if (dataVector->dataType.getPhysicalType() == PhysicalTypeID::STRUCT) {
resizeStructDataVector(dataVector);
Expand Down
8 changes: 4 additions & 4 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "common/vector/value_vector.h"

#include "common/constants.h"
#include "common/exception/runtime.h"
#include "common/null_buffer.h"
#include "common/types/blob.h"
Expand All @@ -11,7 +12,7 @@ namespace kuzu {
namespace common {

ValueVector::ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager)
: dataType{std::move(dataType)} {
: dataType{std::move(dataType)}, nullMask{DEFAULT_VECTOR_CAPACITY} {
if (this->dataType.getLogicalTypeID() == LogicalTypeID::ANY) {
// LCOV_EXCL_START
// Alternatively we can assign a default type here but I don't think it's a good practice.
Expand All @@ -21,7 +22,6 @@ ValueVector::ValueVector(LogicalType dataType, storage::MemoryManager* memoryMan
}
numBytesPerValue = getDataTypeSize(this->dataType);
initializeValueBuffer();
nullMask = std::make_unique<NullMask>();
auxiliaryBuffer = AuxiliaryBufferFactory::getAuxiliaryBuffer(this->dataType, memoryManager);
}

Expand Down Expand Up @@ -61,7 +61,7 @@ bool ValueVector::discardNull(ValueVector& vector) {

bool ValueVector::setNullFromBits(const uint64_t* srcNullEntries, uint64_t srcOffset,
uint64_t dstOffset, uint64_t numBitsToCopy, bool invert) {
return nullMask->copyFromNullBits(srcNullEntries, srcOffset, dstOffset, numBitsToCopy, invert);
return nullMask.copyFromNullBits(srcNullEntries, srcOffset, dstOffset, numBitsToCopy, invert);
}

template<typename T>
Expand Down Expand Up @@ -383,7 +383,7 @@ void ValueVector::setValue(uint32_t pos, std::string_view val) {
}

void ValueVector::setNull(uint32_t pos, bool isNull) {
nullMask->setNull(pos, isNull);
nullMask.setNull(pos, isNull);
}

void StringVector::addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr) {
Expand Down
4 changes: 2 additions & 2 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static void resolveNestedVector(std::shared_ptr<ValueVector> inputVector, ValueV
// copy data and nullmask from input
memcpy(resultVector->getData(), inputVector->getData(),
numOfEntries * resultVector->getNumBytesPerValue());
resultVector->setNullFromBits(inputVector->getNullMaskData(), 0, 0, numOfEntries);
resultVector->setNullFromBits(inputVector->getNullMask().getData(), 0, 0, numOfEntries);

numOfEntries = ListVector::getDataVectorSize(inputVector.get());
ListVector::resizeDataVector(resultVector, numOfEntries);
Expand Down Expand Up @@ -56,7 +56,7 @@ static void resolveNestedVector(std::shared_ptr<ValueVector> inputVector, ValueV
// copy data and nullmask from input
memcpy(resultVector->getData(), inputVector->getData(),
numOfEntries * resultVector->getNumBytesPerValue());
resultVector->setNullFromBits(inputVector->getNullMaskData(), 0, 0, numOfEntries);
resultVector->setNullFromBits(inputVector->getNullMask().getData(), 0, 0, numOfEntries);

auto inputFieldVectors = StructVector::getFieldVectors(inputVector.get());
auto resultFieldVectors = StructVector::getFieldVectors(resultVector);
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/arrow/arrow_nullmask_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ArrowNullMaskTree {

private:
bool copyFromBuffer(const void* buffer, uint64_t srcOffset, uint64_t count);
bool applyParentBitmap(const NullMask* buffer, uint64_t count);
bool applyParentBitmap(const NullMask* buffer);

template<typename offsetsT>
void scanListPushDown(const ArrowSchema* schema, const ArrowArray* array, uint64_t srcOffset,
Expand Down
Loading
Loading