Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add null to struct fields #1533

Merged
merged 1 commit into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdexcept>

#include "common/exception.h"
#include "common/null_buffer.h"
#include "common/ser_deser.h"
#include "common/types/types_include.h"

Expand Down Expand Up @@ -478,6 +479,8 @@ uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) {
}
}

// This function returns the size of the dataType when stored in a row layout (e.g.
// factorizedTable).
uint32_t Types::getDataTypeSize(const DataType& dataType) {
switch (dataType.typeID) {
case FIXED_LIST: {
Expand All @@ -491,6 +494,7 @@ uint32_t Types::getDataTypeSize(const DataType& dataType) {
for (auto& childType : structTypeInfo->getChildrenTypes()) {
size += getDataTypeSize(*childType);
}
size += NullBuffer::getNumBytesForNullValues(structTypeInfo->getChildrenNames().size());
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
return size;
}
case INTERNAL_ID:
Expand Down
12 changes: 9 additions & 3 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "common/types/value.h"

#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/string_utils.h"

namespace kuzu {
Expand Down Expand Up @@ -378,11 +378,17 @@ std::vector<std::unique_ptr<Value>> Value::convertKUStructToVector(const uint8_t
std::vector<std::unique_ptr<Value>> structVal;
auto childrenTypes = structTypeInfo->getChildrenTypes();
auto numFields = childrenTypes.size();
auto structNullValues = kuStruct;
auto structValues = structNullValues + NullBuffer::getNumBytesForNullValues(numFields);
for (auto i = 0; i < numFields; i++) {
auto childValue = std::make_unique<Value>(Value::createDefaultValue(*childrenTypes[i]));
childValue->copyValueFrom(kuStruct);
if (NullBuffer::isNull(structNullValues, i)) {
childValue->setNull(true);
} else {
childValue->copyValueFrom(structValues);
}
structVal.emplace_back(std::move(childValue));
kuStruct += Types::getDataTypeSize(*childrenTypes[i]);
structValues += Types::getDataTypeSize(*childrenTypes[i]);
}
return structVal;
}
Expand Down
44 changes: 35 additions & 9 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "common/vector/value_vector_utils.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"

using namespace kuzu;
using namespace common;
Expand All @@ -10,9 +10,18 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
ValueVector& resultVector, uint64_t pos, const uint8_t* srcData) {
switch (resultVector.dataType.typeID) {
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&resultVector)) {
copyNonNullDataWithSameTypeIntoPos(*childVector, pos, srcData);
srcData += Types::getDataTypeSize(childVector->dataType);
auto structFields = StructVector::getChildrenVectors(&resultVector);
auto structNullBytes = srcData;
auto structValues =
structNullBytes + NullBuffer::getNumBytesForNullValues(structFields.size());
for (auto i = 0u; i < structFields.size(); i++) {
auto structField = structFields[i];
if (NullBuffer::isNull(structNullBytes, i)) {
structField->setNull(pos, true /* isNull */);
} else {
copyNonNullDataWithSameTypeIntoPos(*structField, pos, structValues);
}
structValues += Types::getDataTypeSize(structField->dataType);
}
} break;
case VAR_LIST: {
Expand Down Expand Up @@ -45,9 +54,22 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
uint64_t pos, uint8_t* dstData, InMemOverflowBuffer& dstOverflowBuffer) {
switch (srcVector.dataType.typeID) {
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&srcVector)) {
copyNonNullDataWithSameTypeOutFromPos(*childVector, pos, dstData, dstOverflowBuffer);
dstData += Types::getDataTypeSize(childVector->dataType);
// The storage structure of STRUCT type in factorizedTable is:
// [NULLBYTES, FIELD1, FIELD2, ...]
auto structFields = StructVector::getChildrenVectors(&srcVector);
NullBuffer::initNullBytes(dstData, structFields.size());
auto structNullBytes = dstData;
auto structValues =
structNullBytes + NullBuffer::getNumBytesForNullValues(structFields.size());
for (auto i = 0u; i < structFields.size(); i++) {
auto structField = structFields[i];
if (structField->isNull(pos)) {
NullBuffer::setNull(structNullBytes, i);
} else {
copyNonNullDataWithSameTypeOutFromPos(
*structField, pos, structValues, dstOverflowBuffer);
}
structValues += Types::getDataTypeSize(structField->dataType);
}
} break;
case VAR_LIST: {
Expand Down Expand Up @@ -111,8 +133,12 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
for (auto i = 0u; i < srcFields.size(); i++) {
auto srcField = srcFields[i];
auto dstField = dstFields[i];
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
if (srcField->isNull(srcPos)) {
dstField->setNull(dstPos, true /* isNull */);
} else {
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
}
}
} break;
case STRING: {
Expand Down
51 changes: 51 additions & 0 deletions src/function/vector_struct_operations.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "function/struct/vector_struct_operations.h"

#include "binder/expression/literal_expression.h"
#include "binder/expression_binder.h"
#include "function/function_definition.h"

namespace kuzu {
Expand All @@ -19,13 +20,63 @@ std::unique_ptr<FunctionBindData> StructPackVectorOperations::bindFunc(
const binder::expression_vector& arguments, kuzu::function::FunctionDefinition* definition) {
std::vector<std::unique_ptr<common::StructField>> fields;
for (auto& argument : arguments) {
if (argument->getDataType().typeID == common::ANY) {
binder::ExpressionBinder::resolveAnyDataType(
*argument, common::DataType{common::INT64});
}
fields.emplace_back(std::make_unique<common::StructField>(
argument->getAlias(), argument->getDataType().copy()));
}
auto resultType = common::DataType(std::move(fields));
return std::make_unique<FunctionBindData>(resultType);
}

// Populates the struct result's field vectors from the StructPack arguments. The i-th
// parameter is paired with the i-th child vector of `result`.
void StructPackVectorOperations::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    const auto numParameters = parameters.size();
    for (auto fieldIdx = 0u; fieldIdx < numParameters; ++fieldIdx) {
        const auto& argument = parameters[fieldIdx];
        // Arguments sharing the result's state are left alone; only an argument whose state
        // differs from the result's state has its value copied into the matching child vector.
        if (argument->state != result.state) {
            copyParameterValueToStructFieldVector(argument.get(),
                common::StructVector::getChildVector(&result, fieldIdx).get());
        }
    }
}

// Writes the single selected value of a flat `parameter` into `structField`: into its first
// selected position when structField's state is flat, otherwise into every selected position.
// When the source value is null, the destination position is marked null instead of copied.
void StructPackVectorOperations::copyParameterValueToStructFieldVector(
    const common::ValueVector* parameter, common::ValueVector* structField) {
    // Only flat parameters are expected here: an unFlat parameter must share the result's
    // state, in which case no copy into the struct field vector is needed.
    assert(parameter->state->isFlat());
    auto paramPos = parameter->state->selVector->selectedPositions[0];
    auto paramData = parameter->getData() + parameter->getNumBytesPerValue() * paramPos;
    const bool paramIsNull = parameter->isNull(paramPos);

    // Stores the parameter's value (or its null-ness) at one position of the field vector.
    const auto writeAt = [&](auto dstPos) {
        if (paramIsNull) {
            structField->setNull(dstPos, true /* isNull */);
            return;
        }
        common::ValueVectorUtils::copyValue(
            structField->getData() + structField->getNumBytesPerValue() * dstPos, *structField,
            paramData, *parameter);
    };

    if (structField->state->isFlat()) {
        writeAt(structField->state->selVector->selectedPositions[0]);
        return;
    }
    // UnFlat field vector: replicate the same source value at each selected position.
    for (auto k = 0u; k < structField->state->selVector->selectedSize; k++) {
        writeAt(structField->state->selVector->selectedPositions[k]);
    }
}

std::vector<std::unique_ptr<VectorOperationDefinition>>
StructExtractVectorOperations::getDefinitions() {
std::vector<std::unique_ptr<VectorOperationDefinition>> definitions;
Expand Down
File renamed without changes.
35 changes: 2 additions & 33 deletions src/include/function/struct/vector_struct_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,40 +11,9 @@ struct StructPackVectorOperations : public VectorOperations {
static std::unique_ptr<FunctionBindData> bindFunc(
const binder::expression_vector& arguments, FunctionDefinition* definition);
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result) {
for (auto i = 0u; i < parameters.size(); i++) {
auto& parameter = parameters[i];
if (parameter->state == result.state) {
continue;
}
// If the parameter's state is inconsistent with the result's state, we need to copy the
// parameter's value to the corresponding child vector.
copyParameterValueToStructFieldVector(
parameter.get(), common::StructVector::getChildVector(&result, i).get());
}
}
common::ValueVector& result);
static void copyParameterValueToStructFieldVector(
const common::ValueVector* parameter, common::ValueVector* structField) {
// If the parameter is unFlat, then its state must be consistent with the result's state.
// Thus, we don't need to copy values to structFieldVector.
assert(parameter->state->isFlat());
auto srcValue =
parameter->getData() +
parameter->getNumBytesPerValue() * parameter->state->selVector->selectedPositions[0];
if (structField->state->isFlat()) {
common::ValueVectorUtils::copyValue(
structField->getData() + structField->getNumBytesPerValue() *
structField->state->selVector->selectedPositions[0],
*structField, srcValue, *parameter);
} else {
for (auto j = 0u; j < structField->state->selVector->selectedSize; j++) {
auto pos = structField->state->selVector->selectedPositions[j];
common::ValueVectorUtils::copyValue(
structField->getData() + structField->getNumBytesPerValue() * pos, *structField,
srcValue, *parameter);
}
}
}
const common::ValueVector* parameter, common::ValueVector* structField);
};

struct StructExtractBindData : public FunctionBindData {
Expand Down
2 changes: 1 addition & 1 deletion src/processor/result/factorized_table.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "processor/result/factorized_table.h"

#include "common/exception.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/vector/value_vector_utils.h"

using namespace kuzu::common;
Expand Down
3 changes: 2 additions & 1 deletion src/storage/copier/rel_copy_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,12 +775,13 @@ void RelCopyExecutor::calculateListHeadersTask(offset_t numNodes, atomic_uint64_
auto numNodesInChunk =
std::min((offset_t)ListsMetadataConstants::LISTS_CHUNK_SIZE, numNodes - nodeOffset);
csr_offset_t csrOffset = (*listSizes)[chunkNodeOffset].load(std::memory_order_relaxed);
for (auto i = 1u; i <= numNodesInChunk; i++) {
for (auto i = 1u; i < numNodesInChunk; i++) {
auto currNodeOffset = chunkNodeOffset + i;
auto numElementsInList = (*listSizes)[currNodeOffset].load(std::memory_order_relaxed);
listHeadersBuilder->setCSROffset(currNodeOffset, csrOffset);
csrOffset += numElementsInList;
}
listHeadersBuilder->setCSROffset(chunkNodeOffset + numNodesInChunk, csrOffset);
nodeOffset += numNodesInChunk;
}
logger->trace("End: adjListHeadersBuilder={0:p}", (void*)listHeadersBuilder);
Expand Down
6 changes: 4 additions & 2 deletions src/storage/in_mem_storage_structure/in_mem_node_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,10 @@ NodeInMemStructColumn::NodeInMemStructColumn(
->getStructFields();
for (auto& structField : structFields) {
auto fieldPath = StorageUtils::appendStructFieldName(filePath, structField->getName());
fields.push_back(NodeInMemColumnFactory::getNodeInMemColumn(
fieldPath, *structField->getType(), numElements));
auto fieldColumn = NodeInMemColumnFactory::getNodeInMemColumn(
fieldPath, *structField->getType(), numElements);
fieldColumn->getNullMask()->setAllNonNull();
fields.push_back(std::move(fieldColumn));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/storage/storage_structure/disk_overflow_file.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "storage/storage_structure/disk_overflow_file.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/string_utils.h"
#include "common/type_utils.h"

Expand Down
10 changes: 10 additions & 0 deletions test/test_files/tinysnb/projection/single_label.test
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,16 @@ Dan|Carol
---- 1
1|10|[4,5]

-NAME ReturnStructLiteralWithNull
-QUERY RETURN {info: {name: "AliceBobCarolDan", gender: "female", hobby: null}, height: 1.8, age: null}
---- 1
{INFO: {NAME: AliceBobCarolDan, GENDER: female, HOBBY: }, HEIGHT: 1.800000, AGE: }

-NAME ReturnStructListLiteralWithNull
-QUERY RETURN {info: {name: "smith", gender: null, hobby: [null, "footBall"]}, height: 1.8, age: null}
---- 1
{INFO: {NAME: smith, GENDER: , HOBBY: [,footBall]}, HEIGHT: 1.800000, AGE: }

-NAME ReturnStruct
-QUERY MATCH (m:movies) RETURN m.description
---- 3
Expand Down