Skip to content

Commit

Permalink
Add null to struct fields
Browse files: browse the repository at this point in the history
  • Loading branch information
acquamarin committed May 11, 2023
1 parent 4f02b2d commit f9bb78f
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 50 deletions.
2 changes: 2 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdexcept>

#include "common/exception.h"
#include "common/null_buffer.h"
#include "common/ser_deser.h"
#include "common/types/types_include.h"

Expand Down Expand Up @@ -491,6 +492,7 @@ uint32_t Types::getDataTypeSize(const DataType& dataType) {
for (auto& childType : structTypeInfo->getChildrenTypes()) {
size += getDataTypeSize(*childType);
}
size += NullBuffer::getNumBytesForNullValues(structTypeInfo->getChildrenNames().size());
return size;
}
case INTERNAL_ID:
Expand Down
12 changes: 9 additions & 3 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "common/types/value.h"

#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/string_utils.h"

namespace kuzu {
Expand Down Expand Up @@ -378,11 +378,17 @@ std::vector<std::unique_ptr<Value>> Value::convertKUStructToVector(const uint8_t
std::vector<std::unique_ptr<Value>> structVal;
auto childrenTypes = structTypeInfo->getChildrenTypes();
auto numFields = childrenTypes.size();
auto structNullValues = kuStruct;
auto structValues = structNullValues + NullBuffer::getNumBytesForNullValues(numFields);
for (auto i = 0; i < numFields; i++) {
auto childValue = std::make_unique<Value>(Value::createDefaultValue(*childrenTypes[i]));
childValue->copyValueFrom(kuStruct);
if (NullBuffer::isNull(structNullValues, i)) {
childValue->setNull(true);
} else {
childValue->copyValueFrom(structValues);
}
structVal.emplace_back(std::move(childValue));
kuStruct += Types::getDataTypeSize(*childrenTypes[i]);
structValues += Types::getDataTypeSize(*childrenTypes[i]);
}
return structVal;
}
Expand Down
44 changes: 35 additions & 9 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "common/vector/value_vector_utils.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"

using namespace kuzu;
using namespace common;
Expand All @@ -10,9 +10,18 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
ValueVector& resultVector, uint64_t pos, const uint8_t* srcData) {
switch (resultVector.dataType.typeID) {
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&resultVector)) {
copyNonNullDataWithSameTypeIntoPos(*childVector, pos, srcData);
srcData += Types::getDataTypeSize(childVector->dataType);
auto structFields = StructVector::getChildrenVectors(&resultVector);
auto structNullBytes = srcData;
auto structValues =
structNullBytes + NullBuffer::getNumBytesForNullValues(structFields.size());
for (auto i = 0u; i < structFields.size(); i++) {
auto structField = structFields[i];
if (NullBuffer::isNull(structNullBytes, i)) {
structField->setNull(pos, true /* isNull */);
} else {
copyNonNullDataWithSameTypeIntoPos(*structField, pos, structValues);
}
structValues += Types::getDataTypeSize(structField->dataType);
}
} break;
case VAR_LIST: {
Expand Down Expand Up @@ -45,9 +54,22 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
uint64_t pos, uint8_t* dstData, InMemOverflowBuffer& dstOverflowBuffer) {
switch (srcVector.dataType.typeID) {
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&srcVector)) {
copyNonNullDataWithSameTypeOutFromPos(*childVector, pos, dstData, dstOverflowBuffer);
dstData += Types::getDataTypeSize(childVector->dataType);
// The storage structure of STRUCT type in factorizedTable is:
// [NULLBYTES, FIELD1, FIELD2, ...]
auto structFields = StructVector::getChildrenVectors(&srcVector);
NullBuffer::initNullBytes(dstData, structFields.size());
auto structNullBytes = dstData;
auto structValues =
structNullBytes + NullBuffer::getNumBytesForNullValues(structFields.size());
for (auto i = 0u; i < structFields.size(); i++) {
auto structField = structFields[i];
if (structField->isNull(pos)) {
NullBuffer::setNull(structNullBytes, i);
} else {
copyNonNullDataWithSameTypeOutFromPos(
*structField, pos, structValues, dstOverflowBuffer);
}
structValues += Types::getDataTypeSize(structField->dataType);
}
} break;
case VAR_LIST: {
Expand Down Expand Up @@ -111,8 +133,12 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
for (auto i = 0u; i < srcFields.size(); i++) {
auto srcField = srcFields[i];
auto dstField = dstFields[i];
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
if (srcField->isNull(srcPos)) {
dstField->setNull(dstPos, true /* isNull */);
} else {
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
}
}
} break;
case STRING: {
Expand Down
51 changes: 51 additions & 0 deletions src/function/vector_struct_operations.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "function/struct/vector_struct_operations.h"

#include "binder/expression/literal_expression.h"
#include "binder/expression_binder.h"
#include "function/function_definition.h"

namespace kuzu {
Expand All @@ -19,13 +20,63 @@ std::unique_ptr<FunctionBindData> StructPackVectorOperations::bindFunc(
const binder::expression_vector& arguments, kuzu::function::FunctionDefinition* definition) {
// Builds the bind data for struct packing: one StructField is created per argument, named by the
// argument's alias and typed with a copy of the argument's data type. The resulting DataType is
// wrapped in a FunctionBindData. `definition` is not read in this body.
std::vector<std::unique_ptr<common::StructField>> fields;
for (auto& argument : arguments) {
// An argument whose type is still ANY is resolved to INT64 before its type is recorded.
// NOTE(review): presumably this covers untyped NULL literals such as `{age: null}` — confirm.
if (argument->getDataType().typeID == common::ANY) {
binder::ExpressionBinder::resolveAnyDataType(
*argument, common::DataType{common::INT64});
}
fields.emplace_back(std::make_unique<common::StructField>(
argument->getAlias(), argument->getDataType().copy()));
}
// The field list is moved into the DataType constructor to form the function's result type.
auto resultType = common::DataType(std::move(fields));
return std::make_unique<FunctionBindData>(resultType);
}

// Populates the child vectors of `result` from `parameters`; the i-th parameter corresponds to the
// i-th child vector of `result`.
//
// A parameter whose state is the very same state object as the result's is skipped. Every other
// parameter has its value copied into the matching child vector through
// copyParameterValueToStructFieldVector.
void StructPackVectorOperations::execFunc(
    const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
    common::ValueVector& result) {
    for (auto fieldIdx = 0u; fieldIdx < parameters.size(); ++fieldIdx) {
        const auto& param = parameters[fieldIdx];
        const bool sharesResultState = (param->state == result.state);
        if (!sharesResultState) {
            // The parameter's state is inconsistent with the result's state, so its value has to
            // be copied into the corresponding child vector.
            const auto& fieldVector = common::StructVector::getChildVector(&result, fieldIdx);
            copyParameterValueToStructFieldVector(param.get(), fieldVector.get());
        }
    }
}

// Broadcasts the single selected value of a flat `parameter` into `structField`.
//
// parameter:   source vector; asserted to be flat. Only the value at its first selected position
//              is read.
// structField: destination child vector. If its state is flat, only its first selected position
//              is written; otherwise every selected position is written.
//
// When the source value is null, the destination positions are marked null instead of being
// copied into. On the non-null path the destination's null flag is left untouched.
void StructPackVectorOperations::copyParameterValueToStructFieldVector(
    const common::ValueVector* parameter, common::ValueVector* structField) {
    // An unFlat parameter must share the result's state, in which case the caller never gets
    // here; hence only flat parameters are expected.
    assert(parameter->state->isFlat());
    auto paramPos = parameter->state->selVector->selectedPositions[0];
    auto paramValue = parameter->getData() + parameter->getNumBytesPerValue() * paramPos;
    bool paramIsNull = parameter->isNull(paramPos);

    // Writes the parameter's value (or null marker) into one position of the struct field.
    auto writeAt = [&](auto dstPos) {
        if (paramIsNull) {
            structField->setNull(dstPos, true /* isNull */);
            return;
        }
        common::ValueVectorUtils::copyValue(
            structField->getData() + structField->getNumBytesPerValue() * dstPos, *structField,
            paramValue, *parameter);
    };

    if (structField->state->isFlat()) {
        writeAt(structField->state->selVector->selectedPositions[0]);
        return;
    }
    for (auto selIdx = 0u; selIdx < structField->state->selVector->selectedSize; ++selIdx) {
        writeAt(structField->state->selVector->selectedPositions[selIdx]);
    }
}

std::vector<std::unique_ptr<VectorOperationDefinition>>
StructExtractVectorOperations::getDefinitions() {
std::vector<std::unique_ptr<VectorOperationDefinition>> definitions;
Expand Down
File renamed without changes.
35 changes: 2 additions & 33 deletions src/include/function/struct/vector_struct_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,40 +11,9 @@ struct StructPackVectorOperations : public VectorOperations {
static std::unique_ptr<FunctionBindData> bindFunc(
const binder::expression_vector& arguments, FunctionDefinition* definition);
static void execFunc(const std::vector<std::shared_ptr<common::ValueVector>>& parameters,
common::ValueVector& result) {
for (auto i = 0u; i < parameters.size(); i++) {
auto& parameter = parameters[i];
if (parameter->state == result.state) {
continue;
}
// If the parameter's state is inconsistent with the result's state, we need to copy the
// parameter's value to the corresponding child vector.
copyParameterValueToStructFieldVector(
parameter.get(), common::StructVector::getChildVector(&result, i).get());
}
}
common::ValueVector& result);
static void copyParameterValueToStructFieldVector(
const common::ValueVector* parameter, common::ValueVector* structField) {
// If the parameter is unFlat, then its state must be consistent with the result's state.
// Thus, we don't need to copy values to structFieldVector.
assert(parameter->state->isFlat());
auto srcValue =
parameter->getData() +
parameter->getNumBytesPerValue() * parameter->state->selVector->selectedPositions[0];
if (structField->state->isFlat()) {
common::ValueVectorUtils::copyValue(
structField->getData() + structField->getNumBytesPerValue() *
structField->state->selVector->selectedPositions[0],
*structField, srcValue, *parameter);
} else {
for (auto j = 0u; j < structField->state->selVector->selectedSize; j++) {
auto pos = structField->state->selVector->selectedPositions[j];
common::ValueVectorUtils::copyValue(
structField->getData() + structField->getNumBytesPerValue() * pos, *structField,
srcValue, *parameter);
}
}
}
const common::ValueVector* parameter, common::ValueVector* structField);
};

struct StructExtractBindData : public FunctionBindData {
Expand Down
2 changes: 1 addition & 1 deletion src/processor/result/factorized_table.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "processor/result/factorized_table.h"

#include "common/exception.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/vector/value_vector_utils.h"

using namespace kuzu::common;
Expand Down
3 changes: 2 additions & 1 deletion src/storage/copier/rel_copy_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,12 +775,13 @@ void RelCopyExecutor::calculateListHeadersTask(offset_t numNodes, atomic_uint64_
auto numNodesInChunk =
std::min((offset_t)ListsMetadataConstants::LISTS_CHUNK_SIZE, numNodes - nodeOffset);
csr_offset_t csrOffset = (*listSizes)[chunkNodeOffset].load(std::memory_order_relaxed);
for (auto i = 1u; i <= numNodesInChunk; i++) {
for (auto i = 1u; i < numNodesInChunk; i++) {
auto currNodeOffset = chunkNodeOffset + i;
auto numElementsInList = (*listSizes)[currNodeOffset].load(std::memory_order_relaxed);
listHeadersBuilder->setCSROffset(currNodeOffset, csrOffset);
csrOffset += numElementsInList;
}
listHeadersBuilder->setCSROffset(chunkNodeOffset + numNodesInChunk, csrOffset);
nodeOffset += numNodesInChunk;
}
logger->trace("End: adjListHeadersBuilder={0:p}", (void*)listHeadersBuilder);
Expand Down
6 changes: 4 additions & 2 deletions src/storage/in_mem_storage_structure/in_mem_node_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,10 @@ NodeInMemStructColumn::NodeInMemStructColumn(
->getStructFields();
for (auto& structField : structFields) {
auto fieldPath = StorageUtils::appendStructFieldName(filePath, structField->getName());
fields.push_back(NodeInMemColumnFactory::getNodeInMemColumn(
fieldPath, *structField->getType(), numElements));
auto fieldColumn = NodeInMemColumnFactory::getNodeInMemColumn(
fieldPath, *structField->getType(), numElements);
fieldColumn->getNullMask()->setAllNonNull();
fields.push_back(std::move(fieldColumn));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/storage/storage_structure/disk_overflow_file.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "storage/storage_structure/disk_overflow_file.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_bytes.h"
#include "common/null_buffer.h"
#include "common/string_utils.h"
#include "common/type_utils.h"

Expand Down
10 changes: 10 additions & 0 deletions test/test_files/tinysnb/projection/single_label.test
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,16 @@ Dan|Carol
---- 1
1|10|[4,5]

-NAME ReturnStructLiteralWithNull
-QUERY RETURN {info: {name: "AliceBobCarolDan", gender: "female", hobby: null}, height: 1.8, age: null}
---- 1
{INFO: {NAME: AliceBobCarolDan, GENDER: female, HOBBY: }, HEIGHT: 1.800000, AGE: }

-NAME ReturnStructListLiteralWithNull
-QUERY RETURN {info: {name: "smith", gender: null, hobby: [null, "footBall"]}, height: 1.8, age: null}
---- 1
{INFO: {NAME: smith, GENDER: , HOBBY: [,footBall]}, HEIGHT: 1.800000, AGE: }

-NAME ReturnStruct
-QUERY MATCH (m:movies) RETURN m.description
---- 3
Expand Down

0 comments on commit f9bb78f

Please sign in to comment.