Skip to content

Commit

Permalink
Add support to struct of list
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed May 8, 2023
1 parent bfc1b0d commit 0cec208
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 139 deletions.
2 changes: 1 addition & 1 deletion src/common/data_chunk/data_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace kuzu {
namespace common {

// Installs valueVector into slot `pos` of this chunk. The vector is first bound to the chunk's
// state through setState(), then moved into valueVectors[pos]. `pos` must already be a valid
// index: it is only checked with assert, and the container is not grown here.
// NOTE(review): the two consecutive setState() calls below pass the same member (`this->state`
// and `state` name the same object here) and look like the before/after lines of the captured
// diff -- confirm that only one of them is intended.
void DataChunk::insert(uint32_t pos, std::shared_ptr<ValueVector> valueVector) {
valueVector->setState(this->state);
valueVector->setState(state);
assert(valueVectors.size() > pos);
valueVectors[pos] = std::move(valueVector);
}
Expand Down
13 changes: 11 additions & 2 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,17 @@ void StringAuxiliaryBuffer::addString(
InMemOverflowBufferUtils::copyString(value, len, entry, *inMemOverflowBuffer);
}

// Creates one child ValueVector per field of the struct type described by `type`.
// The field types are read from the type's extra type info, and every child vector is built with
// the same memoryManager. Children are stored in field order.
StructAuxiliaryBuffer::StructAuxiliaryBuffer(
    const DataType& type, storage::MemoryManager* memoryManager) {
    auto* typeInfo = reinterpret_cast<StructTypeInfo*>(type.getExtraTypeInfo());
    const auto& fieldTypes = typeInfo->getChildrenTypes();
    // Size the container once up front so the appends below do not reallocate.
    childrenVectors.reserve(fieldTypes.size());
    for (auto fieldIdx = 0u; fieldIdx < fieldTypes.size(); ++fieldIdx) {
        childrenVectors.emplace_back(
            std::make_shared<ValueVector>(*fieldTypes[fieldIdx], memoryManager));
    }
}

// Builds the auxiliary buffer of a list vector: capacity starts at DEFAULT_VECTOR_CAPACITY, size
// starts at 0, and dataVector is a newly created ValueVector of type dataVectorType that uses the
// given memoryManager.
// NOTE(review): two parameter lists follow the constructor name (one taking DataType&, one taking
// const DataType&); they read as the old and new signature lines of the captured diff -- confirm
// which one applies.
ListAuxiliaryBuffer::ListAuxiliaryBuffer(
kuzu::common::DataType& dataVectorType, storage::MemoryManager* memoryManager)
const DataType& dataVectorType, storage::MemoryManager* memoryManager)
: capacity{common::DEFAULT_VECTOR_CAPACITY}, size{0}, dataVector{std::make_unique<ValueVector>(
dataVectorType, memoryManager)} {}

Expand All @@ -41,7 +50,7 @@ std::unique_ptr<AuxiliaryBuffer> AuxiliaryBufferFactory::getAuxiliaryBuffer(
case STRING:
return std::make_unique<StringAuxiliaryBuffer>(memoryManager);
case STRUCT:
return std::make_unique<StructAuxiliaryBuffer>();
return std::make_unique<StructAuxiliaryBuffer>(type, memoryManager);
case VAR_LIST:
return std::make_unique<ListAuxiliaryBuffer>(*type.getChildType(), memoryManager);
default:
Expand Down
48 changes: 36 additions & 12 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
#include "common/vector/value_vector.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/auxiliary_buffer.h"
#include "common/vector/value_vector_utils.h"

namespace kuzu {
namespace common {

// Constructs a vector for values of `dataType`: determines the per-value byte width, allocates the
// value buffer for DEFAULT_VECTOR_CAPACITY values, creates the null mask, and asks
// AuxiliaryBufferFactory for the auxiliary buffer that matches the type.
// The body reads this->dataType rather than the parameter because the parameter has already been
// moved into the member by the initializer list.
// NOTE(review): the inline sizing/allocation statements and the setNumBytesPerValue() /
// initializeValueBuffer() calls below cover the same work and look like the before/after lines of
// the captured diff -- confirm which set is intended.
ValueVector::ValueVector(DataType dataType, storage::MemoryManager* memoryManager)
: dataType{std::move(dataType)} {
// TODO(Ziyi): remove this if/else statement once we removed the ku_list.
numBytesPerValue = this->dataType.typeID == VAR_LIST ? sizeof(common::list_entry_t) :
Types::getDataTypeSize(this->dataType);
valueBuffer = std::make_unique<uint8_t[]>(numBytesPerValue * DEFAULT_VECTOR_CAPACITY);
setNumBytesPerValue();
initializeValueBuffer();
nullMask = std::make_unique<NullMask>();
auxiliaryBuffer = AuxiliaryBufferFactory::getAuxiliaryBuffer(this->dataType, memoryManager);
}

// Binds this vector to `state`. For STRUCT vectors the same state is also pushed down to every
// child vector; because each child goes through setState() as well, nested structs are covered
// recursively.
void ValueVector::setState(std::shared_ptr<DataChunkState> state) {
    this->state = state;
    if (dataType.typeID != STRUCT) {
        return;
    }
    // Iterate the children by reference: this avoids copying the child container and bumping the
    // reference count of every child shared_ptr just to forward the state.
    for (const auto& childVector : StructVector::getChildrenVectors(this)) {
        childVector->setState(state);
    }
}

bool NodeIDVector::discardNull(ValueVector& vector) {
if (vector.hasNoNullsGuarantee()) {
return true;
Expand Down Expand Up @@ -45,16 +51,34 @@ void ValueVector::setValue(uint32_t pos, T val) {
((T*)valueBuffer.get())[pos] = val;
}

// Specialization for list entries: writes `val` into slot `pos` of the value buffer, viewing the
// buffer as an array of list_entry_t.
template<>
void ValueVector::setValue(uint32_t pos, common::list_entry_t val) {
    auto* listEntries = reinterpret_cast<list_entry_t*>(valueBuffer.get());
    listEntries[pos] = val;
}

// Specialization for std::string: hands the characters and length of `val` to
// StringVector::addString, which stores the string at position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
    auto* chars = val.data();
    const auto numChars = val.length();
    StringVector::addString(this, pos, chars, numChars);
}

// Chooses the byte width of one slot in the value buffer from the vector's data type:
// STRUCT slots hold a struct_entry_t, VAR_LIST slots hold a list_entry_t, and every other type
// uses the size reported by Types::getDataTypeSize.
void ValueVector::setNumBytesPerValue() {
    const auto typeID = dataType.typeID;
    if (typeID == STRUCT) {
        numBytesPerValue = sizeof(struct_entry_t);
    } else if (typeID == VAR_LIST) {
        numBytesPerValue = sizeof(list_entry_t);
    } else {
        numBytesPerValue = Types::getDataTypeSize(dataType);
    }
}

// Allocates the value buffer with room for DEFAULT_VECTOR_CAPACITY values of numBytesPerValue
// bytes each, so numBytesPerValue must already be set when this runs.
void ValueVector::initializeValueBuffer() {
    const auto bufferSizeInBytes = numBytesPerValue * DEFAULT_VECTOR_CAPACITY;
    valueBuffer = std::make_unique<uint8_t[]>(bufferSizeInBytes);
    if (dataType.typeID != STRUCT) {
        return;
    }
    // In a struct vector every struct_entry_t records its own position in the vector, so the
    // entries have to be filled in right after allocation.
    StructVector::initializeEntries(this);
}

template void ValueVector::setValue<nodeID_t>(uint32_t pos, nodeID_t val);
template void ValueVector::setValue<bool>(uint32_t pos, bool val);
template void ValueVector::setValue<int64_t>(uint32_t pos, int64_t val);
Expand All @@ -64,7 +88,7 @@ template void ValueVector::setValue<date_t>(uint32_t pos, date_t val);
template void ValueVector::setValue<timestamp_t>(uint32_t pos, timestamp_t val);
template void ValueVector::setValue<interval_t>(uint32_t pos, interval_t val);
template void ValueVector::setValue<ku_string_t>(uint32_t pos, ku_string_t val);
template void ValueVector::setValue<ku_list_t>(uint32_t pos, ku_list_t val);
template void ValueVector::setValue<list_entry_t>(uint32_t pos, list_entry_t val);

} // namespace common
} // namespace kuzu
103 changes: 39 additions & 64 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,24 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&resultVector)) {
copyNonNullDataWithSameTypeIntoPos(*childVector, pos, srcData);
srcData += childVector->getNumBytesPerValue();
srcData += Types::getDataTypeSize(childVector->dataType);
}
} break;
case VAR_LIST: {
copyKuListToVector(resultVector, pos, *reinterpret_cast<const ku_list_t*>(srcData));
auto srcKuList = *(ku_list_t*)srcData;
auto srcListValues = reinterpret_cast<uint8_t*>(srcKuList.overflowPtr);
auto dstListEntry = ListVector::addList(&resultVector, srcKuList.size);
resultVector.setValue<list_entry_t>(pos, dstListEntry);
auto resultDataVector = common::ListVector::getDataVector(&resultVector);
for (auto i = 0u; i < srcKuList.size; i++) {
copyNonNullDataWithSameTypeIntoPos(
*resultDataVector, dstListEntry.offset + i, srcListValues);
srcListValues += Types::getDataTypeSize(resultDataVector->dataType);
}
} break;
default: {
copyNonNullDataWithSameType(resultVector.dataType, srcData,
resultVector.getData() + pos * resultVector.getNumBytesPerValue(),
resultVector.getData() + pos * Types::getDataTypeSize(resultVector.dataType),
*StringVector::getInMemOverflowBuffer(&resultVector));
}
}
Expand All @@ -31,17 +40,27 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&srcVector)) {
copyNonNullDataWithSameTypeOutFromPos(*childVector, pos, dstData, dstOverflowBuffer);
dstData += childVector->getNumBytesPerValue();
dstData += Types::getDataTypeSize(childVector->dataType);
}
} break;
case VAR_LIST: {
auto kuList = ValueVectorUtils::convertListEntryToKuList(srcVector, pos, dstOverflowBuffer);
memcpy(dstData, &kuList, sizeof(kuList));

auto srcListEntry = srcVector.getValue<list_entry_t>(pos);
auto srcListDataVector = common::ListVector::getDataVector(&srcVector);
ku_list_t dstList;
dstList.size = srcListEntry.size;
InMemOverflowBufferUtils::allocateSpaceForList(dstList,
Types::getDataTypeSize(srcListDataVector->dataType) * dstList.size, dstOverflowBuffer);
for (auto i = 0u; i < srcListEntry.size; i++) {
copyNonNullDataWithSameTypeOutFromPos(*srcListDataVector, srcListEntry.offset + i,
reinterpret_cast<uint8_t*>(dstList.overflowPtr) +
i * Types::getDataTypeSize(srcListDataVector->dataType),
dstOverflowBuffer);
}
memcpy(dstData, &dstList, sizeof(dstList));
} break;
default: {
copyNonNullDataWithSameType(srcVector.dataType,
srcVector.getData() + pos * srcVector.getNumBytesPerValue(), dstData,
srcVector.getData() + pos * Types::getDataTypeSize(srcVector.dataType), dstData,
dstOverflowBuffer);
}
}
Expand All @@ -64,6 +83,18 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
dstValues += numBytesPerValue;
}
} break;
case STRUCT: {
auto srcFields = common::StructVector::getChildrenVectors(&srcVector);
auto dstFields = common::StructVector::getChildrenVectors(&dstVector);
auto srcPos = *(int64_t*)srcValue;
auto dstPos = *(int64_t*)dstValue;
for (auto i = 0u; i < srcFields.size(); i++) {
auto srcField = srcFields[i];
auto dstField = dstFields[i];
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
}
} break;
case STRING: {
common::InMemOverflowBufferUtils::copyString(*(common::ku_string_t*)srcValue,
*(common::ku_string_t*)dstValue, *StringVector::getInMemOverflowBuffer(&dstVector));
Expand All @@ -76,66 +107,10 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect

// Copies one non-null value of `dataType` from srcData to dstData.
// STRING values go through InMemOverflowBufferUtils::copyString with inMemOverflowBuffer; every
// other type is copied as Types::getDataTypeSize(dataType) raw bytes. STRUCT is not handled here
// and is rejected by the assert.
void ValueVectorUtils::copyNonNullDataWithSameType(const DataType& dataType, const uint8_t* srcData,
    uint8_t* dstData, InMemOverflowBuffer& inMemOverflowBuffer) {
    assert(dataType.typeID != STRUCT);
    if (dataType.typeID != STRING) {
        memcpy(dstData, srcData, Types::getDataTypeSize(dataType));
        return;
    }
    auto& srcString = *(ku_string_t*)srcData;
    auto& dstString = *(ku_string_t*)dstData;
    InMemOverflowBufferUtils::copyString(srcString, dstString, inMemOverflowBuffer);
}

// Converts the list stored at `pos` of srcVector (a list_entry_t that refers to elements of the
// vector's data vector) into a ku_list_t whose element storage is allocated from
// dstOverflowBuffer. The new ku_list_t is returned by value.
ku_list_t ValueVectorUtils::convertListEntryToKuList(
const ValueVector& srcVector, uint64_t pos, InMemOverflowBuffer& dstOverflowBuffer) {
auto listEntry = srcVector.getValue<list_entry_t>(pos);
auto listValues = ListVector::getListValues(&srcVector, listEntry);
ku_list_t dstList;
dstList.size = listEntry.size;
// Allocate the destination storage first: (size of the child type) * (number of elements).
InMemOverflowBufferUtils::allocateSpaceForList(dstList,
Types::getDataTypeSize(*srcVector.dataType.getChildType()) * dstList.size,
dstOverflowBuffer);
auto srcDataVector = ListVector::getDataVector(&srcVector);
if (srcDataVector->dataType.typeID == VAR_LIST) {
// Nested list: convert each inner list recursively (it sits at listEntry.offset + i in the data
// vector) and store the resulting ku_list_t in slot i of the destination.
for (auto i = 0u; i < dstList.size; i++) {
auto kuList =
convertListEntryToKuList(*srcDataVector, listEntry.offset + i, dstOverflowBuffer);
(reinterpret_cast<ku_list_t*>(dstList.overflowPtr))[i] = kuList;
}
} else {
// Non-list elements: copy all element bytes with a single memcpy.
memcpy(reinterpret_cast<uint8_t*>(dstList.overflowPtr), listValues,
srcDataVector->getNumBytesPerValue() * listEntry.size);
if (srcDataVector->dataType.typeID == STRING) {
// String elements are additionally copied one by one through copyString with
// dstOverflowBuffer -- presumably so that string payloads kept outside the ku_string_t also
// end up in dstOverflowBuffer; confirm against InMemOverflowBufferUtils::copyString.
for (auto i = 0u; i < dstList.size; i++) {
InMemOverflowBufferUtils::copyString(
(reinterpret_cast<ku_string_t*>(listValues))[i],
(reinterpret_cast<ku_string_t*>(dstList.overflowPtr))[i], dstOverflowBuffer);
}
}
}
return dstList;
}

// Copies srcList (a ku_list_t whose elements are reached through overflowPtr) into dstVector at
// `pos`. A list entry for srcList.size elements is obtained from ListVector::addList and written
// at `pos`; the elements are then copied into the destination's data vector.
void ValueVectorUtils::copyKuListToVector(
ValueVector& dstVector, uint64_t pos, const ku_list_t& srcList) {
auto srcListValues = reinterpret_cast<uint8_t*>(srcList.overflowPtr);
auto dstListEntry = ListVector::addList(&dstVector, srcList.size);
dstVector.setValue<list_entry_t>(pos, dstListEntry);
if (dstVector.dataType.getChildType()->typeID == VAR_LIST) {
// Nested list: each source element is itself a ku_list_t, copied recursively into position
// dstListEntry.offset + i of the data vector.
for (auto i = 0u; i < srcList.size; i++) {
ValueVectorUtils::copyKuListToVector(*ListVector::getDataVector(&dstVector),
dstListEntry.offset + i, reinterpret_cast<ku_list_t*>(srcList.overflowPtr)[i]);
}
} else {
auto dstDataVector = ListVector::getDataVector(&dstVector);
auto dstListValues = ListVector::getListValues(&dstVector, dstListEntry);
// Non-list elements: copy all element bytes with a single memcpy.
memcpy(dstListValues, srcListValues, srcList.size * dstDataVector->getNumBytesPerValue());
if (dstDataVector->dataType.getTypeID() == STRING) {
// String elements are additionally copied one by one through copyString, using the overflow
// buffer of the destination data vector -- presumably so that string payloads kept outside the
// ku_string_t are owned by the destination; confirm against
// InMemOverflowBufferUtils::copyString.
for (auto i = 0u; i < srcList.size; i++) {
InMemOverflowBufferUtils::copyString(
(reinterpret_cast<ku_string_t*>(srcListValues))[i],
(reinterpret_cast<ku_string_t*>(dstListValues))[i],
*StringVector::getInMemOverflowBuffer(dstDataVector));
}
}
}
}
4 changes: 2 additions & 2 deletions src/expression_evaluator/base_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ void BaseExpressionEvaluator::resolveResultStateFromChildren(
for (auto& input : inputEvaluators) {
if (!input->isResultFlat()) {
isResultFlat_ = false;
resultVector->state = input->resultVector->state;
resultVector->setState(input->resultVector->state);
return;
}
}
// All children are flat.
isResultFlat_ = true;
resultVector->state = common::DataChunkState::getSingleValueDataChunkState();
resultVector->setState(common::DataChunkState::getSingleValueDataChunkState());
}

} // namespace evaluator
Expand Down
14 changes: 5 additions & 9 deletions src/expression_evaluator/function_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,11 @@ void FunctionExpressionEvaluator::resolveResultVector(
// If the resultVector and inputVector are in different dataChunks, we should create a new
// child valueVector, which shares the state with the resultVector, instead of reusing the
// inputVector.
for (auto& inputEvaluator : inputEvaluators) {
if (inputEvaluator->resultVector->state != resultVector->state) {
auto structFieldVector = std::make_shared<common::ValueVector>(
inputEvaluator->resultVector->dataType, memoryManager);
structFieldVector->state = resultVector->state;
common::StructVector::addChildVector(resultVector.get(), structFieldVector);
} else {
common::StructVector::addChildVector(
resultVector.get(), inputEvaluator->resultVector);
for (auto i = 0u; i < inputEvaluators.size(); i++) {
auto inputEvaluator = inputEvaluators[i];
if (inputEvaluator->resultVector->state == resultVector->state) {
common::StructVector::referenceVector(
resultVector.get(), i, inputEvaluator->resultVector);
}
}
}
Expand Down
4 changes: 0 additions & 4 deletions src/function/vector_list_operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ std::unique_ptr<FunctionBindData> ListCreationVectorOperation::bindFunc(
throw BinderException(
"Cannot resolve child data type for " + LIST_CREATION_FUNC_NAME + ".");
}
// TODO(Ziyi): Support list of structs.
if (arguments[0]->getDataType().getTypeID() == common::STRUCT) {
throw BinderException("Cannot create a list of structs.");
}
for (auto i = 1u; i < arguments.size(); i++) {
if (arguments[i]->getDataType() != arguments[0]->getDataType()) {
throw BinderException(getListFunctionIncompatibleChildrenTypeErrorMsg(
Expand Down
4 changes: 0 additions & 4 deletions src/include/common/data_chunk/data_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ class DataChunk {

void insert(uint32_t pos, std::shared_ptr<ValueVector> valueVector);

// Appends valueVector as the last entry of valueVectors. The container stores its own copy of the
// shared pointer.
inline void addValueVector(std::shared_ptr<ValueVector> valueVector) {
    valueVectors.emplace_back(valueVector);
}

// Returns how many entries valueVectors currently holds, converted to uint32_t.
inline uint32_t getNumValueVectors() const {
    return static_cast<uint32_t>(valueVectors.size());
}

inline std::shared_ptr<ValueVector> getValueVector(uint64_t valueVectorPos) {
Expand Down
1 change: 1 addition & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ using vector_idx_t = uint32_t;
constexpr vector_idx_t INVALID_VECTOR_IDX = UINT32_MAX;
using block_idx_t = uint64_t;
using field_idx_t = uint64_t;
using struct_entry_t = int64_t;

// System representation for a variable-sized overflow value.
struct overflow_value_t {
Expand Down
9 changes: 5 additions & 4 deletions src/include/common/vector/auxiliary_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ class StringAuxiliaryBuffer : public AuxiliaryBuffer {

class StructAuxiliaryBuffer : public AuxiliaryBuffer {
public:
StructAuxiliaryBuffer() = default;
StructAuxiliaryBuffer(const DataType& type, storage::MemoryManager* memoryManager);

inline void addChildVector(std::shared_ptr<ValueVector> valueVector) {
childrenVectors.emplace_back(std::move(valueVector));
// Makes child slot `idx` refer to vectorToReference, replacing whatever vector was stored there.
// `idx` is used to index childrenVectors directly and is not bounds-checked here.
inline void referenceChildVector(
    vector_idx_t idx, std::shared_ptr<ValueVector> vectorToReference) {
    auto& childSlot = childrenVectors[idx];
    childSlot = std::move(vectorToReference);
}
inline const std::vector<std::shared_ptr<ValueVector>>& getChildrenVectors() const {
return childrenVectors;
Expand All @@ -51,7 +52,7 @@ class StructAuxiliaryBuffer : public AuxiliaryBuffer {
// contiguous subsequence of elements in this vector.
class ListAuxiliaryBuffer : public AuxiliaryBuffer {
public:
ListAuxiliaryBuffer(DataType& dataVectorType, storage::MemoryManager* memoryManager);
ListAuxiliaryBuffer(const DataType& dataVectorType, storage::MemoryManager* memoryManager);

// Returns a raw pointer to the data vector held by this buffer; the buffer keeps ownership.
inline ValueVector* getDataVector() const {
    ValueVector* rawDataVector = dataVector.get();
    return rawDataVector;
}

Expand Down
Loading

0 comments on commit 0cec208

Please sign in to comment.