Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to struct of list #1518

Merged
merged 1 commit into from
May 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/common/data_chunk/data_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace kuzu {
namespace common {

void DataChunk::insert(uint32_t pos, std::shared_ptr<ValueVector> valueVector) {
valueVector->setState(this->state);
valueVector->setState(state);
assert(valueVectors.size() > pos);
valueVectors[pos] = std::move(valueVector);
}
Expand Down
13 changes: 11 additions & 2 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,17 @@ void StringAuxiliaryBuffer::addString(
InMemOverflowBufferUtils::copyString(value, len, entry, *inMemOverflowBuffer);
}

// Builds the auxiliary buffer of a struct-typed vector.
// The struct's extra type info is read from `type`, and one child ValueVector is created for
// each entry returned by getChildrenTypes(), in that order, each constructed with the given
// memoryManager. Capacity for all children is reserved up front.
StructAuxiliaryBuffer::StructAuxiliaryBuffer(
    const DataType& type, storage::MemoryManager* memoryManager) {
    auto typeInfo = reinterpret_cast<StructTypeInfo*>(type.getExtraTypeInfo());
    const auto numFields = typeInfo->getChildrenTypes().size();
    childrenVectors.reserve(numFields);
    for (const auto& fieldType : typeInfo->getChildrenTypes()) {
        auto fieldVector = std::make_shared<ValueVector>(*fieldType, memoryManager);
        childrenVectors.push_back(std::move(fieldVector));
    }
}

ListAuxiliaryBuffer::ListAuxiliaryBuffer(
kuzu::common::DataType& dataVectorType, storage::MemoryManager* memoryManager)
const DataType& dataVectorType, storage::MemoryManager* memoryManager)
: capacity{common::DEFAULT_VECTOR_CAPACITY}, size{0}, dataVector{std::make_unique<ValueVector>(
dataVectorType, memoryManager)} {}

Expand All @@ -41,7 +50,7 @@ std::unique_ptr<AuxiliaryBuffer> AuxiliaryBufferFactory::getAuxiliaryBuffer(
case STRING:
return std::make_unique<StringAuxiliaryBuffer>(memoryManager);
case STRUCT:
return std::make_unique<StructAuxiliaryBuffer>();
return std::make_unique<StructAuxiliaryBuffer>(type, memoryManager);
case VAR_LIST:
return std::make_unique<ListAuxiliaryBuffer>(*type.getChildType(), memoryManager);
default:
Expand Down
48 changes: 36 additions & 12 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
#include "common/vector/value_vector.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/auxiliary_buffer.h"
#include "common/vector/value_vector_utils.h"

namespace kuzu {
namespace common {

ValueVector::ValueVector(DataType dataType, storage::MemoryManager* memoryManager)
: dataType{std::move(dataType)} {
// TODO(Ziyi): remove this if/else statement once we removed the ku_list.
numBytesPerValue = this->dataType.typeID == VAR_LIST ? sizeof(common::list_entry_t) :
Types::getDataTypeSize(this->dataType);
valueBuffer = std::make_unique<uint8_t[]>(numBytesPerValue * DEFAULT_VECTOR_CAPACITY);
setNumBytesPerValue();
initializeValueBuffer();
nullMask = std::make_unique<NullMask>();
auxiliaryBuffer = AuxiliaryBufferFactory::getAuxiliaryBuffer(this->dataType, memoryManager);
}

// Stores `state` as this vector's state. When the vector's type id is STRUCT, the same state
// is also applied to every child vector through setState, so nested struct children receive
// it as well.
void ValueVector::setState(std::shared_ptr<DataChunkState> state) {
    this->state = state;
    if (dataType.typeID != STRUCT) {
        // Non-struct vectors have no children to update.
        return;
    }
    for (const auto& fieldVector : StructVector::getChildrenVectors(this)) {
        fieldVector->setState(state);
    }
}

bool NodeIDVector::discardNull(ValueVector& vector) {
if (vector.hasNoNullsGuarantee()) {
return true;
Expand Down Expand Up @@ -45,16 +51,34 @@ void ValueVector::setValue(uint32_t pos, T val) {
((T*)valueBuffer.get())[pos] = val;
}

template<>
void ValueVector::setValue(uint32_t pos, common::list_entry_t val) {
((list_entry_t*)valueBuffer.get())[pos] = val;
}

// std::string specialization of setValue: forwards the string's character data and its
// length to StringVector::addString for position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
    auto* chars = val.data();
    const auto numChars = val.length();
    StringVector::addString(this, pos, chars, numChars);
}

// Sets numBytesPerValue, the width of one slot in the value buffer, from the vector's type id:
//   STRUCT   -> sizeof(struct_entry_t)
//   VAR_LIST -> sizeof(list_entry_t)
//   others   -> Types::getDataTypeSize(dataType)
void ValueVector::setNumBytesPerValue() {
    const auto typeID = dataType.typeID;
    if (typeID == STRUCT) {
        numBytesPerValue = sizeof(struct_entry_t);
    } else if (typeID == VAR_LIST) {
        numBytesPerValue = sizeof(list_entry_t);
    } else {
        numBytesPerValue = Types::getDataTypeSize(dataType);
    }
}

// Allocates the value buffer with room for DEFAULT_VECTOR_CAPACITY slots of numBytesPerValue
// bytes each, so numBytesPerValue must already be set when this runs.
void ValueVector::initializeValueBuffer() {
    const auto bufferSizeInBytes = numBytesPerValue * DEFAULT_VECTOR_CAPACITY;
    valueBuffer = std::make_unique<uint8_t[]>(bufferSizeInBytes);
    if (dataType.typeID != STRUCT) {
        return;
    }
    // For struct valueVectors, each struct_entry_t stores its current position in the
    // valueVector, so the entries are filled in right after allocation.
    StructVector::initializeEntries(this);
}

template void ValueVector::setValue<nodeID_t>(uint32_t pos, nodeID_t val);
template void ValueVector::setValue<bool>(uint32_t pos, bool val);
template void ValueVector::setValue<int64_t>(uint32_t pos, int64_t val);
Expand All @@ -64,7 +88,7 @@ template void ValueVector::setValue<date_t>(uint32_t pos, date_t val);
template void ValueVector::setValue<timestamp_t>(uint32_t pos, timestamp_t val);
template void ValueVector::setValue<interval_t>(uint32_t pos, interval_t val);
template void ValueVector::setValue<ku_string_t>(uint32_t pos, ku_string_t val);
template void ValueVector::setValue<ku_list_t>(uint32_t pos, ku_list_t val);
template void ValueVector::setValue<list_entry_t>(uint32_t pos, list_entry_t val);

} // namespace common
} // namespace kuzu
103 changes: 39 additions & 64 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,24 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&resultVector)) {
copyNonNullDataWithSameTypeIntoPos(*childVector, pos, srcData);
srcData += childVector->getNumBytesPerValue();
srcData += Types::getDataTypeSize(childVector->dataType);
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
}
} break;
case VAR_LIST: {
copyKuListToVector(resultVector, pos, *reinterpret_cast<const ku_list_t*>(srcData));
auto srcKuList = *(ku_list_t*)srcData;
auto srcListValues = reinterpret_cast<uint8_t*>(srcKuList.overflowPtr);
auto dstListEntry = ListVector::addList(&resultVector, srcKuList.size);
resultVector.setValue<list_entry_t>(pos, dstListEntry);
auto resultDataVector = common::ListVector::getDataVector(&resultVector);
for (auto i = 0u; i < srcKuList.size; i++) {
copyNonNullDataWithSameTypeIntoPos(
*resultDataVector, dstListEntry.offset + i, srcListValues);
srcListValues += Types::getDataTypeSize(resultDataVector->dataType);
}
} break;
default: {
copyNonNullDataWithSameType(resultVector.dataType, srcData,
resultVector.getData() + pos * resultVector.getNumBytesPerValue(),
resultVector.getData() + pos * Types::getDataTypeSize(resultVector.dataType),
*StringVector::getInMemOverflowBuffer(&resultVector));
}
}
Expand All @@ -31,17 +40,27 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
case STRUCT: {
for (auto& childVector : StructVector::getChildrenVectors(&srcVector)) {
copyNonNullDataWithSameTypeOutFromPos(*childVector, pos, dstData, dstOverflowBuffer);
dstData += childVector->getNumBytesPerValue();
dstData += Types::getDataTypeSize(childVector->dataType);
}
} break;
case VAR_LIST: {
auto kuList = ValueVectorUtils::convertListEntryToKuList(srcVector, pos, dstOverflowBuffer);
memcpy(dstData, &kuList, sizeof(kuList));

auto srcListEntry = srcVector.getValue<list_entry_t>(pos);
auto srcListDataVector = common::ListVector::getDataVector(&srcVector);
ku_list_t dstList;
dstList.size = srcListEntry.size;
InMemOverflowBufferUtils::allocateSpaceForList(dstList,
Types::getDataTypeSize(srcListDataVector->dataType) * dstList.size, dstOverflowBuffer);
for (auto i = 0u; i < srcListEntry.size; i++) {
copyNonNullDataWithSameTypeOutFromPos(*srcListDataVector, srcListEntry.offset + i,
reinterpret_cast<uint8_t*>(dstList.overflowPtr) +
i * Types::getDataTypeSize(srcListDataVector->dataType),
dstOverflowBuffer);
}
memcpy(dstData, &dstList, sizeof(dstList));
} break;
default: {
copyNonNullDataWithSameType(srcVector.dataType,
srcVector.getData() + pos * srcVector.getNumBytesPerValue(), dstData,
srcVector.getData() + pos * Types::getDataTypeSize(srcVector.dataType), dstData,
dstOverflowBuffer);
}
}
Expand All @@ -64,6 +83,18 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
dstValues += numBytesPerValue;
}
} break;
case STRUCT: {
auto srcFields = common::StructVector::getChildrenVectors(&srcVector);
auto dstFields = common::StructVector::getChildrenVectors(&dstVector);
auto srcPos = *(int64_t*)srcValue;
auto dstPos = *(int64_t*)dstValue;
for (auto i = 0u; i < srcFields.size(); i++) {
auto srcField = srcFields[i];
auto dstField = dstFields[i];
copyValue(dstField->getData() + dstField->getNumBytesPerValue() * dstPos, *dstField,
srcField->getData() + srcField->getNumBytesPerValue() * srcPos, *srcField);
}
} break;
case STRING: {
common::InMemOverflowBufferUtils::copyString(*(common::ku_string_t*)srcValue,
*(common::ku_string_t*)dstValue, *StringVector::getInMemOverflowBuffer(&dstVector));
Expand All @@ -76,66 +107,10 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect

void ValueVectorUtils::copyNonNullDataWithSameType(const DataType& dataType, const uint8_t* srcData,
uint8_t* dstData, InMemOverflowBuffer& inMemOverflowBuffer) {
assert(dataType.typeID != STRUCT);
if (dataType.typeID == STRING) {
InMemOverflowBufferUtils::copyString(
*(ku_string_t*)srcData, *(ku_string_t*)dstData, inMemOverflowBuffer);
} else {
memcpy(dstData, srcData, Types::getDataTypeSize(dataType));
}
}

ku_list_t ValueVectorUtils::convertListEntryToKuList(
const ValueVector& srcVector, uint64_t pos, InMemOverflowBuffer& dstOverflowBuffer) {
auto listEntry = srcVector.getValue<list_entry_t>(pos);
auto listValues = ListVector::getListValues(&srcVector, listEntry);
ku_list_t dstList;
dstList.size = listEntry.size;
InMemOverflowBufferUtils::allocateSpaceForList(dstList,
Types::getDataTypeSize(*srcVector.dataType.getChildType()) * dstList.size,
dstOverflowBuffer);
auto srcDataVector = ListVector::getDataVector(&srcVector);
if (srcDataVector->dataType.typeID == VAR_LIST) {
for (auto i = 0u; i < dstList.size; i++) {
auto kuList =
convertListEntryToKuList(*srcDataVector, listEntry.offset + i, dstOverflowBuffer);
(reinterpret_cast<ku_list_t*>(dstList.overflowPtr))[i] = kuList;
}
} else {
memcpy(reinterpret_cast<uint8_t*>(dstList.overflowPtr), listValues,
srcDataVector->getNumBytesPerValue() * listEntry.size);
if (srcDataVector->dataType.typeID == STRING) {
for (auto i = 0u; i < dstList.size; i++) {
InMemOverflowBufferUtils::copyString(
(reinterpret_cast<ku_string_t*>(listValues))[i],
(reinterpret_cast<ku_string_t*>(dstList.overflowPtr))[i], dstOverflowBuffer);
}
}
}
return dstList;
}

void ValueVectorUtils::copyKuListToVector(
ValueVector& dstVector, uint64_t pos, const ku_list_t& srcList) {
auto srcListValues = reinterpret_cast<uint8_t*>(srcList.overflowPtr);
auto dstListEntry = ListVector::addList(&dstVector, srcList.size);
dstVector.setValue<list_entry_t>(pos, dstListEntry);
if (dstVector.dataType.getChildType()->typeID == VAR_LIST) {
for (auto i = 0u; i < srcList.size; i++) {
ValueVectorUtils::copyKuListToVector(*ListVector::getDataVector(&dstVector),
dstListEntry.offset + i, reinterpret_cast<ku_list_t*>(srcList.overflowPtr)[i]);
}
} else {
auto dstDataVector = ListVector::getDataVector(&dstVector);
auto dstListValues = ListVector::getListValues(&dstVector, dstListEntry);
memcpy(dstListValues, srcListValues, srcList.size * dstDataVector->getNumBytesPerValue());
if (dstDataVector->dataType.getTypeID() == STRING) {
for (auto i = 0u; i < srcList.size; i++) {
InMemOverflowBufferUtils::copyString(
(reinterpret_cast<ku_string_t*>(srcListValues))[i],
(reinterpret_cast<ku_string_t*>(dstListValues))[i],
*StringVector::getInMemOverflowBuffer(dstDataVector));
}
}
}
}
4 changes: 2 additions & 2 deletions src/expression_evaluator/base_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ void BaseExpressionEvaluator::resolveResultStateFromChildren(
for (auto& input : inputEvaluators) {
if (!input->isResultFlat()) {
isResultFlat_ = false;
resultVector->state = input->resultVector->state;
resultVector->setState(input->resultVector->state);
return;
}
}
// All children are flat.
isResultFlat_ = true;
resultVector->state = common::DataChunkState::getSingleValueDataChunkState();
resultVector->setState(common::DataChunkState::getSingleValueDataChunkState());
}

} // namespace evaluator
Expand Down
14 changes: 5 additions & 9 deletions src/expression_evaluator/function_evaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,11 @@ void FunctionExpressionEvaluator::resolveResultVector(
// If the resultVector and inputVector are in different dataChunks, we should create a new
// child valueVector, which shares the state with the resultVector, instead of reusing the
// inputVector.
for (auto& inputEvaluator : inputEvaluators) {
if (inputEvaluator->resultVector->state != resultVector->state) {
auto structFieldVector = std::make_shared<common::ValueVector>(
inputEvaluator->resultVector->dataType, memoryManager);
structFieldVector->state = resultVector->state;
common::StructVector::addChildVector(resultVector.get(), structFieldVector);
} else {
common::StructVector::addChildVector(
resultVector.get(), inputEvaluator->resultVector);
for (auto i = 0u; i < inputEvaluators.size(); i++) {
auto inputEvaluator = inputEvaluators[i];
if (inputEvaluator->resultVector->state == resultVector->state) {
common::StructVector::referenceVector(
resultVector.get(), i, inputEvaluator->resultVector);
}
}
}
Expand Down
4 changes: 0 additions & 4 deletions src/function/vector_list_operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ std::unique_ptr<FunctionBindData> ListCreationVectorOperation::bindFunc(
throw BinderException(
"Cannot resolve child data type for " + LIST_CREATION_FUNC_NAME + ".");
}
// TODO(Ziyi): Support list of structs.
if (arguments[0]->getDataType().getTypeID() == common::STRUCT) {
throw BinderException("Cannot create a list of structs.");
}
for (auto i = 1u; i < arguments.size(); i++) {
if (arguments[i]->getDataType() != arguments[0]->getDataType()) {
throw BinderException(getListFunctionIncompatibleChildrenTypeErrorMsg(
Expand Down
4 changes: 0 additions & 4 deletions src/include/common/data_chunk/data_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ class DataChunk {

void insert(uint32_t pos, std::shared_ptr<ValueVector> valueVector);

inline void addValueVector(std::shared_ptr<ValueVector> valueVector) {
valueVectors.push_back(valueVector);
}

inline uint32_t getNumValueVectors() const { return valueVectors.size(); }

inline std::shared_ptr<ValueVector> getValueVector(uint64_t valueVectorPos) {
Expand Down
1 change: 1 addition & 0 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ using vector_idx_t = uint32_t;
constexpr vector_idx_t INVALID_VECTOR_IDX = UINT32_MAX;
using block_idx_t = uint64_t;
using field_idx_t = uint64_t;
using struct_entry_t = int64_t;

// System representation for a variable-sized overflow value.
struct overflow_value_t {
Expand Down
9 changes: 5 additions & 4 deletions src/include/common/vector/auxiliary_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ class StringAuxiliaryBuffer : public AuxiliaryBuffer {

class StructAuxiliaryBuffer : public AuxiliaryBuffer {
public:
StructAuxiliaryBuffer() = default;
StructAuxiliaryBuffer(const DataType& type, storage::MemoryManager* memoryManager);

inline void addChildVector(std::shared_ptr<ValueVector> valueVector) {
childrenVectors.emplace_back(std::move(valueVector));
inline void referenceChildVector(
vector_idx_t idx, std::shared_ptr<ValueVector> vectorToReference) {
childrenVectors[idx] = std::move(vectorToReference);
}
inline const std::vector<std::shared_ptr<ValueVector>>& getChildrenVectors() const {
return childrenVectors;
Expand All @@ -51,7 +52,7 @@ class StructAuxiliaryBuffer : public AuxiliaryBuffer {
// contiguous subsequence of elements in this vector.
class ListAuxiliaryBuffer : public AuxiliaryBuffer {
public:
ListAuxiliaryBuffer(DataType& dataVectorType, storage::MemoryManager* memoryManager);
ListAuxiliaryBuffer(const DataType& dataVectorType, storage::MemoryManager* memoryManager);

inline ValueVector* getDataVector() const { return dataVector.get(); }

Expand Down
Loading