diff --git a/extension/duckdb_scanner/src/duckdb_scan.cpp b/extension/duckdb_scanner/src/duckdb_scan.cpp index e1ac8454e0..8559c308fb 100644 --- a/extension/duckdb_scanner/src/duckdb_scan.cpp +++ b/extension/duckdb_scanner/src/duckdb_scan.cpp @@ -143,6 +143,7 @@ void getDuckDBVectorConversionFunc(PhysicalTypeID physicalTypeID, case PhysicalTypeID::INTERVAL: { conversion_func = convertDuckDBVectorToVector; } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { conversion_func = convertDuckDBVectorToVector; } break; diff --git a/src/common/types/types.cpp b/src/common/types/types.cpp index 6430ac47f4..69244ede62 100644 --- a/src/common/types/types.cpp +++ b/src/common/types/types.cpp @@ -53,6 +53,8 @@ std::string PhysicalTypeUtils::physicalTypeToString(PhysicalTypeID physicalType) return "STRUCT"; case PhysicalTypeID::LIST: return "LIST"; + case PhysicalTypeID::ARRAY: + return "ARRAY"; case PhysicalTypeID::POINTER: return "POINTER"; default: @@ -96,8 +98,12 @@ uint32_t PhysicalTypeUtils::getFixedTypeSize(PhysicalTypeID physicalType) { } } -bool ListTypeInfo::operator==(const ListTypeInfo& other) const { - return *childType == *other.childType; +bool ListTypeInfo::operator==(const ExtraTypeInfo& other) const { + auto otherListTypeInfo = ku_dynamic_cast(&other); + if (otherListTypeInfo) { + return *childType == *otherListTypeInfo->childType; + } + return false; } std::unique_ptr ListTypeInfo::copy() const { @@ -112,8 +118,13 @@ void ListTypeInfo::serializeInternal(Serializer& serializer) const { childType->serialize(serializer); } -bool ArrayTypeInfo::operator==(const ArrayTypeInfo& other) const { - return *childType == *other.childType && numElements == other.numElements; +bool ArrayTypeInfo::operator==(const ExtraTypeInfo& other) const { + auto otherArrayTypeInfo = ku_dynamic_cast(&other); + if (otherArrayTypeInfo) { + return *childType == *otherArrayTypeInfo->childType && + numElements == otherArrayTypeInfo->numElements; + } + return false; } std::unique_ptr ArrayTypeInfo::deserialize(Deserializer& deserializer) { @@ -223,16 +234,20 @@ std::vector StructTypeInfo::getStructFields() const { return structFields; } -bool StructTypeInfo::operator==(const StructTypeInfo& other) const { - if (fields.size() != other.fields.size()) { - return false; - } - for (auto i = 0u; i < fields.size(); ++i) { - if (fields[i] != other.fields[i]) { +bool StructTypeInfo::operator==(const ExtraTypeInfo& other) const { + auto otherStructTypeInfo = ku_dynamic_cast(&other); + if (otherStructTypeInfo) { + if (fields.size() != otherStructTypeInfo->fields.size()) { return false; } + for (auto i = 0u; i < fields.size(); ++i) { + if (fields[i] != otherStructTypeInfo->fields[i]) { + return false; + } + } + return true; } - return true; + return false; } std::unique_ptr StructTypeInfo::deserialize(Deserializer& deserializer) { @@ -257,6 +272,7 @@ LogicalType::LogicalType(LogicalTypeID typeID) : typeID{typeID}, extraTypeInfo{n physicalType = getPhysicalType(typeID); // Complex types should not use this constructor as they need extra type information KU_ASSERT(physicalType != PhysicalTypeID::LIST); + KU_ASSERT(physicalType != PhysicalTypeID::ARRAY); // Node/Rel types are exempted due to some complex code in bind_graph_pattern.cpp KU_ASSERT(physicalType != PhysicalTypeID::STRUCT || typeID == LogicalTypeID::NODE || typeID == LogicalTypeID::REL || typeID == LogicalTypeID::RECURSIVE_REL); @@ -285,21 +301,10 @@ bool LogicalType::operator==(const LogicalType& other) const { if (typeID != other.typeID) { return false; } - switch (other.getPhysicalType()) { - case PhysicalTypeID::LIST: - if (typeID == LogicalTypeID::ARRAY) { - return *ku_dynamic_cast(extraTypeInfo.get()) == - *ku_dynamic_cast(other.extraTypeInfo.get()); - } else { - return *ku_dynamic_cast(extraTypeInfo.get()) == - *ku_dynamic_cast(other.extraTypeInfo.get()); - } - case PhysicalTypeID::STRUCT: - return *ku_dynamic_cast(extraTypeInfo.get()) == - *ku_dynamic_cast(other.extraTypeInfo.get()); - default: - return true; + if (extraTypeInfo) { + return *extraTypeInfo == *other.extraTypeInfo; } + return true; } bool LogicalType::operator!=(const LogicalType& other) const { @@ -389,6 +394,7 @@ void LogicalType::serialize(Serializer& serializer) const { serializer.serializeValue(physicalType); switch (physicalType) { case PhysicalTypeID::LIST: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::STRUCT: extraTypeInfo->serialize(serializer); default: @@ -404,11 +410,10 @@ std::unique_ptr LogicalType::deserialize(Deserializer& deserializer std::unique_ptr extraTypeInfo; switch (physicalType) { case PhysicalTypeID::LIST: { - if (typeID == LogicalTypeID::ARRAY) { - extraTypeInfo = ArrayTypeInfo::deserialize(deserializer); - } else { - extraTypeInfo = ListTypeInfo::deserialize(deserializer); - } + extraTypeInfo = ListTypeInfo::deserialize(deserializer); + } break; + case PhysicalTypeID::ARRAY: { + extraTypeInfo = ArrayTypeInfo::deserialize(deserializer); } break; case PhysicalTypeID::STRUCT: { extraTypeInfo = StructTypeInfo::deserialize(deserializer); @@ -516,11 +521,13 @@ PhysicalTypeID LogicalType::getPhysicalType(LogicalTypeID typeID) { case LogicalTypeID::STRING: { return PhysicalTypeID::STRING; } break; - case LogicalTypeID::ARRAY: case LogicalTypeID::MAP: case LogicalTypeID::LIST: { return PhysicalTypeID::LIST; } break; + case LogicalTypeID::ARRAY: { + return PhysicalTypeID::ARRAY; + } break; case LogicalTypeID::NODE: case LogicalTypeID::REL: case LogicalTypeID::RECURSIVE_REL: @@ -721,6 +728,7 @@ uint32_t LogicalTypeUtils::getRowLayoutSize(const LogicalType& type) { case PhysicalTypeID::STRING: { return sizeof(ku_string_t); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { return sizeof(ku_list_t); } diff --git a/src/common/types/value/value.cpp b/src/common/types/value/value.cpp index 8911e70b63..7e78e3712d 100644 --- a/src/common/types/value/value.cpp +++ b/src/common/types/value/value.cpp @@ -407,6 +407,7 @@ void Value::copyValueFrom(const Value& other) { case PhysicalTypeID::STRING: { strVal = other.strVal; } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::STRUCT: { for (auto& child : other.children) { @@ -624,6 +625,7 @@ void Value::serialize(Serializer& serializer) const { case PhysicalTypeID::STRING: { serializer.serializeValue(strVal); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::STRUCT: { for (auto i = 0u; i < childrenSize; ++i) { @@ -688,6 +690,7 @@ std::unique_ptr Value::deserialize(Deserializer& deserializer) { case PhysicalTypeID::STRING: { deserializer.deserializeValue(val->strVal); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::STRUCT: { deserializer.deserializeVectorOfPtrs(val->children); diff --git a/src/common/vector/auxiliary_buffer.cpp b/src/common/vector/auxiliary_buffer.cpp index e40f021c95..f972211088 100644 --- a/src/common/vector/auxiliary_buffer.cpp +++ b/src/common/vector/auxiliary_buffer.cpp @@ -81,6 +81,9 @@ std::unique_ptr AuxiliaryBufferFactory::getAuxiliaryBuffer(Logi return std::make_unique(type, memoryManager); case PhysicalTypeID::LIST: return std::make_unique(*ListType::getChildType(&type), memoryManager); + case PhysicalTypeID::ARRAY: + return std::make_unique(*ArrayType::getChildType(&type), + memoryManager); default: return nullptr; } diff --git a/src/common/vector/value_vector.cpp b/src/common/vector/value_vector.cpp index 06ebea1423..e997ca5688 100644 --- a/src/common/vector/value_vector.cpp +++ b/src/common/vector/value_vector.cpp @@ -74,6 +74,7 @@ void ValueVector::copyFromRowData(uint32_t pos, const uint8_t* rowData) { case PhysicalTypeID::STRUCT: { StructVector::copyFromRowData(this, pos, rowData); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { ListVector::copyFromRowData(this, pos, rowData); } break; @@ -93,6 +94,7 @@ void ValueVector::copyToRowData(uint32_t pos, uint8_t* rowData, case PhysicalTypeID::STRUCT: { StructVector::copyToRowData(this, pos, rowData, rowOverflowBuffer); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { ListVector::copyToRowData(this, pos, rowData, rowOverflowBuffer); } break; @@ -113,6 +115,7 @@ void ValueVector::copyFromVectorData(uint8_t* dstData, const ValueVector* srcVec case PhysicalTypeID::STRUCT: { StructVector::copyFromVectorData(this, dstData, srcVector, srcVectorData); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { ListVector::copyFromVectorData(this, dstData, srcVector, srcVectorData); } break; @@ -185,6 +188,7 @@ void ValueVector::copyFromValue(uint64_t pos, const Value& value) { StringVector::addString(this, *(ku_string_t*)dstValue, value.strVal.data(), value.strVal.length()); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { auto listEntry = reinterpret_cast(dstValue); auto numValues = NestedVal::getChildrenSize(&value); @@ -259,6 +263,7 @@ std::unique_ptr ValueVector::getAsValue(uint64_t pos) const { case PhysicalTypeID::STRING: { value->strVal = getValue(pos).getAsString(); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { auto dataVector = ListVector::getDataVector(this); auto listEntry = getValue(pos); @@ -294,6 +299,7 @@ void ValueVector::resetAuxiliaryBuffer() { ->resetOverflowBuffer(); return; } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { auto listAuxiliaryBuffer = ku_dynamic_cast(auxiliaryBuffer.get()); @@ -322,6 +328,7 @@ uint32_t ValueVector::getDataTypeSize(const LogicalType& type) { case PhysicalTypeID::STRUCT: { return sizeof(struct_entry_t); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { return sizeof(list_entry_t); } @@ -504,7 +511,8 @@ void StringVector::copyToRowData(const ValueVector* vector, uint32_t pos, uint8_ } void ListVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); auto& srcKuList = *(ku_list_t*)rowData; auto srcNullBytes = reinterpret_cast(srcKuList.overflowPtr); auto srcListValues = srcNullBytes + NullBuffer::getNumBytesForNullValues(srcKuList.size); diff --git a/src/function/cast/cast_array.cpp b/src/function/cast/cast_array.cpp index 2131da1c2e..a309c252a9 100644 --- a/src/function/cast/cast_array.cpp +++ b/src/function/cast/cast_array.cpp @@ -41,13 +41,12 @@ bool CastArrayHelper::containsListToArray(const LogicalType* srcType, const Logi if (checkCompatibleNestedTypes(srcType->getLogicalTypeID(), dstType->getLogicalTypeID())) { switch (srcType->getPhysicalType()) { case PhysicalTypeID::LIST: { - auto srcChildType = (srcType->getLogicalTypeID() == LogicalTypeID::ARRAY) ? - ArrayType::getChildType(srcType) : - ListType::getChildType(srcType); - auto dstChildType = (dstType->getLogicalTypeID() == LogicalTypeID::ARRAY) ? - ArrayType::getChildType(dstType) : - ListType::getChildType(dstType); - return containsListToArray(srcChildType, dstChildType); + return containsListToArray(ListType::getChildType(srcType), + ListType::getChildType(dstType)); + } + case PhysicalTypeID::ARRAY: { + return containsListToArray(ArrayType::getChildType(srcType), + ListType::getChildType(dstType)); } case PhysicalTypeID::STRUCT: { auto srcFieldTypes = StructType::getFieldTypes(srcType); @@ -79,35 +78,40 @@ void CastArrayHelper::validateListEntry(ValueVector* inputVector, LogicalType* r auto inputType = inputVector->dataType; switch (resultType->getPhysicalType()) { - case PhysicalTypeID::LIST: { + case PhysicalTypeID::ARRAY: { if (inputType.getPhysicalType() == PhysicalTypeID::LIST) { - if (inputType.getLogicalTypeID() == LogicalTypeID::ARRAY && - resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) { - if (ArrayType::getNumElements(&inputType) != - ArrayType::getNumElements(resultType)) { - throw ConversionException( - stringFormat("Unsupported casting function from {} to {}.", - inputType.toString(), resultType->toString())); - } + auto listEntry = inputVector->getValue(pos); + if (listEntry.size != ArrayType::getNumElements(resultType)) { + throw ConversionException{ + stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. " + "Expected: {}, Actual: {}.", + ArrayType::getNumElements(resultType), + inputVector->getValue(pos).size)}; } - if (inputType.getLogicalTypeID() == LogicalTypeID::LIST && - resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) { - auto listEntry = inputVector->getValue(pos); - if (listEntry.size != ArrayType::getNumElements(resultType)) { - throw ConversionException{ - stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. " - "Expected: {}, Actual: {}.", - ArrayType::getNumElements(resultType), - inputVector->getValue(pos).size)}; - } + auto inputChildVector = ListVector::getDataVector(inputVector); + for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) { + validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i); } + } else if (inputType.getPhysicalType() == PhysicalTypeID::ARRAY) { + if (ArrayType::getNumElements(&inputType) != ArrayType::getNumElements(resultType)) { + throw ConversionException( + stringFormat("Unsupported casting function from {} to {}.", + inputType.toString(), resultType->toString())); + } + auto listEntry = inputVector->getValue(pos); + auto inputChildVector = ListVector::getDataVector(inputVector); + for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) { + validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i); + } + } + } break; + case PhysicalTypeID::LIST: { + if (inputType.getPhysicalType() == PhysicalTypeID::LIST || + inputType.getPhysicalType() == PhysicalTypeID::ARRAY) { auto listEntry = inputVector->getValue(pos); auto inputChildVector = ListVector::getDataVector(inputVector); - auto resultChildType = (resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) ? - ArrayType::getChildType(resultType) : - ListType::getChildType(resultType); for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) { - validateListEntry(inputChildVector, resultChildType, i); + validateListEntry(inputChildVector, ListType::getChildType(resultType), i); } } } break; diff --git a/src/function/comparison_functions.cpp b/src/function/comparison_functions.cpp index 12cb9fcd10..13d88d4597 100644 --- a/src/function/comparison_functions.cpp +++ b/src/function/comparison_functions.cpp @@ -86,6 +86,7 @@ static void executeNestedOperation(uint8_t& result, ValueVector* leftVector, rightVector->getValue(rightPos), result, nullptr /* left */, nullptr /* right */); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { OP::operation(leftVector->getValue(leftPos), rightVector->getValue(rightPos), result, leftVector, rightVector); diff --git a/src/function/table/call/storage_info.cpp b/src/function/table/call/storage_info.cpp index 9693f87d1a..029c35e6bf 100644 --- a/src/function/table/call/storage_info.cpp +++ b/src/function/table/call/storage_info.cpp @@ -85,6 +85,7 @@ struct StorageInfoSharedState final : public CallFuncSharedState { result.push_back(dictionary.getDataColumn()); result.push_back(dictionary.getOffsetColumn()); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { auto listColumn = ku_dynamic_cast(column); result.push_back(listColumn->getDataColumn()); diff --git a/src/function/vector_cast_functions.cpp b/src/function/vector_cast_functions.cpp index 9e7fd39709..7c2687c21c 100644 --- a/src/function/vector_cast_functions.cpp +++ b/src/function/vector_cast_functions.cpp @@ -21,8 +21,10 @@ static void resolveNestedVector(std::shared_ptr inputVector, ValueV auto inputType = &inputVector->dataType; auto resultType = &resultVector->dataType; while (true) { - if (inputType->getPhysicalType() == PhysicalTypeID::LIST && - resultType->getPhysicalType() == PhysicalTypeID::LIST) { + if ((inputType->getPhysicalType() == PhysicalTypeID::LIST || + inputType->getPhysicalType() == PhysicalTypeID::ARRAY) && + (resultType->getPhysicalType() == PhysicalTypeID::LIST || + resultType->getPhysicalType() == PhysicalTypeID::ARRAY)) { // copy data and nullmask from input memcpy(resultVector->getData(), inputVector->getData(), numOfEntries * resultVector->getNumBytesPerValue()); diff --git a/src/function/vector_hash_functions.cpp b/src/function/vector_hash_functions.cpp index 5d142f9798..0f9cb2c88e 100644 --- a/src/function/vector_hash_functions.cpp +++ b/src/function/vector_hash_functions.cpp @@ -179,6 +179,7 @@ void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result) case PhysicalTypeID::STRUCT: { computeStructVecHash(operand, result); } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { computeListVectorHash(operand, result); } break; diff --git a/src/function/vector_list_functions.cpp b/src/function/vector_list_functions.cpp index 87792daf3a..73b51d48e0 100644 --- a/src/function/vector_list_functions.cpp +++ b/src/function/vector_list_functions.cpp @@ -92,6 +92,7 @@ static scalar_func_exec_t getBinaryListExecFuncSwitchRight(const LogicalType& ri execFunc = ScalarFunction::BinaryExecListStructFunction; } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { execFunc = ScalarFunction::BinaryExecListStructFunction; @@ -400,6 +401,7 @@ static std::unique_ptr ListExtractBindFunc( scalarFunction->execFunc = BinaryExecListExtractFunction; } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { scalarFunction->execFunc = BinaryExecListExtractFunction; diff --git a/src/function/vector_map_functions.cpp b/src/function/vector_map_functions.cpp index 60685f70c0..a3530c3198 100644 --- a/src/function/vector_map_functions.cpp +++ b/src/function/vector_map_functions.cpp @@ -102,6 +102,7 @@ static std::unique_ptr MapExtractBindFunc( scalarFunction->execFunc = ScalarFunction::BinaryExecListStructFunction; } break; + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { scalarFunction->execFunc = ScalarFunction::BinaryExecListStructFunction; diff --git a/src/include/common/type_utils.h b/src/include/common/type_utils.h index 5d06b4b4ac..9de870fe0f 100644 --- a/src/include/common/type_utils.h +++ b/src/include/common/type_utils.h @@ -230,6 +230,7 @@ class TypeUtils { return func(internalID_t()); case PhysicalTypeID::STRING: return func(ku_string_t()); + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: return func(list_entry_t()); case PhysicalTypeID::STRUCT: diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h index f97ea73364..d44a926338 100644 --- a/src/include/common/types/types.h +++ b/src/include/common/types/types.h @@ -162,8 +162,9 @@ enum class PhysicalTypeID : uint8_t { // Variable size types. STRING = 20, LIST = 22, - STRUCT = 23, - POINTER = 24, + ARRAY = 23, + STRUCT = 24, + POINTER = 25, }; class LogicalType; @@ -174,6 +175,8 @@ class ExtraTypeInfo { inline void serialize(Serializer& serializer) const { serializeInternal(serializer); } + virtual bool operator==(const ExtraTypeInfo& other) const = 0; + virtual std::unique_ptr copy() const = 0; protected: @@ -186,7 +189,7 @@ class ListTypeInfo : public ExtraTypeInfo { explicit ListTypeInfo(std::unique_ptr childType) : childType{std::move(childType)} {} inline LogicalType* getChildType() const { return childType.get(); } - bool operator==(const ListTypeInfo& other) const; + bool operator==(const ExtraTypeInfo& other) const override; std::unique_ptr copy() const override; static std::unique_ptr deserialize(Deserializer& deserializer); @@ -204,7 +207,7 @@ class ArrayTypeInfo : public ListTypeInfo { explicit ArrayTypeInfo(std::unique_ptr childType, uint64_t numElements) : ListTypeInfo{std::move(childType)}, numElements{numElements} {} inline uint64_t getNumElements() const { return numElements; } - bool operator==(const ArrayTypeInfo& other) const; + bool operator==(const ExtraTypeInfo& other) const override; static std::unique_ptr deserialize(Deserializer& deserializer); std::unique_ptr copy() const override; @@ -253,7 +256,7 @@ class StructTypeInfo : public ExtraTypeInfo { std::vector getChildrenTypes() const; std::vector getChildrenNames() const; std::vector getStructFields() const; - bool operator==(const kuzu::common::StructTypeInfo& other) const; + bool operator==(const ExtraTypeInfo& other) const override; static std::unique_ptr deserialize(Deserializer& deserializer); std::unique_ptr copy() const override; @@ -445,7 +448,8 @@ using logical_type_vec_t = std::vector; struct ListType { static inline LogicalType* getChildType(const LogicalType* type) { - KU_ASSERT(type->getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(type->getPhysicalType() == PhysicalTypeID::LIST || + type->getPhysicalType() == PhysicalTypeID::ARRAY); auto listTypeInfo = reinterpret_cast(type->extraTypeInfo.get()); return listTypeInfo->getChildType(); } @@ -453,13 +457,13 @@ struct ListType { struct ArrayType { static inline LogicalType* getChildType(const LogicalType* type) { - KU_ASSERT(type->getLogicalTypeID() == LogicalTypeID::ARRAY); + KU_ASSERT(type->getPhysicalType() == PhysicalTypeID::ARRAY); auto arrayTypeInfo = reinterpret_cast(type->extraTypeInfo.get()); return arrayTypeInfo->getChildType(); } static inline uint64_t getNumElements(const LogicalType* type) { - KU_ASSERT(type->getLogicalTypeID() == LogicalTypeID::ARRAY); + KU_ASSERT(type->getPhysicalType() == PhysicalTypeID::ARRAY); auto arrayTypeInfo = reinterpret_cast(type->extraTypeInfo.get()); return arrayTypeInfo->getNumElements(); } diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 5f9afe437e..20d7cd2c72 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -146,46 +146,54 @@ struct KUZU_API BlobVector { } }; +// Currently, ListVector is used for both VAR_LIST and ARRAY physical type class KUZU_API ListVector { public: static void setDataVector(const ValueVector* vector, std::shared_ptr dataVector) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); auto listBuffer = ku_dynamic_cast(vector->auxiliaryBuffer.get()); listBuffer->setDataVector(std::move(dataVector)); } static ValueVector* getDataVector(const ValueVector* vector) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); return ku_dynamic_cast( vector->auxiliaryBuffer.get()) ->getDataVector(); } static std::shared_ptr getSharedDataVector(const ValueVector* vector) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); return ku_dynamic_cast( vector->auxiliaryBuffer.get()) ->getSharedDataVector(); } static uint64_t getDataVectorSize(const ValueVector* vector) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); return ku_dynamic_cast( vector->auxiliaryBuffer.get()) ->getSize(); } static uint8_t* getListValues(const ValueVector* vector, const list_entry_t& listEntry) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); auto dataVector = getDataVector(vector); return dataVector->getData() + dataVector->getNumBytesPerValue() * listEntry.offset; } static uint8_t* getListValuesWithOffset(const ValueVector* vector, const list_entry_t& listEntry, offset_t elementOffsetInList) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); return getListValues(vector, listEntry) + elementOffsetInList * getDataVector(vector)->getNumBytesPerValue(); } static list_entry_t addList(ValueVector* vector, uint64_t listSize) { - KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST || + vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); return ku_dynamic_cast( vector->auxiliaryBuffer.get()) ->addList(listSize); diff --git a/src/include/function/comparison/vector_comparison_functions.h b/src/include/function/comparison/vector_comparison_functions.h index 2f7a439a20..a67c5282bd 100644 --- a/src/include/function/comparison/vector_comparison_functions.h +++ b/src/include/function/comparison/vector_comparison_functions.h @@ -112,6 +112,7 @@ struct ComparisonFunction { func = BinaryComparisonExecFunction; } break; + case common::PhysicalTypeID::ARRAY: case common::PhysicalTypeID::LIST: { func = BinaryComparisonExecFunction; @@ -178,6 +179,7 @@ struct ComparisonFunction { case common::PhysicalTypeID::INTERVAL: { func = BinaryComparisonSelectFunction; } break; + case common::PhysicalTypeID::ARRAY: case common::PhysicalTypeID::LIST: { func = BinaryComparisonSelectFunction; } break; diff --git a/src/main/storage_driver.cpp b/src/main/storage_driver.cpp index 4e1cefa797..3b07cd8500 100644 --- a/src/main/storage_driver.cpp +++ b/src/main/storage_driver.cpp @@ -64,7 +64,8 @@ uint64_t StorageDriver::getNumRels(const std::string& relName) { void StorageDriver::scanColumn(Transaction* transaction, storage::Column* column, offset_t* offsets, size_t size, uint8_t* result) { auto dataType = column->getDataType(); - if (dataType.getPhysicalType() == PhysicalTypeID::LIST) { + if (dataType.getPhysicalType() == PhysicalTypeID::LIST || + dataType.getPhysicalType() == PhysicalTypeID::ARRAY) { auto resultVector = ValueVector(dataType); for (auto i = 0u; i < size; ++i) { auto nodeOffset = offsets[i]; diff --git a/src/storage/compression/compression.cpp b/src/storage/compression/compression.cpp index d95e76f166..0f3da80db7 100644 --- a/src/storage/compression/compression.cpp +++ b/src/storage/compression/compression.cpp @@ -33,6 +33,7 @@ uint32_t getDataTypeSizeInChunk(const common::PhysicalTypeID& dataType) { case PhysicalTypeID::STRING: { return sizeof(uint32_t); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::INTERNAL_ID: { return sizeof(offset_t); @@ -108,6 +109,7 @@ bool CompressionMetadata::canUpdateInPlace(const uint8_t* data, uint32_t pos, BitpackHeader::readHeader(this->data)); } case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: { auto value = reinterpret_cast(data)[pos]; @@ -164,6 +166,7 @@ uint64_t CompressionMetadata::numValues(uint64_t pageSize, const LogicalType& da case PhysicalTypeID::INT8: return IntegerBitpacking::numValues(pageSize, BitpackHeader::readHeader(data)); case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: return IntegerBitpacking::numValues(pageSize, @@ -221,6 +224,7 @@ std::optional ConstantCompression::analyze(const ColumnChun } return std::optional(CompressionMetadata(CompressionType::CONSTANT, value)); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::STRING: case PhysicalTypeID::INTERNAL_ID: @@ -644,6 +648,7 @@ void ReadCompressedValuesFromPageToVector::operator()(const uint8_t* frame, Page resultVector->getData(), posInVector, numValuesToRead, metadata); } case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: { return IntegerBitpacking().decompressFromPage(frame, pageCursor.elemPosInPage, @@ -705,6 +710,7 @@ void ReadCompressedValuesFromPage::operator()(const uint8_t* frame, PageCursor& result, startPosInResult, numValuesToRead, metadata); } case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: { return IntegerBitpacking().decompressFromPage(frame, pageCursor.elemPosInPage, @@ -767,6 +773,7 @@ void WriteCompressedValuesToPage::operator()(uint8_t* frame, uint16_t posInFrame posInFrame, numValues, metadata); } case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: { return IntegerBitpacking().setValuesFromUncompressed(data, dataOffset, frame, diff --git a/src/storage/stats/table_statistics_collection.cpp b/src/storage/stats/table_statistics_collection.cpp index 565b7608ec..03430abe49 100644 --- a/src/storage/stats/table_statistics_collection.cpp +++ b/src/storage/stats/table_statistics_collection.cpp @@ -103,6 +103,12 @@ std::unique_ptr TablesStatistics::createMetadataDAHInfo( metadataDAHInfo->childrenInfos.push_back( createMetadataDAHInfo(*ListType::getChildType(&dataType), metadataFH, bm, wal)); } break; + case PhysicalTypeID::ARRAY: { + metadataDAHInfo->childrenInfos.push_back( + createMetadataDAHInfo(*LogicalType::UINT32(), metadataFH, bm, wal)); + metadataDAHInfo->childrenInfos.push_back( + createMetadataDAHInfo(*ArrayType::getChildType(&dataType), metadataFH, bm, wal)); + } break; case PhysicalTypeID::STRING: { auto dataMetadataDAHInfo = std::make_unique(); auto offsetMetadataDAHInfo = std::make_unique(); diff --git a/src/storage/storage_utils.cpp b/src/storage/storage_utils.cpp index b918f1e640..40ff08b145 100644 --- a/src/storage/storage_utils.cpp +++ b/src/storage/storage_utils.cpp @@ -86,6 +86,7 @@ uint32_t StorageUtils::getDataTypeSize(PhysicalTypeID type) { case PhysicalTypeID::STRING: { return sizeof(ku_string_t); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { return sizeof(ku_list_t); } @@ -104,6 +105,7 @@ uint32_t StorageUtils::getDataTypeSize(const LogicalType& type) { case PhysicalTypeID::STRING: { return sizeof(ku_string_t); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { return sizeof(ku_list_t); } diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 0f64270cc2..697f2663dd 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -126,6 +126,7 @@ static std::shared_ptr getCompression(const LogicalType& dataTyp return std::make_shared>(); } case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: { return std::make_shared>(); @@ -182,6 +183,7 @@ void ColumnChunk::initializeFunction() { case PhysicalTypeID::INT16: case PhysicalTypeID::INT8: case PhysicalTypeID::INTERNAL_ID: + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: case PhysicalTypeID::UINT64: case PhysicalTypeID::UINT32: @@ -266,7 +268,8 @@ void ColumnChunk::write(ColumnChunk* chunk, ColumnChunk* dstOffsets, RelMultipli // Thus, an assertion is added at the first line. void ColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset_t offsetInChunk) { KU_ASSERT(dataType.getPhysicalType() != PhysicalTypeID::BOOL && - dataType.getPhysicalType() != PhysicalTypeID::LIST); + dataType.getPhysicalType() != PhysicalTypeID::LIST && + dataType.getPhysicalType() != PhysicalTypeID::ARRAY); nullChunk->setNull(offsetInChunk, vector->isNull(offsetInVector)); if (offsetInChunk >= numValues) { numValues = offsetInChunk + 1; @@ -641,6 +644,7 @@ std::unique_ptr ColumnChunkFactory::createColumnChunk(LogicalType d return std::make_unique(std::move(dataType), capacity, enableCompression, inMemory); } + case PhysicalTypeID::ARRAY: case PhysicalTypeID::LIST: { return std::make_unique(std::move(dataType), capacity, enableCompression, inMemory); diff --git a/src/storage/store/list_column_chunk.cpp b/src/storage/store/list_column_chunk.cpp index c643a04c0c..17d7baa767 100644 --- a/src/storage/store/list_column_chunk.cpp +++ b/src/storage/store/list_column_chunk.cpp @@ -34,7 +34,8 @@ ListColumnChunk::ListColumnChunk(LogicalType dataType, uint64_t capacity, bool e ColumnChunkFactory::createColumnChunk(*ListType::getChildType(&this->dataType)->copy(), enableCompression, 0 /* capacity */, inMemory)); checkOffsetSortedAsc = false; - KU_ASSERT(this->dataType.getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(this->dataType.getPhysicalType() == PhysicalTypeID::LIST || + this->dataType.getPhysicalType() == PhysicalTypeID::ARRAY); } bool ListColumnChunk::isOffsetsConsecutiveAndSortedAscending(uint64_t startPos, @@ -235,7 +236,8 @@ void ListColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset void ListColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { - KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::LIST || + srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::ARRAY); checkOffsetSortedAsc = true; auto srcListChunk = ku_dynamic_cast(srcChunk); auto offsetInDataChunkToAppend = listDataColumnChunk->getNumValues(); @@ -262,7 +264,8 @@ void ListColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, void ListColumnChunk::copy(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { - KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::LIST); + KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::LIST || + srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::ARRAY); KU_ASSERT(dstOffsetInChunk >= numValues); while (numValues < dstOffsetInChunk) { appendNullList();