Skip to content

Commit

Permalink
Add physical type ARRAY (#3175)
Browse files Browse the repository at this point in the history
  • Loading branch information
manh9203 committed Apr 9, 2024
1 parent 0b70b02 commit 4451bf8
Show file tree
Hide file tree
Showing 22 changed files with 157 additions and 84 deletions.
1 change: 1 addition & 0 deletions extension/duckdb_scanner/src/duckdb_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ void getDuckDBVectorConversionFunc(PhysicalTypeID physicalTypeID,
case PhysicalTypeID::INTERVAL: {
conversion_func = convertDuckDBVectorToVector<interval_t>;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
conversion_func = convertDuckDBVectorToVector<list_entry_t>;
} break;
Expand Down
70 changes: 39 additions & 31 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ std::string PhysicalTypeUtils::physicalTypeToString(PhysicalTypeID physicalType)
return "STRUCT";
case PhysicalTypeID::LIST:
return "LIST";
case PhysicalTypeID::ARRAY:
return "ARRAY";
case PhysicalTypeID::POINTER:
return "POINTER";
default:
Expand Down Expand Up @@ -96,8 +98,12 @@ uint32_t PhysicalTypeUtils::getFixedTypeSize(PhysicalTypeID physicalType) {
}
}

bool ListTypeInfo::operator==(const ListTypeInfo& other) const {
return *childType == *other.childType;
// Polymorphic equality against any ExtraTypeInfo: equal only when `other` casts to a
// ListTypeInfo and its child type compares equal to this one's.
// NOTE(review): the nullptr branch is only meaningful if ku_dynamic_cast yields nullptr on a
// type mismatch -- confirm against its definition.
bool ListTypeInfo::operator==(const ExtraTypeInfo& other) const {
    const auto* rhs = ku_dynamic_cast<const ExtraTypeInfo*, const ListTypeInfo*>(&other);
    if (rhs == nullptr) {
        return false;
    }
    return *childType == *rhs->childType;
}

std::unique_ptr<ExtraTypeInfo> ListTypeInfo::copy() const {
Expand All @@ -112,8 +118,13 @@ void ListTypeInfo::serializeInternal(Serializer& serializer) const {
childType->serialize(serializer);
}

bool ArrayTypeInfo::operator==(const ArrayTypeInfo& other) const {
return *childType == *other.childType && numElements == other.numElements;
// Polymorphic equality against any ExtraTypeInfo: equal only when `other` casts to an
// ArrayTypeInfo with an equal child type and the same number of elements.
// NOTE(review): the nullptr branch is only meaningful if ku_dynamic_cast yields nullptr on a
// type mismatch -- confirm against its definition.
bool ArrayTypeInfo::operator==(const ExtraTypeInfo& other) const {
    const auto* rhs = ku_dynamic_cast<const ExtraTypeInfo*, const ArrayTypeInfo*>(&other);
    if (rhs == nullptr) {
        return false;
    }
    // Child types are compared first; the element count is only checked when they match.
    const bool sameChild = *childType == *rhs->childType;
    return sameChild && numElements == rhs->numElements;
}

std::unique_ptr<ExtraTypeInfo> ArrayTypeInfo::deserialize(Deserializer& deserializer) {
Expand Down Expand Up @@ -223,16 +234,20 @@ std::vector<const StructField*> StructTypeInfo::getStructFields() const {
return structFields;
}

bool StructTypeInfo::operator==(const StructTypeInfo& other) const {
if (fields.size() != other.fields.size()) {
return false;
}
for (auto i = 0u; i < fields.size(); ++i) {
if (fields[i] != other.fields[i]) {
// Polymorphic equality against any ExtraTypeInfo: casts `other` to a StructTypeInfo and, when
// the cast result is non-null, compares the two field lists by size and then element by element.
// NOTE(review): whether a mismatched `other` actually produces nullptr depends on
// ku_dynamic_cast -- confirm against its definition.
bool StructTypeInfo::operator==(const ExtraTypeInfo& other) const {
auto otherStructTypeInfo = ku_dynamic_cast<const ExtraTypeInfo*, const StructTypeInfo*>(&other);
if (otherStructTypeInfo) {
// A differing field count can never be equal; it also keeps the indexed loop below in bounds.
if (fields.size() != otherStructTypeInfo->fields.size()) {
return false;
}
// Positional comparison: the first mismatching pair of fields makes the infos unequal.
for (auto i = 0u; i < fields.size(); ++i) {
if (fields[i] != otherStructTypeInfo->fields[i]) {
return false;
}
}
return true;
}
// NOTE(review): the `return true;` below looks like a removed line of the previous
// implementation carried over by the diff rendering; as written it makes the following
// `return false;` unreachable -- confirm against the committed file.
return true;
return false;
}

std::unique_ptr<ExtraTypeInfo> StructTypeInfo::deserialize(Deserializer& deserializer) {
Expand All @@ -257,6 +272,7 @@ LogicalType::LogicalType(LogicalTypeID typeID) : typeID{typeID}, extraTypeInfo{n
physicalType = getPhysicalType(typeID);
// Complex types should not use this constructor as they need extra type information
KU_ASSERT(physicalType != PhysicalTypeID::LIST);
KU_ASSERT(physicalType != PhysicalTypeID::ARRAY);
// Node/Rel types are exempted due to some complex code in bind_graph_pattern.cpp
KU_ASSERT(physicalType != PhysicalTypeID::STRUCT || typeID == LogicalTypeID::NODE ||
typeID == LogicalTypeID::REL || typeID == LogicalTypeID::RECURSIVE_REL);
Expand Down Expand Up @@ -285,21 +301,10 @@ bool LogicalType::operator==(const LogicalType& other) const {
if (typeID != other.typeID) {
return false;
}
switch (other.getPhysicalType()) {
case PhysicalTypeID::LIST:
if (typeID == LogicalTypeID::ARRAY) {
return *ku_dynamic_cast<ExtraTypeInfo*, ArrayTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, ArrayTypeInfo*>(other.extraTypeInfo.get());
} else {
return *ku_dynamic_cast<ExtraTypeInfo*, ListTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, ListTypeInfo*>(other.extraTypeInfo.get());
}
case PhysicalTypeID::STRUCT:
return *ku_dynamic_cast<ExtraTypeInfo*, StructTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, StructTypeInfo*>(other.extraTypeInfo.get());
default:
return true;
if (extraTypeInfo) {
return *extraTypeInfo == *other.extraTypeInfo;
}
return true;
}

bool LogicalType::operator!=(const LogicalType& other) const {
Expand Down Expand Up @@ -389,6 +394,7 @@ void LogicalType::serialize(Serializer& serializer) const {
serializer.serializeValue(physicalType);
switch (physicalType) {
case PhysicalTypeID::LIST:
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::STRUCT:
extraTypeInfo->serialize(serializer);
default:
Expand All @@ -404,11 +410,10 @@ std::unique_ptr<LogicalType> LogicalType::deserialize(Deserializer& deserializer
std::unique_ptr<ExtraTypeInfo> extraTypeInfo;
switch (physicalType) {
case PhysicalTypeID::LIST: {
if (typeID == LogicalTypeID::ARRAY) {
extraTypeInfo = ArrayTypeInfo::deserialize(deserializer);
} else {
extraTypeInfo = ListTypeInfo::deserialize(deserializer);
}
extraTypeInfo = ListTypeInfo::deserialize(deserializer);
} break;
case PhysicalTypeID::ARRAY: {
extraTypeInfo = ArrayTypeInfo::deserialize(deserializer);
} break;
case PhysicalTypeID::STRUCT: {
extraTypeInfo = StructTypeInfo::deserialize(deserializer);
Expand Down Expand Up @@ -516,11 +521,13 @@ PhysicalTypeID LogicalType::getPhysicalType(LogicalTypeID typeID) {
case LogicalTypeID::STRING: {
return PhysicalTypeID::STRING;
} break;
case LogicalTypeID::ARRAY:
case LogicalTypeID::MAP:
case LogicalTypeID::LIST: {
return PhysicalTypeID::LIST;
} break;
case LogicalTypeID::ARRAY: {
return PhysicalTypeID::ARRAY;
} break;
case LogicalTypeID::NODE:
case LogicalTypeID::REL:
case LogicalTypeID::RECURSIVE_REL:
Expand Down Expand Up @@ -721,6 +728,7 @@ uint32_t LogicalTypeUtils::getRowLayoutSize(const LogicalType& type) {
case PhysicalTypeID::STRING: {
return sizeof(ku_string_t);
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
return sizeof(ku_list_t);
}
Expand Down
3 changes: 3 additions & 0 deletions src/common/types/value/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ void Value::copyValueFrom(const Value& other) {
case PhysicalTypeID::STRING: {
strVal = other.strVal;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
for (auto& child : other.children) {
Expand Down Expand Up @@ -624,6 +625,7 @@ void Value::serialize(Serializer& serializer) const {
case PhysicalTypeID::STRING: {
serializer.serializeValue(strVal);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
for (auto i = 0u; i < childrenSize; ++i) {
Expand Down Expand Up @@ -688,6 +690,7 @@ std::unique_ptr<Value> Value::deserialize(Deserializer& deserializer) {
case PhysicalTypeID::STRING: {
deserializer.deserializeValue(val->strVal);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
deserializer.deserializeVectorOfPtrs(val->children);
Expand Down
3 changes: 3 additions & 0 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ std::unique_ptr<AuxiliaryBuffer> AuxiliaryBufferFactory::getAuxiliaryBuffer(Logi
return std::make_unique<StructAuxiliaryBuffer>(type, memoryManager);
case PhysicalTypeID::LIST:
return std::make_unique<ListAuxiliaryBuffer>(*ListType::getChildType(&type), memoryManager);
case PhysicalTypeID::ARRAY:
return std::make_unique<ListAuxiliaryBuffer>(*ArrayType::getChildType(&type),
memoryManager);
default:
return nullptr;
}
Expand Down
10 changes: 9 additions & 1 deletion src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ void ValueVector::copyFromRowData(uint32_t pos, const uint8_t* rowData) {
case PhysicalTypeID::STRUCT: {
StructVector::copyFromRowData(this, pos, rowData);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyFromRowData(this, pos, rowData);
} break;
Expand All @@ -93,6 +94,7 @@ void ValueVector::copyToRowData(uint32_t pos, uint8_t* rowData,
case PhysicalTypeID::STRUCT: {
StructVector::copyToRowData(this, pos, rowData, rowOverflowBuffer);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyToRowData(this, pos, rowData, rowOverflowBuffer);
} break;
Expand All @@ -113,6 +115,7 @@ void ValueVector::copyFromVectorData(uint8_t* dstData, const ValueVector* srcVec
case PhysicalTypeID::STRUCT: {
StructVector::copyFromVectorData(this, dstData, srcVector, srcVectorData);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyFromVectorData(this, dstData, srcVector, srcVectorData);
} break;
Expand Down Expand Up @@ -185,6 +188,7 @@ void ValueVector::copyFromValue(uint64_t pos, const Value& value) {
StringVector::addString(this, *(ku_string_t*)dstValue, value.strVal.data(),
value.strVal.length());
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listEntry = reinterpret_cast<list_entry_t*>(dstValue);
auto numValues = NestedVal::getChildrenSize(&value);
Expand Down Expand Up @@ -259,6 +263,7 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) const {
case PhysicalTypeID::STRING: {
value->strVal = getValue<ku_string_t>(pos).getAsString();
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto dataVector = ListVector::getDataVector(this);
auto listEntry = getValue<list_entry_t>(pos);
Expand Down Expand Up @@ -294,6 +299,7 @@ void ValueVector::resetAuxiliaryBuffer() {
->resetOverflowBuffer();
return;
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listAuxiliaryBuffer =
ku_dynamic_cast<AuxiliaryBuffer*, ListAuxiliaryBuffer*>(auxiliaryBuffer.get());
Expand Down Expand Up @@ -322,6 +328,7 @@ uint32_t ValueVector::getDataTypeSize(const LogicalType& type) {
case PhysicalTypeID::STRUCT: {
return sizeof(struct_entry_t);
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
return sizeof(list_entry_t);
}
Expand Down Expand Up @@ -504,7 +511,8 @@ void StringVector::copyToRowData(const ValueVector* vector, uint32_t pos, uint8_
}

void ListVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST);
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST ||
vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY);
auto& srcKuList = *(ku_list_t*)rowData;
auto srcNullBytes = reinterpret_cast<uint8_t*>(srcKuList.overflowPtr);
auto srcListValues = srcNullBytes + NullBuffer::getNumBytesForNullValues(srcKuList.size);
Expand Down
64 changes: 34 additions & 30 deletions src/function/cast/cast_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,12 @@ bool CastArrayHelper::containsListToArray(const LogicalType* srcType, const Logi
if (checkCompatibleNestedTypes(srcType->getLogicalTypeID(), dstType->getLogicalTypeID())) {
switch (srcType->getPhysicalType()) {
case PhysicalTypeID::LIST: {
auto srcChildType = (srcType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(srcType) :
ListType::getChildType(srcType);
auto dstChildType = (dstType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(dstType) :
ListType::getChildType(dstType);
return containsListToArray(srcChildType, dstChildType);
return containsListToArray(ListType::getChildType(srcType),
ListType::getChildType(dstType));
}
case PhysicalTypeID::ARRAY: {
return containsListToArray(ArrayType::getChildType(srcType),
ListType::getChildType(dstType));
}
case PhysicalTypeID::STRUCT: {
auto srcFieldTypes = StructType::getFieldTypes(srcType);
Expand Down Expand Up @@ -79,35 +78,40 @@ void CastArrayHelper::validateListEntry(ValueVector* inputVector, LogicalType* r
auto inputType = inputVector->dataType;

switch (resultType->getPhysicalType()) {
case PhysicalTypeID::LIST: {
case PhysicalTypeID::ARRAY: {
if (inputType.getPhysicalType() == PhysicalTypeID::LIST) {
if (inputType.getLogicalTypeID() == LogicalTypeID::ARRAY &&
resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) {
if (ArrayType::getNumElements(&inputType) !=
ArrayType::getNumElements(resultType)) {
throw ConversionException(
stringFormat("Unsupported casting function from {} to {}.",
inputType.toString(), resultType->toString()));
}
auto listEntry = inputVector->getValue<list_entry_t>(pos);
if (listEntry.size != ArrayType::getNumElements(resultType)) {
throw ConversionException{
stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. "
"Expected: {}, Actual: {}.",
ArrayType::getNumElements(resultType),
inputVector->getValue<list_entry_t>(pos).size)};
}
if (inputType.getLogicalTypeID() == LogicalTypeID::LIST &&
resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) {
auto listEntry = inputVector->getValue<list_entry_t>(pos);
if (listEntry.size != ArrayType::getNumElements(resultType)) {
throw ConversionException{
stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. "
"Expected: {}, Actual: {}.",
ArrayType::getNumElements(resultType),
inputVector->getValue<list_entry_t>(pos).size)};
}
auto inputChildVector = ListVector::getDataVector(inputVector);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i);
}
} else if (inputType.getPhysicalType() == PhysicalTypeID::ARRAY) {
if (ArrayType::getNumElements(&inputType) != ArrayType::getNumElements(resultType)) {
throw ConversionException(
stringFormat("Unsupported casting function from {} to {}.",
inputType.toString(), resultType->toString()));
}
auto listEntry = inputVector->getValue<list_entry_t>(pos);
auto inputChildVector = ListVector::getDataVector(inputVector);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i);
}
}
} break;
case PhysicalTypeID::LIST: {
if (inputType.getPhysicalType() == PhysicalTypeID::LIST ||
inputType.getPhysicalType() == PhysicalTypeID::ARRAY) {
auto listEntry = inputVector->getValue<list_entry_t>(pos);
auto inputChildVector = ListVector::getDataVector(inputVector);
auto resultChildType = (resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(resultType) :
ListType::getChildType(resultType);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, resultChildType, i);
validateListEntry(inputChildVector, ListType::getChildType(resultType), i);
}
}
} break;
Expand Down
1 change: 1 addition & 0 deletions src/function/comparison_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ static void executeNestedOperation(uint8_t& result, ValueVector* leftVector,
rightVector->getValue<internalID_t>(rightPos), result, nullptr /* left */,
nullptr /* right */);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
OP::operation(leftVector->getValue<list_entry_t>(leftPos),
rightVector->getValue<list_entry_t>(rightPos), result, leftVector, rightVector);
Expand Down
1 change: 1 addition & 0 deletions src/function/table/call/storage_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct StorageInfoSharedState final : public CallFuncSharedState {
result.push_back(dictionary.getDataColumn());
result.push_back(dictionary.getOffsetColumn());
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listColumn = ku_dynamic_cast<Column*, ListColumn*>(column);
result.push_back(listColumn->getDataColumn());
Expand Down
6 changes: 4 additions & 2 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ static void resolveNestedVector(std::shared_ptr<ValueVector> inputVector, ValueV
auto inputType = &inputVector->dataType;
auto resultType = &resultVector->dataType;
while (true) {
if (inputType->getPhysicalType() == PhysicalTypeID::LIST &&
resultType->getPhysicalType() == PhysicalTypeID::LIST) {
if ((inputType->getPhysicalType() == PhysicalTypeID::LIST ||
inputType->getPhysicalType() == PhysicalTypeID::ARRAY) &&
(resultType->getPhysicalType() == PhysicalTypeID::LIST ||
resultType->getPhysicalType() == PhysicalTypeID::ARRAY)) {
// copy data and nullmask from input
memcpy(resultVector->getData(), inputVector->getData(),
numOfEntries * resultVector->getNumBytesPerValue());
Expand Down
1 change: 1 addition & 0 deletions src/function/vector_hash_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result)
case PhysicalTypeID::STRUCT: {
computeStructVecHash(operand, result);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
computeListVectorHash(operand, result);
} break;
Expand Down
2 changes: 2 additions & 0 deletions src/function/vector_list_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ static scalar_func_exec_t getBinaryListExecFuncSwitchRight(const LogicalType& ri
execFunc = ScalarFunction::BinaryExecListStructFunction<list_entry_t, internalID_t,
RESULT_TYPE, OPERATION>;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
execFunc = ScalarFunction::BinaryExecListStructFunction<list_entry_t, list_entry_t,
RESULT_TYPE, OPERATION>;
Expand Down Expand Up @@ -400,6 +401,7 @@ static std::unique_ptr<FunctionBindData> ListExtractBindFunc(
scalarFunction->execFunc =
BinaryExecListExtractFunction<list_entry_t, int64_t, ku_string_t, ListExtract>;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
scalarFunction->execFunc =
BinaryExecListExtractFunction<list_entry_t, int64_t, list_entry_t, ListExtract>;
Expand Down
Loading

0 comments on commit 4451bf8

Please sign in to comment.