Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Physical Type ARRAY #3175

Merged
Merged 1 commit on Apr 9, 2024 (source and target branch names were not captured in this extract)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
1 change: 1 addition & 0 deletions extension/duckdb_scanner/src/duckdb_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ void getDuckDBVectorConversionFunc(PhysicalTypeID physicalTypeID,
case PhysicalTypeID::INTERVAL: {
conversion_func = convertDuckDBVectorToVector<interval_t>;
} break;
case PhysicalTypeID::ARRAY:
manh9203 marked this conversation as resolved.
Show resolved Hide resolved
case PhysicalTypeID::LIST: {
conversion_func = convertDuckDBVectorToVector<list_entry_t>;
} break;
Expand Down
70 changes: 39 additions & 31 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ std::string PhysicalTypeUtils::physicalTypeToString(PhysicalTypeID physicalType)
return "STRUCT";
case PhysicalTypeID::LIST:
return "LIST";
case PhysicalTypeID::ARRAY:
return "ARRAY";
case PhysicalTypeID::POINTER:
return "POINTER";
default:
Expand Down Expand Up @@ -96,8 +98,12 @@ uint32_t PhysicalTypeUtils::getFixedTypeSize(PhysicalTypeID physicalType) {
}
}

bool ListTypeInfo::operator==(const ListTypeInfo& other) const {
return *childType == *other.childType;
bool ListTypeInfo::operator==(const ExtraTypeInfo& other) const {
auto otherListTypeInfo = ku_dynamic_cast<const ExtraTypeInfo*, const ListTypeInfo*>(&other);
if (otherListTypeInfo) {
return *childType == *otherListTypeInfo->childType;
}
return false;
}

std::unique_ptr<ExtraTypeInfo> ListTypeInfo::copy() const {
Expand All @@ -112,8 +118,13 @@ void ListTypeInfo::serializeInternal(Serializer& serializer) const {
childType->serialize(serializer);
}

bool ArrayTypeInfo::operator==(const ArrayTypeInfo& other) const {
return *childType == *other.childType && numElements == other.numElements;
bool ArrayTypeInfo::operator==(const ExtraTypeInfo& other) const {
auto otherArrayTypeInfo = ku_dynamic_cast<const ExtraTypeInfo*, const ArrayTypeInfo*>(&other);
if (otherArrayTypeInfo) {
return *childType == *otherArrayTypeInfo->childType &&
numElements == otherArrayTypeInfo->numElements;
}
return false;
}

std::unique_ptr<ExtraTypeInfo> ArrayTypeInfo::deserialize(Deserializer& deserializer) {
Expand Down Expand Up @@ -223,16 +234,20 @@ std::vector<const StructField*> StructTypeInfo::getStructFields() const {
return structFields;
}

bool StructTypeInfo::operator==(const StructTypeInfo& other) const {
if (fields.size() != other.fields.size()) {
return false;
}
for (auto i = 0u; i < fields.size(); ++i) {
if (fields[i] != other.fields[i]) {
bool StructTypeInfo::operator==(const ExtraTypeInfo& other) const {
auto otherStructTypeInfo = ku_dynamic_cast<const ExtraTypeInfo*, const StructTypeInfo*>(&other);
if (otherStructTypeInfo) {
if (fields.size() != otherStructTypeInfo->fields.size()) {
return false;
}
for (auto i = 0u; i < fields.size(); ++i) {
if (fields[i] != otherStructTypeInfo->fields[i]) {
return false;
}
}
return true;
}
return true;
return false;
}

std::unique_ptr<ExtraTypeInfo> StructTypeInfo::deserialize(Deserializer& deserializer) {
Expand All @@ -257,6 +272,7 @@ LogicalType::LogicalType(LogicalTypeID typeID) : typeID{typeID}, extraTypeInfo{n
physicalType = getPhysicalType(typeID);
// Complex types should not use this constructor as they need extra type information
KU_ASSERT(physicalType != PhysicalTypeID::LIST);
KU_ASSERT(physicalType != PhysicalTypeID::ARRAY);
// Node/Rel types are exempted due to some complex code in bind_graph_pattern.cpp
KU_ASSERT(physicalType != PhysicalTypeID::STRUCT || typeID == LogicalTypeID::NODE ||
typeID == LogicalTypeID::REL || typeID == LogicalTypeID::RECURSIVE_REL);
Expand Down Expand Up @@ -285,21 +301,10 @@ bool LogicalType::operator==(const LogicalType& other) const {
if (typeID != other.typeID) {
return false;
}
switch (other.getPhysicalType()) {
case PhysicalTypeID::LIST:
if (typeID == LogicalTypeID::ARRAY) {
return *ku_dynamic_cast<ExtraTypeInfo*, ArrayTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, ArrayTypeInfo*>(other.extraTypeInfo.get());
} else {
return *ku_dynamic_cast<ExtraTypeInfo*, ListTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, ListTypeInfo*>(other.extraTypeInfo.get());
}
case PhysicalTypeID::STRUCT:
return *ku_dynamic_cast<ExtraTypeInfo*, StructTypeInfo*>(extraTypeInfo.get()) ==
*ku_dynamic_cast<ExtraTypeInfo*, StructTypeInfo*>(other.extraTypeInfo.get());
default:
return true;
if (extraTypeInfo) {
return *extraTypeInfo == *other.extraTypeInfo;
}
return true;
}

bool LogicalType::operator!=(const LogicalType& other) const {
Expand Down Expand Up @@ -389,6 +394,7 @@ void LogicalType::serialize(Serializer& serializer) const {
serializer.serializeValue(physicalType);
switch (physicalType) {
case PhysicalTypeID::LIST:
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::STRUCT:
extraTypeInfo->serialize(serializer);
default:
Expand All @@ -404,11 +410,10 @@ std::unique_ptr<LogicalType> LogicalType::deserialize(Deserializer& deserializer
std::unique_ptr<ExtraTypeInfo> extraTypeInfo;
switch (physicalType) {
case PhysicalTypeID::LIST: {
if (typeID == LogicalTypeID::ARRAY) {
extraTypeInfo = ArrayTypeInfo::deserialize(deserializer);
} else {
extraTypeInfo = ListTypeInfo::deserialize(deserializer);
}
extraTypeInfo = ListTypeInfo::deserialize(deserializer);
} break;
case PhysicalTypeID::ARRAY: {
extraTypeInfo = ArrayTypeInfo::deserialize(deserializer);
} break;
case PhysicalTypeID::STRUCT: {
extraTypeInfo = StructTypeInfo::deserialize(deserializer);
Expand Down Expand Up @@ -516,11 +521,13 @@ PhysicalTypeID LogicalType::getPhysicalType(LogicalTypeID typeID) {
case LogicalTypeID::STRING: {
return PhysicalTypeID::STRING;
} break;
case LogicalTypeID::ARRAY:
case LogicalTypeID::MAP:
case LogicalTypeID::LIST: {
return PhysicalTypeID::LIST;
} break;
case LogicalTypeID::ARRAY: {
return PhysicalTypeID::ARRAY;
} break;
case LogicalTypeID::NODE:
case LogicalTypeID::REL:
case LogicalTypeID::RECURSIVE_REL:
Expand Down Expand Up @@ -721,6 +728,7 @@ uint32_t LogicalTypeUtils::getRowLayoutSize(const LogicalType& type) {
case PhysicalTypeID::STRING: {
return sizeof(ku_string_t);
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
return sizeof(ku_list_t);
}
Expand Down
3 changes: 3 additions & 0 deletions src/common/types/value/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ void Value::copyValueFrom(const Value& other) {
case PhysicalTypeID::STRING: {
strVal = other.strVal;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
for (auto& child : other.children) {
Expand Down Expand Up @@ -624,6 +625,7 @@ void Value::serialize(Serializer& serializer) const {
case PhysicalTypeID::STRING: {
serializer.serializeValue(strVal);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
for (auto i = 0u; i < childrenSize; ++i) {
Expand Down Expand Up @@ -688,6 +690,7 @@ std::unique_ptr<Value> Value::deserialize(Deserializer& deserializer) {
case PhysicalTypeID::STRING: {
deserializer.deserializeValue(val->strVal);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST:
case PhysicalTypeID::STRUCT: {
deserializer.deserializeVectorOfPtrs(val->children);
Expand Down
3 changes: 3 additions & 0 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ std::unique_ptr<AuxiliaryBuffer> AuxiliaryBufferFactory::getAuxiliaryBuffer(Logi
return std::make_unique<StructAuxiliaryBuffer>(type, memoryManager);
case PhysicalTypeID::LIST:
return std::make_unique<ListAuxiliaryBuffer>(*ListType::getChildType(&type), memoryManager);
case PhysicalTypeID::ARRAY:
return std::make_unique<ListAuxiliaryBuffer>(*ArrayType::getChildType(&type),
memoryManager);
default:
return nullptr;
}
Expand Down
10 changes: 9 additions & 1 deletion src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ void ValueVector::copyFromRowData(uint32_t pos, const uint8_t* rowData) {
case PhysicalTypeID::STRUCT: {
StructVector::copyFromRowData(this, pos, rowData);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyFromRowData(this, pos, rowData);
} break;
Expand All @@ -93,6 +94,7 @@ void ValueVector::copyToRowData(uint32_t pos, uint8_t* rowData,
case PhysicalTypeID::STRUCT: {
StructVector::copyToRowData(this, pos, rowData, rowOverflowBuffer);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyToRowData(this, pos, rowData, rowOverflowBuffer);
} break;
Expand All @@ -113,6 +115,7 @@ void ValueVector::copyFromVectorData(uint8_t* dstData, const ValueVector* srcVec
case PhysicalTypeID::STRUCT: {
StructVector::copyFromVectorData(this, dstData, srcVector, srcVectorData);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
ListVector::copyFromVectorData(this, dstData, srcVector, srcVectorData);
} break;
Expand Down Expand Up @@ -185,6 +188,7 @@ void ValueVector::copyFromValue(uint64_t pos, const Value& value) {
StringVector::addString(this, *(ku_string_t*)dstValue, value.strVal.data(),
value.strVal.length());
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listEntry = reinterpret_cast<list_entry_t*>(dstValue);
auto numValues = NestedVal::getChildrenSize(&value);
Expand Down Expand Up @@ -259,6 +263,7 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) const {
case PhysicalTypeID::STRING: {
value->strVal = getValue<ku_string_t>(pos).getAsString();
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto dataVector = ListVector::getDataVector(this);
auto listEntry = getValue<list_entry_t>(pos);
Expand Down Expand Up @@ -294,6 +299,7 @@ void ValueVector::resetAuxiliaryBuffer() {
->resetOverflowBuffer();
return;
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listAuxiliaryBuffer =
ku_dynamic_cast<AuxiliaryBuffer*, ListAuxiliaryBuffer*>(auxiliaryBuffer.get());
Expand Down Expand Up @@ -322,6 +328,7 @@ uint32_t ValueVector::getDataTypeSize(const LogicalType& type) {
case PhysicalTypeID::STRUCT: {
return sizeof(struct_entry_t);
}
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
return sizeof(list_entry_t);
}
Expand Down Expand Up @@ -504,7 +511,8 @@ void StringVector::copyToRowData(const ValueVector* vector, uint32_t pos, uint8_
}

void ListVector::copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST);
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::LIST ||
vector->dataType.getPhysicalType() == PhysicalTypeID::ARRAY);
auto& srcKuList = *(ku_list_t*)rowData;
auto srcNullBytes = reinterpret_cast<uint8_t*>(srcKuList.overflowPtr);
auto srcListValues = srcNullBytes + NullBuffer::getNumBytesForNullValues(srcKuList.size);
Expand Down
64 changes: 34 additions & 30 deletions src/function/cast/cast_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,12 @@ bool CastArrayHelper::containsListToArray(const LogicalType* srcType, const Logi
if (checkCompatibleNestedTypes(srcType->getLogicalTypeID(), dstType->getLogicalTypeID())) {
switch (srcType->getPhysicalType()) {
case PhysicalTypeID::LIST: {
auto srcChildType = (srcType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(srcType) :
ListType::getChildType(srcType);
auto dstChildType = (dstType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(dstType) :
ListType::getChildType(dstType);
return containsListToArray(srcChildType, dstChildType);
return containsListToArray(ListType::getChildType(srcType),
ListType::getChildType(dstType));
}
case PhysicalTypeID::ARRAY: {
return containsListToArray(ArrayType::getChildType(srcType),
ListType::getChildType(dstType));
}
case PhysicalTypeID::STRUCT: {
auto srcFieldTypes = StructType::getFieldTypes(srcType);
Expand Down Expand Up @@ -79,35 +78,40 @@ void CastArrayHelper::validateListEntry(ValueVector* inputVector, LogicalType* r
auto inputType = inputVector->dataType;

switch (resultType->getPhysicalType()) {
case PhysicalTypeID::LIST: {
case PhysicalTypeID::ARRAY: {
if (inputType.getPhysicalType() == PhysicalTypeID::LIST) {
if (inputType.getLogicalTypeID() == LogicalTypeID::ARRAY &&
resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) {
if (ArrayType::getNumElements(&inputType) !=
ArrayType::getNumElements(resultType)) {
throw ConversionException(
stringFormat("Unsupported casting function from {} to {}.",
inputType.toString(), resultType->toString()));
}
auto listEntry = inputVector->getValue<list_entry_t>(pos);
if (listEntry.size != ArrayType::getNumElements(resultType)) {
throw ConversionException{
stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. "
"Expected: {}, Actual: {}.",
ArrayType::getNumElements(resultType),
inputVector->getValue<list_entry_t>(pos).size)};
}
if (inputType.getLogicalTypeID() == LogicalTypeID::LIST &&
resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) {
auto listEntry = inputVector->getValue<list_entry_t>(pos);
if (listEntry.size != ArrayType::getNumElements(resultType)) {
throw ConversionException{
stringFormat("Unsupported casting LIST with incorrect list entry to ARRAY. "
"Expected: {}, Actual: {}.",
ArrayType::getNumElements(resultType),
inputVector->getValue<list_entry_t>(pos).size)};
}
auto inputChildVector = ListVector::getDataVector(inputVector);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i);
}
} else if (inputType.getPhysicalType() == PhysicalTypeID::ARRAY) {
if (ArrayType::getNumElements(&inputType) != ArrayType::getNumElements(resultType)) {
throw ConversionException(
stringFormat("Unsupported casting function from {} to {}.",
inputType.toString(), resultType->toString()));
}
auto listEntry = inputVector->getValue<list_entry_t>(pos);
auto inputChildVector = ListVector::getDataVector(inputVector);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, ArrayType::getChildType(resultType), i);
}
}
} break;
case PhysicalTypeID::LIST: {
if (inputType.getPhysicalType() == PhysicalTypeID::LIST ||
inputType.getPhysicalType() == PhysicalTypeID::ARRAY) {
auto listEntry = inputVector->getValue<list_entry_t>(pos);
auto inputChildVector = ListVector::getDataVector(inputVector);
auto resultChildType = (resultType->getLogicalTypeID() == LogicalTypeID::ARRAY) ?
ArrayType::getChildType(resultType) :
ListType::getChildType(resultType);
for (auto i = listEntry.offset; i < listEntry.offset + listEntry.size; i++) {
validateListEntry(inputChildVector, resultChildType, i);
validateListEntry(inputChildVector, ListType::getChildType(resultType), i);
}
}
} break;
Expand Down
1 change: 1 addition & 0 deletions src/function/comparison_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ static void executeNestedOperation(uint8_t& result, ValueVector* leftVector,
rightVector->getValue<internalID_t>(rightPos), result, nullptr /* left */,
nullptr /* right */);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
OP::operation(leftVector->getValue<list_entry_t>(leftPos),
rightVector->getValue<list_entry_t>(rightPos), result, leftVector, rightVector);
Expand Down
1 change: 1 addition & 0 deletions src/function/table/call/storage_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct StorageInfoSharedState final : public CallFuncSharedState {
result.push_back(dictionary.getDataColumn());
result.push_back(dictionary.getOffsetColumn());
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
auto listColumn = ku_dynamic_cast<Column*, ListColumn*>(column);
result.push_back(listColumn->getDataColumn());
Expand Down
6 changes: 4 additions & 2 deletions src/function/vector_cast_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ static void resolveNestedVector(std::shared_ptr<ValueVector> inputVector, ValueV
auto inputType = &inputVector->dataType;
auto resultType = &resultVector->dataType;
while (true) {
if (inputType->getPhysicalType() == PhysicalTypeID::LIST &&
resultType->getPhysicalType() == PhysicalTypeID::LIST) {
if ((inputType->getPhysicalType() == PhysicalTypeID::LIST ||
inputType->getPhysicalType() == PhysicalTypeID::ARRAY) &&
(resultType->getPhysicalType() == PhysicalTypeID::LIST ||
resultType->getPhysicalType() == PhysicalTypeID::ARRAY)) {
// copy data and nullmask from input
memcpy(resultVector->getData(), inputVector->getData(),
numOfEntries * resultVector->getNumBytesPerValue());
Expand Down
1 change: 1 addition & 0 deletions src/function/vector_hash_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ void VectorHashFunction::computeHash(ValueVector* operand, ValueVector* result)
case PhysicalTypeID::STRUCT: {
computeStructVecHash(operand, result);
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
computeListVectorHash(operand, result);
} break;
Expand Down
2 changes: 2 additions & 0 deletions src/function/vector_list_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ static scalar_func_exec_t getBinaryListExecFuncSwitchRight(const LogicalType& ri
execFunc = ScalarFunction::BinaryExecListStructFunction<list_entry_t, internalID_t,
RESULT_TYPE, OPERATION>;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
execFunc = ScalarFunction::BinaryExecListStructFunction<list_entry_t, list_entry_t,
RESULT_TYPE, OPERATION>;
Expand Down Expand Up @@ -400,6 +401,7 @@ static std::unique_ptr<FunctionBindData> ListExtractBindFunc(
scalarFunction->execFunc =
BinaryExecListExtractFunction<list_entry_t, int64_t, ku_string_t, ListExtract>;
} break;
case PhysicalTypeID::ARRAY:
case PhysicalTypeID::LIST: {
scalarFunction->execFunc =
BinaryExecListExtractFunction<list_entry_t, int64_t, list_entry_t, ListExtract>;
Expand Down
Loading
Loading