Skip to content

Commit

Permalink
fix rebase errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
mxwli committed Apr 8, 2024
1 parent 76da8a8 commit 46dbb69
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 28 deletions.
4 changes: 2 additions & 2 deletions src/common/arrow/arrow_array_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ static void scanArrowArrayFixedList(const ArrowSchema* schema, const ArrowArray*
ValueVector& outputVector, ArrowNullMaskTree* mask, uint64_t srcOffset, uint64_t dstOffset,
uint64_t count) {
mask->copyToValueVector(&outputVector, dstOffset, count);
int64_t numValuesInList = FixedListType::getNumValuesInList(&outputVector.dataType);
int64_t numValuesInList = ArrayType::getNumValuesInList(&outputVector.dataType);
ArrowConverter::fromArrowArray(schema->children[0], array->children[0], outputVector,
mask->getChild(0), srcOffset * numValuesInList, dstOffset * numValuesInList,
count * numValuesInList);
Expand Down Expand Up @@ -499,7 +499,7 @@ void ArrowConverter::fromArrowArray(const ArrowSchema* schema, const ArrowArray*
return scanArrowArrayList<int64_t>(schema, array, outputVector, mask, srcOffset,
dstOffset, count);
case 'w':
// FIXED_LIST
// ARRAY
// TODO Manh: Array Scanning
KU_UNREACHABLE;
// return scanArrowArrayFixedList(
Expand Down
7 changes: 4 additions & 3 deletions src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,11 @@ void ArrowConverter::setArrowFormat(
initializeChild(rootHolder.nestedChildren.back()[0]);
child.children = &rootHolder.nestedChildrenPtr.back()[0];
child.children[0]->name = "l";
setArrowFormat(rootHolder, **child.children, *VarListType::getChildType(&dataType));
setArrowFormat(rootHolder, **child.children, *ListType::getChildType(&dataType));
} break;
case LogicalTypeID::ARRAY: {
auto numValuesPerArray = "+w:" + std::to_string(typeInfo.fixedNumValues);
auto numValuesPerArray =
"+w:" + std::to_string(ArrayType::getNumElements(&dataType));
child.format = copyName(rootHolder, numValuesPerArray);
child.n_children = 1;
rootHolder.nestedChildren.emplace_back();
Expand All @@ -150,7 +151,7 @@ void ArrowConverter::setArrowFormat(
initializeChild(rootHolder.nestedChildren.back()[0]);
child.children = &rootHolder.nestedChildrenPtr.back()[0];
child.children[0]->name = "l";
setArrowFormat(rootHolder, **child.children, *FixedListType::getChildType(&dataType));
setArrowFormat(rootHolder, **child.children, *ArrayType::getChildType(&dataType));
} break;
case LogicalTypeID::STRUCT:
case LogicalTypeID::INTERNAL_ID:
Expand Down
42 changes: 21 additions & 21 deletions src/common/arrow/arrow_row_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ void ArrowRowBatch::templateInitializeVector<LogicalTypeID::STRING>(
}

template<>
void ArrowRowBatch::templateInitializeVector<LogicalTypeID::VAR_LIST>(
void ArrowRowBatch::templateInitializeVector<LogicalTypeID::LIST>(
ArrowVector* vector, const LogicalType& type, std::int64_t capacity) {
initializeNullBits(vector->validity, capacity);
auto childType = *VarListType::getChildType(&type);
auto childType = *ListType::getChildType(&type);
// Initialize offsets and child buffer.
vector->data.reserve((capacity + 1) * sizeof(std::uint32_t));
((std::uint32_t*)vector->data.data())[0] = 0;
Expand Down Expand Up @@ -187,7 +187,7 @@ std::unique_ptr<ArrowVector> ArrowRowBatch::createVector(
// LCOV_EXCL_START
throw common::RuntimeException{
common::stringFormat("Unsupported type: {} for arrow conversion.",
LogicalTypeUtils::toString(typeInfo.typeID))};
type.toString())};
// LCOV_EXCL_STOP
}
}
Expand Down Expand Up @@ -249,7 +249,7 @@ void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::STRING>(

template<>
void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::UUID>(ArrowVector* vector,
const main::LogicalType& /*type*/, Value* value, std::int64_t pos) {
const LogicalType& /*type*/, Value* value, std::int64_t pos) {
auto offsets = (std::uint32_t*)vector->data.data();
auto str = UUID::toString(value->val.int128Val);
auto strLength = str.length();
Expand All @@ -260,7 +260,7 @@ void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::UUID>(ArrowVector* v

template<>
void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::LIST>(ArrowVector* vector,
const main::LogicalType& type, Value* value, std::int64_t pos) {
const LogicalType& type, Value* value, std::int64_t pos) {
vector->data.resize((pos + 2) * sizeof(std::uint32_t));
auto offsets = (std::uint32_t*)vector->data.data();
auto numElements = value->childrenSize;
Expand All @@ -277,25 +277,25 @@ void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::LIST>(ArrowVector* v
// value into it
// If vector->childData[0] is an ARRAY, its data buffer is supposed to be empty,
// so we don't resize it here
if (VarListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::LIST &&
VarListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::ARRAY) {
if (ListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::LIST &&
ListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::ARRAY) {
vector->childData[0]->data.resize(
numChildElements * storage::StorageUtils::getDataTypeSize(LogicalType{
VarListType::getChildType(&type)->getLogicalTypeID()}));
ListType::getChildType(&type)->getLogicalTypeID()}));
}
for (auto i = 0u; i < numElements; i++) {
appendValue(vector->childData[0].get(), *VarListType::getChildType(&type),
appendValue(vector->childData[0].get(), *ListType::getChildType(&type),
value->children[i].get());
}
}

template<>
void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::ARRAY>(ArrowVector* vector,
const LogicalType& type, Value* value, std::int64_t pos) {
auto numValuesPerList = value->childrenSize;
auto numValuesInChild = numElements * (pos + 1);
auto numValuesPerArray = ArrayType::getNumElements(&type);
auto numValuesInChild = numValuesPerArray * (pos + 1);
auto currentNumBytesForChildValidity = vector->childData[0]->validity.size();
auto numBytesForChildValidity = getNumBytesForBits(numChildElements);
auto numBytesForChildValidity = getNumBytesForBits(numValuesPerArray);
vector->childData[0]->validity.resize(numBytesForChildValidity);
// Initialize validity mask which is used to mark each value is valid (non-null) or not (null).
for (auto i = currentNumBytesForChildValidity; i < numBytesForChildValidity; i++) {
Expand All @@ -305,14 +305,14 @@ void ArrowRowBatch::templateCopyNonNullValue<LogicalTypeID::ARRAY>(ArrowVector*
// value into it
// If vector->childData[0] is an ARRAY, its data buffer is supposed to be empty,
// so we don't resize it here
if (VarListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::LIST &&
VarListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::ARRAY) {
if (ListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::LIST &&
ListType::getChildType(&type)->getLogicalTypeID() != LogicalTypeID::ARRAY) {
vector->childData[0]->data.resize(
numChildElements * storage::StorageUtils::getDataTypeSize(LogicalType{
VarListType::getChildType(&type)->getLogicalTypeID()}));
numValuesPerArray * storage::StorageUtils::getDataTypeSize(LogicalType{
ListType::getChildType(&type)->getLogicalTypeID()}));
}
for (auto i = 0u; i < numElements; i++) {
appendValue(vector->childData[0].get(), *VarListType::getChildType(&type),
for (auto i = 0u; i < numValuesPerArray; i++) {
appendValue(vector->childData[0].get(), *ArrayType::getChildType(&type),
value->children[i].get());
}
}
Expand Down Expand Up @@ -382,7 +382,7 @@ void ArrowRowBatch::copyNonNullValue(
templateCopyNonNullValue<LogicalTypeID::INT128>(vector, type, value, pos);
} break;
case LogicalTypeID::UUID: {
templateCopyNonNullValue<LogicalTypeID::UUID>(vector, typeInfo, value, pos);
templateCopyNonNullValue<LogicalTypeID::UUID>(vector, type, value, pos);
} break;
case LogicalTypeID::INT64: {
templateCopyNonNullValue<LogicalTypeID::INT64>(vector, type, value, pos);
Expand Down Expand Up @@ -634,14 +634,14 @@ ArrowArray* ArrowRowBatch::templateCreateArray<LogicalTypeID::STRING>(
}

template<>
ArrowArray* ArrowRowBatch::templateCreateArray<LogicalTypeID::VAR_LIST>(
ArrowArray* ArrowRowBatch::templateCreateArray<LogicalTypeID::LIST>(
ArrowVector& vector, const LogicalType& type) {
auto result = createArrayFromVector(vector);
vector.childPointers.resize(1);
result->children = vector.childPointers.data();
result->n_children = 1;
vector.childPointers[0] =
convertVectorToArray(*vector.childData[0], *VarListType::getChildType(&type));
convertVectorToArray(*vector.childData[0], *ListType::getChildType(&type));
vector.array = std::move(result);
return vector.array.get();
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/arrow/arrow_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ LogicalType ArrowConverter::fromArrowSchema(const ArrowSchema* schema) {
case 'w':
throw RuntimeException("Fixed list is currently WIP.");
// TODO Manh: Array Binding
// return *LogicalType::FIXED_LIST(
// return *LogicalType::ARRAY(
// std::make_unique<LogicalType>(fromArrowSchema(schema->children[0])),
// std::stoi(arrowType+3));
case 's':
Expand Down
2 changes: 1 addition & 1 deletion tools/python_api/src_cpp/py_query_result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ bool PyQueryResult::getNextArrowChunk(const std::vector<kuzu::common::LogicalTyp

auto batchImportFunc = importCache->pyarrow.lib.RecordBatch._import_from_c();

auto schema = ArrowConverter::toArrowSchema(typesInfo);
auto schema = ArrowConverter::toArrowSchema(types, names);
batches.append(batchImportFunc((std::uint64_t)&data, (std::uint64_t)schema.get()));
return true;
}
Expand Down

0 comments on commit 46dbb69

Please sign in to comment.