Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove DataTypeInfo and use LogicalType and column names #2539

Merged
merged 11 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ nodejs:
$(call run-cmake-release, -DBUILD_NODEJS=TRUE)

python:
$(call run-cmake-release, -DBUILD_PYTHON=TRUE)
$(call run-cmake-debug, -DBUILD_PYTHON=TRUE)
mxwli marked this conversation as resolved.
Show resolved Hide resolved

rust:
ifeq ($(OS),Windows_NT)
Expand Down
4 changes: 2 additions & 2 deletions src/common/arrow/arrow_array_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ static void scanArrowArrayFixedList(const ArrowSchema* schema, const ArrowArray*
ValueVector& outputVector, ArrowNullMaskTree* mask, uint64_t srcOffset, uint64_t dstOffset,
uint64_t count) {
mask->copyToValueVector(&outputVector, dstOffset, count);
int64_t numValuesInList = FixedListType::getNumValuesInList(&outputVector.dataType);
int64_t numValuesInList = ArrayType::getNumValuesInList(&outputVector.dataType);
ArrowConverter::fromArrowArray(schema->children[0], array->children[0], outputVector,
mask->getChild(0), srcOffset * numValuesInList, dstOffset * numValuesInList,
count * numValuesInList);
Expand Down Expand Up @@ -499,7 +499,7 @@ void ArrowConverter::fromArrowArray(const ArrowSchema* schema, const ArrowArray*
return scanArrowArrayList<int64_t>(schema, array, outputVector, mask, srcOffset,
dstOffset, count);
case 'w':
// FIXED_LIST
// ARRAY
// TODO Manh: Array Scanning
KU_UNREACHABLE;
// return scanArrowArrayFixedList(
Expand Down
66 changes: 46 additions & 20 deletions src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ void ArrowConverter::initializeChild(ArrowSchema& child, const std::string& name
}

void ArrowConverter::setArrowFormatForStruct(ArrowSchemaHolder& rootHolder, ArrowSchema& child,
const main::DataTypeInfo& typeInfo) {
auto& childrenTypesInfo = typeInfo.childrenTypesInfo;
const LogicalType& dataType) {
child.format = "+s";
// name is set by parent.
child.n_children = (std::int64_t)childrenTypesInfo.size();
child.n_children = (std::int64_t)StructType::getNumFields(&dataType);
rootHolder.nestedChildren.emplace_back();
rootHolder.nestedChildren.back().resize(child.n_children);
rootHolder.nestedChildrenPtr.emplace_back();
Expand All @@ -57,14 +56,36 @@ void ArrowConverter::setArrowFormatForStruct(ArrowSchemaHolder& rootHolder, Arro
child.children = &rootHolder.nestedChildrenPtr.back()[0];
for (auto i = 0u; i < child.n_children; i++) {
initializeChild(*child.children[i]);
child.children[i]->name = copyName(rootHolder, childrenTypesInfo[i]->name);
setArrowFormat(rootHolder, *child.children[i], *childrenTypesInfo[i]);
auto structField = StructType::getField(&dataType, i);
child.children[i]->name = copyName(rootHolder, structField->getName());
setArrowFormat(rootHolder, *child.children[i], *structField->getType());
}
}

void ArrowConverter::setArrowFormatForInternalID(ArrowSchemaHolder& rootHolder, ArrowSchema& child,
const LogicalType& /*dataType*/) {
child.format = "+s";
// name is set by parent.
child.n_children = 2;
rootHolder.nestedChildren.emplace_back();
rootHolder.nestedChildren.back().resize(child.n_children);
rootHolder.nestedChildrenPtr.emplace_back();
rootHolder.nestedChildrenPtr.back().resize(child.n_children);
for (auto i = 0u; i < child.n_children; i++) {
rootHolder.nestedChildrenPtr.back()[i] = &rootHolder.nestedChildren.back()[i];
}
child.children = &rootHolder.nestedChildrenPtr.back()[0];
initializeChild(*child.children[0]);
child.children[0]->name = copyName(rootHolder, "table");
setArrowFormat(rootHolder, *child.children[0], *LogicalType::INT64());
initializeChild(*child.children[1]);
child.children[1]->name = copyName(rootHolder, "offset");
setArrowFormat(rootHolder, *child.children[1], *LogicalType::INT64());
}

void ArrowConverter::setArrowFormat(ArrowSchemaHolder& rootHolder, ArrowSchema& child,
const main::DataTypeInfo& typeInfo) {
switch (typeInfo.typeID) {
const LogicalType& dataType) {
switch (dataType.getLogicalTypeID()) {
case LogicalTypeID::BOOL: {
child.format = "b";
} break;
Expand Down Expand Up @@ -137,10 +158,10 @@ void ArrowConverter::setArrowFormat(ArrowSchemaHolder& rootHolder, ArrowSchema&
initializeChild(rootHolder.nestedChildren.back()[0]);
child.children = &rootHolder.nestedChildrenPtr.back()[0];
child.children[0]->name = "l";
setArrowFormat(rootHolder, **child.children, *typeInfo.childrenTypesInfo[0]);
setArrowFormat(rootHolder, **child.children, *ListType::getChildType(&dataType));
} break;
case LogicalTypeID::ARRAY: {
auto numValuesPerArray = "+w:" + std::to_string(typeInfo.fixedNumValues);
auto numValuesPerArray = "+w:" + std::to_string(ArrayType::getNumElements(&dataType));
child.format = copyName(rootHolder, numValuesPerArray);
child.n_children = 1;
rootHolder.nestedChildren.emplace_back();
Expand All @@ -150,25 +171,27 @@ void ArrowConverter::setArrowFormat(ArrowSchemaHolder& rootHolder, ArrowSchema&
initializeChild(rootHolder.nestedChildren.back()[0]);
child.children = &rootHolder.nestedChildrenPtr.back()[0];
child.children[0]->name = "l";
setArrowFormat(rootHolder, **child.children, *typeInfo.childrenTypesInfo[0]);
setArrowFormat(rootHolder, **child.children, *ArrayType::getChildType(&dataType));
} break;
case LogicalTypeID::STRUCT:
case LogicalTypeID::INTERNAL_ID:
case LogicalTypeID::NODE:
case LogicalTypeID::REL: {
setArrowFormatForStruct(rootHolder, child, typeInfo);
} break;
case LogicalTypeID::REL:
setArrowFormatForStruct(rootHolder, child, dataType);
break;
case LogicalTypeID::INTERNAL_ID:
setArrowFormatForInternalID(rootHolder, child, dataType);
break;
default:
KU_UNREACHABLE;
}
}

std::unique_ptr<ArrowSchema> ArrowConverter::toArrowSchema(
const std::vector<std::unique_ptr<main::DataTypeInfo>>& typesInfo) {
const std::vector<LogicalType>& dataTypes, const std::vector<std::string>& columnNames) {
auto outSchema = std::make_unique<ArrowSchema>();
auto rootHolder = std::make_unique<ArrowSchemaHolder>();

auto columnCount = (int64_t)typesInfo.size();
auto columnCount = (int64_t)dataTypes.size();
rootHolder->children.resize(columnCount);
rootHolder->childrenPtrs.resize(columnCount);
for (auto i = 0u; i < columnCount; i++) {
Expand All @@ -186,8 +209,8 @@ std::unique_ptr<ArrowSchema> ArrowConverter::toArrowSchema(
for (auto i = 0u; i < columnCount; i++) {
auto& child = rootHolder->children[i];
initializeChild(child);
child.name = copyName(*rootHolder, typesInfo[i]->name);
setArrowFormat(*rootHolder, child, *typesInfo[i]);
child.name = copyName(*rootHolder, columnNames[i]);
setArrowFormat(*rootHolder, child, dataTypes[i]);
}

outSchema->private_data = rootHolder.release();
Expand All @@ -197,8 +220,11 @@ std::unique_ptr<ArrowSchema> ArrowConverter::toArrowSchema(

void ArrowConverter::toArrowArray(main::QueryResult& queryResult, ArrowArray* outArray,
std::int64_t chunkSize) {
auto typesInfo = queryResult.getColumnTypesInfo();
auto rowBatch = make_unique<ArrowRowBatch>(std::move(typesInfo), chunkSize);
common::logical_types_t types;
for (auto type : queryResult.getColumnDataTypes()) {
types.push_back(std::make_unique<LogicalType>(type));
}
auto rowBatch = make_unique<ArrowRowBatch>(std::move(types), chunkSize);
*outArray = rowBatch->append(queryResult, chunkSize);
}

Expand Down
Loading
Loading