From e159b7a1222a2e69b3879227f036bd259f0f225f Mon Sep 17 00:00:00 2001 From: xiyang Date: Tue, 3 Jan 2023 15:28:09 +0800 Subject: [PATCH] compute schema based on expressionsToMaterialize --- .../logical_operator/logical_hash_join.h | 29 +++-- .../logical_operator/logical_intersect.h | 20 ++-- .../logical_operator/logical_order_by.h | 4 +- .../logical_operator/logical_union.h | 9 +- .../logical_plan/logical_operator/schema.h | 26 +++-- .../logical_plan/logical_operator/sink_util.h | 35 ++---- src/include/processor/mapper/plan_mapper.h | 2 +- src/include/processor/operator/scan_node_id.h | 11 +- src/planner/join_order_enumerator.cpp | 52 +++++---- src/planner/operator/logical_accumulate.cpp | 2 +- .../operator/logical_cross_product.cpp | 2 +- src/planner/operator/logical_hash_join.cpp | 44 +++++-- src/planner/operator/logical_intersect.cpp | 8 +- src/planner/operator/logical_order_by.cpp | 10 +- src/planner/operator/logical_plan_util.cpp | 2 +- src/planner/operator/logical_union.cpp | 8 +- src/planner/operator/schema.cpp | 29 +++-- src/planner/operator/sink_util.cpp | 108 +++++------------- src/planner/query_planner.cpp | 7 +- src/processor/mapper/map_hash_join.cpp | 25 ++-- src/processor/mapper/map_intersect.cpp | 18 ++- src/processor/mapper/map_scan_node.cpp | 4 +- 22 files changed, 210 insertions(+), 245 deletions(-) diff --git a/src/include/planner/logical_plan/logical_operator/logical_hash_join.h b/src/include/planner/logical_plan/logical_operator/logical_hash_join.h index a6951b478a..abdee05b0e 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_hash_join.h +++ b/src/include/planner/logical_plan/logical_operator/logical_hash_join.h @@ -14,28 +14,27 @@ using namespace kuzu::binder; class LogicalHashJoin : public LogicalOperator { public: // Inner and left join. - LogicalHashJoin(vector> joinNodes, JoinType joinType, - bool isProbeAcc, expression_vector expressionsToMaterialize, - shared_ptr probeSideChild, shared_ptr buildSideChild) - : LogicalHashJoin{std::move(joinNodes), joinType, nullptr, UINT32_MAX, isProbeAcc, + LogicalHashJoin(expression_vector joinNodeIDs, JoinType joinType, bool isProbeAcc, + expression_vector expressionsToMaterialize, shared_ptr probeSideChild, + shared_ptr buildSideChild) + : LogicalHashJoin{std::move(joinNodeIDs), joinType, nullptr, UINT32_MAX, isProbeAcc, std::move(expressionsToMaterialize), std::move(probeSideChild), std::move(buildSideChild)} {} // Mark join. - LogicalHashJoin(vector> joinNodes, shared_ptr mark, - uint32_t markPos, bool isProbeAcc, shared_ptr probeSideChild, + LogicalHashJoin(expression_vector joinNodeIDs, shared_ptr mark, uint32_t markPos, + bool isProbeAcc, shared_ptr probeSideChild, shared_ptr buildSideChild) - : LogicalHashJoin{std::move(joinNodes), JoinType::MARK, std::move(mark), markPos, + : LogicalHashJoin{std::move(joinNodeIDs), JoinType::MARK, std::move(mark), markPos, isProbeAcc, expression_vector{} /* expressionsToMaterialize */, std::move(probeSideChild), std::move(buildSideChild)} {} - LogicalHashJoin(vector> joinNodes, JoinType joinType, - shared_ptr mark, uint32_t markPos, bool isProbeAcc, - expression_vector expressionsToMaterialize, shared_ptr probeSideChild, - shared_ptr buildSideChild) + LogicalHashJoin(expression_vector joinNodeIDs, JoinType joinType, shared_ptr mark, + uint32_t markPos, bool isProbeAcc, expression_vector expressionsToMaterialize, + shared_ptr probeSideChild, shared_ptr buildSideChild) : LogicalOperator{LogicalOperatorType::HASH_JOIN, std::move(probeSideChild), std::move(buildSideChild)}, - joinNodes(std::move(joinNodes)), joinType{joinType}, mark{std::move(mark)}, + joinNodeIDs(std::move(joinNodeIDs)), joinType{joinType}, mark{std::move(mark)}, markPos{markPos}, isProbeAcc{isProbeAcc}, expressionsToMaterialize{ std::move(expressionsToMaterialize)} {} @@ -46,7 +45,7 @@ class LogicalHashJoin : public LogicalOperator { inline expression_vector getExpressionsToMaterialize() const { return expressionsToMaterialize; } - inline vector> getJoinNodes() const { return joinNodes; } + inline expression_vector getJoinNodeIDs() const { return joinNodeIDs; } inline JoinType getJoinType() const { return joinType; } inline shared_ptr getMark() const { @@ -57,12 +56,12 @@ class LogicalHashJoin : public LogicalOperator { inline Schema* getBuildSideSchema() const { return children[1]->getSchema(); } inline unique_ptr copy() override { - return make_unique(joinNodes, joinType, mark, markPos, isProbeAcc, + return make_unique(joinNodeIDs, joinType, mark, markPos, isProbeAcc, expressionsToMaterialize, children[0]->copy(), children[1]->copy()); } private: - vector> joinNodes; + expression_vector joinNodeIDs; JoinType joinType; shared_ptr mark; // when joinType is Mark uint32_t markPos; diff --git a/src/include/planner/logical_plan/logical_operator/logical_intersect.h b/src/include/planner/logical_plan/logical_operator/logical_intersect.h index 9ecf34ebf0..442be60192 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_intersect.h +++ b/src/include/planner/logical_plan/logical_operator/logical_intersect.h @@ -8,24 +8,24 @@ namespace kuzu { namespace planner { struct LogicalIntersectBuildInfo { - LogicalIntersectBuildInfo(shared_ptr key, expression_vector expressions) - : key{std::move(key)}, expressionsToMaterialize{std::move(expressions)} {} + LogicalIntersectBuildInfo(shared_ptr keyNodeID, expression_vector expressions) + : keyNodeID{std::move(keyNodeID)}, expressionsToMaterialize{std::move(expressions)} {} inline unique_ptr copy() { - return make_unique(key, expressionsToMaterialize); + return make_unique(keyNodeID, expressionsToMaterialize); } - shared_ptr key; + shared_ptr keyNodeID; expression_vector expressionsToMaterialize; }; class LogicalIntersect : public LogicalOperator { public: - LogicalIntersect(shared_ptr intersectNode, - shared_ptr probeChild, vector> buildChildren, + LogicalIntersect(shared_ptr intersectNodeID, shared_ptr probeChild, + vector> buildChildren, vector> buildInfos) : LogicalOperator{LogicalOperatorType::INTERSECT, std::move(probeChild)}, - intersectNode{std::move(intersectNode)}, buildInfos{std::move(buildInfos)} { + intersectNodeID{std::move(intersectNodeID)}, buildInfos{std::move(buildInfos)} { for (auto& child : buildChildren) { children.push_back(std::move(child)); } @@ -33,9 +33,9 @@ class LogicalIntersect : public LogicalOperator { void computeSchema() override; - string getExpressionsForPrinting() const override { return intersectNode->getRawName(); } + string getExpressionsForPrinting() const override { return intersectNodeID->getRawName(); } - inline shared_ptr getIntersectNode() const { return intersectNode; } + inline shared_ptr getIntersectNodeID() const { return intersectNodeID; } inline LogicalIntersectBuildInfo* getBuildInfo(uint32_t idx) const { return buildInfos[idx].get(); } @@ -44,7 +44,7 @@ class LogicalIntersect : public LogicalOperator { unique_ptr copy() override; private: - shared_ptr intersectNode; + shared_ptr intersectNodeID; vector> buildInfos; }; diff --git a/src/include/planner/logical_plan/logical_operator/logical_order_by.h b/src/include/planner/logical_plan/logical_operator/logical_order_by.h index e117cdee6b..0da4309e35 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_order_by.h +++ b/src/include/planner/logical_plan/logical_operator/logical_order_by.h @@ -15,7 +15,9 @@ class LogicalOrderBy : public LogicalOperator { void computeSchema() override; - string getExpressionsForPrinting() const override; + inline string getExpressionsForPrinting() const override { + return ExpressionUtil::toString(expressionsToOrderBy); + } inline expression_vector getExpressionsToOrderBy() const { return expressionsToOrderBy; } inline vector getIsAscOrders() const { return isAscOrders; } diff --git a/src/include/planner/logical_plan/logical_operator/logical_union.h b/src/include/planner/logical_plan/logical_operator/logical_union.h index 30327d7721..dd1e97c2ac 100644 --- a/src/include/planner/logical_plan/logical_operator/logical_union.h +++ b/src/include/planner/logical_plan/logical_operator/logical_union.h @@ -7,11 +7,9 @@ namespace planner { class LogicalUnion : public LogicalOperator { public: - LogicalUnion(expression_vector expressions, vector> schemasBeforeUnion, - vector> children) + LogicalUnion(expression_vector expressions, vector> children) : LogicalOperator{LogicalOperatorType::UNION_ALL, std::move(children)}, - expressionsToUnion{std::move(expressions)}, schemasBeforeUnion{ - std::move(schemasBeforeUnion)} {} + expressionsToUnion{std::move(expressions)} {} void computeSchema() override; @@ -19,13 +17,12 @@ class LogicalUnion : public LogicalOperator { inline expression_vector getExpressionsToUnion() { return expressionsToUnion; } - inline Schema* getSchemaBeforeUnion(uint32_t idx) { return schemasBeforeUnion[idx].get(); } + inline Schema* getSchemaBeforeUnion(uint32_t idx) { return children[idx]->getSchema(); } unique_ptr copy() override; private: expression_vector expressionsToUnion; - vector> schemasBeforeUnion; }; } // namespace planner diff --git a/src/include/planner/logical_plan/logical_operator/schema.h b/src/include/planner/logical_plan/logical_operator/schema.h index 279b9b6edb..bc1fd1b706 100644 --- a/src/include/planner/logical_plan/logical_operator/schema.h +++ b/src/include/planner/logical_plan/logical_operator/schema.h @@ -10,6 +10,8 @@ using namespace kuzu::common; namespace kuzu { namespace planner { +typedef uint32_t f_group_pos; + class FactorizationGroup { friend class Schema; @@ -56,7 +58,7 @@ class FactorizationGroup { class Schema { public: - inline uint32_t getNumGroups() const { return groups.size(); } + inline f_group_pos getNumGroups() const { return groups.size(); } inline FactorizationGroup* getGroup(shared_ptr expression) const { return getGroup(getGroupPos(expression->getUniqueName())); @@ -68,7 +70,7 @@ class Schema { inline FactorizationGroup* getGroup(uint32_t pos) const { return groups[pos].get(); } - uint32_t createGroup(); + f_group_pos createGroup(); void insertToScope(const shared_ptr& expression, uint32_t groupPos); @@ -76,28 +78,28 @@ class Schema { void insertToGroupAndScope(const expression_vector& expressions, uint32_t groupPos); - inline uint32_t getGroupPos(const Expression& expression) const { + inline f_group_pos getGroupPos(const Expression& expression) const { return getGroupPos(expression.getUniqueName()); } - inline uint32_t getGroupPos(const string& expressionName) const { + inline f_group_pos getGroupPos(const string& expressionName) const { assert(expressionNameToGroupPos.contains(expressionName)); return expressionNameToGroupPos.at(expressionName); } - inline pair getExpressionPos(const Expression& expression) const { + inline pair getExpressionPos(const Expression& expression) const { auto groupPos = getGroupPos(expression); return make_pair(groupPos, groups[groupPos]->getExpressionPos(expression)); } - inline void flattenGroup(uint32_t pos) { groups[pos]->setFlat(); } - inline void setGroupAsSingleState(uint32_t pos) { groups[pos]->setSingleState(); } + inline void flattenGroup(f_group_pos pos) { groups[pos]->setFlat(); } + inline void setGroupAsSingleState(f_group_pos pos) { groups[pos]->setSingleState(); } bool isExpressionInScope(const Expression& expression) const; inline expression_vector getExpressionsInScope() const { return expressionsInScope; } - expression_vector getExpressionsInScope(uint32_t pos) const; + expression_vector getExpressionsInScope(f_group_pos pos) const; expression_vector getSubExpressionsInScope(const shared_ptr& expression); @@ -109,7 +111,7 @@ class Schema { } // Get the group positions containing at least one expression in scope. - unordered_set getGroupsPosInScope() const; + unordered_set getGroupsPosInScope() const; unique_ptr copy() const; @@ -123,5 +125,11 @@ class Schema { expression_vector expressionsInScope; }; +class SchemaUtils { +public: + static vector getExpressionsPerGroup( + const expression_vector& expressions, const Schema& schema); +}; + } // namespace planner } // namespace kuzu diff --git a/src/include/planner/logical_plan/logical_operator/sink_util.h b/src/include/planner/logical_plan/logical_operator/sink_util.h index 10751185a4..f1601f4dab 100644 --- a/src/include/planner/logical_plan/logical_operator/sink_util.h +++ b/src/include/planner/logical_plan/logical_operator/sink_util.h @@ -4,39 +4,22 @@ namespace kuzu { namespace planner { -using namespace kuzu::binder; -// This class contains the logic for re-computing factorization structure after +// This class contains the logic for re-computing factorization structure after sinking class SinkOperatorUtil { public: - static void mergeSchema(const Schema& inputSchema, Schema& result, const vector& keys); + static void mergeSchema(const Schema& inputSchema, const expression_vector& expressionsToMerge, + Schema& resultSchema); - static void mergeSchema(const Schema& inputSchema, Schema& result); - - static void recomputeSchema(const Schema& inputSchema, Schema& result); - - static unordered_set getGroupsPosIgnoringKeyGroups( - const Schema& schema, const vector& keys); + static void recomputeSchema(const Schema& inputSchema, + const expression_vector& expressionsToMerge, Schema& resultSchema); private: - static void mergeKeyGroup(const Schema& inputSchema, Schema& resultSchema, uint32_t keyGroupPos, - const vector& keysInGroup); - - static inline expression_vector getFlatPayloadsIgnoringKeyGroup( - const Schema& schema, const vector& keys) { - return getFlatPayloads(schema, getGroupsPosIgnoringKeyGroups(schema, keys)); - } - static inline expression_vector getFlatPayloads(const Schema& schema) { - return getFlatPayloads(schema, schema.getGroupsPosInScope()); - } - static expression_vector getFlatPayloads( - const Schema& schema, const unordered_set& payloadGroupsPos); + static unordered_map getUnFlatPayloadsPerGroup( + const Schema& schema, const expression_vector& payloads); - static inline bool hasUnFlatPayload(const Schema& schema) { - return hasUnFlatPayload(schema, schema.getGroupsPosInScope()); - } - static bool hasUnFlatPayload( - const Schema& schema, const unordered_set& payloadGroupsPos); + static expression_vector getFlatPayloads( + const Schema& schema, const expression_vector& payloads); static uint32_t appendPayloadsToNewGroup(Schema& schema, expression_vector& payloads); }; diff --git a/src/include/processor/mapper/plan_mapper.h b/src/include/processor/mapper/plan_mapper.h index 3b084f8607..211b902a53 100644 --- a/src/include/processor/mapper/plan_mapper.h +++ b/src/include/processor/mapper/plan_mapper.h @@ -88,7 +88,7 @@ class PlanMapper { const Schema& outSchema, vector& isInputGroupByHashKeyVectorFlat); static BuildDataInfo generateBuildDataInfo(const Schema& buildSideSchema, - const vector>& keys, const expression_vector& payloads); + const expression_vector& keys, const expression_vector& payloads); public: StorageManager& storageManager; diff --git a/src/include/processor/operator/scan_node_id.h b/src/include/processor/operator/scan_node_id.h index 782ba423b7..86f067f5a3 100644 --- a/src/include/processor/operator/scan_node_id.h +++ b/src/include/processor/operator/scan_node_id.h @@ -118,15 +118,14 @@ class ScanNodeIDSharedState { class ScanNodeID : public PhysicalOperator { public: - ScanNodeID(string nodeName, const DataPos& outDataPos, + ScanNodeID(string nodeID, const DataPos& outDataPos, shared_ptr sharedState, uint32_t id, const string& paramsString) : PhysicalOperator{PhysicalOperatorType::SCAN_NODE_ID, id, paramsString}, - nodeName{std::move(nodeName)}, outDataPos{outDataPos}, sharedState{ - std::move(sharedState)} {} + nodeID{std::move(nodeID)}, outDataPos{outDataPos}, sharedState{std::move(sharedState)} {} bool isSource() const override { return true; } - inline string getNodeName() const { return nodeName; } + inline string getNodeID() const { return nodeID; } inline ScanNodeIDSharedState* getSharedState() const { return sharedState.get(); } void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; @@ -134,7 +133,7 @@ class ScanNodeID : public PhysicalOperator { bool getNextTuplesInternal() override; inline unique_ptr clone() override { - return make_unique(nodeName, outDataPos, sharedState, id, paramsString); + return make_unique(nodeID, outDataPos, sharedState, id, paramsString); } private: @@ -144,7 +143,7 @@ class ScanNodeID : public PhysicalOperator { ScanTableNodeIDSharedState* tableState, node_offset_t startOffset, node_offset_t endOffset); private: - string nodeName; + string nodeID; DataPos outDataPos; shared_ptr sharedState; diff --git a/src/planner/join_order_enumerator.cpp b/src/planner/join_order_enumerator.cpp index b56cde5db1..fbcd200fb1 100644 --- a/src/planner/join_order_enumerator.cpp +++ b/src/planner/join_order_enumerator.cpp @@ -592,7 +592,7 @@ void JoinOrderEnumerator::planJoin(const vector>& joi } } -static bool isJoinKeyUniqueOnBuildSide(const string& joinNodeID, LogicalPlan& buildPlan) { +static bool isJoinKeyUniqueOnBuildSide(const Expression& joinNodeID, LogicalPlan& buildPlan) { auto buildSchema = buildPlan.getSchema(); auto numGroupsInScope = buildSchema->getGroupsPosInScope().size(); bool hasProjectedOutGroups = buildSchema->getNumGroups() > numGroupsInScope; @@ -614,7 +614,7 @@ static bool isJoinKeyUniqueOnBuildSide(const string& joinNodeID, LogicalPlan& bu return false; } auto scanNodeID = (LogicalScanNode*)firstop; - if (scanNodeID->getNode()->getInternalIDPropertyName() != joinNodeID) { + if (scanNodeID->getNode()->getInternalIDPropertyName() != joinNodeID.getUniqueName()) { return false; } return true; @@ -622,6 +622,10 @@ static bool isJoinKeyUniqueOnBuildSide(const string& joinNodeID, LogicalPlan& bu void JoinOrderEnumerator::appendHashJoin(const vector>& joinNodes, JoinType joinType, bool isProbeAcc, LogicalPlan& probePlan, LogicalPlan& buildPlan) { + expression_vector joinNodeIDs; + for (auto& joinNode : joinNodes) { + joinNodeIDs.push_back(joinNode->getInternalIDProperty()); + } probePlan.increaseCost(probePlan.getCardinality() + buildPlan.getCardinality()); // Flat probe side key group in either of the following two cases: // 1. there are multiple join nodes; @@ -631,24 +635,21 @@ void JoinOrderEnumerator::appendHashJoin(const vector // TODO(Guodong): when the build side has only flat payloads, we should consider getting rid of // flattening probe key, instead duplicating keys as in vectorized processing if necessary. auto needFlattenProbeJoinKey = false; - needFlattenProbeJoinKey |= joinNodes.size() > 1; - needFlattenProbeJoinKey |= - !isJoinKeyUniqueOnBuildSide(joinNodes[0]->getInternalIDPropertyName(), buildPlan); + needFlattenProbeJoinKey |= joinNodeIDs.size() > 1; + needFlattenProbeJoinKey |= !isJoinKeyUniqueOnBuildSide(*joinNodeIDs[0], buildPlan); if (needFlattenProbeJoinKey) { - for (auto& joinNode : joinNodes) { - auto probeSideKeyGroupPos = - probePlan.getSchema()->getGroupPos(joinNode->getInternalIDPropertyName()); + for (auto& joinNodeID : joinNodeIDs) { + auto probeSideKeyGroupPos = probePlan.getSchema()->getGroupPos(*joinNodeID); QueryPlanner::appendFlattenIfNecessary(probeSideKeyGroupPos, probePlan); } } // Flat all but one build side key groups. unordered_set joinNodesGroupPos; - for (auto& joinNode : joinNodes) { - joinNodesGroupPos.insert( - buildPlan.getSchema()->getGroupPos(joinNode->getInternalIDPropertyName())); + for (auto& joinNodeID : joinNodeIDs) { + joinNodesGroupPos.insert(buildPlan.getSchema()->getGroupPos(*joinNodeID)); } QueryPlanner::appendFlattensButOne(joinNodesGroupPos, buildPlan); - auto hashJoin = make_shared(joinNodes, joinType, isProbeAcc, + auto hashJoin = make_shared(joinNodeIDs, joinType, isProbeAcc, buildPlan.getSchema()->getExpressionsInScope(), probePlan.getLastOperator(), buildPlan.getLastOperator()); hashJoin->computeSchema(); @@ -663,18 +664,20 @@ void JoinOrderEnumerator::appendHashJoin(const vector void JoinOrderEnumerator::appendMarkJoin(const vector>& joinNodes, const shared_ptr& mark, bool isProbeAcc, LogicalPlan& probePlan, LogicalPlan& buildPlan) { + expression_vector joinNodeIDs; + for (auto& joinNode : joinNodes) { + joinNodeIDs.push_back(joinNode->getInternalIDProperty()); + } // Apply flattening all but one on join nodes of both probe and build side. unordered_set joinNodeGroupsPosInProbeSide, joinNodeGroupsPosInBuildSide; - for (auto& joinNode : joinNodes) { - joinNodeGroupsPosInProbeSide.insert( - probePlan.getSchema()->getGroupPos(joinNode->getInternalIDPropertyName())); - joinNodeGroupsPosInBuildSide.insert( - buildPlan.getSchema()->getGroupPos(joinNode->getInternalIDPropertyName())); + for (auto& joinNodeID : joinNodeIDs) { + joinNodeGroupsPosInProbeSide.insert(probePlan.getSchema()->getGroupPos(*joinNodeID)); + joinNodeGroupsPosInBuildSide.insert(buildPlan.getSchema()->getGroupPos(*joinNodeID)); } auto markGroupPos = QueryPlanner::appendFlattensButOne(joinNodeGroupsPosInProbeSide, probePlan); QueryPlanner::appendFlattensButOne(joinNodeGroupsPosInBuildSide, buildPlan); probePlan.increaseCost(probePlan.getCardinality() + buildPlan.getCardinality()); - auto hashJoin = make_shared(joinNodes, mark, markGroupPos, isProbeAcc, + auto hashJoin = make_shared(joinNodeIDs, mark, markGroupPos, isProbeAcc, probePlan.getLastOperator(), buildPlan.getLastOperator()); hashJoin->computeSchema(); probePlan.setLastOperator(std::move(hashJoin)); @@ -683,24 +686,23 @@ void JoinOrderEnumerator::appendMarkJoin(const vector void JoinOrderEnumerator::appendIntersect(const shared_ptr& intersectNode, vector>& boundNodes, LogicalPlan& probePlan, vector>& buildPlans) { - auto intersectNodeID = intersectNode->getInternalIDPropertyName(); + auto intersectNodeID = intersectNode->getInternalIDProperty(); assert(boundNodes.size() == buildPlans.size()); vector> buildChildren; vector> buildInfos; for (auto i = 0u; i < buildPlans.size(); ++i) { - auto boundNode = boundNodes[i]; + auto boundNodeID = boundNodes[i]->getInternalIDProperty(); QueryPlanner::appendFlattenIfNecessary( - probePlan.getSchema()->getGroupPos(boundNode->getInternalIDPropertyName()), probePlan); + probePlan.getSchema()->getGroupPos(*boundNodeID), probePlan); auto buildPlan = buildPlans[i].get(); auto buildSchema = buildPlan->getSchema(); - QueryPlanner::appendFlattenIfNecessary( - buildSchema->getGroupPos(boundNode->getInternalIDPropertyName()), *buildPlan); + QueryPlanner::appendFlattenIfNecessary(buildSchema->getGroupPos(*boundNodeID), *buildPlan); auto expressions = buildSchema->getExpressionsInScope(); - auto buildInfo = make_unique(boundNode, expressions); + auto buildInfo = make_unique(boundNodeID, expressions); buildChildren.push_back(buildPlan->getLastOperator()); buildInfos.push_back(std::move(buildInfo)); } - auto logicalIntersect = make_shared(intersectNode, + auto logicalIntersect = make_shared(intersectNodeID, probePlan.getLastOperator(), std::move(buildChildren), std::move(buildInfos)); logicalIntersect->computeSchema(); probePlan.setLastOperator(std::move(logicalIntersect)); diff --git a/src/planner/operator/logical_accumulate.cpp b/src/planner/operator/logical_accumulate.cpp index c680605170..cf483a6083 100644 --- a/src/planner/operator/logical_accumulate.cpp +++ b/src/planner/operator/logical_accumulate.cpp @@ -8,7 +8,7 @@ namespace planner { void LogicalAccumulate::computeSchema() { auto childSchema = children[0]->getSchema(); createEmptySchema(); - SinkOperatorUtil::recomputeSchema(*childSchema, *schema); + SinkOperatorUtil::recomputeSchema(*childSchema, expressions, *schema); } } // namespace planner diff --git a/src/planner/operator/logical_cross_product.cpp b/src/planner/operator/logical_cross_product.cpp index aea7f3e859..47eb2f345d 100644 --- a/src/planner/operator/logical_cross_product.cpp +++ b/src/planner/operator/logical_cross_product.cpp @@ -9,7 +9,7 @@ void LogicalCrossProduct::computeSchema() { auto probeSchema = children[0]->getSchema(); auto buildSchema = children[1]->getSchema(); schema = probeSchema->copy(); - SinkOperatorUtil::mergeSchema(*buildSchema, *schema); + SinkOperatorUtil::mergeSchema(*buildSchema, buildSchema->getExpressionsInScope(), *schema); } } // namespace planner diff --git a/src/planner/operator/logical_hash_join.cpp b/src/planner/operator/logical_hash_join.cpp index 06eabbcbf0..05bb5b089a 100644 --- a/src/planner/operator/logical_hash_join.cpp +++ b/src/planner/operator/logical_hash_join.cpp @@ -10,22 +10,50 @@ void LogicalHashJoin::computeSchema() { auto buildSchema = children[1]->getSchema(); schema = probeSchema->copy(); if (joinType != JoinType::MARK) { - vector keys; - for (auto& joinNode : joinNodes) { - keys.push_back(joinNode->getInternalIDPropertyName()); + // resolve key groups + unordered_map> keyGroupPosToKeys; + for (auto& joinNodeID : joinNodeIDs) { + auto groupPos = buildSchema->getGroupPos(*joinNodeID); + if (!keyGroupPosToKeys.contains(groupPos)) { + keyGroupPosToKeys.insert({groupPos, unordered_set()}); + } + keyGroupPosToKeys.at(groupPos).insert(joinNodeID->getUniqueName()); } - SinkOperatorUtil::mergeSchema(*buildSchema, *schema, keys); + // resolve expressions to materialize in each group + auto expressionsToMaterializePerGroup = + SchemaUtils::getExpressionsPerGroup(expressionsToMaterialize, *buildSchema); + expression_vector expressionsToMaterializeInNonKeyGroups; + for (auto i = 0; i < buildSchema->getNumGroups(); ++i) { + auto expressions = expressionsToMaterializePerGroup[i]; + bool isKeyGroup = keyGroupPosToKeys.contains(i); + if (isKeyGroup) { // merge key group + auto keys = keyGroupPosToKeys.at(i); + auto resultGroupPos = schema->getGroupPos(*keys.begin()); + for (auto& expression : expressions) { + if (keys.contains(expression->getUniqueName())) { + continue; + } + schema->insertToGroupAndScope(expression, resultGroupPos); + } + } else { + for (auto& expression : expressions) { + expressionsToMaterializeInNonKeyGroups.push_back(expression); + } + } + } + SinkOperatorUtil::mergeSchema( + *buildSchema, expressionsToMaterializeInNonKeyGroups, *schema); } else { schema->insertToGroupAndScope(mark, markPos); } } string LogicalHashJoin::getExpressionsForPrinting() const { - expression_vector expressions; - for (auto& joinNode : joinNodes) { - expressions.push_back(joinNode); + expression_vector joinNodes; + for (auto& joinNodeID : joinNodeIDs) { + joinNodes.push_back(joinNodeID->getChild(0)); } - return ExpressionUtil::toString(expressions); + return ExpressionUtil::toString(joinNodes); } } // namespace planner diff --git a/src/planner/operator/logical_intersect.cpp b/src/planner/operator/logical_intersect.cpp index 31f6a95da8..7ac042be8f 100644 --- a/src/planner/operator/logical_intersect.cpp +++ b/src/planner/operator/logical_intersect.cpp @@ -8,14 +8,14 @@ void LogicalIntersect::computeSchema() { schema = probeSchema->copy(); // Write intersect node and rels into a new group regardless of whether rel is n-n. auto outGroupPos = schema->createGroup(); - schema->insertToGroupAndScope(intersectNode->getInternalIDProperty(), outGroupPos); + schema->insertToGroupAndScope(intersectNodeID, outGroupPos); for (auto i = 1; i < children.size(); ++i) { auto buildSchema = children[i]->getSchema(); auto buildInfo = buildInfos[i - 1].get(); // Write rel properties into output group. for (auto& expression : buildSchema->getExpressionsInScope()) { - if (expression->getUniqueName() == intersectNode->getInternalIDPropertyName() || - expression->getUniqueName() == buildInfo->key->getInternalIDPropertyName()) { + if (expression->getUniqueName() == intersectNodeID->getUniqueName() || + expression->getUniqueName() == buildInfo->keyNodeID->getUniqueName()) { continue; } schema->insertToGroupAndScope(expression, outGroupPos); @@ -31,7 +31,7 @@ unique_ptr LogicalIntersect::copy() { buildInfos_.push_back(buildInfos[i - 1]->copy()); } auto result = make_unique( - intersectNode, children[0]->copy(), std::move(buildChildren), std::move(buildInfos_)); + intersectNodeID, children[0]->copy(), std::move(buildChildren), std::move(buildInfos_)); return result; } diff --git a/src/planner/operator/logical_order_by.cpp b/src/planner/operator/logical_order_by.cpp index 17a716cc9d..994a87c84b 100644 --- a/src/planner/operator/logical_order_by.cpp +++ b/src/planner/operator/logical_order_by.cpp @@ -8,15 +8,7 @@ namespace planner { void LogicalOrderBy::computeSchema() { auto childSchema = children[0]->getSchema(); schema = make_unique(); - SinkOperatorUtil::recomputeSchema(*childSchema, *schema); -} - -string LogicalOrderBy::getExpressionsForPrinting() const { - auto result = string(); - for (auto& expression : expressionsToOrderBy) { - result += expression->getUniqueName() + ", "; - } - return result; + SinkOperatorUtil::recomputeSchema(*childSchema, expressionsToMaterialize, *schema); } } // namespace planner diff --git a/src/planner/operator/logical_plan_util.cpp b/src/planner/operator/logical_plan_util.cpp index 6dcdfd33d4..f1308838e2 100644 --- a/src/planner/operator/logical_plan_util.cpp +++ b/src/planner/operator/logical_plan_util.cpp @@ -91,7 +91,7 @@ void LogicalPlanUtil::encodeCrossProduct(LogicalOperator* logicalOperator, strin void LogicalPlanUtil::encodeIntersect(LogicalOperator* logicalOperator, string& encodeString) { auto logicalIntersect = (LogicalIntersect*)logicalOperator; - encodeString += "I(" + logicalIntersect->getIntersectNode()->getRawName() + ")"; + encodeString += "I(" + logicalIntersect->getIntersectNodeID()->getRawName() + ")"; } void LogicalPlanUtil::encodeHashJoin(LogicalOperator* logicalOperator, string& encodeString) { diff --git a/src/planner/operator/logical_union.cpp b/src/planner/operator/logical_union.cpp index f633d71a01..8e37737964 100644 --- a/src/planner/operator/logical_union.cpp +++ b/src/planner/operator/logical_union.cpp @@ -8,18 +8,16 @@ namespace planner { void LogicalUnion::computeSchema() { auto firstChildSchema = children[0]->getSchema(); schema = make_unique(); - SinkOperatorUtil::recomputeSchema(*firstChildSchema, *schema); + SinkOperatorUtil::recomputeSchema( + *firstChildSchema, firstChildSchema->getExpressionsInScope(), *schema); } unique_ptr LogicalUnion::copy() { - vector> copiedSchemas; vector> copiedChildren; for (auto i = 0u; i < getNumChildren(); ++i) { - copiedSchemas.push_back(schemasBeforeUnion[i]->copy()); copiedChildren.push_back(getChild(i)->copy()); } - return make_unique( - expressionsToUnion, std::move(copiedSchemas), std::move(copiedChildren)); + return make_unique(expressionsToUnion, std::move(copiedChildren)); } } // namespace planner diff --git a/src/planner/operator/schema.cpp b/src/planner/operator/schema.cpp index 251833cdad..fc3f6b382b 100644 --- a/src/planner/operator/schema.cpp +++ b/src/planner/operator/schema.cpp @@ -3,26 +3,26 @@ namespace kuzu { namespace planner { -uint32_t Schema::createGroup() { +f_group_pos Schema::createGroup() { auto pos = groups.size(); groups.push_back(make_unique()); return pos; } -void Schema::insertToScope(const shared_ptr& expression, uint32_t groupPos) { +void Schema::insertToScope(const shared_ptr& expression, f_group_pos groupPos) { assert(!expressionNameToGroupPos.contains(expression->getUniqueName())); expressionNameToGroupPos.insert({expression->getUniqueName(), groupPos}); expressionsInScope.push_back(expression); } -void Schema::insertToGroupAndScope(const shared_ptr& expression, uint32_t groupPos) { +void Schema::insertToGroupAndScope(const shared_ptr& expression, f_group_pos groupPos) { assert(!expressionNameToGroupPos.contains(expression->getUniqueName())); expressionNameToGroupPos.insert({expression->getUniqueName(), groupPos}); groups[groupPos]->insertExpression(expression); expressionsInScope.push_back(expression); } -void Schema::insertToGroupAndScope(const expression_vector& expressions, uint32_t groupPos) { +void Schema::insertToGroupAndScope(const expression_vector& expressions, f_group_pos groupPos) { for (auto& expression : expressions) { insertToGroupAndScope(expression, groupPos); } @@ -37,7 +37,7 @@ bool Schema::isExpressionInScope(const Expression& expression) const { return false; } -expression_vector Schema::getExpressionsInScope(uint32_t pos) const { +expression_vector Schema::getExpressionsInScope(f_group_pos pos) const { expression_vector result; for (auto& expression : expressionsInScope) { if (getGroupPos(expression->getUniqueName()) == pos) { @@ -61,16 +61,16 @@ expression_vector Schema::getSubExpressionsInScope(const shared_ptr& return results; } -unordered_set Schema::getDependentGroupsPos(const shared_ptr& expression) { - unordered_set result; +unordered_set Schema::getDependentGroupsPos(const shared_ptr& expression) { + unordered_set result; for (auto& subExpression : getSubExpressionsInScope(expression)) { result.insert(getGroupPos(subExpression->getUniqueName())); } return result; } -unordered_set Schema::getGroupsPosInScope() const { - unordered_set result; +unordered_set Schema::getGroupsPosInScope() const { + unordered_set result; for (auto& expressionInScope : expressionsInScope) { result.insert(getGroupPos(expressionInScope->getUniqueName())); } @@ -92,5 +92,16 @@ void Schema::clear() { clearExpressionsInScope(); } +vector SchemaUtils::getExpressionsPerGroup( + const binder::expression_vector& expressions, const Schema& schema) { + vector result; + result.resize(schema.getNumGroups()); + for (auto& expression : expressions) { + auto groupPos = schema.getGroupPos(*expression); + result[groupPos].push_back(expression); + } + return result; +} + } // namespace planner } // namespace kuzu diff --git a/src/planner/operator/sink_util.cpp b/src/planner/operator/sink_util.cpp index acaa4274d1..4cb4f3f5c3 100644 --- a/src/planner/operator/sink_util.cpp +++ b/src/planner/operator/sink_util.cpp @@ -4,104 +4,58 @@ namespace kuzu { namespace planner { void SinkOperatorUtil::mergeSchema( - const Schema& inputSchema, Schema& result, const vector& keys) { - unordered_map> keyGroupPosToKeysMap; - for (auto& key : keys) { - auto groupPos = inputSchema.getGroupPos(key); - keyGroupPosToKeysMap[groupPos].push_back(key); - } - for (auto& [keyGroupPos, keysInGroup] : keyGroupPosToKeysMap) { - mergeKeyGroup(inputSchema, result, keyGroupPos, keysInGroup); - } - if (getGroupsPosIgnoringKeyGroups(inputSchema, keys).empty()) { // nothing else to merge - return; - } - auto flatPayloads = getFlatPayloadsIgnoringKeyGroup(inputSchema, keys); - if (!flatPayloads.empty()) { - auto flatPayloadsOutputGroupPos = appendPayloadsToNewGroup(result, flatPayloads); - result.setGroupAsSingleState(flatPayloadsOutputGroupPos); - } - for (auto& payloadGroupPos : getGroupsPosIgnoringKeyGroups(inputSchema, keys)) { - auto payloadGroup = inputSchema.getGroup(payloadGroupPos); - if (!payloadGroup->isFlat()) { - auto payloads = inputSchema.getExpressionsInScope(payloadGroupPos); - auto outputGroupPos = appendPayloadsToNewGroup(result, payloads); - result.getGroup(outputGroupPos)->setMultiplier(payloadGroup->getMultiplier()); - } - } -} - -void SinkOperatorUtil::mergeSchema(const Schema& inputSchema, Schema& result) { - auto flatPayloads = getFlatPayloads(inputSchema); - if (!hasUnFlatPayload(inputSchema)) { - appendPayloadsToNewGroup(result, flatPayloads); + const Schema& inputSchema, const expression_vector& expressionsToMerge, Schema& resultSchema) { + auto flatPayloads = getFlatPayloads(inputSchema, expressionsToMerge); + auto unFlatPayloadsPerGroup = getUnFlatPayloadsPerGroup(inputSchema, expressionsToMerge); + if (unFlatPayloadsPerGroup.empty()) { + appendPayloadsToNewGroup(resultSchema, flatPayloads); } else { if (!flatPayloads.empty()) { - auto flatPayloadsOutputGroupPos = appendPayloadsToNewGroup(result, flatPayloads); - result.setGroupAsSingleState(flatPayloadsOutputGroupPos); + auto groupPos = appendPayloadsToNewGroup(resultSchema, flatPayloads); + resultSchema.setGroupAsSingleState(groupPos); } - for (auto& payloadGroupPos : inputSchema.getGroupsPosInScope()) { - auto payloadGroup = inputSchema.getGroup(payloadGroupPos); - if (!payloadGroup->isFlat()) { - auto payloads = inputSchema.getExpressionsInScope(payloadGroupPos); - auto outputGroupPos = appendPayloadsToNewGroup(result, payloads); - result.getGroup(outputGroupPos)->setMultiplier(payloadGroup->getMultiplier()); - } + for (auto& [inputGroupPos, payloads] : unFlatPayloadsPerGroup) { + auto resultGroupPos = appendPayloadsToNewGroup(resultSchema, payloads); + resultSchema.getGroup(resultGroupPos) + ->setMultiplier(inputSchema.getGroup(inputGroupPos)->getMultiplier()); } } } -void SinkOperatorUtil::recomputeSchema(const Schema& inputSchema, Schema& result) { - assert(!inputSchema.getExpressionsInScope().empty()); - result.clear(); - mergeSchema(inputSchema, result); +void SinkOperatorUtil::recomputeSchema( + const Schema& inputSchema, const expression_vector& expressionsToMerge, Schema& resultSchema) { + assert(!expressionsToMerge.empty()); + resultSchema.clear(); + mergeSchema(inputSchema, expressionsToMerge, resultSchema); } -unordered_set SinkOperatorUtil::getGroupsPosIgnoringKeyGroups( - const Schema& schema, const vector& keys) { - auto payloadGroupsPos = schema.getGroupsPosInScope(); - for (auto& key : keys) { - auto keyGroupPos = schema.getGroupPos(key); - payloadGroupsPos.erase(keyGroupPos); - } - return payloadGroupsPos; -} - -void SinkOperatorUtil::mergeKeyGroup(const Schema& inputSchema, Schema& resultSchema, - uint32_t keyGroupPos, const vector& keysInGroup) { - auto resultKeyGroupPos = resultSchema.getGroupPos(keysInGroup[0]); - for (auto& expression : inputSchema.getExpressionsInScope(keyGroupPos)) { - if (find(keysInGroup.begin(), keysInGroup.end(), expression->getUniqueName()) != - keysInGroup.end()) { +unordered_map SinkOperatorUtil::getUnFlatPayloadsPerGroup( + const Schema& schema, const expression_vector& payloads) { + unordered_map result; + for (auto& payload : payloads) { + auto groupPos = schema.getGroupPos(*payload); + if (schema.getGroup(groupPos)->isFlat()) { continue; } - resultSchema.insertToGroupAndScope(expression, resultKeyGroupPos); + if (!result.contains(groupPos)) { + result.insert({groupPos, expression_vector{}}); + } + result.at(groupPos).push_back(payload); } + return result; } expression_vector SinkOperatorUtil::getFlatPayloads( - const Schema& schema, const unordered_set& payloadGroupsPos) { + const Schema& schema, const expression_vector& payloads) { expression_vector result; - for (auto& payloadGroupPos : payloadGroupsPos) { - if (schema.getGroup(payloadGroupPos)->isFlat()) { - for (auto& payload : schema.getExpressionsInScope(payloadGroupPos)) { - result.push_back(payload); - } + for (auto& payload : payloads) { + if (schema.getGroup(payload)->isFlat()) { + result.push_back(payload); } } return result; } -bool SinkOperatorUtil::hasUnFlatPayload( - const Schema& schema, const unordered_set& payloadGroupsPos) { - for (auto& payloadGroupPos : payloadGroupsPos) { - if (!schema.getGroup(payloadGroupPos)->isFlat()) { - return true; - } - } - return false; -} - uint32_t SinkOperatorUtil::appendPayloadsToNewGroup(Schema& schema, expression_vector& payloads) { auto outputGroupPos = schema.createGroup(); for (auto& payload : payloads) { diff --git a/src/planner/query_planner.cpp b/src/planner/query_planner.cpp index d49cb1bdb8..f367467944 100644 --- a/src/planner/query_planner.cpp +++ b/src/planner/query_planner.cpp @@ -397,16 +397,13 @@ unique_ptr QueryPlanner::createUnionPlan( } // we compute the schema based on first child auto plan = make_unique(); - vector> schemaBeforeUnion; vector> children; for (auto& childPlan : childrenPlans) { plan->increaseCost(childPlan->getCost()); - schemaBeforeUnion.push_back(childPlan->getSchema()->copy()); children.push_back(childPlan->getLastOperator()); } - auto logicalUnion = - make_shared(childrenPlans[0]->getSchema()->getExpressionsInScope(), - std::move(schemaBeforeUnion), std::move(children)); + auto logicalUnion = make_shared( + childrenPlans[0]->getSchema()->getExpressionsInScope(), std::move(children)); logicalUnion->computeSchema(); plan->setLastOperator(logicalUnion); if (!isUnionAll) { diff --git a/src/processor/mapper/map_hash_join.cpp b/src/processor/mapper/map_hash_join.cpp index 12abd9cbe7..b81d983acb 100644 --- a/src/processor/mapper/map_hash_join.cpp +++ b/src/processor/mapper/map_hash_join.cpp @@ -41,7 +41,7 @@ static void constructAccPipeline(FactorizedTableScan* tableScan, HashJoinProbe* hashJoinProbe->addChild(std::move(resultCollector)); } -static void mapASPJoin(NodeExpression* joinNode, HashJoinProbe* hashJoinProbe) { +static void mapASPJoin(Expression* joinNodeID, HashJoinProbe* hashJoinProbe) { // fetch scan node ID on build side auto hashJoinBuild = hashJoinProbe->getChild(1); assert(hashJoinBuild->getOperatorType() == PhysicalOperatorType::HASH_JOIN_BUILD); @@ -49,7 +49,7 @@ static void mapASPJoin(NodeExpression* joinNode, HashJoinProbe* hashJoinProbe) { for (auto& op : PhysicalPlanUtil::collectOperators(hashJoinBuild, PhysicalOperatorType::SCAN_NODE_ID)) { auto scanNodeID = (ScanNodeID*)op; - if (scanNodeID->getNodeName() == joinNode->getUniqueName()) { + if (scanNodeID->getNodeID() == joinNodeID->getUniqueName()) { scanNodeIDCandidates.push_back(scanNodeID); } } @@ -89,21 +89,19 @@ static void mapAccJoin(HashJoinProbe* hashJoinProbe) { } BuildDataInfo PlanMapper::generateBuildDataInfo(const Schema& buildSideSchema, - const vector>& keys, const expression_vector& payloads) { + const expression_vector& keys, const expression_vector& payloads) { vector> buildKeysPosAndType, buildPayloadsPosAndTypes; vector isBuildPayloadsFlat, isBuildPayloadsInKeyChunk; vector isBuildDataChunkContainKeys(buildSideSchema.getNumGroups(), false); - unordered_set joinNodeIDs; + unordered_set joinKeyNames; for (auto& key : keys) { - auto nodeID = key->getInternalIDPropertyName(); - auto buildSideKeyPos = - DataPos(buildSideSchema.getExpressionPos(*key->getInternalIDProperty())); + auto buildSideKeyPos = DataPos(buildSideSchema.getExpressionPos(*key)); isBuildDataChunkContainKeys[buildSideKeyPos.dataChunkPos] = true; buildKeysPosAndType.emplace_back(buildSideKeyPos, NODE_ID); - joinNodeIDs.emplace(nodeID); + joinKeyNames.insert(key->getUniqueName()); } for (auto& payload : payloads) { - if (joinNodeIDs.find(payload->getUniqueName()) != joinNodeIDs.end()) { + if (joinKeyNames.find(payload->getUniqueName()) != joinKeyNames.end()) { continue; } auto payloadPos = DataPos(buildSideSchema.getExpressionPos(*payload)); @@ -126,11 +124,10 @@ unique_ptr PlanMapper::mapLogicalHashJoinToPhysical( // Populate build side and probe side vector positions auto paramsString = hashJoin->getExpressionsForPrinting(); auto buildDataInfo = generateBuildDataInfo( - *buildSchema, hashJoin->getJoinNodes(), hashJoin->getExpressionsToMaterialize()); + *buildSchema, hashJoin->getJoinNodeIDs(), hashJoin->getExpressionsToMaterialize()); vector probeKeysDataPos; - for (auto& joinNode : hashJoin->getJoinNodes()) { - probeKeysDataPos.emplace_back( - outSchema->getExpressionPos(*joinNode->getInternalIDProperty())); + for (auto& joinNodeID : hashJoin->getJoinNodeIDs()) { + probeKeysDataPos.emplace_back(outSchema->getExpressionPos(*joinNodeID)); } vector probePayloadsOutPos; for (auto& [dataPos, _] : buildDataInfo.payloadsPosAndType) { @@ -155,7 +152,7 @@ unique_ptr PlanMapper::mapLogicalHashJoinToPhysical( paramsString); if (hashJoin->getIsProbeAcc()) { if (containASPOnPipeline(hashJoin)) { - mapASPJoin(hashJoin->getJoinNodes()[0].get(), hashJoinProbe.get()); + mapASPJoin(hashJoin->getJoinNodeIDs()[0].get(), hashJoinProbe.get()); } else { assert(containFTableScan(hashJoin)); mapAccJoin(hashJoinProbe.get()); diff --git a/src/processor/mapper/map_intersect.cpp b/src/processor/mapper/map_intersect.cpp index 49aa28d58b..d10db17e2b 100644 --- a/src/processor/mapper/map_intersect.cpp +++ b/src/processor/mapper/map_intersect.cpp @@ -20,34 +20,32 @@ unique_ptr PlanMapper::mapLogicalIntersectToPhysical( // Map build side children. for (auto i = 1u; i < logicalIntersect->getNumChildren(); i++) { auto buildInfo = logicalIntersect->getBuildInfo(i - 1); - auto buildKey = buildInfo->key->getInternalIDPropertyName(); auto buildSchema = logicalIntersect->getChild(i)->getSchema(); auto buildSidePrevOperator = mapLogicalOperatorToPhysical(logicalIntersect->getChild(i)); vector payloadsDataPos; auto buildDataInfo = generateBuildDataInfo( - *buildSchema, {buildInfo->key}, buildInfo->expressionsToMaterialize); + *buildSchema, {buildInfo->keyNodeID}, buildInfo->expressionsToMaterialize); for (auto& [dataPos, _] : buildDataInfo.payloadsPosAndType) { auto expression = buildSchema->getGroup(dataPos.dataChunkPos) ->getExpressions()[dataPos.valueVectorPos]; if (expression->getUniqueName() == - logicalIntersect->getIntersectNode()->getInternalIDPropertyName()) { + logicalIntersect->getIntersectNodeID()->getUniqueName()) { continue; } payloadsDataPos.emplace_back(outSchema->getExpressionPos(*expression)); } auto sharedState = make_shared(); sharedStates.push_back(sharedState); - children.push_back( - make_unique(make_unique(*buildSchema), sharedState, - buildDataInfo, std::move(buildSidePrevOperator), getOperatorID(), buildKey)); + children.push_back(make_unique( + make_unique(*buildSchema), sharedState, buildDataInfo, + std::move(buildSidePrevOperator), getOperatorID(), buildInfo->keyNodeID->getRawName())); IntersectDataInfo info{ - DataPos(outSchema->getExpressionPos(*buildInfo->key->getInternalIDProperty())), - payloadsDataPos}; + DataPos(outSchema->getExpressionPos(*buildInfo->keyNodeID)), payloadsDataPos}; intersectDataInfos.push_back(info); } // Map intersect. - auto outputDataPos = DataPos(outSchema->getExpressionPos( - *logicalIntersect->getIntersectNode()->getInternalIDProperty())); + auto outputDataPos = + DataPos(outSchema->getExpressionPos(*logicalIntersect->getIntersectNodeID())); return make_unique(outputDataPos, intersectDataInfos, sharedStates, std::move(children), getOperatorID(), logicalIntersect->getExpressionsForPrinting()); } diff --git a/src/processor/mapper/map_scan_node.cpp b/src/processor/mapper/map_scan_node.cpp index 3e8cc6fa00..d20adc6a21 100644 --- a/src/processor/mapper/map_scan_node.cpp +++ b/src/processor/mapper/map_scan_node.cpp @@ -19,8 +19,8 @@ unique_ptr PlanMapper::mapLogicalScanNodeToPhysical( auto nodeTable = nodesStore.getNodeTable(tableID); sharedState->addTableState(nodeTable); } - return make_unique(node->getUniqueName(), dataPos, sharedState, getOperatorID(), - logicalScan->getExpressionsForPrinting()); + return make_unique(node->getInternalIDPropertyName(), dataPos, sharedState, + getOperatorID(), logicalScan->getExpressionsForPrinting()); } unique_ptr PlanMapper::mapLogicalIndexScanNodeToPhysical(