Skip to content

Commit

Permalink
clean
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Aug 14, 2023
1 parent bbf54df commit 2d5cfb8
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 62 deletions.
3 changes: 2 additions & 1 deletion src/binder/expression/expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ bool ExpressionUtil::isExpressionsWithDataType(
return true;
}

expression_vector ExpressionUtil::getExpressionsWithDataType(const expression_vector& expressions, common::LogicalTypeID dataTypeID) {
expression_vector ExpressionUtil::getExpressionsWithDataType(
const expression_vector& expressions, common::LogicalTypeID dataTypeID) {
expression_vector result;
for (auto& expression : expressions) {
if (expression->dataType.getLogicalTypeID() == dataTypeID) {
Expand Down
3 changes: 2 additions & 1 deletion src/include/binder/expression/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ struct ExpressionEquality {
struct ExpressionUtil {
static bool isExpressionsWithDataType(
const expression_vector& expressions, common::LogicalTypeID dataTypeID);
static expression_vector getExpressionsWithDataType(const expression_vector& expressions, common::LogicalTypeID dataTypeID);
static expression_vector getExpressionsWithDataType(
const expression_vector& expressions, common::LogicalTypeID dataTypeID);

static uint32_t find(Expression* target, expression_vector expressions);

Expand Down
7 changes: 0 additions & 7 deletions src/include/planner/logical_plan/sip/side_way_info_passing.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,5 @@ enum class SidewaysInfoPassing : uint8_t {
PROHIBIT_BUILD_TO_PROBE = 5,
};

struct SidewaysInfoPassingUtils {
static inline bool isSipEnabled(SidewaysInfoPassing sip) {
return sip == SidewaysInfoPassing::PROBE_TO_BUILD ||
sip == SidewaysInfoPassing::BUILD_TO_PROBE;
}
};

} // namespace planner
} // namespace kuzu
11 changes: 7 additions & 4 deletions src/include/planner/query_planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,16 @@ class QueryPlanner {
std::vector<std::unique_ptr<LogicalPlan>> enumerateQueryGraphCollection(
const binder::QueryGraphCollection& queryGraphCollection,
const binder::expression_vector& predicates);
std::vector<std::unique_ptr<LogicalPlan>> enumerateQueryGraph(
SubqueryType subqueryType, const expression_vector& correlatedExpressions, binder::QueryGraph* queryGraph, binder::expression_vector& predicates);
std::vector<std::unique_ptr<LogicalPlan>> enumerateQueryGraph(SubqueryType subqueryType,
const expression_vector& correlatedExpressions, binder::QueryGraph* queryGraph,
binder::expression_vector& predicates);

// Plan node/rel table scan
void planBaseTableScans(SubqueryType subqueryType, const expression_vector& correlatedExpressions);
void planBaseTableScans(
SubqueryType subqueryType, const expression_vector& correlatedExpressions);
void planCorrelatedExpressionsScan(const binder::expression_vector& correlatedExpressions);
std::unique_ptr<LogicalPlan> getCorrelatedExpressionScanPlan(const binder::expression_vector& correlatedExpressions);
std::unique_ptr<LogicalPlan> getCorrelatedExpressionScanPlan(
const binder::expression_vector& correlatedExpressions);
void planNodeScan(uint32_t nodePos);
void planNodeIDScan(uint32_t nodePos);
void planRelScan(uint32_t relPos);
Expand Down
1 change: 0 additions & 1 deletion src/optimizer/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ void Optimizer::optimize(planner::LogicalPlan* plan) {
auto removeFactorizationRewriter = RemoveFactorizationRewriter();
removeFactorizationRewriter.rewrite(plan);

auto s = plan->toString();
auto correlatedSubqueryUnnestSolver = CorrelatedSubqueryUnnestSolver(nullptr);
correlatedSubqueryUnnestSolver.solve(plan->getLastOperator().get());

Expand Down
7 changes: 3 additions & 4 deletions src/planner/join_order/cardinality_estimator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ uint64_t CardinalityEstimator::estimateScanNode(LogicalOperator* op) {
return atLeastOne(getNodeIDDom(scanNode->getNode()->getInternalIDPropertyName()));
}

uint64_t CardinalityEstimator::estimateHashJoin(const expression_vector& joinKeys,
const LogicalPlan& probePlan, const LogicalPlan& buildPlan) {
uint64_t CardinalityEstimator::estimateHashJoin(
const expression_vector& joinKeys, const LogicalPlan& probePlan, const LogicalPlan& buildPlan) {
auto denominator = 1u;
for (auto& joinKey : joinKeys) {
// TODO(Xiyang): we should be able to estimate non-ID-based joins as well.
Expand All @@ -37,8 +37,7 @@ uint64_t CardinalityEstimator::estimateHashJoin(const expression_vector& joinKey
}
}
return atLeastOne(probePlan.estCardinality *
JoinOrderUtil::getJoinKeysFlatCardinality(joinKeys, buildPlan) /
denominator);
JoinOrderUtil::getJoinKeysFlatCardinality(joinKeys, buildPlan) / denominator);
}

uint64_t CardinalityEstimator::estimateCrossProduct(
Expand Down
7 changes: 3 additions & 4 deletions src/planner/plan/append_expressions_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "planner/query_planner.h"

#include "planner/logical_plan/scan/logical_expressions_scan.h"
#include "planner/query_planner.h"

using namespace kuzu::binder;

Expand All @@ -13,5 +12,5 @@ void QueryPlanner::appendExpressionsScan(const expression_vector& expressions, L
plan.setLastOperator(expressionsScan);
}

}
}
} // namespace planner
} // namespace kuzu
1 change: 0 additions & 1 deletion src/planner/plan/append_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ void QueryPlanner::appendHashJoin(const expression_vector& joinNodeIDs, JoinType
// Update cost
probePlan.setCost(CostModel::computeHashJoinCost(joinNodeIDs, probePlan, buildPlan));
// Update cardinality
// T
probePlan.setCardinality(
cardinalityEstimator->estimateHashJoin(joinNodeIDs, probePlan, buildPlan));
probePlan.setLastOperator(std::move(hashJoin));
Expand Down
61 changes: 40 additions & 21 deletions src/planner/plan/plan_join_order.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "binder/expression_visitor.h"
#include "planner/join_order/cost_model.h"

#include "planner/logical_plan/scan/logical_scan_node.h"
#include "planner/query_planner.h"

Expand All @@ -24,7 +23,8 @@ std::unique_ptr<LogicalPlan> QueryPlanner::planQueryGraphCollectionInNewContext(
return getBestPlan(std::move(plans));
}

static int32_t getConnectedQueryGraphIdx(const QueryGraphCollection& queryGraphCollection, const expression_set& expressionSet) {
static int32_t getConnectedQueryGraphIdx(
const QueryGraphCollection& queryGraphCollection, const expression_set& expressionSet) {
for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) {
auto queryGraph = queryGraphCollection.getQueryGraph(i);
for (auto& queryNode : queryGraph->getQueryNodes()) {
Expand All @@ -42,12 +42,16 @@ std::vector<std::unique_ptr<LogicalPlan>> QueryPlanner::enumerateQueryGraphColle
auto correlatedExpressionSet = context->getCorrelatedExpressionsSet();
int32_t queryGraphIdxToPlanExpressionsScan = -1;
if (context->subqueryType == SubqueryType::CORRELATED) {
queryGraphIdxToPlanExpressionsScan = getConnectedQueryGraphIdx(queryGraphCollection, correlatedExpressionSet);
// Pick a query graph to plan ExpressionsScan. If -1 is returned, we fall back to cross
// product.
queryGraphIdxToPlanExpressionsScan =
getConnectedQueryGraphIdx(queryGraphCollection, correlatedExpressionSet);
}
std::unordered_set<uint32_t> evaluatedPredicatesIndices;
std::vector<std::vector<std::unique_ptr<LogicalPlan>>> plansPerQueryGraph;
for (auto i = 0u; i < queryGraphCollection.getNumQueryGraphs(); ++i) {
auto queryGraph = queryGraphCollection.getQueryGraph(i);
// Extract predicates for current query graph
std::unordered_set<uint32_t> predicateToEvaluateIndices;
for (auto j = 0u; j < predicates.size(); ++j) {
if (evaluatedPredicatesIndices.contains(j)) {
Expand All @@ -57,45 +61,58 @@ std::vector<std::unique_ptr<LogicalPlan>> QueryPlanner::enumerateQueryGraphColle
predicateToEvaluateIndices.insert(j);
}
}
evaluatedPredicatesIndices.insert(predicateToEvaluateIndices.begin(), predicateToEvaluateIndices.end());
evaluatedPredicatesIndices.insert(
predicateToEvaluateIndices.begin(), predicateToEvaluateIndices.end());
expression_vector predicatesToEvaluate;
for (auto idx : predicateToEvaluateIndices) {
predicatesToEvaluate.push_back(predicates[idx]);
}
std::vector<std::unique_ptr<LogicalPlan>> plans;
switch (context->subqueryType) {
case SubqueryType::NONE: {
plans = enumerateQueryGraph(SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate);
case SubqueryType::NONE: {
// Plan current query graph as an isolated query graph.
plans = enumerateQueryGraph(
SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate);
} break;
case SubqueryType::INTERNAL_ID_CORRELATED: {
plans = enumerateQueryGraph(SubqueryType::INTERNAL_ID_CORRELATED, context->correlatedExpressions, queryGraph, predicatesToEvaluate);
// All correlated expressions are node IDs. Plan as isolated query graph but do not scan
// any properties of correlated node IDs because they must be scanned in outer query.
plans = enumerateQueryGraph(SubqueryType::INTERNAL_ID_CORRELATED,
context->correlatedExpressions, queryGraph, predicatesToEvaluate);
} break;
case SubqueryType::CORRELATED: {
if (i == queryGraphIdxToPlanExpressionsScan) {
plans = enumerateQueryGraph(SubqueryType::CORRELATED, context->correlatedExpressions, queryGraph, predicatesToEvaluate);
// Plan ExpressionsScan with current query graph.
plans = enumerateQueryGraph(SubqueryType::CORRELATED,
context->correlatedExpressions, queryGraph, predicatesToEvaluate);
} else {
plans = enumerateQueryGraph(SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate);
// Plan current query graph as an isolated query graph.
plans = enumerateQueryGraph(
SubqueryType::NONE, expression_vector{}, queryGraph, predicatesToEvaluate);
}
} break ;
} break;
default:
throw NotImplementedException("QueryPlanner::enumerateQueryGraphCollection");

Check warning on line 95 in src/planner/plan/plan_join_order.cpp

View check run for this annotation

Codecov / codecov/patch

src/planner/plan/plan_join_order.cpp#L94-L95

Added lines #L94 - L95 were not covered by tests
}
plansPerQueryGraph.push_back(std::move(plans));
}
if (context->subqueryType == SubqueryType::CORRELATED && queryGraphIdxToPlanExpressionsScan == -1) {
// Fail to plan ExpressionsScan with any query graph. Plan it independently and fall back to
// cross product.
if (context->subqueryType == SubqueryType::CORRELATED &&
queryGraphIdxToPlanExpressionsScan == -1) {
auto plan = std::make_unique<LogicalPlan>();
appendExpressionsScan(context->getCorrelatedExpressions(), *plan);
appendDistinct(context->getCorrelatedExpressions(), *plan);
std::vector<std::unique_ptr<LogicalPlan>> plans;
plans.push_back(std::move(plan));
plansPerQueryGraph.push_back(std::move(plans));
}
// take cross products
// Take cross products
auto result = std::move(plansPerQueryGraph[0]);
for (auto i = 1u; i < plansPerQueryGraph.size(); ++i) {
result = planCrossProduct(std::move(result), std::move(plansPerQueryGraph[i]));
}
// apply remaining predicates
// Apply remaining predicates
expression_vector remainingPredicates;
for (auto i = 0u; i < predicates.size(); ++i) {
if (!evaluatedPredicatesIndices.contains(i)) {
Expand All @@ -111,7 +128,8 @@ std::vector<std::unique_ptr<LogicalPlan>> QueryPlanner::enumerateQueryGraphColle
}

std::vector<std::unique_ptr<LogicalPlan>> QueryPlanner::enumerateQueryGraph(
SubqueryType subqueryType, const expression_vector& correlatedExpressions, QueryGraph* queryGraph, expression_vector& predicates) {
SubqueryType subqueryType, const expression_vector& correlatedExpressions,
QueryGraph* queryGraph, expression_vector& predicates) {
context->init(queryGraph, predicates);
cardinalityEstimator->initNodeIDDom(queryGraph);
planBaseTableScans(subqueryType, correlatedExpressions);
Expand Down Expand Up @@ -175,24 +193,25 @@ static binder::expression_vector getNewlyMatchedExpressions(const SubqueryGraph&
std::vector<SubqueryGraph>{prevSubgraph}, newSubgraph, expressions);
}

void QueryPlanner::planBaseTableScans(SubqueryType subqueryType, const expression_vector& correlatedExpressions) {
void QueryPlanner::planBaseTableScans(
SubqueryType subqueryType, const expression_vector& correlatedExpressions) {
auto queryGraph = context->getQueryGraph();
auto correlatedExpressionSet = expression_set{correlatedExpressions.begin(), correlatedExpressions.end()};
auto correlatedExpressionSet =
expression_set{correlatedExpressions.begin(), correlatedExpressions.end()};
switch (subqueryType) {
case SubqueryType::NONE: {
for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) {
planNodeScan(nodePos);
}
} break ;
} break;
case SubqueryType::INTERNAL_ID_CORRELATED: {
for (auto nodePos = 0u; nodePos < queryGraph->getNumQueryNodes(); ++nodePos) {
auto queryNode = queryGraph->getQueryNode(nodePos);
if (correlatedExpressionSet.contains(queryNode->getInternalIDProperty())) {
// In un-nested subquery, e.g. MATCH (a) OPTIONAL MATCH (a)-[e1]->(b), the inner
// query
// ("(a)-[e1]->(b)") needs to scan a, which is already scanned in the outer query
// (a). To avoid scanning storage twice, we keep track of node table "a" and make
// sure when planning inner query, we only scan internal ID of "a".
// query ("(a)-[e1]->(b)") needs to scan a, which is already scanned in the outer
// query (a). To avoid scanning storage twice, we keep track of node table "a" and
// make sure when planning inner query, we only scan internal ID of "a".
planNodeIDScan(nodePos);
} else {
planNodeScan(nodePos);
Expand Down
19 changes: 7 additions & 12 deletions src/planner/plan/plan_subquery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,6 @@ static expression_vector getCorrelatedExpressions(const QueryGraphCollection& co
return result;
}

static expression_vector getJoinNodeIDs(expression_vector& expressions) {
expression_vector joinNodeIDs;
for (auto& expression : expressions) {
if (expression->dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID) {
joinNodeIDs.push_back(expression);
}
}
return joinNodeIDs;
}

void QueryPlanner::planOptionalMatch(const QueryGraphCollection& queryGraphCollection,
const expression_vector& predicates, LogicalPlan& leftPlan) {
if (leftPlan.isEmpty()) {
Expand All @@ -54,9 +44,13 @@ void QueryPlanner::planOptionalMatch(const QueryGraphCollection& queryGraphColle
correlatedExpressions, LogicalTypeID::INTERNAL_ID);
std::unique_ptr<LogicalPlan> rightPlan;
if (isInternalIDCorrelated) {
// If all correlated expressions are node IDs. We can trivially unnest by scanning internal
// ID in both outer and inner plan as these are fast in-memory operations. For node
// properties, we only scan in the outer query.
rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::INTERNAL_ID_CORRELATED,
correlatedExpressions, leftPlan.getCardinality(), queryGraphCollection, predicates);
} else {
// Unnest using ExpressionsScan which scans the accumulated table on probe side.
rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::CORRELATED,
correlatedExpressions, leftPlan.getCardinality(), queryGraphCollection, predicates);
appendAccumulate(AccumulateType::REGULAR, correlatedExpressions, leftPlan);
Expand All @@ -78,7 +72,8 @@ void QueryPlanner::planRegularMatch(const QueryGraphCollection& queryGraphCollec
predicatesToPullUp.push_back(predicate);
}
}
auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions);
auto joinNodeIDs = ExpressionUtil::getExpressionsWithDataType(
correlatedExpressions, LogicalTypeID::INTERNAL_ID);
std::unique_ptr<LogicalPlan> rightPlan;
if (joinNodeIDs.empty()) {
rightPlan = planQueryGraphCollectionInNewContext(SubqueryType::NONE, correlatedExpressions,
Expand Down Expand Up @@ -107,11 +102,11 @@ void QueryPlanner::planExistsSubquery(
throw common::NotImplementedException(
"Exists subquery with no correlated join conditions is not yet supported.");

Check warning on line 103 in src/planner/plan/plan_subquery.cpp

View check run for this annotation

Codecov / codecov/patch

src/planner/plan/plan_subquery.cpp#L102-L103

Added lines #L102 - L103 were not covered by tests
}
// See planOptionalMatch for un-nesting logic.
bool isInternalIDCorrelated = ExpressionUtil::isExpressionsWithDataType(
correlatedExpressions, LogicalTypeID::INTERNAL_ID);
std::unique_ptr<LogicalPlan> innerPlan;
if (isInternalIDCorrelated) {
// Unnest as mark join. See planOptionalMatch for unnesting logic.
innerPlan = planQueryGraphCollectionInNewContext(SubqueryType::INTERNAL_ID_CORRELATED,
correlatedExpressions, outerPlan.getCardinality(), *subquery->getQueryGraphCollection(),
predicates);
Expand Down
5 changes: 2 additions & 3 deletions src/processor/map/create_factorized_table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ std::unique_ptr<PhysicalOperator> PlanMapper::createFactorizedTableScan(
}

std::unique_ptr<PhysicalOperator> PlanMapper::createFactorizedTableScanAligned(
const expression_vector& expressions, Schema* schema,
std::shared_ptr<FactorizedTable> table, uint64_t maxMorselSize,
std::unique_ptr<PhysicalOperator> prevOperator) {
const expression_vector& expressions, Schema* schema, std::shared_ptr<FactorizedTable> table,
uint64_t maxMorselSize, std::unique_ptr<PhysicalOperator> prevOperator) {
std::vector<ft_col_idx_t> columnIndices;
for (auto i = 0u; i < expressions.size(); ++i) {
columnIndices.push_back(i);
Expand Down
4 changes: 2 additions & 2 deletions src/processor/map/map_expressions_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapExpressionsScan(
assert(physicalOp->getChild(0)->getOperatorType() == PhysicalOperatorType::RESULT_COLLECTOR);
auto resultCollector = (ResultCollector*)physicalOp->getChild(0);
auto table = resultCollector->getResultFactorizedTable();
return createFactorizedTableScan(
expressionsToScan, colIndicesToScan, schema, table, DEFAULT_VECTOR_CAPACITY /* maxMorselSize */, nullptr);
return createFactorizedTableScan(expressionsToScan, colIndicesToScan, schema, table,
DEFAULT_VECTOR_CAPACITY /* maxMorselSize */, nullptr);
}

} // namespace processor
Expand Down
1 change: 0 additions & 1 deletion src/storage/wal/wal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ void WAL::logAddPropertyRecord(table_id_t tableID, property_id_t propertyID) {
}

void WAL::clearWAL() {
bufferManager.clearEvictionQueue();
bufferManager.removeFilePagesFromFrames(*fileHandle);
fileHandle->resetToZeroPagesAndPageCapacity();
initCurrentPage();
Expand Down

0 comments on commit 2d5cfb8

Please sign in to comment.