Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add remove unnecessary join optimizer #1329

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,21 @@ std::unique_ptr<QueryGraph> Binder::bindPatternElement(
return queryGraph;
}

// E.g. MATCH (:person)-[:studyAt]->(:person) ...
static void validateNodeRelConnectivity(const Catalog& catalog_, const RelExpression& rel,
const NodeExpression& srcNode, const NodeExpression& dstNode) {
std::set<std::pair<table_id_t, table_id_t>> srcDstTableIDs;
for (auto relTableID : rel.getTableIDs()) {
static std::vector<table_id_t> pruneRelTableIDs(const Catalog& catalog_,
const std::vector<table_id_t>& relTableIDs, const NodeExpression& srcNode,
const NodeExpression& dstNode) {
auto srcNodeTableIDs = srcNode.getTableIDsSet();
auto dstNodeTableIDs = dstNode.getTableIDsSet();
std::vector<table_id_t> result;
for (auto& relTableID : relTableIDs) {
auto relTableSchema = catalog_.getReadOnlyVersion()->getRelTableSchema(relTableID);
srcDstTableIDs.insert({relTableSchema->srcTableID, relTableSchema->dstTableID});
}
for (auto srcTableID : srcNode.getTableIDs()) {
for (auto dstTableID : dstNode.getTableIDs()) {
if (srcDstTableIDs.contains(std::make_pair(srcTableID, dstTableID))) {
return;
}
if (!srcNodeTableIDs.contains(relTableSchema->srcTableID) ||
!dstNodeTableIDs.contains(relTableSchema->dstTableID)) {
continue;
}
result.push_back(relTableID);
}
throw BinderException("Nodes " + srcNode.toString() + " and " + dstNode.toString() +
" are not connected through rel " + rel.toString() + ".");
return result;
}

static std::vector<std::pair<std::string, std::vector<Property>>> getPropertyNameAndSchemasPairs(
Expand Down Expand Up @@ -124,12 +122,17 @@ void Binder::bindQueryRel(const RelPattern& relPattern,
if (srcNode->getUniqueName() == dstNode->getUniqueName()) {
throw BinderException("Self-loop rel " + parsedName + " is not supported.");
}
// prune rel table IDs
tableIDs = pruneRelTableIDs(catalog, tableIDs, *srcNode, *dstNode);
if (tableIDs.empty()) {
throw BinderException("Nodes " + srcNode->toString() + " and " + dstNode->toString() +
" are not connected through rel " + parsedName + ".");
}
// bind variable length
auto [lowerBound, upperBound] = bindVariableLengthRelBound(relPattern);
auto queryRel = make_shared<RelExpression>(getUniqueExpressionName(parsedName), parsedName,
tableIDs, srcNode, dstNode, lowerBound, upperBound);
queryRel->setAlias(parsedName);
validateNodeRelConnectivity(catalog, *queryRel, *srcNode, *dstNode);
// resolve properties associated with rel table
std::vector<RelTableSchema*> relTableSchemas;
for (auto tableID : tableIDs) {
Expand Down
2 changes: 2 additions & 0 deletions src/catalog/catalog_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "common/exception.h"
#include "common/utils.h"

using namespace kuzu::common;

namespace kuzu {
namespace catalog {

Expand Down
7 changes: 5 additions & 2 deletions src/include/binder/expression/node_rel_expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,18 @@ class NodeOrRelExpression : public Expression {
virtual ~NodeOrRelExpression() override = default;

inline void addTableIDs(const std::vector<common::table_id_t>& tableIDsToAdd) {
auto tableIDsMap = std::unordered_set<common::table_id_t>(tableIDs.begin(), tableIDs.end());
auto tableIDsSet = getTableIDsSet();
for (auto tableID : tableIDsToAdd) {
if (!tableIDsMap.contains(tableID)) {
if (!tableIDsSet.contains(tableID)) {
tableIDs.push_back(tableID);
}
}
}
// True when more than one table ID is attached to this expression.
inline bool isMultiLabeled() const {
    return tableIDs.size() > 1;
}
// Returns a copy of the table IDs attached to this expression, in stored order.
inline std::vector<common::table_id_t> getTableIDs() const {
    return tableIDs;
}
// Builds a fresh unordered_set from the stored table IDs on every call, so
// callers that need repeated membership checks should keep the result.
inline std::unordered_set<common::table_id_t> getTableIDsSet() const {
    return std::unordered_set<common::table_id_t>(tableIDs.begin(), tableIDs.end());
}
inline common::table_id_t getSingleTableID() const {
assert(tableIDs.size() == 1);
return tableIDs[0];
Expand Down
87 changes: 47 additions & 40 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,36 +34,36 @@ class CatalogContent {
/**
* Node and Rel table functions.
*/
table_id_t addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId,
std::vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID);
common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, common::table_id_t srcTableID,
common::table_id_t dstTableID);

inline bool containNodeTable(table_id_t tableID) const {
inline bool containNodeTable(common::table_id_t tableID) const {
return nodeTableSchemas.contains(tableID);
}
inline bool containRelTable(table_id_t tableID) const {
inline bool containRelTable(common::table_id_t tableID) const {
return relTableSchemas.contains(tableID);
}
// True if `name` is reported by either containNodeTable or containRelTable.
// The node-table check runs first; the rel-table check runs only if it fails.
inline bool containTable(const std::string& name) const {
    if (containNodeTable(name)) {
        return true;
    }
    return containRelTable(name);
}

inline std::string getTableName(table_id_t tableID) const {
inline std::string getTableName(common::table_id_t tableID) const {
return getTableSchema(tableID)->tableName;
}

inline NodeTableSchema* getNodeTableSchema(table_id_t tableID) const {
inline NodeTableSchema* getNodeTableSchema(common::table_id_t tableID) const {
assert(containNodeTable(tableID));
return nodeTableSchemas.at(tableID).get();
}
inline RelTableSchema* getRelTableSchema(table_id_t tableID) const {
inline RelTableSchema* getRelTableSchema(common::table_id_t tableID) const {
assert(containRelTable(tableID));
return relTableSchemas.at(tableID).get();
}
inline TableSchema* getTableSchema(table_id_t tableID) const {
inline TableSchema* getTableSchema(common::table_id_t tableID) const {
assert(containRelTable(tableID) || containNodeTable(tableID));
return nodeTableSchemas.contains(tableID) ?
(TableSchema*)nodeTableSchemas.at(tableID).get() :
Expand All @@ -77,11 +77,12 @@ class CatalogContent {
return relTableNameToIDMap.contains(tableName);
}

inline table_id_t getTableID(const std::string& tableName) const {
inline common::table_id_t getTableID(const std::string& tableName) const {
return nodeTableNameToIDMap.contains(tableName) ? nodeTableNameToIDMap.at(tableName) :
relTableNameToIDMap.at(tableName);
}
inline bool isSingleMultiplicityInDirection(table_id_t tableID, RelDirection direction) const {
inline bool isSingleMultiplicityInDirection(
common::table_id_t tableID, common::RelDirection direction) const {
return relTableSchemas.at(tableID)->isSingleMultiplicityInDirection(direction);
}

Expand All @@ -90,53 +91,57 @@ class CatalogContent {
*/
// getNodeProperty and getRelProperty should be called after checking if property exists
// (containNodeProperty and containRelProperty).
const Property& getNodeProperty(table_id_t tableID, const std::string& propertyName) const;
const Property& getRelProperty(table_id_t tableID, const std::string& propertyName) const;
const Property& getNodeProperty(
common::table_id_t tableID, const std::string& propertyName) const;
const Property& getRelProperty(
common::table_id_t tableID, const std::string& propertyName) const;

std::vector<Property> getAllNodeProperties(table_id_t tableID) const;
inline const std::vector<Property>& getRelProperties(table_id_t tableID) const {
std::vector<Property> getAllNodeProperties(common::table_id_t tableID) const;
inline const std::vector<Property>& getRelProperties(common::table_id_t tableID) const {
return relTableSchemas.at(tableID)->properties;
}
inline std::vector<table_id_t> getNodeTableIDs() const {
std::vector<table_id_t> nodeTableIDs;
inline std::vector<common::table_id_t> getNodeTableIDs() const {
std::vector<common::table_id_t> nodeTableIDs;
for (auto& [tableID, _] : nodeTableSchemas) {
nodeTableIDs.push_back(tableID);
}
return nodeTableIDs;
}
inline std::vector<table_id_t> getRelTableIDs() const {
std::vector<table_id_t> relTableIDs;
inline std::vector<common::table_id_t> getRelTableIDs() const {
std::vector<common::table_id_t> relTableIDs;
for (auto& [tableID, _] : relTableSchemas) {
relTableIDs.push_back(tableID);
}
return relTableIDs;
}
inline std::unordered_map<table_id_t, std::unique_ptr<NodeTableSchema>>& getNodeTableSchemas() {
inline std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableSchema>>&
getNodeTableSchemas() {
return nodeTableSchemas;
}
inline std::unordered_map<table_id_t, std::unique_ptr<RelTableSchema>>& getRelTableSchemas() {
inline std::unordered_map<common::table_id_t, std::unique_ptr<RelTableSchema>>&
getRelTableSchemas() {
return relTableSchemas;
}

void dropTableSchema(table_id_t tableID);
void dropTableSchema(common::table_id_t tableID);

void renameTable(table_id_t tableID, std::string newName);
void renameTable(common::table_id_t tableID, std::string newName);

void saveToFile(const std::string& directory, common::DBFileType dbFileType);
void readFromFile(const std::string& directory, common::DBFileType dbFileType);

private:
inline table_id_t assignNextTableID() { return nextTableID++; }
inline common::table_id_t assignNextTableID() { return nextTableID++; }

private:
std::shared_ptr<spdlog::logger> logger;
std::unordered_map<table_id_t, std::unique_ptr<NodeTableSchema>> nodeTableSchemas;
std::unordered_map<table_id_t, std::unique_ptr<RelTableSchema>> relTableSchemas;
std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableSchema>> nodeTableSchemas;
std::unordered_map<common::table_id_t, std::unique_ptr<RelTableSchema>> relTableSchemas;
// These two maps are maintained as caches. They are not serialized to the catalog file, but
// are re-constructed when reading from the catalog file.
std::unordered_map<std::string, table_id_t> nodeTableNameToIDMap;
std::unordered_map<std::string, table_id_t> relTableNameToIDMap;
table_id_t nextTableID;
std::unordered_map<std::string, common::table_id_t> nodeTableNameToIDMap;
std::unordered_map<std::string, common::table_id_t> relTableNameToIDMap;
common::table_id_t nextTableID;
};

class Catalog {
Expand Down Expand Up @@ -180,25 +185,27 @@ class Catalog {

common::ExpressionType getFunctionType(const std::string& name) const;

table_id_t addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId,
std::vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID);
common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, common::table_id_t srcTableID,
common::table_id_t dstTableID);

void dropTableSchema(table_id_t tableID);
void dropTableSchema(common::table_id_t tableID);

void renameTable(table_id_t tableID, std::string newName);
void renameTable(common::table_id_t tableID, std::string newName);

void addProperty(table_id_t tableID, std::string propertyName, DataType dataType);
void addProperty(
common::table_id_t tableID, std::string propertyName, common::DataType dataType);

void dropProperty(table_id_t tableID, property_id_t propertyID);
void dropProperty(common::table_id_t tableID, common::property_id_t propertyID);

void renameProperty(table_id_t tableID, property_id_t propertyID, std::string newName);
void renameProperty(
common::table_id_t tableID, common::property_id_t propertyID, std::string newName);

std::unordered_set<RelTableSchema*> getAllRelTableSchemasContainBoundTable(
table_id_t boundTableID);
common::table_id_t boundTableID);

protected:
std::unique_ptr<function::BuiltInVectorOperations> builtInVectorOperations;
Expand Down
Loading