Skip to content

Commit

Permalink
add remove unnecessary join optimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Mar 1, 2023
1 parent 1354a79 commit 7ab0b94
Show file tree
Hide file tree
Showing 14 changed files with 215 additions and 113 deletions.
33 changes: 18 additions & 15 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,21 @@ std::unique_ptr<QueryGraph> Binder::bindPatternElement(
return queryGraph;
}

// E.g. MATCH (:person)-[:studyAt]->(:person) ...
static void validateNodeRelConnectivity(const Catalog& catalog_, const RelExpression& rel,
const NodeExpression& srcNode, const NodeExpression& dstNode) {
std::set<std::pair<table_id_t, table_id_t>> srcDstTableIDs;
for (auto relTableID : rel.getTableIDs()) {
static std::vector<table_id_t> pruneRelTableIDs(const Catalog& catalog_,
const std::vector<table_id_t>& relTableIDs, const NodeExpression& srcNode,
const NodeExpression& dstNode) {
auto srcNodeTableIDs = srcNode.getTableIDsSet();
auto dstNodeTableIDs = dstNode.getTableIDsSet();
std::vector<table_id_t> result;
for (auto& relTableID : relTableIDs) {
auto relTableSchema = catalog_.getReadOnlyVersion()->getRelTableSchema(relTableID);
srcDstTableIDs.insert({relTableSchema->srcTableID, relTableSchema->dstTableID});
}
for (auto srcTableID : srcNode.getTableIDs()) {
for (auto dstTableID : dstNode.getTableIDs()) {
if (srcDstTableIDs.contains(std::make_pair(srcTableID, dstTableID))) {
return;
}
if (!srcNodeTableIDs.contains(relTableSchema->srcTableID) ||
!dstNodeTableIDs.contains(relTableSchema->dstTableID)) {
continue;
}
result.push_back(relTableID);
}
throw BinderException("Nodes " + srcNode.toString() + " and " + dstNode.toString() +
" are not connected through rel " + rel.toString() + ".");
return result;
}

static std::vector<std::pair<std::string, std::vector<Property>>> getPropertyNameAndSchemasPairs(
Expand Down Expand Up @@ -124,12 +122,17 @@ void Binder::bindQueryRel(const RelPattern& relPattern,
if (srcNode->getUniqueName() == dstNode->getUniqueName()) {
throw BinderException("Self-loop rel " + parsedName + " is not supported.");
}
// prune rel table IDs
tableIDs = pruneRelTableIDs(catalog, tableIDs, *srcNode, *dstNode);
if (tableIDs.empty()) {
throw BinderException("Nodes " + srcNode->toString() + " and " + dstNode->toString() +
" are not connected through rel " + parsedName + ".");
}
// bind variable length
auto [lowerBound, upperBound] = bindVariableLengthRelBound(relPattern);
auto queryRel = make_shared<RelExpression>(getUniqueExpressionName(parsedName), parsedName,
tableIDs, srcNode, dstNode, lowerBound, upperBound);
queryRel->setAlias(parsedName);
validateNodeRelConnectivity(catalog, *queryRel, *srcNode, *dstNode);
// resolve properties associated with rel table
std::vector<RelTableSchema*> relTableSchemas;
for (auto tableID : tableIDs) {
Expand Down
2 changes: 2 additions & 0 deletions src/catalog/catalog_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "common/exception.h"
#include "common/utils.h"

using namespace kuzu::common;

namespace kuzu {
namespace catalog {

Expand Down
7 changes: 5 additions & 2 deletions src/include/binder/expression/node_rel_expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,18 @@ class NodeOrRelExpression : public Expression {
virtual ~NodeOrRelExpression() override = default;

inline void addTableIDs(const std::vector<common::table_id_t>& tableIDsToAdd) {
auto tableIDsMap = std::unordered_set<common::table_id_t>(tableIDs.begin(), tableIDs.end());
auto tableIDsSet = getTableIDsSet();
for (auto tableID : tableIDsToAdd) {
if (!tableIDsMap.contains(tableID)) {
if (!tableIDsSet.contains(tableID)) {
tableIDs.push_back(tableID);
}
}
}
// True when this expression carries more than one table ID.
inline bool isMultiLabeled() const { return tableIDs.size() > 1; }
// Returns the table IDs by value (the caller receives a copy of the vector).
inline std::vector<common::table_id_t> getTableIDs() const { return tableIDs; }
inline std::unordered_set<common::table_id_t> getTableIDsSet() const {
return {tableIDs.begin(), tableIDs.end()};
}
inline common::table_id_t getSingleTableID() const {
assert(tableIDs.size() == 1);
return tableIDs[0];
Expand Down
87 changes: 47 additions & 40 deletions src/include/catalog/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,36 +34,36 @@ class CatalogContent {
/**
* Node and Rel table functions.
*/
table_id_t addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId,
std::vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID);
common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, common::table_id_t srcTableID,
common::table_id_t dstTableID);

inline bool containNodeTable(table_id_t tableID) const {
inline bool containNodeTable(common::table_id_t tableID) const {
return nodeTableSchemas.contains(tableID);
}
inline bool containRelTable(table_id_t tableID) const {
inline bool containRelTable(common::table_id_t tableID) const {
return relTableSchemas.contains(tableID);
}
// Reports whether a table with the given name exists, checking node tables
// first and rel tables only if no node table matched.
inline bool containTable(const std::string& name) const {
    if (containNodeTable(name)) {
        return true;
    }
    return containRelTable(name);
}

inline std::string getTableName(table_id_t tableID) const {
inline std::string getTableName(common::table_id_t tableID) const {
return getTableSchema(tableID)->tableName;
}

inline NodeTableSchema* getNodeTableSchema(table_id_t tableID) const {
inline NodeTableSchema* getNodeTableSchema(common::table_id_t tableID) const {
assert(containNodeTable(tableID));
return nodeTableSchemas.at(tableID).get();
}
inline RelTableSchema* getRelTableSchema(table_id_t tableID) const {
inline RelTableSchema* getRelTableSchema(common::table_id_t tableID) const {
assert(containRelTable(tableID));
return relTableSchemas.at(tableID).get();
}
inline TableSchema* getTableSchema(table_id_t tableID) const {
inline TableSchema* getTableSchema(common::table_id_t tableID) const {
assert(containRelTable(tableID) || containNodeTable(tableID));
return nodeTableSchemas.contains(tableID) ?
(TableSchema*)nodeTableSchemas.at(tableID).get() :
Expand All @@ -77,11 +77,12 @@ class CatalogContent {
return relTableNameToIDMap.contains(tableName);
}

inline table_id_t getTableID(const std::string& tableName) const {
inline common::table_id_t getTableID(const std::string& tableName) const {
return nodeTableNameToIDMap.contains(tableName) ? nodeTableNameToIDMap.at(tableName) :
relTableNameToIDMap.at(tableName);
}
inline bool isSingleMultiplicityInDirection(table_id_t tableID, RelDirection direction) const {
inline bool isSingleMultiplicityInDirection(
common::table_id_t tableID, common::RelDirection direction) const {
return relTableSchemas.at(tableID)->isSingleMultiplicityInDirection(direction);
}

Expand All @@ -90,53 +91,57 @@ class CatalogContent {
*/
// getNodeProperty and getRelProperty should be called after checking if property exists
// (containNodeProperty and containRelProperty).
const Property& getNodeProperty(table_id_t tableID, const std::string& propertyName) const;
const Property& getRelProperty(table_id_t tableID, const std::string& propertyName) const;
const Property& getNodeProperty(
common::table_id_t tableID, const std::string& propertyName) const;
const Property& getRelProperty(
common::table_id_t tableID, const std::string& propertyName) const;

std::vector<Property> getAllNodeProperties(table_id_t tableID) const;
inline const std::vector<Property>& getRelProperties(table_id_t tableID) const {
std::vector<Property> getAllNodeProperties(common::table_id_t tableID) const;
inline const std::vector<Property>& getRelProperties(common::table_id_t tableID) const {
return relTableSchemas.at(tableID)->properties;
}
inline std::vector<table_id_t> getNodeTableIDs() const {
std::vector<table_id_t> nodeTableIDs;
inline std::vector<common::table_id_t> getNodeTableIDs() const {
std::vector<common::table_id_t> nodeTableIDs;
for (auto& [tableID, _] : nodeTableSchemas) {
nodeTableIDs.push_back(tableID);
}
return nodeTableIDs;
}
inline std::vector<table_id_t> getRelTableIDs() const {
std::vector<table_id_t> relTableIDs;
inline std::vector<common::table_id_t> getRelTableIDs() const {
std::vector<common::table_id_t> relTableIDs;
for (auto& [tableID, _] : relTableSchemas) {
relTableIDs.push_back(tableID);
}
return relTableIDs;
}
inline std::unordered_map<table_id_t, std::unique_ptr<NodeTableSchema>>& getNodeTableSchemas() {
inline std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableSchema>>&
getNodeTableSchemas() {
return nodeTableSchemas;
}
inline std::unordered_map<table_id_t, std::unique_ptr<RelTableSchema>>& getRelTableSchemas() {
inline std::unordered_map<common::table_id_t, std::unique_ptr<RelTableSchema>>&
getRelTableSchemas() {
return relTableSchemas;
}

void dropTableSchema(table_id_t tableID);
void dropTableSchema(common::table_id_t tableID);

void renameTable(table_id_t tableID, std::string newName);
void renameTable(common::table_id_t tableID, std::string newName);

void saveToFile(const std::string& directory, common::DBFileType dbFileType);
void readFromFile(const std::string& directory, common::DBFileType dbFileType);

private:
inline table_id_t assignNextTableID() { return nextTableID++; }
inline common::table_id_t assignNextTableID() { return nextTableID++; }

private:
std::shared_ptr<spdlog::logger> logger;
std::unordered_map<table_id_t, std::unique_ptr<NodeTableSchema>> nodeTableSchemas;
std::unordered_map<table_id_t, std::unique_ptr<RelTableSchema>> relTableSchemas;
std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableSchema>> nodeTableSchemas;
std::unordered_map<common::table_id_t, std::unique_ptr<RelTableSchema>> relTableSchemas;
// These two maps are maintained as caches. They are not serialized to the catalog file, but
// are re-constructed when reading from the catalog file.
std::unordered_map<std::string, table_id_t> nodeTableNameToIDMap;
std::unordered_map<std::string, table_id_t> relTableNameToIDMap;
table_id_t nextTableID;
std::unordered_map<std::string, common::table_id_t> nodeTableNameToIDMap;
std::unordered_map<std::string, common::table_id_t> relTableNameToIDMap;
common::table_id_t nextTableID;
};

class Catalog {
Expand Down Expand Up @@ -180,25 +185,27 @@ class Catalog {

common::ExpressionType getFunctionType(const std::string& name) const;

table_id_t addNodeTableSchema(std::string tableName, property_id_t primaryKeyId,
common::table_id_t addNodeTableSchema(std::string tableName, common::property_id_t primaryKeyId,
std::vector<PropertyNameDataType> propertyDefinitions);

table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, table_id_t srcTableID,
table_id_t dstTableID);
common::table_id_t addRelTableSchema(std::string tableName, RelMultiplicity relMultiplicity,
const std::vector<PropertyNameDataType>& propertyDefinitions, common::table_id_t srcTableID,
common::table_id_t dstTableID);

void dropTableSchema(table_id_t tableID);
void dropTableSchema(common::table_id_t tableID);

void renameTable(table_id_t tableID, std::string newName);
void renameTable(common::table_id_t tableID, std::string newName);

void addProperty(table_id_t tableID, std::string propertyName, DataType dataType);
void addProperty(
common::table_id_t tableID, std::string propertyName, common::DataType dataType);

void dropProperty(table_id_t tableID, property_id_t propertyID);
void dropProperty(common::table_id_t tableID, common::property_id_t propertyID);

void renameProperty(table_id_t tableID, property_id_t propertyID, std::string newName);
void renameProperty(
common::table_id_t tableID, common::property_id_t propertyID, std::string newName);

std::unordered_set<RelTableSchema*> getAllRelTableSchemasContainBoundTable(
table_id_t boundTableID);
common::table_id_t boundTableID);

protected:
std::unique_ptr<function::BuiltInVectorOperations> builtInVectorOperations;
Expand Down
Loading

0 comments on commit 7ab0b94

Please sign in to comment.