From 7447b355abb3e3bafc13969858ce31d12011f5f1 Mon Sep 17 00:00:00 2001 From: rfdavid Date: Tue, 4 Jul 2023 13:26:06 -0400 Subject: [PATCH] Bind copy and planner --- CMakeLists.txt | 3 +++ examples/cpp/main.cpp | 17 ++++++++---- src/binder/bind/bind_copy.cpp | 7 +++-- src/include/binder/copy/bound_copy.h | 7 ++--- src/include/parser/copy.h | 2 +- src/include/planner/join_order_enumerator.h | 2 ++ src/include/planner/planner.h | 10 ++++--- src/planner/planner.cpp | 30 +++++++++++++++------ 8 files changed, 55 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 639365e32e9..95ba12341ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,3 +178,6 @@ elseif (${BUILD_BENCHMARK}) add_subdirectory(test/test_helper) endif() add_subdirectory(tools) + +# REMOVE ME +add_subdirectory(examples/cpp) diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp index 48c441763a8..a2f594e530d 100644 --- a/examples/cpp/main.cpp +++ b/examples/cpp/main.cpp @@ -4,17 +4,24 @@ using namespace kuzu::main; int main() { - auto database = std::make_unique("" /* fill db path */); + auto database = std::make_unique("/tmp/puffer_fish" /* fill db path */); auto connection = std::make_unique(database.get()); - // Create schema. + connection->query("CREATE NODE TABLE Person(name STRING, age INT64, PRIMARY KEY(name));"); - // Create nodes. connection->query("CREATE (:Person {name: 'Alice', age: 25});"); connection->query("CREATE (:Person {name: 'Bob', age: 30});"); + connection->query("COPY Person TO '/tmp/out.csv';"); + + // Create schema. + //connection->query("CREATE NODE TABLE Person(name STRING, age INT64, PRIMARY KEY(name));"); + // Create nodes. + //connection->query("CREATE (:Person {name: 'Alice', age: 25});"); + //connection->query("CREATE (:Person {name: 'Bob', age: 30});"); + // Execute a simple query. - auto result = connection->query("MATCH (a:Person) RETURN a.name AS NAME, a.age AS AGE;"); + //auto result = connection->query("MATCH (a:Person) RETURN a.name AS NAME, a.age AS AGE;"); // Print query result. - std::cout << result->toString(); + //std::cout << result->toString(); } diff --git a/src/binder/bind/bind_copy.cpp b/src/binder/bind/bind_copy.cpp index 922ab999cef..4be200acc7e 100644 --- a/src/binder/bind/bind_copy.cpp +++ b/src/binder/bind/bind_copy.cpp @@ -14,7 +14,7 @@ std::unique_ptr Binder::bindCopyClause(const Statement& statemen auto& copyCSV = (Copy&)statement; auto catalogContent = catalog.getReadOnlyVersion(); auto tableName = copyCSV.getTableName(); - auto copyDirection = copyCSV.getDirection(); + auto copyDirection = copyCSV.getCopyDirection(); validateTableExist(catalog, tableName); auto tableID = catalogContent->getTableID(tableName); auto csvReaderConfig = bindParsingOptions(copyCSV.getParsingOptions()); @@ -43,7 +43,10 @@ std::unique_ptr Binder::bindCopyClause(const Statement& statemen tableSchema->tableName)); } } - return std::make_unique( + + std::shared_ptr node = createQueryNode(InternalKeyword::ANONYMOUS, std::vector{tableID}); + + return std::make_unique(node, CopyDescription(copyDirection, boundFilePaths, csvReaderConfig, actualFileType), tableID, tableName); } diff --git a/src/include/binder/copy/bound_copy.h b/src/include/binder/copy/bound_copy.h index ea6b28bc899..9472b82150d 100644 --- a/src/include/binder/copy/bound_copy.h +++ b/src/include/binder/copy/bound_copy.h @@ -13,14 +13,15 @@ namespace binder { class BoundCopy : public BoundStatement { public: - BoundCopy( + BoundCopy(std::shared_ptr nodeExpression, common::CopyDescription copyDescription, common::table_id_t tableID, std::string tableName) : BoundStatement{common::StatementType::COPY, BoundStatementResult::createSingleStringColumnResult()}, + nodeExpression{std::move(nodeExpression)}, copyDescription{std::move(copyDescription)}, tableID{tableID}, tableName{std::move( tableName)} {} - inline std::shared_ptr getNodeOrRelExpression() const { return nodeOrRelExpression; } + inline std::shared_ptr getNodeExpression() const { return nodeExpression; } inline common::CopyDescription getCopyDescription() const { return copyDescription; } @@ -35,7 +36,7 @@ class BoundCopy : public BoundStatement { std::string tableName; // CopyTo only - std::shared_ptr nodeOrRelExpression; + std::shared_ptr nodeExpression; expression_vector expressionsToCopy; }; diff --git a/src/include/parser/copy.h b/src/include/parser/copy.h index 68b5bcc4690..bde673b04f5 100644 --- a/src/include/parser/copy.h +++ b/src/include/parser/copy.h @@ -26,7 +26,7 @@ class Copy : public Statement { return &parsingOptions; } inline common::CopyDescription::FileType getFileType() const { return fileType; } - inline common::CopyDescription::CopyDirection getDirection() const { return copyDirection; } + inline common::CopyDescription::CopyDirection getCopyDirection() const { return copyDirection; } private: common::CopyDescription::CopyDirection copyDirection; diff --git a/src/include/planner/join_order_enumerator.h b/src/include/planner/join_order_enumerator.h index 35be4b47ab2..85563dc33f2 100644 --- a/src/include/planner/join_order_enumerator.h +++ b/src/include/planner/join_order_enumerator.h @@ -19,6 +19,8 @@ class JoinOrderEnumeratorContext; * filter push down */ class JoinOrderEnumerator { + friend class Planner; + public: JoinOrderEnumerator(const catalog::Catalog& catalog, QueryPlanner* queryPlanner) : catalog{catalog}, diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index 38a67cc8406..e940f80602a 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -36,11 +36,13 @@ class Planner { const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); - static std::unique_ptr planCopy( - const catalog::Catalog& catalog, const BoundStatement& statement); + static std::unique_ptr planCopy(const catalog::Catalog& catalog, + const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, const storage::RelsStatistics& relsStatistics, + const BoundStatement& statement); - static std::unique_ptr planCopyTo( - const catalog::Catalog& catalog, const BoundStatement& statement); + static std::unique_ptr planCopyTo(const catalog::Catalog& catalog, + const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, const storage::RelsStatistics& relsStatistics, + const BoundStatement& statement); static std::unique_ptr planCopyFrom( const catalog::Catalog& catalog, const BoundStatement& statement); diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index b16e878fa81..8a44e5cb60b 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -24,7 +24,9 @@ #include "planner/logical_plan/logical_operator/logical_rename_table.h" #include "planner/logical_plan/logical_operator/logical_standalone_call.h" +// TODO: will be reviewed #include "planner/join_order_enumerator.h" +#include "planner/logical_plan/logical_operator/logical_scan_node.h" using namespace kuzu::catalog; using namespace kuzu::common; @@ -48,7 +50,7 @@ std::unique_ptr Planner::getBestPlan(const Catalog& catalog, plan = planCreateRelTable(statement); } break; case StatementType::COPY: { - plan = planCopy(catalog, statement); + plan = planCopy(catalog, nodesStatistics, relsStatistics, statement); } break; case StatementType::DROP_TABLE: { plan = planDropTable(statement); @@ -166,14 +168,15 @@ std::unique_ptr Planner::planRenameProperty(const BoundStatement& s return plan; } -std::unique_ptr Planner::planCopy( - const catalog::Catalog& catalog, const BoundStatement& statement) { +std::unique_ptr Planner::planCopy(const Catalog& catalog, + const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const BoundStatement& statement) { auto& boundCopyStatement = (BoundCopy&)statement; if (boundCopyStatement.getCopyDescription().copyDirection == CopyDescription::CopyDirection::FROM) { return planCopyFrom(catalog, statement); } else { - return planCopyTo(catalog, statement); + return planCopyTo(catalog, nodesStatistics, relsStatistics, statement); } } @@ -200,13 +203,24 @@ std::unique_ptr Planner::planCopyFrom( return plan; } -std::unique_ptr Planner::planCopyTo( - const catalog::Catalog& catalog, const BoundStatement& statement) { +std::unique_ptr Planner::planCopyTo(const Catalog& catalog, + const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const BoundStatement& statement) { auto plan = std::make_unique(); auto& boundCopyStatement = (BoundCopy&)statement; - auto nodeOrRelExpression = boundCopyStatement.getNodeOrRelExpression(); - appendScanNodeID(nodeOrRelExpression, plan); + auto planner = QueryPlanner(catalog, nodesStatistics, relsStatistics); + JoinOrderEnumerator joinOrderEnumerator(catalog, &planner); + + auto nodeExpression = boundCopyStatement.getNodeExpression(); + + +/* auto scan = make_shared(nodeExpression); + scan->computeFactorizedSchema(); + plan->setLastOperator(std::move(scan)); +*/ + + joinOrderEnumerator.appendScanNodeID(nodeExpression, *plan); auto copyTo = make_shared(boundCopyStatement.getCopyDescription(), boundCopyStatement.getTableID(), boundCopyStatement.getTableName(),