From 1803e7250c8278d6e7565e18c68d58df1b571503 Mon Sep 17 00:00:00 2001 From: ziyi chen Date: Sun, 17 Mar 2024 22:09:01 -0400 Subject: [PATCH] Implement catalog cache for postgres scanner --- extension/duckdb_scanner/CMakeLists.txt | 4 +- .../duckdb_scanner/src/duckdb_catalog.cpp | 29 +++++++ extension/duckdb_scanner/src/duckdb_scan.cpp | 43 +++-------- .../duckdb_scanner/src/duckdb_storage.cpp | 75 ++++++++++++++++++- .../src/duckdb_table_catalog_entry.cpp | 20 +++++ .../src/include/duckdb_catalog.h | 33 ++++++++ .../duckdb_scanner/src/include/duckdb_scan.h | 6 +- .../src/include/duckdb_table_catalog_entry.h | 33 ++++++++ .../test/test_files/duckdb_scanner.test | 6 +- src/binder/bind/bind_reading_clause.cpp | 7 +- src/catalog/catalog_content.cpp | 19 +++-- .../binder/ddl/bound_create_table_info.h | 57 ++++++++------ src/include/catalog/catalog_content.h | 20 +++-- .../catalog/catalog_entry/catalog_entry.h | 2 +- .../catalog_entry/catalog_entry_type.h | 1 + .../catalog_entry/table_catalog_entry.h | 6 +- src/include/catalog/catalog_set.h | 2 +- src/include/common/enums/table_type.h | 1 + src/include/common/types/ku_string.h | 14 ++-- src/include/common/vector/value_vector.h | 3 +- src/include/main/attached_database.h | 11 +-- src/include/main/client_context.h | 2 +- src/include/main/database_manager.h | 4 +- src/include/storage/storage_extension.h | 7 +- src/main/database_manager.cpp | 14 ++-- src/processor/operator/attach_database.cpp | 3 +- 26 files changed, 311 insertions(+), 111 deletions(-) create mode 100644 extension/duckdb_scanner/src/duckdb_catalog.cpp create mode 100644 extension/duckdb_scanner/src/duckdb_table_catalog_entry.cpp create mode 100644 extension/duckdb_scanner/src/include/duckdb_catalog.h create mode 100644 extension/duckdb_scanner/src/include/duckdb_table_catalog_entry.h diff --git a/extension/duckdb_scanner/CMakeLists.txt b/extension/duckdb_scanner/CMakeLists.txt index 7f33580c469..8adca714d24 100644 --- a/extension/duckdb_scanner/CMakeLists.txt +++ b/extension/duckdb_scanner/CMakeLists.txt @@ -10,7 +10,9 @@ add_library(duckdb_scanner src/duckdb_scanner_extension.cpp src/duckdb_storage.cpp src/duckdb_scan.cpp - src/duckdb_type_converter.cpp) + src/duckdb_type_converter.cpp + src/duckdb_catalog.cpp + src/duckdb_table_catalog_entry.cpp) set_target_properties(duckdb_scanner PROPERTIES OUTPUT_NAME duckdb_scanner diff --git a/extension/duckdb_scanner/src/duckdb_catalog.cpp b/extension/duckdb_scanner/src/duckdb_catalog.cpp new file mode 100644 index 00000000000..1bf5cb263fc --- /dev/null +++ b/extension/duckdb_scanner/src/duckdb_catalog.cpp @@ -0,0 +1,29 @@ +#include "duckdb_catalog.h" + +namespace kuzu { +namespace duckdb_scanner { + +common::table_id_t DuckDBCatalogContent::createForeignTable( + const binder::BoundCreateTableInfo& info) { + auto tableID = assignNextTableID(); + auto extraInfo = common::ku_dynamic_cast(info.extraInfo.get()); + std::vector columnTypes; + std::vector columnNames; + for (auto& propertyInfo : extraInfo->propertyInfos) { + columnNames.push_back(propertyInfo.name); + columnTypes.push_back(propertyInfo.type); + } + DuckDBScanBindData bindData(common::stringFormat("SELECT * FROM {}", info.tableName), + extraInfo->dbPath, std::move(columnTypes), std::move(columnNames)); + auto tableEntry = std::make_unique( + info.tableName, tableID, getScanFunction(std::move(bindData))); + for (auto& propertyInfo : extraInfo->propertyInfos) { + tableEntry->addProperty(propertyInfo.name, propertyInfo.type.copy()); + } + tables->createEntry(std::move(tableEntry)); + return tableID; +} + +} // namespace duckdb_scanner +} // namespace kuzu diff --git a/extension/duckdb_scanner/src/duckdb_scan.cpp b/extension/duckdb_scanner/src/duckdb_scan.cpp index b2225972f9a..95b0aa7ee9b 100644 --- a/extension/duckdb_scanner/src/duckdb_scan.cpp +++ b/extension/duckdb_scanner/src/duckdb_scan.cpp @@ -2,7 +2,6 @@ #include "common/exception/binder.h" #include "common/types/types.h" -#include "duckdb_type_converter.h" #include "function/table/bind_input.h" using namespace kuzu::function; @@ -38,8 +37,8 @@ struct DuckDBScanFunction { static common::offset_t tableFunc( function::TableFuncInput& input, function::TableFuncOutput& output); - static std::unique_ptr bindFunc( - std::string dbPath, main::ClientContext* /*context*/, function::TableFuncBindInput* input); + static std::unique_ptr bindFunc(DuckDBScanBindData bindData, + main::ClientContext* /*context*/, function::TableFuncBindInput* input); static std::unique_ptr initSharedState( function::TableFunctionInitInput& input); @@ -211,40 +210,18 @@ common::offset_t DuckDBScanFunction::tableFunc( return output.dataChunk.state->selVector->selectedSize; } -std::unique_ptr DuckDBScanFunction::bindFunc(std::string dbPath, - main::ClientContext* /*clientContext*/, function::TableFuncBindInput* input) { - auto tableName = input->inputs[0].getValue(); - duckdb::DBConfig dbConfig{true /* read_only */}; - duckdb::DuckDB db(dbPath, &dbConfig); - duckdb::Connection conn(db); - auto result = conn.Query( - common::stringFormat("select data_type,column_name from information_schema.columns where " - "table_name = '{}' and table_schema='main';", - tableName)); - if (result->RowCount() == 0) { - throw common::BinderException( - common::stringFormat("No table named: {} in database.", tableName)); - } - std::vector columnTypes; - std::vector columnNames; - columnTypes.reserve(result->RowCount()); - columnNames.reserve(result->RowCount()); - for (auto i = 0u; i < result->RowCount(); i++) { - columnTypes.push_back( - DuckDBTypeConverter::convertDuckDBType(result->GetValue(0, i).GetValue())); - columnNames.push_back(result->GetValue(1, i).GetValue()); - } - auto query = common::stringFormat("SELECT * FROM {}", tableName); - return std::make_unique( - query, dbPath, std::move(columnTypes), std::move(columnNames)); +std::unique_ptr DuckDBScanFunction::bindFunc( + DuckDBScanBindData bindData, main::ClientContext* /*clientContext*/, + function::TableFuncBindInput* /*input*/) { + return bindData.copy(); } -TableFunction getScanFunction(std::string dbPath) { +TableFunction getScanFunction(DuckDBScanBindData bindData) { return TableFunction(DuckDBScanFunction::DUCKDB_SCAN_FUNC_NAME, DuckDBScanFunction::tableFunc, - std::bind( - DuckDBScanFunction::bindFunc, dbPath, std::placeholders::_1, std::placeholders::_2), + std::bind(DuckDBScanFunction::bindFunc, std::move(bindData), std::placeholders::_1, + std::placeholders::_2), DuckDBScanFunction::initSharedState, DuckDBScanFunction::initLocalState, - std::vector{LogicalTypeID::STRING}); + std::vector{}); } } // namespace duckdb_scanner diff --git a/extension/duckdb_scanner/src/duckdb_storage.cpp b/extension/duckdb_scanner/src/duckdb_storage.cpp index b0f130d38a4..1d88fe7f153 100644 --- a/extension/duckdb_scanner/src/duckdb_storage.cpp +++ b/extension/duckdb_scanner/src/duckdb_storage.cpp @@ -1,11 +1,82 @@ #include "duckdb_storage.h" +#include "binder/ddl/bound_create_table_info.h" +#include "catalog/catalog_entry/table_catalog_entry.h" +#include "common/exception/binder.h" +#include "duckdb_catalog.h" #include "duckdb_scan.h" +#include "duckdb_type_converter.h" namespace kuzu { namespace duckdb_scanner { -std::unique_ptr attachDuckDB(std::string dbName, std::string dbPath) { +static void getTableInfo(duckdb::Connection& con, std::string tableName, + std::vector& columnTypes, std::vector& columnNames) { + auto result = con.Query( + common::stringFormat("select data_type,column_name from information_schema.columns where " + "table_name = '{}' and table_schema='main';", + tableName)); + if (result->RowCount() == 0) { + throw common::BinderException( + common::stringFormat("No table named: {} in database.", tableName)); + } + columnTypes.reserve(result->RowCount()); + columnNames.reserve(result->RowCount()); + for (auto i = 0u; i < result->RowCount(); i++) { + columnTypes.push_back( + DuckDBTypeConverter::convertDuckDBType(result->GetValue(0, i).GetValue())); + columnNames.push_back(result->GetValue(1, i).GetValue()); + } +} + +std::unique_ptr getCreateTableInfo( + duckdb::Connection& con, std::string tableName, std::string dbPath) { + std::vector columnTypes; + std::vector columnNames; + try { + getTableInfo(con, tableName, columnTypes, columnNames); + } catch (common::BinderException&) { return nullptr; } + std::vector propertyInfos; + for (auto i = 0u; i < columnNames.size(); i++) { + auto propertyInfo = binder::PropertyInfo(columnNames[i], columnTypes[i]); + propertyInfos.push_back(std::move(propertyInfo)); + } + return std::make_unique(common::TableType::FOREIGN, tableName, + std::make_unique( + std::move(dbPath), std::move(propertyInfos))); +} + +std::unique_ptr createCatalog( + std::string dbPath, main::ClientContext* context) { + duckdb::DuckDB db(dbPath); + duckdb::Connection con(db); + auto query = "select table_name from information_schema.tables where table_schema = 'main';"; + auto result = con.SendQuery(query); + std::unique_ptr resultChunk; + try { + resultChunk = result->Fetch(); + } catch (std::exception& e) { return 0; } + if (resultChunk == nullptr) { + return 0; + } + auto tableNamesVector = std::make_unique( + common::LogicalTypeID::STRING, context->getMemoryManager()); + duckdb_conversion_func_t conversionFunc; + getDuckDBVectorConversionFunc(common::PhysicalTypeID::STRING, conversionFunc); + conversionFunc(resultChunk->data[0], *tableNamesVector, resultChunk->size()); + std::unique_ptr catalogContent = std::make_unique(); + for (auto i = 0u; i < resultChunk->size(); i++) { + auto tableName = tableNamesVector->getValue(i).getAsString(); + auto createTableInfo = getCreateTableInfo(con, tableName, dbPath); + if (createTableInfo != nullptr) { + catalogContent->createForeignTable(*createTableInfo); + } + } + return catalogContent; +} + +std::unique_ptr attachDuckDB( + std::string dbName, std::string dbPath, main::ClientContext* clientContext) { if (dbName == "") { if (dbPath.find('.') != std::string::npos) { auto fileNamePos = dbPath.find_last_of('/') + 1; @@ -14,7 +85,7 @@ std::unique_ptr attachDuckDB(std::string dbName, std::st dbName = dbPath; } } - return std::make_unique(dbName, getScanFunction(dbPath)); + return std::make_unique(dbName, createCatalog(dbPath, clientContext)); } DuckDBStorageExtension::DuckDBStorageExtension() : StorageExtension{attachDuckDB} {} diff --git a/extension/duckdb_scanner/src/duckdb_table_catalog_entry.cpp b/extension/duckdb_scanner/src/duckdb_table_catalog_entry.cpp new file mode 100644 index 00000000000..caf1267e7a2 --- /dev/null +++ b/extension/duckdb_scanner/src/duckdb_table_catalog_entry.cpp @@ -0,0 +1,20 @@ +#include "duckdb_table_catalog_entry.h" + +namespace kuzu { +namespace catalog { + +DuckDBTableCatalogEntry::DuckDBTableCatalogEntry( + std::string name, common::table_id_t tableID, function::TableFunction scanFunction) + : TableCatalogEntry{CatalogEntryType::FOREIGN_TABLE_ENTRY, std::move(name), tableID}, + scanFunction{std::move(scanFunction)} {} + +common::TableType DuckDBTableCatalogEntry::getTableType() const { + return common::TableType::FOREIGN; +} + +std::unique_ptr DuckDBTableCatalogEntry::copy() const { + return std::make_unique(*this); +} + +} // namespace catalog +} // namespace kuzu diff --git a/extension/duckdb_scanner/src/include/duckdb_catalog.h b/extension/duckdb_scanner/src/include/duckdb_catalog.h new file mode 100644 index 00000000000..8ba6c0a3dd7 --- /dev/null +++ b/extension/duckdb_scanner/src/include/duckdb_catalog.h @@ -0,0 +1,33 @@ +#pragma once + +#include "catalog/catalog_content.h" +#include "catalog/catalog_entry/table_catalog_entry.h" +#include "duckdb_scan.h" +#include "duckdb_table_catalog_entry.h" + +namespace kuzu { +namespace duckdb_scanner { + +struct BoundExtraCreateDuckDBTableInfo final : public binder::BoundExtraCreateTableInfo { + std::string dbPath; + + BoundExtraCreateDuckDBTableInfo( + std::string dbPath, std::vector propertyInfos) + : BoundExtraCreateTableInfo{std::move(propertyInfos)}, dbPath{std::move(dbPath)} {} + BoundExtraCreateDuckDBTableInfo(const BoundExtraCreateDuckDBTableInfo& other) + : BoundExtraCreateTableInfo{copyVector(other.propertyInfos)}, dbPath{other.dbPath} {} + + std::unique_ptr copy() const override { + return std::make_unique(*this); + } +}; + +class DuckDBCatalogContent : public catalog::CatalogContent { +public: + DuckDBCatalogContent() : catalog::CatalogContent{nullptr /* vfs */} {} + + common::table_id_t createForeignTable(const binder::BoundCreateTableInfo& info); +}; + +} // namespace duckdb_scanner +} // namespace kuzu diff --git a/extension/duckdb_scanner/src/include/duckdb_scan.h b/extension/duckdb_scanner/src/include/duckdb_scan.h index 94a3eed4374..a783dfeb453 100644 --- a/extension/duckdb_scanner/src/include/duckdb_scan.h +++ b/extension/duckdb_scanner/src/include/duckdb_scan.h @@ -9,6 +9,7 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" // Supress warnings from duckdb.hpp #undef ARROW_FLAG_DICTIONARY_ORDERED +#include "common/types/types.h" #include "duckdb.hpp" #pragma GCC diagnostic pop @@ -35,7 +36,10 @@ struct DuckDBScanSharedState : public function::TableFuncSharedState { std::unique_ptr queryResult; }; -function::TableFunction getScanFunction(std::string dbPath); +void getDuckDBVectorConversionFunc( + common::PhysicalTypeID physicalTypeID, duckdb_conversion_func_t& conversion_func); + +function::TableFunction getScanFunction(DuckDBScanBindData bindData); } // namespace duckdb_scanner } // namespace kuzu diff --git a/extension/duckdb_scanner/src/include/duckdb_table_catalog_entry.h b/extension/duckdb_scanner/src/include/duckdb_table_catalog_entry.h new file mode 100644 index 00000000000..b896fc6ec2a --- /dev/null +++ b/extension/duckdb_scanner/src/include/duckdb_table_catalog_entry.h @@ -0,0 +1,33 @@ +#pragma once + +#include "catalog/catalog_entry/table_catalog_entry.h" +#include "function/table_functions.h" + +namespace kuzu { +namespace catalog { + +class DuckDBTableCatalogEntry final : public TableCatalogEntry { +public: + //===--------------------------------------------------------------------===// + // constructors + //===--------------------------------------------------------------------===// + DuckDBTableCatalogEntry( + std::string name, common::table_id_t tableID, function::TableFunction scanFunction); + + //===--------------------------------------------------------------------===// + // getter & setter + //===--------------------------------------------------------------------===// + common::TableType getTableType() const override; + function::TableFunction getScanFunction() override { return scanFunction; } + + //===--------------------------------------------------------------------===// + // serialization & deserialization + //===--------------------------------------------------------------------===// + std::unique_ptr copy() const override; + +private: + function::TableFunction scanFunction; +}; + +} // namespace catalog +} // namespace kuzu diff --git a/extension/duckdb_scanner/test/test_files/duckdb_scanner.test b/extension/duckdb_scanner/test/test_files/duckdb_scanner.test index 05a6ea6374d..aa5a891cd63 100644 --- a/extension/duckdb_scanner/test/test_files/duckdb_scanner.test +++ b/extension/duckdb_scanner/test/test_files/duckdb_scanner.test @@ -37,7 +37,7 @@ The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie|2544| the movie is very v 49992|50|31.582059|False|2056-05-02||[62,24,94]|[LpQO8OT3x45a]|[[268,281,166],[144,16,126,208,298],[22,287]]|{ID: 936, "name": sGPSafxMAhKiP} -STATEMENT LOAD FROM tinysnb_person1 RETURN *; ---- error -Binder exception: No table named: person1 in database. +Catalog exception: Table: person1 does not exist. -STATEMENT DETACH tinysnb; ---- ok -STATEMENT LOAD FROM tinysnb_person RETURN *; @@ -57,10 +57,6 @@ Binder exception: No database named tinysnb has been attached. -STATEMENT LOAD FROM tinysnb_person RETURN count(*); ---- 1 8 --LOG UnsupportedDuckDBType --STATEMENT LOAD FROM tinysnb_unsupportedType RETURN *; ----- error -Binder exception: Unsupported duckdb type: ENUM('sad', 'ok', 'happy'). -CASE InvalidDuckDBDatabase -STATEMENT LOAD FROM tinysnb1_person RETURN *; diff --git a/src/binder/bind/bind_reading_clause.cpp b/src/binder/bind/bind_reading_clause.cpp index cb06e92091a..36dc3816878 100644 --- a/src/binder/bind/bind_reading_clause.cpp +++ b/src/binder/bind/bind_reading_clause.cpp @@ -179,9 +179,12 @@ std::unique_ptr Binder::bindLoadFrom(const ReadingClause& re throw BinderException{ common::stringFormat("No database named {} has been attached.", dbName)}; } - scanFunction = attachedDB->getScanFunction(); + auto tableName = common::StringUtils::split(objectName, "_")[1]; + auto tableID = attachedDB->getCatalogContent()->getTableID(tableName); + auto tableCatalogEntry = ku_dynamic_cast( + attachedDB->getCatalogContent()->getTableCatalogEntry(tableID)); + scanFunction = tableCatalogEntry->getScanFunction(); bindInput = std::make_unique(); - bindInput->inputs.push_back(Value(common::StringUtils::split(objectName, "_")[1])); } } break; case ScanSourceType::FILE: { diff --git a/src/catalog/catalog_content.cpp b/src/catalog/catalog_content.cpp index bfc5d307242..c687e12fb16 100644 --- a/src/catalog/catalog_content.cpp +++ b/src/catalog/catalog_content.cpp @@ -40,8 +40,9 @@ CatalogContent::CatalogContent(const std::string& directory, VirtualFileSystem* table_id_t CatalogContent::createNodeTable(const binder::BoundCreateTableInfo& info) { table_id_t tableID = assignNextTableID(); - auto extraInfo = ku_dynamic_cast( - info.extraInfo.get()); + auto extraInfo = + ku_dynamic_cast( + info.extraInfo.get()); auto nodeTableEntry = std::make_unique(info.tableName, tableID, extraInfo->primaryKeyIdx); for (auto& propertyInfo : extraInfo->propertyInfos) { @@ -54,8 +55,9 @@ table_id_t CatalogContent::createNodeTable(const binder::BoundCreateTableInfo& i table_id_t CatalogContent::createRelTable(const binder::BoundCreateTableInfo& info) { table_id_t tableID = assignNextTableID(); - auto extraInfo = ku_dynamic_cast( - info.extraInfo.get()); + auto extraInfo = + ku_dynamic_cast( + info.extraInfo.get()); auto srcTableEntry = ku_dynamic_cast( getTableCatalogEntry(extraInfo->srcTableID)); auto dstTableEntry = ku_dynamic_cast( @@ -91,17 +93,18 @@ table_id_t CatalogContent::createRelGroup(const binder::BoundCreateTableInfo& in table_id_t CatalogContent::createRDFGraph(const binder::BoundCreateTableInfo& info) { table_id_t rdfGraphID = assignNextTableID(); - auto extraInfo = ku_dynamic_cast( - info.extraInfo.get()); + auto extraInfo = + ku_dynamic_cast( + info.extraInfo.get()); auto& resourceInfo = extraInfo->resourceInfo; auto& literalInfo = extraInfo->literalInfo; auto& resourceTripleInfo = extraInfo->resourceTripleInfo; auto& literalTripleInfo = extraInfo->literalTripleInfo; auto resourceTripleExtraInfo = - ku_dynamic_cast( + ku_dynamic_cast( resourceTripleInfo.extraInfo.get()); auto literalTripleExtraInfo = - ku_dynamic_cast( + ku_dynamic_cast( literalTripleInfo.extraInfo.get()); // Resource table auto resourceTableID = createNodeTable(resourceInfo); diff --git a/src/include/binder/ddl/bound_create_table_info.h b/src/include/binder/ddl/bound_create_table_info.h index ea9d35f10ea..9a7abde7c76 100644 --- a/src/include/binder/ddl/bound_create_table_info.h +++ b/src/include/binder/ddl/bound_create_table_info.h @@ -11,18 +11,18 @@ enum class RelMultiplicity : uint8_t; } namespace binder { -struct BoundExtraCreateTableInfo { - virtual ~BoundExtraCreateTableInfo() = default; - virtual inline std::unique_ptr copy() const = 0; +struct BoundExtraCreateCatalogEntryInfo { + virtual ~BoundExtraCreateCatalogEntryInfo() = default; + virtual inline std::unique_ptr copy() const = 0; }; struct BoundCreateTableInfo { common::TableType type; std::string tableName; - std::unique_ptr extraInfo; + std::unique_ptr extraInfo; BoundCreateTableInfo(common::TableType type, std::string tableName, - std::unique_ptr extraInfo) + std::unique_ptr extraInfo) : type{type}, tableName{std::move(tableName)}, extraInfo{std::move(extraInfo)} {} EXPLICIT_COPY_DEFAULT_MOVE(BoundCreateTableInfo); @@ -43,44 +43,57 @@ struct PropertyInfo { PropertyInfo(const PropertyInfo& other) : name{other.name}, type{other.type} {} }; -struct BoundExtraCreateNodeTableInfo : public BoundExtraCreateTableInfo { - common::property_id_t primaryKeyIdx; +struct BoundExtraCreateTableInfo : public BoundExtraCreateCatalogEntryInfo { std::vector propertyInfos; + explicit BoundExtraCreateTableInfo(std::vector propertyInfos) + : propertyInfos{std::move(propertyInfos)} {} + + BoundExtraCreateTableInfo(const BoundExtraCreateTableInfo& other) + : BoundExtraCreateTableInfo{copyVector(other.propertyInfos)} {} + + std::unique_ptr copy() const override { + return std::make_unique(*this); + } +}; + +struct BoundExtraCreateNodeTableInfo final : public BoundExtraCreateTableInfo { + common::property_id_t primaryKeyIdx; + BoundExtraCreateNodeTableInfo( common::property_id_t primaryKeyIdx, std::vector propertyInfos) - : primaryKeyIdx{primaryKeyIdx}, propertyInfos{std::move(propertyInfos)} {} + : BoundExtraCreateTableInfo{std::move(propertyInfos)}, primaryKeyIdx{primaryKeyIdx} {} BoundExtraCreateNodeTableInfo(const BoundExtraCreateNodeTableInfo& other) - : primaryKeyIdx{other.primaryKeyIdx}, propertyInfos{copyVector(other.propertyInfos)} {} + : BoundExtraCreateTableInfo{copyVector(other.propertyInfos)}, primaryKeyIdx{ + other.primaryKeyIdx} {} - inline std::unique_ptr copy() const final { + std::unique_ptr copy() const override { return std::make_unique(*this); } }; -struct BoundExtraCreateRelTableInfo : public BoundExtraCreateTableInfo { +struct BoundExtraCreateRelTableInfo final : public BoundExtraCreateTableInfo { common::RelMultiplicity srcMultiplicity; common::RelMultiplicity dstMultiplicity; common::table_id_t srcTableID; common::table_id_t dstTableID; - std::vector propertyInfos; BoundExtraCreateRelTableInfo(common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, common::table_id_t srcTableID, common::table_id_t dstTableID, std::vector propertyInfos) - : srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, - srcTableID{srcTableID}, dstTableID{dstTableID}, propertyInfos{std::move(propertyInfos)} {} + : BoundExtraCreateTableInfo{std::move(propertyInfos)}, srcMultiplicity{srcMultiplicity}, + dstMultiplicity{dstMultiplicity}, srcTableID{srcTableID}, dstTableID{dstTableID} {} BoundExtraCreateRelTableInfo(const BoundExtraCreateRelTableInfo& other) - : srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity}, - srcTableID{other.srcTableID}, dstTableID{other.dstTableID}, propertyInfos{copyVector( - other.propertyInfos)} {} + : BoundExtraCreateTableInfo{copyVector(other.propertyInfos)}, + srcMultiplicity{other.srcMultiplicity}, dstMultiplicity{other.dstMultiplicity}, + srcTableID{other.srcTableID}, dstTableID{other.dstTableID} {} - inline std::unique_ptr copy() const final { + std::unique_ptr copy() const override { return std::make_unique(*this); } }; -struct BoundExtraCreateRelTableGroupInfo : public BoundExtraCreateTableInfo { +struct BoundExtraCreateRelTableGroupInfo final : public BoundExtraCreateCatalogEntryInfo { std::vector infos; explicit BoundExtraCreateRelTableGroupInfo(std::vector infos) @@ -88,12 +101,12 @@ struct BoundExtraCreateRelTableGroupInfo : public BoundExtraCreateTableInfo { BoundExtraCreateRelTableGroupInfo(const BoundExtraCreateRelTableGroupInfo& other) : infos{copyVector(other.infos)} {} - inline std::unique_ptr copy() const final { + inline std::unique_ptr copy() const override { return std::make_unique(*this); } }; -struct BoundExtraCreateRdfGraphInfo : public BoundExtraCreateTableInfo { +struct BoundExtraCreateRdfGraphInfo final : public BoundExtraCreateCatalogEntryInfo { BoundCreateTableInfo resourceInfo; BoundCreateTableInfo literalInfo; BoundCreateTableInfo resourceTripleInfo; @@ -110,7 +123,7 @@ struct BoundExtraCreateRdfGraphInfo : public BoundExtraCreateTableInfo { resourceTripleInfo{other.resourceTripleInfo.copy()}, literalTripleInfo{other.literalTripleInfo.copy()} {} - inline std::unique_ptr copy() const final { + inline std::unique_ptr copy() const override { return std::make_unique(*this); } }; diff --git a/src/include/catalog/catalog_content.h b/src/include/catalog/catalog_content.h index 7cc32f32fda..a1ffe6ad0bf 100644 --- a/src/include/catalog/catalog_content.h +++ b/src/include/catalog/catalog_content.h @@ -18,7 +18,7 @@ class CatalogContent { friend class Catalog; public: - explicit CatalogContent(common::VirtualFileSystem* vfs); + KUZU_API explicit CatalogContent(common::VirtualFileSystem* vfs); CatalogContent(const std::string& directory, common::VirtualFileSystem* vfs); @@ -26,14 +26,20 @@ class CatalogContent { std::unordered_map tableNameToIDMap, common::table_id_t nextTableID, std::unique_ptr functions, common::VirtualFileSystem* vfs) - : tableNameToIDMap{std::move(tableNameToIDMap)}, nextTableID{nextTableID}, vfs{vfs}, - tables{std::move(tables)}, functions{std::move(functions)} {} + : tables{std::move(tables)}, tableNameToIDMap{std::move(tableNameToIDMap)}, + nextTableID{nextTableID}, vfs{vfs}, functions{std::move(functions)} {} + + common::table_id_t getTableID(const std::string& tableName) const; + CatalogEntry* getTableCatalogEntry(common::table_id_t tableID) const; void saveToFile(const std::string& directory, common::FileVersionType dbFileType); void readFromFile(const std::string& directory, common::FileVersionType dbFileType); std::unique_ptr copy() const; +protected: + common::table_id_t assignNextTableID() { return nextTableID++; } + private: // ----------------------------- Functions ---------------------------- void registerBuiltInFunctions(); @@ -46,7 +52,6 @@ class CatalogContent { function::ScalarMacroFunction* getScalarMacroFunction(const std::string& name) const; // ----------------------------- Table entries ---------------------------- - common::table_id_t assignNextTableID() { return nextTableID++; } uint64_t getNumTables() const { return tables->getEntries().size(); } bool containsTable(const std::string& tableName) const; @@ -54,8 +59,6 @@ class CatalogContent { std::string getTableName(common::table_id_t tableID) const; - CatalogEntry* getTableCatalogEntry(common::table_id_t tableID) const; - template std::vector getTableCatalogEntries(CatalogEntryType catalogType) const { std::vector result; @@ -67,7 +70,6 @@ class CatalogContent { return result; } - common::table_id_t getTableID(const std::string& tableName) const; std::vector getTableIDs(CatalogEntryType catalogType) const; common::table_id_t createNodeTable(const binder::BoundCreateTableInfo& info); @@ -77,13 +79,15 @@ class CatalogContent { void dropTable(common::table_id_t tableID); void renameTable(common::table_id_t tableID, const std::string& newName); +protected: + std::unique_ptr tables; + private: // These two maps are maintained as caches. They are not serialized to the catalog file, but // is re-constructed when reading from the catalog file. std::unordered_map tableNameToIDMap; common::table_id_t nextTableID; common::VirtualFileSystem* vfs; - std::unique_ptr tables; std::unique_ptr functions; }; diff --git a/src/include/catalog/catalog_entry/catalog_entry.h b/src/include/catalog/catalog_entry/catalog_entry.h index e2b0ab3e5b2..65e4fedade2 100644 --- a/src/include/catalog/catalog_entry/catalog_entry.h +++ b/src/include/catalog/catalog_entry/catalog_entry.h @@ -9,7 +9,7 @@ namespace kuzu { namespace catalog { -class CatalogEntry { +class KUZU_API CatalogEntry { public: //===--------------------------------------------------------------------===// // constructor & destructor diff --git a/src/include/catalog/catalog_entry/catalog_entry_type.h b/src/include/catalog/catalog_entry/catalog_entry_type.h index e94d9590676..d44db816aca 100644 --- a/src/include/catalog/catalog_entry/catalog_entry_type.h +++ b/src/include/catalog/catalog_entry/catalog_entry_type.h @@ -15,6 +15,7 @@ enum class CatalogEntryType : uint8_t { SCALAR_FUNCTION_ENTRY = 6, REWRITE_FUNCTION_ENTRY = 7, TABLE_FUNCTION_ENTRY = 8, + FOREIGN_TABLE_ENTRY = 9, }; } // namespace catalog diff --git a/src/include/catalog/catalog_entry/table_catalog_entry.h b/src/include/catalog/catalog_entry/table_catalog_entry.h index 93990ea4d6f..12c9cc7f8fb 100644 --- a/src/include/catalog/catalog_entry/table_catalog_entry.h +++ b/src/include/catalog/catalog_entry/table_catalog_entry.h @@ -5,11 +5,12 @@ #include "catalog/property.h" #include "catalog_entry.h" #include "common/enums/table_type.h" +#include "function/table_functions.h" namespace kuzu { namespace catalog { -class TableCatalogEntry : public CatalogEntry { +class KUZU_API TableCatalogEntry : public CatalogEntry { public: //===--------------------------------------------------------------------===// // constructors @@ -27,9 +28,10 @@ class TableCatalogEntry : public CatalogEntry { common::table_id_t getTableID() const { return tableID; } std::string getComment() const { return comment; } void setComment(std::string newComment) { comment = std::move(newComment); } - virtual bool isParent(common::table_id_t tableID) = 0; + virtual bool isParent(common::table_id_t /*tableID*/) { return false; }; // TODO(Guodong/Ziyi): This function should be removed. Instead we should use CatalogEntryType. virtual common::TableType getTableType() const = 0; + virtual function::TableFunction getScanFunction() { KU_UNREACHABLE; } //===--------------------------------------------------------------------===// // properties functions diff --git a/src/include/catalog/catalog_set.h b/src/include/catalog/catalog_set.h index 1850c18548b..0165068ff20 100644 --- a/src/include/catalog/catalog_set.h +++ b/src/include/catalog/catalog_set.h @@ -14,7 +14,7 @@ class CatalogSet { //===--------------------------------------------------------------------===// bool containsEntry(const std::string& name) const; CatalogEntry* getEntry(const std::string& name); - void createEntry(std::unique_ptr entry); + KUZU_API void createEntry(std::unique_ptr entry); void removeEntry(const std::string& name); void renameEntry(const std::string& oldName, const std::string& newName); common::case_insensitive_map_t>& getEntries() { return entries; } diff --git a/src/include/common/enums/table_type.h b/src/include/common/enums/table_type.h index eab71d76fd2..b41d9137418 100644 --- a/src/include/common/enums/table_type.h +++ b/src/include/common/enums/table_type.h @@ -13,6 +13,7 @@ enum class TableType : uint8_t { REL = 2, RDF = 3, REL_GROUP = 4, + FOREIGN = 5, }; struct TableTypeUtils { diff --git a/src/include/common/types/ku_string.h b/src/include/common/types/ku_string.h index 327f683e460..765adb3f981 100644 --- a/src/include/common/types/ku_string.h +++ b/src/include/common/types/ku_string.h @@ -4,6 +4,8 @@ #include #include +#include "common/api.h" + namespace kuzu { namespace common { @@ -25,7 +27,7 @@ struct ku_string_t { static bool isShortString(uint32_t len) { return len <= SHORT_STR_LENGTH; } - inline const uint8_t* getData() const { + const uint8_t* getData() const { return isShortString(len) ? prefix : reinterpret_cast(overflowPtr); } @@ -34,20 +36,20 @@ struct ku_string_t { void set(const std::string& value); void set(const char* value, uint64_t length); void set(const ku_string_t& value); - inline void setShortString(const char* value, uint64_t length) { + void setShortString(const char* value, uint64_t length) { this->len = length; memcpy(prefix, value, length); } - inline void setLongString(const char* value, uint64_t length) { + void setLongString(const char* value, uint64_t length) { this->len = length; memcpy(prefix, value, PREFIX_LENGTH); memcpy(reinterpret_cast(overflowPtr), value, length); } - inline void setShortString(const ku_string_t& value) { + void setShortString(const ku_string_t& value) { this->len = value.len; memcpy(prefix, value.prefix, value.len); } - inline void setLongString(const ku_string_t& value) { + void setLongString(const ku_string_t& value) { this->len = value.len; memcpy(prefix, value.prefix, PREFIX_LENGTH); memcpy(reinterpret_cast(overflowPtr), reinterpret_cast(value.overflowPtr), @@ -65,7 +67,7 @@ struct ku_string_t { } std::string getAsShortString() const; - std::string getAsString() const; + KUZU_API std::string getAsString() const; std::string_view getAsStringView() const; bool operator==(const ku_string_t& rhs) const; diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 49ff32355ad..6d0dcfdb06b 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -26,7 +26,8 @@ class ValueVector { friend class ArrowColumnVector; public: - explicit ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager = nullptr); + KUZU_API explicit ValueVector( + LogicalType dataType, storage::MemoryManager* memoryManager = nullptr); explicit ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager* memoryManager = nullptr) : ValueVector(LogicalType(dataTypeID), memoryManager) { KU_ASSERT(dataTypeID != LogicalTypeID::VAR_LIST); diff --git a/src/include/main/attached_database.h b/src/include/main/attached_database.h index 29f8293d4e9..811536e5aa2 100644 --- a/src/include/main/attached_database.h +++ b/src/include/main/attached_database.h @@ -1,24 +1,25 @@ #pragma once +#include #include -#include "function/table_functions.h" +#include "catalog/catalog_content.h" namespace kuzu { namespace main { class AttachedDatabase { public: - AttachedDatabase(std::string dbName, function::TableFunction scanFunction) - : dbName{std::move(dbName)}, scanFunction{std::move(scanFunction)} {} + AttachedDatabase(std::string dbName, std::unique_ptr catalogContent) + : dbName{std::move(dbName)}, catalogContent{std::move(catalogContent)} {} std::string getDBName() { return dbName; } - function::TableFunction getScanFunction() { return scanFunction; } + catalog::CatalogContent* getCatalogContent() { return catalogContent.get(); } private: std::string dbName; - function::TableFunction scanFunction; + std::unique_ptr catalogContent; }; } // namespace main diff --git a/src/include/main/client_context.h b/src/include/main/client_context.h index c4b3cd6b398..bd021e8f3b0 100644 --- a/src/include/main/client_context.h +++ b/src/include/main/client_context.h @@ -93,7 +93,7 @@ class ClientContext { // Database component getters. KUZU_API Database* getDatabase() const { return database; } storage::StorageManager* getStorageManager(); - storage::MemoryManager* getMemoryManager(); + KUZU_API storage::MemoryManager* getMemoryManager(); catalog::Catalog* getCatalog(); common::VirtualFileSystem* getVFSUnsafe() const; common::RandomEngine* getRandomEngine(); diff --git a/src/include/main/database_manager.h b/src/include/main/database_manager.h index 35be0a6738b..20f4d187982 100644 --- a/src/include/main/database_manager.h +++ b/src/include/main/database_manager.h @@ -8,8 +8,8 @@ namespace main { class DatabaseManager { public: void registerAttachedDatabase(std::unique_ptr attachedDatabase); - AttachedDatabase* getAttachedDatabase(const std::string& name); - void detachDatabase(const std::string& databaseName); + AttachedDatabase* getAttachedDatabase(std::string name); + void detachDatabase(std::string databaseName); private: std::vector> attachedDatabases; diff --git a/src/include/storage/storage_extension.h b/src/include/storage/storage_extension.h index c9aa991cc05..99aca94d3d1 100644 --- a/src/include/storage/storage_extension.h +++ b/src/include/storage/storage_extension.h @@ -6,7 +6,7 @@ namespace kuzu { namespace storage { using attach_function_t = std::unique_ptr (*)( - std::string dbPath, std::string dbName); + std::string dbPath, std::string dbName, main::ClientContext* clientContext); class StorageExtension { public: @@ -14,8 +14,9 @@ class StorageExtension { : attachFunction{std::move(attachFunction)} {} virtual bool canHandleDB(std::string /*dbType*/) const { return false; } - std::unique_ptr attach(std::string dbPath, std::string dbName) const { - return attachFunction(dbPath, dbName); + std::unique_ptr attach( + std::string dbPath, std::string dbName, main::ClientContext* clientContext) const { + return attachFunction(dbPath, dbName, clientContext); } virtual ~StorageExtension() = default; diff --git a/src/main/database_manager.cpp b/src/main/database_manager.cpp index 9c290769dd9..5c1d8d0b577 100644 --- a/src/main/database_manager.cpp +++ b/src/main/database_manager.cpp @@ -9,22 +9,24 @@ void DatabaseManager::registerAttachedDatabase(std::unique_ptr attachedDatabases.push_back(std::move(attachedDatabase)); } -AttachedDatabase* DatabaseManager::getAttachedDatabase(const std::string& name) { +AttachedDatabase* DatabaseManager::getAttachedDatabase(std::string name) { + common::StringUtils::toUpper(name); for (auto& attachedDatabase : attachedDatabases) { - if (attachedDatabase->getDBName() == name) { + auto attachedDBName = attachedDatabase->getDBName(); + common::StringUtils::toUpper(attachedDBName); + if (attachedDBName == name) { return attachedDatabase.get(); } } return nullptr; } -void DatabaseManager::detachDatabase(const std::string& databaseName) { - auto upperCaseDBName = databaseName; - common::StringUtils::toUpper(upperCaseDBName); +void DatabaseManager::detachDatabase(std::string databaseName) { + common::StringUtils::toUpper(databaseName); for (auto it = attachedDatabases.begin(); it != attachedDatabases.end(); ++it) { auto attachedDBName = (*it)->getDBName(); common::StringUtils::toUpper(attachedDBName); - if (attachedDBName == upperCaseDBName) { + if (attachedDBName == databaseName) { attachedDatabases.erase(it); return; } diff --git a/src/processor/operator/attach_database.cpp b/src/processor/operator/attach_database.cpp index 507d3513acc..ec54db06fb3 100644 --- a/src/processor/operator/attach_database.cpp +++ b/src/processor/operator/attach_database.cpp @@ -11,7 +11,8 @@ bool AttachDatabase::getNextTuplesInternal(kuzu::processor::ExecutionContext* co for (auto& [name, storageExtension] : context->clientContext->getDatabase()->getStorageExtensions()) { if (storageExtension->canHandleDB(attachInfo.dbType)) { - auto db = storageExtension->attach(attachInfo.dbAlias, attachInfo.dbPath); + auto db = storageExtension->attach( + attachInfo.dbAlias, attachInfo.dbPath, context->clientContext); context->clientContext->getDatabase() ->getDatabaseManagerUnsafe() ->registerAttachedDatabase(std::move(db));