From f1a6a543a1c39f392544cc8dab57703d9ac0f48d Mon Sep 17 00:00:00 2001 From: Manh Dinh Date: Mon, 25 Mar 2024 17:18:44 -0400 Subject: [PATCH] Refactor table functions --- src/binder/bind/copy/bind_copy_rdf_graph.cpp | 23 ++- src/binder/binder.cpp | 13 +- src/catalog/catalog_entry/CMakeLists.txt | 3 +- .../table_function_catalog_entry.cpp | 18 -- src/function/built_in_function_utils.cpp | 71 +------- src/function/function_collection.cpp | 25 +++ src/function/table/call/current_setting.cpp | 5 +- src/function/table/call/db_version.cpp | 2 +- src/function/table/call/show_tables.cpp | 4 +- src/function/table/call/storage_info.cpp | 6 +- src/function/table/call/table_info.cpp | 2 +- .../table_function_catalog_entry.h | 23 --- src/include/common/enums/expression_type.h | 30 ---- .../function/built_in_function_utils.h | 5 - src/include/function/table/call_functions.h | 12 ++ .../reader/csv/parallel_csv_reader.h | 2 + .../persistent/reader/csv/serial_csv_reader.h | 2 + .../persistent/reader/npy/npy_reader.h | 2 + .../reader/parquet/parquet_reader.h | 2 + .../operator/persistent/reader/rdf/rdf_scan.h | 49 ++--- .../table_scan/ftable_scan_function.h | 2 + .../map/create_factorized_table_scan.cpp | 2 +- .../reader/csv/parallel_csv_reader.cpp | 6 +- .../reader/csv/serial_csv_reader.cpp | 6 +- .../persistent/reader/npy/npy_reader.cpp | 2 +- .../reader/parquet/parquet_reader.cpp | 6 +- .../persistent/reader/rdf/rdf_scan.cpp | 168 +++++++++--------- .../table_scan/ftable_scan_function.cpp | 2 +- .../src_cpp/include/pandas/pandas_scan.h | 2 + .../src_cpp/include/pyarrow/pyarrow_scan.h | 2 + .../python_api/src_cpp/pandas/pandas_scan.cpp | 2 +- tools/python_api/src_cpp/py_database.cpp | 2 +- .../src_cpp/pyarrow/pyarrow_scan.cpp | 4 +- 33 files changed, 206 insertions(+), 299 deletions(-) delete mode 100644 src/catalog/catalog_entry/table_function_catalog_entry.cpp delete mode 100644 src/include/catalog/catalog_entry/table_function_catalog_entry.h diff --git a/src/binder/bind/copy/bind_copy_rdf_graph.cpp b/src/binder/bind/copy/bind_copy_rdf_graph.cpp index 91be6b04fd..1626b17b6c 100644 --- a/src/binder/bind/copy/bind_copy_rdf_graph.cpp +++ b/src/binder/bind/copy/bind_copy_rdf_graph.cpp @@ -9,6 +9,7 @@ #include "function/table/bind_input.h" #include "main/client_context.h" #include "parser/copy.h" +#include "processor/operator/persistent/reader/rdf/rdf_scan.h" using namespace kuzu::binder; using namespace kuzu::catalog; @@ -50,13 +51,12 @@ std::unique_ptr Binder::bindCopyRdfFrom( Function* func; // Bind file scan; auto inMemory = RdfReaderConfig::construct(config->options).inMemory; - func = BuiltInFunctionsUtils::matchFunction(READ_RDF_ALL_TRIPLE_FUNC_NAME, functions); + func = BuiltInFunctionsUtils::matchFunction(RdfAllTripleScan::name, functions); auto scanFunc = ku_dynamic_cast(func); auto bindData = scanFunc->bindFunc(clientContext, bindInput.get()); // Bind copy resource. - func = inMemory ? - BuiltInFunctionsUtils::matchFunction(IN_MEM_READ_RDF_RESOURCE_FUNC_NAME, functions) : - BuiltInFunctionsUtils::matchFunction(READ_RDF_RESOURCE_FUNC_NAME, functions); + func = inMemory ? BuiltInFunctionsUtils::matchFunction(RdfResourceInMemScan::name, functions) : + BuiltInFunctionsUtils::matchFunction(RdfResourceScan::name, functions); auto rScanFunc = ku_dynamic_cast(func); auto rColumns = expression_vector{r}; auto rFileScanInfo = BoundFileScanInfo(*rScanFunc, bindData->copy(), std::move(rColumns)); @@ -65,9 +65,8 @@ std::unique_ptr Binder::bindCopyRdfFrom( auto rEntry = catalog->getTableCatalogEntry(clientContext->getTx(), rTableID); auto rCopyInfo = BoundCopyFromInfo(rEntry, std::move(rSource), offset, nullptr /* extraInfo */); // Bind copy literal. - func = inMemory ? - BuiltInFunctionsUtils::matchFunction(IN_MEM_READ_RDF_LITERAL_FUNC_NAME, functions) : - BuiltInFunctionsUtils::matchFunction(READ_RDF_LITERAL_FUNC_NAME, functions); + func = inMemory ? BuiltInFunctionsUtils::matchFunction(RdfLiteralInMemScan::name, functions) : + BuiltInFunctionsUtils::matchFunction(RdfLiteralScan::name, functions); auto lScanFunc = ku_dynamic_cast(func); auto lColumns = expression_vector{l, lang}; auto lFileScanInfo = BoundFileScanInfo(*lScanFunc, bindData->copy(), std::move(lColumns)); @@ -77,9 +76,8 @@ std::unique_ptr Binder::bindCopyRdfFrom( auto lCopyInfo = BoundCopyFromInfo(lEntry, std::move(lSource), offset, nullptr /* extraInfo */); // Bind copy resource triples func = inMemory ? - BuiltInFunctionsUtils::matchFunction( - IN_MEM_READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, functions) : - BuiltInFunctionsUtils::matchFunction(READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, functions); + BuiltInFunctionsUtils::matchFunction(RdfResourceTripleInMemScan::name, functions) : + BuiltInFunctionsUtils::matchFunction(RdfResourceTripleScan::name, functions); auto rrrScanFunc = ku_dynamic_cast(func); auto rrrColumns = expression_vector{s, p, o}; auto rrrFileScanInfo = BoundFileScanInfo(*rrrScanFunc, bindData->copy(), rrrColumns); @@ -99,9 +97,8 @@ std::unique_ptr Binder::bindCopyRdfFrom( BoundCopyFromInfo(rrrEntry, std::move(rrrSource), offset, std::move(rrrExtraInfo)); // Bind copy literal triples func = inMemory ? - BuiltInFunctionsUtils::matchFunction( - IN_MEM_READ_RDF_LITERAL_TRIPLE_FUNC_NAME, functions) : - BuiltInFunctionsUtils::matchFunction(READ_RDF_LITERAL_TRIPLE_FUNC_NAME, functions); + BuiltInFunctionsUtils::matchFunction(RdfLiteralTripleInMemScan::name, functions) : + BuiltInFunctionsUtils::matchFunction(RdfLiteralTripleScan::name, functions); auto rrlScanFunc = ku_dynamic_cast(func); auto rrlColumns = expression_vector{s, p, oOffset}; auto rrlFileScanInfo = BoundFileScanInfo(*rrlScanFunc, bindData->copy(), rrlColumns); diff --git a/src/binder/binder.cpp b/src/binder/binder.cpp index fd950492d8..91ab2ff520 100644 --- a/src/binder/binder.cpp +++ b/src/binder/binder.cpp @@ -10,10 +10,15 @@ #include "common/string_utils.h" #include "function/built_in_function_utils.h" #include "function/table_functions.h" +#include "processor/operator/persistent/reader/csv/parallel_csv_reader.h" +#include "processor/operator/persistent/reader/csv/serial_csv_reader.h" +#include "processor/operator/persistent/reader/npy/npy_reader.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" using namespace kuzu::catalog; using namespace kuzu::common; using namespace kuzu::parser; +using namespace kuzu::processor; namespace kuzu { namespace binder { @@ -201,17 +206,17 @@ function::TableFunction Binder::getScanFunction(FileType fileType, const ReaderC switch (fileType) { case FileType::PARQUET: { func = function::BuiltInFunctionsUtils::matchFunction( - READ_PARQUET_FUNC_NAME, inputTypes, functions); + ParquetScanFunction::name, inputTypes, functions); } break; case FileType::NPY: { func = function::BuiltInFunctionsUtils::matchFunction( - READ_NPY_FUNC_NAME, inputTypes, functions); + NpyScanFunction::name, inputTypes, functions); } break; case FileType::CSV: { auto csvConfig = CSVReaderConfig::construct(config.options); func = function::BuiltInFunctionsUtils::matchFunction( - csvConfig.parallel ? READ_CSV_PARALLEL_FUNC_NAME : READ_CSV_SERIAL_FUNC_NAME, - inputTypes, functions); + csvConfig.parallel ? ParallelCSVScan::name : SerialCSVScan::name, inputTypes, + functions); } break; default: KU_UNREACHABLE; diff --git a/src/catalog/catalog_entry/CMakeLists.txt b/src/catalog/catalog_entry/CMakeLists.txt index 752366cbfb..648fc3041b 100644 --- a/src/catalog/catalog_entry/CMakeLists.txt +++ b/src/catalog/catalog_entry/CMakeLists.txt @@ -7,8 +7,7 @@ add_library(kuzu_catalog_entry rel_table_catalog_entry.cpp rel_group_catalog_entry.cpp rdf_graph_catalog_entry.cpp - scalar_macro_catalog_entry.cpp - table_function_catalog_entry.cpp) + scalar_macro_catalog_entry.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/catalog/catalog_entry/table_function_catalog_entry.cpp b/src/catalog/catalog_entry/table_function_catalog_entry.cpp deleted file mode 100644 index 26b4f69dd5..0000000000 --- a/src/catalog/catalog_entry/table_function_catalog_entry.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "catalog/catalog_entry/table_function_catalog_entry.h" - -#include "common/utils.h" - -namespace kuzu { -namespace catalog { - -TableFunctionCatalogEntry::TableFunctionCatalogEntry( - std::string name, function::function_set functionSet) - : FunctionCatalogEntry{ - CatalogEntryType::TABLE_FUNCTION_ENTRY, std::move(name), std::move(functionSet)} {} - -std::unique_ptr TableFunctionCatalogEntry::copy() const { - return std::make_unique(getName(), common::copyVector(functionSet)); -} - -} // namespace catalog -} // namespace kuzu diff --git a/src/function/built_in_function_utils.cpp b/src/function/built_in_function_utils.cpp index b2db1038a9..ad9ed31717 100644 --- a/src/function/built_in_function_utils.cpp +++ b/src/function/built_in_function_utils.cpp @@ -1,6 +1,6 @@ #include "function/built_in_function_utils.h" -#include "catalog/catalog_entry/table_function_catalog_entry.h" +#include "catalog/catalog_entry/function_catalog_entry.h" #include "catalog/catalog_set.h" #include "common/exception/binder.h" #include "common/exception/catalog.h" @@ -8,13 +8,6 @@ #include "function/arithmetic/vector_arithmetic_functions.h" #include "function/function_collection.h" #include "function/scalar_function.h" -#include "function/table/call_functions.h" -#include "processor/operator/persistent/reader/csv/parallel_csv_reader.h" -#include "processor/operator/persistent/reader/csv/serial_csv_reader.h" -#include "processor/operator/persistent/reader/npy/npy_reader.h" -#include "processor/operator/persistent/reader/parquet/parquet_reader.h" -#include "processor/operator/persistent/reader/rdf/rdf_scan.h" -#include "processor/operator/table_scan/ftable_scan_function.h" using namespace kuzu::common; using namespace kuzu::catalog; @@ -31,9 +24,12 @@ static void validateNonEmptyCandidateFunctions(std::vector& candidate function::function_set& set); void BuiltInFunctionsUtils::createFunctions(CatalogSet* catalogSet) { - registerTableFunctions(catalogSet); - - registerFunctions(catalogSet); + auto functions = FunctionCollection::getFunctions(); + for (auto i = 0u; functions[i].name != nullptr; ++i) { + auto functionSet = functions[i].getFunctionSetFunc(); + catalogSet->createEntry(std::make_unique( + functions[i].catalogEntryType, functions[i].name, std::move(functionSet))); + } } Function* BuiltInFunctionsUtils::matchFunction(const std::string& name, CatalogSet* catalogSet) { @@ -489,59 +485,6 @@ void BuiltInFunctionsUtils::validateSpecialCases(std::vector& candida } } -void BuiltInFunctionsUtils::registerTableFunctions(CatalogSet* catalogSet) { - catalogSet->createEntry(std::make_unique( - CURRENT_SETTING_FUNC_NAME, CurrentSettingFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - DB_VERSION_FUNC_NAME, DBVersionFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - SHOW_TABLES_FUNC_NAME, ShowTablesFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - TABLE_INFO_FUNC_NAME, TableInfoFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - SHOW_CONNECTION_FUNC_NAME, ShowConnectionFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - STORAGE_INFO_FUNC_NAME, StorageInfoFunction::getFunctionSet())); - // Read functions - catalogSet->createEntry(std::make_unique( - READ_PARQUET_FUNC_NAME, ParquetScanFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_NPY_FUNC_NAME, NpyScanFunction::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_CSV_SERIAL_FUNC_NAME, SerialCSVScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_CSV_PARALLEL_FUNC_NAME, ParallelCSVScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_RDF_RESOURCE_FUNC_NAME, RdfResourceScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_RDF_LITERAL_FUNC_NAME, RdfLiteralScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, RdfResourceTripleScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_RDF_LITERAL_TRIPLE_FUNC_NAME, RdfLiteralTripleScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_RDF_ALL_TRIPLE_FUNC_NAME, RdfAllTripleScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - IN_MEM_READ_RDF_RESOURCE_FUNC_NAME, RdfResourceInMemScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - IN_MEM_READ_RDF_LITERAL_FUNC_NAME, RdfLiteralInMemScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - IN_MEM_READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, RdfResourceTripleInMemScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - IN_MEM_READ_RDF_LITERAL_TRIPLE_FUNC_NAME, RdfLiteralTripleInMemScan::getFunctionSet())); - catalogSet->createEntry(std::make_unique( - READ_FTABLE_FUNC_NAME, FTableScan::getFunctionSet())); -} - -void BuiltInFunctionsUtils::registerFunctions(catalog::CatalogSet* catalogSet) { - auto functions = FunctionCollection::getFunctions(); - for (auto i = 0u; functions[i].name != nullptr; ++i) { - auto functionSet = functions[i].getFunctionSetFunc(); - catalogSet->createEntry(std::make_unique( - functions[i].catalogEntryType, functions[i].name, std::move(functionSet))); - } -} - static std::string getFunctionMatchFailureMsg(const std::string name, const std::vector& inputTypes, const std::string& supportedInputs, bool isDistinct = false) { diff --git a/src/function/function_collection.cpp b/src/function/function_collection.cpp index ffaad9d5da..60179caded 100644 --- a/src/function/function_collection.cpp +++ b/src/function/function_collection.cpp @@ -17,9 +17,18 @@ #include "function/schema/vector_node_rel_functions.h" #include "function/string/vector_string_functions.h" #include "function/struct/vector_struct_functions.h" +#include "function/table/call_functions.h" #include "function/timestamp/vector_timestamp_functions.h" #include "function/union/vector_union_functions.h" #include "function/uuid/vector_uuid_functions.h" +#include "processor/operator/persistent/reader/csv/parallel_csv_reader.h" +#include "processor/operator/persistent/reader/csv/serial_csv_reader.h" +#include "processor/operator/persistent/reader/npy/npy_reader.h" +#include "processor/operator/persistent/reader/parquet/parquet_reader.h" +#include "processor/operator/persistent/reader/rdf/rdf_scan.h" +#include "processor/operator/table_scan/ftable_scan_function.h" + +using namespace kuzu::processor; namespace kuzu { namespace function { @@ -32,6 +41,8 @@ namespace function { { _PARAM::getFunctionSet, _PARAM::name, CatalogEntryType::REWRITE_FUNCTION_ENTRY } #define AGGREGATE_FUNCTION(_PARAM) \ { _PARAM::getFunctionSet, _PARAM::name, CatalogEntryType::AGGREGATE_FUNCTION_ENTRY } +#define TABLE_FUNCTION(_PARAM) \ + { _PARAM::getFunctionSet, _PARAM::name, CatalogEntryType::TABLE_FUNCTION_ENTRY } #define FINAL_FUNCTION \ { nullptr, nullptr, CatalogEntryType::SCALAR_FUNCTION_ENTRY } @@ -173,6 +184,20 @@ FunctionCollection* FunctionCollection::getFunctions() { AGGREGATE_FUNCTION(AggregateMinFunction), AGGREGATE_FUNCTION(AggregateMaxFunction), AGGREGATE_FUNCTION(CollectFunction), + // Table functions + TABLE_FUNCTION(CurrentSettingFunction), TABLE_FUNCTION(DBVersionFunction), + TABLE_FUNCTION(ShowTablesFunction), TABLE_FUNCTION(TableInfoFunction), + TABLE_FUNCTION(ShowConnectionFunction), TABLE_FUNCTION(StorageInfoFunction), + + // Read functions + TABLE_FUNCTION(ParquetScanFunction), TABLE_FUNCTION(NpyScanFunction), + TABLE_FUNCTION(SerialCSVScan), TABLE_FUNCTION(ParallelCSVScan), + TABLE_FUNCTION(RdfResourceScan), TABLE_FUNCTION(RdfLiteralScan), + TABLE_FUNCTION(RdfResourceTripleScan), TABLE_FUNCTION(RdfLiteralTripleScan), + TABLE_FUNCTION(RdfAllTripleScan), TABLE_FUNCTION(RdfResourceInMemScan), + TABLE_FUNCTION(RdfLiteralInMemScan), TABLE_FUNCTION(RdfResourceTripleInMemScan), + TABLE_FUNCTION(RdfLiteralTripleInMemScan), TABLE_FUNCTION(FTableScan), + // End of array FINAL_FUNCTION}; diff --git a/src/function/table/call/current_setting.cpp b/src/function/table/call/current_setting.cpp index 5008229f8d..61f328a1a7 100644 --- a/src/function/table/call/current_setting.cpp +++ b/src/function/table/call/current_setting.cpp @@ -51,9 +51,8 @@ static std::unique_ptr bindFunc( function_set CurrentSettingFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(CURRENT_SETTING_FUNC_NAME, tableFunc, - bindFunc, initSharedState, initEmptyLocalState, - std::vector{LogicalTypeID::STRING})); + functionSet.push_back(std::make_unique(name, tableFunc, bindFunc, + initSharedState, initEmptyLocalState, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/function/table/call/db_version.cpp b/src/function/table/call/db_version.cpp index 3c5b21785e..13f0472dd0 100644 --- a/src/function/table/call/db_version.cpp +++ b/src/function/table/call/db_version.cpp @@ -31,7 +31,7 @@ static std::unique_ptr bindFunc(ClientContext*, TableFuncBind function_set DBVersionFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(DB_VERSION_FUNC_NAME, tableFunc, bindFunc, + functionSet.push_back(std::make_unique(name, tableFunc, bindFunc, initSharedState, initEmptyLocalState, std::vector{})); return functionSet; } diff --git a/src/function/table/call/show_tables.cpp b/src/function/table/call/show_tables.cpp index 8c120dd29b..42d3af36e4 100644 --- a/src/function/table/call/show_tables.cpp +++ b/src/function/table/call/show_tables.cpp @@ -61,8 +61,8 @@ static std::unique_ptr bindFunc( function_set ShowTablesFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(SHOW_TABLES_FUNC_NAME, tableFunc, - bindFunc, initSharedState, initEmptyLocalState, std::vector{})); + functionSet.push_back(std::make_unique(name, tableFunc, bindFunc, + initSharedState, initEmptyLocalState, std::vector{})); return functionSet; } diff --git a/src/function/table/call/storage_info.cpp b/src/function/table/call/storage_info.cpp index dba3109a32..03e3d44b73 100644 --- a/src/function/table/call/storage_info.cpp +++ b/src/function/table/call/storage_info.cpp @@ -226,9 +226,9 @@ static std::unique_ptr bindFunc( function_set StorageInfoFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(STORAGE_INFO_FUNC_NAME, tableFunc, - bindFunc, initStorageInfoSharedState, initLocalState, - std::vector{LogicalTypeID::STRING})); + functionSet.push_back( + std::make_unique(name, tableFunc, bindFunc, initStorageInfoSharedState, + initLocalState, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/function/table/call/table_info.cpp b/src/function/table/call/table_info.cpp index 616848fc81..fbf26c526c 100644 --- a/src/function/table/call/table_info.cpp +++ b/src/function/table/call/table_info.cpp @@ -92,7 +92,7 @@ static std::unique_ptr bindFunc( function_set TableInfoFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(TABLE_INFO_FUNC_NAME, tableFunc, bindFunc, + functionSet.push_back(std::make_unique(name, tableFunc, bindFunc, initSharedState, initEmptyLocalState, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/include/catalog/catalog_entry/table_function_catalog_entry.h b/src/include/catalog/catalog_entry/table_function_catalog_entry.h deleted file mode 100644 index ba5b8e9d5d..0000000000 --- a/src/include/catalog/catalog_entry/table_function_catalog_entry.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include "function_catalog_entry.h" - -namespace kuzu { -namespace catalog { - -class TableFunctionCatalogEntry : public FunctionCatalogEntry { -public: - //===--------------------------------------------------------------------===// - // constructors - //===--------------------------------------------------------------------===// - TableFunctionCatalogEntry() = default; - TableFunctionCatalogEntry(std::string name, function::function_set functionSet); - - //===--------------------------------------------------------------------===// - // serialization & deserialization - //===--------------------------------------------------------------------===// - std::unique_ptr copy() const override; -}; - -} // namespace catalog -} // namespace kuzu diff --git a/src/include/common/enums/expression_type.h b/src/include/common/enums/expression_type.h index 115df910e3..50c4f5e9c6 100644 --- a/src/include/common/enums/expression_type.h +++ b/src/include/common/enums/expression_type.h @@ -6,36 +6,6 @@ namespace kuzu { namespace common { -/** - * Function name is a temporary identifier used for binder because grammar does not parse built in - * functions. After binding, expression type should replace function name and used as identifier. - */ - -// Table functions -const char* const TABLE_INFO_FUNC_NAME = "TABLE_INFO"; -const char* const DB_VERSION_FUNC_NAME = "DB_VERSION"; -const char* const CURRENT_SETTING_FUNC_NAME = "CURRENT_SETTING"; -const char* const SHOW_TABLES_FUNC_NAME = "SHOW_TABLES"; -const char* const SHOW_CONNECTION_FUNC_NAME = "SHOW_CONNECTION"; -const char* const STORAGE_INFO_FUNC_NAME = "STORAGE_INFO"; -// Table functions - read functions -const char* const READ_PARQUET_FUNC_NAME = "READ_PARQUET"; -const char* const READ_NPY_FUNC_NAME = "READ_NPY"; -const char* const READ_CSV_SERIAL_FUNC_NAME = "READ_CSV_SERIAL"; -const char* const READ_CSV_PARALLEL_FUNC_NAME = "READ_CSV_PARALLEL"; -const char* const READ_RDF_RESOURCE_FUNC_NAME = "READ_RDF_RESOURCE"; -const char* const READ_RDF_LITERAL_FUNC_NAME = "READ_RDF_LITERAL"; -const char* const READ_RDF_RESOURCE_TRIPLE_FUNC_NAME = "READ_RDF_RESOURCE_TRIPLE"; -const char* const READ_RDF_LITERAL_TRIPLE_FUNC_NAME = "READ_RDF_LITERAL_TRIPLE"; -const char* const READ_RDF_ALL_TRIPLE_FUNC_NAME = "READ_RDF_ALL_TRIPLE"; -const char* const IN_MEM_READ_RDF_RESOURCE_FUNC_NAME = "IN_MEM_READ_RDF_RESOURCE"; -const char* const IN_MEM_READ_RDF_LITERAL_FUNC_NAME = "IN_MEM_READ_RDF_LITERAL"; -const char* const IN_MEM_READ_RDF_RESOURCE_TRIPLE_FUNC_NAME = "IN_MEM_READ_RDF_RESOURCE_TRIPLE"; -const char* const IN_MEM_READ_RDF_LITERAL_TRIPLE_FUNC_NAME = "IN_MEM_READ_RDF_LITERAL_TRIPLE"; -const char* const READ_PANDAS_FUNC_NAME = "READ_PANDAS"; -const char* const READ_PYARROW_FUNC_NAME = "READ_PYARROW"; -const char* const READ_FTABLE_FUNC_NAME = "READ_FTABLE"; - enum class ExpressionType : uint8_t { // Boolean Connection Expressions diff --git a/src/include/function/built_in_function_utils.h b/src/include/function/built_in_function_utils.h index 3f0c75dcbb..f74d39f1e2 100644 --- a/src/include/function/built_in_function_utils.h +++ b/src/include/function/built_in_function_utils.h @@ -79,11 +79,6 @@ class BuiltInFunctionsUtils { static void validateSpecialCases(std::vector& candidateFunctions, const std::string& name, const std::vector& inputTypes, function::function_set& set); - - // Table functions. - static void registerTableFunctions(catalog::CatalogSet* catalogSet); - - static void registerFunctions(catalog::CatalogSet* catalogSet); }; } // namespace function diff --git a/src/include/function/table/call_functions.h b/src/include/function/table/call_functions.h index a7a7174bcb..c8aee63157 100644 --- a/src/include/function/table/call_functions.h +++ b/src/include/function/table/call_functions.h @@ -51,26 +51,38 @@ struct CallFunction { }; struct CurrentSettingFunction : public CallFunction { + static constexpr const char* name = "CURRENT_SETTING"; + static function_set getFunctionSet(); }; struct DBVersionFunction : public CallFunction { + static constexpr const char* name = "DB_VERSION"; + static function_set getFunctionSet(); }; struct ShowTablesFunction : public CallFunction { + static constexpr const char* name = "SHOW_TABLES"; + static function_set getFunctionSet(); }; struct TableInfoFunction : public CallFunction { + static constexpr const char* name = "TABLE_INFO"; + static function_set getFunctionSet(); }; struct ShowConnectionFunction final : public CallFunction { + static constexpr const char* name = "SHOW_CONNECTION"; + static function_set getFunctionSet(); }; struct StorageInfoFunction final : public CallFunction { + static constexpr const char* name = "STORAGE_INFO"; + static function_set getFunctionSet(); }; diff --git a/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h b/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h index 90709b2c20..bd07876201 100644 --- a/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h +++ b/src/include/processor/operator/persistent/reader/csv/parallel_csv_reader.h @@ -49,6 +49,8 @@ struct ParallelCSVScanSharedState final : public function::ScanFileSharedState { }; struct ParallelCSVScan { + static constexpr const char* name = "READ_CSV_PARALLEL"; + static function::function_set getFunctionSet(); }; diff --git a/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h b/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h index 78f1f26684..fc20f180bb 100644 --- a/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h +++ b/src/include/processor/operator/persistent/reader/csv/serial_csv_reader.h @@ -41,6 +41,8 @@ struct SerialCSVScanSharedState final : public function::ScanFileSharedState { }; struct SerialCSVScan { + static constexpr const char* name = "READ_CSV_SERIAL"; + static function::function_set getFunctionSet(); static void bindColumns(const function::ScanTableFuncBindInput* bindInput, std::vector& columnNames, std::vector& columnTypes); diff --git a/src/include/processor/operator/persistent/reader/npy/npy_reader.h b/src/include/processor/operator/persistent/reader/npy/npy_reader.h index dfa36cb6b5..c2a76f85ae 100644 --- a/src/include/processor/operator/persistent/reader/npy/npy_reader.h +++ b/src/include/processor/operator/persistent/reader/npy/npy_reader.h @@ -62,6 +62,8 @@ struct NpyScanSharedState final : public function::ScanSharedState { }; struct NpyScanFunction { + static constexpr const char* name = "READ_NPY"; + static function::function_set getFunctionSet(); }; diff --git a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h index bfe624de11..4a412f7070 100644 --- a/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h +++ b/src/include/processor/operator/persistent/reader/parquet/parquet_reader.h @@ -107,6 +107,8 @@ struct ParquetScanLocalState final : public function::TableFuncLocalState { }; struct ParquetScanFunction { + static constexpr const char* name = "READ_PARQUET"; + static function::function_set getFunctionSet(); }; diff --git a/src/include/processor/operator/persistent/reader/rdf/rdf_scan.h b/src/include/processor/operator/persistent/reader/rdf/rdf_scan.h index f55e95c6fc..15dfc2c971 100644 --- a/src/include/processor/operator/persistent/reader/rdf/rdf_scan.h +++ b/src/include/processor/operator/persistent/reader/rdf/rdf_scan.h @@ -154,70 +154,57 @@ struct RdfInMemScanSharedState : public function::BaseScanSharedState { }; struct RdfResourceScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "READ_RDF_RESOURCE"; - static std::unique_ptr initSharedState( - function::TableFunctionInitInput& input); + static function::function_set getFunctionSet(); }; struct RdfLiteralScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "READ_RDF_LITERAL"; - static std::unique_ptr initSharedState( - function::TableFunctionInitInput& input); + static function::function_set getFunctionSet(); }; struct RdfResourceTripleScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "READ_RDF_RESOURCE_TRIPLE"; - static std::unique_ptr initSharedState( - function::TableFunctionInitInput& input); + static function::function_set getFunctionSet(); }; struct RdfLiteralTripleScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "READ_RDF_LITERAL_TRIPLE"; - static std::unique_ptr initSharedState( - function::TableFunctionInitInput& input); + static function::function_set getFunctionSet(); }; struct RdfAllTripleScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "READ_RDF_ALL_TRIPLE"; - static common::offset_t tableFunc( - function::TableFuncInput& input, function::TableFuncOutput& output); - static std::unique_ptr bindFunc( - main::ClientContext*, function::TableFuncBindInput* input_); - static std::unique_ptr initSharedState( - function::TableFunctionInitInput& input); + static function::function_set getFunctionSet(); }; struct RdfResourceInMemScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "IN_MEM_READ_RDF_RESOURCE"; - static common::offset_t tableFunc( - function::TableFuncInput& input, function::TableFuncOutput& output); + static function::function_set getFunctionSet(); }; struct RdfLiteralInMemScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "IN_MEM_READ_RDF_LITERAL"; - static common::offset_t tableFunc( - function::TableFuncInput& input, function::TableFuncOutput& output); + static function::function_set getFunctionSet(); }; struct RdfResourceTripleInMemScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "IN_MEM_READ_RDF_RESOURCE_TRIPLE"; - static common::offset_t tableFunc( - function::TableFuncInput& input, function::TableFuncOutput& output); + static function::function_set getFunctionSet(); }; struct RdfLiteralTripleInMemScan { - static function::function_set getFunctionSet(); + static constexpr const char* name = "IN_MEM_READ_RDF_LITERAL_TRIPLE"; - static common::offset_t tableFunc( - function::TableFuncInput& input, function::TableFuncOutput& output); + static function::function_set getFunctionSet(); }; } // namespace processor diff --git a/src/include/processor/operator/table_scan/ftable_scan_function.h b/src/include/processor/operator/table_scan/ftable_scan_function.h index 393b0cfc62..dcfa2df7d9 100644 --- a/src/include/processor/operator/table_scan/ftable_scan_function.h +++ b/src/include/processor/operator/table_scan/ftable_scan_function.h @@ -26,6 +26,8 @@ struct FTableScanBindData : public function::TableFuncBindData { }; struct FTableScan { + static constexpr const char* name = "READ_FTABLE"; + static function::function_set getFunctionSet(); }; diff --git a/src/processor/map/create_factorized_table_scan.cpp b/src/processor/map/create_factorized_table_scan.cpp index f3142792d1..d10da28b8a 100644 --- a/src/processor/map/create_factorized_table_scan.cpp +++ b/src/processor/map/create_factorized_table_scan.cpp @@ -27,7 +27,7 @@ std::unique_ptr PlanMapper::createFTableScan(const expression_ auto bindData = std::make_unique(table, std::move(colIndices), maxMorselSize); auto function = function::BuiltInFunctionsUtils::matchFunction( - READ_FTABLE_FUNC_NAME, clientContext->getCatalog()->getFunctions(clientContext->getTx())); + FTableScan::name, clientContext->getCatalog()->getFunctions(clientContext->getTx())); auto info = InQueryCallInfo(); info.function = *ku_dynamic_cast(function); info.bindData = std::move(bindData); diff --git a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp index 1071ad77a4..d7954bc65b 100644 --- a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp @@ -207,9 +207,9 @@ static double progressFunc(TableFuncSharedState* sharedState) { function_set ParallelCSVScan::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(READ_CSV_PARALLEL_FUNC_NAME, tableFunc, - bindFunc, initSharedState, initLocalState, progressFunc, - std::vector{LogicalTypeID::STRING})); + functionSet.push_back( + std::make_unique(name, tableFunc, bindFunc, initSharedState, initLocalState, + progressFunc, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp index 74cdc5a9fa..32141ccfa6 100644 --- a/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/serial_csv_reader.cpp @@ -146,9 +146,9 @@ static double progressFunc(TableFuncSharedState* sharedState) { function_set SerialCSVScan::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(READ_CSV_SERIAL_FUNC_NAME, tableFunc, - bindFunc, initSharedState, initLocalState, progressFunc, - std::vector{LogicalTypeID::STRING})); + functionSet.push_back( + std::make_unique(name, tableFunc, bindFunc, initSharedState, initLocalState, + progressFunc, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/processor/operator/persistent/reader/npy/npy_reader.cpp b/src/processor/operator/persistent/reader/npy/npy_reader.cpp index b343bd89ef..3481361d3d 100644 --- a/src/processor/operator/persistent/reader/npy/npy_reader.cpp +++ b/src/processor/operator/persistent/reader/npy/npy_reader.cpp @@ -332,7 +332,7 @@ static std::unique_ptr initLocalState( function_set NpyScanFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(READ_NPY_FUNC_NAME, tableFunc, bindFunc, + functionSet.push_back(std::make_unique(name, tableFunc, bindFunc, initSharedState, initLocalState, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index ab92d89394..bc6960d6f6 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -705,9 +705,9 @@ static double progressFunc(TableFuncSharedState* sharedState) { function_set ParquetScanFunction::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(READ_PARQUET_FUNC_NAME, tableFunc, - bindFunc, initSharedState, initLocalState, progressFunc, - std::vector{LogicalTypeID::STRING})); + functionSet.push_back( + std::make_unique(name, tableFunc, bindFunc, initSharedState, initLocalState, + progressFunc, std::vector{LogicalTypeID::STRING})); return functionSet; } diff --git a/src/processor/operator/persistent/reader/rdf/rdf_scan.cpp b/src/processor/operator/persistent/reader/rdf/rdf_scan.cpp index d3b0267d97..1ceeffde2b 100644 --- a/src/processor/operator/persistent/reader/rdf/rdf_scan.cpp +++ b/src/processor/operator/persistent/reader/rdf/rdf_scan.cpp @@ -74,85 +74,13 @@ static std::unique_ptr initLocalState( return std::make_unique(); } -function_set RdfResourceScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(READ_RDF_RESOURCE_FUNC_NAME, scanTableFunc, nullptr, - initSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfLiteralScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(READ_RDF_LITERAL_FUNC_NAME, scanTableFunc, nullptr, - initSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfResourceTripleScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, scanTableFunc, - nullptr, initSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfLiteralTripleScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(READ_RDF_LITERAL_TRIPLE_FUNC_NAME, scanTableFunc, - nullptr, initSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function::function_set RdfAllTripleScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(READ_RDF_ALL_TRIPLE_FUNC_NAME, tableFunc, bindFunc, - initSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfResourceInMemScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(IN_MEM_READ_RDF_RESOURCE_FUNC_NAME, tableFunc, - nullptr, inMemScanInitSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfLiteralInMemScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(IN_MEM_READ_RDF_LITERAL_FUNC_NAME, tableFunc, - nullptr, inMemScanInitSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfResourceTripleInMemScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(IN_MEM_READ_RDF_RESOURCE_TRIPLE_FUNC_NAME, - tableFunc, nullptr, inMemScanInitSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -function_set RdfLiteralTripleInMemScan::getFunctionSet() { - function_set functionSet; - auto func = std::make_unique(IN_MEM_READ_RDF_LITERAL_TRIPLE_FUNC_NAME, tableFunc, - nullptr, inMemScanInitSharedState, initLocalState, std::vector{}); - functionSet.push_back(std::move(func)); - return functionSet; -} - -offset_t RdfAllTripleScan::tableFunc(TableFuncInput& input, TableFuncOutput&) { +static offset_t RdfAllTripleScanTableFunc(TableFuncInput& input, TableFuncOutput&) { auto sharedState = reinterpret_cast(input.sharedState); sharedState->readAll(); return 0; } -std::unique_ptr RdfAllTripleScan::bindFunc( +static std::unique_ptr RdfAllTripleScanBindFunc( main::ClientContext*, function::TableFuncBindInput* input_) { auto input = ku_dynamic_cast(input_); return std::make_unique(std::vector{}, @@ -160,7 +88,7 @@ std::unique_ptr RdfAllTripleScan::bindFunc( std::make_shared()); } -offset_t RdfResourceInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& output) { +static offset_t RdfResourceInMemScanTableFunc(TableFuncInput& input, TableFuncOutput& output) { auto sharedState = ku_dynamic_cast(input.sharedState); auto sVector = output.dataChunk.getValueVector(0).get(); @@ -185,7 +113,7 @@ offset_t RdfResourceInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& return vectorPos; } -offset_t RdfLiteralInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& output) { +static offset_t RdfLiteralInMemScanTableFunc(TableFuncInput& input, TableFuncOutput& output) { auto sharedState = ku_dynamic_cast(input.sharedState); auto oVector = output.dataChunk.getValueVector(0).get(); @@ -202,7 +130,8 @@ offset_t RdfLiteralInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& return numTuplesToScan; } -offset_t RdfResourceTripleInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& output) { +static offset_t RdfResourceTripleInMemScanTableFunc( + TableFuncInput& input, TableFuncOutput& output) { auto sharedState = ku_dynamic_cast(input.sharedState); auto [startIdx, numTuplesToScan] = sharedState->getResourceTripleRange(); @@ -218,7 +147,7 @@ offset_t RdfResourceTripleInMemScan::tableFunc(TableFuncInput& input, TableFuncO return numTuplesToScan; } -offset_t RdfLiteralTripleInMemScan::tableFunc(TableFuncInput& input, TableFuncOutput& output) { +static offset_t RdfLiteralTripleInMemScanTableFunc(TableFuncInput& input, TableFuncOutput& output) { auto sharedState = ku_dynamic_cast(input.sharedState); auto [startIdx, numTuplesToScan] = sharedState->getLiteralTripleRange(); @@ -234,7 +163,7 @@ offset_t RdfLiteralTripleInMemScan::tableFunc(TableFuncInput& input, TableFuncOu return numTuplesToScan; } -std::unique_ptr RdfResourceScan::initSharedState( +static std::unique_ptr RdfResourceScanInitSharedState( TableFunctionInitInput& input) { auto bindData = reinterpret_cast(input.bindData); auto rdfConfig = RdfReaderConfig::construct(bindData->config.options); @@ -242,7 +171,7 @@ std::unique_ptr RdfResourceScan::initSharedState( bindData->config.copy(), std::move(rdfConfig)); } -std::unique_ptr RdfLiteralScan::initSharedState( +static std::unique_ptr RdfLiteralScanInitSharedState( TableFunctionInitInput& input) { auto bindData = reinterpret_cast(input.bindData); auto rdfConfig = RdfReaderConfig::construct(bindData->config.options); @@ -250,7 +179,7 @@ std::unique_ptr RdfLiteralScan::initSharedState( bindData->config.copy(), std::move(rdfConfig)); } -std::unique_ptr RdfResourceTripleScan::initSharedState( +static std::unique_ptr RdfResourceTripleScanInitSharedState( TableFunctionInitInput& input) { auto bindData = reinterpret_cast(input.bindData); auto rdfConfig = RdfReaderConfig::construct(bindData->config.options); @@ -258,7 +187,7 @@ std::unique_ptr RdfResourceTripleScan::initSharedState( bindData->config.copy(), std::move(rdfConfig)); } -std::unique_ptr RdfLiteralTripleScan::initSharedState( +static std::unique_ptr RdfLiteralTripleScanInitSharedState( TableFunctionInitInput& input) { auto bindData = reinterpret_cast(input.bindData); auto rdfConfig = RdfReaderConfig::construct(bindData->config.options); @@ -266,7 +195,7 @@ std::unique_ptr RdfLiteralTripleScan::initSharedState( bindData->config.copy(), std::move(rdfConfig)); } -std::unique_ptr RdfAllTripleScan::initSharedState( +static std::unique_ptr RdfAllTripleScanInitSharedState( TableFunctionInitInput& input) { auto bindData = ku_dynamic_cast(input.bindData); auto rdfConfig = RdfReaderConfig::construct(bindData->config.options); @@ -274,5 +203,78 @@ std::unique_ptr RdfAllTripleScan::initSharedState( bindData->config.copy(), std::move(rdfConfig), bindData->store); } +function_set RdfResourceScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, scanTableFunc, nullptr, + RdfResourceScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfLiteralScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, scanTableFunc, nullptr, + RdfLiteralScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfResourceTripleScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, scanTableFunc, nullptr, + RdfResourceTripleScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfLiteralTripleScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, scanTableFunc, nullptr, + RdfLiteralTripleScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function::function_set RdfAllTripleScan::getFunctionSet() { + function_set functionSet; + auto func = + std::make_unique(name, RdfAllTripleScanTableFunc, RdfAllTripleScanBindFunc, + RdfAllTripleScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfResourceInMemScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, RdfResourceInMemScanTableFunc, nullptr, + inMemScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfLiteralInMemScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, RdfLiteralInMemScanTableFunc, nullptr, + inMemScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfResourceTripleInMemScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, RdfResourceTripleInMemScanTableFunc, nullptr, + inMemScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + +function_set RdfLiteralTripleInMemScan::getFunctionSet() { + function_set functionSet; + auto func = std::make_unique(name, RdfLiteralTripleInMemScanTableFunc, nullptr, + inMemScanInitSharedState, initLocalState, std::vector{}); + functionSet.push_back(std::move(func)); + return functionSet; +} + } // namespace processor } // namespace kuzu diff --git a/src/processor/operator/table_scan/ftable_scan_function.cpp b/src/processor/operator/table_scan/ftable_scan_function.cpp index 709313adb8..c4d978adc1 100644 --- a/src/processor/operator/table_scan/ftable_scan_function.cpp +++ b/src/processor/operator/table_scan/ftable_scan_function.cpp @@ -61,7 +61,7 @@ static std::unique_ptr initLocalState( function_set FTableScan::getFunctionSet() { function_set functionSet; - functionSet.push_back(std::make_unique(READ_FTABLE_FUNC_NAME, tableFunc, nullptr, + functionSet.push_back(std::make_unique(name, tableFunc, nullptr /*bindFunc*/, initSharedState, initLocalState, std::vector{})); return functionSet; } diff --git a/tools/python_api/src_cpp/include/pandas/pandas_scan.h b/tools/python_api/src_cpp/include/pandas/pandas_scan.h index 681f80f6b9..7c36db5f1d 100644 --- a/tools/python_api/src_cpp/include/pandas/pandas_scan.h +++ b/tools/python_api/src_cpp/include/pandas/pandas_scan.h @@ -26,6 +26,8 @@ struct PandasScanSharedState : public function::BaseScanSharedState { }; struct PandasScanFunction { + static constexpr const char* name = "READ_PANDAS"; + static function::function_set getFunctionSet(); }; diff --git a/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h b/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h index a1463d39ba..ee99f34cd3 100644 --- a/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h +++ b/tools/python_api/src_cpp/include/pyarrow/pyarrow_scan.h @@ -50,6 +50,8 @@ struct PyArrowTableScanFunctionData final : public function::TableFuncBindData { }; struct PyArrowTableScanFunction { + static constexpr const char* name = "READ_PYARROW"; + static function::function_set getFunctionSet(); static function::TableFunction getFunction(); diff --git a/tools/python_api/src_cpp/pandas/pandas_scan.cpp b/tools/python_api/src_cpp/pandas/pandas_scan.cpp index 6be70a4e66..71bf339210 100644 --- a/tools/python_api/src_cpp/pandas/pandas_scan.cpp +++ b/tools/python_api/src_cpp/pandas/pandas_scan.cpp @@ -121,7 +121,7 @@ static double progressFunc(TableFuncSharedState* sharedState) { } static TableFunction getFunction() { - return TableFunction(READ_PANDAS_FUNC_NAME, tableFunc, bindFunc, initSharedState, + return TableFunction(PandasScanFunction::name, tableFunc, bindFunc, initSharedState, initLocalState, progressFunc, std::vector{LogicalTypeID::POINTER}); } diff --git a/tools/python_api/src_cpp/py_database.cpp b/tools/python_api/src_cpp/py_database.cpp index d7ecc9aced..b99a5968b7 100644 --- a/tools/python_api/src_cpp/py_database.cpp +++ b/tools/python_api/src_cpp/py_database.cpp @@ -50,7 +50,7 @@ PyDatabase::PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize, auto systemConfig = SystemConfig(bufferPoolSize, maxNumThreads, compression, readOnly, maxDBSize); database = std::make_unique(databasePath, systemConfig); - database->addBuiltInFunction(READ_PANDAS_FUNC_NAME, kuzu::PandasScanFunction::getFunctionSet()); + database->addBuiltInFunction(kuzu::PandasScanFunction::name, kuzu::PandasScanFunction::getFunctionSet()); storageDriver = std::make_unique(database.get()); py::gil_scoped_acquire acquire; if (kuzu::importCache.get() == nullptr) { diff --git a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp index 9cfdc98630..d4cf53d3f7 100644 --- a/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp +++ b/tools/python_api/src_cpp/pyarrow/pyarrow_scan.cpp @@ -98,13 +98,13 @@ function::function_set PyArrowTableScanFunction::getFunctionSet() { function_set functionSet; functionSet.push_back( - std::make_unique(READ_PYARROW_FUNC_NAME, tableFunc, bindFunc, + std::make_unique(name, tableFunc, bindFunc, initSharedState, initLocalState, progressFunc, std::vector{LogicalTypeID::POINTER})); return functionSet; } TableFunction PyArrowTableScanFunction::getFunction() { - return TableFunction(READ_PYARROW_FUNC_NAME, tableFunc, bindFunc, initSharedState, + return TableFunction(name, tableFunc, bindFunc, initSharedState, initLocalState, progressFunc, std::vector{LogicalTypeID::POINTER}); }