From 73949ed000d8b952e3e7157b8b6fef3a20801eb2 Mon Sep 17 00:00:00 2001 From: hououou Date: Thu, 21 Mar 2024 17:12:21 -0400 Subject: [PATCH] rebase --- src/binder/bind/bind_import_database.cpp | 29 ++++++++++++++++--- .../node_table_catalog_entry.cpp | 9 ++---- .../catalog_entry/rel_table_catalog_entry.cpp | 12 ++++---- src/catalog/property.cpp | 5 ++-- src/function/scalar_macro_function.cpp | 9 ++---- src/include/catalog/property.h | 3 +- .../common/copier_config/csv_reader_config.h | 15 +++------- .../operator/persistent/export_db.cpp | 6 ++-- 8 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/binder/bind/bind_import_database.cpp b/src/binder/bind/bind_import_database.cpp index f6bc6ce8d6..f8038c5547 100644 --- a/src/binder/bind/bind_import_database.cpp +++ b/src/binder/bind/bind_import_database.cpp @@ -2,8 +2,11 @@ #include "binder/binder.h" #include "binder/copy/bound_import_database.h" +#include "common/copier_config/csv_reader_config.h" #include "common/exception/binder.h" #include "common/file_system/virtual_file_system.h" +#include "parser/copy.h" +#include "parser/parser.h" #include "parser/port_db.h" using namespace kuzu::common; @@ -12,7 +15,7 @@ using namespace kuzu::parser; namespace kuzu { namespace binder { -std::string getFilePath( +static std::string getQueryFromFile( common::VirtualFileSystem* vfs, const std::string boundFilePath, const std::string fileName) { auto filePath = vfs->joinPath(boundFilePath, fileName); if (!vfs->fileOrPathExists(filePath)) { @@ -37,9 +40,27 @@ std::unique_ptr Binder::bindImportDatabaseClause(const Statement throw BinderException(stringFormat("Directory {} does not exist.", boundFilePath)); } std::string finalQueryStatements; - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME); - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::COPY_NAME); - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::MACRO_NAME); + finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME); + // replace the path in copy from statement with the bound path + auto copyQuery = getQueryFromFile(fs, boundFilePath, ImportDBConstants::COPY_NAME); + auto parsedStatements = Parser::parseQuery(copyQuery); + for (auto& parsedStatement : parsedStatements) { + KU_ASSERT(parsedStatement->getStatementType() == StatementType::COPY_FROM); + auto copyFromStatement = + ku_dynamic_cast(parsedStatement.get()); + KU_ASSERT(copyFromStatement->getSource()->type == common::ScanSourceType::FILE); + auto filePaths = ku_dynamic_cast( + copyFromStatement->getSource()) + ->filePaths; + KU_ASSERT(filePaths.size() == 1); + auto copyFilePath = boundFilePath + "/" + filePaths[0]; + auto csvConfig = CSVReaderConfig::construct( + bindParsingOptions(copyFromStatement->getParsingOptionsRef())); + auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(), + copyFilePath, csvConfig.option.toCypher()); + finalQueryStatements += csvQuery; + } + finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME); return std::make_unique(boundFilePath, finalQueryStatements); } } // namespace binder diff --git a/src/catalog/catalog_entry/node_table_catalog_entry.cpp b/src/catalog/catalog_entry/node_table_catalog_entry.cpp index 451bcb128e..296838dae0 100644 --- a/src/catalog/catalog_entry/node_table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/node_table_catalog_entry.cpp @@ -1,7 +1,5 @@ #include "catalog/catalog_entry/node_table_catalog_entry.h" -#include - namespace kuzu { namespace catalog { @@ -44,11 +42,8 @@ std::unique_ptr NodeTableCatalogEntry::copy() const { } std::string NodeTableCatalogEntry::toCypher(main::ClientContext* /*clientContext*/) const { - std::stringstream ss; - ss << "CREATE NODE TABLE " << getName() << "("; - Property::toCypher(getPropertiesRef(), ss); - ss << " PRIMARY KEY(" << getPrimaryKey()->getName() << "));"; - return ss.str(); + return common::stringFormat("CREATE NODE TABLE {} ({} PRIMARY KEY({}));", getName(), + Property::toCypher(getPropertiesRef()), getPrimaryKey()->getName()); } } // namespace catalog diff --git a/src/catalog/catalog_entry/rel_table_catalog_entry.cpp b/src/catalog/catalog_entry/rel_table_catalog_entry.cpp index a119860529..b08a91c780 100644 --- a/src/catalog/catalog_entry/rel_table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/rel_table_catalog_entry.cpp @@ -83,12 +83,12 @@ std::string RelTableCatalogEntry::toCypher(main::ClientContext* clientContext) c auto catalog = clientContext->getCatalog(); auto srcTableName = catalog->getTableName(clientContext->getTx(), srcTableID); auto dstTableName = catalog->getTableName(clientContext->getTx(), dstTableID); - ss << "CREATE REL TABLE " << getName() << "( FROM " << srcTableName << " TO " << dstTableName - << ", "; - Property::toCypher(getPropertiesRef(), ss); - auto srcMultiStr = srcMultiplicity == RelMultiplicity::MANY ? "MANY" : "ONE"; - auto dstMultiStr = dstMultiplicity == RelMultiplicity::MANY ? "MANY" : "ONE"; - ss << srcMultiStr << "_" << dstMultiStr << ");"; + auto srcMultiStr = srcMultiplicity == common::RelMultiplicity::MANY ? "MANY" : "ONE"; + auto dstMultiStr = dstMultiplicity == common::RelMultiplicity::MANY ? "MANY" : "ONE"; + std::string tableInfo = + stringFormat("CREATE REL TABLE {} (FROM {} TO {}, ", getName(), srcTableName, dstTableName); + ss << tableInfo << Property::toCypher(getPropertiesRef()) << srcMultiStr << "_" << dstMultiStr + << ");"; return ss.str(); } diff --git a/src/catalog/property.cpp b/src/catalog/property.cpp index 2dabd86f2f..f3da2ab0c8 100644 --- a/src/catalog/property.cpp +++ b/src/catalog/property.cpp @@ -29,8 +29,8 @@ Property Property::deserialize(Deserializer& deserializer) { return Property(name, std::move(dataType), propertyID, tableID); } -void Property::toCypher( - const std::vector& properties, std::stringstream& ss) { +std::string Property::toCypher(const std::vector& properties) { + std::stringstream ss; for (auto& prop : properties) { if (prop.getDataType()->getPhysicalType() == PhysicalTypeID::INTERNAL_ID) { continue; @@ -42,6 +42,7 @@ void Property::toCypher( } ss << prop.getName() << " " << propStr << ","; } + return ss.str(); } } // namespace catalog diff --git a/src/function/scalar_macro_function.cpp b/src/function/scalar_macro_function.cpp index 61f82d6231..b2fed1e81b 100644 --- a/src/function/scalar_macro_function.cpp +++ b/src/function/scalar_macro_function.cpp @@ -1,9 +1,8 @@ #include "function/scalar_macro_function.h" -#include - #include "common/serializer/deserializer.h" #include "common/serializer/serializer.h" +#include "common/string_format.h" #include "common/string_utils.h" using namespace kuzu::common; @@ -59,7 +58,6 @@ std::unique_ptr ScalarMacroFunction::deserialize(Deserializ } std::string ScalarMacroFunction::toCypher(const std::string& name) const { - std::stringstream ss; std::vector paramStrings; for (auto& param : positionalArgs) { paramStrings.push_back(param); @@ -67,9 +65,8 @@ std::string ScalarMacroFunction::toCypher(const std::string& name) const { for (auto& defaultParam : defaultArgs) { paramStrings.push_back(defaultParam.first + ":=" + defaultParam.second->toString()); } - ss << "CREATE MACRO " << name << "(" << StringUtils::join(paramStrings, ",") << ") AS " - << expression->toString(); - return ss.str(); + return stringFormat("CREATE MACRO {} ({}) AS {}", name, StringUtils::join(paramStrings, ","), + expression->toString()); } } // namespace function } // namespace kuzu diff --git a/src/include/catalog/property.h b/src/include/catalog/property.h index 5ed9ef52c7..4a92105fb3 100644 --- a/src/include/catalog/property.h +++ b/src/include/catalog/property.h @@ -35,8 +35,7 @@ class Property { void serialize(common::Serializer& serializer) const; static Property deserialize(common::Deserializer& deserializer); - static void toCypher( - const std::vector& properties, std::stringstream& ss); + static std::string toCypher(const std::vector& properties); private: Property(const Property& other) diff --git a/src/include/common/copier_config/csv_reader_config.h b/src/include/common/copier_config/csv_reader_config.h index 29496b212c..60c34b2331 100644 --- a/src/include/common/copier_config/csv_reader_config.h +++ b/src/include/common/copier_config/csv_reader_config.h @@ -1,7 +1,5 @@ #pragma once -#include - #include "common/constants.h" #include "common/copy_constructors.h" #include "common/types/value/value.h" @@ -23,16 +21,11 @@ struct CSVOption { hasHeader{CopyConstants::DEFAULT_CSV_HAS_HEADER} {} EXPLICIT_COPY_DEFAULT_MOVE(CSVOption); + // TODO: COPY FROM and COPY TO should support transform special options, like '\'. std::string toCypher() const { - std::stringstream ss; - ss << " (escape = '\\" << escapeChar << "' , delim = '" << delimiter << "' , quote = '\\" - << quoteChar << "', header="; - if (hasHeader) { - ss << "true);"; - } else { - ss << "false);"; - } - return ss.str(); + std::string header = hasHeader ? "true" : "false"; + return stringFormat("(escape ='\\{}', delim ='{}', quote='\\{}', header={})", escapeChar, + delimiter, quoteChar, header); } private: diff --git a/src/processor/operator/persistent/export_db.cpp b/src/processor/operator/persistent/export_db.cpp index 2a3c4a5dcc..17a513a2c8 100644 --- a/src/processor/operator/persistent/export_db.cpp +++ b/src/processor/operator/persistent/export_db.cpp @@ -29,13 +29,11 @@ static void writeStringStreamToFile( static void writeCopyStatement( stringstream& ss, std::string tableName, ReaderConfig* boundFileInfo) { - ss << "COPY "; - ss << tableName << " FROM \"" << boundFileInfo->filePaths[0] << "/" << tableName; auto fileTypeStr = FileTypeUtils::toString(boundFileInfo->fileType); StringUtils::toLower(fileTypeStr); - ss << "." << fileTypeStr; auto csvConfig = common::CSVReaderConfig::construct(boundFileInfo->options); - ss << "\"" << csvConfig.option.toCypher() << std::endl; + ss << stringFormat("COPY {} FROM \"{}.{}\" {};\n", tableName, tableName, fileTypeStr, + csvConfig.option.toCypher()); } std::string getSchemaCypher(main::ClientContext* clientContext, transaction::Transaction* tx) {