From 9951547ada52cef397952ad3f0163a6d882e0c9e Mon Sep 17 00:00:00 2001 From: Kuzu CI Date: Thu, 14 Mar 2024 20:47:25 -0400 Subject: [PATCH] Fix COPY FROM file paths when importing a database --- src/binder/bind/bind_import_database.cpp | 50 ++++++++++++++++++-- src/common/file_system/file_system.cpp | 34 +++++++++++++ src/include/common/file_system/file_system.h | 6 +++ src/include/parser/copy.h | 5 ++ 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/src/binder/bind/bind_import_database.cpp b/src/binder/bind/bind_import_database.cpp index f6bc6ce8d6d..2f0566e601b 100644 --- a/src/binder/bind/bind_import_database.cpp +++ b/src/binder/bind/bind_import_database.cpp @@ -1,9 +1,12 @@ #include #include "binder/binder.h" +#include "binder/copy/bound_file_scan_info.h" #include "binder/copy/bound_import_database.h" #include "common/exception/binder.h" #include "common/file_system/virtual_file_system.h" +#include "parser/copy.h" +#include "parser/parser.h" #include "parser/port_db.h" using namespace kuzu::common; @@ -12,7 +15,26 @@ using namespace kuzu::parser; namespace kuzu { namespace binder { -std::string getFilePath( +// std::string replaceCopyFromPath(common::VirtualFileSystem* vfs, const std::string boundFilePath, +// std::string query){ +// auto parsedStatements = Parser::parseQuery(query); +// auto result = std::string(); +// for(auto &parsedStatement : parsedStatements){ +// KU_ASSERT(parsedStatement->getStatementType()==StatementType::COPY_FROM); +// auto copyFromStatement = ku_dynamic_cast(parsedStatement.get()); KU_ASSERT(copyFromStatement->getFilePath().size()==1); +// auto csvFilePath = copyFromStatement->getFilePath()[0]; +// auto extractedFileName = vfs->extractName(csvFilePath); +// csvFilePath=vfs->joinPath(boundFilePath, extractedFileName); +// auto csvConfig = +// CSVReaderConfig::construct(bindParsingOptions(copyFromStatement->getParsingOptionsRef())); +// result+="COPY "+ copyFromStatement->getTableName()+ " FROM '"+csvFilePath+" +// "+csvConfig.option.toCypher()+"';"; +// } +// 
return result; +//} + +std::string getQueryFromFile( common::VirtualFileSystem* vfs, const std::string boundFilePath, const std::string fileName) { auto filePath = vfs->joinPath(boundFilePath, fileName); if (!vfs->fileOrPathExists(filePath)) { @@ -26,7 +48,10 @@ std::string getFilePath( auto fsize = fileInfo->getFileSize(); auto buffer = std::make_unique(fsize); fileInfo->readFile(buffer.get(), fsize); - return std::string(buffer.get(), fsize); + auto query = std::string(buffer.get(), fsize); + return query; + // fileName==ImportDBConstants::COPY_NAME? + // replaceCopyFromPath(vfs,boundFilePath,query):std::string(buffer.get(), fsize); } std::unique_ptr Binder::bindImportDatabaseClause(const Statement& statement) { @@ -37,9 +62,24 @@ std::unique_ptr Binder::bindImportDatabaseClause(const Statement throw BinderException(stringFormat("Directory {} does not exist.", boundFilePath)); } std::string finalQueryStatements; - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME); - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::COPY_NAME); - finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::MACRO_NAME); + finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME); + auto copyQuery = getQueryFromFile(fs, boundFilePath, ImportDBConstants::COPY_NAME); + auto parsedStatements = Parser::parseQuery(copyQuery); + for (auto& parsedStatement : parsedStatements) { + KU_ASSERT(parsedStatement->getStatementType() == StatementType::COPY_FROM); + auto copyFromStatement = + ku_dynamic_cast(parsedStatement.get()); + KU_ASSERT(copyFromStatement->getFilePath().size() == 1); + auto csvFilePath = copyFromStatement->getFilePath()[0]; + auto extractedFileName = fs->extractName(csvFilePath); + csvFilePath = fs->joinPath(boundFilePath, extractedFileName); + auto csvConfig = CSVReaderConfig::construct( + bindParsingOptions(copyFromStatement->getParsingOptionsRef())); + auto csvQuery = 
"COPY " + copyFromStatement->getTableName() + " FROM '" + csvFilePath + + "' " + csvConfig.option.toCypher() + "\n"; + finalQueryStatements += csvQuery; + } + finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME); return std::make_unique(boundFilePath, finalQueryStatements); } } // namespace binder diff --git a/src/common/file_system/file_system.cpp b/src/common/file_system/file_system.cpp index bc595b94d8c..8c98176a284 100644 --- a/src/common/file_system/file_system.cpp +++ b/src/common/file_system/file_system.cpp @@ -1,5 +1,7 @@ #include "common/file_system/file_system.h" +#include "common/string_utils.h" + namespace kuzu { namespace common { @@ -40,5 +42,37 @@ void FileSystem::truncate(FileInfo* /*fileInfo*/, uint64_t /*size*/) const { KU_UNREACHABLE; } +std::string FileSystem::pathSeparator() { + auto separatorStr = "/"; +#ifdef _WIN32 + separatorStr = "\\"; +#endif + return separatorStr; +} + +std::string FileSystem::convertSeparators(const std::string& path) { + auto separatorStr = pathSeparator(); + char separator = separatorStr[0]; + if (separator == '/') { + // on unix-based systems we only accept / as a separator + return path; + } + // on windows-based systems we accept both + auto returnPath = path; + StringUtils::replaceAll(returnPath, "/", separatorStr); + return returnPath; +} + +std::string FileSystem::extractName(const std::string& path) { + if (path.empty()) { + return std::string(); + } + auto normalized_path = convertSeparators(path); + auto sep = pathSeparator(); + auto splits = StringUtils::split(normalized_path, sep); + KU_ASSERT(!splits.empty()); + return splits.back(); +} + } // namespace common } // namespace kuzu diff --git a/src/include/common/file_system/file_system.h b/src/include/common/file_system/file_system.h index 8e7860a1349..6983ff10acc 100644 --- a/src/include/common/file_system/file_system.h +++ b/src/include/common/file_system/file_system.h @@ -43,6 +43,12 @@ class KUZU_API FileSystem { 
virtual bool canHandleFile(const std::string& /*path*/) const { KU_UNREACHABLE; } + std::string pathSeparator(); + + std::string extractName(const std::string& path); + + std::string convertSeparators(const std::string& path); + protected: virtual void readFromFile( FileInfo* fileInfo, void* buffer, uint64_t numBytes, uint64_t position) const = 0; diff --git a/src/include/parser/copy.h b/src/include/parser/copy.h index bbc9b23ad80..6a0e1c7f33c 100644 --- a/src/include/parser/copy.h +++ b/src/include/parser/copy.h @@ -30,6 +30,11 @@ class CopyFrom : public Copy { inline bool byColumn() const { return byColumn_; } inline BaseScanSource* getSource() const { return source.get(); } + inline std::vector getFilePath() const { + KU_ASSERT(source->type == common::ScanSourceType::FILE); + auto fileSource = dynamic_cast(source.get()); + return fileSource->filePaths; + } inline std::string getTableName() const { return tableName; }