diff --git a/src/binder/bind/bind_import_database.cpp b/src/binder/bind/bind_import_database.cpp
index f6bc6ce8d6d..d47a3bb6418 100644
--- a/src/binder/bind/bind_import_database.cpp
+++ b/src/binder/bind/bind_import_database.cpp
@@ -4,6 +4,8 @@
 #include "binder/copy/bound_import_database.h"
 #include "common/exception/binder.h"
 #include "common/file_system/virtual_file_system.h"
+#include "parser/copy.h"
+#include "parser/parser.h"
 #include "parser/port_db.h"
 
 using namespace kuzu::common;
@@ -12,7 +14,7 @@ using namespace kuzu::parser;
 namespace kuzu {
 namespace binder {
 
-std::string getFilePath(
+std::string getQueryFromFile(
     common::VirtualFileSystem* vfs, const std::string boundFilePath, const std::string fileName) {
     auto filePath = vfs->joinPath(boundFilePath, fileName);
     if (!vfs->fileOrPathExists(filePath)) {
@@ -37,9 +39,28 @@ std::unique_ptr Binder::bindImportDatabaseClause(const Statement
         throw BinderException(stringFormat("Directory {} does not exist.", boundFilePath));
     }
     std::string finalQueryStatements;
-    finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME);
-    finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::COPY_NAME);
-    finalQueryStatements += getFilePath(fs, boundFilePath, ImportDBConstants::MACRO_NAME);
+    finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::SCHEMA_NAME);
+    auto copyQuery = getQueryFromFile(fs, boundFilePath, ImportDBConstants::COPY_NAME);
+    auto parsedStatements = Parser::parseQuery(copyQuery);
+    for (auto& parsedStatement : parsedStatements) {
+        KU_ASSERT(parsedStatement->getStatementType() == StatementType::COPY_FROM);
+        auto copyFromStatement =
+            ku_dynamic_cast(parsedStatement.get());
+        KU_ASSERT(copyFromStatement->getSource()->type == common::ScanSourceType::FILE);
+        auto filePaths = ku_dynamic_cast(
+            copyFromStatement->getSource())
+                             ->filePaths;
+        KU_ASSERT(filePaths.size() == 1);
+        auto csvFilePath = filePaths[0];
+        auto extractedFileName = fs->extractName(csvFilePath);
+        csvFilePath = boundFilePath + "/" + extractedFileName;
+        auto csvConfig = CSVReaderConfig::construct(
+            bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
+        auto csvQuery = "COPY " + copyFromStatement->getTableName() + " FROM '" + csvFilePath +
+                        "' " + csvConfig.option.toCypher() + "\n";
+        finalQueryStatements += csvQuery;
+    }
+    finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME);
     return std::make_unique(boundFilePath, finalQueryStatements);
 }
 } // namespace binder
diff --git a/src/common/file_system/file_system.cpp b/src/common/file_system/file_system.cpp
index bc595b94d8c..feb9ba7edc9 100644
--- a/src/common/file_system/file_system.cpp
+++ b/src/common/file_system/file_system.cpp
@@ -1,5 +1,7 @@
 #include "common/file_system/file_system.h"
 
+#include "common/string_utils.h"
+
 namespace kuzu {
 namespace common {
 
@@ -40,5 +42,45 @@ void FileSystem::truncate(FileInfo* /*fileInfo*/, uint64_t /*size*/) const {
     KU_UNREACHABLE;
 }
 
+// Returns the path separator as a string: "\\" when _WIN32 is defined, "/" otherwise.
+std::string FileSystem::pathSeparator() {
+#ifdef _WIN32
+    return "\\";
+#else
+    return "/";
+#endif
+}
+
+// Returns a copy of `path` in which "/" has been rewritten to the platform separator via
+// StringUtils::replaceAll. When the platform separator is already "/", `path` is returned as is.
+std::string FileSystem::convertSeparators(const std::string& path) {
+    auto separatorStr = pathSeparator();
+    if (separatorStr[0] == '/') {
+        // on unix-based systems we only accept / as a separator
+        return path;
+    }
+    // on windows-based systems we accept both
+    auto returnPath = path;
+    StringUtils::replaceAll(returnPath, "/", separatorStr);
+    return returnPath;
+}
+
+// Returns the last element obtained by splitting the separator-normalized `path` on the
+// platform separator, or an empty string when `path` is empty or the split yields nothing.
+std::string FileSystem::extractName(const std::string& path) {
+    if (path.empty()) {
+        return std::string();
+    }
+    auto normalizedPath = convertSeparators(path);
+    auto sep = pathSeparator();
+    auto splits = StringUtils::split(normalizedPath, sep);
+    if (splits.empty()) {
+        // Checked at runtime instead of with KU_ASSERT: calling back() on an empty vector is
+        // undefined behaviour, and an assertion does not protect builds where it is disabled.
+        return std::string();
+    }
+    return splits.back();
+}
+
 } // namespace common
 } // namespace kuzu
diff --git a/src/include/common/file_system/file_system.h b/src/include/common/file_system/file_system.h
index 8e7860a1349..6983ff10acc 100644
--- a/src/include/common/file_system/file_system.h
+++ b/src/include/common/file_system/file_system.h
@@ -43,6 +43,12 @@ class KUZU_API FileSystem {
 
     virtual bool canHandleFile(const std::string& /*path*/) const { KU_UNREACHABLE; }
 
+    std::string pathSeparator();
+
+    std::string extractName(const std::string& path);
+
+    std::string convertSeparators(const std::string& path);
+
 protected:
     virtual void readFromFile(
         FileInfo* fileInfo, void* buffer, uint64_t numBytes, uint64_t position) const = 0;