diff --git a/dataset/copy-test/node/parquet/copy.cypher b/dataset/copy-test/node/parquet/copy.cypher index a5823700cd..4e9d385846 100644 --- a/dataset/copy-test/node/parquet/copy.cypher +++ b/dataset/copy-test/node/parquet/copy.cypher @@ -1 +1 @@ -COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet"; diff --git a/src/binder/bind/bind_import_database.cpp b/src/binder/bind/bind_import_database.cpp index a919214764..383149567d 100644 --- a/src/binder/bind/bind_import_database.cpp +++ b/src/binder/bind/bind_import_database.cpp @@ -55,12 +55,19 @@ std::unique_ptr Binder::bindImportDatabaseClause(const Statement copyFromStatement->getSource()) ->filePaths; KU_ASSERT(filePaths.size() == 1); + auto fileType = bindFileType(filePaths); auto copyFilePath = boundFilePath + "/" + filePaths[0]; - auto csvConfig = CSVReaderConfig::construct( - bindParsingOptions(copyFromStatement->getParsingOptionsRef())); - auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(), - copyFilePath, csvConfig.option.toCypher()); - finalQueryStatements += csvQuery; + std::string query; + if (fileType == FileType::CSV) { + auto csvConfig = CSVReaderConfig::construct( + bindParsingOptions(copyFromStatement->getParsingOptionsRef())); + query = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(), + copyFilePath, csvConfig.option.toCypher()); + } else { + query = stringFormat("COPY {} FROM \"{}\";", copyFromStatement->getTableName(), + copyFilePath); + } + finalQueryStatements += query; } finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME); return std::make_unique(boundFilePath, finalQueryStatements); diff --git a/src/processor/operator/persistent/reader/npy/npy_reader.cpp b/src/processor/operator/persistent/reader/npy/npy_reader.cpp index 5fd2298bbf..4672322531 100644 --- a/src/processor/operator/persistent/reader/npy/npy_reader.cpp +++ b/src/processor/operator/persistent/reader/npy/npy_reader.cpp @@ -296,6 +296,7 @@ static void bindColumns(const common::ReaderConfig& readerConfig, static std::unique_ptr bindFunc(main::ClientContext* /*context*/, function::TableFuncBindInput* input) { auto scanInput = reinterpret_cast(input); + KU_ASSERT(scanInput->config.options.empty()); std::vector detectedColumnNames; std::vector detectedColumnTypes; bindColumns(scanInput->config, detectedColumnNames, detectedColumnTypes); diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index f7e40d05bb..419485ddbd 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -2,6 +2,7 @@ #include +#include "common/exception/binder.h" #include "common/exception/copy.h" #include "common/file_system/virtual_file_system.h" #include "common/string_format.h" @@ -652,6 +653,9 @@ static std::unique_ptr bindFunc(main::ClientContext function::TableFuncBindInput* input) { auto scanInput = ku_dynamic_cast(input); + if (!scanInput->config.options.empty()) { + throw BinderException{"Copy from Parquet cannot have options."}; + } std::vector detectedColumnNames; std::vector detectedColumnTypes; bindColumns(scanInput, detectedColumnNames, detectedColumnTypes); diff --git a/test/test_files/copy/copy_node_parquet.test b/test/test_files/copy/copy_node_parquet.test index f169341072..43148238f2 100644 --- a/test/test_files/copy/copy_node_parquet.test +++ b/test/test_files/copy/copy_node_parquet.test @@ -53,3 +53,8 @@ -STATEMENT MATCH (row:tableOfTypes) WHERE 0 <= row.doubleColumn AND row.doubleColumn <= 10 AND 0 <= row.int64Column AND row.int64Column <= 10 RETURN count(*); ---- 1 546 + +-LOG CopyWithOptionsErrorTest +-STATEMENT COPY tableOfTypes FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +---- error +Binder exception: Copy from Parquet cannot have options. diff --git a/test/test_files/copy/copy_partial_column.test b/test/test_files/copy/copy_partial_column.test index 9b662766e9..91c1f3ee6c 100644 --- a/test/test_files/copy/copy_partial_column.test +++ b/test/test_files/copy/copy_partial_column.test @@ -184,7 +184,7 @@ Copy exception: Found NULL, which violates the non-null constraint of the primar ---- ok -STATEMENT COPY tableOfTypes12 (id, int64Column, doubleColumn, booleanColumn, dateColumn, stringColumn, listOfInt64, listOfString, listOfListOfInt64, structColumn) FROM -"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet"; ---- ok -STATEMENT MATCH (row:tableOfTypes12) WHERE row.id >= 20 AND row.id <= 24 RETURN row.*; ---- 5 @@ -205,6 +205,6 @@ listOfString, listOfListOfInt64, structColumn) FROM ---- ok -STATEMENT COPY tableOfTypes12 (id, dateColumn, doubleColumn, booleanColumn, int64Column, stringColumn, listOfInt64, listOfString, listOfListOfInt64, structColumn) FROM -"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet"; ---- error Binder exception: Column `dateColumn` type mismatch. Expected DATE but got INT64. diff --git a/test/test_files/exceptions/copy/wrong_header.test b/test/test_files/exceptions/copy/wrong_header.test index b9b0351b76..1172f3b0c0 100644 --- a/test/test_files/exceptions/copy/wrong_header.test +++ b/test/test_files/exceptions/copy/wrong_header.test @@ -111,7 +111,7 @@ Binder exception: Number of columns mismatch. Expected 4 but got 3. -CASE NodeUnmatchedNumColumns -STATEMENT create node table person (ID1 SERIAL, ID INT64, fName INT64, age INT64, PRIMARY KEY (ID1)) ---- ok --STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" (HEADER=true) +-STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" ---- error Binder exception: Number of columns mismatch. Expected 3 but got 10. @@ -120,6 +120,6 @@ Binder exception: Number of columns mismatch. Expected 3 but got 10. ---- ok -STATEMENT create rel table knows (FROM person TO person, time date, age INT64) ---- ok --STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" (HEADER=true) +-STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" ---- error Binder exception: Number of columns mismatch. Expected 4 but got 3.