From 4c95042bc16835ab6724a8ac1044c1356fd5f47d Mon Sep 17 00:00:00 2001 From: Manh Dinh Date: Wed, 24 Apr 2024 19:55:30 -0400 Subject: [PATCH] Fix bind file scan --- dataset/copy-test/node/parquet/copy.cypher | 2 +- src/binder/bind/bind_file_scan.cpp | 3 +++ src/binder/bind/bind_import_database.cpp | 17 ++++++++++++----- test/test_files/copy/copy_node_parquet.test | 6 ++++++ test/test_files/copy/copy_partial_column.test | 4 ++-- .../exceptions/copy/wrong_header.test | 4 ++-- 6 files changed, 26 insertions(+), 10 deletions(-) diff --git a/dataset/copy-test/node/parquet/copy.cypher b/dataset/copy-test/node/parquet/copy.cypher index a5823700cd0..4e9d385846c 100644 --- a/dataset/copy-test/node/parquet/copy.cypher +++ b/dataset/copy-test/node/parquet/copy.cypher @@ -1 +1 @@ -COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet"; diff --git a/src/binder/bind/bind_file_scan.cpp b/src/binder/bind/bind_file_scan.cpp index cf7c3035abe..ec33fbde1fa 100644 --- a/src/binder/bind/bind_file_scan.cpp +++ b/src/binder/bind/bind_file_scan.cpp @@ -75,6 +75,9 @@ std::unique_ptr Binder::bindScanSource(BaseScanSource* sour auto filePaths = bindFilePaths(fileSource->filePaths); // Bind file type. auto fileType = bindFileType(filePaths); + if (fileType != FileType::CSV && options.size() != 0) { + throw BinderException{"Only copy from CSV can have options."}; + } auto config = std::make_unique(fileType, std::move(filePaths)); // Bind options. config->options = bindParsingOptions(options); diff --git a/src/binder/bind/bind_import_database.cpp b/src/binder/bind/bind_import_database.cpp index a9192147642..383149567d8 100644 --- a/src/binder/bind/bind_import_database.cpp +++ b/src/binder/bind/bind_import_database.cpp @@ -55,12 +55,19 @@ std::unique_ptr Binder::bindImportDatabaseClause(const Statement copyFromStatement->getSource()) ->filePaths; KU_ASSERT(filePaths.size() == 1); + auto fileType = bindFileType(filePaths); auto copyFilePath = boundFilePath + "/" + filePaths[0]; - auto csvConfig = CSVReaderConfig::construct( - bindParsingOptions(copyFromStatement->getParsingOptionsRef())); - auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(), - copyFilePath, csvConfig.option.toCypher()); - finalQueryStatements += csvQuery; + std::string query; + if (fileType == FileType::CSV) { + auto csvConfig = CSVReaderConfig::construct( + bindParsingOptions(copyFromStatement->getParsingOptionsRef())); + query = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(), + copyFilePath, csvConfig.option.toCypher()); + } else { + query = stringFormat("COPY {} FROM \"{}\";", copyFromStatement->getTableName(), + copyFilePath); + } + finalQueryStatements += query; } finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME); return std::make_unique(boundFilePath, finalQueryStatements); diff --git a/test/test_files/copy/copy_node_parquet.test b/test/test_files/copy/copy_node_parquet.test index f169341072a..6fc212bdca0 100644 --- a/test/test_files/copy/copy_node_parquet.test +++ b/test/test_files/copy/copy_node_parquet.test @@ -53,3 +53,9 @@ -STATEMENT MATCH (row:tableOfTypes) WHERE 0 <= row.doubleColumn AND row.doubleColumn <= 10 AND 0 <= row.int64Column AND row.int64Column <= 10 RETURN count(*); ---- 1 546 + +-CASE CopyWithOptionsErrorTest + +-STATEMENT COPY tableOfTypes FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +---- error +Binder exception: Only copy from CSV can have options. diff --git a/test/test_files/copy/copy_partial_column.test b/test/test_files/copy/copy_partial_column.test index 9b662766e99..91c1f3ee6c4 100644 --- a/test/test_files/copy/copy_partial_column.test +++ b/test/test_files/copy/copy_partial_column.test @@ -184,7 +184,7 @@ Copy exception: Found NULL, which violates the non-null constraint of the primar ---- ok -STATEMENT COPY tableOfTypes12 (id, int64Column, doubleColumn, booleanColumn, dateColumn, stringColumn, listOfInt64, listOfString, listOfListOfInt64, structColumn) FROM -"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet"; ---- ok -STATEMENT MATCH (row:tableOfTypes12) WHERE row.id >= 20 AND row.id <= 24 RETURN row.*; ---- 5 @@ -205,6 +205,6 @@ listOfString, listOfListOfInt64, structColumn) FROM ---- ok -STATEMENT COPY tableOfTypes12 (id, dateColumn, doubleColumn, booleanColumn, int64Column, stringColumn, listOfInt64, listOfString, listOfListOfInt64, structColumn) FROM -"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true); +"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet"; ---- error Binder exception: Column `dateColumn` type mismatch. Expected DATE but got INT64. diff --git a/test/test_files/exceptions/copy/wrong_header.test b/test/test_files/exceptions/copy/wrong_header.test index b9b0351b76c..1172f3b0c04 100644 --- a/test/test_files/exceptions/copy/wrong_header.test +++ b/test/test_files/exceptions/copy/wrong_header.test @@ -111,7 +111,7 @@ Binder exception: Number of columns mismatch. Expected 4 but got 3. -CASE NodeUnmatchedNumColumns -STATEMENT create node table person (ID1 SERIAL, ID INT64, fName INT64, age INT64, PRIMARY KEY (ID1)) ---- ok --STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" (HEADER=true) +-STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" ---- error Binder exception: Number of columns mismatch. Expected 3 but got 10. @@ -120,6 +120,6 @@ Binder exception: Number of columns mismatch. Expected 3 but got 10. ---- ok -STATEMENT create rel table knows (FROM person TO person, time date, age INT64) ---- ok --STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" (HEADER=true) +-STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" ---- error Binder exception: Number of columns mismatch. Expected 4 but got 3.