Skip to content

Commit

Permalink
Fix #2704 (#3379)
Browse files Browse the repository at this point in the history
* Fix bind file scan
* Fix parquet and npy reader bindFunc
  • Loading branch information
manh9203 committed Apr 26, 2024
1 parent ac08521 commit f61108a
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 10 deletions.
2 changes: 1 addition & 1 deletion dataset/copy-test/node/parquet/copy.cypher
Original file line number Diff line number Diff line change
@@ -1 +1 @@
- COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet";
17 changes: 12 additions & 5 deletions src/binder/bind/bind_import_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@ std::unique_ptr<BoundStatement> Binder::bindImportDatabaseClause(const Statement
copyFromStatement->getSource())
->filePaths;
KU_ASSERT(filePaths.size() == 1);
+ auto fileType = bindFileType(filePaths);
auto copyFilePath = boundFilePath + "/" + filePaths[0];
- auto csvConfig = CSVReaderConfig::construct(
- bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
- auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
- copyFilePath, csvConfig.option.toCypher());
- finalQueryStatements += csvQuery;
+ std::string query;
+ if (fileType == FileType::CSV) {
+ auto csvConfig = CSVReaderConfig::construct(
+ bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
+ query = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
+ copyFilePath, csvConfig.option.toCypher());
+ } else {
+ query = stringFormat("COPY {} FROM \"{}\";", copyFromStatement->getTableName(),
+ copyFilePath);
+ }
+ finalQueryStatements += query;
}
finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME);
return std::make_unique<BoundImportDatabase>(boundFilePath, finalQueryStatements);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ static void bindColumns(const common::ReaderConfig& readerConfig,
static std::unique_ptr<function::TableFuncBindData> bindFunc(main::ClientContext* /*context*/,
function::TableFuncBindInput* input) {
auto scanInput = reinterpret_cast<function::ScanTableFuncBindInput*>(input);
+ KU_ASSERT(scanInput->config.options.empty());
std::vector<std::string> detectedColumnNames;
std::vector<common::LogicalType> detectedColumnTypes;
bindColumns(scanInput->config, detectedColumnNames, detectedColumnTypes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <fcntl.h>

+ #include "common/exception/binder.h"
#include "common/exception/copy.h"
#include "common/file_system/virtual_file_system.h"
#include "common/string_format.h"
Expand Down Expand Up @@ -652,6 +653,9 @@ static std::unique_ptr<function::TableFuncBindData> bindFunc(main::ClientContext
function::TableFuncBindInput* input) {
auto scanInput =
ku_dynamic_cast<function::TableFuncBindInput*, function::ScanTableFuncBindInput*>(input);
+ if (!scanInput->config.options.empty()) {
+ throw BinderException{"Copy from Parquet cannot have options."};
+ }
std::vector<std::string> detectedColumnNames;
std::vector<common::LogicalType> detectedColumnTypes;
bindColumns(scanInput, detectedColumnNames, detectedColumnTypes);
Expand Down
5 changes: 5 additions & 0 deletions test/test_files/copy/copy_node_parquet.test
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,8 @@
-STATEMENT MATCH (row:tableOfTypes) WHERE 0 <= row.doubleColumn AND row.doubleColumn <= 10 AND 0 <= row.int64Column AND row.int64Column <= 10 RETURN count(*);
---- 1
546

+ -LOG CopyWithOptionsErrorTest
+ -STATEMENT COPY tableOfTypes FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ ---- error
+ Binder exception: Copy from Parquet cannot have options.
4 changes: 2 additions & 2 deletions test/test_files/copy/copy_partial_column.test
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ Copy exception: Found NULL, which violates the non-null constraint of the primar
---- ok
-STATEMENT COPY tableOfTypes12 (id, int64Column, doubleColumn, booleanColumn, dateColumn, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
- "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- ok
-STATEMENT MATCH (row:tableOfTypes12) WHERE row.id >= 20 AND row.id <= 24 RETURN row.*;
---- 5
Expand All @@ -205,6 +205,6 @@ listOfString, listOfListOfInt64, structColumn) FROM
---- ok
-STATEMENT COPY tableOfTypes12 (id, dateColumn, doubleColumn, booleanColumn, int64Column, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
- "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- error
Binder exception: Column `dateColumn` type mismatch. Expected DATE but got INT64.
4 changes: 2 additions & 2 deletions test/test_files/exceptions/copy/wrong_header.test
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Binder exception: Number of columns mismatch. Expected 4 but got 3.
-CASE NodeUnmatchedNumColumns
-STATEMENT create node table person (ID1 SERIAL, ID INT64, fName INT64, age INT64, PRIMARY KEY (ID1))
---- ok
- -STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" (HEADER=true)
+ -STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 3 but got 10.

Expand All @@ -120,6 +120,6 @@ Binder exception: Number of columns mismatch. Expected 3 but got 10.
---- ok
-STATEMENT create rel table knows (FROM person TO person, time date, age INT64)
---- ok
- -STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" (HEADER=true)
+ -STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 4 but got 3.

0 comments on commit f61108a

Please sign in to comment.