Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #2704 #3379

Merged
Merged 4 commits on Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dataset/copy-test/node/parquet/copy.cypher
Original file line number Diff line number Diff line change
@@ -1 +1 @@
COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet";
17 changes: 12 additions & 5 deletions src/binder/bind/bind_import_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@ std::unique_ptr<BoundStatement> Binder::bindImportDatabaseClause(const Statement
copyFromStatement->getSource())
->filePaths;
KU_ASSERT(filePaths.size() == 1);
auto fileType = bindFileType(filePaths);
auto copyFilePath = boundFilePath + "/" + filePaths[0];
auto csvConfig = CSVReaderConfig::construct(
bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
copyFilePath, csvConfig.option.toCypher());
finalQueryStatements += csvQuery;
std::string query;
if (fileType == FileType::CSV) {
auto csvConfig = CSVReaderConfig::construct(
bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
query = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
copyFilePath, csvConfig.option.toCypher());
} else {
// Review note: manh9203 marked this conversation as resolved.
query = stringFormat("COPY {} FROM \"{}\";", copyFromStatement->getTableName(),
copyFilePath);
}
finalQueryStatements += query;
}
finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME);
return std::make_unique<BoundImportDatabase>(boundFilePath, finalQueryStatements);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ static void bindColumns(const common::ReaderConfig& readerConfig,
static std::unique_ptr<function::TableFuncBindData> bindFunc(main::ClientContext* /*context*/,
function::TableFuncBindInput* input) {
auto scanInput = reinterpret_cast<function::ScanTableFuncBindInput*>(input);
KU_ASSERT(scanInput->config.options.empty());
std::vector<std::string> detectedColumnNames;
std::vector<common::LogicalType> detectedColumnTypes;
bindColumns(scanInput->config, detectedColumnNames, detectedColumnTypes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <fcntl.h>

#include "common/exception/binder.h"
#include "common/exception/copy.h"
#include "common/file_system/virtual_file_system.h"
#include "common/string_format.h"
Expand Down Expand Up @@ -652,6 +653,9 @@ static std::unique_ptr<function::TableFuncBindData> bindFunc(main::ClientContext
function::TableFuncBindInput* input) {
auto scanInput =
ku_dynamic_cast<function::TableFuncBindInput*, function::ScanTableFuncBindInput*>(input);
if (!scanInput->config.options.empty()) {
throw BinderException{"Copy from Parquet cannot have options."};
}
std::vector<std::string> detectedColumnNames;
std::vector<common::LogicalType> detectedColumnTypes;
bindColumns(scanInput, detectedColumnNames, detectedColumnTypes);
Expand Down
5 changes: 5 additions & 0 deletions test/test_files/copy/copy_node_parquet.test
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,8 @@
-STATEMENT MATCH (row:tableOfTypes) WHERE 0 <= row.doubleColumn AND row.doubleColumn <= 10 AND 0 <= row.int64Column AND row.int64Column <= 10 RETURN count(*);
---- 1
546

-LOG CopyWithOptionsErrorTest
-STATEMENT COPY tableOfTypes FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
---- error
Binder exception: Copy from Parquet cannot have options.
4 changes: 2 additions & 2 deletions test/test_files/copy/copy_partial_column.test
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ Copy exception: Found NULL, which violates the non-null constraint of the primar
---- ok
-STATEMENT COPY tableOfTypes12 (id, int64Column, doubleColumn, booleanColumn, dateColumn, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- ok
-STATEMENT MATCH (row:tableOfTypes12) WHERE row.id >= 20 AND row.id <= 24 RETURN row.*;
---- 5
Expand All @@ -205,6 +205,6 @@ listOfString, listOfListOfInt64, structColumn) FROM
---- ok
-STATEMENT COPY tableOfTypes12 (id, dateColumn, doubleColumn, booleanColumn, int64Column, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
"${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- error
Binder exception: Column `dateColumn` type mismatch. Expected DATE but got INT64.
4 changes: 2 additions & 2 deletions test/test_files/exceptions/copy/wrong_header.test
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Binder exception: Number of columns mismatch. Expected 4 but got 3.
-CASE NodeUnmatchedNumColumns
-STATEMENT create node table person (ID1 SERIAL, ID INT64, fName INT64, age INT64, PRIMARY KEY (ID1))
---- ok
-STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" (HEADER=true)
-STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 3 but got 10.

Expand All @@ -120,6 +120,6 @@ Binder exception: Number of columns mismatch. Expected 3 but got 10.
---- ok
-STATEMENT create rel table knows (FROM person TO person, time date, age INT64)
---- ok
-STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" (HEADER=true)
-STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 4 but got 3.
Loading