Skip to content

Commit

Permalink
Fix bind file scan
Browse files (browse the repository at this point in the history)
  • Loading branch information
manh9203 committed Apr 24, 2024
1 parent b491af2 commit 4c95042
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 10 deletions.
2 changes: 1 addition & 1 deletion dataset/copy-test/node/parquet/copy.cypher
Original file line number Diff line number Diff line change
@@ -1 +1 @@
- COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ COPY tableOfTypes FROM "dataset/copy-test/node/parquet/types_50k*.parquet";
3 changes: 3 additions & 0 deletions src/binder/bind/bind_file_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ std::unique_ptr<BoundBaseScanSource> Binder::bindScanSource(BaseScanSource* sour
auto filePaths = bindFilePaths(fileSource->filePaths);
// Bind file type.
auto fileType = bindFileType(filePaths);
+ if (fileType != FileType::CSV && options.size() != 0) {
+ throw BinderException{"Only copy from CSV can have options."};
+ }
auto config = std::make_unique<ReaderConfig>(fileType, std::move(filePaths));
// Bind options.
config->options = bindParsingOptions(options);
Expand Down
17 changes: 12 additions & 5 deletions src/binder/bind/bind_import_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@ std::unique_ptr<BoundStatement> Binder::bindImportDatabaseClause(const Statement
copyFromStatement->getSource())
->filePaths;
KU_ASSERT(filePaths.size() == 1);
+ auto fileType = bindFileType(filePaths);
auto copyFilePath = boundFilePath + "/" + filePaths[0];
- auto csvConfig = CSVReaderConfig::construct(
- bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
- auto csvQuery = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
- copyFilePath, csvConfig.option.toCypher());
- finalQueryStatements += csvQuery;
+ std::string query;
+ if (fileType == FileType::CSV) {
+ auto csvConfig = CSVReaderConfig::construct(
+ bindParsingOptions(copyFromStatement->getParsingOptionsRef()));
+ query = stringFormat("COPY {} FROM \"{}\" {};", copyFromStatement->getTableName(),
+ copyFilePath, csvConfig.option.toCypher());
+ } else {
+ query = stringFormat("COPY {} FROM \"{}\";", copyFromStatement->getTableName(),
+ copyFilePath);
+ }
+ finalQueryStatements += query;
}
finalQueryStatements += getQueryFromFile(fs, boundFilePath, ImportDBConstants::MACRO_NAME);
return std::make_unique<BoundImportDatabase>(boundFilePath, finalQueryStatements);
Expand Down
6 changes: 6 additions & 0 deletions test/test_files/copy/copy_node_parquet.test
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@
-STATEMENT MATCH (row:tableOfTypes) WHERE 0 <= row.doubleColumn AND row.doubleColumn <= 10 AND 0 <= row.int64Column AND row.int64Column <= 10 RETURN count(*);
---- 1
546
+
+ -CASE CopyWithOptionsErrorTest
+
+ -STATEMENT COPY tableOfTypes FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ ---- error
+ Binder exception: Only copy from CSV can have options.
4 changes: 2 additions & 2 deletions test/test_files/copy/copy_partial_column.test
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ Copy exception: Found NULL, which violates the non-null constraint of the primar
---- ok
-STATEMENT COPY tableOfTypes12 (id, int64Column, doubleColumn, booleanColumn, dateColumn, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
- "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- ok
-STATEMENT MATCH (row:tableOfTypes12) WHERE row.id >= 20 AND row.id <= 24 RETURN row.*;
---- 5
Expand All @@ -205,6 +205,6 @@ listOfString, listOfListOfInt64, structColumn) FROM
---- ok
-STATEMENT COPY tableOfTypes12 (id, dateColumn, doubleColumn, booleanColumn, int64Column, stringColumn, listOfInt64,
listOfString, listOfListOfInt64, structColumn) FROM
- "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet" (HEADER=true);
+ "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k*.parquet";
---- error
Binder exception: Column `dateColumn` type mismatch. Expected DATE but got INT64.
4 changes: 2 additions & 2 deletions test/test_files/exceptions/copy/wrong_header.test
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ Binder exception: Number of columns mismatch. Expected 4 but got 3.
-CASE NodeUnmatchedNumColumns
-STATEMENT create node table person (ID1 SERIAL, ID INT64, fName INT64, age INT64, PRIMARY KEY (ID1))
---- ok
- -STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet" (HEADER=true)
+ -STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-test/node/parquet/types_50k_1.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 3 but got 10.

Expand All @@ -120,6 +120,6 @@ Binder exception: Number of columns mismatch. Expected 3 but got 10.
---- ok
-STATEMENT create rel table knows (FROM person TO person, time date, age INT64)
---- ok
- -STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet" (HEADER=true)
+ -STATEMENT COPY knows FROM "${KUZU_ROOT_DIRECTORY}/dataset/demo-db/parquet/follows.parquet"
---- error
Binder exception: Number of columns mismatch. Expected 4 but got 3.

0 comments on commit 4c95042

Please sign in to comment.