Skip to content

Commit

Permalink
Merge pull request #2208 from kuzudb/utf8-check
Browse files (browse the repository at this point in the history)
Throw copy exception on invalid utf8 string
  • Loading branch information
acquamarin committed Oct 13, 2023
2 parents 964925f + 11f3a52 commit 3280465
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 9 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions dataset/copy-fault-tests/invalid-utf8/schema.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
create node table person (ID STRING, PRIMARY KEY (ID));
4 changes: 4 additions & 0 deletions src/processor/operator/persistent/reader/csv/driver.cpp
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
#include "processor/operator/persistent/reader/csv/parallel_csv_reader.h"
#include "processor/operator/persistent/reader/csv/serial_csv_reader.h"
#include "storage/store/table_copy_utils.h"
#include "utf8proc_wrapper.h"

using namespace kuzu::common;

@@ -461,6 +462,9 @@ void copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std::string_view
vector, rowToAdd, reinterpret_cast<char*>(blobBuffer.get()), blobLen);
} break;
case LogicalTypeID::STRING: {
if (!utf8proc::Utf8Proc::isValid(strVal.data(), strVal.length())) {
throw common::CopyException{"Invalid UTF8-encoded string."};
}
StringVector::addString(vector, rowToAdd, strVal.data(), strVal.length());
} break;
case LogicalTypeID::DATE: {
Expand Down
9 changes: 9 additions & 0 deletions test/test_files/exceptions/copy/invalid_utf8.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-GROUP CopyInvalidUTF8
-DATASET CSV copy-fault-tests/invalid-utf8

--

-CASE InvalidUTF8
-STATEMENT COPY person FROM "${KUZU_ROOT_DIRECTORY}/dataset/copy-fault-tests/invalid-utf8/invalid-utf8.csv"
---- error
Copy exception: Invalid UTF8-encoded string.
9 changes: 0 additions & 9 deletions test/test_files/exceptions/runtime/invalid_utf8.test

This file was deleted.

0 comments on commit 3280465

Please sign in to comment.