Skip to content

Commit

Permalink
Fix issue 1323
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Mar 1, 2023
1 parent d151554 commit 48faead
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 3 deletions.
1 change: 1 addition & 0 deletions dataset/copy-fault-tests/null-pk/schema.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
create node table person (fName STRING, PRIMARY KEY (fName));
3 changes: 3 additions & 0 deletions dataset/copy-fault-tests/null-pk/vPerson.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
alice

bob
1 change: 1 addition & 0 deletions dataset/copy-special-char-test/vPerson.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
4|-only one ## should be recognized-|2013-05-01
5|this is a ##plain## #string|2013-05-01
6|this is another ##plain## #string with \|2013-05-01
7|NA|2022-01-11
7 changes: 5 additions & 2 deletions src/storage/copy_arrow/copy_node_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ void CopyNodeArrow::populatePKIndex(
InMemColumn* column, HashIndexBuilder<T>* pkIndex, offset_t startOffset, uint64_t numValues) {
for (auto i = 0u; i < numValues; i++) {
auto offset = i + startOffset;
if (column->isNullAtNodeOffset(offset)) {
throw ReaderException("Primary key cannot be null.");
}
if constexpr (std::is_same<T, int64_t>::value) {
auto key = (int64_t*)column->getElement(offset);
if (!pkIndex->append(*key, offset)) {
Expand Down Expand Up @@ -257,15 +260,15 @@ void CopyNodeArrow::putPropsOfLineIntoColumns(
column->setElement(nodeOffset, reinterpret_cast<uint8_t*>(&val));
} break;
case DOUBLE: {
double_t val = TypeUtils::convertStringToNumber<double_t>(data);
auto val = TypeUtils::convertStringToNumber<double_t>(data);
column->setElement(nodeOffset, reinterpret_cast<uint8_t*>(&val));
} break;
case FLOAT: {
auto val = TypeUtils::convertStringToNumber<float_t>(data);
column->setElement(nodeOffset, reinterpret_cast<uint8_t*>(&val));
} break;
case BOOL: {
bool val = TypeUtils::convertToBoolean(data);
auto val = TypeUtils::convertToBoolean(data);
column->setElement(nodeOffset, reinterpret_cast<uint8_t*>(&val));
} break;
case DATE: {
Expand Down
4 changes: 3 additions & 1 deletion src/storage/copy_arrow/copy_structures_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,19 +187,21 @@ arrow::Status CopyStructuresArrow::initCSVReader(
ARROW_ASSIGN_OR_RAISE(arrow_input_stream, arrow::io::ReadableFile::Open(filePath));
auto arrowRead = arrow::csv::ReadOptions::Defaults();
arrowRead.block_size = CopyConstants::CSV_READING_BLOCK_SIZE;

if (!copyDescription.csvReaderConfig->hasHeader) {
arrowRead.autogenerate_column_names = true;
}

auto arrowConvert = arrow::csv::ConvertOptions::Defaults();
arrowConvert.strings_can_be_null = true;
// Only the empty string is treated as NULL.
arrowConvert.null_values = {""};
arrowConvert.quoted_strings_can_be_null = false;

auto arrowParse = arrow::csv::ParseOptions::Defaults();
arrowParse.delimiter = copyDescription.csvReaderConfig->delimiter;
arrowParse.escape_char = copyDescription.csvReaderConfig->escapeChar;
arrowParse.quote_char = copyDescription.csvReaderConfig->quoteChar;
arrowParse.ignore_empty_lines = false;
arrowParse.escaping = true;

ARROW_ASSIGN_OR_RAISE(
Expand Down
13 changes: 13 additions & 0 deletions test/copy/copy_fault_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ class CopyInvalidNumberTest : public CopyFaultTest {
}
};

// Copy-fault fixture whose input directory is the "null-pk" dataset under
// dataset/copy-fault-tests/.
class CopyNullPKTest : public CopyFaultTest {
    // Resolves the dataset directory against the Kuzu root path.
    std::string getInputDir() override {
        std::string nullPKDatasetDir =
            TestHelper::appendKuzuRootPath("dataset/copy-fault-tests/null-pk/");
        return nullPKDatasetDir;
    }
};

TEST_F(CopyDuplicateIDTest, DuplicateIDsError) {
validateCopyException(
"COPY person FROM \"" +
Expand Down Expand Up @@ -154,3 +160,10 @@ TEST_F(CopyInvalidNumberTest, InvalidNumberError) {
TestHelper::appendKuzuRootPath("dataset/copy-fault-tests/invalid-number/vMovie.csv\""),
"Invalid number: 312abc.");
}

// Expects COPY of the null-pk vPerson.csv into the person table to fail with the
// "Primary key cannot be null." reader exception.
TEST_F(CopyNullPKTest, NullPKError) {
    validateCopyException(
        "COPY person FROM \"" +
            TestHelper::appendKuzuRootPath("dataset/copy-fault-tests/null-pk/vPerson.csv\""),
        "Reader exception: Primary key cannot be null.");
}
1 change: 1 addition & 0 deletions test/copy/copy_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ TEST_F(CopySpecialCharTest, CopySpecialChars) {
EXPECT_EQ("only one # should be recognized", col->readValue(4).strVal);
EXPECT_EQ("this is a #plain# string", col->readValue(5).strVal);
EXPECT_EQ("this is another #plain# string with \\", col->readValue(6).strVal);
EXPECT_EQ("NA", col->readValue(7).strVal);

tableID = catalog->getReadOnlyVersion()->getTableID("organisation");
propertyIdx = catalog->getReadOnlyVersion()->getNodeProperty(tableID, "name");
Expand Down

0 comments on commit 48faead

Please sign in to comment.