Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove setValueFromString from ColumnChunk #2169

Merged
merged 1 commit into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions src/include/storage/store/column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ class ColumnChunk {
// Returns the size of the data type in bytes
static uint32_t getDataTypeSizeInChunk(common::LogicalType& dataType);

// Converts the first `length` characters of `value` to a T through
// function::castStringToNum<T> and stores the converted value at `pos` via setValue<T>.
template<typename T>
void setValueFromString(const char* value, uint64_t length, common::offset_t pos) {
    auto converted = function::castStringToNum<T>(value, length);
    setValue<T>(converted, pos);
}

static inline common::page_idx_t getNumPagesForBytes(uint64_t numBytes) {
return (numBytes + common::BufferPoolConstants::PAGE_4KB_SIZE - 1) /
common::BufferPoolConstants::PAGE_4KB_SIZE;
Expand Down Expand Up @@ -270,24 +265,5 @@ struct ColumnChunkFactory {
bool enableCompression, common::CSVReaderConfig* csvReaderConfig = nullptr);
};

// Explicit specialization declarations of ColumnChunk::setValueFromString for value types that
// do not go through the generic function::castStringToNum<T> path. The definitions live out of
// line in column_chunk.cpp.
// BOOL
template<>
void ColumnChunk::setValueFromString<bool>(const char* value, uint64_t length, uint64_t pos);
// FIXED_LIST
template<>
void ColumnChunk::setValueFromString<uint8_t*>(const char* value, uint64_t length, uint64_t pos);
// INTERVAL
template<>
void ColumnChunk::setValueFromString<common::interval_t>(
const char* value, uint64_t length, uint64_t pos);
// DATE
template<>
void ColumnChunk::setValueFromString<common::date_t>(
const char* value, uint64_t length, uint64_t pos);
// TIMESTAMP
template<>
void ColumnChunk::setValueFromString<common::timestamp_t>(
const char* value, uint64_t length, uint64_t pos);

} // namespace storage
} // namespace kuzu
17 changes: 3 additions & 14 deletions src/include/storage/store/string_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ class StringColumnChunk : public ColumnChunk {

void update(common::ValueVector* vector, common::vector_idx_t vectorIdx) override;

// Generic fallback: ignores its arguments and unconditionally throws
// common::NotImplementedException. Explicit specializations for common::blob_t and
// common::ku_string_t are declared after the class.
// NOTE(review): the message names "VarSizedColumnChunk" although this class is
// StringColumnChunk — presumably a leftover from a rename; confirm before changing it.
template<typename T>
void setValueFromString(const char* value, uint64_t length, uint64_t pos) {
throw common::NotImplementedException("VarSizedColumnChunk::setValueFromString");
}
template<typename T>
T getValue(common::offset_t pos) const {
throw common::NotImplementedException("VarSizedColumnChunk::getValue");
Expand All @@ -31,28 +27,21 @@ class StringColumnChunk : public ColumnChunk {
common::page_idx_t flushOverflowBuffer(BMFileHandle* dataFH, common::page_idx_t startPageIdx);

inline InMemOverflowFile* getOverflowFile() { return overflowFile.get(); }
inline common::offset_t getLastOffsetInPage() { return overflowCursor.offsetInPage; }
inline common::offset_t getLastOffsetInPage() const { return overflowCursor.offsetInPage; }

private:
void appendStringColumnChunk(StringColumnChunk* other, common::offset_t startPosInOtherChunk,
common::offset_t startPosInChunk, uint32_t numValuesToAppend);

void write(const common::Value& val, uint64_t posToWrite) override;

void setValueFromString(const char* value, uint64_t length, uint64_t pos);

private:
std::unique_ptr<InMemOverflowFile> overflowFile;
PageByteCursor overflowCursor;
};

// Explicit specialization declarations of StringColumnChunk::setValueFromString; the
// definitions live out of line in string_column_chunk.cpp.
// BLOB
template<>
void StringColumnChunk::setValueFromString<common::blob_t>(
const char* value, uint64_t length, uint64_t pos);
// STRING
template<>
void StringColumnChunk::setValueFromString<common::ku_string_t>(
const char* value, uint64_t length, uint64_t pos);

// STRING
template<>
std::string StringColumnChunk::getValue<std::string>(common::offset_t pos) const;
Expand Down
5 changes: 0 additions & 5 deletions src/include/storage/store/struct_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ class StructColumnChunk : public ColumnChunk {
void append(common::ValueVector* vector, common::offset_t startPosInChunk) final;

private:
// TODO(Guodong): These methods are duplicated from `InMemStructColumnChunk`, which will be
// merged later.
// Populates the child chunks at `pos` from the textual STRUCT or UNION value held in the
// first `length` characters of `value`.
void setStructFields(const char* value, uint64_t length, uint64_t pos);
// Writes `structFieldValue` at `pos` into the child chunk selected by `structFiledIdx`,
// dispatching on that child chunk's logical type.
void setValueToStructField(common::offset_t pos, const std::string& structFieldValue,
common::struct_field_idx_t structFiledIdx);
void write(const common::Value& val, uint64_t posToWrite) final;
};

Expand Down
2 changes: 0 additions & 2 deletions src/include/storage/store/var_list_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ class VarListColumnChunk : public ColumnChunk {
return varListDataColumnChunk.dataColumnChunk.get();
}

void setValueFromString(const char* value, uint64_t length, uint64_t pos);

void resetToEmpty() final;

void append(common::ValueVector* vector, common::offset_t startPosInChunk) final;
Expand Down
39 changes: 0 additions & 39 deletions src/storage/store/column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,45 +492,6 @@ std::unique_ptr<ColumnChunk> ColumnChunkFactory::createColumnChunk(
return chunk;
}

// Bool
// Parses the first `length` characters of `value` as a textual boolean (std::boolalpha
// extraction) and stores the result at `pos` via setValue.
// Input that cannot be extracted as a boolean is stored as `false`.
template<>
void ColumnChunk::setValueFromString<bool>(const char* value, uint64_t length, uint64_t pos) {
    // Bound the copy by `length` instead of scanning for a terminating NUL, so a buffer that
    // is not NUL-terminated (or has trailing data after the token) is not over-read.
    std::istringstream boolStream{std::string(value, length)};
    // Start from a defined value: when the stream fails before reading anything (e.g. empty
    // or whitespace-only input) the extraction leaves its target untouched.
    bool booleanVal = false;
    boolStream >> std::boolalpha >> booleanVal;
    setValue(booleanVal, pos);
}

// Fixed list
// Builds the fixed-list value with TableCopyUtils::getArrowFixedList (called with offsets 1
// and `length - 2`, presumably to exclude the enclosing brackets — confirm against that
// helper) and copies `numBytesPerValue` bytes of it into this chunk's buffer slot for `pos`.
template<>
void ColumnChunk::setValueFromString<uint8_t*>(const char* value, uint64_t length, uint64_t pos) {
    auto parsedList =
        TableCopyUtils::getArrowFixedList(value, 1, length - 2, dataType, *csvReaderConfig);
    // Slots are laid out back to back, `numBytesPerValue` bytes each.
    auto* slot = buffer.get() + pos * numBytesPerValue;
    memcpy(slot, parsedList.get(), numBytesPerValue);
}

// Interval
// Converts the first `length` characters of `value` with Interval::fromCString and stores
// the result at `pos`.
template<>
void ColumnChunk::setValueFromString<interval_t>(const char* value, uint64_t length, uint64_t pos) {
    setValue(Interval::fromCString(value, length), pos);
}

// Date
// Converts the first `length` characters of `value` with Date::fromCString and stores the
// result at `pos`.
template<>
void ColumnChunk::setValueFromString<date_t>(const char* value, uint64_t length, uint64_t pos) {
    setValue(Date::fromCString(value, length), pos);
}

// Timestamp
// Converts the first `length` characters of `value` with Timestamp::fromCString and stores
// the result at `pos`.
template<>
void ColumnChunk::setValueFromString<timestamp_t>(
    const char* value, uint64_t length, uint64_t pos) {
    setValue(Timestamp::fromCString(value, length), pos);
}

offset_t ColumnChunk::getOffsetInBuffer(offset_t pos) const {
auto numElementsInAPage =
PageUtils::getNumElementsInAPage(numBytesPerValue, false /* hasNull */);
Expand Down
23 changes: 3 additions & 20 deletions src/storage/store/string_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
nullChunk->setNull(offsetInChunk, vector->isNull(pos));
if (!vector->isNull(pos)) {
auto kuStr = vector->getValue<ku_string_t>(pos);
setValueFromString<ku_string_t>(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk);
setValueFromString(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk);

Check warning on line 67 in src/storage/store/string_column_chunk.cpp

View check run for this annotation

Codecov / codecov/patch

src/storage/store/string_column_chunk.cpp#L67

Added line #L67 was not covered by tests
}
}
}
Expand Down Expand Up @@ -106,27 +106,10 @@
return;
}
auto strVal = val.getValue<std::string>();
setValueFromString<ku_string_t>(strVal.c_str(), strVal.length(), posToWrite);
setValueFromString(strVal.c_str(), strVal.length(), posToWrite);

Check warning on line 109 in src/storage/store/string_column_chunk.cpp

View check run for this annotation

Codecov / codecov/patch

src/storage/store/string_column_chunk.cpp#L109

Added line #L109 was not covered by tests
}

// BLOB
// Converts the textual blob in `value` with Blob::fromString into a scratch buffer of
// `length` bytes, copies the converted bytes into the overflow file at `overflowCursor`, and
// stores the value returned by copyString at `pos`.
// Throws CopyException when `length` exceeds BufferPoolConstants::PAGE_4KB_SIZE.
template<>
void StringColumnChunk::setValueFromString<blob_t>(
    const char* value, uint64_t length, uint64_t pos) {
    const bool exceedsPage = length > BufferPoolConstants::PAGE_4KB_SIZE;
    if (exceedsPage) {
        throw CopyException(
            ExceptionMessage::overLargeStringValueException(std::to_string(length)));
    }
    // The scratch buffer is sized by the textual length; Blob::fromString reports how many
    // bytes it actually produced.
    auto scratch = std::make_unique<uint8_t[]>(length);
    auto convertedLen = Blob::fromString(value, length, scratch.get());
    auto* rawBytes = reinterpret_cast<char*>(scratch.get());
    setValue(overflowFile->copyString(rawBytes, convertedLen, overflowCursor), pos);
}

// STRING
template<>
void StringColumnChunk::setValueFromString<ku_string_t>(
const char* value, uint64_t length, uint64_t pos) {
void StringColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) {

Check warning on line 112 in src/storage/store/string_column_chunk.cpp

View check run for this annotation

Codecov / codecov/patch

src/storage/store/string_column_chunk.cpp#L112

Added line #L112 was not covered by tests
if (length > BufferPoolConstants::PAGE_4KB_SIZE) {
throw CopyException(
ExceptionMessage::overLargeStringValueException(std::to_string(length)));
Expand Down
117 changes: 0 additions & 117 deletions src/storage/store/struct_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,123 +49,6 @@ void StructColumnChunk::append(common::ValueVector* vector, common::offset_t sta
numValues += vector->state->selVector->selectedSize;
}

void StructColumnChunk::setStructFields(const char* value, uint64_t length, uint64_t pos) {
// Removes the leading and the trailing brackets.
switch (dataType.getLogicalTypeID()) {
case LogicalTypeID::STRUCT: {
auto structString = std::string(value, length).substr(1, length - 2);
auto structFieldIdxAndValuePairs =
TableCopyUtils::parseStructFieldNameAndValues(dataType, structString, *csvReaderConfig);
for (auto& fieldIdxAndValue : structFieldIdxAndValuePairs) {
setValueToStructField(pos, fieldIdxAndValue.fieldValue, fieldIdxAndValue.fieldIdx);
}
} break;
case LogicalTypeID::UNION: {
union_field_idx_t selectedFieldIdx = INVALID_STRUCT_FIELD_IDX;
for (auto i = 0u; i < UnionType::getNumFields(&dataType); i++) {
auto internalFieldIdx = UnionType::getInternalFieldIdx(i);
if (TableCopyUtils::tryCast(*UnionType::getFieldType(&dataType, i), value, length)) {
childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, false /* isNull */);
setValueToStructField(pos, std::string(value, length), internalFieldIdx);
selectedFieldIdx = i;
break;
} else {
childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, true /* isNull */);
}
}
if (selectedFieldIdx == INVALID_STRUCT_FIELD_IDX) {
throw ParserException{StringUtils::string_format(
"No parsing rule matches value: {}.", std::string(value, length))};
}
childrenChunks[UnionType::TAG_FIELD_IDX]->setValue(selectedFieldIdx, pos);
childrenChunks[UnionType::TAG_FIELD_IDX]->getNullChunk()->setNull(pos, false /* isNull */);
} break;
default: {
throw NotImplementedException("StructColumnChunk::setStructFields");
}
}
}

// Writes the textual value of one struct field into the corresponding child chunk.
//
// pos              position inside the child chunk to write to.
// structFieldValue textual representation of the field's value.
// structFiledIdx   index into `childrenChunks` selecting the child chunk.
//
// Dispatches on the child chunk's logical type: scalar types go through the child's
// setValueFromString<T>, STRING / VAR_LIST go through the matching derived chunk's
// setValueFromString, and nested STRUCT recurses through setStructFields.
// Throws NotImplementedException for any other child type; the message reports the child
// chunk's type (the type that is actually unsupported), not the enclosing struct's type.
void StructColumnChunk::setValueToStructField(
    offset_t pos, const std::string& structFieldValue, struct_field_idx_t structFiledIdx) {
    auto fieldChunk = childrenChunks[structFiledIdx].get();
    // Every branch forwards the same (pointer, length) pair, so compute it once.
    const char* fieldStr = structFieldValue.c_str();
    const auto fieldLen = structFieldValue.length();
    switch (fieldChunk->getDataType().getLogicalTypeID()) {
    case LogicalTypeID::INT64: {
        fieldChunk->setValueFromString<int64_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::INT32: {
        fieldChunk->setValueFromString<int32_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::INT16: {
        fieldChunk->setValueFromString<int16_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::INT8: {
        fieldChunk->setValueFromString<int8_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::UINT64: {
        fieldChunk->setValueFromString<uint64_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::UINT32: {
        fieldChunk->setValueFromString<uint32_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::UINT16: {
        fieldChunk->setValueFromString<uint16_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::UINT8: {
        fieldChunk->setValueFromString<uint8_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::DOUBLE: {
        fieldChunk->setValueFromString<double_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::FLOAT: {
        fieldChunk->setValueFromString<float_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::BOOL: {
        fieldChunk->setValueFromString<bool>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::DATE: {
        fieldChunk->setValueFromString<date_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::TIMESTAMP: {
        fieldChunk->setValueFromString<timestamp_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::INTERVAL: {
        fieldChunk->setValueFromString<interval_t>(fieldStr, fieldLen, pos);
    } break;
    // The derived chunk classes publicly inherit from ColumnChunk, so a static_cast is the
    // appropriate downcast once the logical type has identified the concrete class.
    case LogicalTypeID::STRING: {
        static_cast<StringColumnChunk*>(fieldChunk)
            ->setValueFromString<ku_string_t>(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::VAR_LIST: {
        static_cast<VarListColumnChunk*>(fieldChunk)->setValueFromString(fieldStr, fieldLen, pos);
    } break;
    case LogicalTypeID::STRUCT: {
        static_cast<StructColumnChunk*>(fieldChunk)->setStructFields(fieldStr, fieldLen, pos);
    } break;
    default: {
        // Report the field's own type: `dataType` is this struct chunk's type and would
        // misidentify which type is unsupported.
        throw NotImplementedException{StringUtils::string_format("Unsupported data type: {}.",
            LogicalTypeUtils::dataTypeToString(fieldChunk->getDataType()))};
    }
    }
}

void StructColumnChunk::write(const Value& val, uint64_t posToWrite) {
assert(val.getDataType()->getPhysicalType() == PhysicalTypeID::STRUCT);
auto numElements = NestedVal::getChildrenSize(&val);
Expand Down
6 changes: 0 additions & 6 deletions src/storage/store/var_list_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,6 @@ void VarListColumnChunk::write(const Value& listVal, uint64_t posToWrite) {
setValue(varListDataColumnChunk.getNumValues(), posToWrite);
}

// Builds a list value from the text in `value` with TableCopyUtils::getVarListValue (called
// with offsets 1 and `length - 2`, presumably to exclude the enclosing brackets — confirm
// against that helper) and writes it at `pos`.
void VarListColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) {
    auto parsedList =
        TableCopyUtils::getVarListValue(value, 1, length - 2, dataType, *csvReaderConfig);
    write(*parsedList, pos);
}

void VarListColumnChunk::resetToEmpty() {
ColumnChunk::resetToEmpty();
varListDataColumnChunk.reset();
Expand Down