Skip to content

Commit

Permalink
Merge pull request #2169 from kuzudb/clean-column-chunk
Browse files Browse the repository at this point in the history
Remove `setValueFromString` from ColumnChunk
  • Loading branch information
ray6080 committed Oct 9, 2023
2 parents 5c89dc5 + 9d2e725 commit 531e50c
Show file tree
Hide file tree
Showing 8 changed files with 6 additions and 227 deletions.
24 changes: 0 additions & 24 deletions src/include/storage/store/column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,6 @@ class ColumnChunk {
// Returns the size of the data type in bytes
static uint32_t getDataTypeSizeInChunk(common::LogicalType& dataType);

// Generic string-to-value setter: converts (value, length) with
// function::castStringToNum<T> and stores the converted value at `pos` through setValue<T>.
template<typename T>
void setValueFromString(const char* value, uint64_t length, common::offset_t pos) {
    const T converted = function::castStringToNum<T>(value, length);
    setValue<T>(converted, pos);
}

static inline common::page_idx_t getNumPagesForBytes(uint64_t numBytes) {
return (numBytes + common::BufferPoolConstants::PAGE_4KB_SIZE - 1) /
common::BufferPoolConstants::PAGE_4KB_SIZE;
Expand Down Expand Up @@ -270,24 +265,5 @@ struct ColumnChunkFactory {
bool enableCompression, common::CSVReaderConfig* csvReaderConfig = nullptr);
};

// Explicit specializations of ColumnChunk::setValueFromString. Only the declarations live
// here; each specialization replaces the generic castStringToNum-based template for the
// type named in the comment above it.
// BOOL: specialization for T = bool.
template<>
void ColumnChunk::setValueFromString<bool>(const char* value, uint64_t length, uint64_t pos);
// FIXED_LIST: specialization for T = uint8_t*.
template<>
void ColumnChunk::setValueFromString<uint8_t*>(const char* value, uint64_t length, uint64_t pos);
// INTERVAL: specialization for T = common::interval_t.
template<>
void ColumnChunk::setValueFromString<common::interval_t>(
const char* value, uint64_t length, uint64_t pos);
// DATE: specialization for T = common::date_t.
template<>
void ColumnChunk::setValueFromString<common::date_t>(
const char* value, uint64_t length, uint64_t pos);
// TIMESTAMP: specialization for T = common::timestamp_t.
template<>
void ColumnChunk::setValueFromString<common::timestamp_t>(
const char* value, uint64_t length, uint64_t pos);

} // namespace storage
} // namespace kuzu
17 changes: 3 additions & 14 deletions src/include/storage/store/string_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ class StringColumnChunk : public ColumnChunk {

void update(common::ValueVector* vector, common::vector_idx_t vectorIdx) override;

// Generic fallback: unconditionally throws common::NotImplementedException. The arguments
// are not used by this generic version.
// NOTE(review): the message names "VarSizedColumnChunk" although the enclosing class is
// StringColumnChunk - presumably a leftover from an earlier class name; confirm before
// changing the text.
template<typename T>
void setValueFromString(const char* value, uint64_t length, uint64_t pos) {
throw common::NotImplementedException("VarSizedColumnChunk::setValueFromString");
}
template<typename T>
T getValue(common::offset_t pos) const {
throw common::NotImplementedException("VarSizedColumnChunk::getValue");
Expand All @@ -31,28 +27,21 @@ class StringColumnChunk : public ColumnChunk {
common::page_idx_t flushOverflowBuffer(BMFileHandle* dataFH, common::page_idx_t startPageIdx);

inline InMemOverflowFile* getOverflowFile() { return overflowFile.get(); }
inline common::offset_t getLastOffsetInPage() { return overflowCursor.offsetInPage; }
inline common::offset_t getLastOffsetInPage() const { return overflowCursor.offsetInPage; }

private:
void appendStringColumnChunk(StringColumnChunk* other, common::offset_t startPosInOtherChunk,
common::offset_t startPosInChunk, uint32_t numValuesToAppend);

void write(const common::Value& val, uint64_t posToWrite) override;

void setValueFromString(const char* value, uint64_t length, uint64_t pos);

private:
std::unique_ptr<InMemOverflowFile> overflowFile;
PageByteCursor overflowCursor;
};

// Explicit specializations of StringColumnChunk::setValueFromString (declarations only).
// BLOB: specialization for T = common::blob_t.
template<>
void StringColumnChunk::setValueFromString<common::blob_t>(
const char* value, uint64_t length, uint64_t pos);
// STRING: specialization for T = common::ku_string_t.
template<>
void StringColumnChunk::setValueFromString<common::ku_string_t>(
const char* value, uint64_t length, uint64_t pos);

// STRING
template<>
std::string StringColumnChunk::getValue<std::string>(common::offset_t pos) const;
Expand Down
5 changes: 0 additions & 5 deletions src/include/storage/store/struct_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ class StructColumnChunk : public ColumnChunk {
void append(common::ValueVector* vector, common::offset_t startPosInChunk) final;

private:
// TODO(Guodong): These methods are duplicated from `InMemStructColumnChunk`, which will be
// merged later.
void setStructFields(const char* value, uint64_t length, uint64_t pos);
void setValueToStructField(common::offset_t pos, const std::string& structFieldValue,
common::struct_field_idx_t structFiledIdx);
void write(const common::Value& val, uint64_t posToWrite) final;
};

Expand Down
2 changes: 0 additions & 2 deletions src/include/storage/store/var_list_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ class VarListColumnChunk : public ColumnChunk {
return varListDataColumnChunk.dataColumnChunk.get();
}

void setValueFromString(const char* value, uint64_t length, uint64_t pos);

void resetToEmpty() final;

void append(common::ValueVector* vector, common::offset_t startPosInChunk) final;
Expand Down
39 changes: 0 additions & 39 deletions src/storage/store/column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,45 +492,6 @@ std::unique_ptr<ColumnChunk> ColumnChunkFactory::createColumnChunk(
return chunk;
}

// Bool
// Parses (value, length) as a textual boolean (std::boolalpha extraction) and stores the
// result at `pos`.
template<>
void ColumnChunk::setValueFromString<bool>(const char* value, uint64_t length, uint64_t pos) {
    // Build the string from (value, length) so only the bytes the caller specified are
    // parsed; the buffer is then not required to be null-terminated.
    std::istringstream boolStream{std::string(value, length)};
    // Start from false: when extraction fails before anything is stored (e.g. empty input),
    // the variable would otherwise be read uninitialized.
    bool booleanVal = false;
    boolStream >> std::boolalpha >> booleanVal;
    setValue(booleanVal, pos);
}

// Fixed list
// Converts the text in `value` with TableCopyUtils::getArrowFixedList and copies
// numBytesPerValue bytes of the converted data into the slot for `pos` in `buffer`.
template<>
void ColumnChunk::setValueFromString<uint8_t*>(const char* value, uint64_t length, uint64_t pos) {
    // The helper is given 1 and length - 2; presumably this skips the enclosing brackets
    // (TODO confirm against getArrowFixedList).
    auto parsedList =
        TableCopyUtils::getArrowFixedList(value, 1, length - 2, dataType, *csvReaderConfig);
    auto* destination = buffer.get() + pos * numBytesPerValue;
    memcpy(destination, parsedList.get(), numBytesPerValue);
}

// Interval
// Hands (value, length) to Interval::fromCString and stores the returned value at `pos`.
template<>
void ColumnChunk::setValueFromString<interval_t>(const char* value, uint64_t length, uint64_t pos) {
    setValue(Interval::fromCString(value, length), pos);
}

// Date
// Hands (value, length) to Date::fromCString and stores the returned value at `pos`.
template<>
void ColumnChunk::setValueFromString<date_t>(const char* value, uint64_t length, uint64_t pos) {
    setValue(Date::fromCString(value, length), pos);
}

// Timestamp
// Hands (value, length) to Timestamp::fromCString and stores the returned value at `pos`.
template<>
void ColumnChunk::setValueFromString<timestamp_t>(
    const char* value, uint64_t length, uint64_t pos) {
    setValue(Timestamp::fromCString(value, length), pos);
}

offset_t ColumnChunk::getOffsetInBuffer(offset_t pos) const {
auto numElementsInAPage =
PageUtils::getNumElementsInAPage(numBytesPerValue, false /* hasNull */);
Expand Down
23 changes: 3 additions & 20 deletions src/storage/store/string_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void StringColumnChunk::update(ValueVector* vector, vector_idx_t vectorIdx) {
nullChunk->setNull(offsetInChunk, vector->isNull(pos));
if (!vector->isNull(pos)) {
auto kuStr = vector->getValue<ku_string_t>(pos);
setValueFromString<ku_string_t>(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk);
setValueFromString(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk);
}
}
}
Expand Down Expand Up @@ -106,27 +106,10 @@ void StringColumnChunk::write(const Value& val, uint64_t posToWrite) {
return;
}
auto strVal = val.getValue<std::string>();
setValueFromString<ku_string_t>(strVal.c_str(), strVal.length(), posToWrite);
setValueFromString(strVal.c_str(), strVal.length(), posToWrite);
}

// BLOB
// Converts the textual blob in (value, length) with Blob::fromString, copies the converted
// bytes into the overflow file at `overflowCursor`, and stores the returned entry at `pos`.
// Throws CopyException when `length` exceeds BufferPoolConstants::PAGE_4KB_SIZE.
template<>
void StringColumnChunk::setValueFromString<blob_t>(
    const char* value, uint64_t length, uint64_t pos) {
    if (length > BufferPoolConstants::PAGE_4KB_SIZE) {
        throw CopyException(
            ExceptionMessage::overLargeStringValueException(std::to_string(length)));
    }
    // Scratch buffer sized by the input length; Blob::fromString reports how many bytes it
    // actually produced.
    auto decoded = std::make_unique<uint8_t[]>(length);
    auto decodedLen = Blob::fromString(value, length, decoded.get());
    auto* decodedChars = reinterpret_cast<char*>(decoded.get());
    auto overflowEntry = overflowFile->copyString(decodedChars, decodedLen, overflowCursor);
    setValue(overflowEntry, pos);
}

// STRING
template<>
void StringColumnChunk::setValueFromString<ku_string_t>(
const char* value, uint64_t length, uint64_t pos) {
void StringColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) {
if (length > BufferPoolConstants::PAGE_4KB_SIZE) {
throw CopyException(
ExceptionMessage::overLargeStringValueException(std::to_string(length)));
Expand Down
117 changes: 0 additions & 117 deletions src/storage/store/struct_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,123 +49,6 @@ void StructColumnChunk::append(common::ValueVector* vector, common::offset_t sta
numValues += vector->state->selVector->selectedSize;
}

// Parses the textual value (value, length) and writes it at `pos` into the child chunks,
// depending on whether this chunk's logical type is STRUCT or UNION.
// Throws ParserException when no UNION field accepts the value, and
// NotImplementedException for any other logical type.
void StructColumnChunk::setStructFields(const char* value, uint64_t length, uint64_t pos) {
// Removes the leading and the trailing brackets.
switch (dataType.getLogicalTypeID()) {
case LogicalTypeID::STRUCT: {
// Drop the first and last character, then split the remainder into
// (field index, field value) pairs and write each one into its child chunk.
auto structString = std::string(value, length).substr(1, length - 2);
auto structFieldIdxAndValuePairs =
TableCopyUtils::parseStructFieldNameAndValues(dataType, structString, *csvReaderConfig);
for (auto& fieldIdxAndValue : structFieldIdxAndValuePairs) {
setValueToStructField(pos, fieldIdxAndValue.fieldValue, fieldIdxAndValue.fieldIdx);
}
} break;
case LogicalTypeID::UNION: {
// Try the union's fields in order; the first field type that TableCopyUtils::tryCast
// accepts receives the value, and its index is recorded in the tag field.
union_field_idx_t selectedFieldIdx = INVALID_STRUCT_FIELD_IDX;
for (auto i = 0u; i < UnionType::getNumFields(&dataType); i++) {
auto internalFieldIdx = UnionType::getInternalFieldIdx(i);
if (TableCopyUtils::tryCast(*UnionType::getFieldType(&dataType, i), value, length)) {
childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, false /* isNull */);
setValueToStructField(pos, std::string(value, length), internalFieldIdx);
selectedFieldIdx = i;
// NOTE(review): fields after the selected one are not visited, so this loop never
// touches their null bit at `pos` - confirm it is set elsewhere.
break;
} else {
// A field that rejected the value is marked null at `pos`.
childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, true /* isNull */);
}
}
// Still the sentinel: no field type accepted the value.
if (selectedFieldIdx == INVALID_STRUCT_FIELD_IDX) {
throw ParserException{StringUtils::string_format(
"No parsing rule matches value: {}.", std::string(value, length))};
}
// Record which field was selected in the tag child chunk and mark the tag non-null.
childrenChunks[UnionType::TAG_FIELD_IDX]->setValue(selectedFieldIdx, pos);
childrenChunks[UnionType::TAG_FIELD_IDX]->getNullChunk()->setNull(pos, false /* isNull */);
} break;
default: {
throw NotImplementedException("StructColumnChunk::setStructFields");
}
}
}

// Writes `structFieldValue` at `pos` into the child chunk at index `structFiledIdx`,
// choosing the string-to-value routine from that child's logical type.
// Throws NotImplementedException for child types without a case below.
void StructColumnChunk::setValueToStructField(
    offset_t pos, const std::string& structFieldValue, struct_field_idx_t structFiledIdx) {
    auto* const childChunk = childrenChunks[structFiledIdx].get();
    // Every case forwards the same (pointer, length) pair, so compute it once.
    const char* const rawValue = structFieldValue.c_str();
    const auto rawLength = structFieldValue.length();
    switch (childChunk->getDataType().getLogicalTypeID()) {
    case LogicalTypeID::INT64: {
        childChunk->setValueFromString<int64_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::INT32: {
        childChunk->setValueFromString<int32_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::INT16: {
        childChunk->setValueFromString<int16_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::INT8: {
        childChunk->setValueFromString<int8_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::UINT64: {
        childChunk->setValueFromString<uint64_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::UINT32: {
        childChunk->setValueFromString<uint32_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::UINT16: {
        childChunk->setValueFromString<uint16_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::UINT8: {
        childChunk->setValueFromString<uint8_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::DOUBLE: {
        childChunk->setValueFromString<double_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::FLOAT: {
        childChunk->setValueFromString<float_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::BOOL: {
        childChunk->setValueFromString<bool>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::DATE: {
        childChunk->setValueFromString<date_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::TIMESTAMP: {
        childChunk->setValueFromString<timestamp_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::INTERVAL: {
        childChunk->setValueFromString<interval_t>(rawValue, rawLength, pos);
    } break;
    // The remaining types go through the subclass's own setter, so the child chunk is
    // reinterpreted as that subclass first.
    case LogicalTypeID::STRING: {
        auto* stringChunk = reinterpret_cast<StringColumnChunk*>(childChunk);
        stringChunk->setValueFromString<ku_string_t>(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::VAR_LIST: {
        auto* varListChunk = reinterpret_cast<VarListColumnChunk*>(childChunk);
        varListChunk->setValueFromString(rawValue, rawLength, pos);
    } break;
    case LogicalTypeID::STRUCT: {
        auto* structChunk = reinterpret_cast<StructColumnChunk*>(childChunk);
        structChunk->setStructFields(rawValue, rawLength, pos);
    } break;
    default: {
        // NOTE(review): the message formats this chunk's own `dataType`, not the child
        // chunk's type that selected this branch - confirm that is intended.
        throw NotImplementedException{StringUtils::string_format(
            "Unsupported data type: {}.", LogicalTypeUtils::dataTypeToString(dataType))};
    }
    }
}

void StructColumnChunk::write(const Value& val, uint64_t posToWrite) {
assert(val.getDataType()->getPhysicalType() == PhysicalTypeID::STRUCT);
auto numElements = NestedVal::getChildrenSize(&val);
Expand Down
6 changes: 0 additions & 6 deletions src/storage/store/var_list_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,6 @@ void VarListColumnChunk::write(const Value& listVal, uint64_t posToWrite) {
setValue(varListDataColumnChunk.getNumValues(), posToWrite);
}

// Converts the text in `value` with TableCopyUtils::getVarListValue and writes the
// resulting value at `pos` via write().
void VarListColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) {
    // The helper is given 1 and length - 2; presumably this skips the enclosing brackets
    // (TODO confirm against getVarListValue).
    auto parsedList =
        TableCopyUtils::getVarListValue(value, 1, length - 2, dataType, *csvReaderConfig);
    write(*parsedList, pos);
}

void VarListColumnChunk::resetToEmpty() {
ColumnChunk::resetToEmpty();
varListDataColumnChunk.reset();
Expand Down

0 comments on commit 531e50c

Please sign in to comment.