diff --git a/src/include/storage/store/column_chunk.h b/src/include/storage/store/column_chunk.h index 14070e1bb7..4cff129b37 100644 --- a/src/include/storage/store/column_chunk.h +++ b/src/include/storage/store/column_chunk.h @@ -112,7 +112,7 @@ class ColumnChunk { virtual void copyVectorToBuffer(common::ValueVector* vector, common::offset_t startPosInChunk); private: - uint64_t getBufferSize() const; + uint64_t getBufferSize(uint64_t capacity_) const; protected: common::LogicalType dataType; diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 9beb2460f7..60445d41fe 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -157,7 +157,7 @@ ColumnChunk::ColumnChunk( void ColumnChunk::initializeBuffer(offset_t capacity_) { numBytesPerValue = getDataTypeSizeInChunk(dataType); capacity = capacity_; - bufferSize = getBufferSize(); + bufferSize = getBufferSize(capacity); buffer = std::make_unique(bufferSize); if (nullChunk) { nullChunk->initializeBuffer(capacity_); @@ -200,7 +200,8 @@ void ColumnChunk::resetToEmpty() { if (nullChunk) { nullChunk->resetToEmpty(); } - memset(buffer.get(), 0, bufferSize); + KU_ASSERT(bufferSize == getBufferSize(capacity)); + memset(buffer.get(), 0x00, bufferSize); numValues = 0; } @@ -288,8 +289,10 @@ void ColumnChunk::copy(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_ } void ColumnChunk::resize(uint64_t newCapacity) { - capacity = newCapacity; - auto numBytesAfterResize = getBufferSize(); + if (newCapacity > capacity) { + capacity = newCapacity; + } + auto numBytesAfterResize = getBufferSize(newCapacity); if (numBytesAfterResize > bufferSize) { auto resizedBuffer = std::make_unique(numBytesAfterResize); memcpy(resizedBuffer.get(), buffer.get(), bufferSize); @@ -374,31 +377,33 @@ ColumnChunkMetadata ColumnChunk::getMetadataToFlush() const { return ColumnChunkMetadata(INVALID_PAGE_IDX, 0, numValues, *constantMetadata); } } + KU_ASSERT(bufferSize == getBufferSize(capacity)); return getMetadataFunction(buffer.get(), bufferSize, capacity, numValues); } ColumnChunkMetadata ColumnChunk::flushBuffer( BMFileHandle* dataFH, page_idx_t startPageIdx, const ColumnChunkMetadata& metadata) { if (!metadata.compMeta.isConstant()) { + KU_ASSERT(bufferSize == getBufferSize(capacity)); return flushBufferFunction(buffer.get(), bufferSize, dataFH, startPageIdx, metadata); } return metadata; } -uint64_t ColumnChunk::getBufferSize() const { +uint64_t ColumnChunk::getBufferSize(uint64_t capacity_) const { switch (dataType.getLogicalTypeID()) { case LogicalTypeID::BOOL: { // 8 values per byte, and we need a buffer size which is a multiple of 8 bytes. - return ceil(capacity / 8.0 / 8.0) * 8; + return ceil(capacity_ / 8.0 / 8.0) * 8; } case LogicalTypeID::FIXED_LIST: { auto numElementsInAPage = PageUtils::getNumElementsInAPage(numBytesPerValue, false /* hasNull */); - auto numPages = capacity / numElementsInAPage + (capacity % numElementsInAPage ? 1 : 0); + auto numPages = capacity_ / numElementsInAPage + (capacity_ % numElementsInAPage ? 1 : 0); return BufferPoolConstants::PAGE_4KB_SIZE * numPages; } default: { - return numBytesPerValue * capacity; + return numBytesPerValue * capacity_; } } } @@ -468,6 +473,7 @@ void NullColumnChunk::setNull(offset_t pos, bool isNull) { // TODO(Guodong): Better let NullColumnChunk also support `append` a vector. if (pos >= numValues) { numValues = pos + 1; + KU_ASSERT(numValues <= capacity); } } diff --git a/src/storage/store/struct_column_chunk.cpp b/src/storage/store/struct_column_chunk.cpp index f047221be5..2c83e40308 100644 --- a/src/storage/store/struct_column_chunk.cpp +++ b/src/storage/store/struct_column_chunk.cpp @@ -51,6 +51,7 @@ void StructColumnChunk::append(ValueVector* vector) { void StructColumnChunk::resize(uint64_t newCapacity) { ColumnChunk::resize(newCapacity); + capacity = newCapacity; for (auto& child : childChunks) { child->resize(newCapacity); } diff --git a/src/storage/store/var_list_column_chunk.cpp b/src/storage/store/var_list_column_chunk.cpp index 1865181ea6..058dfda7ca 100644 --- a/src/storage/store/var_list_column_chunk.cpp +++ b/src/storage/store/var_list_column_chunk.cpp @@ -65,7 +65,7 @@ void VarListColumnChunk::append(ValueVector* vector) { while (numValues + numToAppend >= newCapacity) { newCapacity *= 1.5; } - if (capacity != newCapacity) { + if (capacity < newCapacity) { resize(newCapacity); } auto nextListOffsetInChunk = getListOffset(numValues);