Skip to content

Commit

Permalink
Fix capacity of compressed column chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminwinger committed Sep 22, 2023
1 parent eac8b3f commit d24555e
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/include/storage/store/column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class ColumnChunk {
// Type descriptor for this chunk — presumably the logical type of the stored values; TODO confirm.
common::LogicalType dataType;
// Bytes occupied by one value. ColumnChunk::initialize multiplies it by the capacity to size
// the buffer; BoolColumnChunk::initialize sets it to 0 and sizes the buffer separately.
uint32_t numBytesPerValue;
// Size of `buffer` in bytes.
uint64_t bufferSize;
// Number of values the buffer is sized for; assigned in initialize() and resize().
uint64_t capacity;
// Raw storage, allocated with `bufferSize` bytes.
std::unique_ptr<uint8_t[]> buffer;
// Optional companion null chunk; when present it is initialized and resized alongside this chunk.
std::unique_ptr<NullColumnChunk> nullChunk;
// Child chunks owned by this chunk — presumably used for nested types; TODO confirm.
std::vector<std::unique_ptr<ColumnChunk>> childrenChunks;
Expand All @@ -179,7 +180,7 @@ class ColumnChunk {
// Callback that produces the final ColumnChunkMetadata from a buffer pointer, a uint64_t,
// a file handle, a page index and the preliminary metadata.
// NOTE(review): the uint64_t is presumably the buffer size in bytes — confirm at the call site.
std::function<ColumnChunkMetadata(const uint8_t*, uint64_t, BMFileHandle*, common::page_idx_t,
const PreliminaryColumnChunkMetadata&)>
flushBufferFunction;
// Callback that produces the preliminary metadata. getMetadataToFlush() invokes it with
// (buffer, bufferSize, capacity, numValues).
// This diff view shows two signature lines: the three-argument form is the pre-commit
// version and the four-argument form (with the added capacity argument) replaces it.
std::function<PreliminaryColumnChunkMetadata(const uint8_t*, uint64_t, uint64_t)>
std::function<PreliminaryColumnChunkMetadata(const uint8_t*, uint64_t, uint64_t, uint64_t)>
getMetadataFunction;
};

Expand Down
14 changes: 9 additions & 5 deletions src/storage/store/column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ ColumnChunkMetadata fixedSizedFlushBuffer(const uint8_t* buffer, uint64_t buffer
}

// Builds the preliminary metadata for a chunk of fixed-size values.
// The page count is derived from `bufferSize` through ColumnChunk::getNumPagesForBytes and a
// default-constructed CompressionMetadata is attached. `buffer` and `capacity` are not read.
// This diff view shows two parameter lines: the first is the pre-commit signature, the second
// (with the added `capacity` parameter) replaces it.
PreliminaryColumnChunkMetadata fixedSizedGetMetadata(
const uint8_t* buffer, uint64_t bufferSize, uint64_t numValues) {
const uint8_t* buffer, uint64_t bufferSize, uint64_t capacity, uint64_t numValues) {
// NOTE(review): the comment previously here spoke of marking the data as boolean compressed;
// it appears to have been copied from booleanGetMetadata. This function passes a
// default-constructed CompressionMetadata — confirm what that default denotes.
return PreliminaryColumnChunkMetadata(
ColumnChunk::getNumPagesForBytes(bufferSize), numValues, CompressionMetadata());
}

PreliminaryColumnChunkMetadata booleanGetMetadata(
const uint8_t* buffer, uint64_t bufferSize, uint64_t numValues) {
const uint8_t* buffer, uint64_t bufferSize, uint64_t capacity, uint64_t numValues) {
// Since we compress into memory, storage is the same as fixed-sized values,
// but we need to mark it as being boolean compressed.
return PreliminaryColumnChunkMetadata(ColumnChunk::getNumPagesForBytes(bufferSize), numValues,
Expand Down Expand Up @@ -87,10 +87,10 @@ class GetCompressionMetadata {
GetCompressionMetadata(const GetCompressionMetadata& other) = default;

// Computes the preliminary metadata for a chunk handled by the compression algorithm `alg`.
// Calls alg->startCompression(buffer, numValues) to obtain the compression metadata, then
// derives a page count from it. `bufferSize` is not read.
// This diff view shows the pre-commit and post-commit versions of the parameter line and of
// the numPages line; in each pair the second line is the replacement.
PreliminaryColumnChunkMetadata operator()(
const uint8_t* buffer, uint64_t bufferSize, uint64_t numValues) {
const uint8_t* buffer, uint64_t bufferSize, uint64_t capacity, uint64_t numValues) {
auto metadata = alg->startCompression(buffer, numValues);
// Presumably the number of values that fit in one 4KB page under this metadata — confirm
// against CompressionMetadata::numValues.
auto numValuesPerPage = metadata.numValues(BufferPoolConstants::PAGE_4KB_SIZE, dataType);
// Ceiling division: one extra page whenever there is a remainder. The first line counts
// pages for numValues, the replacement counts them for capacity.
// NOTE(review): no guard against numValuesPerPage == 0 is visible here.
auto numPages = numValues / numValuesPerPage + (numValues % numValuesPerPage == 0 ? 0 : 1);
auto numPages = capacity / numValuesPerPage + (capacity % numValuesPerPage == 0 ? 0 : 1);
return PreliminaryColumnChunkMetadata(numPages, numValues, metadata);
}
};
Expand Down Expand Up @@ -133,6 +133,7 @@ ColumnChunk::ColumnChunk(

void ColumnChunk::initialize(offset_t capacity) {
bufferSize = numBytesPerValue * capacity;
this->capacity = capacity;
buffer = std::make_unique<uint8_t[]>(bufferSize);
if (nullChunk) {
static_cast<ColumnChunk*>(nullChunk.get())->initialize(capacity);
Expand Down Expand Up @@ -313,6 +314,7 @@ void ColumnChunk::resize(uint64_t numValues) {
memcpy(resizedBuffer.get(), buffer.get(), bufferSize);
bufferSize = numBytesAfterResize;
buffer = std::move(resizedBuffer);
this->capacity = numValues;
if (nullChunk) {
nullChunk->resize(numValues);
}
Expand Down Expand Up @@ -461,7 +463,7 @@ void ColumnChunk::templateCopyValuesAsString(
}

// Returns the preliminary metadata for this chunk by delegating to getMetadataFunction with
// the chunk's buffer pointer, bufferSize, capacity and numValues.
// This diff view shows two return lines: the three-argument call is the pre-commit version and
// the four-argument call (passing capacity as well) replaces it.
PreliminaryColumnChunkMetadata ColumnChunk::getMetadataToFlush() const {
return getMetadataFunction(buffer.get(), bufferSize, numValues);
return getMetadataFunction(buffer.get(), bufferSize, capacity, numValues);
}

ColumnChunkMetadata ColumnChunk::flushBuffer(
Expand Down Expand Up @@ -696,6 +698,7 @@ void ColumnChunk::copyVectorToBuffer(
inline void BoolColumnChunk::initialize(common::offset_t capacity) {
numBytesPerValue = 0;
bufferSize = numBytesForValues(capacity);
this->capacity = capacity;
buffer = std::make_unique<uint8_t[]>(bufferSize);
if (nullChunk) {
static_cast<BoolColumnChunk*>(nullChunk.get())->initialize(capacity);
Expand All @@ -710,6 +713,7 @@ void BoolColumnChunk::resize(uint64_t capacity) {
// Carry the existing bytes over into the larger buffer, then adopt it.
memcpy(reservedBuffer.get(), buffer.get(), bufferSize);
buffer = std::move(reservedBuffer);
bufferSize = numBytesAfterResize;
// Record the capacity requested by the caller. In this function `capacity` is the parameter,
// whereas `numValues` is the member holding the current value count; assigning numValues here
// would leave the stored capacity out of step with the resized buffer and the null chunk.
this->capacity = capacity;
if (nullChunk) {
    // Keep the null chunk sized for the same number of values.
    nullChunk->resize(capacity);
}
Expand Down

0 comments on commit d24555e

Please sign in to comment.