Skip to content

Commit

Permalink
improve performance
Browse files Browse the repository at this point in the history
  • Loading branch information
hououou committed Mar 16, 2024
1 parent 757959a commit 2a87361
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 4 deletions.
4 changes: 4 additions & 0 deletions src/include/storage/store/var_list_column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ class VarListColumnChunk : public ColumnChunk {
}

void resetOffset();
void resetFromOtherChunk(VarListColumnChunk* other);
void finalize() final;
bool isOffsetSortedAscending(uint64_t startPos, uint64_t endPos) const;

protected:
void copyListValues(const common::list_entry_t& entry, common::ValueVector* dataVector);
Expand All @@ -98,6 +101,7 @@ class VarListColumnChunk : public ColumnChunk {
protected:
std::unique_ptr<ColumnChunk> sizeColumnChunk;
std::unique_ptr<VarListDataColumnChunk> varListDataColumnChunk;
bool checkOrder;
};

} // namespace storage
Expand Down
77 changes: 73 additions & 4 deletions src/storage/store/var_list_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,23 @@ VarListColumnChunk::VarListColumnChunk(
varListDataColumnChunk = std::make_unique<VarListDataColumnChunk>(
ColumnChunkFactory::createColumnChunk(*VarListType::getChildType(&this->dataType)->copy(),
enableCompression, 0 /* capacity */, inMemory));
checkOrder = true;
KU_ASSERT(this->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST);
}

bool VarListColumnChunk::isOffsetSortedAscending(uint64_t startPos, uint64_t endPos) const {
offset_t prevEndOffset = getListStartOffset(startPos);
for (auto i = startPos; i < endPos; i++) {
offset_t currentEndOffset = getListEndOffset(i);
auto length = getListLen(i);
prevEndOffset += length;
if (currentEndOffset != prevEndOffset) {
return false;
}
}
return true;
}

void VarListColumnChunk::append(
ColumnChunk* other, offset_t startPosInOtherChunk, uint32_t numValuesToAppend) {
auto otherListChunk = ku_dynamic_cast<ColumnChunk*, VarListColumnChunk*>(other);
Expand All @@ -50,11 +64,21 @@ void VarListColumnChunk::append(
setValue<offset_t>(offsetInDataChunkToAppend, numValues);
}
varListDataColumnChunk->resizeBuffer(offsetInDataChunkToAppend);
for (auto i = 0u; i < numValuesToAppend; i++) {
auto startOffset = otherListChunk->getListStartOffset(startPosInOtherChunk + i);
auto appendLen = otherListChunk->getListLen(startPosInOtherChunk + i);
if (checkOrder && otherListChunk->isOffsetSortedAscending(
startPosInOtherChunk, startPosInOtherChunk + numValuesToAppend)) {
auto startOffset = otherListChunk->getListStartOffset(startPosInOtherChunk);
numValuesToAppend = numValuesToAppend == 0 ? 0 : numValuesToAppend - 1;
auto endOffset = otherListChunk->getListEndOffset(startPosInOtherChunk + numValuesToAppend);
varListDataColumnChunk->dataColumnChunk->append(
otherListChunk->varListDataColumnChunk->dataColumnChunk.get(), startOffset, appendLen);
otherListChunk->varListDataColumnChunk->dataColumnChunk.get(), startOffset, endOffset);
} else {
for (auto i = 0u; i < numValuesToAppend; i++) {
auto startOffset = otherListChunk->getListStartOffset(startPosInOtherChunk + i);
auto appendLen = otherListChunk->getListLen(startPosInOtherChunk + i);
varListDataColumnChunk->dataColumnChunk->append(
otherListChunk->varListDataColumnChunk->dataColumnChunk.get(), startOffset,
appendLen);
}
}
}

Expand Down Expand Up @@ -111,6 +135,7 @@ void VarListColumnChunk::appendNullList() {
}

void VarListColumnChunk::write(ColumnChunk* chunk, ColumnChunk* dstOffsets, bool /*isCSR*/) {
checkOrder = false;
auto otherListChunk = ku_dynamic_cast<ColumnChunk*, VarListColumnChunk*>(chunk);
offset_t currentIndex = varListDataColumnChunk->getNumValues();
varListDataColumnChunk->resizeBuffer(varListDataColumnChunk->getNumValues() +
Expand Down Expand Up @@ -142,6 +167,7 @@ void VarListColumnChunk::write(ColumnChunk* chunk, ColumnChunk* dstOffsets, bool

void VarListColumnChunk::write(
ValueVector* vector, offset_t offsetInVector, offset_t offsetInChunk) {
checkOrder = false;
auto selVector = std::make_unique<SelectionVector>(1);
selVector->resetSelectorToValuePosBuffer();
selVector->selectedPositions[0] = offsetInVector;
Expand All @@ -167,6 +193,7 @@ void VarListColumnChunk::write(
void VarListColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk,
offset_t dstOffsetInChunk, offset_t numValuesToCopy) {
KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::VAR_LIST);
checkOrder = false;
nullChunk->write(srcChunk->getNullChunk(), srcOffsetInChunk, dstOffsetInChunk, numValuesToCopy);
sizeColumnChunk->getNullChunk()->write(
srcChunk->getNullChunk(), srcOffsetInChunk, dstOffsetInChunk, numValuesToCopy);
Expand Down Expand Up @@ -227,5 +254,47 @@ void VarListColumnChunk::resetOffset() {
}
}

void VarListColumnChunk::finalize() {
auto newColumnChunk = ColumnChunkFactory::createColumnChunk(
std::move(*dataType.copy()), enableCompression, capacity);
uint64_t totalListLen = varListDataColumnChunk->getNumValues();
uint64_t resizeThreshold = capacity / 2;
if (totalListLen < resizeThreshold) {
return;
}
auto newVarListChunk = ku_dynamic_cast<ColumnChunk*, VarListColumnChunk*>(newColumnChunk.get());
newVarListChunk->resize(numValues);
newVarListChunk->getDataColumnChunk()->resize(totalListLen);
auto dataColumnChunk = newVarListChunk->getDataColumnChunk();
newVarListChunk->varListDataColumnChunk->resizeBuffer(totalListLen);
offset_t offsetInChunk = 0;
offset_t currentIndex = 0;
for (auto i = 0u; i < numValues; i++) {
if (nullChunk->isNull(i)) {
newVarListChunk->appendNullList();
} else {
auto startOffset = getListStartOffset(i);
auto appendLen = getListLen(i);
dataColumnChunk->append(
varListDataColumnChunk->dataColumnChunk.get(), startOffset, appendLen);
offsetInChunk += appendLen;
newVarListChunk->getNullChunk()->setNull(currentIndex, false);
newVarListChunk->sizeColumnChunk->getNullChunk()->setNull(currentIndex, false);
newVarListChunk->sizeColumnChunk->setValue<uint32_t>(appendLen, currentIndex);
newVarListChunk->setValue<offset_t>(offsetInChunk, currentIndex);
}
currentIndex++;
}
// Move offsets, null, data from newVarListChunk to this column chunk. And release indices.
resetFromOtherChunk(newVarListChunk);
}
// Takes over the contents of `other`: its buffer, null chunk, size chunk and list data chunk are
// moved into this chunk and its value count is copied. `other` is left holding moved-from
// members and should not be used as a populated chunk afterwards.
void VarListColumnChunk::resetFromOtherChunk(VarListColumnChunk* other) {
    auto& source = *other;
    this->buffer = std::move(source.buffer);
    this->nullChunk = std::move(source.nullChunk);
    this->sizeColumnChunk = std::move(source.sizeColumnChunk);
    this->varListDataColumnChunk = std::move(source.varListDataColumnChunk);
    this->numValues = source.numValues;
    // append() consults this flag before attempting its contiguous-offset check.
    this->checkOrder = true;
}
} // namespace storage
} // namespace kuzu

0 comments on commit 2a87361

Please sign in to comment.