Skip to content

Commit

Permalink
Avoid unnecessary copies when reading strings into the result vector
Browse files Browse the repository at this point in the history
  • Loading branch information
benjaminwinger committed Nov 13, 2023
1 parent 3ebb5d4 commit ec84643
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 5 deletions.
11 changes: 11 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,17 @@ void StringVector::addString(
}
}

// Prepares the string entry at `vectorPos` of `vector` to hold `length` bytes without writing
// any string contents. The entry's `len` is set to `length`; when `length` does not qualify as a
// short string (per ku_string_t::isShortString), overflow space is allocated from the vector's
// string auxiliary buffer and its address is stored in `overflowPtr`.
// Returns a reference to the entry so that the caller can fill in the contents afterwards.
ku_string_t& StringVector::reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length) {
    KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    auto& reserved = vector->getValue<ku_string_t>(vectorPos);
    reserved.len = length;
    if (ku_string_t::isShortString(length)) {
        // Short strings need no overflow allocation.
        return reserved;
    }
    // Only strings that are not short need space from the overflow buffer.
    auto* overflowBuffer = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    reserved.overflowPtr = reinterpret_cast<uint64_t>(overflowBuffer->allocateOverflow(length));
    return reserved;
}

void StringVector::addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr) {
KU_ASSERT(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
auto stringBuffer = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/vector/value_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ class StringVector {
static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
static void addString(
ValueVector* vector, uint32_t vectorPos, const char* srcStr, uint64_t length);
// Reserves space for a string of `length` bytes at position `vectorPos` without writing its
// contents: the entry's length is set, and overflow storage is allocated when the length does
// not qualify as a short string.
// The returned reference can be modified to set the string contents. The prefix is not
// populated here, so for strings that are not short the caller has to fill it in after
// writing the data.
static ku_string_t& reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length);
static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
static void addString(
ValueVector* vector, ku_string_t& dstStr, const char* srcStr, uint64_t length);
Expand Down
13 changes: 8 additions & 5 deletions src/storage/store/string_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,14 @@ void StringColumn::scanValueToVector(Transaction* transaction, const ReadState&
string_offset_t startOffset, string_offset_t endOffset, ValueVector* resultVector,
uint64_t offsetInVector) {
KU_ASSERT(endOffset >= startOffset);
// TODO: Add string to vector first and read directly instead of using a temporary buffer
std::unique_ptr<char[]> stringRead = std::make_unique<char[]>(endOffset - startOffset);
dataColumn->scan(transaction, dataState, startOffset, endOffset, (uint8_t*)stringRead.get());
StringVector::addString(
resultVector, offsetInVector, stringRead.get(), endOffset - startOffset);
// Add string to vector first and read directly into the vector
auto& kuString =
StringVector::reserveString(resultVector, offsetInVector, endOffset - startOffset);
dataColumn->scan(transaction, dataState, startOffset, endOffset, (uint8_t*)kuString.getData());
// Update prefix to match the scanned string data
if (!ku_string_t::isShortString(kuString.len)) {
memcpy(kuString.prefix, kuString.getData(), ku_string_t::PREFIX_LENGTH);
}
}

void StringColumn::scan(Transaction* transaction, node_group_idx_t nodeGroupIdx,
Expand Down

0 comments on commit ec84643

Please sign in to comment.