Skip to content

Commit

Permalink
remove overflow buffer utils
Browse files: browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Jun 16, 2023
1 parent 054ac20 commit 2e6c8cc
Show file tree
Hide file tree
Showing 19 changed files with 121 additions and 122 deletions.
1 change: 0 additions & 1 deletion src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ add_library(kuzu_common
expression_type.cpp
file_utils.cpp
in_mem_overflow_buffer.cpp
in_mem_overflow_buffer_utils.cpp
logging_level_utils.cpp
metric.cpp
null_mask.cpp
Expand Down
19 changes: 0 additions & 19 deletions src/common/in_mem_overflow_buffer_utils.cpp

This file was deleted.

13 changes: 4 additions & 9 deletions src/common/types/ku_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,18 @@ void ku_string_t::set(const std::string& value) {
}

// Copies `length` bytes starting at `value` into this string.
// Strings with length <= SHORT_STR_LENGTH go through setShortString; longer ones go through
// setLongString, which writes through overflowPtr. For the long case overflowPtr must
// therefore already reference a writable buffer of at least `length` bytes.
void ku_string_t::set(const char* value, uint64_t length) {
    // setShortString/setLongString assign `len` and perform the memcpy calls themselves, so
    // repeating them here only copied every string twice.
    if (length <= SHORT_STR_LENGTH) {
        setShortString(value, length);
    } else {
        setLongString(value, length);
    }
}

// Copies the contents of `value` into this string.
// Strings with len <= SHORT_STR_LENGTH go through setShortString; longer ones go through
// setLongString, which copies value.len bytes from value.overflowPtr to this->overflowPtr.
// For the long case this->overflowPtr must therefore already reference a writable buffer of
// at least value.len bytes.
void ku_string_t::set(const ku_string_t& value) {
    // The helpers assign `len` and perform the memcpy calls themselves, so repeating them
    // here only copied every string twice.
    if (value.len <= SHORT_STR_LENGTH) {
        setShortString(value);
    } else {
        setLongString(value);
    }
}

Expand Down
8 changes: 0 additions & 8 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
#include "common/vector/auxiliary_buffer.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector.h"

namespace kuzu {
namespace common {

// Copies the `len` bytes at `value` into the ku_string_t entry at index `pos` of `vector`'s
// data buffer. The copy is done by InMemOverflowBufferUtils::copyString, which is handed this
// object's in-memory overflow buffer.
void StringAuxiliaryBuffer::addString(
    common::ValueVector* vector, uint32_t pos, char* value, uint64_t len) const {
    assert(vector->dataType.getLogicalTypeID() == LogicalTypeID::STRING);
    // View the vector's data buffer as an array of ku_string_t entries.
    auto* entries = (ku_string_t*)vector->getData();
    InMemOverflowBufferUtils::copyString(value, len, *(entries + pos), *inMemOverflowBuffer);
}

StructAuxiliaryBuffer::StructAuxiliaryBuffer(
const LogicalType& type, storage::MemoryManager* memoryManager) {
auto fieldTypes = StructType::getFieldTypes(&type);
Expand Down
63 changes: 57 additions & 6 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ void ValueVector::setValue(uint32_t pos, T val) {
((T*)valueBuffer.get())[pos] = val;
}

// Specialization of setValue for std::string: hands the string's character data and length
// to StringVector::addString for position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
StringVector::addString(this, pos, val.data(), val.length());
}

void ValueVector::resetAuxiliaryBuffer() {
switch (dataType.getPhysicalType()) {
case PhysicalTypeID::STRING: {
Expand Down Expand Up @@ -126,8 +121,64 @@ template void ValueVector::setValue<double_t>(uint32_t pos, double_t val);
// Explicit instantiations of the ValueVector::setValue<T> template for the value types below.
template void ValueVector::setValue<date_t>(uint32_t pos, date_t val);
template void ValueVector::setValue<timestamp_t>(uint32_t pos, timestamp_t val);
template void ValueVector::setValue<interval_t>(uint32_t pos, interval_t val);
template void ValueVector::setValue<ku_string_t>(uint32_t pos, ku_string_t val);
template void ValueVector::setValue<list_entry_t>(uint32_t pos, list_entry_t val);

template<>
void ValueVector::setValue(uint32_t pos, ku_string_t val) {
StringVector::addString(this, pos, val);
}
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
StringVector::addString(this, pos, val.data(), val.length());
}

// Stores a copy of `srcStr` in the ku_string_t entry at `vectorPos` of `vector`.
// `vector` must have physical type STRING (asserted). The actual copy, including allocating
// overflow space from the vector's string auxiliary buffer for non-short strings, is done by
// the overload that takes the destination entry by reference.
void StringVector::addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr) {
    // Check the type before indexing into the vector as ku_string_t entries.
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    // Delegate instead of repeating the short/long branching in every overload.
    StringVector::addString(vector, vector->getValue<ku_string_t>(vectorPos), srcStr);
}

// Stores a copy of the `length` bytes at `srcStr` in the ku_string_t entry at `vectorPos` of
// `vector`. `vector` must have physical type STRING (asserted). The actual copy, including
// allocating overflow space from the vector's string auxiliary buffer for non-short strings,
// is done by the overload that takes the destination entry by reference.
void StringVector::addString(
    ValueVector* vector, uint32_t vectorPos, const char* srcStr, uint64_t length) {
    // Check the type before indexing into the vector as ku_string_t entries.
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    // Delegate instead of repeating the short/long branching in every overload.
    StringVector::addString(vector, vector->getValue<ku_string_t>(vectorPos), srcStr, length);
}

// Copies `srcStr` into `dstStr`. `vector` must have physical type STRING (asserted).
// Short strings are copied inline. For any other length, srcStr.len bytes are first obtained
// from the vector's StringAuxiliaryBuffer and installed as dstStr.overflowPtr before the copy.
void StringVector::addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    if (ku_string_t::isShortString(srcStr.len)) {
        dstStr.setShortString(srcStr);
        return;
    }
    auto* overflowOwner =
        reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    // setLongString writes through overflowPtr, so it has to be set before the copy.
    auto* overflowSpace = overflowOwner->allocateOverflow(srcStr.len);
    dstStr.overflowPtr = reinterpret_cast<uint64_t>(overflowSpace);
    dstStr.setLongString(srcStr);
}

// Copies the `length` bytes at `srcStr` into `dstStr`. `vector` must have physical type
// STRING (asserted). Short strings are copied inline. For any other length, `length` bytes
// are first obtained from the vector's StringAuxiliaryBuffer and installed as
// dstStr.overflowPtr before the copy.
void StringVector::addString(
    ValueVector* vector, ku_string_t& dstStr, const char* srcStr, uint64_t length) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    if (ku_string_t::isShortString(length)) {
        dstStr.setShortString(srcStr, length);
        return;
    }
    auto* overflowOwner =
        reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    // setLongString writes through overflowPtr, so it has to be set before the copy.
    auto* overflowSpace = overflowOwner->allocateOverflow(length);
    dstStr.overflowPtr = reinterpret_cast<uint64_t>(overflowSpace);
    dstStr.setLongString(srcStr, length);
}

} // namespace common
} // namespace kuzu
22 changes: 11 additions & 11 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "common/vector/value_vector_utils.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_buffer.h"
#include "processor/result/factorized_table.h"

Expand Down Expand Up @@ -46,10 +45,7 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
}
} break;
case PhysicalTypeID::STRING: {
auto dstData = resultVector.getData() +
pos * processor::FactorizedTable::getDataTypeSize(resultVector.dataType);
InMemOverflowBufferUtils::copyString(*(ku_string_t*)srcData, *(ku_string_t*)dstData,
*StringVector::getInMemOverflowBuffer(&resultVector));
StringVector::addString(&resultVector, pos, *(ku_string_t*)srcData);
} break;
default: {
auto dataTypeSize = processor::FactorizedTable::getDataTypeSize(resultVector.dataType);
Expand Down Expand Up @@ -107,10 +103,15 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
memcpy(dstData, &dstList, sizeof(dstList));
} break;
case PhysicalTypeID::STRING: {
auto srcData = srcVector.getData() +
pos * processor::FactorizedTable::getDataTypeSize(srcVector.dataType);
InMemOverflowBufferUtils::copyString(
*(ku_string_t*)srcData, *(ku_string_t*)dstData, dstOverflowBuffer);
auto& srcStr = srcVector.getValue<ku_string_t>(pos);
auto& dstStr = *(ku_string_t*)dstData;
if (ku_string_t::isShortString(srcStr.len)) {
dstStr.setShortString(srcStr);
} else {
dstStr.overflowPtr =
reinterpret_cast<uint64_t>(dstOverflowBuffer.allocateSpace(srcStr.len));
dstStr.setLongString(srcStr);
}
} break;
default: {
auto dataTypeSize = processor::FactorizedTable::getDataTypeSize(srcVector.dataType);
Expand Down Expand Up @@ -158,8 +159,7 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
}
} break;
case PhysicalTypeID::STRING: {
common::InMemOverflowBufferUtils::copyString(*(common::ku_string_t*)srcValue,
*(common::ku_string_t*)dstValue, *StringVector::getInMemOverflowBuffer(&dstVector));
StringVector::addString(&dstVector, *(ku_string_t*)dstValue, *(ku_string_t*)srcValue);
} break;
default: {
memcpy(dstValue, srcValue, srcVector.getNumBytesPerValue());
Expand Down
8 changes: 3 additions & 5 deletions src/expression_evaluator/literal_evaluator.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "expression_evaluator/literal_evaluator.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector_utils.h"

using namespace kuzu::common;
Expand Down Expand Up @@ -28,7 +27,7 @@ void LiteralExpressionEvaluator::resolveResultVector(
}

void LiteralExpressionEvaluator::copyValueToVector(
uint8_t* dstValue, common::ValueVector* dstVector, const common::Value* srcValue) {
uint8_t* dstValue, common::ValueVector* dstVector, const Value* srcValue) {
auto numBytesPerValue = dstVector->getNumBytesPerValue();
switch (srcValue->getDataType().getPhysicalType()) {
case common::PhysicalTypeID::INT64: {
Expand All @@ -53,9 +52,8 @@ void LiteralExpressionEvaluator::copyValueToVector(
memcpy(dstValue, &srcValue->val.intervalVal, numBytesPerValue);
} break;
case common::PhysicalTypeID::STRING: {
common::InMemOverflowBufferUtils::copyString(srcValue->strVal.data(),
srcValue->strVal.length(), *(common::ku_string_t*)dstValue,
*common::StringVector::getInMemOverflowBuffer(dstVector));
StringVector::addString(dstVector, *(common::ku_string_t*)dstValue, srcValue->strVal.data(),
srcValue->strVal.length());
} break;
case common::PhysicalTypeID::VAR_LIST: {
auto listListEntry = reinterpret_cast<common::list_entry_t*>(dstValue);
Expand Down
28 changes: 0 additions & 28 deletions src/include/common/in_mem_overflow_buffer_utils.h

This file was deleted.

2 changes: 0 additions & 2 deletions src/include/common/types/ku_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ struct ku_list_t {
void set(const uint8_t* values, const LogicalType& dataType) const;

private:
friend class InMemOverflowBufferUtils;

void set(const std::vector<uint8_t*>& parameters, LogicalTypeID childTypeId);

public:
Expand Down
19 changes: 19 additions & 0 deletions src/include/common/types/ku_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,25 @@ struct ku_string_t {
void set(const std::string& value);
void set(const char* value, uint64_t length);
void set(const ku_string_t& value);
inline void setShortString(const char* value, uint64_t length) {
this->len = length;
memcpy(prefix, value, length);
}
inline void setLongString(const char* value, uint64_t length) {
this->len = length;
memcpy(prefix, value, PREFIX_LENGTH);
memcpy(reinterpret_cast<char*>(overflowPtr), value, length);
}
inline void setShortString(const ku_string_t& value) {
this->len = value.len;
memcpy(prefix, value.prefix, value.len);
}
void setLongString(const ku_string_t& value) {
this->len = value.len;
memcpy(prefix, value.prefix, PREFIX_LENGTH);
memcpy(reinterpret_cast<char*>(overflowPtr), reinterpret_cast<char*>(value.overflowPtr),
value.len);
}

std::string getAsShortString() const;
std::string getAsString() const;
Expand Down
4 changes: 3 additions & 1 deletion src/include/common/vector/auxiliary_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ class StringAuxiliaryBuffer : public AuxiliaryBuffer {
}

// Returns the raw pointer held by inMemOverflowBuffer; the unique_ptr keeps ownership.
inline InMemOverflowBuffer* getOverflowBuffer() const { return inMemOverflowBuffer.get(); }
// Forwards to InMemOverflowBuffer::allocateSpace(size) and returns the pointer it yields.
inline uint8_t* allocateOverflow(uint64_t size) {
return inMemOverflowBuffer->allocateSpace(size);
}
// Forwards to InMemOverflowBuffer::resetBuffer().
inline void resetOverflowBuffer() const { inMemOverflowBuffer->resetBuffer(); }
// Copies the `len` bytes at `value` into the string entry at `pos` of `vector`
// (defined in auxiliary_buffer.cpp).
void addString(common::ValueVector* vector, uint32_t pos, char* value, uint64_t len) const;

private:
std::unique_ptr<InMemOverflowBuffer> inMemOverflowBuffer;
Expand Down
11 changes: 6 additions & 5 deletions src/include/common/vector/value_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,12 @@ class StringVector {
->getOverflowBuffer();
}

// Adds the `len` bytes at `value` as the string at `pos` of `vector` by forwarding to the
// vector's StringAuxiliaryBuffer. `vector` must have physical type STRING (asserted).
static inline void addString(ValueVector* vector, uint32_t pos, char* value, uint64_t len) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    auto* stringBuffer = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    stringBuffer->addString(vector, pos, value, len);
}
// Copies `srcStr` into the ku_string_t entry at `vectorPos` of `vector`; non-short strings
// get their overflow space from the vector's string auxiliary buffer.
static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
// Same as above, with the source given as `length` bytes starting at `srcStr`.
static void addString(
ValueVector* vector, uint32_t vectorPos, const char* srcStr, uint64_t length);
// Copies `srcStr` into the caller-supplied entry `dstStr`; overflow space for non-short
// strings is still allocated from `vector`'s string auxiliary buffer.
static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
// Same as above, with the source given as `length` bytes starting at `srcStr`.
static void addString(
ValueVector* vector, ku_string_t& dstStr, const char* srcStr, uint64_t length);
};

class ListVector {
Expand Down
1 change: 0 additions & 1 deletion src/include/function/aggregate/collect.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector_utils.h"
#include "processor/result/factorized_table.h"

Expand Down
1 change: 0 additions & 1 deletion src/include/function/cast/cast_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cassert>

#include "common/exception.h"
#include "common/in_mem_overflow_buffer_utils.h"
#include "common/type_utils.h"
#include "common/vector/value_vector.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cassert>
#include <cstring>

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/types/ku_list.h"
#include "common/vector/value_vector.h"

Expand Down
11 changes: 5 additions & 6 deletions src/include/storage/storage_structure/disk_overflow_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ class DiskOverflowFile : public StorageStructure {
transaction::TransactionType trxType, common::ValueVector& vector, uint64_t vectorPos) {
assert(vector.dataType.getLogicalTypeID() == common::LogicalTypeID::STRING &&
!vector.isNull(vectorPos));
auto& kuString = ((common::ku_string_t*)vector.getData())[vectorPos];
lookupString(trxType, kuString, *common::StringVector::getInMemOverflowBuffer(&vector));
lookupString(trxType, &vector, vector.getValue<common::ku_string_t>(vectorPos));
}

void readListToVector(transaction::TransactionType trxType, common::ku_list_t& kuList,
Expand Down Expand Up @@ -72,10 +71,10 @@ class DiskOverflowFile : public StorageStructure {
common::page_idx_t pageIdx = UINT32_MAX;
uint8_t* frame = nullptr;
};
void lookupString(transaction::TransactionType trxType, common::ku_string_t& kuStr,
common::InMemOverflowBuffer& inMemOverflowBuffer);
void lookupString(transaction::TransactionType trxType, common::ku_string_t& kuStr,
common::InMemOverflowBuffer& inMemOverflowBuffer, OverflowPageCache& overflowPageCache);
void lookupString(transaction::TransactionType trxType, common::ValueVector* vector,
common::ku_string_t& dstStr);
void lookupString(transaction::TransactionType trxType, common::ValueVector* vector,
common::ku_string_t& dstStr, OverflowPageCache& overflowPageCache);
void addNewPageIfNecessaryWithoutLock(uint32_t numBytesToAppend);
void setStringOverflowWithoutLock(
const char* inMemSrcStr, uint64_t len, common::ku_string_t& diskDstString);
Expand Down
1 change: 0 additions & 1 deletion src/storage/storage_structure/column.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "storage/storage_structure/column.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "storage/storage_structure/storage_structure_utils.h"

using namespace kuzu::common;
Expand Down
Loading

0 comments on commit 2e6c8cc

Please sign in to comment.