Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove overflow buffer utils #1684

Merged
merged 1 commit into from
Jun 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ add_library(kuzu_common
expression_type.cpp
file_utils.cpp
in_mem_overflow_buffer.cpp
in_mem_overflow_buffer_utils.cpp
logging_level_utils.cpp
metric.cpp
null_mask.cpp
Expand Down
19 changes: 0 additions & 19 deletions src/common/in_mem_overflow_buffer_utils.cpp

This file was deleted.

15 changes: 4 additions & 11 deletions src/common/types/ku_string.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include "common/types/ku_string.h"

#include <cstring>

namespace kuzu {
namespace common {

Expand All @@ -10,23 +8,18 @@ void ku_string_t::set(const std::string& value) {
}

// Stores `length` bytes from `value` in this string.
// Inputs of at most SHORT_STR_LENGTH bytes go through setShortString; longer inputs go through
// setLongString, which writes the payload to the address currently held in `overflowPtr`
// (this function does not assign `overflowPtr`).
void ku_string_t::set(const char* value, uint64_t length) {
    // Both helpers assign `len` and perform every required copy, so no extra memcpy is needed
    // here; repeating the copies inline only duplicated the work.
    if (length <= SHORT_STR_LENGTH) {
        setShortString(value, length);
    } else {
        setLongString(value, length);
    }
}

// Copies the contents of `value` into this string.
// Sources of at most SHORT_STR_LENGTH bytes go through setShortString; longer sources go through
// setLongString, which copies from value.overflowPtr to the address currently held in this
// string's `overflowPtr` (this function does not assign `overflowPtr`).
void ku_string_t::set(const ku_string_t& value) {
    // Both helpers assign `len` and perform every required copy, so no extra memcpy is needed
    // here; repeating the copies inline only duplicated the work.
    if (value.len <= SHORT_STR_LENGTH) {
        setShortString(value);
    } else {
        setLongString(value);
    }
}

Expand Down
8 changes: 0 additions & 8 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
#include "common/vector/auxiliary_buffer.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector.h"

namespace kuzu {
namespace common {

// Copies the `len` bytes at `value` into the ku_string_t stored at index `pos` of `vector`'s data
// by calling InMemOverflowBufferUtils::copyString with this object's in-memory overflow buffer.
void StringAuxiliaryBuffer::addString(
    common::ValueVector* vector, uint32_t pos, char* value, uint64_t len) const {
    assert(vector->dataType.getLogicalTypeID() == LogicalTypeID::STRING);
    // View the vector's raw data as an array of ku_string_t and pick the requested slot.
    auto* slots = (ku_string_t*)vector->getData();
    InMemOverflowBufferUtils::copyString(value, len, slots[pos], *inMemOverflowBuffer);
}

StructAuxiliaryBuffer::StructAuxiliaryBuffer(
const LogicalType& type, storage::MemoryManager* memoryManager) {
auto fieldTypes = StructType::getFieldTypes(&type);
Expand Down
63 changes: 57 additions & 6 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,6 @@ void ValueVector::setValue(uint32_t pos, T val) {
((T*)valueBuffer.get())[pos] = val;
}

// std::string specialization: forwards the string's bytes and length to StringVector::addString
// for position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
    auto* bytes = val.data();
    StringVector::addString(this, pos, bytes, val.length());
}

void ValueVector::resetAuxiliaryBuffer() {
switch (dataType.getPhysicalType()) {
case PhysicalTypeID::STRING: {
Expand Down Expand Up @@ -126,8 +121,64 @@ template void ValueVector::setValue<double_t>(uint32_t pos, double_t val);
template void ValueVector::setValue<date_t>(uint32_t pos, date_t val);
template void ValueVector::setValue<timestamp_t>(uint32_t pos, timestamp_t val);
template void ValueVector::setValue<interval_t>(uint32_t pos, interval_t val);
template void ValueVector::setValue<ku_string_t>(uint32_t pos, ku_string_t val);
template void ValueVector::setValue<list_entry_t>(uint32_t pos, list_entry_t val);

// ku_string_t specialization: instead of a plain slot assignment, the value is handed to
// StringVector::addString for position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, ku_string_t val) {
    ku_string_t& srcStr = val;
    StringVector::addString(this, pos, srcStr);
}
// std::string specialization: passes the string's character data and byte count to
// StringVector::addString for position `pos` of this vector.
template<>
void ValueVector::setValue(uint32_t pos, std::string val) {
    const auto numBytes = val.length();
    StringVector::addString(this, pos, val.data(), numBytes);
}

// Writes `srcStr` into the ku_string_t stored at `vectorPos` of `vector`.
// The copy itself (short string stored inline, otherwise overflow space taken from the vector's
// StringAuxiliaryBuffer) is done by the (vector, dstStr, srcStr) overload, so the logic lives in
// one place instead of being duplicated here.
void StringVector::addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr) {
    // Checked before the slot is looked up, matching the order of the original implementation.
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    addString(vector, vector->getValue<ku_string_t>(vectorPos), srcStr);
}

// Writes the `length` bytes at `srcStr` into the ku_string_t stored at `vectorPos` of `vector`.
// The copy itself (short string stored inline, otherwise overflow space taken from the vector's
// StringAuxiliaryBuffer) is done by the (vector, dstStr, srcStr, length) overload, so the logic
// lives in one place instead of being duplicated here.
void StringVector::addString(
    ValueVector* vector, uint32_t vectorPos, const char* srcStr, uint64_t length) {
    // Checked before the slot is looked up, matching the order of the original implementation.
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    addString(vector, vector->getValue<ku_string_t>(vectorPos), srcStr, length);
}

// Copies `srcStr` into `dstStr`. When ku_string_t::isShortString(srcStr.len) holds, the bytes are
// stored via setShortString; otherwise space for srcStr.len bytes is requested from `vector`'s
// StringAuxiliaryBuffer, recorded in dstStr.overflowPtr, and filled by setLongString.
void StringVector::addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    if (ku_string_t::isShortString(srcStr.len)) {
        dstStr.setShortString(srcStr);
        return;
    }
    auto* overflowOwner = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    auto* overflowSpace = overflowOwner->allocateOverflow(srcStr.len);
    // overflowPtr must be set first: setLongString writes the payload to that address.
    dstStr.overflowPtr = reinterpret_cast<uint64_t>(overflowSpace);
    dstStr.setLongString(srcStr);
}

// Copies the `length` bytes at `srcStr` into `dstStr`. When ku_string_t::isShortString(length)
// holds, the bytes are stored via setShortString; otherwise space for `length` bytes is requested
// from `vector`'s StringAuxiliaryBuffer, recorded in dstStr.overflowPtr, and filled by
// setLongString.
void StringVector::addString(
    ValueVector* vector, ku_string_t& dstStr, const char* srcStr, uint64_t length) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    if (ku_string_t::isShortString(length)) {
        dstStr.setShortString(srcStr, length);
        return;
    }
    auto* overflowOwner = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    auto* overflowSpace = overflowOwner->allocateOverflow(length);
    // overflowPtr must be set first: setLongString writes the payload to that address.
    dstStr.overflowPtr = reinterpret_cast<uint64_t>(overflowSpace);
    dstStr.setLongString(srcStr, length);
}

} // namespace common
} // namespace kuzu
22 changes: 11 additions & 11 deletions src/common/vector/value_vector_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "common/vector/value_vector_utils.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/null_buffer.h"
#include "processor/result/factorized_table.h"

Expand Down Expand Up @@ -46,10 +45,7 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeIntoPos(
}
} break;
case PhysicalTypeID::STRING: {
auto dstData = resultVector.getData() +
pos * processor::FactorizedTable::getDataTypeSize(resultVector.dataType);
InMemOverflowBufferUtils::copyString(*(ku_string_t*)srcData, *(ku_string_t*)dstData,
*StringVector::getInMemOverflowBuffer(&resultVector));
StringVector::addString(&resultVector, pos, *(ku_string_t*)srcData);
} break;
default: {
auto dataTypeSize = processor::FactorizedTable::getDataTypeSize(resultVector.dataType);
Expand Down Expand Up @@ -107,10 +103,15 @@ void ValueVectorUtils::copyNonNullDataWithSameTypeOutFromPos(const ValueVector&
memcpy(dstData, &dstList, sizeof(dstList));
} break;
case PhysicalTypeID::STRING: {
auto srcData = srcVector.getData() +
pos * processor::FactorizedTable::getDataTypeSize(srcVector.dataType);
InMemOverflowBufferUtils::copyString(
*(ku_string_t*)srcData, *(ku_string_t*)dstData, dstOverflowBuffer);
auto& srcStr = srcVector.getValue<ku_string_t>(pos);
auto& dstStr = *(ku_string_t*)dstData;
if (ku_string_t::isShortString(srcStr.len)) {
dstStr.setShortString(srcStr);
} else {
dstStr.overflowPtr =
reinterpret_cast<uint64_t>(dstOverflowBuffer.allocateSpace(srcStr.len));
dstStr.setLongString(srcStr);
}
} break;
default: {
auto dataTypeSize = processor::FactorizedTable::getDataTypeSize(srcVector.dataType);
Expand Down Expand Up @@ -158,8 +159,7 @@ void ValueVectorUtils::copyValue(uint8_t* dstValue, common::ValueVector& dstVect
}
} break;
case PhysicalTypeID::STRING: {
common::InMemOverflowBufferUtils::copyString(*(common::ku_string_t*)srcValue,
*(common::ku_string_t*)dstValue, *StringVector::getInMemOverflowBuffer(&dstVector));
StringVector::addString(&dstVector, *(ku_string_t*)dstValue, *(ku_string_t*)srcValue);
} break;
default: {
memcpy(dstValue, srcValue, srcVector.getNumBytesPerValue());
Expand Down
8 changes: 3 additions & 5 deletions src/expression_evaluator/literal_evaluator.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "expression_evaluator/literal_evaluator.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector_utils.h"

using namespace kuzu::common;
Expand Down Expand Up @@ -28,7 +27,7 @@ void LiteralExpressionEvaluator::resolveResultVector(
}

void LiteralExpressionEvaluator::copyValueToVector(
uint8_t* dstValue, common::ValueVector* dstVector, const common::Value* srcValue) {
uint8_t* dstValue, common::ValueVector* dstVector, const Value* srcValue) {
auto numBytesPerValue = dstVector->getNumBytesPerValue();
switch (srcValue->getDataType().getPhysicalType()) {
case common::PhysicalTypeID::INT64: {
Expand All @@ -53,9 +52,8 @@ void LiteralExpressionEvaluator::copyValueToVector(
memcpy(dstValue, &srcValue->val.intervalVal, numBytesPerValue);
} break;
case common::PhysicalTypeID::STRING: {
common::InMemOverflowBufferUtils::copyString(srcValue->strVal.data(),
srcValue->strVal.length(), *(common::ku_string_t*)dstValue,
*common::StringVector::getInMemOverflowBuffer(dstVector));
StringVector::addString(dstVector, *(common::ku_string_t*)dstValue, srcValue->strVal.data(),
srcValue->strVal.length());
} break;
case common::PhysicalTypeID::VAR_LIST: {
auto listListEntry = reinterpret_cast<common::list_entry_t*>(dstValue);
Expand Down
28 changes: 0 additions & 28 deletions src/include/common/in_mem_overflow_buffer_utils.h

This file was deleted.

2 changes: 0 additions & 2 deletions src/include/common/types/ku_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ struct ku_list_t {
void set(const uint8_t* values, const LogicalType& dataType) const;

private:
friend class InMemOverflowBufferUtils;

void set(const std::vector<uint8_t*>& parameters, LogicalTypeID childTypeId);

public:
Expand Down
20 changes: 20 additions & 0 deletions src/include/common/types/ku_string.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <cstdint>
#include <cstring>
#include <string>

namespace kuzu {
Expand Down Expand Up @@ -32,6 +33,25 @@ struct ku_string_t {
void set(const std::string& value);
void set(const char* value, uint64_t length);
void set(const ku_string_t& value);
inline void setShortString(const char* value, uint64_t length) {
this->len = length;
memcpy(prefix, value, length);
}
inline void setLongString(const char* value, uint64_t length) {
this->len = length;
memcpy(prefix, value, PREFIX_LENGTH);
memcpy(reinterpret_cast<char*>(overflowPtr), value, length);
}
inline void setShortString(const ku_string_t& value) {
this->len = value.len;
memcpy(prefix, value.prefix, value.len);
}
inline void setLongString(const ku_string_t& value) {
this->len = value.len;
memcpy(prefix, value.prefix, PREFIX_LENGTH);
memcpy(reinterpret_cast<char*>(overflowPtr), reinterpret_cast<char*>(value.overflowPtr),
value.len);
}

std::string getAsShortString() const;
std::string getAsString() const;
Expand Down
4 changes: 3 additions & 1 deletion src/include/common/vector/auxiliary_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ class StringAuxiliaryBuffer : public AuxiliaryBuffer {
}

// Returns a non-owning pointer to the overflow buffer held in `inMemOverflowBuffer`.
inline InMemOverflowBuffer* getOverflowBuffer() const { return inMemOverflowBuffer.get(); }
// Forwards to inMemOverflowBuffer->allocateSpace(size) and returns the pointer it yields.
inline uint8_t* allocateOverflow(uint64_t size) {
return inMemOverflowBuffer->allocateSpace(size);
}
// Calls resetBuffer() on the held overflow buffer.
inline void resetOverflowBuffer() const { inMemOverflowBuffer->resetBuffer(); }
// Copies the `len` bytes at `value` into the ku_string_t at index `pos` of `vector`'s data
// (defined out of line in auxiliary_buffer.cpp).
void addString(common::ValueVector* vector, uint32_t pos, char* value, uint64_t len) const;

private:
std::unique_ptr<InMemOverflowBuffer> inMemOverflowBuffer;
Expand Down
11 changes: 6 additions & 5 deletions src/include/common/vector/value_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,12 @@ class StringVector {
->getOverflowBuffer();
}

// Forwards to the addString member of `vector`'s auxiliary buffer, viewed as a
// StringAuxiliaryBuffer, after asserting that the vector's physical type is STRING.
static inline void addString(ValueVector* vector, uint32_t pos, char* value, uint64_t len) {
    assert(vector->dataType.getPhysicalType() == PhysicalTypeID::STRING);
    auto* stringBuffer = reinterpret_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get());
    stringBuffer->addString(vector, pos, value, len);
}
// All four overloads assert that `vector` has physical type STRING. A source for which
// ku_string_t::isShortString(len) holds is stored via setShortString; otherwise overflow space is
// allocated from the vector's StringAuxiliaryBuffer and the bytes are stored via setLongString.

// Copies `srcStr` into the ku_string_t at `vectorPos` of `vector`.
static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
// Copies the `length` bytes at `srcStr` into the ku_string_t at `vectorPos` of `vector`.
static void addString(
ValueVector* vector, uint32_t vectorPos, const char* srcStr, uint64_t length);
// Copies `srcStr` into the caller-supplied `dstStr`; `vector` provides the overflow space.
static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
// Copies the `length` bytes at `srcStr` into the caller-supplied `dstStr`; `vector` provides the
// overflow space.
static void addString(
ValueVector* vector, ku_string_t& dstStr, const char* srcStr, uint64_t length);
};

class ListVector {
Expand Down
1 change: 0 additions & 1 deletion src/include/function/aggregate/collect.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector_utils.h"
#include "processor/result/factorized_table.h"

Expand Down
1 change: 0 additions & 1 deletion src/include/function/cast/cast_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cassert>

#include "common/exception.h"
#include "common/in_mem_overflow_buffer_utils.h"
#include "common/type_utils.h"
#include "common/vector/value_vector.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cassert>
#include <cstring>

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/types/ku_list.h"
#include "common/vector/value_vector.h"

Expand Down
11 changes: 5 additions & 6 deletions src/include/storage/storage_structure/disk_overflow_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ class DiskOverflowFile : public StorageStructure {
transaction::TransactionType trxType, common::ValueVector& vector, uint64_t vectorPos) {
assert(vector.dataType.getLogicalTypeID() == common::LogicalTypeID::STRING &&
!vector.isNull(vectorPos));
auto& kuString = ((common::ku_string_t*)vector.getData())[vectorPos];
lookupString(trxType, kuString, *common::StringVector::getInMemOverflowBuffer(&vector));
lookupString(trxType, &vector, vector.getValue<common::ku_string_t>(vectorPos));
}

void readListToVector(transaction::TransactionType trxType, common::ku_list_t& kuList,
Expand Down Expand Up @@ -72,10 +71,10 @@ class DiskOverflowFile : public StorageStructure {
common::page_idx_t pageIdx = UINT32_MAX;
uint8_t* frame = nullptr;
};
void lookupString(transaction::TransactionType trxType, common::ku_string_t& kuStr,
common::InMemOverflowBuffer& inMemOverflowBuffer);
void lookupString(transaction::TransactionType trxType, common::ku_string_t& kuStr,
common::InMemOverflowBuffer& inMemOverflowBuffer, OverflowPageCache& overflowPageCache);
void lookupString(transaction::TransactionType trxType, common::ValueVector* vector,
common::ku_string_t& dstStr);
void lookupString(transaction::TransactionType trxType, common::ValueVector* vector,
common::ku_string_t& dstStr, OverflowPageCache& overflowPageCache);
void addNewPageIfNecessaryWithoutLock(uint32_t numBytesToAppend);
void setStringOverflowWithoutLock(
const char* inMemSrcStr, uint64_t len, common::ku_string_t& diskDstString);
Expand Down
1 change: 0 additions & 1 deletion src/storage/storage_structure/column.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "storage/storage_structure/column.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "storage/storage_structure/storage_structure_utils.h"

using namespace kuzu::common;
Expand Down
Loading
Loading