Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

List valueVector refactor #1503

Merged
merged 1 commit into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/common/in_mem_overflow_buffer_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@ void InMemOverflowBufferUtils::copyString(
dest.set(src);
}

void InMemOverflowBufferUtils::copyListNonRecursive(const uint8_t* srcValues, ku_list_t& dst,
const DataType& dataType, InMemOverflowBuffer& inMemOverflowBuffer) {
InMemOverflowBufferUtils::allocateSpaceForList(
dst, dst.size * Types::getDataTypeSize(*dataType.getChildType()), inMemOverflowBuffer);
dst.set(srcValues, dataType);
}

void InMemOverflowBufferUtils::copyListRecursiveIfNested(const ku_list_t& src, ku_list_t& dst,
const DataType& dataType, InMemOverflowBuffer& inMemOverflowBuffer, uint32_t srcStartIdx,
uint32_t srcEndIdx) {
Expand Down
11 changes: 10 additions & 1 deletion src/common/null_mask.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include "common/null_mask.h"

namespace kuzu {
#include <cstring>

namespace kuzu {
namespace common {

void NullMask::setNull(uint32_t pos, bool isNull) {
Expand Down Expand Up @@ -78,5 +79,13 @@ bool NullMask::copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
return hasNullInSrcNullMask;
}

void NullMask::resize(uint64_t capacity) {
auto resizedBuffer = std::make_unique<uint64_t[]>(capacity);
memcpy(resizedBuffer.get(), buffer.get(), numNullEntries);
buffer = std::move(resizedBuffer);
data = buffer.get();
numNullEntries = capacity;
}

} // namespace common
} // namespace kuzu
136 changes: 120 additions & 16 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "common/exception.h"
#include "common/string_utils.h"
#include "common/vector/value_vector.h"

namespace kuzu {
namespace common {
Expand Down Expand Up @@ -30,41 +31,58 @@ bool TypeUtils::convertToBoolean(const char* data) {
". Input is not equal to True or False (in a case-insensitive manner)");
}

std::string TypeUtils::elementToString(
const DataType& dataType, uint8_t* overflowPtr, uint64_t pos) {
std::string TypeUtils::listValueToString(
const DataType& dataType, uint8_t* listValues, uint64_t pos) {
switch (dataType.typeID) {
case BOOL:
return TypeUtils::toString(((bool*)overflowPtr)[pos]);
return TypeUtils::toString(((bool*)listValues)[pos]);
case INT64:
return TypeUtils::toString(((int64_t*)overflowPtr)[pos]);
return TypeUtils::toString(((int64_t*)listValues)[pos]);
case DOUBLE:
return TypeUtils::toString(((double_t*)overflowPtr)[pos]);
return TypeUtils::toString(((double_t*)listValues)[pos]);
case DATE:
return TypeUtils::toString(((date_t*)overflowPtr)[pos]);
return TypeUtils::toString(((date_t*)listValues)[pos]);
case TIMESTAMP:
return TypeUtils::toString(((timestamp_t*)overflowPtr)[pos]);
return TypeUtils::toString(((timestamp_t*)listValues)[pos]);
case INTERVAL:
return TypeUtils::toString(((interval_t*)overflowPtr)[pos]);
return TypeUtils::toString(((interval_t*)listValues)[pos]);
case STRING:
return TypeUtils::toString(((ku_string_t*)overflowPtr)[pos]);
return TypeUtils::toString(((ku_string_t*)listValues)[pos]);
case VAR_LIST:
return TypeUtils::toString(((ku_list_t*)overflowPtr)[pos], dataType);
return TypeUtils::toString(((ku_list_t*)listValues)[pos], dataType);
default:
throw RuntimeException("Invalid data type " + Types::dataTypeToString(dataType) +
" for TypeUtils::elementToString.");
" for TypeUtils::listValueToString.");
}
}

std::string TypeUtils::toString(const ku_list_t& val, const DataType& dataType) {
std::string result = "[";
for (auto i = 0u; i < val.size; ++i) {
result += listValueToString(
*dataType.getChildType(), reinterpret_cast<uint8_t*>(val.overflowPtr), i);
result += (i == val.size - 1 ? "]" : ",");
}
return result;
}

std::string TypeUtils::toString(const list_entry_t& val, void* valVector) {
auto listVector = (common::ValueVector*)valVector;
std::string result = "[";
auto values = ListVector::getListValues(listVector, val);
for (auto i = 0u; i < val.size - 1; ++i) {
result += elementToString(
*dataType.getChildType(), reinterpret_cast<uint8_t*>(val.overflowPtr), i) +
result += (listVector->dataType.getChildType()->typeID == VAR_LIST ?
toString(reinterpret_cast<common::list_entry_t*>(values)[i],
ListVector::getDataVector(listVector)) :
listValueToString(*listVector->dataType.getChildType(), values, i)) +
",";
}
result += elementToString(*dataType.getChildType(), reinterpret_cast<uint8_t*>(val.overflowPtr),
val.size - 1) +
"]";
result +=
(listVector->dataType.getChildType()->typeID == VAR_LIST ?
toString(reinterpret_cast<common::list_entry_t*>(values)[val.size - 1],
ListVector::getDataVector(listVector)) :
listValueToString(*listVector->dataType.getChildType(), values, val.size - 1)) +
"]";
return result;
}

Expand All @@ -73,5 +91,91 @@ std::string TypeUtils::prefixConversionExceptionMessage(const char* data, DataTy
Types::dataTypeToString(dataTypeID) + ".";
}

template<>
bool TypeUtils::isValueEqual(
common::list_entry_t& leftEntry, common::list_entry_t& rightEntry, void* left, void* right) {
auto leftVector = (ValueVector*)left;
auto rightVector = (ValueVector*)right;
if (leftVector->dataType != rightVector->dataType || leftEntry.size != rightEntry.size) {
return false;
}
auto leftValues = ListVector::getListValues(leftVector, leftEntry);
auto rightValues = ListVector::getListValues(rightVector, rightEntry);
switch (leftVector->dataType.getChildType()->typeID) {
case BOOL: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<uint8_t*>(leftValues)[i],
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
reinterpret_cast<uint8_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case INT64: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<int64_t*>(leftValues)[i],
reinterpret_cast<int64_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case DOUBLE: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<double_t*>(leftValues)[i],
reinterpret_cast<double_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case STRING: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<ku_string_t*>(leftValues)[i],
reinterpret_cast<ku_string_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case DATE: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<date_t*>(leftValues)[i],
reinterpret_cast<date_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case TIMESTAMP: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<timestamp_t*>(leftValues)[i],
reinterpret_cast<timestamp_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case INTERVAL: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<interval_t*>(leftValues)[i],
reinterpret_cast<interval_t*>(rightValues)[i], left, right)) {
return false;
}
}
} break;
case VAR_LIST: {
for (auto i = 0u; i < leftEntry.size; i++) {
if (!isValueEqual(reinterpret_cast<list_entry_t*>(leftValues)[i],
reinterpret_cast<list_entry_t*>(rightValues)[i],
ListVector::getDataVector(leftVector),
ListVector::getDataVector(rightVector))) {
return false;
}
}
} break;
default: {
throw RuntimeException("Unsupported data type " +
Types::dataTypeToString(leftVector->dataType) +
" for TypeUtils::isValueEqual.");
}
}
return true;
}

} // namespace common
} // namespace kuzu
2 changes: 1 addition & 1 deletion src/common/types/internal_id_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace kuzu {
namespace common {

internalID_t::internalID_t() : offset{INVALID_NODE_OFFSET}, tableID{INVALID_TABLE_ID} {}
internalID_t::internalID_t() : offset{INVALID_OFFSET}, tableID{INVALID_TABLE_ID} {}

internalID_t::internalID_t(offset_t offset, table_id_t tableID)
: offset(offset), tableID(tableID) {}
Expand Down
3 changes: 2 additions & 1 deletion src/common/vector/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
add_library(kuzu_common_vector
OBJECT
value_vector.cpp
value_vector_utils.cpp)
value_vector_utils.cpp
auxiliary_buffer.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_common_vector>
Expand Down
53 changes: 53 additions & 0 deletions src/common/vector/auxiliary_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "common/vector/auxiliary_buffer.h"

#include "common/in_mem_overflow_buffer_utils.h"
#include "common/vector/value_vector.h"

namespace kuzu {
namespace common {

void StringAuxiliaryBuffer::addString(
common::ValueVector* vector, uint32_t pos, char* value, uint64_t len) const {
assert(vector->dataType.typeID == STRING);
auto& entry = ((ku_string_t*)vector->getData())[pos];
InMemOverflowBufferUtils::copyString(value, len, entry, *inMemOverflowBuffer);
}

ListAuxiliaryBuffer::ListAuxiliaryBuffer(
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
kuzu::common::DataType& dataVectorType, storage::MemoryManager* memoryManager)
: capacity{common::DEFAULT_VECTOR_CAPACITY}, size{0}, dataVector{std::make_unique<ValueVector>(
dataVectorType, memoryManager)} {}

list_entry_t ListAuxiliaryBuffer::addList(uint64_t listSize) {
auto listEntry = list_entry_t{size, listSize};
bool needResizeDataVector = size + listSize > capacity;
while (size + listSize > capacity) {
capacity *= 2;
}
auto numBytesPerElement = dataVector->getNumBytesPerValue();
if (needResizeDataVector) {
auto buffer = std::make_unique<uint8_t[]>(capacity * numBytesPerElement);
memcpy(dataVector->valueBuffer.get(), buffer.get(), size * numBytesPerElement);
dataVector->valueBuffer = std::move(buffer);
dataVector->nullMask->resize(capacity);
}
size += listSize;
return listEntry;
}

std::unique_ptr<AuxiliaryBuffer> AuxiliaryBufferFactory::getAuxiliaryBuffer(
DataType& type, storage::MemoryManager* memoryManager) {
switch (type.typeID) {
case STRING:
return std::make_unique<StringAuxiliaryBuffer>(memoryManager);
case STRUCT:
return std::make_unique<StructAuxiliaryBuffer>();
case VAR_LIST:
return std::make_unique<ListAuxiliaryBuffer>(*type.getChildType(), memoryManager);
default:
return nullptr;
}
}

} // namespace common
} // namespace kuzu
Loading