Skip to content

Commit

Permalink
Merge pull request #1698 from kuzudb/issue-1678
Browse files: browse the repository at this point in the history
Fix issue-1678
  • Loading branch information
andyfengHKU committed Jun 20, 2023
2 parents 251d978 + 3f2955b commit 75526c9
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 64 deletions.
4 changes: 0 additions & 4 deletions src/include/common/types/ku_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,6 @@ struct ku_string_t {
return isShortString(len) ? prefix : reinterpret_cast<uint8_t*>(overflowPtr);
}

inline uint8_t* getDataWritable() {
return isShortString(len) ? prefix : reinterpret_cast<uint8_t*>(overflowPtr);
}

// These functions do *NOT* allocate or resize the overflow buffer; they only copy the content and
// set the length.
void set(const std::string& value);
Expand Down
17 changes: 9 additions & 8 deletions src/include/function/cast/cast_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,15 @@ struct CastToBlob {
common::ValueVector& inputVector, common::ValueVector& resultVector) {
result.value.len = common::Blob::getBlobSize(input);
if (!common::ku_string_t::isShortString(result.value.len)) {
result.value.overflowPtr = reinterpret_cast<int64_t>(
common::StringVector::getInMemOverflowBuffer(&resultVector)
->allocateSpace(result.value.len));
}
common::Blob::fromString(reinterpret_cast<const char*>(input.getData()), input.len,
result.value.getDataWritable());
if (!common::ku_string_t::isShortString(result.value.len)) {
memcpy(result.value.prefix, result.value.getData(), common::ku_string_t::PREFIX_LENGTH);
auto overflowBuffer = common::StringVector::getInMemOverflowBuffer(&resultVector);
auto overflowPtr = overflowBuffer->allocateSpace(result.value.len);
result.value.overflowPtr = reinterpret_cast<int64_t>(overflowPtr);
common::Blob::fromString(
reinterpret_cast<const char*>(input.getData()), input.len, overflowPtr);
memcpy(result.value.prefix, overflowPtr, common::ku_string_t::PREFIX_LENGTH);
} else {
common::Blob::fromString(
reinterpret_cast<const char*>(input.getData()), input.len, result.value.prefix);
}
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,7 @@ struct ListExtract {
template<>
inline void ListExtract::setValue(
common::ku_string_t& src, common::ku_string_t& dest, common::ValueVector& resultValueVector) {
if (!common::ku_string_t::isShortString(src.len)) {
dest.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultValueVector)
->allocateSpace(src.len));
}
dest.set(src);
common::StringVector::addString(&resultValueVector, dest, src);
}

} // namespace operation
Expand Down
12 changes: 2 additions & 10 deletions src/include/function/list/operations/list_slice_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,8 @@ struct ListSlice {
int64_t startIdx = (begin == 0) ? 1 : begin;
int64_t endIdx = (end == 0) ? str.len : end;
result.len = std::min(endIdx - startIdx + 1, str.len - startIdx + 1);

if (!common::ku_string_t::isShortString(result.len)) {
result.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultValueVector)
->allocateSpace(result.len));
}
memcpy((uint8_t*)result.getData(), str.getData() + startIdx - 1, result.len);
if (!common::ku_string_t::isShortString(result.len)) {
memcpy(result.prefix, result.getData(), common::ku_string_t::PREFIX_LENGTH);
}
common::StringVector::addString(
&resultValueVector, result, (const char*)(str.getData() + startIdx - 1), result.len);
}
};

Expand Down
13 changes: 2 additions & 11 deletions src/include/function/string/operations/base_pad_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,8 @@ struct BasePadOperation {
}
std::string paddedResult;
padOperation(src, count, characterToPad, paddedResult);
result.len = paddedResult.size();
if (common::ku_string_t::isShortString(result.len)) {
memcpy(result.prefix, paddedResult.data(), result.len);
} else {
result.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultValueVector)
->allocateSpace(result.len));
auto buffer = reinterpret_cast<char*>(result.overflowPtr);
memcpy(buffer, paddedResult.data(), result.len);
memcpy(result.prefix, buffer, common::ku_string_t::PREFIX_LENGTH);
}
common::StringVector::addString(
&resultValueVector, result, paddedResult.data(), paddedResult.size());
}

static std::pair<uint32_t, uint32_t> padCountChars(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@ struct BaseRegexpOperation {

static inline void copyToKuzuString(
const std::string& value, common::ku_string_t& kuString, common::ValueVector& valueVector) {
if (!common::ku_string_t::isShortString(value.length())) {
kuString.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&valueVector)
->allocateSpace(value.length()));
}
kuString.set(value);
common::StringVector::addString(&valueVector, kuString, value.data(), value.length());
}
};

Expand Down
11 changes: 3 additions & 8 deletions src/include/function/string/operations/pad_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cstring>

#include "common/types/ku_string.h"
#include "common/vector/value_vector.h"

namespace kuzu {
namespace function {
Expand All @@ -21,15 +22,9 @@ struct PadOperation {
return;
}
assert(characterToPad.len == 1);
result.len = count;
if (!common::ku_string_t::isShortString(result.len)) {
result.overflowPtr = reinterpret_cast<uint64_t>(
resultValueVector.getOverflowBuffer().allocateSpace(result.len));
}
padOperation(result, src, characterToPad);
if (!common::ku_string_t::isShortString(result.len)) {
memcpy(result.prefix, result.getData(), common::ku_string_t::PREFIX_LENGTH);
}
common::StringVector::addString(
&resultValueVector, result, (const char*)result.getData(), count);
}
};

Expand Down
16 changes: 5 additions & 11 deletions src/include/function/string/operations/substr_operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,16 @@ struct SubStr {

static inline void copySubstr(common::ku_string_t& src, int64_t start, int64_t len,
common::ku_string_t& result, common::ValueVector& resultValueVector, bool isAscii) {
result.len = std::min(len, src.len - start + 1);
if (!common::ku_string_t::isShortString(result.len)) {
result.overflowPtr = reinterpret_cast<uint64_t>(
common::StringVector::getInMemOverflowBuffer(&resultValueVector)
->allocateSpace(result.len));
}
auto length = std::min(len, src.len - start + 1);
if (isAscii) {
// For normal ASCII char case, we get to the proper byte position to copy from by doing
// a -1 (since it is guaranteed each char is 1 byte).
memcpy((uint8_t*)result.getData(), src.getData() + start - 1, result.len);
common::StringVector::addString(
&resultValueVector, result, (const char*)(src.getData() + start - 1), length);
} else {
// For utf8 char copy, the function gets the exact starting byte position to copy from.
memcpy((uint8_t*)result.getData(), src.getData() + start, result.len);
}
if (!common::ku_string_t::isShortString(result.len)) {
memcpy(result.prefix, result.getData(), common::ku_string_t::PREFIX_LENGTH);
common::StringVector::addString(
&resultValueVector, result, (const char*)(src.getData() + start), length);
}
}
};
Expand Down

0 comments on commit 75526c9

Please sign in to comment.