Skip to content

Commit

Permalink
Fix list scan bug
Browse files: browse the repository at this point in the history
  • Loading branch information
acquamarin committed Sep 26, 2023
1 parent 5cbb422 commit c81ae37
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 1,546 deletions.
13 changes: 5 additions & 8 deletions src/include/storage/store/var_list_node_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,13 @@ namespace storage {

struct ListOffsetInfoInStorage {
common::offset_t prevNodeListOffset;
std::unique_ptr<common::ValueVector> offsetVector;
std::vector<std::unique_ptr<common::ValueVector>> offsetVectors;

ListOffsetInfoInStorage(
common::offset_t prevNodeListOffset, std::unique_ptr<common::ValueVector> offsetVector)
: prevNodeListOffset{prevNodeListOffset}, offsetVector{std::move(offsetVector)} {}
ListOffsetInfoInStorage(common::offset_t prevNodeListOffset,
std::vector<std::unique_ptr<common::ValueVector>> offsetVectors)
: prevNodeListOffset{prevNodeListOffset}, offsetVectors{std::move(offsetVectors)} {}

inline common::offset_t getListOffset(uint64_t nodePos) const {
return nodePos == 0 ? prevNodeListOffset :
offsetVector->getValue<common::offset_t>(nodePos - 1);
}
common::offset_t getListOffset(uint64_t nodePos) const;

inline uint64_t getListLength(uint64_t nodePos) const {
return getListOffset(nodePos + 1) - getListOffset(nodePos);
Expand Down
33 changes: 28 additions & 5 deletions src/storage/store/var_list_node_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ using namespace kuzu::transaction;
namespace kuzu {
namespace storage {

// Returns the list offset associated with `nodePos`.
// Position 0 maps to `prevNodeListOffset`; any other position maps to entry (nodePos - 1) of the
// scanned offsets, which are stored as consecutive vectors holding
// common::DEFAULT_VECTOR_CAPACITY entries each.
common::offset_t ListOffsetInfoInStorage::getListOffset(uint64_t nodePos) const {
    // The first position has no entry in `offsetVectors`; its offset is kept separately.
    if (nodePos == 0) {
        return prevNodeListOffset;
    }
    // Split the flat entry index into (which vector, position inside that vector).
    const auto entryIdx = nodePos - 1;
    const auto vectorIdx = entryIdx / common::DEFAULT_VECTOR_CAPACITY;
    const auto posInVector = entryIdx % common::DEFAULT_VECTOR_CAPACITY;
    return offsetVectors[vectorIdx]->getValue<common::offset_t>(posInVector);
}

void VarListNodeColumn::scan(Transaction* transaction, node_group_idx_t nodeGroupIdx,
offset_t startOffsetInGroup, offset_t endOffsetInGroup, ValueVector* resultVector,
uint64_t offsetInVector) {
Expand Down Expand Up @@ -151,13 +161,26 @@ offset_t VarListNodeColumn::readOffset(
// Builds a ListOffsetInfoInStorage for the node positions
// [startOffsetInNodeGroup, endOffsetInNodeGroup) of node group `nodeGroupIdx`: the offsets are
// scanned through NodeColumn::scan into INT64 ValueVectors, and the offset returned by
// readListOffsetInStorage for startOffsetInNodeGroup is passed along as the "previous" offset.
// NOTE(review): this body comes from a diff view whose +/- markers were stripped, so pre-change
// and post-change lines are interleaved here. The per-file summary reports 28 additions and
// 5 deletions -- confirm against the raw diff which lines belong to the final version.
ListOffsetInfoInStorage VarListNodeColumn::getListOffsetInfoInStorage(Transaction* transaction,
node_group_idx_t nodeGroupIdx, offset_t startOffsetInNodeGroup, offset_t endOffsetInNodeGroup,
std::shared_ptr<DataChunkState> state) {
// NOTE(review): the next four lines look like the removed single-vector version, which scanned
// the whole range into one ValueVector -- confirm.
auto offsetVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
offsetVector->setState(std::move(state));
NodeColumn::scan(transaction, nodeGroupIdx, startOffsetInNodeGroup, endOffsetInNodeGroup,
offsetVector.get());
// Batched version: split the range into chunks of at most DEFAULT_VECTOR_CAPACITY offsets.
auto numOffsetsToRead = endOffsetInNodeGroup - startOffsetInNodeGroup;
// Ceiling division: one extra vector when the range is not a multiple of the capacity.
auto numOffsetVectors = numOffsetsToRead / DEFAULT_VECTOR_CAPACITY +
(numOffsetsToRead % DEFAULT_VECTOR_CAPACITY ? 1 : 0);
std::vector<std::unique_ptr<ValueVector>> offsetVectors;
offsetVectors.reserve(numOffsetVectors);
// Running count of offsets already scanned; also the start position of the next batch
// relative to startOffsetInNodeGroup.
uint64_t numOffsetsRead = 0;
for (auto i = 0u; i < numOffsetVectors; i++) {
auto offsetVector = std::make_unique<ValueVector>(LogicalTypeID::INT64);
// The last batch may be shorter than DEFAULT_VECTOR_CAPACITY.
auto numOffsetsToReadInCurBatch =
std::min(numOffsetsToRead - numOffsetsRead, DEFAULT_VECTOR_CAPACITY);
// `state` is copied (not moved) here, so every batch vector is given the same state object.
offsetVector->setState(state);
NodeColumn::scan(transaction, nodeGroupIdx, startOffsetInNodeGroup + numOffsetsRead,
startOffsetInNodeGroup + numOffsetsRead + numOffsetsToReadInCurBatch,
offsetVector.get());
offsetVectors.push_back(std::move(offsetVector));
numOffsetsRead += numOffsetsToReadInCurBatch;
}
auto prevNodeListOffsetInStorage =
readListOffsetInStorage(transaction, nodeGroupIdx, startOffsetInNodeGroup);
// NOTE(review): the first return below looks like the removed pre-change line; the second one,
// returning the vector of batches, matches the batched code above -- confirm.
return {prevNodeListOffsetInStorage, std::move(offsetVector)};
return {prevNodeListOffsetInStorage, std::move(offsetVectors)};
}

} // namespace storage
Expand Down
3 changes: 1 addition & 2 deletions third_party/miniparquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ add_library(miniparquet STATIC
src/thrift/transport/TTransportException.cpp
src/thrift/transport/TBufferTransports.cpp
src/snappy/snappy.cc
src/snappy/snappy-sinksource.cc
src/miniparquet.cpp)
src/snappy/snappy-sinksource.cc)

target_include_directories(
miniparquet
Expand Down
4 changes: 0 additions & 4 deletions third_party/miniparquet/src/Makevars

This file was deleted.

Loading

0 comments on commit c81ae37

Please sign in to comment.