From cbc66dbac7137d43e2182161806f3934488931ad Mon Sep 17 00:00:00 2001 From: ziyi chen Date: Wed, 2 Aug 2023 18:16:40 -0400 Subject: [PATCH] update --- src/common/vector/auxiliary_buffer.cpp | 2 +- src/include/common/vector/auxiliary_buffer.h | 2 +- src/include/common/vector/value_vector.h | 4 +- src/include/storage/copier/column_chunk.h | 30 +--- .../storage/copier/list_column_chunk.h | 14 +- .../storage/copier/struct_column_chunk.h | 2 +- .../storage/copier/var_sized_column_chunk.h | 2 +- src/include/storage/store/list_node_column.h | 30 +++- src/storage/copier/column_chunk.cpp | 96 +++++++---- src/storage/copier/list_column_chunk.cpp | 47 ++--- src/storage/copier/struct_column_chunk.cpp | 8 +- src/storage/copier/var_sized_column_chunk.cpp | 6 +- src/storage/store/list_node_column.cpp | 160 +++++++++++------- src/storage/store/node_column.cpp | 2 +- test/runner/e2e_ddl_test.cpp | 12 -- 15 files changed, 246 insertions(+), 171 deletions(-) diff --git a/src/common/vector/auxiliary_buffer.cpp b/src/common/vector/auxiliary_buffer.cpp index 3296eb736d6..beeb65614e7 100644 --- a/src/common/vector/auxiliary_buffer.cpp +++ b/src/common/vector/auxiliary_buffer.cpp @@ -33,7 +33,7 @@ list_entry_t ListAuxiliaryBuffer::addList(uint64_t listSize) { return listEntry; } -void ListAuxiliaryBuffer::reserve(uint64_t numValues) { +void ListAuxiliaryBuffer::resize(uint64_t numValues) { if (numValues <= capacity) { size = numValues; return; diff --git a/src/include/common/vector/auxiliary_buffer.h b/src/include/common/vector/auxiliary_buffer.h index 5906f8fabf0..a099b15fe2b 100644 --- a/src/include/common/vector/auxiliary_buffer.h +++ b/src/include/common/vector/auxiliary_buffer.h @@ -80,7 +80,7 @@ class ListAuxiliaryBuffer : public AuxiliaryBuffer { inline void resetSize() { size = 0; } - void reserve(uint64_t numValues); + void resize(uint64_t numValues); private: void resizeDataVector(ValueVector* dataVector); diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 36c81162c1f..85a285bc216 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -148,8 +148,8 @@ class ListVector { return reinterpret_cast(vector->auxiliaryBuffer.get()) ->addList(listSize); } - static inline void reserveDataVector(ValueVector* vector, uint64_t numValues) { - reinterpret_cast(vector->auxiliaryBuffer.get())->reserve(numValues); + static inline void resizeDataVector(ValueVector* vector, uint64_t numValues) { + reinterpret_cast(vector->auxiliaryBuffer.get())->resize(numValues); } static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData); diff --git a/src/include/storage/copier/column_chunk.h b/src/include/storage/copier/column_chunk.h index 8ad6289615c..b3e776b4146 100644 --- a/src/include/storage/copier/column_chunk.h +++ b/src/include/storage/copier/column_chunk.h @@ -64,11 +64,9 @@ class ColumnChunk { common::BufferPoolConstants::PAGE_4KB_SIZE; } - inline uint64_t getNumElements() const { return numElements; } + virtual void writeVal(const common::Value& val, uint64_t posToWrite); - virtual void appendVal(const common::Value& val); - - virtual void reserve(uint64_t numValues); + inline uint64_t getNumBytesPerValue() const { return numBytesPerValue; } protected: ColumnChunk(common::LogicalType dataType, common::offset_t numValues, @@ -93,12 +91,12 @@ class ColumnChunk { common::offset_t getOffsetInBuffer(common::offset_t pos) const; -private: - void reserveForBuffer(uint64_t numValues); - public: std::unique_ptr buffer; +private: + virtual void resize(uint64_t numBytesToResize); + protected: common::LogicalType dataType; uint32_t numBytesPerValue; @@ -106,8 +104,6 @@ class ColumnChunk { std::unique_ptr nullChunk; std::vector> childrenChunks; const common::CopyDescription* copyDescription; - uint64_t numElements = 0; - uint64_t capacity = common::StorageConstants::NODE_GROUP_SIZE; }; class NullColumnChunk : public ColumnChunk { @@ -118,22 +114,12 @@ class NullColumnChunk : public ColumnChunk { resetNullBuffer(); } - void reserve(uint64_t numValues) override { - while (capacity < numValues) { - capacity *= 2; - } - auto reservedNumBytes = capacity * numBytesPerValue; - auto reservedBuffer = std::make_unique(reservedNumBytes); - memset(reservedBuffer.get(), 0 /* non null */, reservedNumBytes); - memcpy(reservedBuffer.get(), buffer.get(), numBytes); - buffer = std::move(reservedBuffer); - numBytes = reservedNumBytes; - } - inline void resetNullBuffer() { memset(buffer.get(), 0 /* non null */, numBytes); } inline bool isNull(common::offset_t pos) const { return getValue(pos); } inline void setNull(common::offset_t pos, bool isNull) { ((bool*)buffer.get())[pos] = isNull; } + + void resize(uint64_t numBytesToResize) final; }; class FixedListColumnChunk : public ColumnChunk { @@ -144,7 +130,7 @@ class FixedListColumnChunk : public ColumnChunk { void append(ColumnChunk* other, common::offset_t startPosInOtherChunk, common::offset_t startPosInChunk, uint32_t numValuesToAppend) final; - void appendVal(const common::Value& listVal) final; + void writeVal(const common::Value& fixedListVal, uint64_t posToWrite) final; }; struct ColumnChunkFactory { diff --git a/src/include/storage/copier/list_column_chunk.h b/src/include/storage/copier/list_column_chunk.h index 2712dcb9106..1a4cce89300 100644 --- a/src/include/storage/copier/list_column_chunk.h +++ b/src/include/storage/copier/list_column_chunk.h @@ -12,7 +12,7 @@ class ListColumnChunk : public ColumnChunk { public: ListColumnChunk(LogicalType dataType, CopyDescription* copyDescription); - inline ColumnChunk* getDataColumnChunk() const { return dataColumnChunk.get(); } + inline ColumnChunk* getDataColumnChunk() const { return dataChunk.get(); } void setValueFromString(const char* value, uint64_t length, uint64_t pos); @@ -25,14 +25,18 @@ class ListColumnChunk : public ColumnChunk { void copyListFromArrowList( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend); - void appendVal(const common::Value& listVal) override; + void writeVal(const common::Value& listVal, uint64_t posToWrite) override; - common::page_idx_t getNumPages() const final { - return ColumnChunk::getNumPages() + dataColumnChunk->getNumPages(); + inline common::page_idx_t getNumPages() const final { + return ColumnChunk::getNumPages() + dataChunk->getNumPages(); } + void resizeDataChunk(uint64_t numValues); + private: - std::unique_ptr dataColumnChunk; + std::unique_ptr dataChunk; + uint64_t numValuesInDataChunk; + uint64_t capacityInDataChunk; }; } // namespace storage diff --git a/src/include/storage/copier/struct_column_chunk.h b/src/include/storage/copier/struct_column_chunk.h index 3a2db771819..df9e864e836 100644 --- a/src/include/storage/copier/struct_column_chunk.h +++ b/src/include/storage/copier/struct_column_chunk.h @@ -33,7 +33,7 @@ class StructColumnChunk : public ColumnChunk { common::LogicalType& type, const std::string& structString); static std::string parseStructFieldName(const std::string& structString, uint64_t& curPos); std::string parseStructFieldValue(const std::string& structString, uint64_t& curPos); - void appendVal(const common::Value& listVal) final; + void writeVal(const common::Value& val, uint64_t posToWrite) final; }; } // namespace storage diff --git a/src/include/storage/copier/var_sized_column_chunk.h b/src/include/storage/copier/var_sized_column_chunk.h index 6b6721d2763..39aae783f17 100644 --- a/src/include/storage/copier/var_sized_column_chunk.h +++ b/src/include/storage/copier/var_sized_column_chunk.h @@ -44,7 +44,7 @@ class VarSizedColumnChunk : public ColumnChunk { void appendVarListColumnChunk(VarSizedColumnChunk* other, common::offset_t startPosInOtherChunk, common::offset_t startPosInChunk, uint32_t numValuesToAppend); - void appendVal(const common::Value& val) override; + void writeVal(const common::Value& val, uint64_t posToWrite) override; private: std::unique_ptr overflowFile; diff --git a/src/include/storage/store/list_node_column.h b/src/include/storage/store/list_node_column.h index 337d96fec8d..fdefad7a5f3 100644 --- a/src/include/storage/store/list_node_column.h +++ b/src/include/storage/store/list_node_column.h @@ -27,6 +27,12 @@ class ListNodeColumn : public NodeColumn { ColumnChunk* columnChunk, common::page_idx_t startPageIdx, uint64_t nodeGroupIdx) override; private: + void scanUnfiltered(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, + common::ValueVector* offsetVector, common::ValueVector* resultVector); + + void scanFiltered(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, + common::ValueVector* offsetVector, common::ValueVector* resultVector); + void checkpointInMemory() override; void rollbackInMemory() override; @@ -34,9 +40,29 @@ class ListNodeColumn : public NodeColumn { void scanWithOffsets(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector* resultVector, - uint64_t startPosInVector) override; + uint64_t posToWriteListEntry) override; + + common::offset_t readOffset(transaction::Transaction* transaction, common::offset_t valuePos); - common::offset_t getOffset(transaction::Transaction* transaction, common::offset_t nodeOffset); + void scanListOffset(transaction::Transaction* transaction, common::offset_t startNodeOffset, + common::node_group_idx_t nodeGroupIdx, common::ValueVector* offsetVector); + + inline common::offset_t readListOffsetInStorage( + transaction::Transaction* transaction, common::offset_t nodeOffset) { + return nodeOffset == 0 ? 0 : readOffset(transaction, nodeOffset - 1); + } + + inline common::offset_t getListOffsetInStorage(transaction::Transaction* transaction, + common::ValueVector* offsetVector, common::offset_t nodeOffset, uint64_t nodePos) { + return nodePos == 0 ? readListOffsetInStorage(transaction, nodeOffset) : + offsetVector->getValue(nodePos - 1); + } + + inline uint64_t getListLength(transaction::Transaction* transaction, + common::ValueVector* offsetVector, common::offset_t nodeOffset, uint64_t nodePos) { + return getListOffsetInStorage(transaction, offsetVector, nodeOffset + 1, nodePos + 1) - + getListOffsetInStorage(transaction, offsetVector, nodeOffset, nodePos); + } private: std::unique_ptr dataNodeColumn; diff --git a/src/storage/copier/column_chunk.cpp b/src/storage/copier/column_chunk.cpp index a9c7e30e234..487c64dbf23 100644 --- a/src/storage/copier/column_chunk.cpp +++ b/src/storage/copier/column_chunk.cpp @@ -50,26 +50,6 @@ void ColumnChunk::append(ColumnChunk* other, common::offset_t startPosInOtherChu numValuesToAppend * numBytesPerValue); } -void ColumnChunk::reserve(uint64_t numValues) { - if (numValues <= capacity) { - return; - } - while (capacity < numValues) { - capacity *= 2; - } - auto reservedNumBytes = capacity * numBytesPerValue; - auto reservedBuffer = std::make_unique(reservedNumBytes); - memcpy(reservedBuffer.get(), buffer.get(), numBytes); - numBytes = reservedNumBytes; - buffer = std::move(reservedBuffer); - if (nullChunk) { - nullChunk->reserve(numValues); - } - for (auto& child : childrenChunks) { - child->reserve(numValues); - } -} - void ColumnChunk::append( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { switch (array->type_id()) { @@ -126,35 +106,35 @@ void ColumnChunk::append( } } -void ColumnChunk::appendVal(const common::Value& val) { - nullChunk->setNull(numElements, val.isNull()); +void ColumnChunk::writeVal(const common::Value& val, uint64_t posToWrite) { + nullChunk->setNull(posToWrite, val.isNull()); switch (dataType.getPhysicalType()) { case PhysicalTypeID::BOOL: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::INT64: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::INT32: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::INT16: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::DOUBLE: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::FLOAT: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::INTERVAL: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; case PhysicalTypeID::INTERNAL_ID: { - setValue(val.getValue(), numElements++); + setValue(val.getValue(), posToWrite); } break; default: { - throw NotImplementedException{"ColumnChunk::appendVal"}; + throw NotImplementedException{"ColumnChunk::writeVal"}; } } } @@ -307,11 +287,34 @@ void FixedListColumnChunk::append(ColumnChunk* other, common::offset_t startPosI } } -void FixedListColumnChunk::appendVal(const common::Value& listVal) { - assert(listVal.getDataType()->getPhysicalType() == PhysicalTypeID::FIXED_LIST); - auto numElements = NestedVal::getChildrenSize(&listVal); - for (auto i = 0u; i < numElements; i++) { - throw NotImplementedException{"here"}; +void FixedListColumnChunk::writeVal(const common::Value& fixedListVal, uint64_t posToWrite) { + assert(fixedListVal.getDataType()->getPhysicalType() == PhysicalTypeID::FIXED_LIST); + auto numValues = NestedVal::getChildrenSize(&fixedListVal); + auto numBytesPerValueInList = getDataTypeSizeInChunk(*fixedListVal.getDataType()); + auto bufferToWrite = buffer.get() + posToWrite * numBytesPerValue; + for (auto i = 0u; i < numValues; i++) { + auto val = common::NestedVal::getChildVal(&fixedListVal, i); + switch (fixedListVal.getDataType()->getPhysicalType()) { + case PhysicalTypeID::INT64: { + memcpy(bufferToWrite, &val->getValueReference(), numBytesPerValueInList); + } break; + case PhysicalTypeID::INT32: { + memcpy(bufferToWrite, &val->getValueReference(), numBytesPerValueInList); + } break; + case PhysicalTypeID::INT16: { + memcpy(bufferToWrite, &val->getValueReference(), numBytesPerValueInList); + } break; + case PhysicalTypeID::DOUBLE: { + memcpy(bufferToWrite, &val->getValueReference(), numBytesPerValueInList); + } break; + case PhysicalTypeID::FLOAT: { + memcpy(bufferToWrite, &val->getValueReference(), numBytesPerValueInList); + } break; + default: { + throw NotImplementedException{"FixedListColumnChunk::writeVal"}; + } + } + bufferToWrite += numBytesPerValueInList; } } @@ -394,5 +397,26 @@ common::offset_t ColumnChunk::getOffsetInBuffer(common::offset_t pos) const { return offsetInBuffer; } +void ColumnChunk::resize(uint64_t numBytesToResize) { + auto reservedBuffer = std::make_unique(numBytesToResize); + memcpy(reservedBuffer.get(), buffer.get(), numBytes); + numBytes = numBytesToResize; + buffer = std::move(reservedBuffer); + if (nullChunk) { + nullChunk->resize(numBytesToResize); + } + for (auto& child : childrenChunks) { + child->resize(numBytesToResize); + } +} + +void NullColumnChunk::resize(uint64_t numBytesToResize) { + auto reservedBuffer = std::make_unique(numBytesToResize); + memset(reservedBuffer.get(), 0 /* non null */, numBytesToResize); + memcpy(reservedBuffer.get(), buffer.get(), numBytes); + buffer = std::move(reservedBuffer); + numBytes = numBytesToResize; +} + } // namespace storage } // namespace kuzu diff --git a/src/storage/copier/list_column_chunk.cpp b/src/storage/copier/list_column_chunk.cpp index bc2e214feb9..90d5981f2a6 100644 --- a/src/storage/copier/list_column_chunk.cpp +++ b/src/storage/copier/list_column_chunk.cpp @@ -9,8 +9,10 @@ namespace storage { ListColumnChunk::ListColumnChunk(LogicalType dataType, CopyDescription* copyDescription) : ColumnChunk{std::move(dataType), copyDescription, true /* hasNullChunk */} { assert(this->dataType.getPhysicalType() == PhysicalTypeID::VAR_LIST); - dataColumnChunk = ColumnChunkFactory::createColumnChunk( + dataChunk = ColumnChunkFactory::createColumnChunk( *VarListType::getChildType(&this->dataType), copyDescription); + numValuesInDataChunk = 0; + capacityInDataChunk = StorageConstants::NODE_GROUP_SIZE; } void ListColumnChunk::append( @@ -38,55 +40,58 @@ void ListColumnChunk::copyListFromArrowString( auto posInChunk = startPosInChunk + i; if (arrayData->IsNull(i)) { nullChunk->setNull(posInChunk, true); - setValue((int64_t)dataColumnChunk->getNumElements(), posInChunk); + setValue(numValuesInDataChunk, posInChunk); continue; } auto value = stringArray->GetView(i); auto listVal = TableCopyUtils::getArrowVarList( value.data(), 1, value.size() - 2, dataType, *copyDescription); - auto numValuesInList = NestedVal::getChildrenSize(listVal.get()); - for (auto j = 0u; j < numValuesInList; j++) { - dataColumnChunk->appendVal(*NestedVal::getChildVal(listVal.get(), j)); - } - setValue(dataColumnChunk->getNumElements(), posInChunk); + writeVal(*listVal, posInChunk); } } else { for (auto i = 0u; i < numValuesToAppend; i++) { auto value = stringArray->GetView(i); + auto posInChunk = startPosInChunk + i; auto listVal = TableCopyUtils::getArrowVarList( value.data(), 1, value.size() - 2, dataType, *copyDescription); - auto numValuesInList = NestedVal::getChildrenSize(listVal.get()); - dataColumnChunk->reserve(dataColumnChunk->getNumElements() + numValuesInList); - for (auto j = 0u; j < numValuesInList; j++) { - dataColumnChunk->appendVal(*NestedVal::getChildVal(listVal.get(), j)); - } - setValue(dataColumnChunk->getNumElements(), startPosInChunk + i); + writeVal(*listVal, posInChunk); } } } void ListColumnChunk::copyListFromArrowList( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { - assert(false); + auto listArray = (arrow::ListArray*)array; + append(listArray->offsets().get(), startPosInChunk, numValuesToAppend); + auto startOffset = listArray->value_offset(startPosInChunk); + auto endOffset = listArray->value_offset(startPosInChunk + numValuesToAppend); + dataChunk->append(listArray->offsets().get(), startOffset, endOffset - startOffset); } -void ListColumnChunk::appendVal(const common::Value& listVal) { +void ListColumnChunk::writeVal(const common::Value& listVal, uint64_t posToWrite) { assert(listVal.getDataType()->getPhysicalType() == PhysicalTypeID::VAR_LIST); auto numValuesInList = NestedVal::getChildrenSize(&listVal); + resizeDataChunk(numValuesInDataChunk + numValuesInList); for (auto i = 0u; i < numValuesInList; i++) { - dataColumnChunk->appendVal(*NestedVal::getChildVal(&listVal, i)); + dataChunk->writeVal(*NestedVal::getChildVal(&listVal, i), numValuesInDataChunk); + numValuesInDataChunk++; } - setValue((int64_t)dataColumnChunk->getNumElements(), numElements++); + setValue(numValuesInDataChunk, posToWrite); } void ListColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { auto listVal = TableCopyUtils::getArrowVarList(value, 1, length - 2, dataType, *copyDescription); - auto numValuesInList = NestedVal::getChildrenSize(listVal.get()); - for (auto j = 0u; j < numValuesInList; j++) { - dataColumnChunk->appendVal(*NestedVal::getChildVal(listVal.get(), j)); + writeVal(*listVal, pos); +} + +void ListColumnChunk::resizeDataChunk(uint64_t numValues) { + if (numValues <= capacityInDataChunk) { + return; + } + while (capacityInDataChunk < numValues) { + capacityInDataChunk *= 2; } - setValue((int64_t)dataColumnChunk->getNumElements(), pos); } } // namespace storage diff --git a/src/storage/copier/struct_column_chunk.cpp b/src/storage/copier/struct_column_chunk.cpp index be6a54cd7e4..9910dbc497f 100644 --- a/src/storage/copier/struct_column_chunk.cpp +++ b/src/storage/copier/struct_column_chunk.cpp @@ -227,11 +227,11 @@ std::string StructColumnChunk::parseStructFieldValue( } } -void StructColumnChunk::appendVal(const common::Value& listVal) { - assert(listVal.getDataType()->getPhysicalType() == PhysicalTypeID::STRUCT); - auto numElements = NestedVal::getChildrenSize(&listVal); +void StructColumnChunk::writeVal(const common::Value& val, uint64_t posToWrite) { + assert(val.getDataType()->getPhysicalType() == PhysicalTypeID::STRUCT); + auto numElements = NestedVal::getChildrenSize(&val); for (auto i = 0u; i < numElements; i++) { - childrenChunks[i]->appendVal(*NestedVal::getChildVal(&listVal, i)); + childrenChunks[i]->writeVal(*NestedVal::getChildVal(&val, i), posToWrite); } } diff --git a/src/storage/copier/var_sized_column_chunk.cpp b/src/storage/copier/var_sized_column_chunk.cpp index 940e23fe7ba..b6cd3257ebd 100644 --- a/src/storage/copier/var_sized_column_chunk.cpp +++ b/src/storage/copier/var_sized_column_chunk.cpp @@ -203,11 +203,11 @@ void VarSizedColumnChunk::copyValuesFromVarList( } } -void VarSizedColumnChunk::appendVal(const common::Value& val) { +void VarSizedColumnChunk::writeVal(const common::Value& val, uint64_t posToWrite) { assert(val.getDataType()->getPhysicalType() == PhysicalTypeID::STRING); - nullChunk->setNull(numElements, val.isNull()); + nullChunk->setNull(posToWrite, val.isNull()); auto strVal = val.getValue(); - setValueFromString(strVal.c_str(), strVal.length(), numElements++); + setValueFromString(strVal.c_str(), strVal.length(), posToWrite); } } // namespace storage diff --git a/src/storage/store/list_node_column.cpp b/src/storage/store/list_node_column.cpp index 07790248898..c48792ca1d9 100644 --- a/src/storage/store/list_node_column.cpp +++ b/src/storage/store/list_node_column.cpp @@ -11,57 +11,26 @@ namespace storage { void ListNodeColumn::scanInternal(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, common::ValueVector* resultVector) { resultVector->resetAuxiliaryBuffer(); - // Prepare offset for scan. auto offsetVector = std::make_unique(LogicalTypeID::INT64); offsetVector->setState(resultVector->state); - auto startNodeOffset = nodeIDVector->readNodeOffset(0); auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(startNodeOffset); - - auto startNodeOffsetInGroup = - startNodeOffset - (nodeGroupIdx << StorageConstants::NODE_GROUP_SIZE_LOG2); - auto pageCursor = - PageUtils::getPageElementCursorForPos(startNodeOffsetInGroup, numValuesPerPage); - auto chunkMeta = metadataDA->get(nodeGroupIdx, transaction->getType()); - pageCursor.pageIdx += chunkMeta.pageIdx; - NodeColumn::scanUnfiltered( - transaction, pageCursor, nodeIDVector->state->originalSize, offsetVector.get()); - - offset_t baseOffset = 0; - for (auto i = 0u; i < resultVector->state->selVector->selectedSize; i++) { - auto pos = resultVector->state->selVector->selectedPositions[i]; - auto nodeID = nodeIDVector->readNodeOffset(pos); - auto lastListOffset = i == 0 ? (nodeID == 0 ? 0 : getOffset(transaction, nodeID - 1)) : - offsetVector->getValue(pos - 1); - auto curListOffset = offsetVector->getValue(pos); - auto listLen = curListOffset - lastListOffset; - resultVector->setValue(pos, list_entry_t{(offset_t)baseOffset, (uint64_t)listLen}); - ListVector::reserveDataVector(resultVector, baseOffset + listLen); - dataNodeColumn->scanWithOffsets(transaction, nodeGroupIdx, lastListOffset, curListOffset, - ListVector::getDataVector(resultVector), baseOffset); - baseOffset += listLen; + scanListOffset(transaction, startNodeOffset, nodeGroupIdx, offsetVector.get()); + if (resultVector->state->selVector->isUnfiltered()) { + scanUnfiltered(transaction, nodeIDVector, offsetVector.get(), resultVector); + } else { + scanFiltered(transaction, nodeIDVector, offsetVector.get(), resultVector); } } -offset_t ListNodeColumn::getOffset(transaction::Transaction* transaction, offset_t nodeOffset) { - auto offsetVector = std::make_unique(common::LogicalTypeID::INT64); - auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(nodeOffset); - offsetVector->state = DataChunkState::getSingleValueDataChunkState(); - auto pageCursor = PageUtils::getPageElementCursorForPos(nodeOffset, numValuesPerPage); - pageCursor.pageIdx += metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; - readFromPage(transaction, pageCursor.pageIdx, [&](uint8_t* frame) -> void { - readNodeColumnFunc( - frame, pageCursor, offsetVector.get(), 0 /* posInVector */, 1 /* numValuesToRead */); - }); - return offsetVector->getValue(0); -} - -void ListNodeColumn::lookupValue(transaction::Transaction* transaction, common::offset_t nodeOffset, common::ValueVector* resultVector, uint32_t posInVector) { - auto listOffset = nodeOffset == 0 ? 0 : getOffset(transaction, nodeOffset - 1); - auto length = getOffset(transaction, nodeOffset) - listOffset; +void ListNodeColumn::lookupValue(transaction::Transaction* transaction, common::offset_t nodeOffset, + common::ValueVector* resultVector, uint32_t posInVector) { + auto listOffset = readListOffsetInStorage(transaction, nodeOffset); + auto length = readListOffsetInStorage(transaction, nodeOffset + 1) - + readListOffsetInStorage(transaction, nodeOffset); auto offsetInVector = posInVector == 0 ? 0 : resultVector->getValue(posInVector - 1); - resultVector->setValue(posInVector, list_entry_t{offsetInVector, (uint64_t)length}); - ListVector::reserveDataVector(resultVector, offsetInVector + length); + resultVector->setValue(posInVector, list_entry_t{offsetInVector, length}); + ListVector::resizeDataVector(resultVector, offsetInVector + length); dataNodeColumn->scanWithOffsets(transaction, getNodeGroupIdxFromNodeOffset(nodeOffset), listOffset, listOffset + length, ListVector::getDataVector(resultVector), offsetInVector); } @@ -75,6 +44,54 @@ page_idx_t ListNodeColumn::append( return numPagesFlushed + numPagesForDataColumn; } +void ListNodeColumn::scanUnfiltered(transaction::Transaction* transaction, + ValueVector* nodeIDVector, ValueVector* offsetVector, ValueVector* resultVector) { + auto startNodeOffset = nodeIDVector->readNodeOffset(0); + auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(startNodeOffset); + auto numValuesToScan = resultVector->state->selVector->selectedSize; + offset_t offsetInVector = 0; + for (auto i = 0u; i < numValuesToScan; i++) { + auto nodeOffset = nodeIDVector->readNodeOffset(i); + auto listLen = getListLength(transaction, offsetVector, nodeOffset, i); + resultVector->setValue(i, list_entry_t{offsetInVector, listLen}); + offsetInVector += listLen; + } + ListVector::resizeDataVector(resultVector, offsetInVector); + auto startListOffsetInStorage = + getListOffsetInStorage(transaction, offsetVector, startNodeOffset, 0); + auto endListOffsetInStorage = getListOffsetInStorage( + transaction, offsetVector, nodeIDVector->readNodeOffset(numValuesToScan), numValuesToScan); + dataNodeColumn->scanWithOffsets(transaction, nodeGroupIdx, startListOffsetInStorage, + endListOffsetInStorage, ListVector::getDataVector(resultVector)); +} + +void ListNodeColumn::scanFiltered(transaction::Transaction* transaction, + common::ValueVector* nodeIDVector, common::ValueVector* offsetVector, + common::ValueVector* resultVector) { + auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(nodeIDVector->readNodeOffset(0)); + offset_t listOffset = 0; + for (auto i = 0u; i < resultVector->state->selVector->selectedSize; i++) { + auto pos = resultVector->state->selVector->selectedPositions[i]; + auto nodeOffset = nodeIDVector->readNodeOffset(pos); + auto listLen = getListLength(transaction, offsetVector, nodeOffset, pos); + resultVector->setValue(pos, list_entry_t{(offset_t)listOffset, (uint64_t)listLen}); + listOffset += listLen; + } + ListVector::resizeDataVector(resultVector, listOffset); + listOffset = 0; + for (auto i = 0u; i < resultVector->state->selVector->selectedSize; i++) { + auto pos = resultVector->state->selVector->selectedPositions[i]; + auto nodeOffset = nodeIDVector->readNodeOffset(pos); + auto startOffsetInStorageToScan = + getListOffsetInStorage(transaction, offsetVector, nodeOffset, pos); + auto endOffsetInStorageToScan = + getListOffsetInStorage(transaction, offsetVector, nodeOffset + 1, pos + 1); + dataNodeColumn->scanWithOffsets(transaction, nodeGroupIdx, startOffsetInStorageToScan, + endOffsetInStorageToScan, ListVector::getDataVector(resultVector), listOffset); + listOffset += resultVector->getValue(pos).size; + } +} + void ListNodeColumn::checkpointInMemory() { NodeColumn::checkpointInMemory(); dataNodeColumn->checkpointInMemory(); @@ -88,28 +105,53 @@ void ListNodeColumn::rollbackInMemory() { void ListNodeColumn::scanWithOffsets(transaction::Transaction* transaction, node_group_idx_t nodeGroupIdx, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector* resultVector, - uint64_t startPosInVector) { + uint64_t posToWriteListEntry) { auto offsetVector = std::make_unique(LogicalTypeID::INT64); NodeColumn::scanWithOffsets( transaction, nodeGroupIdx, startOffsetInGroup, endOffsetInGroup, offsetVector.get()); - auto beginOffset = startOffsetInGroup == 0 ? 0 : getOffset(transaction, startOffsetInGroup - 1); - offset_t baseOffset = startPosInVector == 0 ? - 0 : - resultVector->getValue(startPosInVector - 1).offset + - resultVector->getValue(startPosInVector - 1).size; - auto posToWrite = baseOffset; + offset_t listOffset = + posToWriteListEntry == 0 ? + 0 : + resultVector->getValue(posToWriteListEntry - 1).offset + + resultVector->getValue(posToWriteListEntry - 1).size; + auto posToWriteListData = listOffset; auto numValues = endOffsetInGroup - startOffsetInGroup; for (auto i = 0u; i < numValues; i++) { - auto lastNodeOffset = i == 0 ? beginOffset : offsetVector->getValue(i - 1); - auto curNodeOffset = offsetVector->getValue(i); - auto length = curNodeOffset - lastNodeOffset; - resultVector->setValue(i + startPosInVector, list_entry_t{baseOffset, (uint64_t)length}); - ListVector::reserveDataVector(resultVector, baseOffset + length); - baseOffset += length; + auto length = getListLength(transaction, offsetVector.get(), startOffsetInGroup + i, i); + resultVector->setValue(i + posToWriteListEntry, list_entry_t{listOffset, length}); + listOffset += length; } - dataNodeColumn->scanWithOffsets(transaction, nodeGroupIdx, beginOffset, - offsetVector->getValue(numValues - 1), ListVector::getDataVector(resultVector), - posToWrite); + ListVector::resizeDataVector(resultVector, listOffset); + dataNodeColumn->scanWithOffsets(transaction, nodeGroupIdx, + getListOffsetInStorage(transaction, offsetVector.get(), startOffsetInGroup, 0), + getListOffsetInStorage(transaction, offsetVector.get(), endOffsetInGroup, numValues), + ListVector::getDataVector(resultVector), posToWriteListData); +} + +offset_t ListNodeColumn::readOffset( + transaction::Transaction* transaction, common::offset_t valuePos) { + auto offsetVector = std::make_unique(common::LogicalTypeID::INT64); + auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(valuePos); + offsetVector->state = DataChunkState::getSingleValueDataChunkState(); + auto pageCursor = PageUtils::getPageElementCursorForPos(valuePos, numValuesPerPage); + pageCursor.pageIdx += metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; + readFromPage(transaction, pageCursor.pageIdx, [&](uint8_t* frame) -> void { + readNodeColumnFunc( + frame, pageCursor, offsetVector.get(), 0 /* posInVector */, 1 /* numValuesToRead */); + }); + return offsetVector->getValue(0); +} + +void ListNodeColumn::scanListOffset(transaction::Transaction* transaction, + common::offset_t startNodeOffset, common::node_group_idx_t nodeGroupIdx, + common::ValueVector* offsetVector) { + auto startNodeOffsetInGroup = + startNodeOffset - (nodeGroupIdx << common::StorageConstants::NODE_GROUP_SIZE_LOG2); + auto pageCursor = + PageUtils::getPageElementCursorForPos(startNodeOffsetInGroup, numValuesPerPage); + pageCursor.pageIdx += metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; + NodeColumn::scanUnfiltered( + transaction, pageCursor, offsetVector->state->originalSize, offsetVector); } } // namespace storage diff --git a/src/storage/store/node_column.cpp b/src/storage/store/node_column.cpp index d8e8bb81730..9f7f7e8d598 100644 --- a/src/storage/store/node_column.cpp +++ b/src/storage/store/node_column.cpp @@ -473,7 +473,7 @@ std::unique_ptr NodeColumnFactory::createNodeColumn(const LogicalTyp throw NotImplementedException("NodeColumnFactory::createNodeColumn"); } } -} // namespace storage +} } // namespace storage } // namespace kuzu diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index ce18ad3b835..327c89127ae 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -832,17 +832,5 @@ TEST_F(TinySnbDDLTest, RenamePropertyRecovery) { renameProperty(TransactionTestType::RECOVERY); } -class TinyLoadTest : public DBTest { - -public: - std::string getInputDir() override { - return TestHelper::appendKuzuRootPath("dataset/shortest-path-tests/"); - } -}; - -TEST_F(TinyLoadTest, dasd) { - printf("%s", conn->query("MATCH (p:person) RETURN p.usedNames limit 500")->toString().c_str()); -} - } // namespace testing } // namespace kuzu