From 403e3cb1051327e3ee423af0c172127437fdfa78 Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Thu, 25 Apr 2024 15:46:35 +0800 Subject: [PATCH] add read state to string column --- src/include/storage/store/column.h | 2 +- src/include/storage/store/dictionary_column.h | 9 ++++++- src/include/storage/store/string_column.h | 3 +++ src/storage/store/dictionary_column.cpp | 18 ++++++++++---- src/storage/store/string_column.cpp | 24 ++++++++++++++----- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/include/storage/store/column.h b/src/include/storage/store/column.h index 93fe62aeec..f65b50a492 100644 --- a/src/include/storage/store/column.h +++ b/src/include/storage/store/column.h @@ -47,7 +47,7 @@ class Column { uint64_t numValuesPerPage = UINT64_MAX; common::node_group_idx_t nodeGroupIdx = common::INVALID_NODE_GROUP_IDX; std::unique_ptr nullState = nullptr; - // Used for struct columns. + // Used for struct/list/string columns. std::vector childrenStates; }; diff --git a/src/include/storage/store/dictionary_column.h b/src/include/storage/store/dictionary_column.h index 446de039ec..ae28d330e4 100644 --- a/src/include/storage/store/dictionary_column.h +++ b/src/include/storage/store/dictionary_column.h @@ -8,17 +8,24 @@ namespace storage { class DictionaryColumn { public: + static constexpr common::vector_idx_t DATA_COLUMN_CHILD_READ_STATE_IDX = 0; + static constexpr common::vector_idx_t OFFSET_COLUMN_CHILD_READ_STATE_IDX = 1; + DictionaryColumn(const std::string& name, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats stats, bool enableCompression); + void initReadState(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, + common::offset_t startOffsetInChunk, Column::ReadState& columnReadState); + void append(common::node_group_idx_t nodeGroupIdx, const DictionaryChunk& dictChunk); void scan(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, DictionaryChunk& dictChunk); // Offsets to scan should be a sorted list of pairs mapping the index of the entry in the string // dictionary (as read from the index column) to the output index in the result vector to store // the string. - void scan(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, + void scan(transaction::Transaction* transaction, const Column::ReadState& offsetState, + const Column::ReadState& dataState, std::vector>& offsetsToScan, common::ValueVector* resultVector, const ColumnChunkMetadata& indexMeta); diff --git a/src/include/storage/store/string_column.h b/src/include/storage/store/string_column.h index f49087e921..78ba7b141c 100644 --- a/src/include/storage/store/string_column.h +++ b/src/include/storage/store/string_column.h @@ -12,6 +12,9 @@ class StringColumn final : public Column { BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics, bool enableCompression); + void initReadState(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, + common::offset_t startOffsetInChunk, ReadState& columnReadState) override; + void scan(transaction::Transaction* transaction, ReadState& readState, common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector* resultVector, uint64_t offsetInVector = 0) override; diff --git a/src/storage/store/dictionary_column.cpp b/src/storage/store/dictionary_column.cpp index b5f5760fe8..26ab28b5e2 100644 --- a/src/storage/store/dictionary_column.cpp +++ b/src/storage/store/dictionary_column.cpp @@ -13,7 +13,7 @@ using string_offset_t = DictionaryChunk::string_offset_t; DictionaryColumn::DictionaryColumn(const std::string& name, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, - transaction::Transaction* transaction, RWPropertyStats stats, bool enableCompression) { + Transaction* transaction, RWPropertyStats stats, bool enableCompression) { auto dataColName = StorageUtils::getColumnName(name, StorageUtils::ColumnType::DATA, ""); dataColumn = std::make_unique(dataColName, *LogicalType::UINT8(), *metaDAHeaderInfo.childrenInfos[0], dataFH, metadataFH, bufferManager, wal, transaction, @@ -24,6 +24,16 @@ DictionaryColumn::DictionaryColumn(const std::string& name, const MetadataDAHInf stats, enableCompression, false /*requireNullColumn*/); } +void DictionaryColumn::initReadState(Transaction* transaction, node_group_idx_t nodeGroupIdx, + offset_t startOffsetInChunk, Column::ReadState& readState) { + // We put states for data and offset columns into childrenStates. + readState.childrenStates.resize(2); + dataColumn->initReadState(transaction, nodeGroupIdx, startOffsetInChunk, + readState.childrenStates[DATA_COLUMN_CHILD_READ_STATE_IDX]); + offsetColumn->initReadState(transaction, nodeGroupIdx, startOffsetInChunk, + readState.childrenStates[OFFSET_COLUMN_CHILD_READ_STATE_IDX]); +} + void DictionaryColumn::append(node_group_idx_t nodeGroupIdx, const DictionaryChunk& dictChunk) { KU_ASSERT(dictChunk.sanityCheck()); dataColumn->append(dictChunk.getStringDataChunk(), nodeGroupIdx); @@ -49,12 +59,10 @@ void DictionaryColumn::scan(Transaction* transaction, node_group_idx_t nodeGroup offsetColumn->scan(transaction, nodeGroupIdx, offsetChunk); } -void DictionaryColumn::scan(Transaction* transaction, node_group_idx_t nodeGroupIdx, +void DictionaryColumn::scan(Transaction* transaction, const Column::ReadState& offsetState, + const Column::ReadState& dataState, std::vector>& offsetsToScan, ValueVector* resultVector, const ColumnChunkMetadata& indexMeta) { - auto offsetState = offsetColumn->getReadState(transaction->getType(), nodeGroupIdx); - auto dataState = dataColumn->getReadState(transaction->getType(), nodeGroupIdx); - string_index_t firstOffsetToScan, lastOffsetToScan; auto comp = [](auto pair1, auto pair2) { return pair1.first < pair2.first; }; auto duplicationFactor = (double)offsetState.metadata.numValues / indexMeta.numValues; diff --git a/src/storage/store/string_column.cpp b/src/storage/store/string_column.cpp index 545de48de3..cd30b65997 100644 --- a/src/storage/store/string_column.cpp +++ b/src/storage/store/string_column.cpp @@ -25,6 +25,12 @@ StringColumn::StringColumn(std::string name, LogicalType dataType, dictionary{name, metaDAHeaderInfo, dataFH, metadataFH, bufferManager, wal, transaction, stats, enableCompression} {} +void StringColumn::initReadState(Transaction* transaction, node_group_idx_t nodeGroupIdx, + offset_t startOffsetInChunk, ReadState& readState) { + Column::initReadState(transaction, nodeGroupIdx, startOffsetInChunk, readState); + dictionary.initReadState(transaction, nodeGroupIdx, startOffsetInChunk, readState); +} + void StringColumn::scan(Transaction* transaction, ReadState& readState, offset_t startOffsetInGroup, offset_t endOffsetInGroup, ValueVector* resultVector, uint64_t offsetInVector) { nullColumn->scan(transaction, *readState.nullState, startOffsetInGroup, endOffsetInGroup, @@ -121,8 +127,10 @@ void StringColumn::scanUnfiltered(transaction::Transaction* transaction, ReadSta // All scanned values are null return; } - dictionary.scan(transaction, readState.nodeGroupIdx, offsetsToScan, resultVector, - readState.metadata); + dictionary.scan(transaction, + readState.childrenStates[DictionaryColumn::OFFSET_COLUMN_CHILD_READ_STATE_IDX], + readState.childrenStates[DictionaryColumn::DATA_COLUMN_CHILD_READ_STATE_IDX], offsetsToScan, + resultVector, readState.metadata); } void StringColumn::scanFiltered(transaction::Transaction* transaction, ReadState& readState, @@ -143,8 +151,10 @@ void StringColumn::scanFiltered(transaction::Transaction* transaction, ReadState // All scanned values are null return; } - dictionary.scan(transaction, readState.nodeGroupIdx, offsetsToScan, resultVector, - readState.metadata); + dictionary.scan(transaction, + readState.childrenStates[DictionaryColumn::OFFSET_COLUMN_CHILD_READ_STATE_IDX], + readState.childrenStates[DictionaryColumn::DATA_COLUMN_CHILD_READ_STATE_IDX], offsetsToScan, + resultVector, readState.metadata); } void StringColumn::lookupInternal(Transaction* transaction, ReadState& readState, @@ -166,8 +176,10 @@ void StringColumn::lookupInternal(Transaction* transaction, ReadState& readState // All scanned values are null return; } - dictionary.scan(transaction, readState.nodeGroupIdx, offsetsToScan, resultVector, - readState.metadata); + dictionary.scan(transaction, + readState.childrenStates[DictionaryColumn::OFFSET_COLUMN_CHILD_READ_STATE_IDX], + readState.childrenStates[DictionaryColumn::DATA_COLUMN_CHILD_READ_STATE_IDX], offsetsToScan, + resultVector, readState.metadata); } bool StringColumn::canCommitInPlace(transaction::Transaction* transaction,