From 05195ed675a77f5a9818e28123a8beb31bb35c2a Mon Sep 17 00:00:00 2001 From: xiyang Date: Sun, 2 Jul 2023 13:10:15 -0400 Subject: [PATCH] Refactor scan rel table --- .../operator/scan/generic_scan_rel_tables.h | 66 ++++---- .../processor/operator/scan/scan_rel_table.h | 58 +++++-- .../operator/scan/scan_rel_table_columns.h | 27 ++-- .../operator/scan/scan_rel_table_lists.h | 27 ++-- src/processor/mapper/map_extend.cpp | 142 ++++++++---------- .../operator/scan/generic_scan_rel_tables.cpp | 101 +++++++------ .../operator/scan/scan_rel_table.cpp | 8 +- .../operator/scan/scan_rel_table_columns.cpp | 16 +- .../operator/scan/scan_rel_table_lists.cpp | 16 +- 9 files changed, 242 insertions(+), 219 deletions(-) diff --git a/src/include/processor/operator/scan/generic_scan_rel_tables.h b/src/include/processor/operator/scan/generic_scan_rel_tables.h index a4fedd7711..93d699c8c5 100644 --- a/src/include/processor/operator/scan/generic_scan_rel_tables.h +++ b/src/include/processor/operator/scan/generic_scan_rel_tables.h @@ -7,60 +7,54 @@ namespace kuzu { namespace processor { -class RelTableDataCollection { +class RelTableCollectionScanner { public: - RelTableDataCollection(std::vector relTableDatas, - std::vector> tableScanStates) - : relTableDatas{std::move(relTableDatas)}, tableScanStates{std::move(tableScanStates)} {} + RelTableCollectionScanner(std::vector> scanInfos) + : scanInfos{std::move(scanInfos)} {} - void resetState(); - inline uint32_t getNumTablesInCollection() { return relTableDatas.size(); } + inline void resetState() { + currentTableIdx = 0; + nextTableIdx = 0; + } + void init(); bool scan(common::ValueVector* inVector, const std::vector& outputVectors, transaction::Transaction* transaction); - std::unique_ptr clone() const; + std::unique_ptr clone() const; private: - std::vector relTableDatas; - std::vector> tableScanStates; - - uint32_t currentRelTableIdxToScan = UINT32_MAX; - uint32_t nextRelTableIdxToScan = 0; + std::vector> scanInfos; + std::vector> scanStates; + uint32_t currentTableIdx = UINT32_MAX; + uint32_t nextTableIdx = 0; }; -class GenericScanRelTables : public ScanRelTable { +class ScanMultiRelTable : public ScanRelTable { + using node_table_id_scanner_map_t = + std::unordered_map>; + public: - GenericScanRelTables(const DataPos& inNodeIDVectorPos, std::vector outputVectorsPos, - std::unordered_map> - relTableCollectionPerNodeTable, - std::unique_ptr child, uint32_t id, const std::string& paramsString) - : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), - PhysicalOperatorType::GENERIC_SCAN_REL_TABLES, std::move(child), id, paramsString}, - relTableCollectionPerNodeTable{std::move(relTableCollectionPerNodeTable)} {} + ScanMultiRelTable(std::unique_ptr posInfo, + node_table_id_scanner_map_t scannerPerNodeTable, std::unique_ptr child, + uint32_t id, const std::string& paramsString) + : ScanRelTable{std::move(posInfo), PhysicalOperatorType::GENERIC_SCAN_REL_TABLES, + std::move(child), id, paramsString}, + scannerPerNodeTable{std::move(scannerPerNodeTable)} {} - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) final; - bool getNextTuplesInternal(ExecutionContext* context) override; + bool getNextTuplesInternal(ExecutionContext* context) final; - std::unique_ptr clone() override { - std::unordered_map> - clonedCollections; - for (auto& [tableID, propertyCollection] : relTableCollectionPerNodeTable) { - clonedCollections.insert({tableID, propertyCollection->clone()}); - } - return make_unique(inNodeIDVectorPos, outputVectorsPos, - std::move(clonedCollections), children[0]->clone(), id, paramsString); - } + std::unique_ptr clone() final; private: - bool scanCurrentRelTableDataCollection(); - void initCurrentRelTableDataCollection(const common::nodeID_t& nodeID); + void resetState(); + void initCurrentScanner(const common::nodeID_t& nodeID); private: - std::unordered_map> - relTableCollectionPerNodeTable; - RelTableDataCollection* currentRelTableDataCollection = nullptr; + node_table_id_scanner_map_t scannerPerNodeTable; + RelTableCollectionScanner* currentScanner = nullptr; }; } // namespace processor diff --git a/src/include/processor/operator/scan/scan_rel_table.h b/src/include/processor/operator/scan/scan_rel_table.h index e165464756..49c5052b16 100644 --- a/src/include/processor/operator/scan/scan_rel_table.h +++ b/src/include/processor/operator/scan/scan_rel_table.h @@ -6,23 +6,59 @@ namespace kuzu { namespace processor { +struct ScanRelTalePosInfo { + DataPos inNodeVectorPos; + DataPos outNodeVectorPos; + std::vector outVectorsPos; + + ScanRelTalePosInfo(const DataPos& inNodeVectorPos, const DataPos& outNodeVectorPos, + std::vector outVectorsPos) + : inNodeVectorPos{inNodeVectorPos}, outNodeVectorPos{outNodeVectorPos}, + outVectorsPos{std::move(outVectorsPos)} {} + ScanRelTalePosInfo(const ScanRelTalePosInfo& other) + : inNodeVectorPos{other.inNodeVectorPos}, outNodeVectorPos{other.outNodeVectorPos}, + outVectorsPos{other.outVectorsPos} {} + + inline std::unique_ptr copy() const { + return std::make_unique(*this); + } +}; + +struct RelTableScanInfo { + storage::RelTableDataType relTableDataType; + storage::DirectedRelTableData* tableData; + storage::RelStatistics* relStats; + std::vector propertyIds; + + RelTableScanInfo(storage::RelTableDataType relTableDataType, + storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats, + std::vector propertyIds) + : relTableDataType{relTableDataType}, tableData{tableData}, relStats{relStats}, + propertyIds{std::move(propertyIds)} {} + RelTableScanInfo(const RelTableScanInfo& other) + : relTableDataType{other.relTableDataType}, tableData{other.tableData}, + relStats{other.relStats}, propertyIds{other.propertyIds} {} + + inline std::unique_ptr copy() const { + return std::make_unique(*this); + } +}; + class ScanRelTable : public PhysicalOperator { protected: - ScanRelTable(const DataPos& inNodeIDVectorPos, std::vector outputVectorsPos, - PhysicalOperatorType operatorType, std::unique_ptr child, uint32_t id, - const std::string& paramsString) - : PhysicalOperator{operatorType, std::move(child), id, paramsString}, - inNodeIDVectorPos{inNodeIDVectorPos}, outputVectorsPos{std::move(outputVectorsPos)} {} + ScanRelTable(std::unique_ptr posInfo, PhysicalOperatorType operatorType, + std::unique_ptr child, uint32_t id, const std::string& paramsString) + : PhysicalOperator{operatorType, std::move(child), id, paramsString}, posInfo{std::move( + posInfo)} {} void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; protected: - // vector positions - DataPos inNodeIDVectorPos; - std::vector outputVectorsPos; - // vectors - common::ValueVector* inNodeIDVector; - std::vector outputVectors; + std::unique_ptr posInfo; + + common::ValueVector* inNodeVector; + common::ValueVector* outNodeVector; + std::vector outVectors; }; } // namespace processor diff --git a/src/include/processor/operator/scan/scan_rel_table_columns.h b/src/include/processor/operator/scan/scan_rel_table_columns.h index 22684faedd..08df961412 100644 --- a/src/include/processor/operator/scan/scan_rel_table_columns.h +++ b/src/include/processor/operator/scan/scan_rel_table_columns.h @@ -9,27 +9,24 @@ namespace processor { class ScanRelTableColumns : public ScanRelTable, public SelVectorOverWriter { public: - ScanRelTableColumns(storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats, - std::vector propertyIds, const DataPos& inNodeIDVectorPos, - std::vector outputVectorsPos, std::unique_ptr child, uint32_t id, - const std::string& paramsString) - : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), - PhysicalOperatorType::SCAN_REL_TABLE_COLUMNS, std::move(child), id, paramsString}, - tableData{tableData} { - scanState = std::make_unique( - relStats, std::move(propertyIds), storage::RelTableDataType::COLUMNS); - } + ScanRelTableColumns(std::unique_ptr scanInfo, + std::unique_ptr posInfo, std::unique_ptr child, + uint32_t id, const std::string& paramsString) + : ScanRelTable{std::move(posInfo), PhysicalOperatorType::SCAN_REL_TABLE_COLUMNS, + std::move(child), id, paramsString}, + scanInfo{std::move(scanInfo)} {} + + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) final; - bool getNextTuplesInternal(ExecutionContext* context) override; + bool getNextTuplesInternal(ExecutionContext* context) final; inline std::unique_ptr clone() override { - return std::make_unique(tableData, scanState->relStats, - scanState->propertyIds, inNodeIDVectorPos, outputVectorsPos, children[0]->clone(), id, - paramsString); + return std::make_unique( + scanInfo->copy(), posInfo->copy(), children[0]->clone(), id, paramsString); } private: - storage::DirectedRelTableData* tableData; + std::unique_ptr scanInfo; std::unique_ptr scanState; }; diff --git a/src/include/processor/operator/scan/scan_rel_table_lists.h b/src/include/processor/operator/scan/scan_rel_table_lists.h index f92b21eae8..a9b650260b 100644 --- a/src/include/processor/operator/scan/scan_rel_table_lists.h +++ b/src/include/processor/operator/scan/scan_rel_table_lists.h @@ -8,27 +8,24 @@ namespace processor { class ScanRelTableLists : public ScanRelTable { public: - ScanRelTableLists(storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats, - std::vector propertyIds, const DataPos& inNodeIDVectorPos, - std::vector outputVectorsPos, std::unique_ptr child, uint32_t id, - const std::string& paramsString) - : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), - PhysicalOperatorType::SCAN_REL_TABLE_LISTS, std::move(child), id, paramsString}, - tableData{tableData} { - scanState = std::make_unique( - relStats, std::move(propertyIds), storage::RelTableDataType::LISTS); - } + ScanRelTableLists(std::unique_ptr scanInfo, + std::unique_ptr posInfo, std::unique_ptr child, + uint32_t id, const std::string& paramsString) + : ScanRelTable{std::move(posInfo), PhysicalOperatorType::SCAN_REL_TABLE_LISTS, + std::move(child), id, paramsString}, + scanInfo{std::move(scanInfo)} {} + + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) final; - bool getNextTuplesInternal(ExecutionContext* context) override; + bool getNextTuplesInternal(ExecutionContext* context) final; inline std::unique_ptr clone() override { - return make_unique(tableData, scanState->relStats, - scanState->propertyIds, inNodeIDVectorPos, outputVectorsPos, children[0]->clone(), id, - paramsString); + return make_unique( + scanInfo->copy(), posInfo->copy(), children[0]->clone(), id, paramsString); } private: - storage::DirectedRelTableData* tableData; + std::unique_ptr scanInfo; std::unique_ptr scanState; }; diff --git a/src/processor/mapper/map_extend.cpp b/src/processor/mapper/map_extend.cpp index 7aba897578..cd061710e1 100644 --- a/src/processor/mapper/map_extend.cpp +++ b/src/processor/mapper/map_extend.cpp @@ -12,24 +12,11 @@ using namespace kuzu::storage; namespace kuzu { namespace processor { -static std::vector populatePropertyIds( - table_id_t relID, const expression_vector& properties) { - std::vector outputColumns; - for (auto& expression : properties) { - auto propertyExpression = (PropertyExpression*)expression.get(); - outputColumns.push_back(propertyExpression->getPropertyID(relID)); - } - return outputColumns; -} - -static std::pair> -getRelTableDataAndScanState(RelDataDirection direction, catalog::RelTableSchema* relTableSchema, - table_id_t boundNodeTableID, RelsStore& relsStore, table_id_t relTableID, - const expression_vector& properties) { - if (relTableSchema->getBoundTableID(direction) != boundNodeTableID) { - // No data stored for given direction and boundNode. - return std::make_pair(nullptr, nullptr); - } +static std::unique_ptr getRelTableScanInfo(RelDataDirection direction, + RelsStore& relsStore, table_id_t relTableID, const expression_vector& properties) { + auto relTableDataType = relsStore.isSingleMultiplicityInDirection(direction, relTableID) ? + RelTableDataType::COLUMNS : + RelTableDataType::LISTS; auto relData = relsStore.getRelTable(relTableID)->getDirectedTableData(direction); std::vector propertyIds; for (auto& property : properties) { @@ -39,59 +26,59 @@ getRelTableDataAndScanState(RelDataDirection direction, catalog::RelTableSchema* INVALID_PROPERTY_ID); } auto relStats = relsStore.getRelsStatistics().getRelStatistics(relTableID); - auto scanState = make_unique(relStats, std::move(propertyIds), - relsStore.isSingleMultiplicityInDirection(direction, relTableID) ? - RelTableDataType::COLUMNS : - RelTableDataType::LISTS); - return std::make_pair(relData, std::move(scanState)); + return std::make_unique( + relTableDataType, relData, relStats, std::move(propertyIds)); } -static std::unique_ptr populateRelTableDataCollection( +static std::unique_ptr populateRelTableCollectionScanner( table_id_t boundNodeTableID, const RelExpression& rel, ExtendDirection extendDirection, const expression_vector& properties, RelsStore& relsStore, const catalog::Catalog& catalog) { - std::vector relTableDatas; - std::vector> tableScanStates; + std::vector> scanInfos; for (auto relTableID : rel.getTableIDs()) { auto relTableSchema = catalog.getReadOnlyVersion()->getRelTableSchema(relTableID); switch (extendDirection) { case ExtendDirection::FWD: { - auto [relTableData, scanState] = getRelTableDataAndScanState(RelDataDirection::FWD, - relTableSchema, boundNodeTableID, relsStore, relTableID, properties); - if (relTableData != nullptr && scanState != nullptr) { - relTableDatas.push_back(relTableData); - tableScanStates.push_back(std::move(scanState)); + if (relTableSchema->getBoundTableID(RelDataDirection::FWD) == boundNodeTableID) { + auto scanInfo = + getRelTableScanInfo(RelDataDirection::FWD, relsStore, relTableID, properties); + if (scanInfo != nullptr) { + scanInfos.push_back(std::move(scanInfo)); + } } } break; case ExtendDirection::BWD: { - auto [relTableData, scanState] = getRelTableDataAndScanState(RelDataDirection::BWD, - relTableSchema, boundNodeTableID, relsStore, relTableID, properties); - if (relTableData != nullptr && scanState != nullptr) { - relTableDatas.push_back(relTableData); - tableScanStates.push_back(std::move(scanState)); + if (relTableSchema->getBoundTableID(RelDataDirection::BWD) == boundNodeTableID) { + auto scanInfo = + getRelTableScanInfo(RelDataDirection::BWD, relsStore, relTableID, properties); + if (scanInfo != nullptr) { + scanInfos.push_back(std::move(scanInfo)); + } } } break; case ExtendDirection::BOTH: { - auto [relTableDataFWD, scanStateFWD] = - getRelTableDataAndScanState(RelDataDirection::FWD, relTableSchema, boundNodeTableID, - relsStore, relTableID, properties); - if (relTableDataFWD != nullptr && scanStateFWD != nullptr) { - relTableDatas.push_back(relTableDataFWD); - tableScanStates.push_back(std::move(scanStateFWD)); + if (relTableSchema->getBoundTableID(RelDataDirection::FWD) == boundNodeTableID) { + auto scanInfoFWD = + getRelTableScanInfo(RelDataDirection::FWD, relsStore, relTableID, properties); + if (scanInfoFWD != nullptr) { + scanInfos.push_back(std::move(scanInfoFWD)); + } } - auto [relTableDataBWD, scanStateBWD] = - getRelTableDataAndScanState(RelDataDirection::BWD, relTableSchema, boundNodeTableID, - relsStore, relTableID, properties); - if (relTableDataBWD != nullptr && scanStateBWD != nullptr) { - relTableDatas.push_back(relTableDataBWD); - tableScanStates.push_back(std::move(scanStateBWD)); + if (relTableSchema->getBoundTableID(RelDataDirection::BWD) == boundNodeTableID) { + auto scanInfoBWD = + getRelTableScanInfo(RelDataDirection::BWD, relsStore, relTableID, properties); + if (scanInfoBWD != nullptr) { + scanInfos.push_back(std::move(scanInfoBWD)); + } } } break; default: - throw common::NotImplementedException("populateRelTableDataCollection"); + throw common::NotImplementedException("populateRelTableCollectionScanner"); } } - return std::make_unique( - std::move(relTableDatas), std::move(tableScanStates)); + if (scanInfos.empty()) { + return nullptr; + } + return std::make_unique(std::move(scanInfos)); } std::unique_ptr PlanMapper::mapLogicalExtendToPhysical( @@ -104,51 +91,40 @@ std::unique_ptr PlanMapper::mapLogicalExtendToPhysical( auto rel = extend->getRel(); auto extendDirection = extend->getDirection(); auto prevOperator = mapLogicalOperatorToPhysical(logicalOperator->getChild(0)); - auto inNodeIDVectorPos = - DataPos(inSchema->getExpressionPos(*boundNode->getInternalIDProperty())); - auto outNodeIDVectorPos = - DataPos(outSchema->getExpressionPos(*nbrNode->getInternalIDProperty())); - std::vector outputVectorsPos; - outputVectorsPos.push_back(outNodeIDVectorPos); + auto inNodeVectorPos = DataPos(inSchema->getExpressionPos(*boundNode->getInternalIDProperty())); + auto outNodeVectorPos = DataPos(outSchema->getExpressionPos(*nbrNode->getInternalIDProperty())); + std::vector outVectorsPos; + outVectorsPos.push_back(outNodeVectorPos); for (auto& expression : extend->getProperties()) { - outputVectorsPos.emplace_back(outSchema->getExpressionPos(*expression)); + outVectorsPos.emplace_back(outSchema->getExpressionPos(*expression)); } + auto posInfo = + std::make_unique(inNodeVectorPos, outNodeVectorPos, outVectorsPos); auto& relsStore = storageManager.getRelsStore(); if (!rel->isMultiLabeled() && !boundNode->isMultiLabeled() && extendDirection != planner::ExtendDirection::BOTH) { auto relDataDirection = ExtendDirectionUtils::getRelDataDirection(extendDirection); - auto relTableID = rel->getSingleTableID(); - auto relTableStats = relsStore.getRelsStatistics().getRelStatistics(relTableID); - if (relsStore.isSingleMultiplicityInDirection(relDataDirection, relTableID)) { - auto propertyIds = populatePropertyIds(relTableID, extend->getProperties()); - return make_unique( - relsStore.getRelTable(relTableID)->getDirectedTableData(relDataDirection), - relTableStats, std::move(propertyIds), inNodeIDVectorPos, - std::move(outputVectorsPos), std::move(prevOperator), getOperatorID(), - extend->getExpressionsForPrinting()); + auto scanInfo = getRelTableScanInfo( + relDataDirection, relsStore, rel->getSingleTableID(), extend->getProperties()); + if (scanInfo->relTableDataType == storage::RelTableDataType::COLUMNS) { + return make_unique(std::move(scanInfo), std::move(posInfo), + std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } else { - assert(!relsStore.isSingleMultiplicityInDirection(relDataDirection, relTableID)); - auto propertyIds = populatePropertyIds(relTableID, extend->getProperties()); - return make_unique( - relsStore.getRelTable(relTableID)->getDirectedTableData(relDataDirection), - relTableStats, std::move(propertyIds), inNodeIDVectorPos, - std::move(outputVectorsPos), std::move(prevOperator), getOperatorID(), - extend->getExpressionsForPrinting()); + assert(scanInfo->relTableDataType == storage::RelTableDataType::LISTS); + return make_unique(std::move(scanInfo), std::move(posInfo), + std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } } else { // map to generic extend - std::unordered_map> - relTableCollectionPerNodeTable; + std::unordered_map> scanners; for (auto boundNodeTableID : boundNode->getTableIDs()) { - auto relTableCollection = populateRelTableDataCollection(boundNodeTableID, *rel, + auto scanner = populateRelTableCollectionScanner(boundNodeTableID, *rel, extendDirection, extend->getProperties(), relsStore, *catalog); - if (relTableCollection->getNumTablesInCollection() > 0) { - relTableCollectionPerNodeTable.insert( - {boundNodeTableID, std::move(relTableCollection)}); + if (scanner != nullptr) { + scanners.insert({boundNodeTableID, std::move(scanner)}); } } - return std::make_unique(inNodeIDVectorPos, outputVectorsPos, - std::move(relTableCollectionPerNodeTable), std::move(prevOperator), getOperatorID(), - extend->getExpressionsForPrinting()); + return std::make_unique(std::move(posInfo), std::move(scanners), + std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } } diff --git a/src/processor/operator/scan/generic_scan_rel_tables.cpp b/src/processor/operator/scan/generic_scan_rel_tables.cpp index e05f55d7ca..d6f7be2421 100644 --- a/src/processor/operator/scan/generic_scan_rel_tables.cpp +++ b/src/processor/operator/scan/generic_scan_rel_tables.cpp @@ -7,90 +7,101 @@ using namespace kuzu::transaction; namespace kuzu { namespace processor { -void RelTableDataCollection::resetState() { - currentRelTableIdxToScan = 0; - nextRelTableIdxToScan = 0; +void RelTableCollectionScanner::init() { + for (auto& scanInfo : scanInfos) { + scanStates.push_back(std::make_unique( + scanInfo->relStats, scanInfo->propertyIds, scanInfo->relTableDataType)); + } } -bool RelTableDataCollection::scan(ValueVector* inVector, +bool RelTableCollectionScanner::scan(ValueVector* inVector, const std::vector& outputVectors, Transaction* transaction) { do { - if (tableScanStates[currentRelTableIdxToScan]->hasMoreAndSwitchSourceIfNecessary()) { - assert(tableScanStates[currentRelTableIdxToScan]->relTableDataType == - storage::RelTableDataType::LISTS); - relTableDatas[currentRelTableIdxToScan]->scan( - transaction, *tableScanStates[currentRelTableIdxToScan], inVector, outputVectors); + if (scanStates[currentTableIdx]->hasMoreAndSwitchSourceIfNecessary()) { + assert( + scanStates[currentTableIdx]->relTableDataType == storage::RelTableDataType::LISTS); + scanInfos[currentTableIdx]->tableData->scan( + transaction, *scanStates[currentTableIdx], inVector, outputVectors); } else { - currentRelTableIdxToScan = nextRelTableIdxToScan; - if (currentRelTableIdxToScan == tableScanStates.size()) { + currentTableIdx = nextTableIdx; + if (currentTableIdx == scanStates.size()) { return false; } - if (tableScanStates[currentRelTableIdxToScan]->relTableDataType == + if (scanStates[currentTableIdx]->relTableDataType == storage::RelTableDataType::COLUMNS) { outputVectors[0]->state->selVector->resetSelectorToValuePosBufferWithSize(1); outputVectors[0]->state->selVector->selectedPositions[0] = inVector->state->selVector->selectedPositions[0]; } else { - tableScanStates[currentRelTableIdxToScan]->syncState->resetState(); + scanStates[currentTableIdx]->syncState->resetState(); } - relTableDatas[currentRelTableIdxToScan]->scan( - transaction, *tableScanStates[currentRelTableIdxToScan], inVector, outputVectors); - nextRelTableIdxToScan++; + scanInfos[currentTableIdx]->tableData->scan( + transaction, *scanStates[currentTableIdx], inVector, outputVectors); + nextTableIdx++; } } while (outputVectors[0]->state->selVector->selectedSize == 0); return true; } -std::unique_ptr RelTableDataCollection::clone() const { - std::vector> clonedScanStates; - for (auto& scanState : tableScanStates) { - clonedScanStates.push_back(make_unique( - scanState->relStats, scanState->propertyIds, scanState->relTableDataType)); +std::unique_ptr RelTableCollectionScanner::clone() const { + std::vector> clonedScanInfos; + for (auto& scanInfo : scanInfos) { + clonedScanInfos.push_back(scanInfo->copy()); } - return make_unique(relTableDatas, std::move(clonedScanStates)); + return make_unique(std::move(clonedScanInfos)); } -void GenericScanRelTables::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { +void ScanMultiRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { ScanRelTable::initLocalStateInternal(resultSet, context); - currentRelTableDataCollection = nullptr; + for (auto& [_, scanner] : scannerPerNodeTable) { + scanner->init(); + } + currentScanner = nullptr; } -bool GenericScanRelTables::getNextTuplesInternal(ExecutionContext* context) { +bool ScanMultiRelTable::getNextTuplesInternal(ExecutionContext* context) { while (true) { - if (scanCurrentRelTableDataCollection()) { - metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + if (currentScanner != nullptr && + currentScanner->scan(inNodeVector, outVectors, transaction)) { + metrics->numOutputTuple.increase(outVectors[0]->state->selVector->selectedSize); return true; } if (!children[0]->getNextTuple(context)) { - currentRelTableDataCollection = nullptr; - for (auto& [_, relTableDataCollection] : relTableCollectionPerNodeTable) { - relTableDataCollection->resetState(); - } + resetState(); return false; } - auto currentIdx = inNodeIDVector->state->selVector->selectedPositions[0]; - if (inNodeIDVector->isNull(currentIdx)) { - outputVectors[0]->state->selVector->selectedSize = 0; + auto currentIdx = inNodeVector->state->selVector->selectedPositions[0]; + if (inNodeVector->isNull(currentIdx)) { + outVectors[0]->state->selVector->selectedSize = 0; continue; } - auto nodeID = inNodeIDVector->getValue(currentIdx); - initCurrentRelTableDataCollection(nodeID); + auto nodeID = inNodeVector->getValue(currentIdx); + initCurrentScanner(nodeID); + } +} + +std::unique_ptr ScanMultiRelTable::clone() { + node_table_id_scanner_map_t clonedScanners; + for (auto& [tableID, scanner] : scannerPerNodeTable) { + clonedScanners.insert({tableID, scanner->clone()}); } + return make_unique( + posInfo->copy(), std::move(clonedScanners), children[0]->clone(), id, paramsString); } -bool GenericScanRelTables::scanCurrentRelTableDataCollection() { - if (currentRelTableDataCollection == nullptr) { - return false; +void ScanMultiRelTable::resetState() { + currentScanner = nullptr; + for (auto& [_, scanner] : scannerPerNodeTable) { + scanner->resetState(); } - return currentRelTableDataCollection->scan(inNodeIDVector, outputVectors, transaction); } -void GenericScanRelTables::initCurrentRelTableDataCollection(const nodeID_t& nodeID) { - if (relTableCollectionPerNodeTable.contains(nodeID.tableID)) { - currentRelTableDataCollection = relTableCollectionPerNodeTable.at(nodeID.tableID).get(); - currentRelTableDataCollection->resetState(); +void ScanMultiRelTable::initCurrentScanner(const nodeID_t& nodeID) { + if (scannerPerNodeTable.contains(nodeID.tableID)) { + currentScanner = scannerPerNodeTable.at(nodeID.tableID).get(); + currentScanner->resetState(); } else { - currentRelTableDataCollection = nullptr; + currentScanner = nullptr; } } diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp index fd66e8f9e7..ba765e3ba7 100644 --- a/src/processor/operator/scan/scan_rel_table.cpp +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -4,10 +4,10 @@ namespace kuzu { namespace processor { void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { - inNodeIDVector = resultSet->getValueVector(inNodeIDVectorPos).get(); - for (auto& dataPos : outputVectorsPos) { - auto vector = resultSet->getValueVector(dataPos); - outputVectors.push_back(vector.get()); + inNodeVector = resultSet->getValueVector(posInfo->inNodeVectorPos).get(); + outNodeVector = resultSet->getValueVector(posInfo->outNodeVectorPos).get(); + for (auto& dataPos : posInfo->outVectorsPos) { + outVectors.push_back(resultSet->getValueVector(dataPos).get()); } } diff --git a/src/processor/operator/scan/scan_rel_table_columns.cpp b/src/processor/operator/scan/scan_rel_table_columns.cpp index 58e2aef553..12c76166d8 100644 --- a/src/processor/operator/scan/scan_rel_table_columns.cpp +++ b/src/processor/operator/scan/scan_rel_table_columns.cpp @@ -3,16 +3,22 @@ namespace kuzu { namespace processor { +void ScanRelTableColumns::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { + ScanRelTable::initLocalStateInternal(resultSet, context); + scanState = std::make_unique( + scanInfo->relStats, scanInfo->propertyIds, storage::RelTableDataType::COLUMNS); +} + bool ScanRelTableColumns::getNextTuplesInternal(ExecutionContext* context) { do { - restoreSelVector(inNodeIDVector->state->selVector); + restoreSelVector(inNodeVector->state->selVector); if (!children[0]->getNextTuple(context)) { return false; } - saveSelVector(inNodeIDVector->state->selVector); - tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); - } while (inNodeIDVector->state->selVector->selectedSize == 0); - metrics->numOutputTuple.increase(inNodeIDVector->state->selVector->selectedSize); + saveSelVector(inNodeVector->state->selVector); + scanInfo->tableData->scan(transaction, *scanState, inNodeVector, outVectors); + } while (inNodeVector->state->selVector->selectedSize == 0); + metrics->numOutputTuple.increase(inNodeVector->state->selVector->selectedSize); return true; } diff --git a/src/processor/operator/scan/scan_rel_table_lists.cpp b/src/processor/operator/scan/scan_rel_table_lists.cpp index d763812891..b16636dc20 100644 --- a/src/processor/operator/scan/scan_rel_table_lists.cpp +++ b/src/processor/operator/scan/scan_rel_table_lists.cpp @@ -3,11 +3,17 @@ namespace kuzu { namespace processor { +void ScanRelTableLists::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { + ScanRelTable::initLocalStateInternal(resultSet, context); + scanState = std::make_unique( + scanInfo->relStats, scanInfo->propertyIds, storage::RelTableDataType::LISTS); +} + bool ScanRelTableLists::getNextTuplesInternal(ExecutionContext* context) { do { if (scanState->syncState->hasMoreAndSwitchSourceIfNecessary()) { - tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); - metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + scanInfo->tableData->scan(transaction, *scanState, inNodeVector, outVectors); + metrics->numOutputTuple.increase(outVectors[0]->state->selVector->selectedSize); return true; } if (!children[0]->getNextTuple(context)) { @@ -15,9 +21,9 @@ bool ScanRelTableLists::getNextTuplesInternal(ExecutionContext* context) { return false; } scanState->syncState->resetState(); - tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); - } while (outputVectors[0]->state->selVector->selectedSize == 0); - metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + scanInfo->tableData->scan(transaction, *scanState, inNodeVector, outVectors); + } while (outVectors[0]->state->selVector->selectedSize == 0); + metrics->numOutputTuple.increase(outVectors[0]->state->selVector->selectedSize); return true; }