From 819b0ca39f2a98d2da30493813736020d1d074f0 Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Sun, 24 Sep 2023 00:48:48 +0800 Subject: [PATCH] reorganize stat files --- .../join_order/cardinality_estimator.h | 14 +- src/include/planner/planner.h | 24 +- src/include/planner/query_planner.h | 4 +- .../operator/ddl/create_node_table.h | 6 +- .../processor/operator/ddl/create_rdf_graph.h | 8 +- .../processor/operator/ddl/create_rel_table.h | 6 +- .../operator/ddl/create_rel_table_group.h | 6 +- .../processor/operator/persistent/copy_node.h | 6 +- .../processor/operator/persistent/copy_rel.h | 6 +- .../operator/persistent/copy_rel_columns.h | 2 +- .../operator/persistent/delete_executor.h | 6 +- .../operator/persistent/insert_executor.h | 4 +- .../operator/scan/scan_rel_table_columns.h | 2 +- src/include/processor/plan_mapper.h | 2 +- src/include/storage/stats/metadata_dah_info.h | 28 ++ .../storage/stats/node_table_statistics.h | 92 +++++++ .../stats/nodes_statistics_and_deleted_ids.h | 245 ------------------ .../storage/stats/nodes_store_statistics.h | 122 +++++++++ .../storage/stats/rel_table_statistics.h | 43 +++ ...s_statistics.h => rels_store_statistics.h} | 49 +--- src/include/storage/stats/table_statistics.h | 139 +--------- .../stats/table_statistics_collection.h | 85 ++++++ src/include/storage/store/node_table.h | 9 +- src/include/storage/store/nodes_store.h | 6 +- src/include/storage/store/rel_table.h | 2 +- src/include/storage/store/rels_store.h | 7 +- src/include/storage/store/table_data.h | 2 +- src/main/database.cpp | 4 +- src/planner/plan/plan_copy.cpp | 2 +- src/planner/planner.cpp | 14 +- src/processor/map/map_delete.cpp | 2 +- .../operator/persistent/copy_node.cpp | 7 + .../operator/persistent/copy_rel.cpp | 2 +- src/storage/stats/CMakeLists.txt | 10 +- src/storage/stats/metadata_dah_info.cpp | 35 +++ ...eted_ids.cpp => node_table_statistics.cpp} | 113 +++----- src/storage/stats/nodes_store_statistics.cpp | 101 ++++++++ src/storage/stats/property_statistics.cpp | 2 +- src/storage/stats/rel_table_statistics.cpp | 22 ++ ...atistics.cpp => rels_store_statistics.cpp} | 21 +- src/storage/stats/table_statistics.cpp | 150 ++--------- .../stats/table_statistics_collection.cpp | 116 +++++++++ src/storage/store/node_table.cpp | 2 +- src/storage/store/nodes_store.cpp | 2 +- src/storage/store/rels_store.cpp | 2 +- src/storage/store/table_data.cpp | 6 +- src/storage/wal_replayer.cpp | 4 +- 47 files changed, 816 insertions(+), 726 deletions(-) create mode 100644 src/include/storage/stats/metadata_dah_info.h create mode 100644 src/include/storage/stats/node_table_statistics.h delete mode 100644 src/include/storage/stats/nodes_statistics_and_deleted_ids.h create mode 100644 src/include/storage/stats/nodes_store_statistics.h create mode 100644 src/include/storage/stats/rel_table_statistics.h rename src/include/storage/stats/{rels_statistics.h => rels_store_statistics.h} (56%) create mode 100644 src/include/storage/stats/table_statistics_collection.h create mode 100644 src/storage/stats/metadata_dah_info.cpp rename src/storage/stats/{nodes_statistics_and_deleted_ids.cpp => node_table_statistics.cpp} (64%) create mode 100644 src/storage/stats/nodes_store_statistics.cpp create mode 100644 src/storage/stats/rel_table_statistics.cpp rename src/storage/stats/{rels_statistics.cpp => rels_store_statistics.cpp} (69%) create mode 100644 src/storage/stats/table_statistics_collection.cpp diff --git a/src/include/planner/join_order/cardinality_estimator.h b/src/include/planner/join_order/cardinality_estimator.h index 3f392aeebd..286988058c 100644 --- a/src/include/planner/join_order/cardinality_estimator.h +++ b/src/include/planner/join_order/cardinality_estimator.h @@ -2,16 +2,16 @@ #include "binder/query/query_graph.h" #include "planner/operator/logical_plan.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/nodes_store_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace planner { class CardinalityEstimator { public: - CardinalityEstimator(const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics) + CardinalityEstimator(const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics) : nodesStatistics{nodesStatistics}, relsStatistics{relsStatistics} {} void initNodeIDDom(binder::QueryGraph* queryGraph); @@ -37,7 +37,7 @@ class CardinalityEstimator { nodeIDName2dom.insert({key, getNumNodes(node)}); } } - uint64_t getNodeIDDom(const std::string& nodeIDName) { + inline uint64_t getNodeIDDom(const std::string& nodeIDName) { assert(nodeIDName2dom.contains(nodeIDName)); return nodeIDName2dom.at(nodeIDName); } @@ -46,8 +46,8 @@ class CardinalityEstimator { uint64_t getNumRels(const binder::RelExpression& rel); private: - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics; - const storage::RelsStatistics& relsStatistics; + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics; + const storage::RelsStoreStats& relsStatistics; // The domain of nodeID is defined as the number of unique value of nodeID, i.e. num nodes. std::unordered_map nodeIDName2dom; }; diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index 00b8a05b51..26d6c91f1b 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -8,12 +8,12 @@ namespace planner { class Planner { public: static std::unique_ptr getBestPlan(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); static std::vector> getAllPlans(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); private: static std::unique_ptr planCreateTable(const BoundStatement& statement); @@ -33,8 +33,8 @@ class Planner { static std::unique_ptr planCommentOn(const BoundStatement& statement); static std::unique_ptr planExplain(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); static std::unique_ptr planCreateMacro(const BoundStatement& statement); @@ -42,17 +42,17 @@ class Planner { static std::vector> getAllQueryPlans( const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); static std::vector> getAllExplainPlans( const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); static std::unique_ptr planCopyTo(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement); + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement); static std::unique_ptr planCopyFrom(const BoundStatement& statement); diff --git a/src/include/planner/query_planner.h b/src/include/planner/query_planner.h index 3a2c06534f..ce7c09af1c 100644 --- a/src/include/planner/query_planner.h +++ b/src/include/planner/query_planner.h @@ -26,8 +26,8 @@ class QueryPlanner { public: QueryPlanner(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics) + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics) : catalog{catalog} { cardinalityEstimator = std::make_unique(nodesStatistics, relsStatistics); diff --git a/src/include/processor/operator/ddl/create_node_table.h b/src/include/processor/operator/ddl/create_node_table.h index 36572e626d..6c7a46442f 100644 --- a/src/include/processor/operator/ddl/create_node_table.h +++ b/src/include/processor/operator/ddl/create_node_table.h @@ -1,7 +1,7 @@ #pragma once #include "processor/operator/ddl/ddl.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/nodes_store_statistics.h" #include "storage/store/nodes_store.h" namespace kuzu { @@ -10,7 +10,7 @@ namespace processor { class CreateNodeTable : public DDL { public: CreateNodeTable(catalog::Catalog* catalog, storage::StorageManager* storageManager, - storage::NodesStatisticsAndDeletedIDs* nodesStatistics, + storage::NodesStoreStatsAndDeletedIDs* nodesStatistics, std::unique_ptr info, const DataPos& outputPos, uint32_t id, const std::string& paramsString) : DDL{PhysicalOperatorType::CREATE_NODE_TABLE, catalog, outputPos, id, paramsString}, @@ -27,7 +27,7 @@ class CreateNodeTable : public DDL { private: storage::StorageManager* storageManager; - storage::NodesStatisticsAndDeletedIDs* nodesStatistics; + storage::NodesStoreStatsAndDeletedIDs* nodesStatistics; std::unique_ptr info; }; diff --git a/src/include/processor/operator/ddl/create_rdf_graph.h b/src/include/processor/operator/ddl/create_rdf_graph.h index 580601f22e..b6f9ce6fa3 100644 --- a/src/include/processor/operator/ddl/create_rdf_graph.h +++ b/src/include/processor/operator/ddl/create_rdf_graph.h @@ -9,8 +9,8 @@ namespace processor { class CreateRdfGraph : public DDL { public: CreateRdfGraph(catalog::Catalog* catalog, storage::StorageManager* storageManager, - storage::NodesStatisticsAndDeletedIDs* nodesStatistics, - storage::RelsStatistics* relsStatistics, std::unique_ptr info, + storage::NodesStoreStatsAndDeletedIDs* nodesStatistics, + storage::RelsStoreStats* relsStatistics, std::unique_ptr info, const DataPos& outputPos, uint32_t id, const std::string& paramsString) : DDL{PhysicalOperatorType::CREATE_RDF_GRAPH, catalog, outputPos, id, paramsString}, storageManager{storageManager}, nodesStatistics{nodesStatistics}, @@ -27,8 +27,8 @@ class CreateRdfGraph : public DDL { private: storage::StorageManager* storageManager; - storage::NodesStatisticsAndDeletedIDs* nodesStatistics; - storage::RelsStatistics* relsStatistics; + storage::NodesStoreStatsAndDeletedIDs* nodesStatistics; + storage::RelsStoreStats* relsStatistics; std::unique_ptr info; }; diff --git a/src/include/processor/operator/ddl/create_rel_table.h b/src/include/processor/operator/ddl/create_rel_table.h index 0c1cad1d08..973fb6d054 100644 --- a/src/include/processor/operator/ddl/create_rel_table.h +++ b/src/include/processor/operator/ddl/create_rel_table.h @@ -1,14 +1,14 @@ #pragma once #include "processor/operator/ddl/ddl.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace processor { class CreateRelTable : public DDL { public: - CreateRelTable(catalog::Catalog* catalog, storage::RelsStatistics* relsStatistics, + CreateRelTable(catalog::Catalog* catalog, storage::RelsStoreStats* relsStatistics, std::unique_ptr info, const DataPos& outputPos, uint32_t id, const std::string& paramsString) : DDL{PhysicalOperatorType::CREATE_REL_TABLE, catalog, outputPos, id, paramsString}, @@ -24,7 +24,7 @@ class CreateRelTable : public DDL { } private: - storage::RelsStatistics* relsStatistics; + storage::RelsStoreStats* relsStatistics; std::unique_ptr info; }; diff --git a/src/include/processor/operator/ddl/create_rel_table_group.h b/src/include/processor/operator/ddl/create_rel_table_group.h index e8704426d2..a74fe7fe93 100644 --- a/src/include/processor/operator/ddl/create_rel_table_group.h +++ b/src/include/processor/operator/ddl/create_rel_table_group.h @@ -1,14 +1,14 @@ #pragma once #include "processor/operator/ddl/ddl.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace processor { class CreateRelTableGroup : public DDL { public: - CreateRelTableGroup(catalog::Catalog* catalog, storage::RelsStatistics* relsStatistics, + CreateRelTableGroup(catalog::Catalog* catalog, storage::RelsStoreStats* relsStatistics, std::unique_ptr info, const DataPos& outputPos, uint32_t id, const std::string& paramsString) : DDL{PhysicalOperatorType::CREATE_REL_TABLE, catalog, outputPos, id, paramsString}, @@ -24,7 +24,7 @@ class CreateRelTableGroup : public DDL { } private: - storage::RelsStatistics* relsStatistics; + storage::RelsStoreStats* relsStatistics; std::unique_ptr info; }; diff --git a/src/include/processor/operator/persistent/copy_node.h b/src/include/processor/operator/persistent/copy_node.h index 5bcc2ee947..1805aa5fad 100644 --- a/src/include/processor/operator/persistent/copy_node.h +++ b/src/include/processor/operator/persistent/copy_node.h @@ -103,11 +103,7 @@ class CopyNode : public Sink { storage::NodeTable* table, storage::NodeGroup* nodeGroup, bool isCopyTurtle); private: - inline bool isCopyAllowed() const { - auto nodesStatistics = copyNodeInfo.table->getNodeStatisticsAndDeletedIDs(); - return nodesStatistics->getNodeStatisticsAndDeletedIDs(copyNodeInfo.table->getTableID()) - ->getNumTuples() == 0; - } + bool isCopyAllowed() const; static void populatePKIndex(storage::PrimaryKeyIndexBuilder* pkIndex, storage::ColumnChunk* chunk, common::offset_t startNodeOffset, common::offset_t numNodes); diff --git a/src/include/processor/operator/persistent/copy_rel.h b/src/include/processor/operator/persistent/copy_rel.h index ce7137aa79..38fcd44d91 100644 --- a/src/include/processor/operator/persistent/copy_rel.h +++ b/src/include/processor/operator/persistent/copy_rel.h @@ -6,7 +6,7 @@ #include "storage/in_mem_storage_structure/in_mem_column.h" #include "storage/in_mem_storage_structure/in_mem_column_chunk.h" #include "storage/in_mem_storage_structure/in_mem_lists.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace processor { @@ -70,7 +70,7 @@ class CopyRelSharedState { friend class CopyRelLists; public: - CopyRelSharedState(common::table_id_t tableID, storage::RelsStatistics* relsStatistics, + CopyRelSharedState(common::table_id_t tableID, storage::RelsStoreStats* relsStatistics, std::unique_ptr fwdRelData, std::unique_ptr bwdRelData, storage::MemoryManager* memoryManager); @@ -85,7 +85,7 @@ class CopyRelSharedState { private: std::mutex mtx; common::table_id_t tableID; - storage::RelsStatistics* relsStatistics; + storage::RelsStoreStats* relsStatistics; std::unique_ptr fwdRelData; std::unique_ptr bwdRelData; std::shared_ptr fTable; diff --git a/src/include/processor/operator/persistent/copy_rel_columns.h b/src/include/processor/operator/persistent/copy_rel_columns.h index 78edc149a3..2345755db7 100644 --- a/src/include/processor/operator/persistent/copy_rel_columns.h +++ b/src/include/processor/operator/persistent/copy_rel_columns.h @@ -3,7 +3,7 @@ #include "processor/operator/persistent/copy_rel.h" #include "processor/operator/sink.h" #include "storage/index/hash_index.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace processor { diff --git a/src/include/processor/operator/persistent/delete_executor.h b/src/include/processor/operator/persistent/delete_executor.h index 7e17bdd2a0..b921f83839 100644 --- a/src/include/processor/operator/persistent/delete_executor.h +++ b/src/include/processor/operator/persistent/delete_executor.h @@ -88,7 +88,7 @@ class RelDeleteExecutor { class SingleLabelRelDeleteExecutor : public RelDeleteExecutor { public: - SingleLabelRelDeleteExecutor(storage::RelsStatistics* relsStatistic, storage::RelTable* table, + SingleLabelRelDeleteExecutor(storage::RelsStoreStats* relsStatistic, storage::RelTable* table, const DataPos& srcNodeIDPos, const DataPos& dstNodeIDPos, const DataPos& relIDPos) : RelDeleteExecutor(srcNodeIDPos, dstNodeIDPos, relIDPos), relsStatistic{relsStatistic}, table{table} {} @@ -103,12 +103,12 @@ class SingleLabelRelDeleteExecutor : public RelDeleteExecutor { } private: - storage::RelsStatistics* relsStatistic; + storage::RelsStoreStats* relsStatistic; storage::RelTable* table; }; class MultiLabelRelDeleteExecutor : public RelDeleteExecutor { - using rel_table_statistic_pair = std::pair; + using rel_table_statistic_pair = std::pair; public: MultiLabelRelDeleteExecutor( diff --git a/src/include/processor/operator/persistent/insert_executor.h b/src/include/processor/operator/persistent/insert_executor.h index 66578007d4..bd9b11948a 100644 --- a/src/include/processor/operator/persistent/insert_executor.h +++ b/src/include/processor/operator/persistent/insert_executor.h @@ -43,7 +43,7 @@ class NodeInsertExecutor { class RelInsertExecutor { public: - RelInsertExecutor(storage::RelsStatistics& relsStatistics, storage::RelTable* table, + RelInsertExecutor(storage::RelsStoreStats& relsStatistics, storage::RelTable* table, const DataPos& srcNodePos, const DataPos& dstNodePos, std::vector propertyLhsPositions, std::vector> propertyRhsEvaluators) @@ -65,7 +65,7 @@ class RelInsertExecutor { const std::vector>& executors); private: - storage::RelsStatistics& relsStatistics; + storage::RelsStoreStats& relsStatistics; storage::RelTable* table; DataPos srcNodePos; DataPos dstNodePos; diff --git a/src/include/processor/operator/scan/scan_rel_table_columns.h b/src/include/processor/operator/scan/scan_rel_table_columns.h index cdfa34b769..919fa37943 100644 --- a/src/include/processor/operator/scan/scan_rel_table_columns.h +++ b/src/include/processor/operator/scan/scan_rel_table_columns.h @@ -2,7 +2,7 @@ #include "processor/operator/filtering_operator.h" #include "processor/operator/scan/scan_rel_table.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" namespace kuzu { namespace processor { diff --git a/src/include/processor/plan_mapper.h b/src/include/processor/plan_mapper.h index 9b11dfeb38..4dfa639319 100644 --- a/src/include/processor/plan_mapper.h +++ b/src/include/processor/plan_mapper.h @@ -6,7 +6,7 @@ #include "planner/operator/logical_plan.h" #include "processor/operator/result_collector.h" #include "processor/physical_plan.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/node_table_statistics.h" #include "storage/storage_manager.h" namespace kuzu { diff --git a/src/include/storage/stats/metadata_dah_info.h b/src/include/storage/stats/metadata_dah_info.h new file mode 100644 index 0000000000..eb58618bec --- /dev/null +++ b/src/include/storage/stats/metadata_dah_info.h @@ -0,0 +1,28 @@ +#pragma once + +#include "common/types/types.h" + +namespace kuzu { +namespace storage { + +// DAH is the abbreviation for Disk Array Header. +struct MetadataDAHInfo { + common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX; + common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX; + std::vector> childrenInfos; + + MetadataDAHInfo() : MetadataDAHInfo{common::INVALID_PAGE_IDX, common::INVALID_PAGE_IDX} {} + MetadataDAHInfo(common::page_idx_t dataDAHPageIdx) + : MetadataDAHInfo{dataDAHPageIdx, common::INVALID_PAGE_IDX} {} + MetadataDAHInfo(common::page_idx_t dataDAHPageIdx, common::page_idx_t nullDAHPageIdx) + : dataDAHPageIdx{dataDAHPageIdx}, nullDAHPageIdx{nullDAHPageIdx} {} + + std::unique_ptr copy(); + + void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; + static std::unique_ptr deserialize( + common::FileInfo* fileInfo, uint64_t& offset); +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/stats/node_table_statistics.h b/src/include/storage/stats/node_table_statistics.h new file mode 100644 index 0000000000..dd5ef8ff87 --- /dev/null +++ b/src/include/storage/stats/node_table_statistics.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include + +#include "catalog/node_table_schema.h" +#include "storage/stats/table_statistics.h" +#include "storage/store/rels_store.h" + +namespace kuzu { +namespace storage { + +class NodeTableStatsAndDeletedIDs : public TableStatistics { +public: + NodeTableStatsAndDeletedIDs(BMFileHandle* metadataFH, const catalog::TableSchema& schema, + BufferManager* bufferManager, WAL* wal); + NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, + const std::vector& deletedNodeOffsets); + NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, + const std::vector& deletedNodeOffsets, + std::unordered_map>&& + propertyStatistics); + NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other); + + inline common::offset_t getMaxNodeOffset() { + return getMaxNodeOffsetFromNumTuples(getNumTuples()); + } + + inline void setAdjListsAndColumns( + std::pair, std::vector> adjListsAndColumns_) { + adjListsAndColumns = std::move(adjListsAndColumns_); + } + + common::offset_t addNode(); + + void deleteNode(common::offset_t nodeOffset); + + // This function assumes that it is being called right after ScanNodeID has obtained a + // morsel and that the nodeID structs in nodeOffsetVector.values have consecutive node + // offsets and the same tableID. + void setDeletedNodeOffsetsForMorsel( + const std::shared_ptr& nodeOffsetVector); + + void setNumTuples(uint64_t numTuples) override; + + std::vector getDeletedNodeOffsets() const; + + static inline uint64_t getNumTuplesFromMaxNodeOffset(common::offset_t maxNodeOffset) { + return (maxNodeOffset == UINT64_MAX) ? 0ull : maxNodeOffset + 1ull; + } + static inline uint64_t getMaxNodeOffsetFromNumTuples(uint64_t numTuples) { + return numTuples == 0 ? UINT64_MAX : numTuples - 1; + } + + inline void addMetadataDAHInfoForColumn(std::unique_ptr metadataDAHInfo) { + metadataDAHInfos.push_back(std::move(metadataDAHInfo)); + } + inline void removeMetadataDAHInfoForColumn(common::column_id_t columnID) { + assert(columnID < metadataDAHInfos.size()); + metadataDAHInfos.erase(metadataDAHInfos.begin() + columnID); + } + inline MetadataDAHInfo* getMetadataDAHInfo(common::column_id_t columnID) { + assert(columnID < metadataDAHInfos.size()); + return metadataDAHInfos[columnID].get(); + } + + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + static std::unique_ptr deserialize(common::table_id_t tableID, + common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset); + + std::unique_ptr copy() final { + return std::make_unique(*this); + } + +private: + void errorIfNodeHasEdges(common::offset_t nodeOffset); + + // We pass the morselIdx to not do the division nodeOffset/DEFAULT_VECTOR_CAPACITY again + bool isDeleted(common::offset_t nodeOffset, uint64_t morselIdx); + +private: + common::table_id_t tableID; + std::vector> metadataDAHInfos; + // Note: This is initialized explicitly through a call to setAdjListsAndColumns after + // construction. + std::pair, std::vector> adjListsAndColumns; + std::vector hasDeletedNodesPerMorsel; + std::map> deletedNodeOffsetsPerMorsel; +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h b/src/include/storage/stats/nodes_statistics_and_deleted_ids.h deleted file mode 100644 index 9504a6b8fb..0000000000 --- a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h +++ /dev/null @@ -1,245 +0,0 @@ -#pragma once - -#include -#include - -#include "catalog/node_table_schema.h" -#include "storage/store/rels_store.h" - -namespace kuzu { -namespace storage { - -class NodeTableStatsAndDeletedIDs : public TableStatistics { -public: - NodeTableStatsAndDeletedIDs(BMFileHandle* metadataFH, const catalog::TableSchema& schema, - BufferManager* bufferManager, WAL* wal) - : TableStatistics{schema}, tableID{schema.tableID} { - metadataDAHInfos.clear(); - metadataDAHInfos.reserve(schema.getNumProperties()); - for (auto property : schema.getProperties()) { - metadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( - *property->getDataType(), *metadataFH, bufferManager, wal)); - } - } - - NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, - const std::vector& deletedNodeOffsets) - : NodeTableStatsAndDeletedIDs{tableID, maxNodeOffset, deletedNodeOffsets, {}} {} - NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, - std::unordered_map>&& - propertyStatistics) - : NodeTableStatsAndDeletedIDs(tableID, maxNodeOffset, - std::vector() /* no deleted node offsets during initial loading */, - std::move(propertyStatistics)) {} - - NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, - const std::vector& deletedNodeOffsets, - std::unordered_map>&& - propertyStatistics); - - NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) - : TableStatistics{other}, tableID{other.tableID}, - adjListsAndColumns{other.adjListsAndColumns}, - hasDeletedNodesPerMorsel{other.hasDeletedNodesPerMorsel}, - deletedNodeOffsetsPerMorsel{other.deletedNodeOffsetsPerMorsel} { - metadataDAHInfos.clear(); - metadataDAHInfos.reserve(other.metadataDAHInfos.size()); - for (auto& metadataDAHInfo : other.metadataDAHInfos) { - metadataDAHInfos.push_back(metadataDAHInfo->copy()); - } - } - - inline common::offset_t getMaxNodeOffset() { - return getMaxNodeOffsetFromNumTuples(getNumTuples()); - } - - inline void setAdjListsAndColumns( - std::pair, std::vector> adjListsAndColumns_) { - adjListsAndColumns = std::move(adjListsAndColumns_); - } - - common::offset_t addNode(); - - void deleteNode(common::offset_t nodeOffset); - - // This function assumes that it is being called right after ScanNodeID has obtained a - // morsel and that the nodeID structs in nodeOffsetVector.values have consecutive node - // offsets and the same tableID. - void setDeletedNodeOffsetsForMorsel( - const std::shared_ptr& nodeOffsetVector); - - void setNumTuples(uint64_t numTuples) override; - - std::vector getDeletedNodeOffsets() const; - - static inline uint64_t getNumTuplesFromMaxNodeOffset(common::offset_t maxNodeOffset) { - return (maxNodeOffset == UINT64_MAX) ? 0ull : maxNodeOffset + 1ull; - } - - static inline uint64_t getMaxNodeOffsetFromNumTuples(uint64_t numTuples) { - return numTuples == 0 ? UINT64_MAX : numTuples - 1; - } - - inline void addMetadataDAHInfoForColumn(std::unique_ptr metadataDAHInfo) { - metadataDAHInfos.push_back(std::move(metadataDAHInfo)); - } - inline void removeMetadataDAHInfoForColumn(common::column_id_t columnID) { - assert(columnID < metadataDAHInfos.size()); - metadataDAHInfos.erase(metadataDAHInfos.begin() + columnID); - } - inline MetadataDAHInfo* getMetadataDAHInfo(common::column_id_t columnID) { - assert(columnID < metadataDAHInfos.size()); - return metadataDAHInfos[columnID].get(); - } - - void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; - static std::unique_ptr deserialize(common::table_id_t tableID, - common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset); - - std::unique_ptr copy() final { - return std::make_unique(*this); - } - -private: - void errorIfNodeHasEdges(common::offset_t nodeOffset); - - // We pass the morselIdx to not do the division nodeOffset/DEFAULT_VECTOR_CAPACITY again - bool isDeleted(common::offset_t nodeOffset, uint64_t morselIdx); - -private: - common::table_id_t tableID; - std::vector> metadataDAHInfos; - // Note: This is initialized explicitly through a call to setAdjListsAndColumns after - // construction. - std::pair, std::vector> adjListsAndColumns; - std::vector hasDeletedNodesPerMorsel; - std::map> deletedNodeOffsetsPerMorsel; -}; - -// Manages the disk image of the maxNodeOffsets and deleted node IDs (per node table). -// Note: This class is *not* thread-safe. -class NodesStatisticsAndDeletedIDs : public TablesStatistics { -public: - // Should only be used by saveInitialNodesStatisticsAndDeletedIDsToFile to start a database - // from an empty directory. - NodesStatisticsAndDeletedIDs() : TablesStatistics{nullptr} {}; - // Should be used when an already loaded database is started from a directory. - explicit NodesStatisticsAndDeletedIDs(BMFileHandle* metadataFH, BufferManager* bufferManager, - WAL* wal, common::DBFileType dbFileType = common::DBFileType::ORIGINAL) - : TablesStatistics{metadataFH}, bufferManager{bufferManager}, wal{wal} { - readFromFile(wal->getDirectory(), dbFileType); - } - - // Should be used only by tests; - explicit NodesStatisticsAndDeletedIDs( - std::unordered_map>& - nodesStatisticsAndDeletedIDs); - - inline NodeTableStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs( - common::table_id_t tableID) const { - return (NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx - ->tableStatisticPerTable[tableID] - .get(); - } - - inline void rollbackInMemoryIfNecessary() { - lock_t lck{mtx}; - tablesStatisticsContentForWriteTrx.reset(); - } - - static inline void saveInitialNodesStatisticsAndDeletedIDsToFile(const std::string& directory) { - std::make_unique()->saveToFile( - directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY); - } - - inline void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) override { - initTableStatisticsForWriteTrx(); - ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx - ->tableStatisticPerTable[tableID] - .get()) - ->setNumTuples(numTuples); - } - - inline common::offset_t getMaxNodeOffset( - transaction::Transaction* transaction, common::table_id_t tableID) { - assert(transaction); - if (transaction->getType() == transaction::TransactionType::READ_ONLY) { - return getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset(); - } else { - std::unique_lock xLck{mtx}; - return tablesStatisticsContentForWriteTrx == nullptr ? - getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() : - ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx - ->tableStatisticPerTable[tableID] - .get()) - ->getMaxNodeOffset(); - } - } - - // This function is only used for testing purpose. - inline uint32_t getNumNodeStatisticsAndDeleteIDsPerTable() const { - return tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.size(); - } - - void setAdjListsAndColumns(RelsStore* relsStore); - - // This function assumes that there is a single write transaction. That is why for now we - // keep the interface simple and no transaction is passed. - common::offset_t addNode(common::table_id_t tableID) { - lock_t lck{mtx}; - initTableStatisticsForWriteTrxNoLock(); - return ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx - ->tableStatisticPerTable[tableID] - .get()) - ->addNode(); - } - - // Refer to the comments for addNode. - void deleteNode(common::table_id_t tableID, common::offset_t nodeOffset) { - lock_t lck{mtx}; - initTableStatisticsForWriteTrxNoLock(); - ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx - ->tableStatisticPerTable[tableID] - .get()) - ->deleteNode(nodeOffset); - } - - // This function is only used by storageManager to construct relsStore during start-up, so - // we can just safely return the maxNodeOffsetPerTable for readOnlyVersion. - std::map getMaxNodeOffsetPerTable() const; - - void setDeletedNodeOffsetsForMorsel(transaction::Transaction* transaction, - const std::shared_ptr& nodeOffsetVector, common::table_id_t tableID); - - void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema); - - void addMetadataDAHInfo(common::table_id_t tableID, const common::LogicalType& dataType); - void removeMetadataDAHInfo(common::table_id_t tableID, common::column_id_t columnID); - MetadataDAHInfo* getMetadataDAHInfo(transaction::Transaction* transaction, - common::table_id_t tableID, common::column_id_t columnID); - -protected: - inline std::unique_ptr constructTableStatistic( - catalog::TableSchema* tableSchema) override { - return std::make_unique( - metadataFH, *tableSchema, bufferManager, wal); - } - - inline std::unique_ptr constructTableStatistic( - TableStatistics* tableStatistics) override { - return std::make_unique( - *(NodeTableStatsAndDeletedIDs*)tableStatistics); - } - - inline std::string getTableStatisticsFilePath( - const std::string& directory, common::DBFileType dbFileType) override { - return StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType); - } - -private: - BufferManager* bufferManager; - WAL* wal; -}; - -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/stats/nodes_store_statistics.h b/src/include/storage/stats/nodes_store_statistics.h new file mode 100644 index 0000000000..94216e3c5a --- /dev/null +++ b/src/include/storage/stats/nodes_store_statistics.h @@ -0,0 +1,122 @@ +#pragma once + +#include "storage/stats/node_table_statistics.h" +#include "storage/stats/table_statistics_collection.h" + +namespace kuzu { +namespace storage { + +// Manages the disk image of the maxNodeOffsets and deleted node IDs (per node table). +// Note: This class is *not* thread-safe. +class NodesStoreStatsAndDeletedIDs : public TablesStatistics { +public: + // Should only be used by saveInitialNodesStatisticsAndDeletedIDsToFile to start a database + // from an empty directory. + NodesStoreStatsAndDeletedIDs() : TablesStatistics{nullptr} {}; + // Should be used when an already loaded database is started from a directory. + NodesStoreStatsAndDeletedIDs(BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, + common::DBFileType dbFileType = common::DBFileType::ORIGINAL) + : TablesStatistics{metadataFH}, bufferManager{bufferManager}, wal{wal} { + readFromFile(wal->getDirectory(), dbFileType); + } + + // Should be used only by tests; + explicit NodesStoreStatsAndDeletedIDs( + std::unordered_map>& + nodesStatisticsAndDeletedIDs); + + inline NodeTableStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs( + common::table_id_t tableID) const { + return getNodeTableStats(transaction::TransactionType::READ_ONLY, tableID); + } + + inline void rollbackInMemoryIfNecessary() { + lock_t lck{mtx}; + tablesStatisticsContentForWriteTrx.reset(); + } + + static inline void saveInitialNodesStatisticsAndDeletedIDsToFile(const std::string& directory) { + std::make_unique()->saveToFile( + directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY); + } + + inline void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) override { + initTableStatisticsForWriteTrx(); + getNodeTableStats(transaction::TransactionType::WRITE, tableID)->setNumTuples(numTuples); + } + + common::offset_t getMaxNodeOffset( + transaction::Transaction* transaction, common::table_id_t tableID); + + // This function is only used for testing purpose. + inline uint32_t getNumNodeStatisticsAndDeleteIDsPerTable() const { + return tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.size(); + } + + void setAdjListsAndColumns(RelsStore* relsStore); + + // This function assumes that there is a single write transaction. That is why for now we + // keep the interface simple and no transaction is passed. + inline common::offset_t addNode(common::table_id_t tableID) { + lock_t lck{mtx}; + initTableStatisticsForWriteTrxNoLock(); + return getNodeTableStats(transaction::TransactionType::WRITE, tableID)->addNode(); + } + + // Refer to the comments for addNode. + inline void deleteNode(common::table_id_t tableID, common::offset_t nodeOffset) { + lock_t lck{mtx}; + initTableStatisticsForWriteTrxNoLock(); + getNodeTableStats(transaction::TransactionType::WRITE, tableID)->deleteNode(nodeOffset); + } + + // This function is only used by storageManager to construct relsStore during start-up, so + // we can just safely return the maxNodeOffsetPerTable for readOnlyVersion. + std::map getMaxNodeOffsetPerTable() const; + + void setDeletedNodeOffsetsForMorsel(transaction::Transaction* transaction, + const std::shared_ptr& nodeOffsetVector, common::table_id_t tableID); + + void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema); + + void addMetadataDAHInfo(common::table_id_t tableID, const common::LogicalType& dataType); + void removeMetadataDAHInfo(common::table_id_t tableID, common::column_id_t columnID); + MetadataDAHInfo* getMetadataDAHInfo(transaction::Transaction* transaction, + common::table_id_t tableID, common::column_id_t columnID); + +protected: + inline std::unique_ptr constructTableStatistic( + catalog::TableSchema* tableSchema) override { + return std::make_unique( + metadataFH, *tableSchema, bufferManager, wal); + } + + inline std::unique_ptr constructTableStatistic( + TableStatistics* tableStatistics) override { + return std::make_unique( + *(NodeTableStatsAndDeletedIDs*)tableStatistics); + } + + inline std::string getTableStatisticsFilePath( + const std::string& directory, common::DBFileType dbFileType) override { + return StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType); + } + +private: + inline NodeTableStatsAndDeletedIDs* getNodeTableStats( + transaction::TransactionType transactionType, common::table_id_t tableID) const { + return transactionType == transaction::TransactionType::READ_ONLY ? + dynamic_cast( + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID) + .get()) : + dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID) + .get()); + } + +private: + BufferManager* bufferManager; + WAL* wal; +}; +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/stats/rel_table_statistics.h b/src/include/storage/stats/rel_table_statistics.h new file mode 100644 index 0000000000..1bdd75b201 --- /dev/null +++ b/src/include/storage/stats/rel_table_statistics.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +#include "storage/stats/table_statistics.h" +#include "storage/storage_utils.h" + +namespace kuzu { +namespace storage { + +class RelsStoreStats; +class RelTableStats : public TableStatistics { + friend class RelsStoreStats; + +public: + RelTableStats(const catalog::TableSchema& tableSchema) + : TableStatistics{tableSchema}, nextRelOffset{0} {} + RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset) + : TableStatistics{common::TableType::REL, numRels, tableID, {}}, nextRelOffset{ + nextRelOffset} {} + RelTableStats(uint64_t numRels, common::table_id_t tableID, + std::unordered_map>&& + propertyStats, + common::offset_t nextRelOffset) + : TableStatistics{common::TableType::REL, numRels, tableID, std::move(propertyStats)}, + nextRelOffset{nextRelOffset} {} + + inline common::offset_t getNextRelOffset() const { return nextRelOffset; } + + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + static std::unique_ptr deserialize( + uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset); + + inline std::unique_ptr copy() final { + return std::make_unique(*this); + } + +private: + common::offset_t nextRelOffset; +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/stats/rels_statistics.h b/src/include/storage/stats/rels_store_statistics.h similarity index 56% rename from src/include/storage/stats/rels_statistics.h rename to src/include/storage/stats/rels_store_statistics.h index 0b71663830..44214b7d04 100644 --- a/src/include/storage/stats/rels_statistics.h +++ b/src/include/storage/stats/rels_store_statistics.h @@ -1,63 +1,29 @@ #pragma once -#include - -#include "storage/stats/table_statistics.h" -#include "storage/storage_utils.h" +#include "storage/stats/rel_table_statistics.h" +#include "storage/stats/table_statistics_collection.h" namespace kuzu { namespace storage { - -class RelsStatistics; -class RelTableStats : public TableStatistics { - friend class RelsStatistics; - -public: - RelTableStats(const catalog::TableSchema& tableSchema) - : TableStatistics{tableSchema}, nextRelOffset{0} {} - RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset) - : TableStatistics{common::TableType::REL, numRels, tableID, {}}, nextRelOffset{ - nextRelOffset} {} - RelTableStats(uint64_t numRels, common::table_id_t tableID, - std::unordered_map>&& - propertyStats, - common::offset_t nextRelOffset) - : TableStatistics{common::TableType::REL, numRels, tableID, std::move(propertyStats)}, - nextRelOffset{nextRelOffset} {} - - inline common::offset_t getNextRelOffset() const { return nextRelOffset; } - - void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; - static std::unique_ptr deserialize( - uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset); - - inline std::unique_ptr copy() final { - return std::make_unique(*this); - } - -private: - common::offset_t nextRelOffset; -}; - // Manages the disk image of the numRels and numRelsPerDirectionBoundTable. -class RelsStatistics : public TablesStatistics { +class RelsStoreStats : public TablesStatistics { public: // Should only be used by saveInitialRelsStatisticsToFile to start a database from an empty // directory. - RelsStatistics() : TablesStatistics{nullptr} {}; + RelsStoreStats() : TablesStatistics{nullptr} {}; // Should be used when an already loaded database is started from a directory. - explicit RelsStatistics(BMFileHandle* metadataFH, const std::string& directory) + explicit RelsStoreStats(BMFileHandle* metadataFH, const std::string& directory) : TablesStatistics{metadataFH} { readFromFile(directory); } // Should only be used by tests. - explicit RelsStatistics(std::unordered_map> + explicit RelsStoreStats(std::unordered_map> relStatisticPerTable_); static inline void saveInitialRelsStatisticsToFile(const std::string& directory) { - std::make_unique()->saveToFile( + std::make_unique()->saveToFile( directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY); } @@ -96,6 +62,5 @@ class RelsStatistics : public TablesStatistics { ->nextRelOffset += numTuples; } }; - } // namespace storage } // namespace kuzu diff --git a/src/include/storage/stats/table_statistics.h b/src/include/storage/stats/table_statistics.h index d7c3c2e060..2e78ab14e5 100644 --- a/src/include/storage/stats/table_statistics.h +++ b/src/include/storage/stats/table_statistics.h @@ -3,66 +3,22 @@ #include #include -#include "catalog/table_schema.h" -#include "common/ser_deser.h" +#include "storage/stats/metadata_dah_info.h" #include "storage/stats/property_statistics.h" -#include "transaction/transaction.h" namespace kuzu { +namespace catalog { +class TableSchema; +} namespace storage { -// DAH is the abbreviation for Disk Array Header. -class MetadataDAHInfo { -public: - MetadataDAHInfo() : MetadataDAHInfo{common::INVALID_PAGE_IDX, common::INVALID_PAGE_IDX} {} - MetadataDAHInfo(common::page_idx_t dataDAHPageIdx) - : MetadataDAHInfo{dataDAHPageIdx, common::INVALID_PAGE_IDX} {} - MetadataDAHInfo(common::page_idx_t dataDAHPageIdx, common::page_idx_t nullDAHPageIdx) - : dataDAHPageIdx{dataDAHPageIdx}, nullDAHPageIdx{nullDAHPageIdx} {} - - inline std::unique_ptr copy() { - auto result = std::make_unique(dataDAHPageIdx, nullDAHPageIdx); - result->childrenInfos.resize(childrenInfos.size()); - for (size_t i = 0; i < childrenInfos.size(); ++i) { - result->childrenInfos[i] = childrenInfos[i]->copy(); - } - return result; - } - - void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; - static std::unique_ptr deserialize( - common::FileInfo* fileInfo, uint64_t& offset); - - common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX; - common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX; - std::vector> childrenInfos; -}; - -class WAL; class TableStatistics { public: - explicit TableStatistics(const catalog::TableSchema& schema) - : tableType{schema.tableType}, numTuples{0}, tableID{schema.tableID} { - for (auto property : schema.getProperties()) { - propertyStatistics[property->getPropertyID()] = std::make_unique(); - } - } - + explicit TableStatistics(const catalog::TableSchema& schema); TableStatistics(common::TableType tableType, uint64_t numTuples, common::table_id_t tableID, std::unordered_map>&& - propertyStatistics) - : tableType{tableType}, numTuples{numTuples}, tableID{tableID}, - propertyStatistics{std::move(propertyStatistics)} { - assert(numTuples != UINT64_MAX); - } - - explicit TableStatistics(const TableStatistics& other) - : tableType{other.tableType}, numTuples{other.numTuples}, tableID{other.tableID} { - for (auto& propertyStats : other.propertyStatistics) { - propertyStatistics[propertyStats.first] = - std::make_unique(*propertyStats.second.get()); - } - } + propertyStatistics); + explicit TableStatistics(const TableStatistics& other); virtual ~TableStatistics() = default; @@ -77,10 +33,6 @@ class TableStatistics { assert(propertyStatistics.contains(propertyID)); return *(propertyStatistics.at(propertyID)); } - inline const std::unordered_map>& - getPropertyStatistics() { - return propertyStatistics; - } inline void setPropertyStatistics( common::property_id_t propertyID, PropertyStatistics newStats) { propertyStatistics[propertyID] = std::make_unique(newStats); @@ -101,82 +53,5 @@ class TableStatistics { propertyStatistics; }; -struct TablesStatisticsContent { - TablesStatisticsContent() = default; - std::unordered_map> tableStatisticPerTable; -}; - -class TablesStatistics { -public: - TablesStatistics(BMFileHandle* metadataFH); - - virtual ~TablesStatistics() = default; - - virtual void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) = 0; - - inline void writeTablesStatisticsFileForWALRecord(const std::string& directory) { - saveToFile(directory, common::DBFileType::WAL_VERSION, transaction::TransactionType::WRITE); - } - - inline bool hasUpdates() { return tablesStatisticsContentForWriteTrx != nullptr; } - - inline void checkpointInMemoryIfNecessary() { - std::unique_lock lck{mtx}; - tablesStatisticsContentForReadOnlyTrx = std::move(tablesStatisticsContentForWriteTrx); - } - - inline TablesStatisticsContent* getReadOnlyVersion() const { - return tablesStatisticsContentForReadOnlyTrx.get(); - } - - inline void addTableStatistic(catalog::TableSchema* tableSchema) { - initTableStatisticsForWriteTrx(); - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableSchema->tableID] = - constructTableStatistic(tableSchema); - } - inline void removeTableStatistic(common::table_id_t tableID) { - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.erase(tableID); - } - - inline uint64_t getNumTuplesForTable(common::table_id_t tableID) { - return tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID] - ->getNumTuples(); - } - - PropertyStatistics& getPropertyStatisticsForTable(const transaction::Transaction& transaction, - common::table_id_t tableID, common::property_id_t propertyID); - - void setPropertyStatisticsForTable( - common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats); - - static std::unique_ptr createMetadataDAHInfo( - const common::LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal); - -protected: - virtual std::unique_ptr constructTableStatistic( - catalog::TableSchema* tableSchema) = 0; - - virtual std::unique_ptr constructTableStatistic( - TableStatistics* tableStatistics) = 0; - - virtual std::string getTableStatisticsFilePath( - const std::string& directory, common::DBFileType dbFileType) = 0; - - void readFromFile(const std::string& directory); - void readFromFile(const std::string& directory, common::DBFileType dbFileType); - - void saveToFile(const std::string& directory, common::DBFileType dbFileType, - transaction::TransactionType transactionType); - - void initTableStatisticsForWriteTrx(); - void initTableStatisticsForWriteTrxNoLock(); - -protected: - BMFileHandle* metadataFH; - std::unique_ptr tablesStatisticsContentForReadOnlyTrx; - std::unique_ptr tablesStatisticsContentForWriteTrx; - std::mutex mtx; -}; - } // namespace storage } // namespace kuzu diff --git a/src/include/storage/stats/table_statistics_collection.h b/src/include/storage/stats/table_statistics_collection.h new file mode 100644 index 0000000000..b7e5cc2c7f --- /dev/null +++ b/src/include/storage/stats/table_statistics_collection.h @@ -0,0 +1,85 @@ +#pragma once + +#include "storage/stats/table_statistics.h" + +namespace kuzu { +namespace storage { + +struct TablesStatisticsContent { + std::unordered_map> tableStatisticPerTable; +}; + +class WAL; +class TablesStatistics { +public: + TablesStatistics(BMFileHandle* metadataFH); + + virtual ~TablesStatistics() = default; + + virtual void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) = 0; + + inline void writeTablesStatisticsFileForWALRecord(const std::string& directory) { + saveToFile(directory, common::DBFileType::WAL_VERSION, transaction::TransactionType::WRITE); + } + + inline bool hasUpdates() { return tablesStatisticsContentForWriteTrx != nullptr; } + + inline void checkpointInMemoryIfNecessary() { + std::unique_lock lck{mtx}; + tablesStatisticsContentForReadOnlyTrx = std::move(tablesStatisticsContentForWriteTrx); + } + + inline TablesStatisticsContent* getReadOnlyVersion() const { + return tablesStatisticsContentForReadOnlyTrx.get(); + } + + inline void addTableStatistic(catalog::TableSchema* tableSchema) { + initTableStatisticsForWriteTrx(); + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableSchema->tableID] = + constructTableStatistic(tableSchema); + } + inline void removeTableStatistic(common::table_id_t tableID) { + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.erase(tableID); + } + + inline uint64_t getNumTuplesForTable(common::table_id_t tableID) { + return tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID] + ->getNumTuples(); + } + + PropertyStatistics& getPropertyStatisticsForTable(const transaction::Transaction& transaction, + common::table_id_t tableID, common::property_id_t propertyID); + + void setPropertyStatisticsForTable( + common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats); + + static std::unique_ptr createMetadataDAHInfo( + const common::LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal); + +protected: + virtual std::unique_ptr constructTableStatistic( + catalog::TableSchema* tableSchema) = 0; + + virtual std::unique_ptr constructTableStatistic( + TableStatistics* tableStatistics) = 0; + + virtual std::string getTableStatisticsFilePath( + const std::string& directory, common::DBFileType dbFileType) = 0; + + void readFromFile(const std::string& directory); + void readFromFile(const std::string& directory, common::DBFileType dbFileType); + + void saveToFile(const std::string& directory, common::DBFileType dbFileType, + transaction::TransactionType transactionType); + + void initTableStatisticsForWriteTrx(); + void initTableStatisticsForWriteTrxNoLock(); + +protected: + BMFileHandle* metadataFH; + std::unique_ptr tablesStatisticsContentForReadOnlyTrx; + std::unique_ptr tablesStatisticsContentForWriteTrx; + std::mutex mtx; +}; +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/store/node_table.h b/src/include/storage/store/node_table.h index d4893fa089..be08bf916d 100644 --- a/src/include/storage/store/node_table.h +++ b/src/include/storage/store/node_table.h @@ -2,7 +2,7 @@ #include "catalog/catalog.h" #include "storage/index/hash_index.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/nodes_store_statistics.h" #include "storage/store/node_group.h" #include "storage/store/table_data.h" #include "storage/wal/wal.h" @@ -11,13 +11,12 @@ namespace kuzu { namespace catalog { class NodeTableSchema; } - namespace storage { class NodeTable { public: NodeTable(BMFileHandle* dataFH, BMFileHandle* metadataFH, - NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, + NodesStoreStatsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, WAL* wal, catalog::NodeTableSchema* nodeTableSchema); void initializePKIndex(catalog::NodeTableSchema* nodeTableSchema); @@ -59,7 +58,7 @@ class NodeTable { } inline common::column_id_t getPKColumnID() const { return pkColumnID; } inline PrimaryKeyIndex* getPKIndex() const { return pkIndex.get(); } - inline NodesStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs() const { + inline NodesStoreStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs() const { return nodesStatisticsAndDeletedIDs; } inline common::table_id_t getTableID() const { return tableID; } @@ -80,7 +79,7 @@ class NodeTable { } private: - NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs; + NodesStoreStatsAndDeletedIDs* nodesStatisticsAndDeletedIDs; std::unique_ptr tableData; common::column_id_t pkColumnID; std::unique_ptr pkIndex; diff --git a/src/include/storage/store/nodes_store.h b/src/include/storage/store/nodes_store.h index 92c1496c6d..f9a9ee46a0 100644 --- a/src/include/storage/store/nodes_store.h +++ b/src/include/storage/store/nodes_store.h @@ -3,7 +3,7 @@ #include #include -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/nodes_store_statistics.h" #include "storage/store/node_table.h" namespace kuzu { @@ -17,7 +17,7 @@ class NodesStore { inline PrimaryKeyIndex* getPKIndex(common::table_id_t tableID) { return nodeTables[tableID]->getPKIndex(); } - inline NodesStatisticsAndDeletedIDs* getNodesStatisticsAndDeletedIDs() { + inline NodesStoreStatsAndDeletedIDs* getNodesStatisticsAndDeletedIDs() { return nodesStatisticsAndDeletedIDs.get(); } inline NodeTable* getNodeTable(common::table_id_t tableID) const { @@ -68,7 +68,7 @@ class NodesStore { private: std::map> nodeTables; - std::unique_ptr nodesStatisticsAndDeletedIDs; + std::unique_ptr nodesStatisticsAndDeletedIDs; WAL* wal; BMFileHandle* dataFH; BMFileHandle* metadataFH; diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index c180d7a78f..608c210cde 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -2,7 +2,7 @@ #include "catalog/catalog.h" #include "common/utils.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rel_table_statistics.h" #include "storage/storage_structure/column.h" #include "storage/storage_structure/lists/lists.h" #include "storage/storage_utils.h" diff --git a/src/include/storage/store/rels_store.h b/src/include/storage/store/rels_store.h index 9e4c94d793..9ac1481de5 100644 --- a/src/include/storage/store/rels_store.h +++ b/src/include/storage/store/rels_store.h @@ -2,7 +2,7 @@ #include "catalog/catalog.h" #include "common/file_utils.h" -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" #include "storage/store/rel_table.h" namespace kuzu { @@ -10,7 +10,6 @@ namespace storage { // RelsStore stores adjacent rels of a node as well as the properties of rels in the system. class RelsStore { - public: RelsStore(BMFileHandle* metadataFH, const catalog::Catalog& catalog, MemoryManager& memoryManager, WAL* wal); @@ -49,7 +48,7 @@ class RelsStore { return relTables.at(tableID).get(); } - inline RelsStatistics* getRelsStatistics() { return relsStatistics.get(); } + inline RelsStoreStats* getRelsStatistics() { return relsStatistics.get(); } inline void removeRelTable(common::table_id_t tableID) { relTables.erase(tableID); @@ -94,7 +93,7 @@ class RelsStore { private: std::unordered_map> relTables; - std::unique_ptr relsStatistics; + std::unique_ptr relsStatistics; WAL* wal; }; diff --git a/src/include/storage/store/table_data.h b/src/include/storage/store/table_data.h index ebebb687c8..13a78f5e93 100644 --- a/src/include/storage/store/table_data.h +++ b/src/include/storage/store/table_data.h @@ -6,7 +6,7 @@ namespace kuzu { namespace storage { -class NodesStatisticsAndDeletedIDs; +class NodesStoreStatsAndDeletedIDs; class TableData { public: diff --git a/src/main/database.cpp b/src/main/database.cpp index 04af1094f8..ade2328c25 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp @@ -74,11 +74,11 @@ void Database::initDBDirAndCoreFilesIfNecessary() const { } if (!FileUtils::fileOrPathExists(StorageUtils::getNodesStatisticsAndDeletedIDsFilePath( databasePath, DBFileType::ORIGINAL))) { - NodesStatisticsAndDeletedIDs::saveInitialNodesStatisticsAndDeletedIDsToFile(databasePath); + NodesStoreStatsAndDeletedIDs::saveInitialNodesStatisticsAndDeletedIDsToFile(databasePath); } if (!FileUtils::fileOrPathExists( StorageUtils::getRelsStatisticsFilePath(databasePath, DBFileType::ORIGINAL))) { - RelsStatistics::saveInitialRelsStatisticsToFile(databasePath); + RelsStoreStats::saveInitialRelsStatisticsToFile(databasePath); } if (!FileUtils::fileOrPathExists( StorageUtils::getCatalogFilePath(databasePath, DBFileType::ORIGINAL))) { diff --git a/src/planner/plan/plan_copy.cpp b/src/planner/plan/plan_copy.cpp index d56f3ad97a..6ff3ffee14 100644 --- a/src/planner/plan/plan_copy.cpp +++ b/src/planner/plan/plan_copy.cpp @@ -68,7 +68,7 @@ std::unique_ptr Planner::planCopyFrom(const BoundStatement& stateme } std::unique_ptr Planner::planCopyTo(const Catalog& catalog, - const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const NodesStoreStatsAndDeletedIDs& nodesStatistics, const RelsStoreStats& relsStatistics, const BoundStatement& statement) { auto& copyClause = reinterpret_cast(statement); auto regularQuery = copyClause.getRegularQuery(); diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index b8ae52a00c..96119a54ad 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -18,7 +18,7 @@ namespace kuzu { namespace planner { std::unique_ptr Planner::getBestPlan(const Catalog& catalog, - const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const NodesStoreStatsAndDeletedIDs& nodesStatistics, const RelsStoreStats& relsStatistics, const BoundStatement& statement) { std::unique_ptr plan; switch (statement.getStatementType()) { @@ -72,7 +72,7 @@ std::unique_ptr Planner::getBestPlan(const Catalog& catalog, } std::vector> Planner::getAllPlans(const Catalog& catalog, - const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const NodesStoreStatsAndDeletedIDs& nodesStatistics, const RelsStoreStats& relsStatistics, const BoundStatement& statement) { // We enumerate all plans for our testing framework. This API should only be used for QUERY, // EXPLAIN, but not DDL or COPY. @@ -106,7 +106,7 @@ std::unique_ptr Planner::planCommentOn(const BoundStatement& statem } std::unique_ptr Planner::planExplain(const Catalog& catalog, - const NodesStatisticsAndDeletedIDs& nodesStatistics, const RelsStatistics& relsStatistics, + const NodesStoreStatsAndDeletedIDs& nodesStatistics, const RelsStoreStats& relsStatistics, const BoundStatement& statement) { auto& explain = reinterpret_cast(statement); auto statementToExplain = explain.getStatementToExplain(); @@ -129,8 +129,8 @@ std::unique_ptr Planner::planCreateMacro(const BoundStatement& stat } std::vector> Planner::getAllQueryPlans(const catalog::Catalog& catalog, - const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement) { + const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement) { auto planner = QueryPlanner(catalog, nodesStatistics, relsStatistics); std::vector> plans; for (auto& plan : planner.getAllPlans(statement)) { @@ -141,8 +141,8 @@ std::vector> Planner::getAllQueryPlans(const catalo } std::vector> Planner::getAllExplainPlans( - const catalog::Catalog& catalog, const storage::NodesStatisticsAndDeletedIDs& nodesStatistics, - const storage::RelsStatistics& relsStatistics, const BoundStatement& statement) { + const catalog::Catalog& catalog, const storage::NodesStoreStatsAndDeletedIDs& nodesStatistics, + const storage::RelsStoreStats& relsStatistics, const BoundStatement& statement) { auto& explainStatement = reinterpret_cast(statement); auto statementToExplain = explainStatement.getStatementToExplain(); auto plans = getAllPlans(catalog, nodesStatistics, relsStatistics, *statementToExplain); diff --git a/src/processor/map/map_delete.cpp b/src/processor/map/map_delete.cpp index 5610132060..56fbccd0de 100644 --- a/src/processor/map/map_delete.cpp +++ b/src/processor/map/map_delete.cpp @@ -48,7 +48,7 @@ static std::unique_ptr getRelDeleteExecutor( auto relIDPos = DataPos(inSchema.getExpressionPos(*rel.getInternalIDProperty())); auto statistics = store->getRelsStatistics(); if (rel.isMultiLabeled()) { - std::unordered_map> + std::unordered_map> tableIDToTableMap; for (auto tableID : rel.getTableIDs()) { auto table = store->getRelTable(tableID); diff --git a/src/processor/operator/persistent/copy_node.cpp b/src/processor/operator/persistent/copy_node.cpp index 7297f8fb04..950741e542 100644 --- a/src/processor/operator/persistent/copy_node.cpp +++ b/src/processor/operator/persistent/copy_node.cpp @@ -3,6 +3,7 @@ #include "common/exception/copy.h" #include "common/exception/message.h" #include "common/string_utils.h" +#include "storage/stats/nodes_store_statistics.h" #include "storage/store/string_column_chunk.h" using namespace kuzu::catalog; @@ -212,6 +213,12 @@ void CopyNode::finalize(ExecutionContext* context) { sharedState->fTable.get(), outputMsg, context->memoryManager); } +bool CopyNode::isCopyAllowed() const { + auto nodesStatistics = copyNodeInfo.table->getNodeStatisticsAndDeletedIDs(); + return nodesStatistics->getNodeStatisticsAndDeletedIDs(copyNodeInfo.table->getTableID()) + ->getNumTuples() == 0; +} + template<> uint64_t CopyNode::appendToPKIndex( PrimaryKeyIndexBuilder* pkIndex, ColumnChunk* chunk, offset_t startOffset, uint64_t numValues) { diff --git a/src/processor/operator/persistent/copy_rel.cpp b/src/processor/operator/persistent/copy_rel.cpp index 0ee48e2a28..8e7bcaed63 100644 --- a/src/processor/operator/persistent/copy_rel.cpp +++ b/src/processor/operator/persistent/copy_rel.cpp @@ -10,7 +10,7 @@ using namespace kuzu::storage; namespace kuzu { namespace processor { -CopyRelSharedState::CopyRelSharedState(table_id_t tableID, RelsStatistics* relsStatistics, +CopyRelSharedState::CopyRelSharedState(table_id_t tableID, RelsStoreStats* relsStatistics, std::unique_ptr fwdRelData, std::unique_ptr bwdRelData, MemoryManager* memoryManager) : tableID{tableID}, relsStatistics{relsStatistics}, fwdRelData{std::move(fwdRelData)}, diff --git a/src/storage/stats/CMakeLists.txt b/src/storage/stats/CMakeLists.txt index e3a7f2fd05..5b144e990b 100644 --- a/src/storage/stats/CMakeLists.txt +++ b/src/storage/stats/CMakeLists.txt @@ -1,9 +1,13 @@ add_library(kuzu_storage_stats OBJECT - nodes_statistics_and_deleted_ids.cpp + metadata_dah_info.cpp + node_table_statistics.cpp + nodes_store_statistics.cpp property_statistics.cpp - rels_statistics.cpp - table_statistics.cpp) + rel_table_statistics.cpp + rels_store_statistics.cpp + table_statistics.cpp +table_statistics_collection.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/storage/stats/metadata_dah_info.cpp b/src/storage/stats/metadata_dah_info.cpp new file mode 100644 index 0000000000..511d88a3ae --- /dev/null +++ b/src/storage/stats/metadata_dah_info.cpp @@ -0,0 +1,35 @@ +#include "storage/stats/metadata_dah_info.h" + +#include "common/ser_deser.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +std::unique_ptr MetadataDAHInfo::copy() { + auto result = std::make_unique(dataDAHPageIdx, nullDAHPageIdx); + result->childrenInfos.resize(childrenInfos.size()); + for (size_t i = 0; i < childrenInfos.size(); ++i) { + result->childrenInfos[i] = childrenInfos[i]->copy(); + } + return result; +} + +void MetadataDAHInfo::serialize(FileInfo* fileInfo, uint64_t& offset) const { + SerDeser::serializeValue(dataDAHPageIdx, fileInfo, offset); + SerDeser::serializeValue(nullDAHPageIdx, fileInfo, offset); + SerDeser::serializeVectorOfPtrs(childrenInfos, fileInfo, offset); +} + +std::unique_ptr MetadataDAHInfo::deserialize( + FileInfo* fileInfo, uint64_t& offset) { + auto metadataDAHInfo = std::make_unique(); + SerDeser::deserializeValue(metadataDAHInfo->dataDAHPageIdx, fileInfo, offset); + SerDeser::deserializeValue(metadataDAHInfo->nullDAHPageIdx, fileInfo, offset); + SerDeser::deserializeVectorOfPtrs(metadataDAHInfo->childrenInfos, fileInfo, offset); + return metadataDAHInfo; +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp b/src/storage/stats/node_table_statistics.cpp similarity index 64% rename from src/storage/stats/nodes_statistics_and_deleted_ids.cpp rename to src/storage/stats/node_table_statistics.cpp index 935be0b7cf..ab48dc2532 100644 --- a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp +++ b/src/storage/stats/node_table_statistics.cpp @@ -1,17 +1,43 @@ -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/node_table_statistics.h" -#include "common/constants.h" +#include "common/ser_deser.h" #include "common/string_utils.h" +#include "storage/stats/table_statistics_collection.h" using namespace kuzu::common; namespace kuzu { namespace storage { +NodeTableStatsAndDeletedIDs::NodeTableStatsAndDeletedIDs(BMFileHandle* metadataFH, + const catalog::TableSchema& schema, BufferManager* bufferManager, WAL* wal) + : TableStatistics{schema}, tableID{schema.tableID} { + metadataDAHInfos.clear(); + metadataDAHInfos.reserve(schema.getNumProperties()); + for (auto property : schema.getProperties()) { + metadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + *property->getDataType(), *metadataFH, bufferManager, wal)); + } +} + +NodeTableStatsAndDeletedIDs::NodeTableStatsAndDeletedIDs( + table_id_t tableID, offset_t maxNodeOffset, const std::vector& deletedNodeOffsets) + : NodeTableStatsAndDeletedIDs{tableID, maxNodeOffset, deletedNodeOffsets, {}} {} + +NodeTableStatsAndDeletedIDs::NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) + : TableStatistics{other}, tableID{other.tableID}, adjListsAndColumns{other.adjListsAndColumns}, + hasDeletedNodesPerMorsel{other.hasDeletedNodesPerMorsel}, + deletedNodeOffsetsPerMorsel{other.deletedNodeOffsetsPerMorsel} { + metadataDAHInfos.clear(); + metadataDAHInfos.reserve(other.metadataDAHInfos.size()); + for (auto& metadataDAHInfo : other.metadataDAHInfos) { + metadataDAHInfos.push_back(metadataDAHInfo->copy()); + } +} + NodeTableStatsAndDeletedIDs::NodeTableStatsAndDeletedIDs(table_id_t tableID, offset_t maxNodeOffset, const std::vector& deletedNodeOffsets, - std::unordered_map>&& - propertyStatistics) + std::unordered_map>&& propertyStatistics) : tableID{tableID}, TableStatistics{TableType::NODE, getNumTuplesFromMaxNodeOffset(maxNodeOffset), tableID, std::move(propertyStatistics)} { @@ -130,7 +156,7 @@ void NodeTableStatsAndDeletedIDs::serializeInternal(FileInfo* fileInfo, uint64_t std::unique_ptr NodeTableStatsAndDeletedIDs::deserialize( table_id_t tableID, offset_t maxNodeOffset, FileInfo* fileInfo, uint64_t& offset) { - std::vector deletedNodeOffsets; + std::vector deletedNodeOffsets; std::vector> metadataDAHInfos; SerDeser::deserializeVector(deletedNodeOffsets, fileInfo, offset); SerDeser::deserializeVectorOfPtrs(metadataDAHInfos, fileInfo, offset); @@ -169,82 +195,5 @@ bool NodeTableStatsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morsel return false; } -void NodesStatisticsAndDeletedIDs::setAdjListsAndColumns(RelsStore* relsStore) { - for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { - getNodeStatisticsAndDeletedIDs(tableIDStatistics.first) - ->setAdjListsAndColumns(relsStore->getAdjListsAndColumns(tableIDStatistics.first)); - } -} - -std::map NodesStatisticsAndDeletedIDs::getMaxNodeOffsetPerTable() const { - std::map retVal; - for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { - retVal[tableIDStatistics.first] = - getNodeStatisticsAndDeletedIDs(tableIDStatistics.first)->getMaxNodeOffset(); - } - return retVal; -} - -void NodesStatisticsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( - transaction::Transaction* transaction, const std::shared_ptr& nodeOffsetVector, - table_id_t tableID) { - // NOTE: We can remove the lock under the following assumptions, that should currently hold: - // 1) During the phases when nodeStatisticsAndDeletedIDsPerTableForReadOnlyTrx change, which - // is during checkpointing, this function, which is called during scans, cannot be called. - // 2) In a read-only transaction, the same morsel cannot be scanned concurrently. 3) A - // write transaction cannot have two concurrent pipelines where one is writing and the - // other is reading nodeStatisticsAndDeletedIDsPerTableForWriteTrx. That is the pipeline in a - // query where scans/reads happen in a write transaction cannot run concurrently with the - // pipeline that performs an add/delete node. - lock_t lck{mtx}; - (transaction->isReadOnly() || tablesStatisticsContentForWriteTrx == nullptr) ? - getNodeStatisticsAndDeletedIDs(tableID)->setDeletedNodeOffsetsForMorsel(nodeOffsetVector) : - ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx - ->tableStatisticPerTable[tableID] - .get()) - ->setDeletedNodeOffsetsForMorsel(nodeOffsetVector); -} - -void NodesStatisticsAndDeletedIDs::addNodeStatisticsAndDeletedIDs( - catalog::NodeTableSchema* tableSchema) { - initTableStatisticsForWriteTrx(); - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableSchema->tableID] = - constructTableStatistic(tableSchema); -} - -void NodesStatisticsAndDeletedIDs::addMetadataDAHInfo( - table_id_t tableID, const LogicalType& dataType) { - initTableStatisticsForWriteTrx(); - auto tableStats = dynamic_cast( - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); - tableStats->addMetadataDAHInfoForColumn( - createMetadataDAHInfo(dataType, *metadataFH, bufferManager, wal)); -} - -void NodesStatisticsAndDeletedIDs::removeMetadataDAHInfo( - common::table_id_t tableID, common::column_id_t columnID) { - initTableStatisticsForWriteTrx(); - auto tableStats = dynamic_cast( - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); - tableStats->removeMetadataDAHInfoForColumn(columnID); -} - -MetadataDAHInfo* NodesStatisticsAndDeletedIDs::getMetadataDAHInfo( - transaction::Transaction* transaction, common::table_id_t tableID, - common::column_id_t columnID) { - if (transaction->isWriteTransaction()) { - initTableStatisticsForWriteTrx(); - assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); - auto nodeTableStats = dynamic_cast( - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); - return nodeTableStats->getMetadataDAHInfo(columnID); - } else { - assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); - auto nodeTableStats = dynamic_cast( - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID].get()); - return nodeTableStats->getMetadataDAHInfo(columnID); - } -} - } // namespace storage } // namespace kuzu diff --git a/src/storage/stats/nodes_store_statistics.cpp b/src/storage/stats/nodes_store_statistics.cpp new file mode 100644 index 0000000000..62e4be012a --- /dev/null +++ b/src/storage/stats/nodes_store_statistics.cpp @@ -0,0 +1,101 @@ +#include "storage/stats/nodes_store_statistics.h" + +#include "common/constants.h" +#include "common/string_utils.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +offset_t NodesStoreStatsAndDeletedIDs::getMaxNodeOffset( + transaction::Transaction* transaction, table_id_t tableID) { + assert(transaction); + if (transaction->getType() == transaction::TransactionType::READ_ONLY) { + return getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset(); + } else { + std::unique_lock xLck{mtx}; + return tablesStatisticsContentForWriteTrx == nullptr ? + getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() : + getNodeTableStats(transaction::TransactionType::WRITE, tableID) + ->getMaxNodeOffset(); + } +} + +void NodesStoreStatsAndDeletedIDs::setAdjListsAndColumns(RelsStore* relsStore) { + for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { + getNodeStatisticsAndDeletedIDs(tableIDStatistics.first) + ->setAdjListsAndColumns(relsStore->getAdjListsAndColumns(tableIDStatistics.first)); + } +} + +std::map NodesStoreStatsAndDeletedIDs::getMaxNodeOffsetPerTable() const { + std::map retVal; + for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { + retVal[tableIDStatistics.first] = + getNodeStatisticsAndDeletedIDs(tableIDStatistics.first)->getMaxNodeOffset(); + } + return retVal; +} + +void NodesStoreStatsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( + transaction::Transaction* transaction, const std::shared_ptr& nodeOffsetVector, + table_id_t tableID) { + // NOTE: We can remove the lock under the following assumptions, that should currently hold: + // 1) During the phases when nodeStatisticsAndDeletedIDsPerTableForReadOnlyTrx change, which + // is during checkpointing, this function, which is called during scans, cannot be called. + // 2) In a read-only transaction, the same morsel cannot be scanned concurrently. 3) A + // write transaction cannot have two concurrent pipelines where one is writing and the + // other is reading nodeStatisticsAndDeletedIDsPerTableForWriteTrx. That is the pipeline in a + // query where scans/reads happen in a write transaction cannot run concurrently with the + // pipeline that performs an add/delete node. + lock_t lck{mtx}; + (transaction->isReadOnly() || tablesStatisticsContentForWriteTrx == nullptr) ? + getNodeStatisticsAndDeletedIDs(tableID)->setDeletedNodeOffsetsForMorsel(nodeOffsetVector) : + ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + ->tableStatisticPerTable[tableID] + .get()) + ->setDeletedNodeOffsetsForMorsel(nodeOffsetVector); +} + +void NodesStoreStatsAndDeletedIDs::addNodeStatisticsAndDeletedIDs( + catalog::NodeTableSchema* tableSchema) { + initTableStatisticsForWriteTrx(); + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableSchema->tableID] = + constructTableStatistic(tableSchema); +} + +void NodesStoreStatsAndDeletedIDs::addMetadataDAHInfo( + table_id_t tableID, const LogicalType& dataType) { + initTableStatisticsForWriteTrx(); + auto tableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + tableStats->addMetadataDAHInfoForColumn( + createMetadataDAHInfo(dataType, *metadataFH, bufferManager, wal)); +} + +void NodesStoreStatsAndDeletedIDs::removeMetadataDAHInfo(table_id_t tableID, column_id_t columnID) { + initTableStatisticsForWriteTrx(); + auto tableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + tableStats->removeMetadataDAHInfoForColumn(columnID); +} + +MetadataDAHInfo* NodesStoreStatsAndDeletedIDs::getMetadataDAHInfo( + transaction::Transaction* transaction, table_id_t tableID, column_id_t columnID) { + if (transaction->isWriteTransaction()) { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto nodeTableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + return nodeTableStats->getMetadataDAHInfo(columnID); + } else { + assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); + auto nodeTableStats = dynamic_cast( + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID].get()); + return nodeTableStats->getMetadataDAHInfo(columnID); + } +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/stats/property_statistics.cpp b/src/storage/stats/property_statistics.cpp index c12c345314..ff02316dd9 100644 --- a/src/storage/stats/property_statistics.cpp +++ b/src/storage/stats/property_statistics.cpp @@ -1,7 +1,7 @@ #include "storage/stats/property_statistics.h" #include "common/ser_deser.h" -#include "storage/stats/table_statistics.h" +#include "storage/stats/table_statistics_collection.h" namespace kuzu { namespace storage { diff --git a/src/storage/stats/rel_table_statistics.cpp b/src/storage/stats/rel_table_statistics.cpp new file mode 100644 index 0000000000..0e8b820170 --- /dev/null +++ b/src/storage/stats/rel_table_statistics.cpp @@ -0,0 +1,22 @@ +#include "storage/stats/rel_table_statistics.h" + +#include "common/ser_deser.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +void RelTableStats::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(nextRelOffset, fileInfo, offset); +} + +std::unique_ptr RelTableStats::deserialize( + uint64_t numRels, common::table_id_t tableID, FileInfo* fileInfo, uint64_t& offset) { + common::offset_t nextRelOffset; + SerDeser::deserializeValue(nextRelOffset, fileInfo, offset); + return std::make_unique(numRels, tableID, nextRelOffset); +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/stats/rels_statistics.cpp b/src/storage/stats/rels_store_statistics.cpp similarity index 69% rename from src/storage/stats/rels_statistics.cpp rename to src/storage/stats/rels_store_statistics.cpp index 5647bf601a..7c50672dba 100644 --- a/src/storage/stats/rels_statistics.cpp +++ b/src/storage/stats/rels_store_statistics.cpp @@ -1,23 +1,14 @@ -#include "storage/stats/rels_statistics.h" +#include "storage/stats/rels_store_statistics.h" + +#include "storage/stats/rel_table_statistics.h" using namespace kuzu::common; namespace kuzu { namespace storage { -void RelTableStats::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { - SerDeser::serializeValue(nextRelOffset, fileInfo, offset); -} - -std::unique_ptr RelTableStats::deserialize( - uint64_t numRels, common::table_id_t tableID, FileInfo* fileInfo, uint64_t& offset) { - common::offset_t nextRelOffset; - SerDeser::deserializeValue(nextRelOffset, fileInfo, offset); - return std::make_unique(numRels, tableID, nextRelOffset); -} - // We should only call this function after we call setNumRelsPerDirectionBoundTableID. -void RelsStatistics::setNumTuplesForTable(table_id_t relTableID, uint64_t numRels) { +void RelsStoreStats::setNumTuplesForTable(table_id_t relTableID, uint64_t numRels) { std::unique_lock lck{mtx}; initTableStatisticsForWriteTrxNoLock(); assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(relTableID)); @@ -28,7 +19,7 @@ void RelsStatistics::setNumTuplesForTable(table_id_t relTableID, uint64_t numRel relStatistics->setNumTuples(numRels); } -void RelsStatistics::updateNumRelsByValue(table_id_t relTableID, int64_t value) { +void RelsStoreStats::updateNumRelsByValue(table_id_t relTableID, int64_t value) { std::unique_lock lck{mtx}; initTableStatisticsForWriteTrxNoLock(); auto relStatistics = @@ -42,7 +33,7 @@ void RelsStatistics::updateNumRelsByValue(table_id_t relTableID, int64_t value) } } -offset_t RelsStatistics::getNextRelOffset( +offset_t RelsStoreStats::getNextRelOffset( transaction::Transaction* transaction, table_id_t tableID) { std::unique_lock lck{mtx}; auto& tableStatisticContent = diff --git a/src/storage/stats/table_statistics.cpp b/src/storage/stats/table_statistics.cpp index 42b2431a3e..4e3ea9a272 100644 --- a/src/storage/stats/table_statistics.cpp +++ b/src/storage/stats/table_statistics.cpp @@ -1,15 +1,39 @@ #include "storage/stats/table_statistics.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" -#include "storage/stats/rels_statistics.h" -#include "storage/storage_utils.h" -#include "storage/store/column_chunk.h" +#include "catalog/table_schema.h" +#include "common/ser_deser.h" +#include "storage/stats/node_table_statistics.h" +#include "storage/stats/rel_table_statistics.h" using namespace kuzu::common; namespace kuzu { namespace storage { +TableStatistics::TableStatistics(const catalog::TableSchema& schema) + : tableType{schema.tableType}, numTuples{0}, tableID{schema.tableID} { + for (auto property : schema.getProperties()) { + propertyStatistics[property->getPropertyID()] = std::make_unique(); + } +} + +TableStatistics::TableStatistics(common::TableType tableType, uint64_t numTuples, + common::table_id_t tableID, + std::unordered_map>&& + propertyStatistics) + : tableType{tableType}, numTuples{numTuples}, tableID{tableID}, propertyStatistics{std::move( + propertyStatistics)} { + assert(numTuples != UINT64_MAX); +} + +TableStatistics::TableStatistics(const TableStatistics& other) + : tableType{other.tableType}, numTuples{other.numTuples}, tableID{other.tableID} { + for (auto& propertyStats : other.propertyStatistics) { + propertyStatistics[propertyStats.first] = + std::make_unique(*propertyStats.second.get()); + } +} + void TableStatistics::serialize(FileInfo* fileInfo, uint64_t& offset) { SerDeser::serializeValue(tableType, fileInfo, offset); SerDeser::serializeValue(numTuples, fileInfo, offset); @@ -51,123 +75,5 @@ std::unique_ptr TableStatistics::deserialize( return result; } -void MetadataDAHInfo::serialize(FileInfo* fileInfo, uint64_t& offset) const { - SerDeser::serializeValue(dataDAHPageIdx, fileInfo, offset); - SerDeser::serializeValue(nullDAHPageIdx, fileInfo, offset); - SerDeser::serializeVectorOfPtrs(childrenInfos, fileInfo, offset); -} - -std::unique_ptr MetadataDAHInfo::deserialize( - FileInfo* fileInfo, uint64_t& offset) { - auto metadataDAHInfo = std::make_unique(); - SerDeser::deserializeValue(metadataDAHInfo->dataDAHPageIdx, fileInfo, offset); - SerDeser::deserializeValue(metadataDAHInfo->nullDAHPageIdx, fileInfo, offset); - SerDeser::deserializeVectorOfPtrs(metadataDAHInfo->childrenInfos, fileInfo, offset); - return metadataDAHInfo; -} - -TablesStatistics::TablesStatistics(BMFileHandle* metadataFH) : metadataFH{metadataFH} { - tablesStatisticsContentForReadOnlyTrx = std::make_unique(); -} - -void TablesStatistics::readFromFile(const std::string& directory) { - readFromFile(directory, DBFileType::ORIGINAL); -} - -void TablesStatistics::readFromFile(const std::string& directory, DBFileType dbFileType) { - auto filePath = getTableStatisticsFilePath(directory, dbFileType); - auto fileInfo = FileUtils::openFile(filePath, O_RDONLY); - uint64_t offset = 0; - SerDeser::deserializeUnorderedMap( - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable, fileInfo.get(), offset); -} - -void TablesStatistics::saveToFile(const std::string& directory, DBFileType dbFileType, - transaction::TransactionType transactionType) { - auto filePath = getTableStatisticsFilePath(directory, dbFileType); - auto fileInfo = FileUtils::openFile(filePath, O_WRONLY | O_CREAT); - uint64_t offset = 0; - auto& tablesStatisticsContent = (transactionType == transaction::TransactionType::READ_ONLY || - tablesStatisticsContentForWriteTrx == nullptr) ? - tablesStatisticsContentForReadOnlyTrx : - tablesStatisticsContentForWriteTrx; - SerDeser::serializeUnorderedMap( - tablesStatisticsContent->tableStatisticPerTable, fileInfo.get(), offset); -} - -void TablesStatistics::initTableStatisticsForWriteTrx() { - std::unique_lock xLck{mtx}; - initTableStatisticsForWriteTrxNoLock(); -} - -void TablesStatistics::initTableStatisticsForWriteTrxNoLock() { - if (tablesStatisticsContentForWriteTrx == nullptr) { - tablesStatisticsContentForWriteTrx = std::make_unique(); - for (auto& [tableID, tableStatistic] : - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID] = - tableStatistic->copy(); - } - } -} - -PropertyStatistics& TablesStatistics::getPropertyStatisticsForTable( - const transaction::Transaction& transaction, table_id_t tableID, property_id_t propertyID) { - if (transaction.isReadOnly()) { - assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get(); - return tableStatistics->getPropertyStatistics(propertyID); - } else { - initTableStatisticsForWriteTrx(); - assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); - return tableStatistics->getPropertyStatistics(propertyID); - } -} - -void TablesStatistics::setPropertyStatisticsForTable( - table_id_t tableID, property_id_t propertyID, PropertyStatistics stats) { - initTableStatisticsForWriteTrx(); - assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); - tableStatistics->setPropertyStatistics(propertyID, stats); -} - -std::unique_ptr TablesStatistics::createMetadataDAHInfo( - const LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal) { - auto metadataDAHInfo = std::make_unique(); - metadataDAHInfo->dataDAHPageIdx = - InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); - metadataDAHInfo->nullDAHPageIdx = - InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); - switch (dataType.getPhysicalType()) { - case PhysicalTypeID::STRUCT: { - auto fields = StructType::getFields(&dataType); - metadataDAHInfo->childrenInfos.resize(fields.size()); - for (auto i = 0u; i < fields.size(); i++) { - metadataDAHInfo->childrenInfos[i] = - createMetadataDAHInfo(*fields[i]->getType(), metadataFH, bm, wal); - } - } break; - case PhysicalTypeID::VAR_LIST: { - metadataDAHInfo->childrenInfos.push_back( - createMetadataDAHInfo(*VarListType::getChildType(&dataType), metadataFH, bm, wal)); - } break; - case PhysicalTypeID::STRING: { - auto childMetadataDAHInfo = std::make_unique(); - childMetadataDAHInfo->dataDAHPageIdx = - InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); - metadataDAHInfo->childrenInfos.push_back(std::move(childMetadataDAHInfo)); - } break; - default: { - // DO NOTHING. - } - } - return metadataDAHInfo; -} - } // namespace storage } // namespace kuzu diff --git a/src/storage/stats/table_statistics_collection.cpp b/src/storage/stats/table_statistics_collection.cpp new file mode 100644 index 0000000000..86b40867ed --- /dev/null +++ b/src/storage/stats/table_statistics_collection.cpp @@ -0,0 +1,116 @@ +#include "storage/stats/table_statistics_collection.h" + +#include "common/ser_deser.h" +#include "storage/storage_structure/disk_array.h" +#include "storage/store/column_chunk.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +TablesStatistics::TablesStatistics(BMFileHandle* metadataFH) : metadataFH{metadataFH} { + tablesStatisticsContentForReadOnlyTrx = std::make_unique(); +} + +void TablesStatistics::readFromFile(const std::string& directory) { + readFromFile(directory, DBFileType::ORIGINAL); +} + +void TablesStatistics::readFromFile(const std::string& directory, DBFileType dbFileType) { + auto filePath = getTableStatisticsFilePath(directory, dbFileType); + auto fileInfo = FileUtils::openFile(filePath, O_RDONLY); + uint64_t offset = 0; + SerDeser::deserializeUnorderedMap( + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable, fileInfo.get(), offset); +} + +void TablesStatistics::saveToFile(const std::string& directory, DBFileType dbFileType, + transaction::TransactionType transactionType) { + auto filePath = getTableStatisticsFilePath(directory, dbFileType); + auto fileInfo = FileUtils::openFile(filePath, O_WRONLY | O_CREAT); + uint64_t offset = 0; + auto& tablesStatisticsContent = (transactionType == transaction::TransactionType::READ_ONLY || + tablesStatisticsContentForWriteTrx == nullptr) ? + tablesStatisticsContentForReadOnlyTrx : + tablesStatisticsContentForWriteTrx; + SerDeser::serializeUnorderedMap( + tablesStatisticsContent->tableStatisticPerTable, fileInfo.get(), offset); +} + +void TablesStatistics::initTableStatisticsForWriteTrx() { + std::unique_lock xLck{mtx}; + initTableStatisticsForWriteTrxNoLock(); +} + +void TablesStatistics::initTableStatisticsForWriteTrxNoLock() { + if (tablesStatisticsContentForWriteTrx == nullptr) { + tablesStatisticsContentForWriteTrx = std::make_unique(); + for (auto& [tableID, tableStatistic] : + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID] = + tableStatistic->copy(); + } + } +} + +PropertyStatistics& TablesStatistics::getPropertyStatisticsForTable( + const transaction::Transaction& transaction, table_id_t tableID, property_id_t propertyID) { + if (transaction.isReadOnly()) { + assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get(); + return tableStatistics->getPropertyStatistics(propertyID); + } else { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); + return tableStatistics->getPropertyStatistics(propertyID); + } +} + +void TablesStatistics::setPropertyStatisticsForTable( + table_id_t tableID, property_id_t propertyID, PropertyStatistics stats) { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); + tableStatistics->setPropertyStatistics(propertyID, stats); +} + +std::unique_ptr TablesStatistics::createMetadataDAHInfo( + const LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal) { + auto metadataDAHInfo = std::make_unique(); + metadataDAHInfo->dataDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + metadataDAHInfo->nullDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + switch (dataType.getPhysicalType()) { + case PhysicalTypeID::STRUCT: { + auto fields = StructType::getFields(&dataType); + metadataDAHInfo->childrenInfos.resize(fields.size()); + for (auto i = 0u; i < fields.size(); i++) { + metadataDAHInfo->childrenInfos[i] = + createMetadataDAHInfo(*fields[i]->getType(), metadataFH, bm, wal); + } + } break; + case PhysicalTypeID::VAR_LIST: { + metadataDAHInfo->childrenInfos.push_back( + createMetadataDAHInfo(*VarListType::getChildType(&dataType), metadataFH, bm, wal)); + } break; + case PhysicalTypeID::STRING: { + auto childMetadataDAHInfo = std::make_unique(); + childMetadataDAHInfo->dataDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + metadataDAHInfo->childrenInfos.push_back(std::move(childMetadataDAHInfo)); + } break; + default: { + // DO NOTHING. + } + } + return metadataDAHInfo; +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index f826d67a25..c206966390 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -13,7 +13,7 @@ namespace kuzu { namespace storage { NodeTable::NodeTable(BMFileHandle* dataFH, BMFileHandle* metadataFH, - NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, + NodesStoreStatsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, WAL* wal, NodeTableSchema* nodeTableSchema) : nodesStatisticsAndDeletedIDs{nodesStatisticsAndDeletedIDs}, pkColumnID{nodeTableSchema->getColumnID(nodeTableSchema->getPrimaryKeyPropertyID())}, diff --git a/src/storage/store/nodes_store.cpp b/src/storage/store/nodes_store.cpp index 02058d471d..47508a4d41 100644 --- a/src/storage/store/nodes_store.cpp +++ b/src/storage/store/nodes_store.cpp @@ -9,7 +9,7 @@ NodesStore::NodesStore(BMFileHandle* dataFH, BMFileHandle* metadataFH, const Cat BufferManager& bufferManager, WAL* wal) : wal{wal}, dataFH{dataFH}, metadataFH{metadataFH} { nodesStatisticsAndDeletedIDs = - std::make_unique(metadataFH, &bufferManager, wal); + std::make_unique(metadataFH, &bufferManager, wal); for (auto& schema : catalog.getReadOnlyVersion()->getNodeTableSchemas()) { auto nodeTableSchema = reinterpret_cast(schema); nodeTables[schema->tableID] = std::make_unique(dataFH, metadataFH, diff --git a/src/storage/store/rels_store.cpp b/src/storage/store/rels_store.cpp index e21460a9f7..06c8ff78e9 100644 --- a/src/storage/store/rels_store.cpp +++ b/src/storage/store/rels_store.cpp @@ -9,7 +9,7 @@ namespace storage { RelsStore::RelsStore( BMFileHandle* metadataFH, const Catalog& catalog, MemoryManager& memoryManager, WAL* wal) : wal{wal} { - relsStatistics = std::make_unique(metadataFH, wal->getDirectory()); + relsStatistics = std::make_unique(metadataFH, wal->getDirectory()); for (auto& relTableSchema : catalog.getReadOnlyVersion()->getRelTableSchemas()) { relTables.emplace(relTableSchema->tableID, std::make_unique(catalog, relTableSchema->tableID, memoryManager, wal)); diff --git a/src/storage/store/table_data.cpp b/src/storage/store/table_data.cpp index 338c34218a..e37169b561 100644 --- a/src/storage/store/table_data.cpp +++ b/src/storage/store/table_data.cpp @@ -1,6 +1,6 @@ #include "storage/store/table_data.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/nodes_store_statistics.h" using namespace kuzu::common; using namespace kuzu::transaction; @@ -17,7 +17,7 @@ TableData::TableData(BMFileHandle* dataFH, BMFileHandle* metadataFH, table_id_t for (auto i = 0u; i < properties.size(); i++) { auto property = properties[i]; auto metadataDAHInfo = - dynamic_cast(tablesStatistics) + dynamic_cast(tablesStatistics) ->getMetadataDAHInfo(Transaction::getDummyWriteTrx().get(), tableID, i); columns.push_back( NodeColumnFactory::createNodeColumn(*property->getDataType(), *metadataDAHInfo, dataFH, @@ -114,7 +114,7 @@ void TableData::append(kuzu::storage::NodeGroup* nodeGroup) { void TableData::addColumn(transaction::Transaction* transaction, const catalog::Property& property, ValueVector* defaultValueVector, TablesStatistics* tablesStats) { - auto metadataDAHInfo = dynamic_cast(tablesStats) + auto metadataDAHInfo = dynamic_cast(tablesStats) ->getMetadataDAHInfo(transaction, tableID, columns.size()); auto nodeColumn = NodeColumnFactory::createNodeColumn(*property.getDataType(), *metadataDAHInfo, dataFH, metadataFH, bufferManager, wal, transaction, diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index 93bc03b3c7..553a863baf 100644 --- a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -193,7 +193,7 @@ void WALReplayer::replayNodeTableRecord(const WALRecord& walRecord) { void WALReplayer::replayRelTableRecord(const WALRecord& walRecord, bool isRdf) { if (isCheckpoint) { // See comments for NODE_TABLE_RECORD. - auto nodesStatistics = std::make_unique( + auto nodesStatistics = std::make_unique( nullptr /* metadataFH */, nullptr /* bufferManager */, wal, isRdf ? DBFileType::WAL_VERSION : DBFileType::ORIGINAL); auto maxNodeOffsetPerTable = nodesStatistics->getMaxNodeOffsetPerTable(); @@ -349,7 +349,7 @@ void WALReplayer::replayCopyRelRecord(const kuzu::storage::WALRecord& walRecord) return; } auto nodesStatisticsAndDeletedIDsForCheckPointing = - std::make_unique( + std::make_unique( nullptr /* metadataFH */, nullptr /* bufferManager */, wal); auto maxNodeOffsetPerTable = nodesStatisticsAndDeletedIDsForCheckPointing->getMaxNodeOffsetPerTable();