diff --git a/CMakeLists.txt b/CMakeLists.txt index 118bc9760b..725084bfed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.11) -project(Kuzu VERSION 0.0.8.9 LANGUAGES CXX) +project(Kuzu VERSION 0.0.8.10 LANGUAGES CXX) find_package(Threads REQUIRED) diff --git a/src/include/processor/operator/scan/scan_rel_table.h b/src/include/processor/operator/scan/scan_rel_table.h index 49c5052b16..3cc4c8bd75 100644 --- a/src/include/processor/operator/scan/scan_rel_table.h +++ b/src/include/processor/operator/scan/scan_rel_table.h @@ -27,11 +27,11 @@ struct ScanRelTalePosInfo { struct RelTableScanInfo { storage::RelTableDataType relTableDataType; storage::DirectedRelTableData* tableData; - storage::RelStatistics* relStats; + storage::RelTableStats* relStats; std::vector propertyIds; RelTableScanInfo(storage::RelTableDataType relTableDataType, - storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats, + storage::DirectedRelTableData* tableData, storage::RelTableStats* relStats, std::vector propertyIds) : relTableDataType{relTableDataType}, tableData{tableData}, relStats{relStats}, propertyIds{std::move(propertyIds)} {} diff --git a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h b/src/include/storage/stats/nodes_statistics_and_deleted_ids.h index e656ca45c5..7e4b3bd361 100644 --- a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h +++ b/src/include/storage/stats/nodes_statistics_and_deleted_ids.h @@ -9,25 +9,28 @@ namespace kuzu { namespace storage { -class NodeStatisticsAndDeletedIDs : public TableStatistics { +class NodeTableStatsAndDeletedIDs : public TableStatistics { public: - explicit NodeStatisticsAndDeletedIDs(const catalog::TableSchema& schema) + explicit NodeTableStatsAndDeletedIDs(const catalog::TableSchema& schema) : TableStatistics{schema}, tableID{schema.tableID} {} - NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, + 
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, + const std::vector& deletedNodeOffsets) + : NodeTableStatsAndDeletedIDs{tableID, maxNodeOffset, deletedNodeOffsets, {}} {} + NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, std::unordered_map>&& propertyStatistics) - : NodeStatisticsAndDeletedIDs(tableID, maxNodeOffset, + : NodeTableStatsAndDeletedIDs(tableID, maxNodeOffset, std::vector() /* no deleted node offsets during initial loading */, std::move(propertyStatistics)) {} - NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, + NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, const std::vector& deletedNodeOffsets, std::unordered_map>&& propertyStatistics); - NodeStatisticsAndDeletedIDs(const NodeStatisticsAndDeletedIDs& other) = default; + NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) = default; inline common::offset_t getMaxNodeOffset() { return getMaxNodeOffsetFromNumTuples(getNumTuples()); @@ -60,6 +63,10 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics { return numTuples == 0 ? 
UINT64_MAX : numTuples - 1; } + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + static std::unique_ptr deserialize(common::table_id_t tableID, + common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset); + private: void errorIfNodeHasEdges(common::offset_t nodeOffset); @@ -92,12 +99,12 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { // Should be used only by tests; explicit NodesStatisticsAndDeletedIDs( - std::unordered_map>& + std::unordered_map>& nodesStatisticsAndDeletedIDs); - inline NodeStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs( + inline NodeTableStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs( common::table_id_t tableID) const { - return (NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx + return (NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx ->tableStatisticPerTable[tableID] .get(); } @@ -114,7 +121,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { inline void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) override { initTableStatisticsForWriteTrx(); - ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx ->tableStatisticPerTable[tableID] .get()) ->setNumTuples(numTuples); @@ -129,7 +136,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { std::unique_lock xLck{mtx}; return tablesStatisticsContentForWriteTrx == nullptr ? 
getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() : - ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx ->tableStatisticPerTable[tableID] .get()) ->getMaxNodeOffset(); @@ -148,7 +155,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { common::offset_t addNode(common::table_id_t tableID) { lock_t lck{mtx}; initTableStatisticsForWriteTrxNoLock(); - return ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + return ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx ->tableStatisticPerTable[tableID] .get()) ->addNode(); @@ -158,7 +165,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { void deleteNode(common::table_id_t tableID, common::offset_t nodeOffset) { lock_t lck{mtx}; initTableStatisticsForWriteTrxNoLock(); - ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx ->tableStatisticPerTable[tableID] .get()) ->deleteNode(nodeOffset); @@ -174,33 +181,21 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema); protected: - inline std::string getTableTypeForPrinting() const override { - return "NodesStatisticsAndDeletedIDs"; - } - inline std::unique_ptr constructTableStatistic( catalog::TableSchema* tableSchema) override { - return std::make_unique(*tableSchema); + return std::make_unique(*tableSchema); } inline std::unique_ptr constructTableStatistic( TableStatistics* tableStatistics) override { - return std::make_unique( - *(NodeStatisticsAndDeletedIDs*)tableStatistics); + return std::make_unique( + *(NodeTableStatsAndDeletedIDs*)tableStatistics); } inline std::string getTableStatisticsFilePath( const std::string& directory, common::DBFileType dbFileType) override { return StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType); } 
- - std::unique_ptr deserializeTableStatistics(uint64_t numTuples, - std::unordered_map>&& - propertyStats, - uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override; - - void serializeTableStatistics( - TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override; }; } // namespace storage diff --git a/src/include/storage/stats/rels_statistics.h b/src/include/storage/stats/rels_statistics.h index 6694ba0239..58ca2f75fe 100644 --- a/src/include/storage/stats/rels_statistics.h +++ b/src/include/storage/stats/rels_statistics.h @@ -9,20 +9,28 @@ namespace kuzu { namespace storage { class RelsStatistics; -class RelStatistics : public TableStatistics { +class RelTableStats : public TableStatistics { friend class RelsStatistics; public: - RelStatistics(const catalog::TableSchema& tableSchema) + RelTableStats(const catalog::TableSchema& tableSchema) : TableStatistics{tableSchema}, nextRelOffset{0} {} - RelStatistics(uint64_t numRels, + RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset) + : TableStatistics{common::TableType::REL, numRels, tableID, {}}, nextRelOffset{ + nextRelOffset} {} + RelTableStats(uint64_t numRels, common::table_id_t tableID, std::unordered_map>&& propertyStats, common::offset_t nextRelOffset) - : TableStatistics{numRels, std::move(propertyStats)}, nextRelOffset{nextRelOffset} {} + : TableStatistics{common::TableType::REL, numRels, tableID, std::move(propertyStats)}, + nextRelOffset{nextRelOffset} {} inline common::offset_t getNextRelOffset() const { return nextRelOffset; } + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; + static std::unique_ptr deserialize( + uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset); + private: common::offset_t nextRelOffset; }; @@ -40,7 +48,7 @@ class RelsStatistics : public TablesStatistics { } // Should only be used by tests. 
- explicit RelsStatistics(std::unordered_map> + explicit RelsStatistics(std::unordered_map> relStatisticPerTable_); static inline void saveInitialRelsStatisticsToFile(const std::string& directory) { @@ -48,10 +56,10 @@ class RelsStatistics : public TablesStatistics { directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY); } - inline RelStatistics* getRelStatistics(common::table_id_t tableID) const { + inline RelTableStats* getRelStatistics(common::table_id_t tableID) const { auto& tableStatisticPerTable = tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable; - return (RelStatistics*)tableStatisticPerTable[tableID].get(); + return (RelTableStats*)tableStatisticPerTable[tableID].get(); } void setNumTuplesForTable(common::table_id_t relTableID, uint64_t numRels) override; @@ -62,16 +70,14 @@ class RelsStatistics : public TablesStatistics { transaction::Transaction* transaction, common::table_id_t tableID); protected: - inline std::string getTableTypeForPrinting() const override { return "RelsStatistics"; } - inline std::unique_ptr constructTableStatistic( catalog::TableSchema* tableSchema) override { - return std::make_unique(*tableSchema); + return std::make_unique(*tableSchema); } inline std::unique_ptr constructTableStatistic( TableStatistics* tableStatistics) override { - return std::make_unique(*(RelStatistics*)tableStatistics); + return std::make_unique(*(RelTableStats*)tableStatistics); } inline std::string getTableStatisticsFilePath( @@ -80,18 +86,10 @@ class RelsStatistics : public TablesStatistics { } inline void increaseNextRelOffset(common::table_id_t relTableID, uint64_t numTuples) { - ((RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID) + ((RelTableStats*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID) .get()) ->nextRelOffset += numTuples; } - - std::unique_ptr deserializeTableStatistics(uint64_t numTuples, - std::unordered_map>&& - propertyStats, - 
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override; - - void serializeTableStatistics( - TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override; }; } // namespace storage diff --git a/src/include/storage/stats/table_statistics.h b/src/include/storage/stats/table_statistics.h index 4b1d2d4c1a..3c46079886 100644 --- a/src/include/storage/stats/table_statistics.h +++ b/src/include/storage/stats/table_statistics.h @@ -11,37 +11,36 @@ namespace kuzu { namespace storage { -using lock_t = std::unique_lock; -using atomic_uint64_vec_t = std::vector>; - class TableStatistics { public: - virtual ~TableStatistics() = default; - - explicit TableStatistics(const catalog::TableSchema& schema) : numTuples{0} { + explicit TableStatistics(const catalog::TableSchema& schema) + : tableType{schema.tableType}, numTuples{0}, tableID{schema.tableID} { for (auto property : schema.getProperties()) { propertyStatistics[property->getPropertyID()] = std::make_unique(); } } - explicit TableStatistics(uint64_t numTuples, + explicit TableStatistics(common::TableType tableType, uint64_t numTuples, + common::table_id_t tableID, std::unordered_map>&& propertyStatistics) - : numTuples{numTuples}, propertyStatistics{std::move(propertyStatistics)} { + : tableType{tableType}, numTuples{numTuples}, tableID{tableID}, propertyStatistics{ + std::move(propertyStatistics)} { assert(numTuples != UINT64_MAX); } - explicit TableStatistics(const TableStatistics& other) : numTuples{other.numTuples} { + explicit TableStatistics(const TableStatistics& other) + : tableType{other.tableType}, numTuples{other.numTuples}, tableID{other.tableID} { for (auto& propertyStats : other.propertyStatistics) { propertyStatistics[propertyStats.first] = std::make_unique(*propertyStats.second.get()); } } - inline bool isEmpty() const { return numTuples == 0; } + virtual ~TableStatistics() = default; + inline bool isEmpty() const { return numTuples == 0; } inline uint64_t getNumTuples() const { return 
numTuples; } - virtual inline void setNumTuples(uint64_t numTuples_) { assert(numTuples_ != UINT64_MAX); numTuples = numTuples_; @@ -51,19 +50,24 @@ class TableStatistics { assert(propertyStatistics.contains(propertyID)); return *(propertyStatistics.at(propertyID)); } - inline const std::unordered_map>& getPropertyStatistics() { return propertyStatistics; } - inline void setPropertyStatistics( common::property_id_t propertyID, PropertyStatistics newStats) { propertyStatistics[propertyID] = std::make_unique(newStats); } + void serialize(common::FileInfo* fileInfo, uint64_t& offset); + static std::unique_ptr deserialize( + common::FileInfo* fileInfo, uint64_t& offset); + virtual void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) = 0; + private: + common::TableType tableType; uint64_t numTuples; + common::table_id_t tableID; std::unordered_map> propertyStatistics; }; @@ -88,7 +92,7 @@ class TablesStatistics { inline bool hasUpdates() { return tablesStatisticsContentForWriteTrx != nullptr; } inline void checkpointInMemoryIfNecessary() { - lock_t lck{mtx}; + std::unique_lock lck{mtx}; tablesStatisticsContentForReadOnlyTrx = std::move(tablesStatisticsContentForWriteTrx); } @@ -110,52 +114,22 @@ class TablesStatistics { ->getNumTuples(); } - inline PropertyStatistics& getPropertyStatisticsForTable( - const transaction::Transaction& transaction, common::table_id_t tableID, - common::property_id_t propertyID) { - if (transaction.isReadOnly()) { - assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get(); - return tableStatistics->getPropertyStatistics(propertyID); - } else { - initTableStatisticsForWriteTrx(); - assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); - return 
tableStatistics->getPropertyStatistics(propertyID); - } - } + PropertyStatistics& getPropertyStatisticsForTable(const transaction::Transaction& transaction, + common::table_id_t tableID, common::property_id_t propertyID); void setPropertyStatisticsForTable( - common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats) { - initTableStatisticsForWriteTrx(); - assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); - auto tableStatistics = - tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); - tableStatistics->setPropertyStatistics(propertyID, stats); - } + common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats); protected: - virtual inline std::string getTableTypeForPrinting() const = 0; - - virtual inline std::unique_ptr constructTableStatistic( + virtual std::unique_ptr constructTableStatistic( catalog::TableSchema* tableSchema) = 0; - virtual inline std::unique_ptr constructTableStatistic( + virtual std::unique_ptr constructTableStatistic( TableStatistics* tableStatistics) = 0; - virtual inline std::string getTableStatisticsFilePath( + virtual std::string getTableStatisticsFilePath( const std::string& directory, common::DBFileType dbFileType) = 0; - virtual std::unique_ptr deserializeTableStatistics(uint64_t numTuples, - std::unordered_map>&& - propertyStats, - uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) = 0; - - virtual void serializeTableStatistics( - TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) = 0; - void readFromFile(const std::string& directory); void readFromFile(const std::string& directory, common::DBFileType dbFileType); diff --git a/src/include/storage/storage_info.h b/src/include/storage/storage_info.h index 241855b551..ee986341b3 100644 --- a/src/include/storage/storage_info.h +++ b/src/include/storage/storage_info.h @@ -12,12 +12,12 @@ using storage_version_t = uint64_t; struct 
StorageVersionInfo { static std::unordered_map getStorageVersionInfo() { - return {{"0.0.8.9", 22}, {"0.0.8.8", 21}, {"0.0.8.7", 21}, {"0.0.8.6", 20}, {"0.0.8.5", 19}, - {"0.0.8.4", 19}, {"0.0.8.3", 19}, {"0.0.8.2", 19}, {"0.0.8.1", 18}, {"0.0.8", 17}, - {"0.0.7.1", 16}, {"0.0.7", 15}, {"0.0.6.5", 14}, {"0.0.6.4", 13}, {"0.0.6.3", 12}, - {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, - {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, - {"0.0.3", 1}}; + return {{"0.0.8.10", 23}, {"0.0.8.9", 22}, {"0.0.8.8", 21}, {"0.0.8.7", 21}, + {"0.0.8.6", 20}, {"0.0.8.5", 19}, {"0.0.8.4", 19}, {"0.0.8.3", 19}, {"0.0.8.2", 19}, + {"0.0.8.1", 18}, {"0.0.8", 17}, {"0.0.7.1", 16}, {"0.0.7", 15}, {"0.0.6.5", 14}, + {"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, + {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, + {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; } static storage_version_t getStorageVersion(); diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index c5c71d47e9..c180d7a78f 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -41,7 +41,7 @@ class ListsUpdateIteratorsForDirection { struct RelTableScanState { public: - RelTableScanState(storage::RelStatistics* relStats, + RelTableScanState(storage::RelTableStats* relStats, std::vector propertyIds, RelTableDataType relTableDataType) : relStats{relStats}, relTableDataType{relTableDataType}, propertyIds{ std::move(propertyIds)} { @@ -60,7 +60,7 @@ struct RelTableScanState { syncState->hasMoreAndSwitchSourceIfNecessary(); } - RelStatistics* relStats; + RelTableStats* relStats; RelTableDataType relTableDataType; std::vector propertyIds; // sync state between adj and property lists diff --git a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp b/src/storage/stats/nodes_statistics_and_deleted_ids.cpp index 
b9878f7138..b22dc2b899 100644 --- a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp +++ b/src/storage/stats/nodes_statistics_and_deleted_ids.cpp @@ -8,11 +8,12 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -NodeStatisticsAndDeletedIDs::NodeStatisticsAndDeletedIDs(table_id_t tableID, offset_t maxNodeOffset, +NodeTableStatsAndDeletedIDs::NodeTableStatsAndDeletedIDs(table_id_t tableID, offset_t maxNodeOffset, const std::vector& deletedNodeOffsets, std::unordered_map>&& propertyStatistics) - : tableID{tableID}, TableStatistics{getNumTuplesFromMaxNodeOffset(maxNodeOffset), + : tableID{tableID}, TableStatistics{TableType::NODE, + getNumTuplesFromMaxNodeOffset(maxNodeOffset), tableID, std::move(propertyStatistics)} { if (getNumTuples() > 0) { hasDeletedNodesPerMorsel.resize((getNumTuples() / DEFAULT_VECTOR_CAPACITY) + 1, false); @@ -29,14 +30,13 @@ NodeStatisticsAndDeletedIDs::NodeStatisticsAndDeletedIDs(table_id_t tableID, off } } -offset_t NodeStatisticsAndDeletedIDs::addNode() { +offset_t NodeTableStatsAndDeletedIDs::addNode() { if (deletedNodeOffsetsPerMorsel.empty()) { setNumTuples(getNumTuples() + 1); return getMaxNodeOffset(); } // We return the last element in the first non-empty morsel we find auto iter = deletedNodeOffsetsPerMorsel.begin(); - std::set deletedNodeOffsets = iter->second; auto nodeOffsetIter = iter->second.end(); nodeOffsetIter--; offset_t retVal = *nodeOffsetIter; @@ -48,7 +48,7 @@ offset_t NodeStatisticsAndDeletedIDs::addNode() { return retVal; } -void NodeStatisticsAndDeletedIDs::deleteNode(offset_t nodeOffset) { +void NodeTableStatsAndDeletedIDs::deleteNode(offset_t nodeOffset) { // TODO(Semih/Guodong): This check can go into nodeOffsetsInfoForWriteTrx->deleteNode // once errorIfNodeHasEdges is removed. This function would then just be a wrapper to init // nodeOffsetsInfoForWriteTrx before calling delete on it. 
@@ -76,7 +76,7 @@ void NodeStatisticsAndDeletedIDs::deleteNode(offset_t nodeOffset) { // Note: this function will always be called right after scanNodeID, so we have the guarantee // that the nodeOffsetVector is always unselected. -void NodeStatisticsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( +void NodeTableStatsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( const std::shared_ptr& nodeOffsetVector) { auto morselIdxAndOffset = StorageUtils::getQuotientRemainder( nodeOffsetVector->readNodeOffset(0), DEFAULT_VECTOR_CAPACITY); @@ -106,14 +106,14 @@ void NodeStatisticsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( } } -void NodeStatisticsAndDeletedIDs::setNumTuples(uint64_t numTuples) { +void NodeTableStatsAndDeletedIDs::setNumTuples(uint64_t numTuples) { TableStatistics::setNumTuples(numTuples); if (numTuples > 0) { hasDeletedNodesPerMorsel.resize((numTuples / DEFAULT_VECTOR_CAPACITY) + 1, false); } } -std::vector NodeStatisticsAndDeletedIDs::getDeletedNodeOffsets() { +std::vector NodeTableStatsAndDeletedIDs::getDeletedNodeOffsets() { std::vector retVal; auto morselIter = deletedNodeOffsetsPerMorsel.begin(); while (morselIter != deletedNodeOffsetsPerMorsel.end()) { @@ -123,7 +123,19 @@ std::vector NodeStatisticsAndDeletedIDs::getDeletedNodeOffsets() { return retVal; } -void NodeStatisticsAndDeletedIDs::errorIfNodeHasEdges(offset_t nodeOffset) { +void NodeTableStatsAndDeletedIDs::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeVector(getDeletedNodeOffsets(), fileInfo, offset); +} + +std::unique_ptr NodeTableStatsAndDeletedIDs::deserialize( + table_id_t tableID, offset_t maxNodeOffset, FileInfo* fileInfo, uint64_t& offset) { + std::vector deletedNodeOffsets; + SerDeser::deserializeVector(deletedNodeOffsets, fileInfo, offset); + return std::make_unique( + tableID, maxNodeOffset, deletedNodeOffsets); +} + +void NodeTableStatsAndDeletedIDs::errorIfNodeHasEdges(offset_t nodeOffset) { for (AdjLists* adjList : adjListsAndColumns.first) { auto 
numElementsInList = adjList->getTotalNumElementsInList(transaction::TransactionType::WRITE, nodeOffset); @@ -144,7 +156,7 @@ void NodeStatisticsAndDeletedIDs::errorIfNodeHasEdges(offset_t nodeOffset) { } } -bool NodeStatisticsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morselIdx) { +bool NodeTableStatsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morselIdx) { auto iter = deletedNodeOffsetsPerMorsel.find(morselIdx); if (iter != deletedNodeOffsetsPerMorsel.end()) { return iter->second.contains(nodeOffset); @@ -153,17 +165,17 @@ bool NodeStatisticsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morsel } NodesStatisticsAndDeletedIDs::NodesStatisticsAndDeletedIDs( - std::unordered_map>& + std::unordered_map>& nodesStatisticsAndDeletedIDs) : TablesStatistics{} { initTableStatisticsForWriteTrx(); for (auto& nodeStatistics : nodesStatisticsAndDeletedIDs) { tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[nodeStatistics.first] = - std::make_unique( - *(NodeStatisticsAndDeletedIDs*)nodeStatistics.second.get()); + std::make_unique( + *(NodeTableStatsAndDeletedIDs*)nodeStatistics.second.get()); tablesStatisticsContentForWriteTrx->tableStatisticPerTable[nodeStatistics.first] = - std::make_unique( - *(NodeStatisticsAndDeletedIDs*)nodeStatistics.second.get()); + std::make_unique( + *(NodeTableStatsAndDeletedIDs*)nodeStatistics.second.get()); } } @@ -197,7 +209,7 @@ void NodesStatisticsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( lock_t lck{mtx}; (transaction->isReadOnly() || tablesStatisticsContentForWriteTrx == nullptr) ? 
getNodeStatisticsAndDeletedIDs(tableID)->setDeletedNodeOffsetsForMorsel(nodeOffsetVector) : - ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx + ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx ->tableStatisticPerTable[tableID] .get()) ->setDeletedNodeOffsetsForMorsel(nodeOffsetVector); @@ -210,22 +222,5 @@ void NodesStatisticsAndDeletedIDs::addNodeStatisticsAndDeletedIDs( constructTableStatistic(tableSchema); } -std::unique_ptr NodesStatisticsAndDeletedIDs::deserializeTableStatistics( - uint64_t numTuples, - std::unordered_map>&& propertyStats, - uint64_t& offset, FileInfo* fileInfo, uint64_t tableID) { - std::vector deletedNodeIDs; - SerDeser::deserializeVector(deletedNodeIDs, fileInfo, offset); - return make_unique(tableID, - NodeStatisticsAndDeletedIDs::getMaxNodeOffsetFromNumTuples(numTuples), deletedNodeIDs, - std::move(propertyStats)); -} - -void NodesStatisticsAndDeletedIDs::serializeTableStatistics( - TableStatistics* tableStatistics, uint64_t& offset, FileInfo* fileInfo) { - auto nodeTableStatistic = (NodeStatisticsAndDeletedIDs*)tableStatistics; - SerDeser::serializeVector(nodeTableStatistic->getDeletedNodeOffsets(), fileInfo, offset); -} - } // namespace storage } // namespace kuzu diff --git a/src/storage/stats/rels_statistics.cpp b/src/storage/stats/rels_statistics.cpp index 2d0aa31696..5647bf601a 100644 --- a/src/storage/stats/rels_statistics.cpp +++ b/src/storage/stats/rels_statistics.cpp @@ -5,23 +5,34 @@ using namespace kuzu::common; namespace kuzu { namespace storage { +void RelTableStats::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(nextRelOffset, fileInfo, offset); +} + +std::unique_ptr RelTableStats::deserialize( + uint64_t numRels, common::table_id_t tableID, FileInfo* fileInfo, uint64_t& offset) { + common::offset_t nextRelOffset; + SerDeser::deserializeValue(nextRelOffset, fileInfo, offset); + return std::make_unique(numRels, tableID, nextRelOffset); +} + // We should 
only call this function after we call setNumRelsPerDirectionBoundTableID. void RelsStatistics::setNumTuplesForTable(table_id_t relTableID, uint64_t numRels) { - lock_t lck{mtx}; + std::unique_lock lck{mtx}; initTableStatisticsForWriteTrxNoLock(); assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(relTableID)); auto relStatistics = - (RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable[relTableID] + (RelTableStats*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable[relTableID] .get(); increaseNextRelOffset(relTableID, numRels - relStatistics->getNumTuples()); relStatistics->setNumTuples(numRels); } void RelsStatistics::updateNumRelsByValue(table_id_t relTableID, int64_t value) { - lock_t lck{mtx}; + std::unique_lock lck{mtx}; initTableStatisticsForWriteTrxNoLock(); auto relStatistics = - (RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable[relTableID] + (RelTableStats*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable[relTableID] .get(); auto numRelsAfterUpdate = relStatistics->getNumTuples() + value; relStatistics->setNumTuples(numRelsAfterUpdate); @@ -33,29 +44,14 @@ void RelsStatistics::updateNumRelsByValue(table_id_t relTableID, int64_t value) offset_t RelsStatistics::getNextRelOffset( transaction::Transaction* transaction, table_id_t tableID) { - lock_t lck{mtx}; + std::unique_lock lck{mtx}; auto& tableStatisticContent = (transaction->isReadOnly() || tablesStatisticsContentForWriteTrx == nullptr) ? 
tablesStatisticsContentForReadOnlyTrx : tablesStatisticsContentForWriteTrx; - return ((RelStatistics*)tableStatisticContent->tableStatisticPerTable.at(tableID).get()) + return ((RelTableStats*)tableStatisticContent->tableStatisticPerTable.at(tableID).get()) ->getNextRelOffset(); } -std::unique_ptr RelsStatistics::deserializeTableStatistics(uint64_t numTuples, - std::unordered_map>&& propertyStats, - uint64_t& offset, FileInfo* fileInfo, uint64_t tableID) { - std::vector> numRelsPerDirectionBoundTable{2}; - offset_t nextRelOffset; - SerDeser::deserializeValue(nextRelOffset, fileInfo, offset); - return std::make_unique(numTuples, std::move(propertyStats), nextRelOffset); -} - -void RelsStatistics::serializeTableStatistics( - TableStatistics* tableStatistics, uint64_t& offset, FileInfo* fileInfo) { - auto relStatistic = (RelStatistics*)tableStatistics; - SerDeser::serializeValue(relStatistic->nextRelOffset, fileInfo, offset); -} - } // namespace storage } // namespace kuzu diff --git a/src/storage/stats/table_statistics.cpp b/src/storage/stats/table_statistics.cpp index eab61e6635..b9590c987f 100644 --- a/src/storage/stats/table_statistics.cpp +++ b/src/storage/stats/table_statistics.cpp @@ -1,5 +1,7 @@ #include "storage/stats/table_statistics.h" +#include "storage/stats/nodes_statistics_and_deleted_ids.h" +#include "storage/stats/rels_statistics.h" #include "storage/storage_utils.h" using namespace kuzu::common; @@ -7,6 +9,47 @@ using namespace kuzu::common; namespace kuzu { namespace storage { +void TableStatistics::serialize(common::FileInfo* fileInfo, uint64_t& offset) { + SerDeser::serializeValue(tableType, fileInfo, offset); + SerDeser::serializeValue(numTuples, fileInfo, offset); + SerDeser::serializeValue(tableID, fileInfo, offset); + SerDeser::serializeUnorderedMap(propertyStatistics, fileInfo, offset); + serializeInternal(fileInfo, offset); +} + +std::unique_ptr TableStatistics::deserialize( + common::FileInfo* fileInfo, uint64_t& offset) { + TableType 
tableType; + uint64_t numTuples; + table_id_t tableID; + std::unordered_map> propertyStatistics; + SerDeser::deserializeValue(tableType, fileInfo, offset); + SerDeser::deserializeValue(numTuples, fileInfo, offset); + SerDeser::deserializeValue(tableID, fileInfo, offset); + SerDeser::deserializeUnorderedMap(propertyStatistics, fileInfo, offset); + std::unique_ptr result; + switch (tableType) { + case TableType::NODE: { + result = NodeTableStatsAndDeletedIDs::deserialize(tableID, + NodeTableStatsAndDeletedIDs::getMaxNodeOffsetFromNumTuples(numTuples), fileInfo, + offset); + } break; + case TableType::REL: { + result = RelTableStats::deserialize(numTuples, tableID, fileInfo, offset); + } break; + // LCOV_EXCL_START + default: { + throw NotImplementedException("TableStatistics::deserialize"); + } + // LCOV_EXCL_STOP + } + result->tableType = tableType; + result->numTuples = numTuples; + result->tableID = tableID; + result->propertyStatistics = std::move(propertyStatistics); + return result; +} + TablesStatistics::TablesStatistics() { tablesStatisticsContentForReadOnlyTrx = std::make_unique(); } @@ -19,27 +62,8 @@ void TablesStatistics::readFromFile(const std::string& directory, common::DBFile auto filePath = getTableStatisticsFilePath(directory, dbFileType); auto fileInfo = FileUtils::openFile(filePath, O_RDONLY); uint64_t offset = 0; - uint64_t numTables; - SerDeser::deserializeValue(numTables, fileInfo.get(), offset); - for (auto i = 0u; i < numTables; i++) { - uint64_t numTuples; - SerDeser::deserializeValue(numTuples, fileInfo.get(), offset); - table_id_t tableID; - SerDeser::deserializeValue(tableID, fileInfo.get(), offset); - - uint64_t numProperties; - SerDeser::deserializeValue(numProperties, fileInfo.get(), offset); - std::unordered_map> - propertyStats; - for (auto j = 0u; j < numProperties; j++) { - property_id_t propertyId; - SerDeser::deserializeValue(propertyId, fileInfo.get(), offset); - propertyStats[propertyId] = 
PropertyStatistics::deserialize(fileInfo.get(), offset); - } - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID] = - deserializeTableStatistics( - numTuples, std::move(propertyStats), offset, fileInfo.get(), tableID); - } + SerDeser::deserializeUnorderedMap( + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable, fileInfo.get(), offset); } void TablesStatistics::saveToFile(const std::string& directory, DBFileType dbFileType, @@ -51,24 +75,8 @@ void TablesStatistics::saveToFile(const std::string& directory, DBFileType dbFil tablesStatisticsContentForWriteTrx == nullptr) ? tablesStatisticsContentForReadOnlyTrx : tablesStatisticsContentForWriteTrx; - SerDeser::serializeValue( - tablesStatisticsContent->tableStatisticPerTable.size(), fileInfo.get(), offset); - for (auto& tableStatistic : tablesStatisticsContent->tableStatisticPerTable) { - auto tableStatistics = tableStatistic.second.get(); - SerDeser::serializeValue(tableStatistics->getNumTuples(), fileInfo.get(), offset); - SerDeser::serializeValue(tableStatistic.first, fileInfo.get(), offset); - - SerDeser::serializeValue( - tableStatistics->getPropertyStatistics().size(), fileInfo.get(), offset); - for (auto& propertyPair : tableStatistics->getPropertyStatistics()) { - auto propertyId = propertyPair.first; - auto propertyStatistics = propertyPair.second.get(); - SerDeser::serializeValue(propertyId, fileInfo.get(), offset); - propertyStatistics->serialize(fileInfo.get(), offset); - } - - serializeTableStatistics(tableStatistics, offset, fileInfo.get()); - } + SerDeser::serializeUnorderedMap( + tablesStatisticsContent->tableStatisticPerTable, fileInfo.get(), offset); } void TablesStatistics::initTableStatisticsForWriteTrx() { @@ -86,5 +94,31 @@ void TablesStatistics::initTableStatisticsForWriteTrxNoLock() { } } +PropertyStatistics& TablesStatistics::getPropertyStatisticsForTable( + const transaction::Transaction& transaction, common::table_id_t tableID, + common::property_id_t 
propertyID) { + if (transaction.isReadOnly()) { + assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get(); + return tableStatistics->getPropertyStatistics(propertyID); + } else { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); + return tableStatistics->getPropertyStatistics(propertyID); + } +} + +void TablesStatistics::setPropertyStatisticsForTable(common::table_id_t tableID, + common::property_id_t propertyID, kuzu::storage::PropertyStatistics stats) { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto tableStatistics = + tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get(); + tableStatistics->setPropertyStatistics(propertyID, stats); +} + } // namespace storage } // namespace kuzu diff --git a/test/runner/e2e_copy_transaction_test.cpp b/test/runner/e2e_copy_transaction_test.cpp index 6f8ddaa241..b2bbde1497 100644 --- a/test/runner/e2e_copy_transaction_test.cpp +++ b/test/runner/e2e_copy_transaction_test.cpp @@ -141,7 +141,7 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { auto dummyWriteTrx = transaction::Transaction::getDummyWriteTrx(); ASSERT_EQ(relsStatistics.getNextRelOffset(dummyWriteTrx.get(), knowsTableID), 14); ASSERT_EQ(relsStatistics.getReadOnlyVersion()->tableStatisticPerTable.size(), 1); - auto knowsRelStatistics = (RelStatistics*)relsStatistics.getReadOnlyVersion() + auto knowsRelStatistics = (RelTableStats*)relsStatistics.getReadOnlyVersion() ->tableStatisticPerTable.at(knowsTableID) .get(); ASSERT_EQ(knowsRelStatistics->getNumTuples(), 14); diff --git a/test/storage/table_statistics_test.cpp 
b/test/storage/table_statistics_test.cpp index a675c9352c..0aeefa10fc 100644 --- a/test/storage/table_statistics_test.cpp +++ b/test/storage/table_statistics_test.cpp @@ -15,8 +15,10 @@ TEST(TableStatisticsTest, CopyTableStatistics) { std::unordered_map> propertyStatistics; propertyStatistics[0] = std::make_unique(true); propertyStatistics[1] = std::make_unique(false); - TableStatistics tableStats(numTuples, std::move(propertyStatistics)); - TableStatistics copy(tableStats); + NodeTableStatsAndDeletedIDs tableStats(0, + NodeTableStatsAndDeletedIDs::getMaxNodeOffsetFromNumTuples(numTuples), + std::move(propertyStatistics)); + NodeTableStatsAndDeletedIDs copy(tableStats); ASSERT_EQ(copy.getNumTuples(), numTuples); ASSERT_EQ(copy.getPropertyStatistics(0).mayHaveNull(), true);