diff --git a/CMakeLists.txt b/CMakeLists.txt index 725084bfed..4eac2bef26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.11) -project(Kuzu VERSION 0.0.8.10 LANGUAGES CXX) +project(Kuzu VERSION 0.0.8.11 LANGUAGES CXX) find_package(Threads REQUIRED) diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 733c86900c..774ad21251 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -1,7 +1,6 @@ #include "catalog/catalog.h" #include "catalog/node_table_schema.h" -#include "catalog/rdf_graph_schema.h" #include "catalog/rel_table_group_schema.h" #include "catalog/rel_table_schema.h" #include "common/ser_deser.h" @@ -44,9 +43,7 @@ ExpressionType Catalog::getFunctionType(const std::string& name) const { table_id_t Catalog::addNodeTableSchema(const binder::BoundCreateTableInfo& info) { initCatalogContentForWriteTrxIfNecessary(); - auto tableID = catalogContentForWriteTrx->addNodeTableSchema(info); - wal->logNodeTableRecord(tableID); - return tableID; + return catalogContentForWriteTrx->addNodeTableSchema(info); } table_id_t Catalog::addRelTableSchema(const binder::BoundCreateTableInfo& info) { @@ -71,11 +68,7 @@ common::table_id_t Catalog::addRelTableGroupSchema(const binder::BoundCreateTabl common::table_id_t Catalog::addRdfGraphSchema(const binder::BoundCreateTableInfo& info) { initCatalogContentForWriteTrxIfNecessary(); - auto tableID = catalogContentForWriteTrx->addRdfGraphSchema(info); - auto rdfGraphSchema = (RdfGraphSchema*)catalogContentForWriteTrx->getTableSchema(tableID); - wal->logRdfGraphRecord( - tableID, rdfGraphSchema->getNodeTableID(), rdfGraphSchema->getRelTableID()); - return tableID; + return catalogContentForWriteTrx->addRdfGraphSchema(info); } void Catalog::dropTableSchema(table_id_t tableID) { @@ -89,13 +82,11 @@ void Catalog::renameTable(table_id_t tableID, const std::string& newName) { catalogContentForWriteTrx->renameTable(tableID, newName); } -void Catalog::addNodeProperty(table_id_t tableID, const std::string& propertyName, - std::unique_ptr dataType, std::unique_ptr metadataDAHInfo) { +void Catalog::addNodeProperty( + table_id_t tableID, const std::string& propertyName, std::unique_ptr dataType) { initCatalogContentForWriteTrxIfNecessary(); catalogContentForWriteTrx->getTableSchema(tableID)->addNodeProperty( - propertyName, std::move(dataType), std::move(metadataDAHInfo)); - wal->logAddPropertyRecord( - tableID, catalogContentForWriteTrx->getTableSchema(tableID)->getPropertyID(propertyName)); + propertyName, std::move(dataType)); } void Catalog::addRelProperty( diff --git a/src/catalog/property.cpp b/src/catalog/property.cpp index 468d744c90..55481e9b6d 100644 --- a/src/catalog/property.cpp +++ b/src/catalog/property.cpp @@ -7,27 +7,11 @@ using namespace kuzu::common; namespace kuzu { namespace catalog { -void MetadataDAHInfo::serialize(FileInfo* fileInfo, uint64_t& offset) const { - SerDeser::serializeValue(dataDAHPageIdx, fileInfo, offset); - SerDeser::serializeValue(nullDAHPageIdx, fileInfo, offset); - SerDeser::serializeVectorOfPtrs(childrenInfos, fileInfo, offset); -} - -std::unique_ptr MetadataDAHInfo::deserialize( - FileInfo* fileInfo, uint64_t& offset) { - auto metadataDAHInfo = std::make_unique(); - SerDeser::deserializeValue(metadataDAHInfo->dataDAHPageIdx, fileInfo, offset); - SerDeser::deserializeValue(metadataDAHInfo->nullDAHPageIdx, fileInfo, offset); - SerDeser::deserializeVectorOfPtrs(metadataDAHInfo->childrenInfos, fileInfo, offset); - return metadataDAHInfo; -} - void Property::serialize(FileInfo* fileInfo, uint64_t& offset) const { SerDeser::serializeValue(name, fileInfo, offset); dataType->serialize(fileInfo, offset); SerDeser::serializeValue(propertyID, fileInfo, offset); SerDeser::serializeValue(tableID, fileInfo, offset); - metadataDAHInfo->serialize(fileInfo, offset); } std::unique_ptr Property::deserialize(FileInfo* fileInfo, uint64_t& offset) { @@ -38,9 +22,7 @@ std::unique_ptr Property::deserialize(FileInfo* fileInfo, uint64_t& of auto dataType = LogicalType::deserialize(fileInfo, offset); SerDeser::deserializeValue(propertyID, fileInfo, offset); SerDeser::deserializeValue(tableID, fileInfo, offset); - auto metadataDAHInfo = MetadataDAHInfo::deserialize(fileInfo, offset); - return std::make_unique( - name, std::move(dataType), propertyID, tableID, std::move(metadataDAHInfo)); + return std::make_unique(name, std::move(dataType), propertyID, tableID); } std::vector> Property::copy( diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index a335fb82e3..527d50e36e 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -56,8 +56,7 @@ class Catalog { void renameTable(common::table_id_t tableID, const std::string& newName); void addNodeProperty(common::table_id_t tableID, const std::string& propertyName, - std::unique_ptr dataType, - std::unique_ptr metadataDAHInfo); + std::unique_ptr dataType); void addRelProperty(common::table_id_t tableID, const std::string& propertyName, std::unique_ptr dataType); diff --git a/src/include/catalog/property.h b/src/include/catalog/property.h index 414adb427f..5b4f84be5d 100644 --- a/src/include/catalog/property.h +++ b/src/include/catalog/property.h @@ -5,33 +5,6 @@ namespace kuzu { namespace catalog { -// DAH is the abbreviation for Disk Array Header. -class MetadataDAHInfo { -public: - MetadataDAHInfo() : MetadataDAHInfo{common::INVALID_PAGE_IDX, common::INVALID_PAGE_IDX} {} - MetadataDAHInfo(common::page_idx_t dataDAHPageIdx) - : MetadataDAHInfo{dataDAHPageIdx, common::INVALID_PAGE_IDX} {} - MetadataDAHInfo(common::page_idx_t dataDAHPageIdx, common::page_idx_t nullDAHPageIdx) - : dataDAHPageIdx{dataDAHPageIdx}, nullDAHPageIdx{nullDAHPageIdx} {} - - inline std::unique_ptr copy() { - auto result = std::make_unique(dataDAHPageIdx, nullDAHPageIdx); - result->childrenInfos.resize(childrenInfos.size()); - for (size_t i = 0; i < childrenInfos.size(); ++i) { - result->childrenInfos[i] = childrenInfos[i]->copy(); - } - return result; - } - - void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; - static std::unique_ptr deserialize( - common::FileInfo* fileInfo, uint64_t& offset); - - common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX; - common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX; - std::vector> childrenInfos; -}; - class Property { public: // TODO: these should be guarded as reserved property names. @@ -46,14 +19,9 @@ class Property { common::INVALID_TABLE_ID} {} Property(std::string name, std::unique_ptr dataType, - common::property_id_t propertyID, common::table_id_t tableID, - std::unique_ptr metadataDAHInfo = nullptr) + common::property_id_t propertyID, common::table_id_t tableID) : name{std::move(name)}, dataType{std::move(dataType)}, - propertyID{propertyID}, tableID{tableID}, metadataDAHInfo{std::move(metadataDAHInfo)} { - if (this->metadataDAHInfo == nullptr) { - this->metadataDAHInfo = std::make_unique(); - } - } + propertyID{propertyID}, tableID{tableID} {} inline std::string getName() const { return name; } @@ -63,24 +31,17 @@ class Property { inline common::table_id_t getTableID() const { return tableID; } - inline MetadataDAHInfo* getMetadataDAHInfo() const { return metadataDAHInfo.get(); } - inline void setPropertyID(common::property_id_t propertyID_) { this->propertyID = propertyID_; } inline void setTableID(common::table_id_t tableID_) { this->tableID = tableID_; } - inline void setMetadataDAHInfo(std::unique_ptr metadataDAHInfo_) { - this->metadataDAHInfo = std::move(metadataDAHInfo_); - } - inline void rename(std::string newName) { this->name = std::move(newName); } void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; static std::unique_ptr deserialize(common::FileInfo* fileInfo, uint64_t& offset); inline std::unique_ptr copy() const { - return std::make_unique( - name, dataType->copy(), propertyID, tableID, metadataDAHInfo->copy()); + return std::make_unique(name, dataType->copy(), propertyID, tableID); } static std::vector> copy( @@ -91,7 +52,6 @@ class Property { std::unique_ptr dataType; common::property_id_t propertyID; common::table_id_t tableID; - std::unique_ptr metadataDAHInfo; }; } // namespace catalog diff --git a/src/include/catalog/table_schema.h b/src/include/catalog/table_schema.h index 56aff6d3bf..3bbeea9bcf 100644 --- a/src/include/catalog/table_schema.h +++ b/src/include/catalog/table_schema.h @@ -51,11 +51,10 @@ class TableSchema { std::vector getProperties() const; - inline void addNodeProperty(std::string propertyName, - std::unique_ptr dataType, - std::unique_ptr metadataDAHInfo) { - properties.push_back(std::make_unique(std::move(propertyName), - std::move(dataType), increaseNextPropertyID(), tableID, std::move(metadataDAHInfo))); + inline void addNodeProperty( + std::string propertyName, std::unique_ptr dataType) { + properties.push_back(std::make_unique( + std::move(propertyName), std::move(dataType), increaseNextPropertyID(), tableID)); } inline void addRelProperty( std::string propertyName, std::unique_ptr dataType) { diff --git a/src/include/processor/operator/ddl/drop_property.h b/src/include/processor/operator/ddl/drop_property.h index 9abd95f455..5dac06a488 100644 --- a/src/include/processor/operator/ddl/drop_property.h +++ b/src/include/processor/operator/ddl/drop_property.h @@ -9,20 +9,29 @@ namespace processor { class DropProperty : public DDL { public: DropProperty(catalog::Catalog* catalog, common::table_id_t tableID, - common::property_id_t propertyID, const DataPos& outputPos, uint32_t id, - const std::string& paramsString) + common::property_id_t propertyID, const DataPos& outputPos, + storage::StorageManager& storageManager, uint32_t id, const std::string& paramsString) : DDL{PhysicalOperatorType::DROP_PROPERTY, catalog, outputPos, id, paramsString}, - tableID{tableID}, propertyID{propertyID} {} - - void executeDDLInternal() override { catalog->dropProperty(tableID, propertyID); } + storageManager{storageManager}, tableID{tableID}, propertyID{propertyID} {} + + void executeDDLInternal() override { + auto tableSchema = catalog->getReadOnlyVersion()->getTableSchema(tableID); + catalog->dropProperty(tableID, propertyID); + if (tableSchema->tableType == common::TableType::NODE) { + auto nodesStats = storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs(); + nodesStats->removeMetadataDAHInfo(tableID, tableSchema->getColumnID(propertyID)); + } + } std::string getOutputMsg() override { return {"Drop succeed."}; } std::unique_ptr clone() override { - return make_unique(catalog, tableID, propertyID, outputPos, id, paramsString); + return make_unique( + catalog, tableID, propertyID, outputPos, storageManager, id, paramsString); } protected: + storage::StorageManager& storageManager; common::table_id_t tableID; common::property_id_t propertyID; }; diff --git a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h b/src/include/storage/stats/nodes_statistics_and_deleted_ids.h index 7e4b3bd361..9504a6b8fb 100644 --- a/src/include/storage/stats/nodes_statistics_and_deleted_ids.h +++ b/src/include/storage/stats/nodes_statistics_and_deleted_ids.h @@ -10,10 +10,17 @@ namespace kuzu { namespace storage { class NodeTableStatsAndDeletedIDs : public TableStatistics { - public: - explicit NodeTableStatsAndDeletedIDs(const catalog::TableSchema& schema) - : TableStatistics{schema}, tableID{schema.tableID} {} + NodeTableStatsAndDeletedIDs(BMFileHandle* metadataFH, const catalog::TableSchema& schema, + BufferManager* bufferManager, WAL* wal) + : TableStatistics{schema}, tableID{schema.tableID} { + metadataDAHInfos.clear(); + metadataDAHInfos.reserve(schema.getNumProperties()); + for (auto property : schema.getProperties()) { + metadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + *property->getDataType(), *metadataFH, bufferManager, wal)); + } + } NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset, const std::vector& deletedNodeOffsets) @@ -30,7 +37,17 @@ class NodeTableStatsAndDeletedIDs : public TableStatistics { std::unordered_map>&& propertyStatistics); - NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) = default; + NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) + : TableStatistics{other}, tableID{other.tableID}, + adjListsAndColumns{other.adjListsAndColumns}, + hasDeletedNodesPerMorsel{other.hasDeletedNodesPerMorsel}, + deletedNodeOffsetsPerMorsel{other.deletedNodeOffsetsPerMorsel} { + metadataDAHInfos.clear(); + metadataDAHInfos.reserve(other.metadataDAHInfos.size()); + for (auto& metadataDAHInfo : other.metadataDAHInfos) { + metadataDAHInfos.push_back(metadataDAHInfo->copy()); + } + } inline common::offset_t getMaxNodeOffset() { return getMaxNodeOffsetFromNumTuples(getNumTuples()); @@ -53,7 +70,7 @@ class NodeTableStatsAndDeletedIDs : public TableStatistics { void setNumTuples(uint64_t numTuples) override; - std::vector getDeletedNodeOffsets(); + std::vector getDeletedNodeOffsets() const; static inline uint64_t getNumTuplesFromMaxNodeOffset(common::offset_t maxNodeOffset) { return (maxNodeOffset == UINT64_MAX) ? 0ull : maxNodeOffset + 1ull; @@ -63,10 +80,26 @@ class NodeTableStatsAndDeletedIDs : public TableStatistics { return numTuples == 0 ? UINT64_MAX : numTuples - 1; } + inline void addMetadataDAHInfoForColumn(std::unique_ptr metadataDAHInfo) { + metadataDAHInfos.push_back(std::move(metadataDAHInfo)); + } + inline void removeMetadataDAHInfoForColumn(common::column_id_t columnID) { + assert(columnID < metadataDAHInfos.size()); + metadataDAHInfos.erase(metadataDAHInfos.begin() + columnID); + } + inline MetadataDAHInfo* getMetadataDAHInfo(common::column_id_t columnID) { + assert(columnID < metadataDAHInfos.size()); + return metadataDAHInfos[columnID].get(); + } + void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final; static std::unique_ptr deserialize(common::table_id_t tableID, common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset); + std::unique_ptr copy() final { + return std::make_unique(*this); + } + private: void errorIfNodeHasEdges(common::offset_t nodeOffset); @@ -75,6 +108,7 @@ class NodeTableStatsAndDeletedIDs : public TableStatistics { private: common::table_id_t tableID; + std::vector> metadataDAHInfos; // Note: This is initialized explicitly through a call to setAdjListsAndColumns after // construction. std::pair, std::vector> adjListsAndColumns; @@ -85,16 +119,15 @@ class NodeTableStatsAndDeletedIDs : public TableStatistics { // Manages the disk image of the maxNodeOffsets and deleted node IDs (per node table). // Note: This class is *not* thread-safe. class NodesStatisticsAndDeletedIDs : public TablesStatistics { - public: // Should only be used by saveInitialNodesStatisticsAndDeletedIDsToFile to start a database // from an empty directory. - NodesStatisticsAndDeletedIDs() : TablesStatistics{} {}; + NodesStatisticsAndDeletedIDs() : TablesStatistics{nullptr} {}; // Should be used when an already loaded database is started from a directory. - explicit NodesStatisticsAndDeletedIDs( - const std::string& directory, common::DBFileType dbFileType = common::DBFileType::ORIGINAL) - : TablesStatistics{} { - readFromFile(directory, dbFileType); + explicit NodesStatisticsAndDeletedIDs(BMFileHandle* metadataFH, BufferManager* bufferManager, + WAL* wal, common::DBFileType dbFileType = common::DBFileType::ORIGINAL) + : TablesStatistics{metadataFH}, bufferManager{bufferManager}, wal{wal} { + readFromFile(wal->getDirectory(), dbFileType); } // Should be used only by tests; @@ -180,10 +213,16 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema); + void addMetadataDAHInfo(common::table_id_t tableID, const common::LogicalType& dataType); + void removeMetadataDAHInfo(common::table_id_t tableID, common::column_id_t columnID); + MetadataDAHInfo* getMetadataDAHInfo(transaction::Transaction* transaction, + common::table_id_t tableID, common::column_id_t columnID); + protected: inline std::unique_ptr constructTableStatistic( catalog::TableSchema* tableSchema) override { - return std::make_unique(*tableSchema); + return std::make_unique( + metadataFH, *tableSchema, bufferManager, wal); } inline std::unique_ptr constructTableStatistic( @@ -196,6 +235,10 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { const std::string& directory, common::DBFileType dbFileType) override { return StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType); } + +private: + BufferManager* bufferManager; + WAL* wal; }; } // namespace storage diff --git a/src/include/storage/stats/rels_statistics.h b/src/include/storage/stats/rels_statistics.h index 58ca2f75fe..0b71663830 100644 --- a/src/include/storage/stats/rels_statistics.h +++ b/src/include/storage/stats/rels_statistics.h @@ -31,6 +31,10 @@ class RelTableStats : public TableStatistics { static std::unique_ptr deserialize( uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset); + inline std::unique_ptr copy() final { + return std::make_unique(*this); + } + private: common::offset_t nextRelOffset; }; @@ -41,9 +45,10 @@ class RelsStatistics : public TablesStatistics { public: // Should only be used by saveInitialRelsStatisticsToFile to start a database from an empty // directory. - RelsStatistics() : TablesStatistics{} {}; + RelsStatistics() : TablesStatistics{nullptr} {}; // Should be used when an already loaded database is started from a directory. - explicit RelsStatistics(const std::string& directory) : TablesStatistics{} { + explicit RelsStatistics(BMFileHandle* metadataFH, const std::string& directory) + : TablesStatistics{metadataFH} { readFromFile(directory); } diff --git a/src/include/storage/stats/table_statistics.h b/src/include/storage/stats/table_statistics.h index 3c46079886..d7c3c2e060 100644 --- a/src/include/storage/stats/table_statistics.h +++ b/src/include/storage/stats/table_statistics.h @@ -11,6 +11,34 @@ namespace kuzu { namespace storage { +// DAH is the abbreviation for Disk Array Header. +class MetadataDAHInfo { +public: + MetadataDAHInfo() : MetadataDAHInfo{common::INVALID_PAGE_IDX, common::INVALID_PAGE_IDX} {} + MetadataDAHInfo(common::page_idx_t dataDAHPageIdx) + : MetadataDAHInfo{dataDAHPageIdx, common::INVALID_PAGE_IDX} {} + MetadataDAHInfo(common::page_idx_t dataDAHPageIdx, common::page_idx_t nullDAHPageIdx) + : dataDAHPageIdx{dataDAHPageIdx}, nullDAHPageIdx{nullDAHPageIdx} {} + + inline std::unique_ptr copy() { + auto result = std::make_unique(dataDAHPageIdx, nullDAHPageIdx); + result->childrenInfos.resize(childrenInfos.size()); + for (size_t i = 0; i < childrenInfos.size(); ++i) { + result->childrenInfos[i] = childrenInfos[i]->copy(); + } + return result; + } + + void serialize(common::FileInfo* fileInfo, uint64_t& offset) const; + static std::unique_ptr deserialize( + common::FileInfo* fileInfo, uint64_t& offset); + + common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX; + common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX; + std::vector> childrenInfos; +}; + +class WAL; class TableStatistics { public: explicit TableStatistics(const catalog::TableSchema& schema) @@ -20,12 +48,11 @@ class TableStatistics { } } - explicit TableStatistics(common::TableType tableType, uint64_t numTuples, - common::table_id_t tableID, + TableStatistics(common::TableType tableType, uint64_t numTuples, common::table_id_t tableID, std::unordered_map>&& propertyStatistics) - : numTuples{numTuples}, tableID{tableID}, propertyStatistics{ - std::move(propertyStatistics)} { + : tableType{tableType}, numTuples{numTuples}, tableID{tableID}, + propertyStatistics{std::move(propertyStatistics)} { assert(numTuples != UINT64_MAX); } @@ -64,6 +91,8 @@ class TableStatistics { common::FileInfo* fileInfo, uint64_t& offset); virtual void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) = 0; + virtual std::unique_ptr copy() = 0; + private: common::TableType tableType; uint64_t numTuples; @@ -79,7 +108,7 @@ struct TablesStatisticsContent { class TablesStatistics { public: - TablesStatistics(); + TablesStatistics(BMFileHandle* metadataFH); virtual ~TablesStatistics() = default; @@ -120,6 +149,9 @@ class TablesStatistics { void setPropertyStatisticsForTable( common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats); + static std::unique_ptr createMetadataDAHInfo( + const common::LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal); + protected: virtual std::unique_ptr constructTableStatistic( catalog::TableSchema* tableSchema) = 0; @@ -140,6 +172,7 @@ class TablesStatistics { void initTableStatisticsForWriteTrxNoLock(); protected: + BMFileHandle* metadataFH; std::unique_ptr tablesStatisticsContentForReadOnlyTrx; std::unique_ptr tablesStatisticsContentForWriteTrx; std::mutex mtx; diff --git a/src/include/storage/storage_info.h b/src/include/storage/storage_info.h index ee986341b3..f2e6c685a2 100644 --- a/src/include/storage/storage_info.h +++ b/src/include/storage/storage_info.h @@ -12,12 +12,12 @@ using storage_version_t = uint64_t; struct StorageVersionInfo { static std::unordered_map getStorageVersionInfo() { - return {{"0.0.8.10", 22}, {"0.0.8.9", 22}, {"0.0.8.8", 21}, {"0.0.8.7", 21}, - {"0.0.8.6", 20}, {"0.0.8.5", 19}, {"0.0.8.4", 19}, {"0.0.8.3", 19}, {"0.0.8.2", 19}, - {"0.0.8.1", 18}, {"0.0.8", 17}, {"0.0.7.1", 16}, {"0.0.7", 15}, {"0.0.6.5", 14}, - {"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, - {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, - {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; + return {{"0.0.8.11", 22}, {"0.0.8.10", 22}, {"0.0.8.9", 22}, {"0.0.8.8", 21}, + {"0.0.8.7", 21}, {"0.0.8.6", 20}, {"0.0.8.5", 19}, {"0.0.8.4", 19}, {"0.0.8.3", 19}, + {"0.0.8.2", 19}, {"0.0.8.1", 18}, {"0.0.8", 17}, {"0.0.7.1", 16}, {"0.0.7", 15}, + {"0.0.6.5", 14}, {"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, + {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, + {"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; } static storage_version_t getStorageVersion(); diff --git a/src/include/storage/storage_manager.h b/src/include/storage/storage_manager.h index 6dc400b19e..716b3a4129 100644 --- a/src/include/storage/storage_manager.h +++ b/src/include/storage/storage_manager.h @@ -39,9 +39,6 @@ class StorageManager { inline BMFileHandle* getDataFH() const { return dataFH.get(); } inline BMFileHandle* getMetadataFH() const { return metadataFH.get(); } - std::unique_ptr createMetadataDAHInfo( - const common::LogicalType& dataType); - private: std::unique_ptr dataFH; std::unique_ptr metadataFH; diff --git a/src/include/storage/store/node_column.h b/src/include/storage/store/node_column.h index ead37e5638..b1420cf72b 100644 --- a/src/include/storage/store/node_column.h +++ b/src/include/storage/store/node_column.h @@ -2,6 +2,7 @@ #include "catalog/catalog.h" #include "storage/stats/property_statistics.h" +#include "storage/stats/table_statistics.h" #include "storage/storage_structure/disk_array.h" #include "storage/storage_structure/storage_structure.h" #include "storage/store/column_chunk.h" @@ -51,10 +52,7 @@ class NodeColumn { friend class StructNodeColumn; public: - NodeColumn(const catalog::Property& property, BMFileHandle* dataFH, BMFileHandle* metadataFH, - BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, - RWPropertyStats propertyStatistics, bool requireNullColumn = true); - NodeColumn(common::LogicalType dataType, const catalog::MetadataDAHInfo& metaDAHeaderInfo, + NodeColumn(common::LogicalType dataType, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats PropertyStatistics, bool requireNullColumn); @@ -144,7 +142,7 @@ class NodeColumn { class BoolNodeColumn : public NodeColumn { public: - BoolNodeColumn(const catalog::MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, + BoolNodeColumn(const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics, bool requireNullColumn = true); @@ -181,7 +179,7 @@ class NullNodeColumn : public NodeColumn { class SerialNodeColumn : public NodeColumn { public: - SerialNodeColumn(const catalog::MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, + SerialNodeColumn(const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction); @@ -194,16 +192,10 @@ class SerialNodeColumn : public NodeColumn { }; struct NodeColumnFactory { - static inline std::unique_ptr createNodeColumn(const catalog::Property& property, - BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, - transaction::Transaction* transaction, RWPropertyStats propertyStatistics) { - return createNodeColumn(*property.getDataType(), *property.getMetadataDAHInfo(), dataFH, - metadataFH, bufferManager, wal, transaction, propertyStatistics); - } static std::unique_ptr createNodeColumn(const common::LogicalType& dataType, - const catalog::MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, - BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, - transaction::Transaction* transaction, RWPropertyStats propertyStatistics); + const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, + BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, + RWPropertyStats propertyStatistics); }; } // namespace storage diff --git a/src/include/storage/store/nodes_store.h b/src/include/storage/store/nodes_store.h index 8e5756c38d..92c1496c6d 100644 --- a/src/include/storage/store/nodes_store.h +++ b/src/include/storage/store/nodes_store.h @@ -17,8 +17,8 @@ class NodesStore { inline PrimaryKeyIndex* getPKIndex(common::table_id_t tableID) { return nodeTables[tableID]->getPKIndex(); } - inline NodesStatisticsAndDeletedIDs& getNodesStatisticsAndDeletedIDs() { - return nodesStatisticsAndDeletedIDs; + inline NodesStatisticsAndDeletedIDs* getNodesStatisticsAndDeletedIDs() { + return nodesStatisticsAndDeletedIDs.get(); } inline NodeTable* getNodeTable(common::table_id_t tableID) const { return nodeTables.at(tableID).get(); @@ -29,25 +29,26 @@ class NodesStore { inline void createNodeTable( common::table_id_t tableID, BufferManager* bufferManager, catalog::Catalog* catalog) { nodeTables[tableID] = std::make_unique(dataFH, metadataFH, - &nodesStatisticsAndDeletedIDs, *bufferManager, wal, + nodesStatisticsAndDeletedIDs.get(), *bufferManager, wal, reinterpret_cast( catalog->getReadOnlyVersion()->getTableSchema(tableID))); } inline void removeNodeTable(common::table_id_t tableID) { nodeTables.erase(tableID); - nodesStatisticsAndDeletedIDs.removeTableStatistic(tableID); + nodesStatisticsAndDeletedIDs->removeTableStatistic(tableID); } inline void prepareCommit() { - if (nodesStatisticsAndDeletedIDs.hasUpdates()) { + if (nodesStatisticsAndDeletedIDs->hasUpdates()) { wal->logTableStatisticsRecord(true /* isNodeTable */); - nodesStatisticsAndDeletedIDs.writeTablesStatisticsFileForWALRecord(wal->getDirectory()); + nodesStatisticsAndDeletedIDs->writeTablesStatisticsFileForWALRecord( + wal->getDirectory()); } for (auto& [_, nodeTable] : nodeTables) { nodeTable->prepareCommit(); } } inline void prepareRollback() { - if (nodesStatisticsAndDeletedIDs.hasUpdates()) { + if (nodesStatisticsAndDeletedIDs->hasUpdates()) { wal->logTableStatisticsRecord(true /* isNodeTable */); } for (auto& [_, nodeTable] : nodeTables) { @@ -67,7 +68,7 @@ class NodesStore { private: std::map> nodeTables; - NodesStatisticsAndDeletedIDs nodesStatisticsAndDeletedIDs; + std::unique_ptr nodesStatisticsAndDeletedIDs; WAL* wal; BMFileHandle* dataFH; BMFileHandle* metadataFH; diff --git a/src/include/storage/store/rels_store.h b/src/include/storage/store/rels_store.h index 6737418cdb..9e4c94d793 100644 --- a/src/include/storage/store/rels_store.h +++ b/src/include/storage/store/rels_store.h @@ -12,7 +12,8 @@ namespace storage { class RelsStore { public: - RelsStore(const catalog::Catalog& catalog, MemoryManager& memoryManager, WAL* wal); + RelsStore(BMFileHandle* metadataFH, const catalog::Catalog& catalog, + MemoryManager& memoryManager, WAL* wal); inline Column* getRelPropertyColumn(common::RelDataDirection relDirection, common::table_id_t relTableID, uint64_t propertyIdx) const { @@ -48,24 +49,24 @@ class RelsStore { return relTables.at(tableID).get(); } - inline RelsStatistics& getRelsStatistics() { return relsStatistics; } + inline RelsStatistics* getRelsStatistics() { return relsStatistics.get(); } inline void removeRelTable(common::table_id_t tableID) { relTables.erase(tableID); - relsStatistics.removeTableStatistic(tableID); + relsStatistics->removeTableStatistic(tableID); } inline void prepareCommit() { - if (relsStatistics.hasUpdates()) { + if (relsStatistics->hasUpdates()) { wal->logTableStatisticsRecord(false /* isNodeTable */); - relsStatistics.writeTablesStatisticsFileForWALRecord(wal->getDirectory()); + relsStatistics->writeTablesStatisticsFileForWALRecord(wal->getDirectory()); } for (auto& [_, relTable] : relTables) { relTable->prepareCommit(); } } inline void prepareRollback() { - if (relsStatistics.hasUpdates()) { + if (relsStatistics->hasUpdates()) { wal->logTableStatisticsRecord(false /* isNodeTable */); } for (auto& [_, relTable] : relTables) { @@ -93,7 +94,7 @@ class RelsStore { private: std::unordered_map> relTables; - RelsStatistics relsStatistics; + std::unique_ptr relsStatistics; WAL* wal; }; diff --git a/src/include/storage/store/string_node_column.h b/src/include/storage/store/string_node_column.h index 5206d80563..8d0122d18d 100644 --- a/src/include/storage/store/string_node_column.h +++ b/src/include/storage/store/string_node_column.h @@ -13,7 +13,7 @@ struct StringNodeColumnFunc { class StringNodeColumn : public NodeColumn { public: - StringNodeColumn(common::LogicalType dataType, const catalog::MetadataDAHInfo& metaDAHeaderInfo, + StringNodeColumn(common::LogicalType dataType, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics); diff --git a/src/include/storage/store/struct_node_column.h b/src/include/storage/store/struct_node_column.h index d45ef3f447..b0788f45f1 100644 --- a/src/include/storage/store/struct_node_column.h +++ b/src/include/storage/store/struct_node_column.h @@ -8,7 +8,7 @@ namespace storage { class StructNodeColumn : public NodeColumn { public: - StructNodeColumn(common::LogicalType dataType, const catalog::MetadataDAHInfo& metaDAHeaderInfo, + StructNodeColumn(common::LogicalType dataType, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics); diff --git a/src/include/storage/store/var_list_node_column.h b/src/include/storage/store/var_list_node_column.h index a65fc73055..128b99dc4f 100644 --- a/src/include/storage/store/var_list_node_column.h +++ b/src/include/storage/store/var_list_node_column.h @@ -47,9 +47,8 @@ class VarListNodeColumn : public NodeColumn { friend class VarListLocalColumn; public: - VarListNodeColumn(common::LogicalType dataType, - const catalog::MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, - BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, + VarListNodeColumn(common::LogicalType dataType, const MetadataDAHInfo& metaDAHeaderInfo, + BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics) : NodeColumn{std::move(dataType), metaDAHeaderInfo, dataFH, metadataFH, bufferManager, wal, transaction, propertyStatistics, true /* requireNullColumn */} { diff --git a/src/main/connection.cpp b/src/main/connection.cpp index a6413af68e..8df86cbe0c 100644 --- a/src/main/connection.cpp +++ b/src/main/connection.cpp @@ -109,16 +109,16 @@ std::unique_ptr Connection::prepareNoLock( preparedStatement->parameterMap = binder.getParameterMap(); preparedStatement->statementResult = boundStatement->getStatementResult()->copy(); // planning - auto& nodeStatistics = + auto nodeStatistics = database->storageManager->getNodesStore().getNodesStatisticsAndDeletedIDs(); - auto& relStatistics = database->storageManager->getRelsStore().getRelsStatistics(); + auto relStatistics = database->storageManager->getRelsStore().getRelsStatistics(); std::vector> plans; if (enumerateAllPlans) { plans = Planner::getAllPlans( - *database->catalog, nodeStatistics, relStatistics, *boundStatement); + *database->catalog, *nodeStatistics, *relStatistics, *boundStatement); } else { plans.push_back(Planner::getBestPlan( - *database->catalog, nodeStatistics, relStatistics, *boundStatement)); + *database->catalog, *nodeStatistics, *relStatistics, *boundStatement)); } // optimizing for (auto& plan : plans) { diff --git a/src/main/storage_driver.cpp b/src/main/storage_driver.cpp index 1d0bf95b10..6951553783 100644 --- a/src/main/storage_driver.cpp +++ b/src/main/storage_driver.cpp @@ -44,7 +44,7 @@ uint64_t StorageDriver::getNumNodes(const std::string& nodeName) { auto nodeTableID = catalogContent->getTableID(nodeName); auto nodeStatistics = storageManager->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getNodeStatisticsAndDeletedIDs(nodeTableID); + ->getNodeStatisticsAndDeletedIDs(nodeTableID); return nodeStatistics->getNumTuples(); } @@ -52,7 +52,7 @@ uint64_t StorageDriver::getNumRels(const std::string& relName) { auto catalogContent = catalog->getReadOnlyVersion(); auto relTableID = catalogContent->getTableID(relName); auto relStatistics = - storageManager->getRelsStore().getRelsStatistics().getRelStatistics(relTableID); + storageManager->getRelsStore().getRelsStatistics()->getRelStatistics(relTableID); return relStatistics->getNumTuples(); } diff --git a/src/processor/map/map_copy_from.cpp b/src/processor/map/map_copy_from.cpp index 70f8b0b52e..a6b8f65adb 100644 --- a/src/processor/map/map_copy_from.cpp +++ b/src/processor/map/map_copy_from.cpp @@ -102,7 +102,7 @@ static std::unique_ptr initializeDirectedInMemRelData( auto directedInMemRelData = std::make_unique(); auto boundTableID = schema->getBoundTableID(direction); auto numNodes = - nodesStore.getNodesStatisticsAndDeletedIDs().getMaxNodeOffsetPerTable().at(boundTableID) + + nodesStore.getNodesStatisticsAndDeletedIDs()->getMaxNodeOffsetPerTable().at(boundTableID) + 1; if (schema->isSingleMultiplicityInDirection(direction)) { // columns. @@ -159,7 +159,7 @@ std::unique_ptr PlanMapper::mapCopyRelFrom( storageManager.getNodesStore(), storageManager.getDirectory(), copyFromInfo->fileScanInfo->readerConfig->csvReaderConfig.get()); auto copyRelSharedState = std::make_shared(tableSchema->tableID, - &storageManager.getRelsStore().getRelsStatistics(), std::move(fwdRelData), + storageManager.getRelsStore().getRelsStatistics(), std::move(fwdRelData), std::move(bwdRelData), memoryManager); auto copyRelColumns = createCopyRelColumnsOrLists( diff --git a/src/processor/map/map_create.cpp b/src/processor/map/map_create.cpp index b355d815bf..4db3dbe3fb 100644 --- a/src/processor/map/map_create.cpp +++ b/src/processor/map/map_create.cpp @@ -77,7 +77,7 @@ std::unique_ptr PlanMapper::getRelInsertExecutor(storage::Rel for (auto& [lhs, rhs] : info->setItems) { evaluators.push_back(ExpressionMapper::getEvaluator(rhs, &inSchema)); } - return std::make_unique(relsStore->getRelsStatistics(), table, srcNodePos, + return std::make_unique(*relsStore->getRelsStatistics(), table, srcNodePos, dstNodePos, std::move(lhsVectorPositions), std::move(evaluators)); } diff --git a/src/processor/map/map_ddl.cpp b/src/processor/map/map_ddl.cpp index 5ed0bbd819..7aca16f35e 100644 --- a/src/processor/map/map_ddl.cpp +++ b/src/processor/map/map_ddl.cpp @@ -52,7 +52,7 @@ std::unique_ptr PlanMapper::mapCreateTable(LogicalOperator* lo std::unique_ptr PlanMapper::mapCreateNodeTable(LogicalOperator* logicalOperator) { auto createTable = (LogicalCreateTable*)logicalOperator; return std::make_unique(catalog, &storageManager, - &storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs(), + storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs(), createTable->getInfo()->copy(), getOutputPos(createTable), getOperatorID(), createTable->getExpressionsForPrinting()); } @@ -60,7 +60,7 @@ std::unique_ptr PlanMapper::mapCreateNodeTable(LogicalOperator std::unique_ptr PlanMapper::mapCreateRelTable(LogicalOperator* logicalOperator) { auto createTable = (LogicalCreateTable*)logicalOperator; return std::make_unique(catalog, - &storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), + storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), getOutputPos(createTable), getOperatorID(), createTable->getExpressionsForPrinting()); } @@ -68,15 +68,15 @@ std::unique_ptr PlanMapper::mapCreateRelTableGroup( LogicalOperator* logicalOperator) { auto createTable = (LogicalCreateTable*)logicalOperator; return std::make_unique(catalog, - &storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), + storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), getOutputPos(createTable), getOperatorID(), createTable->getExpressionsForPrinting()); } std::unique_ptr PlanMapper::mapCreateRdfGraph(LogicalOperator* logicalOperator) { auto createTable = (LogicalCreateTable*)logicalOperator; return std::make_unique(catalog, &storageManager, - &storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs(), - &storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), + storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs(), + storageManager.getRelsStore().getRelsStatistics(), createTable->getInfo()->copy(), getOutputPos(createTable), getOperatorID(), createTable->getExpressionsForPrinting()); } @@ -117,7 +117,7 @@ std::unique_ptr PlanMapper::mapAddProperty(LogicalOperator* lo std::unique_ptr PlanMapper::mapDropProperty(LogicalOperator* logicalOperator) { auto dropProperty = (LogicalDropProperty*)logicalOperator; return std::make_unique(catalog, dropProperty->getTableID(), - dropProperty->getPropertyID(), getOutputPos(dropProperty), getOperatorID(), + dropProperty->getPropertyID(), getOutputPos(dropProperty), storageManager, getOperatorID(), dropProperty->getExpressionsForPrinting()); } diff --git a/src/processor/map/map_delete.cpp b/src/processor/map/map_delete.cpp index 654b54c53e..5610132060 100644 --- a/src/processor/map/map_delete.cpp +++ b/src/processor/map/map_delete.cpp @@ -46,7 +46,7 @@ static std::unique_ptr getRelDeleteExecutor( auto srcNodePos = DataPos(inSchema.getExpressionPos(*rel.getSrcNode()->getInternalID())); auto dstNodePos = DataPos(inSchema.getExpressionPos(*rel.getDstNode()->getInternalID())); auto relIDPos = DataPos(inSchema.getExpressionPos(*rel.getInternalIDProperty())); - auto statistics = &store->getRelsStatistics(); + auto statistics = store->getRelsStatistics(); if (rel.isMultiLabeled()) { std::unordered_map> tableIDToTableMap; diff --git a/src/processor/map/map_extend.cpp b/src/processor/map/map_extend.cpp index 9a628bd443..90337e6496 100644 --- a/src/processor/map/map_extend.cpp +++ b/src/processor/map/map_extend.cpp @@ -27,7 +27,7 @@ static std::unique_ptr getRelTableScanInfo(RelDataDirection di propertyExpression->getPropertyID(relTableID) : INVALID_PROPERTY_ID); } - auto relStats = relsStore.getRelsStatistics().getRelStatistics(relTableID); + auto relStats = relsStore.getRelsStatistics()->getRelStatistics(relTableID); return std::make_unique( relTableDataType, relData, relStats, std::move(propertyIds)); } diff --git a/src/processor/operator/ddl/add_node_property.cpp b/src/processor/operator/ddl/add_node_property.cpp index adb7f3eec5..a6267373df 100644 --- a/src/processor/operator/ddl/add_node_property.cpp +++ b/src/processor/operator/ddl/add_node_property.cpp @@ -4,12 +4,11 @@ namespace kuzu { namespace processor { void AddNodeProperty::executeDDLInternal() { - auto metadataDAHInfo = storageManager.createMetadataDAHInfo(*dataType); - catalog->addNodeProperty( - tableID, propertyName, std::move(dataType), std::move(metadataDAHInfo)); + catalog->addNodeProperty(tableID, propertyName, std::move(dataType)); auto addedProp = catalog->getWriteVersion()->getNodeProperty(tableID, propertyName); storageManager.getNodesStore().getNodeTable(tableID)->addColumn( *addedProp, getDefaultValVector(), transaction); + storageManager.getWAL()->logAddPropertyRecord(tableID, addedProp->getPropertyID()); } } // namespace processor diff --git a/src/processor/operator/ddl/add_rel_property.cpp b/src/processor/operator/ddl/add_rel_property.cpp index 29fb53dc24..9710221223 100644 --- a/src/processor/operator/ddl/add_rel_property.cpp +++ b/src/processor/operator/ddl/add_rel_property.cpp @@ -19,7 +19,7 @@ static void createFileForRelColumnPropertyWithDefaultVal(table_id_t relTableID, direction, property.getPropertyID(), DBFileType::WAL_VERSION), *property.getDataType()); auto numTuples = - storageManager.getRelsStore().getRelsStatistics().getNumTuplesForTable(relTableID); + storageManager.getRelsStore().getRelsStatistics()->getNumTuplesForTable(relTableID); auto inMemColumnChunk = inMemColumn->createInMemColumnChunk(0, numTuples - 1, nullptr /* copyDescription */); if (!isDefaultValNull) { @@ -44,10 +44,10 @@ static void createFileForRelListsPropertyWithDefaultVal(table_id_t relTableID, StorageUtils::getRelPropertyListsFName(storageManager.getDirectory(), relTableID, direction, property.getPropertyID(), DBFileType::WAL_VERSION), *property.getDataType(), - storageManager.getRelsStore().getRelsStatistics().getNumTuplesForTable(relTableID), + storageManager.getRelsStore().getRelsStatistics()->getNumTuplesForTable(relTableID), nullptr /* copyDescription */); auto numNodesInBoundTable = - storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs().getNumTuplesForTable( + storageManager.getNodesStore().getNodesStatisticsAndDeletedIDs()->getNumTuplesForTable( boundTableID); inMemList->initListsMetadataAndAllocatePages( numNodesInBoundTable, adjLists->getHeaders().get(), &adjLists->getListsMetadata()); diff --git a/src/processor/operator/ddl/create_node_table.cpp b/src/processor/operator/ddl/create_node_table.cpp index 6f3c15cb8f..7e8cceef2e 100644 --- a/src/processor/operator/ddl/create_node_table.cpp +++ b/src/processor/operator/ddl/create_node_table.cpp @@ -11,15 +11,11 @@ namespace kuzu { namespace processor { void CreateNodeTable::executeDDLInternal() { - auto extraInfo = (binder::BoundExtraCreateNodeTableInfo*)info->extraInfo.get(); - for (auto& property : extraInfo->properties) { - property->setMetadataDAHInfo( - storageManager->createMetadataDAHInfo(*property->getDataType())); - } auto newTableID = catalog->addNodeTableSchema(*info); auto newNodeTableSchema = reinterpret_cast(catalog->getWriteVersion()->getTableSchema(newTableID)); nodesStatistics->addNodeStatisticsAndDeletedIDs(newNodeTableSchema); + storageManager->getWAL()->logNodeTableRecord(newTableID); } std::string CreateNodeTable::getOutputMsg() { diff --git a/src/processor/operator/ddl/create_rdf_graph.cpp b/src/processor/operator/ddl/create_rdf_graph.cpp index dc406bbb42..8d256d106d 100644 --- a/src/processor/operator/ddl/create_rdf_graph.cpp +++ b/src/processor/operator/ddl/create_rdf_graph.cpp @@ -10,13 +10,6 @@ namespace kuzu { namespace processor { void CreateRdfGraph::executeDDLInternal() { - auto extraInfo = (binder::BoundExtraCreateRdfGraphInfo*)info->extraInfo.get(); - auto nodeInfo = extraInfo->nodeInfo.get(); - auto extraNodeInfo = (binder::BoundExtraCreateNodeTableInfo*)nodeInfo->extraInfo.get(); - for (auto& property : extraNodeInfo->properties) { - property->setMetadataDAHInfo( - storageManager->createMetadataDAHInfo(*property->getDataType())); - } auto newRdfGraphID = catalog->addRdfGraphSchema(*info); auto writeCatalog = catalog->getWriteVersion(); auto newRdfGraphSchema = (RdfGraphSchema*)writeCatalog->getTableSchema(newRdfGraphID); @@ -26,6 +19,8 @@ void CreateRdfGraph::executeDDLInternal() { auto newRelTableSchema = (RelTableSchema*)writeCatalog->getTableSchema(newRdfGraphSchema->getRelTableID()); relsStatistics->addTableStatistic(newRelTableSchema); + storageManager->getWAL()->logRdfGraphRecord( + newRdfGraphID, newRdfGraphSchema->getNodeTableID(), newRdfGraphSchema->getRelTableID()); } std::string CreateRdfGraph::getOutputMsg() { diff --git a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp b/src/storage/stats/nodes_statistics_and_deleted_ids.cpp index b22dc2b899..935be0b7cf 100644 --- a/src/storage/stats/nodes_statistics_and_deleted_ids.cpp +++ b/src/storage/stats/nodes_statistics_and_deleted_ids.cpp @@ -113,7 +113,7 @@ void NodeTableStatsAndDeletedIDs::setNumTuples(uint64_t numTuples) { } } -std::vector NodeTableStatsAndDeletedIDs::getDeletedNodeOffsets() { +std::vector NodeTableStatsAndDeletedIDs::getDeletedNodeOffsets() const { std::vector retVal; auto morselIter = deletedNodeOffsetsPerMorsel.begin(); while (morselIter != deletedNodeOffsetsPerMorsel.end()) { @@ -125,14 +125,19 @@ std::vector NodeTableStatsAndDeletedIDs::getDeletedNodeOffsets() { void NodeTableStatsAndDeletedIDs::serializeInternal(FileInfo* fileInfo, uint64_t& offset) { SerDeser::serializeVector(getDeletedNodeOffsets(), fileInfo, offset); + SerDeser::serializeVectorOfPtrs(metadataDAHInfos, fileInfo, offset); } std::unique_ptr NodeTableStatsAndDeletedIDs::deserialize( table_id_t tableID, offset_t maxNodeOffset, FileInfo* fileInfo, uint64_t& offset) { std::vector deletedNodeOffsets; + std::vector> metadataDAHInfos; SerDeser::deserializeVector(deletedNodeOffsets, fileInfo, offset); - return std::make_unique( - tableID, maxNodeOffset, deletedNodeOffsets); + SerDeser::deserializeVectorOfPtrs(metadataDAHInfos, fileInfo, offset); + auto result = + std::make_unique(tableID, maxNodeOffset, deletedNodeOffsets); + result->metadataDAHInfos = std::move(metadataDAHInfos); + return result; } void NodeTableStatsAndDeletedIDs::errorIfNodeHasEdges(offset_t nodeOffset) { @@ -164,21 +169,6 @@ bool NodeTableStatsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morsel return false; } -NodesStatisticsAndDeletedIDs::NodesStatisticsAndDeletedIDs( - std::unordered_map>& - nodesStatisticsAndDeletedIDs) - : TablesStatistics{} { - initTableStatisticsForWriteTrx(); - for (auto& nodeStatistics : nodesStatisticsAndDeletedIDs) { - tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[nodeStatistics.first] = - std::make_unique( - *(NodeTableStatsAndDeletedIDs*)nodeStatistics.second.get()); - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[nodeStatistics.first] = - std::make_unique( - *(NodeTableStatsAndDeletedIDs*)nodeStatistics.second.get()); - } -} - void NodesStatisticsAndDeletedIDs::setAdjListsAndColumns(RelsStore* relsStore) { for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { getNodeStatisticsAndDeletedIDs(tableIDStatistics.first) @@ -222,5 +212,39 @@ void NodesStatisticsAndDeletedIDs::addNodeStatisticsAndDeletedIDs( constructTableStatistic(tableSchema); } +void NodesStatisticsAndDeletedIDs::addMetadataDAHInfo( + table_id_t tableID, const LogicalType& dataType) { + initTableStatisticsForWriteTrx(); + auto tableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + tableStats->addMetadataDAHInfoForColumn( + createMetadataDAHInfo(dataType, *metadataFH, bufferManager, wal)); +} + +void NodesStatisticsAndDeletedIDs::removeMetadataDAHInfo( + common::table_id_t tableID, common::column_id_t columnID) { + initTableStatisticsForWriteTrx(); + auto tableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + tableStats->removeMetadataDAHInfoForColumn(columnID); +} + +MetadataDAHInfo* NodesStatisticsAndDeletedIDs::getMetadataDAHInfo( + transaction::Transaction* transaction, common::table_id_t tableID, + common::column_id_t columnID) { + if (transaction->isWriteTransaction()) { + initTableStatisticsForWriteTrx(); + assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); + auto nodeTableStats = dynamic_cast( + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID].get()); + return nodeTableStats->getMetadataDAHInfo(columnID); + } else { + assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); + auto nodeTableStats = dynamic_cast( + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable[tableID].get()); + return nodeTableStats->getMetadataDAHInfo(columnID); + } +} + } // namespace storage } // namespace kuzu diff --git a/src/storage/stats/table_statistics.cpp b/src/storage/stats/table_statistics.cpp index b9590c987f..42b2431a3e 100644 --- a/src/storage/stats/table_statistics.cpp +++ b/src/storage/stats/table_statistics.cpp @@ -3,13 +3,14 @@ #include "storage/stats/nodes_statistics_and_deleted_ids.h" #include "storage/stats/rels_statistics.h" #include "storage/storage_utils.h" +#include "storage/store/column_chunk.h" using namespace kuzu::common; namespace kuzu { namespace storage { -void TableStatistics::serialize(common::FileInfo* fileInfo, uint64_t& offset) { +void TableStatistics::serialize(FileInfo* fileInfo, uint64_t& offset) { SerDeser::serializeValue(tableType, fileInfo, offset); SerDeser::serializeValue(numTuples, fileInfo, offset); SerDeser::serializeValue(tableID, fileInfo, offset); @@ -18,7 +19,7 @@ void TableStatistics::serialize(common::FileInfo* fileInfo, uint64_t& offset) { } std::unique_ptr TableStatistics::deserialize( - common::FileInfo* fileInfo, uint64_t& offset) { + FileInfo* fileInfo, uint64_t& offset) { TableType tableType; uint64_t numTuples; table_id_t tableID; @@ -50,7 +51,22 @@ std::unique_ptr TableStatistics::deserialize( return result; } -TablesStatistics::TablesStatistics() { +void MetadataDAHInfo::serialize(FileInfo* fileInfo, uint64_t& offset) const { + SerDeser::serializeValue(dataDAHPageIdx, fileInfo, offset); + SerDeser::serializeValue(nullDAHPageIdx, fileInfo, offset); + SerDeser::serializeVectorOfPtrs(childrenInfos, fileInfo, offset); +} + +std::unique_ptr MetadataDAHInfo::deserialize( + FileInfo* fileInfo, uint64_t& offset) { + auto metadataDAHInfo = std::make_unique(); + SerDeser::deserializeValue(metadataDAHInfo->dataDAHPageIdx, fileInfo, offset); + SerDeser::deserializeValue(metadataDAHInfo->nullDAHPageIdx, fileInfo, offset); + SerDeser::deserializeVectorOfPtrs(metadataDAHInfo->childrenInfos, fileInfo, offset); + return metadataDAHInfo; +} + +TablesStatistics::TablesStatistics(BMFileHandle* metadataFH) : metadataFH{metadataFH} { tablesStatisticsContentForReadOnlyTrx = std::make_unique(); } @@ -58,7 +74,7 @@ void TablesStatistics::readFromFile(const std::string& directory) { readFromFile(directory, DBFileType::ORIGINAL); } -void TablesStatistics::readFromFile(const std::string& directory, common::DBFileType dbFileType) { +void TablesStatistics::readFromFile(const std::string& directory, DBFileType dbFileType) { auto filePath = getTableStatisticsFilePath(directory, dbFileType); auto fileInfo = FileUtils::openFile(filePath, O_RDONLY); uint64_t offset = 0; @@ -87,16 +103,16 @@ void TablesStatistics::initTableStatisticsForWriteTrx() { void TablesStatistics::initTableStatisticsForWriteTrxNoLock() { if (tablesStatisticsContentForWriteTrx == nullptr) { tablesStatisticsContentForWriteTrx = std::make_unique(); - for (auto& tableStatistic : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { - tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableStatistic.first] = - constructTableStatistic(tableStatistic.second.get()); + for (auto& [tableID, tableStatistic] : + tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { + tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableID] = + tableStatistic->copy(); } } } PropertyStatistics& TablesStatistics::getPropertyStatisticsForTable( - const transaction::Transaction& transaction, common::table_id_t tableID, - common::property_id_t propertyID) { + const transaction::Transaction& transaction, table_id_t tableID, property_id_t propertyID) { if (transaction.isReadOnly()) { assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID)); auto tableStatistics = @@ -111,8 +127,8 @@ PropertyStatistics& TablesStatistics::getPropertyStatisticsForTable( } } -void TablesStatistics::setPropertyStatisticsForTable(common::table_id_t tableID, - common::property_id_t propertyID, kuzu::storage::PropertyStatistics stats) { +void TablesStatistics::setPropertyStatisticsForTable( + table_id_t tableID, property_id_t propertyID, PropertyStatistics stats) { initTableStatisticsForWriteTrx(); assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID)); auto tableStatistics = @@ -120,5 +136,38 @@ void TablesStatistics::setPropertyStatisticsForTable(common::table_id_t tableID, tableStatistics->setPropertyStatistics(propertyID, stats); } +std::unique_ptr TablesStatistics::createMetadataDAHInfo( + const LogicalType& dataType, BMFileHandle& metadataFH, BufferManager* bm, WAL* wal) { + auto metadataDAHInfo = std::make_unique(); + metadataDAHInfo->dataDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + metadataDAHInfo->nullDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + switch (dataType.getPhysicalType()) { + case PhysicalTypeID::STRUCT: { + auto fields = StructType::getFields(&dataType); + metadataDAHInfo->childrenInfos.resize(fields.size()); + for (auto i = 0u; i < fields.size(); i++) { + metadataDAHInfo->childrenInfos[i] = + createMetadataDAHInfo(*fields[i]->getType(), metadataFH, bm, wal); + } + } break; + case PhysicalTypeID::VAR_LIST: { + metadataDAHInfo->childrenInfos.push_back( + createMetadataDAHInfo(*VarListType::getChildType(&dataType), metadataFH, bm, wal)); + } break; + case PhysicalTypeID::STRING: { + auto childMetadataDAHInfo = std::make_unique(); + childMetadataDAHInfo->dataDAHPageIdx = + InMemDiskArray::addDAHPageToFile(metadataFH, bm, wal); + metadataDAHInfo->childrenInfos.push_back(std::move(childMetadataDAHInfo)); + } break; + default: { + // DO NOTHING. + } + } + return metadataDAHInfo; +} + } // namespace storage } // namespace kuzu diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index 416941b9b5..934c7987cd 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -21,41 +21,8 @@ StorageManager::StorageManager(Catalog& catalog, MemoryManager& memoryManager, W BMFileHandle::FileVersionedType::VERSIONED_FILE); nodesStore = std::make_unique( dataFH.get(), metadataFH.get(), catalog, *memoryManager.getBufferManager(), wal); - relsStore = std::make_unique(catalog, memoryManager, wal); - nodesStore->getNodesStatisticsAndDeletedIDs().setAdjListsAndColumns(relsStore.get()); -} - -std::unique_ptr StorageManager::createMetadataDAHInfo( - const common::LogicalType& dataType) { - auto metadataDAHInfo = std::make_unique(); - metadataDAHInfo->dataDAHPageIdx = InMemDiskArray::addDAHPageToFile( - *metadataFH, memoryManager.getBufferManager(), wal); - metadataDAHInfo->nullDAHPageIdx = InMemDiskArray::addDAHPageToFile( - *metadataFH, memoryManager.getBufferManager(), wal); - switch (dataType.getPhysicalType()) { - case PhysicalTypeID::STRUCT: { - auto fields = StructType::getFields(&dataType); - metadataDAHInfo->childrenInfos.resize(fields.size()); - for (auto i = 0u; i < fields.size(); i++) { - metadataDAHInfo->childrenInfos[i] = createMetadataDAHInfo(*fields[i]->getType()); - } - } break; - case PhysicalTypeID::VAR_LIST: { - metadataDAHInfo->childrenInfos.push_back( - createMetadataDAHInfo(*VarListType::getChildType(&dataType))); - } break; - case PhysicalTypeID::STRING: { - auto childMetadataDAHInfo = std::make_unique(); - childMetadataDAHInfo->dataDAHPageIdx = - InMemDiskArray::addDAHPageToFile( - *metadataFH, memoryManager.getBufferManager(), wal); - metadataDAHInfo->childrenInfos.push_back(std::move(childMetadataDAHInfo)); - } break; - default: { - // DO NOTHING. - } - } - return metadataDAHInfo; + relsStore = std::make_unique(metadataFH.get(), catalog, memoryManager, wal); + nodesStore->getNodesStatisticsAndDeletedIDs()->setAdjListsAndColumns(relsStore.get()); } } // namespace storage diff --git a/src/storage/store/node_column.cpp b/src/storage/store/node_column.cpp index d28134bce3..fd9afeaa0e 100644 --- a/src/storage/store/node_column.cpp +++ b/src/storage/store/node_column.cpp @@ -83,12 +83,6 @@ void BoolNodeColumnFunc::writeValueToPage( posInVector, (uint64_t*)frame, posInFrame, 1); } -NodeColumn::NodeColumn(const Property& property, BMFileHandle* dataFH, BMFileHandle* metadataFH, - BufferManager* bufferManager, WAL* wal, Transaction* transaction, - RWPropertyStats propertyStatistics, bool requireNullColumn) - : NodeColumn{*property.getDataType(), *property.getMetadataDAHInfo(), dataFH, metadataFH, - bufferManager, wal, transaction, propertyStatistics, requireNullColumn} {} - NodeColumn::NodeColumn(LogicalType dataType, const MetadataDAHInfo& metaDAHeaderInfo, BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats propertyStatistics, diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index a74888b6cf..f826d67a25 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -65,6 +65,7 @@ void NodeTable::addColumn(const catalog::Property& property, common::ValueVector* defaultValueVector, transaction::Transaction* transaction) { nodesStatisticsAndDeletedIDs->setPropertyStatisticsForTable(tableID, property.getPropertyID(), PropertyStatistics(!defaultValueVector->hasNoNullsGuarantee())); + nodesStatisticsAndDeletedIDs->addMetadataDAHInfo(tableID, *property.getDataType()); tableData->addColumn(transaction, property, defaultValueVector, nodesStatisticsAndDeletedIDs); wal->addToUpdatedNodeTables(tableID); } diff --git a/src/storage/store/nodes_store.cpp b/src/storage/store/nodes_store.cpp index 23bc59aba1..02058d471d 100644 --- a/src/storage/store/nodes_store.cpp +++ b/src/storage/store/nodes_store.cpp @@ -7,12 +7,13 @@ namespace storage { NodesStore::NodesStore(BMFileHandle* dataFH, BMFileHandle* metadataFH, const Catalog& catalog, BufferManager& bufferManager, WAL* wal) - : nodesStatisticsAndDeletedIDs{wal->getDirectory()}, wal{wal}, dataFH{dataFH}, metadataFH{ - metadataFH} { + : wal{wal}, dataFH{dataFH}, metadataFH{metadataFH} { + nodesStatisticsAndDeletedIDs = + std::make_unique(metadataFH, &bufferManager, wal); for (auto& schema : catalog.getReadOnlyVersion()->getNodeTableSchemas()) { auto nodeTableSchema = reinterpret_cast(schema); - nodeTables[schema->tableID] = std::make_unique( - dataFH, metadataFH, &nodesStatisticsAndDeletedIDs, bufferManager, wal, nodeTableSchema); + nodeTables[schema->tableID] = std::make_unique(dataFH, metadataFH, + nodesStatisticsAndDeletedIDs.get(), bufferManager, wal, nodeTableSchema); } } diff --git a/src/storage/store/rels_store.cpp b/src/storage/store/rels_store.cpp index 2ab4f0915d..e21460a9f7 100644 --- a/src/storage/store/rels_store.cpp +++ b/src/storage/store/rels_store.cpp @@ -6,8 +6,10 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -RelsStore::RelsStore(const Catalog& catalog, MemoryManager& memoryManager, WAL* wal) - : relsStatistics{wal->getDirectory()}, wal{wal} { +RelsStore::RelsStore( + BMFileHandle* metadataFH, const Catalog& catalog, MemoryManager& memoryManager, WAL* wal) + : wal{wal} { + relsStatistics = std::make_unique(metadataFH, wal->getDirectory()); for (auto& relTableSchema : catalog.getReadOnlyVersion()->getRelTableSchemas()) { relTables.emplace(relTableSchema->tableID, std::make_unique(catalog, relTableSchema->tableID, memoryManager, wal)); diff --git a/src/storage/store/table_data.cpp b/src/storage/store/table_data.cpp index d76eaf219c..338c34218a 100644 --- a/src/storage/store/table_data.cpp +++ b/src/storage/store/table_data.cpp @@ -1,5 +1,7 @@ #include "storage/store/table_data.h" +#include "storage/stats/nodes_statistics_and_deleted_ids.h" + using namespace kuzu::common; using namespace kuzu::transaction; @@ -12,16 +14,20 @@ TableData::TableData(BMFileHandle* dataFH, BMFileHandle* metadataFH, table_id_t : dataFH{dataFH}, metadataFH{metadataFH}, tableID{tableID}, bufferManager{bufferManager}, wal{wal} { columns.reserve(properties.size()); - for (auto property : properties) { - columns.push_back(NodeColumnFactory::createNodeColumn(*property, dataFH, metadataFH, - bufferManager, wal, Transaction::getDummyReadOnlyTrx().get(), - RWPropertyStats(tablesStatistics, tableID, property->getPropertyID()))); + for (auto i = 0u; i < properties.size(); i++) { + auto property = properties[i]; + auto metadataDAHInfo = + dynamic_cast(tablesStatistics) + ->getMetadataDAHInfo(Transaction::getDummyWriteTrx().get(), tableID, i); + columns.push_back( + NodeColumnFactory::createNodeColumn(*property->getDataType(), *metadataDAHInfo, dataFH, + metadataFH, bufferManager, wal, Transaction::getDummyReadOnlyTrx().get(), + RWPropertyStats(tablesStatistics, tableID, property->getPropertyID()))); } } -void TableData::read(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, - const std::vector& columnIDs, - const std::vector& outputVectors) { +void TableData::read(transaction::Transaction* transaction, ValueVector* nodeIDVector, + const std::vector& columnIDs, const std::vector& outputVectors) { if (nodeIDVector->isSequential()) { scan(transaction, nodeIDVector, columnIDs, outputVectors); } else { @@ -29,9 +35,8 @@ void TableData::read(transaction::Transaction* transaction, common::ValueVector* } } -void TableData::scan(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, - const std::vector& columnIDs, - const std::vector& outputVectors) { +void TableData::scan(transaction::Transaction* transaction, ValueVector* nodeIDVector, + const std::vector& columnIDs, const std::vector& outputVectors) { assert(columnIDs.size() == outputVectors.size() && !nodeIDVector->state->isFlat()); for (auto i = 0u; i < columnIDs.size(); i++) { if (columnIDs[i] == INVALID_COLUMN_ID) { @@ -47,9 +52,8 @@ void TableData::scan(transaction::Transaction* transaction, common::ValueVector* } } -void TableData::lookup(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, - const std::vector& columnIDs, - const std::vector& outputVectors) { +void TableData::lookup(transaction::Transaction* transaction, ValueVector* nodeIDVector, + const std::vector& columnIDs, const std::vector& outputVectors) { auto pos = nodeIDVector->state->selVector->selectedPositions[0]; for (auto i = 0u; i < columnIDs.size(); i++) { auto columnID = columnIDs[i]; @@ -65,8 +69,8 @@ void TableData::lookup(transaction::Transaction* transaction, common::ValueVecto } } -void TableData::insert(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, - const std::vector& propertyVectors) { +void TableData::insert(transaction::Transaction* transaction, ValueVector* nodeIDVector, + const std::vector& propertyVectors) { // We assume that offsets are given in the ascending order, thus lastOffset is the max one. offset_t lastOffset = nodeIDVector->readNodeOffset( nodeIDVector->state->selVector @@ -86,15 +90,14 @@ void TableData::insert(transaction::Transaction* transaction, common::ValueVecto } } -void TableData::update(transaction::Transaction* transaction, common::column_id_t columnID, - common::ValueVector* nodeIDVector, common::ValueVector* propertyVector) { +void TableData::update(transaction::Transaction* transaction, column_id_t columnID, + ValueVector* nodeIDVector, ValueVector* propertyVector) { assert(columnID < columns.size()); transaction->getLocalStorage()->update(tableID, columnID, nodeIDVector, propertyVector); } -void TableData::update(transaction::Transaction* transaction, common::column_id_t columnID, - common::offset_t nodeOffset, common::ValueVector* propertyVector, - common::sel_t posInPropertyVector) const { +void TableData::update(transaction::Transaction* transaction, column_id_t columnID, + offset_t nodeOffset, ValueVector* propertyVector, sel_t posInPropertyVector) const { transaction->getLocalStorage()->update( tableID, columnID, nodeOffset, propertyVector, posInPropertyVector); } @@ -110,10 +113,12 @@ void TableData::append(kuzu::storage::NodeGroup* nodeGroup) { } void TableData::addColumn(transaction::Transaction* transaction, const catalog::Property& property, - common::ValueVector* defaultValueVector, TablesStatistics* tableStats) { - auto nodeColumn = - NodeColumnFactory::createNodeColumn(property, dataFH, metadataFH, bufferManager, wal, - transaction, RWPropertyStats(tableStats, tableID, property.getPropertyID())); + ValueVector* defaultValueVector, TablesStatistics* tablesStats) { + auto metadataDAHInfo = dynamic_cast(tablesStats) + ->getMetadataDAHInfo(transaction, tableID, columns.size()); + auto nodeColumn = NodeColumnFactory::createNodeColumn(*property.getDataType(), *metadataDAHInfo, + dataFH, metadataFH, bufferManager, wal, transaction, + RWPropertyStats(tablesStats, tableID, property.getPropertyID())); nodeColumn->populateWithDefaultVal( property, nodeColumn.get(), defaultValueVector, getNumNodeGroups(transaction)); columns.push_back(std::move(nodeColumn)); diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index d0a523afd6..93bc03b3c7 100644 --- a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -139,18 +139,18 @@ void WALReplayer::replayTableStatisticsRecord(const kuzu::storage::WALRecord& wa if (!isRecovering) { storageManager->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .checkpointInMemoryIfNecessary(); + ->checkpointInMemoryIfNecessary(); } } else { StorageUtils::overwriteRelsStatisticsFileWithVersionFromWAL(wal->getDirectory()); if (!isRecovering) { - storageManager->getRelsStore().getRelsStatistics().checkpointInMemoryIfNecessary(); + storageManager->getRelsStore().getRelsStatistics()->checkpointInMemoryIfNecessary(); } } } else { storageManager->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .rollbackInMemoryIfNecessary(); + ->rollbackInMemoryIfNecessary(); } } @@ -194,7 +194,8 @@ void WALReplayer::replayRelTableRecord(const WALRecord& walRecord, bool isRdf) { if (isCheckpoint) { // See comments for NODE_TABLE_RECORD. auto nodesStatistics = std::make_unique( - wal->getDirectory(), isRdf ? DBFileType::WAL_VERSION : DBFileType::ORIGINAL); + nullptr /* metadataFH */, nullptr /* bufferManager */, wal, + isRdf ? DBFileType::WAL_VERSION : DBFileType::ORIGINAL); auto maxNodeOffsetPerTable = nodesStatistics->getMaxNodeOffsetPerTable(); auto catalogForCheckpointing = getCatalogForRecovery(DBFileType::WAL_VERSION); auto relTableSchema = reinterpret_cast( @@ -206,8 +207,9 @@ void WALReplayer::replayRelTableRecord(const WALRecord& walRecord, bool isRdf) { // See comments for NODE_TABLE_RECORD. storageManager->getRelsStore().createRelTable( walRecord.relTableRecord.tableID, catalogForCheckpointing.get(), memoryManager); - storageManager->getNodesStore().getNodesStatisticsAndDeletedIDs().setAdjListsAndColumns( - &storageManager->getRelsStore()); + storageManager->getNodesStore() + .getNodesStatisticsAndDeletedIDs() + ->setAdjListsAndColumns(&storageManager->getRelsStore()); } } else { // See comments for NODE_TABLE_RECORD. @@ -338,15 +340,17 @@ void WALReplayer::replayCopyRelRecord(const kuzu::storage::WALRecord& walRecord) relTable->resetColumnsAndLists(relTableSchema); // See comments for COPY_NODE_RECORD. relTable->initializeData(relTableSchema); - storageManager->getNodesStore().getNodesStatisticsAndDeletedIDs().setAdjListsAndColumns( - &storageManager->getRelsStore()); + storageManager->getNodesStore() + .getNodesStatisticsAndDeletedIDs() + ->setAdjListsAndColumns(&storageManager->getRelsStore()); } else { // RECOVERY. if (wal->isLastLoggedRecordCommit()) { return; } auto nodesStatisticsAndDeletedIDsForCheckPointing = - std::make_unique(wal->getDirectory()); + std::make_unique( + nullptr /* metadataFH */, nullptr /* bufferManager */, wal); auto maxNodeOffsetPerTable = nodesStatisticsAndDeletedIDsForCheckPointing->getMaxNodeOffsetPerTable(); auto catalogForRecovery = getCatalogForRecovery(DBFileType::ORIGINAL); @@ -362,7 +366,7 @@ void WALReplayer::replayCopyRelRecord(const kuzu::storage::WALRecord& walRecord) WALReplayerUtils::createEmptyDBFilesForNewRelTable(relTableSchema, wal->getDirectory(), storageManager->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getMaxNodeOffsetPerTable()); + ->getMaxNodeOffsetPerTable()); } } diff --git a/test/graph_test/graph_test.cpp b/test/graph_test/graph_test.cpp index e5b86d51dd..ebd3816c00 100644 --- a/test/graph_test/graph_test.cpp +++ b/test/graph_test/graph_test.cpp @@ -77,8 +77,8 @@ void BaseGraphTest::validateQueryBestPlanJoinOrder( auto boundQuery = Binder(*catalog, database->memoryManager.get(), conn->clientContext.get()) .bind(*parsedQuery); auto plan = Planner::getBestPlan(*catalog, - getStorageManager(*database)->getNodesStore().getNodesStatisticsAndDeletedIDs(), - getStorageManager(*database)->getRelsStore().getRelsStatistics(), *boundQuery); + *getStorageManager(*database)->getNodesStore().getNodesStatisticsAndDeletedIDs(), + *getStorageManager(*database)->getRelsStore().getRelsStatistics(), *boundQuery); ASSERT_STREQ(LogicalPlanUtil::encodeJoin(*plan).c_str(), expectedJoinOrder.c_str()); } diff --git a/test/runner/e2e_copy_transaction_test.cpp b/test/runner/e2e_copy_transaction_test.cpp index b2bbde1497..253e937e40 100644 --- a/test/runner/e2e_copy_transaction_test.cpp +++ b/test/runner/e2e_copy_transaction_test.cpp @@ -53,7 +53,7 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { ASSERT_EQ(getStorageManager(*database) ->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getMaxNodeOffset(&transaction::DUMMY_READ_TRANSACTION, tableID), + ->getMaxNodeOffset(&transaction::DUMMY_READ_TRANSACTION, tableID), UINT64_MAX); } @@ -66,7 +66,7 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { ASSERT_EQ(getStorageManager(*database) ->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getMaxNodeOffset(&transaction::DUMMY_READ_TRANSACTION, tableID), + ->getMaxNodeOffset(&transaction::DUMMY_READ_TRANSACTION, tableID), 7); } @@ -126,8 +126,9 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { validateRelColumnAndListFilesExistence( relTableSchema, DBFileType::ORIGINAL, true /* existence */); auto dummyWriteTrx = transaction::Transaction::getDummyWriteTrx(); - ASSERT_EQ(getStorageManager(*database)->getRelsStore().getRelsStatistics().getNextRelOffset( - dummyWriteTrx.get(), tableID), + ASSERT_EQ( + getStorageManager(*database)->getRelsStore().getRelsStatistics()->getNextRelOffset( + dummyWriteTrx.get(), tableID), 14); } @@ -137,11 +138,11 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { validateRelColumnAndListFilesExistence( relTableSchema, DBFileType::ORIGINAL, true /* existence */); validateTinysnbKnowsDateProperty(); - auto& relsStatistics = getStorageManager(*database)->getRelsStore().getRelsStatistics(); + auto relsStatistics = getStorageManager(*database)->getRelsStore().getRelsStatistics(); auto dummyWriteTrx = transaction::Transaction::getDummyWriteTrx(); - ASSERT_EQ(relsStatistics.getNextRelOffset(dummyWriteTrx.get(), knowsTableID), 14); - ASSERT_EQ(relsStatistics.getReadOnlyVersion()->tableStatisticPerTable.size(), 1); - auto knowsRelStatistics = (RelTableStats*)relsStatistics.getReadOnlyVersion() + ASSERT_EQ(relsStatistics->getNextRelOffset(dummyWriteTrx.get(), knowsTableID), 14); + ASSERT_EQ(relsStatistics->getReadOnlyVersion()->tableStatisticPerTable.size(), 1); + auto knowsRelStatistics = (RelTableStats*)relsStatistics->getReadOnlyVersion() ->tableStatisticPerTable.at(knowsTableID) .get(); ASSERT_EQ(knowsRelStatistics->getNumTuples(), 14); diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index 189393a749..9f81617d00 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -43,7 +43,7 @@ class TinySnbDDLTest : public DBTest { ASSERT_EQ(getStorageManager(*database) ->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getNumNodeStatisticsAndDeleteIDsPerTable(), + ->getNumNodeStatisticsAndDeleteIDsPerTable(), 4); } @@ -59,7 +59,7 @@ class TinySnbDDLTest : public DBTest { ASSERT_EQ(getStorageManager(*database) ->getNodesStore() .getNodesStatisticsAndDeletedIDs() - .getNumNodeStatisticsAndDeleteIDsPerTable(), + ->getNumNodeStatisticsAndDeleteIDsPerTable(), 3); initWithoutLoadingGraph(); } else { diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 8741bd6ec5..cae622b3e2 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -3,4 +3,3 @@ add_kuzu_test(node_insertion_deletion_test node_insertion_deletion_test.cpp) add_kuzu_test(wal_record_test wal_record_test.cpp) add_kuzu_test(wal_replayer_test wal_replayer_test.cpp) add_kuzu_test(wal_test wal_test.cpp) -add_kuzu_test(table_statistics_test table_statistics_test.cpp) diff --git a/test/storage/table_statistics_test.cpp b/test/storage/table_statistics_test.cpp deleted file mode 100644 index 0aeefa10fc..0000000000 --- a/test/storage/table_statistics_test.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "catalog/table_schema.h" -#include "graph_test/graph_test.h" -#include "storage/stats/nodes_statistics_and_deleted_ids.h" -#include "storage/stats/table_statistics.h" -#include "storage/wal_replayer.h" - -using namespace kuzu::common; -using namespace kuzu::storage; -using namespace kuzu::catalog; -using namespace kuzu::transaction; - -TEST(TableStatisticsTest, CopyTableStatistics) { - auto numTuples = 20; - - std::unordered_map> propertyStatistics; - propertyStatistics[0] = std::make_unique(true); - propertyStatistics[1] = std::make_unique(false); - NodeTableStatsAndDeletedIDs tableStats(0, - NodeTableStatsAndDeletedIDs::getMaxNodeOffsetFromNumTuples(numTuples), - std::move(propertyStatistics)); - NodeTableStatsAndDeletedIDs copy(tableStats); - - ASSERT_EQ(copy.getNumTuples(), numTuples); - ASSERT_EQ(copy.getPropertyStatistics(0).mayHaveNull(), true); - ASSERT_EQ(copy.getPropertyStatistics(1).mayHaveNull(), false); -} - -TEST(TableStatisticsTest, PropertyStatisticsSetHasNull) { - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - - NodesStatisticsAndDeletedIDs tablesStatistics; - std::vector> properties; - auto propertyID = 0; - auto tableID = 0; - properties.push_back(std::make_unique( - "testproperty", std::make_unique(LogicalTypeID::BOOL), tableID, propertyID)); - auto schema = NodeTableSchema("TestTable", tableID, propertyID, std::move(properties)); - tablesStatistics.addTableStatistic(&schema); - - auto propertyStatistics = tablesStatistics.getPropertyStatisticsForTable( - DUMMY_WRITE_TRANSACTION, tableID, propertyID); - - ASSERT_FALSE(propertyStatistics.mayHaveNull()); - propertyStatistics.setHasNull(); - ASSERT_TRUE(propertyStatistics.mayHaveNull()); -} - -TEST(TableStatisticsTest, RWPropertyStatsSetHasNull) { - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - - NodesStatisticsAndDeletedIDs tablesStatistics; - std::vector> properties; - auto propertyID = 0; - auto tableID = 0; - properties.push_back(std::make_unique( - "testproperty", std::make_unique(LogicalTypeID::BOOL), tableID, propertyID)); - auto schema = NodeTableSchema("TestTable", tableID, propertyID, std::move(properties)); - tablesStatistics.addTableStatistic(&schema); - RWPropertyStats stats(&tablesStatistics, tableID, propertyID); - - ASSERT_FALSE(stats.mayHaveNull(DUMMY_WRITE_TRANSACTION)); - stats.setHasNull(DUMMY_WRITE_TRANSACTION); - ASSERT_TRUE(stats.mayHaveNull(DUMMY_WRITE_TRANSACTION)); -}