Skip to content

Commit

Permalink
rework table statistics ser/deser
Browse files Browse the repository at this point in the history
  • Loading branch information
ray6080 committed Sep 23, 2023
1 parent b2ec22c commit 5192793
Show file tree
Hide file tree
Showing 10 changed files with 192 additions and 198 deletions.
4 changes: 2 additions & 2 deletions src/include/processor/operator/scan/scan_rel_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ struct ScanRelTalePosInfo {
struct RelTableScanInfo {
storage::RelTableDataType relTableDataType;
storage::DirectedRelTableData* tableData;
storage::RelStatistics* relStats;
storage::RelTableStats* relStats;
std::vector<common::property_id_t> propertyIds;

RelTableScanInfo(storage::RelTableDataType relTableDataType,
storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats,
storage::DirectedRelTableData* tableData, storage::RelTableStats* relStats,
std::vector<common::property_id_t> propertyIds)
: relTableDataType{relTableDataType}, tableData{tableData}, relStats{relStats},
propertyIds{std::move(propertyIds)} {}
Expand Down
51 changes: 23 additions & 28 deletions src/include/storage/stats/nodes_statistics_and_deleted_ids.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,28 @@
namespace kuzu {
namespace storage {

class NodeStatisticsAndDeletedIDs : public TableStatistics {
class NodeTableStatsAndDeletedIDs : public TableStatistics {

public:
explicit NodeStatisticsAndDeletedIDs(const catalog::TableSchema& schema)
explicit NodeTableStatsAndDeletedIDs(const catalog::TableSchema& schema)
: TableStatistics{schema}, tableID{schema.tableID} {}

NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
const std::vector<common::offset_t>& deletedNodeOffsets)
: NodeTableStatsAndDeletedIDs{tableID, maxNodeOffset, deletedNodeOffsets, {}} {}
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics)
: NodeStatisticsAndDeletedIDs(tableID, maxNodeOffset,
: NodeTableStatsAndDeletedIDs(tableID, maxNodeOffset,
std::vector<common::offset_t>() /* no deleted node offsets during initial loading */,
std::move(propertyStatistics)) {}

NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
const std::vector<common::offset_t>& deletedNodeOffsets,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics);

NodeStatisticsAndDeletedIDs(const NodeStatisticsAndDeletedIDs& other) = default;
NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) = default;

inline common::offset_t getMaxNodeOffset() {
return getMaxNodeOffsetFromNumTuples(getNumTuples());
Expand Down Expand Up @@ -60,6 +63,10 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics {
return numTuples == 0 ? UINT64_MAX : numTuples - 1;
}

void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final;
static std::unique_ptr<NodeTableStatsAndDeletedIDs> deserialize(common::table_id_t tableID,
common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset);

private:
void errorIfNodeHasEdges(common::offset_t nodeOffset);

Expand Down Expand Up @@ -92,12 +99,12 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {

// Should be used only by tests.
explicit NodesStatisticsAndDeletedIDs(
std::unordered_map<common::table_id_t, std::unique_ptr<NodeStatisticsAndDeletedIDs>>&
std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableStatsAndDeletedIDs>>&
nodesStatisticsAndDeletedIDs);

inline NodeStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs(
inline NodeTableStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs(
common::table_id_t tableID) const {
return (NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx
return (NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx
->tableStatisticPerTable[tableID]
.get();
}
Expand All @@ -114,7 +121,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {

inline void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) override {
initTableStatisticsForWriteTrx();
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->setNumTuples(numTuples);
Expand All @@ -129,7 +136,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
std::unique_lock xLck{mtx};
return tablesStatisticsContentForWriteTrx == nullptr ?
getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() :
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->getMaxNodeOffset();
Expand All @@ -148,7 +155,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
common::offset_t addNode(common::table_id_t tableID) {
lock_t lck{mtx};
initTableStatisticsForWriteTrxNoLock();
return ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
return ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->addNode();
Expand All @@ -158,7 +165,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
void deleteNode(common::table_id_t tableID, common::offset_t nodeOffset) {
lock_t lck{mtx};
initTableStatisticsForWriteTrxNoLock();
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->deleteNode(nodeOffset);
Expand All @@ -174,33 +181,21 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema);

protected:
inline std::string getTableTypeForPrinting() const override {
return "NodesStatisticsAndDeletedIDs";
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) override {
return std::make_unique<NodeStatisticsAndDeletedIDs>(*tableSchema);
return std::make_unique<NodeTableStatsAndDeletedIDs>(*tableSchema);
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) override {
return std::make_unique<NodeStatisticsAndDeletedIDs>(
*(NodeStatisticsAndDeletedIDs*)tableStatistics);
return std::make_unique<NodeTableStatsAndDeletedIDs>(
*(NodeTableStatsAndDeletedIDs*)tableStatistics);
}

inline std::string getTableStatisticsFilePath(
const std::string& directory, common::DBFileType dbFileType) override {
return StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType);
}

std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override;

void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override;
};

} // namespace storage
Expand Down
38 changes: 18 additions & 20 deletions src/include/storage/stats/rels_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@ namespace kuzu {
namespace storage {

class RelsStatistics;
class RelStatistics : public TableStatistics {
class RelTableStats : public TableStatistics {
friend class RelsStatistics;

public:
RelStatistics(const catalog::TableSchema& tableSchema)
RelTableStats(const catalog::TableSchema& tableSchema)
: TableStatistics{tableSchema}, nextRelOffset{0} {}
RelStatistics(uint64_t numRels,
RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset)
: TableStatistics{common::TableType::REL, numRels, tableID, {}}, nextRelOffset{
nextRelOffset} {}
RelTableStats(uint64_t numRels, common::table_id_t tableID,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
common::offset_t nextRelOffset)
: TableStatistics{numRels, std::move(propertyStats)}, nextRelOffset{nextRelOffset} {}
: TableStatistics{common::TableType::REL, numRels, tableID, std::move(propertyStats)},
nextRelOffset{nextRelOffset} {}

inline common::offset_t getNextRelOffset() const { return nextRelOffset; }

void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final;
static std::unique_ptr<RelTableStats> deserialize(
uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset);

private:
common::offset_t nextRelOffset;
};
Expand All @@ -40,18 +48,18 @@ class RelsStatistics : public TablesStatistics {
}

// Should only be used by tests.
explicit RelsStatistics(std::unordered_map<common::table_id_t, std::unique_ptr<RelStatistics>>
explicit RelsStatistics(std::unordered_map<common::table_id_t, std::unique_ptr<RelTableStats>>
relStatisticPerTable_);

static inline void saveInitialRelsStatisticsToFile(const std::string& directory) {
std::make_unique<RelsStatistics>()->saveToFile(
directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY);
}

inline RelStatistics* getRelStatistics(common::table_id_t tableID) const {
inline RelTableStats* getRelStatistics(common::table_id_t tableID) const {
auto& tableStatisticPerTable =
tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable;
return (RelStatistics*)tableStatisticPerTable[tableID].get();
return (RelTableStats*)tableStatisticPerTable[tableID].get();
}

void setNumTuplesForTable(common::table_id_t relTableID, uint64_t numRels) override;
Expand All @@ -62,16 +70,14 @@ class RelsStatistics : public TablesStatistics {
transaction::Transaction* transaction, common::table_id_t tableID);

protected:
inline std::string getTableTypeForPrinting() const override { return "RelsStatistics"; }

inline std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) override {
return std::make_unique<RelStatistics>(*tableSchema);
return std::make_unique<RelTableStats>(*tableSchema);
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) override {
return std::make_unique<RelStatistics>(*(RelStatistics*)tableStatistics);
return std::make_unique<RelTableStats>(*(RelTableStats*)tableStatistics);
}

inline std::string getTableStatisticsFilePath(
Expand All @@ -80,18 +86,10 @@ class RelsStatistics : public TablesStatistics {
}

inline void increaseNextRelOffset(common::table_id_t relTableID, uint64_t numTuples) {
((RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID)
((RelTableStats*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID)
.get())
->nextRelOffset += numTuples;
}

std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override;

void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override;
};

} // namespace storage
Expand Down
74 changes: 24 additions & 50 deletions src/include/storage/stats/table_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,36 @@
namespace kuzu {
namespace storage {

using lock_t = std::unique_lock<std::mutex>;
using atomic_uint64_vec_t = std::vector<std::atomic<uint64_t>>;

class TableStatistics {
public:
virtual ~TableStatistics() = default;

explicit TableStatistics(const catalog::TableSchema& schema) : numTuples{0} {
explicit TableStatistics(const catalog::TableSchema& schema)
: tableType{schema.tableType}, numTuples{0}, tableID{schema.tableID} {
for (auto property : schema.getProperties()) {
propertyStatistics[property->getPropertyID()] = std::make_unique<PropertyStatistics>();
}
}

explicit TableStatistics(uint64_t numTuples,
explicit TableStatistics(common::TableType tableType, uint64_t numTuples,
common::table_id_t tableID,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics)
: numTuples{numTuples}, propertyStatistics{std::move(propertyStatistics)} {
: numTuples{numTuples}, tableID{tableID}, propertyStatistics{
std::move(propertyStatistics)} {
assert(numTuples != UINT64_MAX);
}

explicit TableStatistics(const TableStatistics& other) : numTuples{other.numTuples} {
explicit TableStatistics(const TableStatistics& other)
: tableType{other.tableType}, numTuples{other.numTuples}, tableID{other.tableID} {
for (auto& propertyStats : other.propertyStatistics) {
propertyStatistics[propertyStats.first] =
std::make_unique<PropertyStatistics>(*propertyStats.second.get());
}
}

inline bool isEmpty() const { return numTuples == 0; }
virtual ~TableStatistics() = default;

[Codecov / codecov/patch annotation] Check warning on line 40 in src/include/storage/stats/table_statistics.h (view check run for this annotation: src/include/storage/stats/table_statistics.h#L40): added line #L40 was not covered by tests.

inline bool isEmpty() const { return numTuples == 0; }
inline uint64_t getNumTuples() const { return numTuples; }

virtual inline void setNumTuples(uint64_t numTuples_) {
assert(numTuples_ != UINT64_MAX);
numTuples = numTuples_;
Expand All @@ -51,19 +50,24 @@ class TableStatistics {
assert(propertyStatistics.contains(propertyID));
return *(propertyStatistics.at(propertyID));
}

inline const std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&
getPropertyStatistics() {
return propertyStatistics;
}

inline void setPropertyStatistics(
common::property_id_t propertyID, PropertyStatistics newStats) {
propertyStatistics[propertyID] = std::make_unique<PropertyStatistics>(newStats);
}

void serialize(common::FileInfo* fileInfo, uint64_t& offset);
static std::unique_ptr<TableStatistics> deserialize(
common::FileInfo* fileInfo, uint64_t& offset);
virtual void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) = 0;

private:
common::TableType tableType;
uint64_t numTuples;
common::table_id_t tableID;
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>
propertyStatistics;
};
Expand All @@ -88,7 +92,7 @@ class TablesStatistics {
inline bool hasUpdates() { return tablesStatisticsContentForWriteTrx != nullptr; }

inline void checkpointInMemoryIfNecessary() {
lock_t lck{mtx};
std::unique_lock lck{mtx};
tablesStatisticsContentForReadOnlyTrx = std::move(tablesStatisticsContentForWriteTrx);
}

Expand All @@ -110,52 +114,22 @@ class TablesStatistics {
->getNumTuples();
}

inline PropertyStatistics& getPropertyStatisticsForTable(
const transaction::Transaction& transaction, common::table_id_t tableID,
common::property_id_t propertyID) {
if (transaction.isReadOnly()) {
assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get();
return tableStatistics->getPropertyStatistics(propertyID);
} else {
initTableStatisticsForWriteTrx();
assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get();
return tableStatistics->getPropertyStatistics(propertyID);
}
}
PropertyStatistics& getPropertyStatisticsForTable(const transaction::Transaction& transaction,
common::table_id_t tableID, common::property_id_t propertyID);

void setPropertyStatisticsForTable(
common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats) {
initTableStatisticsForWriteTrx();
assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get();
tableStatistics->setPropertyStatistics(propertyID, stats);
}
common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats);

protected:
virtual inline std::string getTableTypeForPrinting() const = 0;

virtual inline std::unique_ptr<TableStatistics> constructTableStatistic(
virtual std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) = 0;

virtual inline std::unique_ptr<TableStatistics> constructTableStatistic(
virtual std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) = 0;

virtual inline std::string getTableStatisticsFilePath(
virtual std::string getTableStatisticsFilePath(
const std::string& directory, common::DBFileType dbFileType) = 0;

virtual std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) = 0;

virtual void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) = 0;

void readFromFile(const std::string& directory);
void readFromFile(const std::string& directory, common::DBFileType dbFileType);

Expand Down
Loading

0 comments on commit 5192793

Please sign in to comment.