Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework table statistics ser/deser #2073

Merged
merged 1 commit into from
Sep 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.8.9 LANGUAGES CXX)
project(Kuzu VERSION 0.0.8.10 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
4 changes: 2 additions & 2 deletions src/include/processor/operator/scan/scan_rel_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ struct ScanRelTalePosInfo {
struct RelTableScanInfo {
storage::RelTableDataType relTableDataType;
storage::DirectedRelTableData* tableData;
storage::RelStatistics* relStats;
storage::RelTableStats* relStats;
std::vector<common::property_id_t> propertyIds;

RelTableScanInfo(storage::RelTableDataType relTableDataType,
storage::DirectedRelTableData* tableData, storage::RelStatistics* relStats,
storage::DirectedRelTableData* tableData, storage::RelTableStats* relStats,
std::vector<common::property_id_t> propertyIds)
: relTableDataType{relTableDataType}, tableData{tableData}, relStats{relStats},
propertyIds{std::move(propertyIds)} {}
Expand Down
51 changes: 23 additions & 28 deletions src/include/storage/stats/nodes_statistics_and_deleted_ids.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,28 @@
namespace kuzu {
namespace storage {

class NodeStatisticsAndDeletedIDs : public TableStatistics {
class NodeTableStatsAndDeletedIDs : public TableStatistics {

public:
explicit NodeStatisticsAndDeletedIDs(const catalog::TableSchema& schema)
explicit NodeTableStatsAndDeletedIDs(const catalog::TableSchema& schema)
: TableStatistics{schema}, tableID{schema.tableID} {}

NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
const std::vector<common::offset_t>& deletedNodeOffsets)
: NodeTableStatsAndDeletedIDs{tableID, maxNodeOffset, deletedNodeOffsets, {}} {}
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics)
: NodeStatisticsAndDeletedIDs(tableID, maxNodeOffset,
: NodeTableStatsAndDeletedIDs(tableID, maxNodeOffset,
std::vector<common::offset_t>() /* no deleted node offsets during initial loading */,
std::move(propertyStatistics)) {}

NodeStatisticsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
const std::vector<common::offset_t>& deletedNodeOffsets,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics);

NodeStatisticsAndDeletedIDs(const NodeStatisticsAndDeletedIDs& other) = default;
NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other) = default;

inline common::offset_t getMaxNodeOffset() {
return getMaxNodeOffsetFromNumTuples(getNumTuples());
Expand Down Expand Up @@ -60,6 +63,10 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics {
return numTuples == 0 ? UINT64_MAX : numTuples - 1;
}

void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final;
static std::unique_ptr<NodeTableStatsAndDeletedIDs> deserialize(common::table_id_t tableID,
common::offset_t maxNodeOffset, common::FileInfo* fileInfo, uint64_t& offset);

private:
void errorIfNodeHasEdges(common::offset_t nodeOffset);

Expand Down Expand Up @@ -92,12 +99,12 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {

// Should be used only by tests.
explicit NodesStatisticsAndDeletedIDs(
std::unordered_map<common::table_id_t, std::unique_ptr<NodeStatisticsAndDeletedIDs>>&
std::unordered_map<common::table_id_t, std::unique_ptr<NodeTableStatsAndDeletedIDs>>&
nodesStatisticsAndDeletedIDs);

inline NodeStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs(
inline NodeTableStatsAndDeletedIDs* getNodeStatisticsAndDeletedIDs(
common::table_id_t tableID) const {
return (NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx
return (NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForReadOnlyTrx
->tableStatisticPerTable[tableID]
.get();
}
Expand All @@ -114,7 +121,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {

inline void setNumTuplesForTable(common::table_id_t tableID, uint64_t numTuples) override {
initTableStatisticsForWriteTrx();
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->setNumTuples(numTuples);
Expand All @@ -129,7 +136,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
std::unique_lock xLck{mtx};
return tablesStatisticsContentForWriteTrx == nullptr ?
getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() :
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->getMaxNodeOffset();
Expand All @@ -148,7 +155,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
common::offset_t addNode(common::table_id_t tableID) {
lock_t lck{mtx};
initTableStatisticsForWriteTrxNoLock();
return ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
return ((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->addNode();
Expand All @@ -158,7 +165,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
void deleteNode(common::table_id_t tableID, common::offset_t nodeOffset) {
lock_t lck{mtx};
initTableStatisticsForWriteTrxNoLock();
((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
((NodeTableStatsAndDeletedIDs*)tablesStatisticsContentForWriteTrx
->tableStatisticPerTable[tableID]
.get())
->deleteNode(nodeOffset);
Expand All @@ -174,33 +181,21 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics {
void addNodeStatisticsAndDeletedIDs(catalog::NodeTableSchema* tableSchema);

protected:
inline std::string getTableTypeForPrinting() const override {
return "NodesStatisticsAndDeletedIDs";
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) override {
return std::make_unique<NodeStatisticsAndDeletedIDs>(*tableSchema);
return std::make_unique<NodeTableStatsAndDeletedIDs>(*tableSchema);
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) override {
return std::make_unique<NodeStatisticsAndDeletedIDs>(
*(NodeStatisticsAndDeletedIDs*)tableStatistics);
return std::make_unique<NodeTableStatsAndDeletedIDs>(
*(NodeTableStatsAndDeletedIDs*)tableStatistics);
}

// Returns the path that StorageUtils::getNodesStatisticsAndDeletedIDsFilePath yields for the
// given directory and database file type.
inline std::string getTableStatisticsFilePath(
    const std::string& directory, common::DBFileType dbFileType) override {
    auto statsFilePath =
        StorageUtils::getNodesStatisticsAndDeletedIDsFilePath(directory, dbFileType);
    return statsFilePath;
}

std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override;

void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override;
};

} // namespace storage
Expand Down
38 changes: 18 additions & 20 deletions src/include/storage/stats/rels_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@ namespace kuzu {
namespace storage {

class RelsStatistics;
class RelStatistics : public TableStatistics {
class RelTableStats : public TableStatistics {
friend class RelsStatistics;

public:
RelStatistics(const catalog::TableSchema& tableSchema)
RelTableStats(const catalog::TableSchema& tableSchema)
: TableStatistics{tableSchema}, nextRelOffset{0} {}
RelStatistics(uint64_t numRels,
RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset)
: TableStatistics{common::TableType::REL, numRels, tableID, {}}, nextRelOffset{
nextRelOffset} {}
RelTableStats(uint64_t numRels, common::table_id_t tableID,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
common::offset_t nextRelOffset)
: TableStatistics{numRels, std::move(propertyStats)}, nextRelOffset{nextRelOffset} {}
: TableStatistics{common::TableType::REL, numRels, tableID, std::move(propertyStats)},
nextRelOffset{nextRelOffset} {}

// Accessor for the stored nextRelOffset value.
inline common::offset_t getNextRelOffset() const {
    return nextRelOffset;
}

void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) final;
static std::unique_ptr<RelTableStats> deserialize(
uint64_t numRels, common::table_id_t tableID, common::FileInfo* fileInfo, uint64_t& offset);

private:
common::offset_t nextRelOffset;
};
Expand All @@ -40,18 +48,18 @@ class RelsStatistics : public TablesStatistics {
}

// Should only be used by tests.
explicit RelsStatistics(std::unordered_map<common::table_id_t, std::unique_ptr<RelStatistics>>
explicit RelsStatistics(std::unordered_map<common::table_id_t, std::unique_ptr<RelTableStats>>
relStatisticPerTable_);

// Default-constructs a RelsStatistics and saves it under `directory` as the ORIGINAL database
// file, using a READ_ONLY transaction type.
static inline void saveInitialRelsStatisticsToFile(const std::string& directory) {
    auto initialStats = std::make_unique<RelsStatistics>();
    initialStats->saveToFile(
        directory, common::DBFileType::ORIGINAL, transaction::TransactionType::READ_ONLY);
}

inline RelStatistics* getRelStatistics(common::table_id_t tableID) const {
inline RelTableStats* getRelStatistics(common::table_id_t tableID) const {
auto& tableStatisticPerTable =
tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable;
return (RelStatistics*)tableStatisticPerTable[tableID].get();
return (RelTableStats*)tableStatisticPerTable[tableID].get();
}

void setNumTuplesForTable(common::table_id_t relTableID, uint64_t numRels) override;
Expand All @@ -62,16 +70,14 @@ class RelsStatistics : public TablesStatistics {
transaction::Transaction* transaction, common::table_id_t tableID);

protected:
inline std::string getTableTypeForPrinting() const override { return "RelsStatistics"; }

inline std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) override {
return std::make_unique<RelStatistics>(*tableSchema);
return std::make_unique<RelTableStats>(*tableSchema);
}

inline std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) override {
return std::make_unique<RelStatistics>(*(RelStatistics*)tableStatistics);
return std::make_unique<RelTableStats>(*(RelTableStats*)tableStatistics);
}

inline std::string getTableStatisticsFilePath(
Expand All @@ -80,18 +86,10 @@ class RelsStatistics : public TablesStatistics {
}

inline void increaseNextRelOffset(common::table_id_t relTableID, uint64_t numTuples) {
((RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID)
((RelTableStats*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID)
.get())
->nextRelOffset += numTuples;
}

std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) override;

void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) override;
};

} // namespace storage
Expand Down
74 changes: 24 additions & 50 deletions src/include/storage/stats/table_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,36 @@
namespace kuzu {
namespace storage {

using lock_t = std::unique_lock<std::mutex>;
using atomic_uint64_vec_t = std::vector<std::atomic<uint64_t>>;

class TableStatistics {
public:
virtual ~TableStatistics() = default;

explicit TableStatistics(const catalog::TableSchema& schema) : numTuples{0} {
explicit TableStatistics(const catalog::TableSchema& schema)
: tableType{schema.tableType}, numTuples{0}, tableID{schema.tableID} {
for (auto property : schema.getProperties()) {
propertyStatistics[property->getPropertyID()] = std::make_unique<PropertyStatistics>();
}
}

explicit TableStatistics(uint64_t numTuples,
explicit TableStatistics(common::TableType tableType, uint64_t numTuples,
common::table_id_t tableID,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStatistics)
: numTuples{numTuples}, propertyStatistics{std::move(propertyStatistics)} {
: numTuples{numTuples}, tableID{tableID}, propertyStatistics{
std::move(propertyStatistics)} {
assert(numTuples != UINT64_MAX);
}

explicit TableStatistics(const TableStatistics& other) : numTuples{other.numTuples} {
explicit TableStatistics(const TableStatistics& other)
: tableType{other.tableType}, numTuples{other.numTuples}, tableID{other.tableID} {
for (auto& propertyStats : other.propertyStatistics) {
propertyStatistics[propertyStats.first] =
std::make_unique<PropertyStatistics>(*propertyStats.second.get());
}
}

inline bool isEmpty() const { return numTuples == 0; }
virtual ~TableStatistics() = default;

Check warning on line 40 in src/include/storage/stats/table_statistics.h

View check run for this annotation

Codecov / codecov/patch

src/include/storage/stats/table_statistics.h#L40

Added line #L40 was not covered by tests

// True when the recorded tuple count is zero.
inline bool isEmpty() const {
    return numTuples == 0;
}
// Accessor for the recorded tuple count.
inline uint64_t getNumTuples() const {
    return numTuples;
}

virtual inline void setNumTuples(uint64_t numTuples_) {
assert(numTuples_ != UINT64_MAX);
numTuples = numTuples_;
Expand All @@ -51,19 +50,24 @@
assert(propertyStatistics.contains(propertyID));
return *(propertyStatistics.at(propertyID));
}

// Read-only view of the whole propertyStatistics map (property ID -> owned PropertyStatistics).
inline const std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&
getPropertyStatistics() {
    const auto& allPropertyStats = propertyStatistics;
    return allPropertyStats;
}

// Stores a heap-allocated copy of `newStats` under `propertyID`, replacing any existing entry
// (or creating one if the ID is not present yet).
inline void setPropertyStatistics(
    common::property_id_t propertyID, PropertyStatistics newStats) {
    auto ownedCopy = std::make_unique<PropertyStatistics>(newStats);
    propertyStatistics[propertyID] = std::move(ownedCopy);
}

void serialize(common::FileInfo* fileInfo, uint64_t& offset);
static std::unique_ptr<TableStatistics> deserialize(
common::FileInfo* fileInfo, uint64_t& offset);
virtual void serializeInternal(common::FileInfo* fileInfo, uint64_t& offset) = 0;

private:
common::TableType tableType;
uint64_t numTuples;
common::table_id_t tableID;
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>
propertyStatistics;
};
Expand All @@ -88,7 +92,7 @@
// Reports whether tablesStatisticsContentForWriteTrx is currently set (non-null).
inline bool hasUpdates() {
    const bool writeTrxContentExists = tablesStatisticsContentForWriteTrx != nullptr;
    return writeTrxContentExists;
}

inline void checkpointInMemoryIfNecessary() {
lock_t lck{mtx};
std::unique_lock lck{mtx};
tablesStatisticsContentForReadOnlyTrx = std::move(tablesStatisticsContentForWriteTrx);
}

Expand All @@ -110,52 +114,22 @@
->getNumTuples();
}

inline PropertyStatistics& getPropertyStatisticsForTable(
const transaction::Transaction& transaction, common::table_id_t tableID,
common::property_id_t propertyID) {
if (transaction.isReadOnly()) {
assert(tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable.at(tableID).get();
return tableStatistics->getPropertyStatistics(propertyID);
} else {
initTableStatisticsForWriteTrx();
assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get();
return tableStatistics->getPropertyStatistics(propertyID);
}
}
PropertyStatistics& getPropertyStatisticsForTable(const transaction::Transaction& transaction,
common::table_id_t tableID, common::property_id_t propertyID);

void setPropertyStatisticsForTable(
common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats) {
initTableStatisticsForWriteTrx();
assert(tablesStatisticsContentForWriteTrx->tableStatisticPerTable.contains(tableID));
auto tableStatistics =
tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(tableID).get();
tableStatistics->setPropertyStatistics(propertyID, stats);
}
common::table_id_t tableID, common::property_id_t propertyID, PropertyStatistics stats);

protected:
virtual inline std::string getTableTypeForPrinting() const = 0;

virtual inline std::unique_ptr<TableStatistics> constructTableStatistic(
virtual std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableSchema* tableSchema) = 0;

virtual inline std::unique_ptr<TableStatistics> constructTableStatistic(
virtual std::unique_ptr<TableStatistics> constructTableStatistic(
TableStatistics* tableStatistics) = 0;

virtual inline std::string getTableStatisticsFilePath(
virtual std::string getTableStatisticsFilePath(
const std::string& directory, common::DBFileType dbFileType) = 0;

virtual std::unique_ptr<TableStatistics> deserializeTableStatistics(uint64_t numTuples,
std::unordered_map<common::property_id_t, std::unique_ptr<PropertyStatistics>>&&
propertyStats,
uint64_t& offset, common::FileInfo* fileInfo, uint64_t tableID) = 0;

virtual void serializeTableStatistics(
TableStatistics* tableStatistics, uint64_t& offset, common::FileInfo* fileInfo) = 0;

void readFromFile(const std::string& directory);
void readFromFile(const std::string& directory, common::DBFileType dbFileType);

Expand Down
Loading