Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor node table to take in columnID instead of propertyID #2054

Merged
merged 1 commit into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "catalog/rel_table_group_schema.h"
#include "catalog/rel_table_schema.h"
#include "common/constants.h"
#include "common/exception/catalog.h"
#include "common/exception/internal.h"
#include "common/exception/not_implemented.h"
#include "common/exception/runtime.h"
Expand Down Expand Up @@ -50,6 +49,16 @@ property_id_t TableSchema::getPropertyID(const std::string& propertyName) const
"Table: {} doesn't have a property with propertyName={}.", tableName, propertyName));
}

// TODO(Guodong): Instead of looping over properties, cache a map between propertyID and columnID.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes let's open an issue about this

column_id_t TableSchema::getColumnID(const property_id_t propertyID) const {
for (auto i = 0u; i < properties.size(); i++) {
if (properties[i]->getPropertyID() == propertyID) {
return i;
}
}
return INVALID_COLUMN_ID;
}

Property* TableSchema::getProperty(property_id_t propertyID) const {
for (auto& property : properties) {
if (property->getPropertyID() == propertyID) {
Expand Down
1 change: 1 addition & 0 deletions src/include/catalog/table_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class TableSchema {
std::string getPropertyName(common::property_id_t propertyID) const;

common::property_id_t getPropertyID(const std::string& propertyName) const;
common::column_id_t getColumnID(common::property_id_t propertyID) const;

Property* getProperty(common::property_id_t propertyID) const;

Expand Down
14 changes: 8 additions & 6 deletions src/include/processor/operator/mask.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ struct MaskData {
std::fill(data, data + size, 0);
}

inline void setMask(uint64_t pos, uint8_t maskValue) { data[pos] = maskValue; }
inline bool isMasked(uint64_t pos, uint8_t trueMaskVal) { return data[pos] == trueMaskVal; }
inline void setMask(uint64_t pos, uint8_t maskValue) const { data[pos] = maskValue; }
inline bool isMasked(uint64_t pos, uint8_t trueMaskVal) const {
return data[pos] == trueMaskVal;
}

private:
std::unique_ptr<uint8_t[]> dataBuffer;
Expand Down Expand Up @@ -63,7 +65,7 @@ class MaskCollection {

class NodeSemiMask {
public:
NodeSemiMask(storage::NodeTable* nodeTable) : nodeTable{nodeTable} {}
explicit NodeSemiMask(storage::NodeTable* nodeTable) : nodeTable{nodeTable} {}

virtual void init(transaction::Transaction* trx) = 0;

Expand All @@ -72,7 +74,7 @@ class NodeSemiMask {
virtual uint8_t getNumMasks() const = 0;
virtual void incrementNumMasks() = 0;

inline bool isEnabled() { return getNumMasks() > 0; }
inline bool isEnabled() const { return getNumMasks() > 0; }
inline storage::NodeTable* getNodeTable() const { return nodeTable; }

protected:
Expand All @@ -81,7 +83,7 @@ class NodeSemiMask {

class NodeOffsetSemiMask : public NodeSemiMask {
public:
NodeOffsetSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
explicit NodeOffsetSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
offsetMask = std::make_unique<MaskCollection>();
}

Expand Down Expand Up @@ -110,7 +112,7 @@ class NodeOffsetSemiMask : public NodeSemiMask {

class NodeOffsetAndMorselSemiMask : public NodeSemiMask {
public:
NodeOffsetAndMorselSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
explicit NodeOffsetAndMorselSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
offsetMask = std::make_unique<MaskCollection>();
morselMask = std::make_unique<MaskCollection>();
}
Expand Down
15 changes: 6 additions & 9 deletions src/include/processor/operator/persistent/insert_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@ class NodeInsertExecutor {
public:
NodeInsertExecutor(storage::NodeTable* table, std::vector<storage::RelTable*> relTablesToInit,
const DataPos& nodeIDVectorPos, std::vector<DataPos> propertyLhsPositions,
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators,
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx)
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators)
: table{table}, relTablesToInit{std::move(relTablesToInit)},
nodeIDVectorPos{nodeIDVectorPos}, propertyLhsPositions{std::move(propertyLhsPositions)},
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, propertyIDToVectorIdx{std::move(
propertyIDToVectorIdx)} {}
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, nodeIDVector{nullptr} {}
NodeInsertExecutor(const NodeInsertExecutor& other);

void init(ResultSet* resultSet, ExecutionContext* context);
Expand All @@ -37,8 +35,6 @@ class NodeInsertExecutor {
DataPos nodeIDVectorPos;
std::vector<DataPos> propertyLhsPositions;
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators;
// TODO(Guodong): remove this.
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx;

common::ValueVector* nodeIDVector;
std::vector<common::ValueVector*> propertyLhsVectors;
Expand All @@ -53,7 +49,8 @@ class RelInsertExecutor {
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators)
: relsStatistics{relsStatistics}, table{table}, srcNodePos{srcNodePos},
dstNodePos{dstNodePos}, propertyLhsPositions{std::move(propertyLhsPositions)},
propertyRhsEvaluators{std::move(propertyRhsEvaluators)} {}
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, srcNodeIDVector{nullptr},
dstNodeIDVector{nullptr} {}
RelInsertExecutor(const RelInsertExecutor& other);

void init(ResultSet* resultSet, ExecutionContext* context);
Expand All @@ -75,8 +72,8 @@ class RelInsertExecutor {
std::vector<DataPos> propertyLhsPositions;
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators;

common::ValueVector* srcNodeIDVector = nullptr;
common::ValueVector* dstNodeIDVector = nullptr;
common::ValueVector* srcNodeIDVector;
common::ValueVector* dstNodeIDVector;
std::vector<common::ValueVector*> propertyLhsVectors;
std::vector<common::ValueVector*> propertyRhsVectors;
};
Expand Down
2 changes: 1 addition & 1 deletion src/include/processor/operator/persistent/set_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class NodeSetExecutor {

struct NodeSetInfo {
storage::NodeTable* table;
common::property_id_t propertyID;
common::column_id_t columnID;
};

class SingleLabelNodeSetExecutor : public NodeSetExecutor {
Expand Down
13 changes: 7 additions & 6 deletions src/include/processor/operator/scan/scan_node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@ namespace processor {
class ScanSingleNodeTable : public ScanColumns {
public:
ScanSingleNodeTable(const DataPos& inVectorPos, std::vector<DataPos> outVectorsPos,
storage::NodeTable* table, std::vector<uint32_t> propertyColumnIds,
storage::NodeTable* table, std::vector<common::column_id_t> columnIDs,
std::unique_ptr<PhysicalOperator> prevOperator, uint32_t id,
const std::string& paramsString)
: ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id,
paramsString},
table{table}, propertyColumnIds{std::move(propertyColumnIds)} {}
table{table}, columnIDs{std::move(columnIDs)} {}

bool getNextTuplesInternal(ExecutionContext* context) override;

inline std::unique_ptr<PhysicalOperator> clone() override {
return make_unique<ScanSingleNodeTable>(inputNodeIDVectorPos, outPropertyVectorsPos, table,
propertyColumnIds, children[0]->clone(), id, paramsString);
columnIDs, children[0]->clone(), id, paramsString);
}

private:
storage::NodeTable* table;
std::vector<uint32_t> propertyColumnIds;
std::vector<common::column_id_t> columnIDs;
};

class ScanMultiNodeTables : public ScanColumns {
public:
ScanMultiNodeTables(const DataPos& inVectorPos, std::vector<DataPos> outVectorsPos,
std::unordered_map<common::table_id_t, storage::NodeTable*> tables,
std::unordered_map<common::table_id_t, std::vector<uint32_t>> tableIDToScanColumnIds,
std::unordered_map<common::table_id_t, std::vector<common::column_id_t>>
tableIDToScanColumnIds,
std::unique_ptr<PhysicalOperator> prevOperator, uint32_t id,
const std::string& paramsString)
: ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id,
Expand All @@ -48,7 +49,7 @@ class ScanMultiNodeTables : public ScanColumns {

private:
std::unordered_map<common::table_id_t, storage::NodeTable*> tables;
std::unordered_map<common::table_id_t, std::vector<uint32_t>> tableIDToScanColumnIds;
std::unordered_map<common::table_id_t, std::vector<common::column_id_t>> tableIDToScanColumnIds;
};

} // namespace processor
Expand Down
2 changes: 1 addition & 1 deletion src/include/processor/plan_mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class PlanMapper {

std::unique_ptr<NodeInsertExecutor> getNodeInsertExecutor(storage::NodesStore* nodesStore,
storage::RelsStore* relsStore, planner::LogicalCreateNodeInfo* info,
const planner::Schema& inSchema, const planner::Schema& outSchema);
const planner::Schema& inSchema, const planner::Schema& outSchema) const;
std::unique_ptr<RelInsertExecutor> getRelInsertExecutor(storage::RelsStore* relsStore,
planner::LogicalCreateRelInfo* info, const planner::Schema& inSchema,
const planner::Schema& outSchema);
Expand Down
7 changes: 4 additions & 3 deletions src/include/storage/copier/node_group.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ class NodeGroup {
inline void setNodeGroupIdx(uint64_t nodeGroupIdx_) { this->nodeGroupIdx = nodeGroupIdx_; }
inline uint64_t getNodeGroupIdx() const { return nodeGroupIdx; }
inline common::offset_t getNumNodes() const { return numNodes; }
inline ColumnChunk* getColumnChunk(common::property_id_t propertyID) {
return chunks.contains(propertyID) ? chunks.at(propertyID).get() : nullptr;
inline ColumnChunk* getColumnChunk(common::column_id_t columnID) {
assert(columnID < chunks.size());
return chunks[columnID].get();
}
inline bool isFull() const { return numNodes == common::StorageConstants::NODE_GROUP_SIZE; }

Expand All @@ -33,7 +34,7 @@ class NodeGroup {
private:
uint64_t nodeGroupIdx;
common::offset_t numNodes;
std::unordered_map<common::property_id_t, std::unique_ptr<ColumnChunk>> chunks;
std::vector<std::unique_ptr<ColumnChunk>> chunks;
};

} // namespace storage
Expand Down
4 changes: 2 additions & 2 deletions src/include/storage/local_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class LocalStorage {
void lookup(common::table_id_t tableID, common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void update(common::table_id_t tableID, common::property_id_t propertyID,
void update(common::table_id_t tableID, common::column_id_t columnID,
common::ValueVector* nodeIDVector, common::ValueVector* propertyVector);
void update(common::table_id_t tableID, common::property_id_t propertyID,
void update(common::table_id_t tableID, common::column_id_t columnID,
common::offset_t nodeOffset, common::ValueVector* propertyVector,
common::sel_t posInPropertyVector);

Expand Down
6 changes: 3 additions & 3 deletions src/include/storage/local_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,15 @@ class LocalTable {
void lookup(common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void update(common::property_id_t propertyID, common::ValueVector* nodeIDVector,
void update(common::column_id_t columnID, common::ValueVector* nodeIDVector,
common::ValueVector* propertyVector, MemoryManager* mm);
void update(common::property_id_t propertyID, common::offset_t nodeOffset,
void update(common::column_id_t columnID, common::offset_t nodeOffset,
common::ValueVector* propertyVector, common::sel_t posInPropertyVector, MemoryManager* mm);

void prepareCommit();

private:
std::map<common::property_id_t, std::unique_ptr<LocalColumn>> columns;
std::map<common::column_id_t, std::unique_ptr<LocalColumn>> columns;
NodeTable* table;
};

Expand Down
35 changes: 17 additions & 18 deletions src/include/storage/store/node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,34 @@ class NodeTable {
inline BMFileHandle* getDataFH() const { return dataFH; }

void read(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIds,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void insert(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
const std::vector<common::ValueVector*>& propertyVectors,
const std::unordered_map<common::property_id_t, common::vector_idx_t>&
propertyIDToVectorIdx);
void update(transaction::Transaction* transaction, common::property_id_t propertyID,
const std::vector<common::ValueVector*>& propertyVectors);
void update(transaction::Transaction* transaction, common::column_id_t columnID,
common::ValueVector* nodeIDVector, common::ValueVector* propertyVector);
void update(transaction::Transaction* transaction, common::property_id_t propertyID,
void update(transaction::Transaction* transaction, common::column_id_t columnID,
common::offset_t nodeOffset, common::ValueVector* propertyVector,
common::sel_t posInPropertyVector);
common::sel_t posInPropertyVector) const;
void delete_(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
DeleteState* deleteState);
void append(NodeGroup* nodeGroup);

std::unordered_set<common::property_id_t> getPropertyIDs() const;

inline NodeColumn* getPropertyColumn(common::property_id_t propertyIdx) {
assert(propertyColumns.contains(propertyIdx));
return propertyColumns.at(propertyIdx).get();
inline common::column_id_t getNumColumns() const { return columns.size(); }
inline NodeColumn* getColumn(common::column_id_t columnID) {
assert(columnID < columns.size());
return columns[columnID].get();
}
inline common::column_id_t getPKColumnID() const { return pkColumnID; }
inline PrimaryKeyIndex* getPKIndex() const { return pkIndex.get(); }
inline NodesStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs() const {
return nodesStatisticsAndDeletedIDs;
}
inline common::property_id_t getPKPropertyID() const { return pkPropertyID; }
inline common::table_id_t getTableID() const { return tableID; }

inline void dropColumn(common::property_id_t propertyID) { propertyColumns.erase(propertyID); }
inline void dropColumn(common::column_id_t columnID) {
columns.erase(columns.begin() + columnID);
}
void addColumn(const catalog::Property& property, common::ValueVector* defaultValueVector,
transaction::Transaction* transaction);

Expand All @@ -89,16 +88,16 @@ class NodeTable {

void insertPK(common::ValueVector* nodeIDVector, common::ValueVector* primaryKeyVector);
inline uint64_t getNumNodeGroups(transaction::Transaction* transaction) const {
return propertyColumns.begin()->second->getNumNodeGroups(transaction);
assert(!columns.empty());
return columns[0]->getNumNodeGroups(transaction);
}

private:
NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs;
// TODO(Guodong): use vector here
std::map<common::property_id_t, std::unique_ptr<NodeColumn>> propertyColumns;
std::vector<std::unique_ptr<NodeColumn>> columns;
BMFileHandle* dataFH;
BMFileHandle* metadataFH;
common::property_id_t pkPropertyID;
common::column_id_t pkColumnID;
std::unique_ptr<PrimaryKeyIndex> pkIndex;
common::table_id_t tableID;
BufferManager& bufferManager;
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/store/nodes_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class NodesStore {

inline NodeColumn* getNodePropertyColumn(
common::table_id_t tableID, uint64_t propertyIdx) const {
return nodeTables.at(tableID)->getPropertyColumn(propertyIdx);
return nodeTables.at(tableID)->getColumn(propertyIdx);
}
inline PrimaryKeyIndex* getPKIndex(common::table_id_t tableID) {
return nodeTables[tableID]->getPKIndex();
Expand Down
2 changes: 1 addition & 1 deletion src/main/storage_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void StorageDriver::scan(const std::string& nodeName, const std::string& propert
auto nodeTableID = catalogContent->getTableID(nodeName);
auto propertyID = catalogContent->getTableSchema(nodeTableID)->getPropertyID(propertyName);
auto nodeTable = storageManager->getNodesStore().getNodeTable(nodeTableID);
auto column = nodeTable->getPropertyColumn(propertyID);
auto column = nodeTable->getColumn(propertyID);
auto current_buffer = result;
std::vector<std::thread> threads;
auto numElementsPerThread = size / numThreads + 1;
Expand Down
10 changes: 3 additions & 7 deletions src/processor/map/map_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ static std::vector<DataPos> populateLhsVectorPositions(
std::unique_ptr<NodeInsertExecutor> PlanMapper::getNodeInsertExecutor(
storage::NodesStore* nodesStore, storage::RelsStore* relsStore,
planner::LogicalCreateNodeInfo* info, const planner::Schema& inSchema,
const planner::Schema& outSchema) {
const planner::Schema& outSchema) const {
auto node = info->node;
auto nodeTableID = node->getSingleTableID();
auto table = nodesStore->getNodeTable(nodeTableID);
Expand All @@ -41,15 +41,11 @@ std::unique_ptr<NodeInsertExecutor> PlanMapper::getNodeInsertExecutor(
auto nodeIDPos = DataPos(outSchema.getExpressionPos(*node->getInternalIDProperty()));
std::vector<DataPos> lhsVectorPositions = populateLhsVectorPositions(info->setItems, outSchema);
std::vector<std::unique_ptr<ExpressionEvaluator>> evaluators;
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx;
for (auto i = 0u; i < info->setItems.size(); ++i) {
auto& [lhs, rhs] = info->setItems[i];
auto propertyExpression = (binder::PropertyExpression*)lhs.get();
for (auto& [_, rhs] : info->setItems) {
evaluators.push_back(ExpressionMapper::getEvaluator(rhs, &inSchema));
propertyIDToVectorIdx.insert({propertyExpression->getPropertyID(nodeTableID), i});
}
return std::make_unique<NodeInsertExecutor>(table, std::move(relTablesToInit), nodeIDPos,
std::move(lhsVectorPositions), std::move(evaluators), std::move(propertyIDToVectorIdx));
std::move(lhsVectorPositions), std::move(evaluators));
}

std::unique_ptr<PhysicalOperator> PlanMapper::mapCreateNode(LogicalOperator* logicalOperator) {
Expand Down
13 changes: 8 additions & 5 deletions src/processor/map/map_scan_node_property.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
outVectorsPos.emplace_back(outSchema->getExpressionPos(*expression));
}
if (node->isMultiLabeled()) {
std::unordered_map<table_id_t, std::vector<uint32_t>> tableIDToColumns;
std::unordered_map<table_id_t, std::vector<column_id_t>> tableIDToColumns;
std::unordered_map<table_id_t, storage::NodeTable*> tables;
for (auto& tableID : node->getTableIDs()) {
tables.insert({tableID, nodeStore.getNodeTable(tableID)});
Expand All @@ -33,7 +33,9 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
if (!property->hasPropertyID(tableID)) {
columns.push_back(UINT32_MAX);
} else {
columns.push_back(property->getPropertyID(tableID));
columns.push_back(
catalog->getReadOnlyVersion()->getTableSchema(tableID)->getColumnID(
property->getPropertyID(tableID)));
}
}
tableIDToColumns.insert({tableID, std::move(columns)});
Expand All @@ -43,13 +45,14 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
getOperatorID(), scanProperty.getExpressionsForPrinting());
} else {
auto tableID = node->getSingleTableID();
std::vector<uint32_t> columnIds;
auto tableSchema = catalog->getReadOnlyVersion()->getTableSchema(tableID);
std::vector<column_id_t> columnIDs;
for (auto& expression : scanProperty.getProperties()) {
auto property = static_pointer_cast<PropertyExpression>(expression);
columnIds.push_back(property->getPropertyID(tableID));
columnIDs.push_back(tableSchema->getColumnID(property->getPropertyID(tableID)));
}
return std::make_unique<ScanSingleNodeTable>(inputNodeIDVectorPos, std::move(outVectorsPos),
nodeStore.getNodeTable(tableID), std::move(columnIds), std::move(prevOperator),
nodeStore.getNodeTable(tableID), std::move(columnIDs), std::move(prevOperator),
getOperatorID(), scanProperty.getExpressionsForPrinting());
}
}
Expand Down
Loading