Skip to content

Commit

Permalink
refactor node table to take in columnID instead of propertyID
Browse files Browse the repository at this point in the history
  • Loading branch information
ray6080 committed Sep 19, 2023
1 parent fe850a7 commit 2c20885
Show file tree
Hide file tree
Showing 26 changed files with 152 additions and 145 deletions.
11 changes: 10 additions & 1 deletion src/catalog/table_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "catalog/rel_table_group_schema.h"
#include "catalog/rel_table_schema.h"
#include "common/constants.h"
#include "common/exception/catalog.h"
#include "common/exception/internal.h"
#include "common/exception/not_implemented.h"
#include "common/exception/runtime.h"
Expand Down Expand Up @@ -50,6 +49,16 @@ property_id_t TableSchema::getPropertyID(const std::string& propertyName) const
"Table: {} doesn't have a property with propertyName={}.", tableName, propertyName));
}

// TODO(Guodong): Instead of looping over properties, cache a map between propertyID and columnID.
column_id_t TableSchema::getColumnID(const property_id_t propertyID) const {
for (auto i = 0u; i < properties.size(); i++) {
if (properties[i]->getPropertyID() == propertyID) {
return i;
}
}
return INVALID_COLUMN_ID;
}

Property* TableSchema::getProperty(property_id_t propertyID) const {
for (auto& property : properties) {
if (property->getPropertyID() == propertyID) {
Expand Down
1 change: 1 addition & 0 deletions src/include/catalog/table_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class TableSchema {
std::string getPropertyName(common::property_id_t propertyID) const;

common::property_id_t getPropertyID(const std::string& propertyName) const;
common::column_id_t getColumnID(common::property_id_t propertyID) const;

Property* getProperty(common::property_id_t propertyID) const;

Expand Down
14 changes: 8 additions & 6 deletions src/include/processor/operator/mask.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ struct MaskData {
std::fill(data, data + size, 0);
}

inline void setMask(uint64_t pos, uint8_t maskValue) { data[pos] = maskValue; }
inline bool isMasked(uint64_t pos, uint8_t trueMaskVal) { return data[pos] == trueMaskVal; }
inline void setMask(uint64_t pos, uint8_t maskValue) const { data[pos] = maskValue; }
inline bool isMasked(uint64_t pos, uint8_t trueMaskVal) const {
return data[pos] == trueMaskVal;
}

private:
std::unique_ptr<uint8_t[]> dataBuffer;
Expand Down Expand Up @@ -63,7 +65,7 @@ class MaskCollection {

class NodeSemiMask {
public:
NodeSemiMask(storage::NodeTable* nodeTable) : nodeTable{nodeTable} {}
explicit NodeSemiMask(storage::NodeTable* nodeTable) : nodeTable{nodeTable} {}

virtual void init(transaction::Transaction* trx) = 0;

Expand All @@ -72,7 +74,7 @@ class NodeSemiMask {
virtual uint8_t getNumMasks() const = 0;
virtual void incrementNumMasks() = 0;

inline bool isEnabled() { return getNumMasks() > 0; }
inline bool isEnabled() const { return getNumMasks() > 0; }
inline storage::NodeTable* getNodeTable() const { return nodeTable; }

protected:
Expand All @@ -81,7 +83,7 @@ class NodeSemiMask {

class NodeOffsetSemiMask : public NodeSemiMask {
public:
NodeOffsetSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
explicit NodeOffsetSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
offsetMask = std::make_unique<MaskCollection>();
}

Expand Down Expand Up @@ -110,7 +112,7 @@ class NodeOffsetSemiMask : public NodeSemiMask {

class NodeOffsetAndMorselSemiMask : public NodeSemiMask {
public:
NodeOffsetAndMorselSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
explicit NodeOffsetAndMorselSemiMask(storage::NodeTable* nodeTable) : NodeSemiMask{nodeTable} {
offsetMask = std::make_unique<MaskCollection>();
morselMask = std::make_unique<MaskCollection>();
}
Expand Down
15 changes: 6 additions & 9 deletions src/include/processor/operator/persistent/insert_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,10 @@ class NodeInsertExecutor {
public:
NodeInsertExecutor(storage::NodeTable* table, std::vector<storage::RelTable*> relTablesToInit,
const DataPos& nodeIDVectorPos, std::vector<DataPos> propertyLhsPositions,
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators,
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx)
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators)
: table{table}, relTablesToInit{std::move(relTablesToInit)},
nodeIDVectorPos{nodeIDVectorPos}, propertyLhsPositions{std::move(propertyLhsPositions)},
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, propertyIDToVectorIdx{std::move(
propertyIDToVectorIdx)} {}
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, nodeIDVector{nullptr} {}
NodeInsertExecutor(const NodeInsertExecutor& other);

void init(ResultSet* resultSet, ExecutionContext* context);
Expand All @@ -37,8 +35,6 @@ class NodeInsertExecutor {
DataPos nodeIDVectorPos;
std::vector<DataPos> propertyLhsPositions;
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators;
// TODO(Guodong): remove this.
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx;

common::ValueVector* nodeIDVector;
std::vector<common::ValueVector*> propertyLhsVectors;
Expand All @@ -53,7 +49,8 @@ class RelInsertExecutor {
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators)
: relsStatistics{relsStatistics}, table{table}, srcNodePos{srcNodePos},
dstNodePos{dstNodePos}, propertyLhsPositions{std::move(propertyLhsPositions)},
propertyRhsEvaluators{std::move(propertyRhsEvaluators)} {}
propertyRhsEvaluators{std::move(propertyRhsEvaluators)}, srcNodeIDVector{nullptr},
dstNodeIDVector{nullptr} {}
RelInsertExecutor(const RelInsertExecutor& other);

void init(ResultSet* resultSet, ExecutionContext* context);
Expand All @@ -75,8 +72,8 @@ class RelInsertExecutor {
std::vector<DataPos> propertyLhsPositions;
std::vector<std::unique_ptr<evaluator::ExpressionEvaluator>> propertyRhsEvaluators;

common::ValueVector* srcNodeIDVector = nullptr;
common::ValueVector* dstNodeIDVector = nullptr;
common::ValueVector* srcNodeIDVector;
common::ValueVector* dstNodeIDVector;
std::vector<common::ValueVector*> propertyLhsVectors;
std::vector<common::ValueVector*> propertyRhsVectors;
};
Expand Down
2 changes: 1 addition & 1 deletion src/include/processor/operator/persistent/set_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class NodeSetExecutor {

struct NodeSetInfo {
storage::NodeTable* table;
common::property_id_t propertyID;
common::column_id_t columnID;
};

class SingleLabelNodeSetExecutor : public NodeSetExecutor {
Expand Down
13 changes: 7 additions & 6 deletions src/include/processor/operator/scan/scan_node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,31 @@ namespace processor {
class ScanSingleNodeTable : public ScanColumns {
public:
ScanSingleNodeTable(const DataPos& inVectorPos, std::vector<DataPos> outVectorsPos,
storage::NodeTable* table, std::vector<uint32_t> propertyColumnIds,
storage::NodeTable* table, std::vector<common::column_id_t> columnIDs,
std::unique_ptr<PhysicalOperator> prevOperator, uint32_t id,
const std::string& paramsString)
: ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id,
paramsString},
table{table}, propertyColumnIds{std::move(propertyColumnIds)} {}
table{table}, columnIDs{std::move(columnIDs)} {}

bool getNextTuplesInternal(ExecutionContext* context) override;

inline std::unique_ptr<PhysicalOperator> clone() override {
return make_unique<ScanSingleNodeTable>(inputNodeIDVectorPos, outPropertyVectorsPos, table,
propertyColumnIds, children[0]->clone(), id, paramsString);
columnIDs, children[0]->clone(), id, paramsString);
}

private:
storage::NodeTable* table;
std::vector<uint32_t> propertyColumnIds;
std::vector<common::column_id_t> columnIDs;
};

class ScanMultiNodeTables : public ScanColumns {
public:
ScanMultiNodeTables(const DataPos& inVectorPos, std::vector<DataPos> outVectorsPos,
std::unordered_map<common::table_id_t, storage::NodeTable*> tables,
std::unordered_map<common::table_id_t, std::vector<uint32_t>> tableIDToScanColumnIds,
std::unordered_map<common::table_id_t, std::vector<common::column_id_t>>
tableIDToScanColumnIds,
std::unique_ptr<PhysicalOperator> prevOperator, uint32_t id,
const std::string& paramsString)
: ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id,
Expand All @@ -48,7 +49,7 @@ class ScanMultiNodeTables : public ScanColumns {

private:
std::unordered_map<common::table_id_t, storage::NodeTable*> tables;
std::unordered_map<common::table_id_t, std::vector<uint32_t>> tableIDToScanColumnIds;
std::unordered_map<common::table_id_t, std::vector<common::column_id_t>> tableIDToScanColumnIds;
};

} // namespace processor
Expand Down
2 changes: 1 addition & 1 deletion src/include/processor/plan_mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class PlanMapper {

std::unique_ptr<NodeInsertExecutor> getNodeInsertExecutor(storage::NodesStore* nodesStore,
storage::RelsStore* relsStore, planner::LogicalCreateNodeInfo* info,
const planner::Schema& inSchema, const planner::Schema& outSchema);
const planner::Schema& inSchema, const planner::Schema& outSchema) const;
std::unique_ptr<RelInsertExecutor> getRelInsertExecutor(storage::RelsStore* relsStore,
planner::LogicalCreateRelInfo* info, const planner::Schema& inSchema,
const planner::Schema& outSchema);
Expand Down
7 changes: 4 additions & 3 deletions src/include/storage/copier/node_group.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ class NodeGroup {
inline void setNodeGroupIdx(uint64_t nodeGroupIdx_) { this->nodeGroupIdx = nodeGroupIdx_; }
inline uint64_t getNodeGroupIdx() const { return nodeGroupIdx; }
inline common::offset_t getNumNodes() const { return numNodes; }
inline ColumnChunk* getColumnChunk(common::property_id_t propertyID) {
return chunks.contains(propertyID) ? chunks.at(propertyID).get() : nullptr;
inline ColumnChunk* getColumnChunk(common::column_id_t columnID) {
assert(columnID < chunks.size());
return chunks[columnID].get();
}
inline bool isFull() const { return numNodes == common::StorageConstants::NODE_GROUP_SIZE; }

Expand All @@ -33,7 +34,7 @@ class NodeGroup {
private:
uint64_t nodeGroupIdx;
common::offset_t numNodes;
std::unordered_map<common::property_id_t, std::unique_ptr<ColumnChunk>> chunks;
std::vector<std::unique_ptr<ColumnChunk>> chunks;
};

} // namespace storage
Expand Down
4 changes: 2 additions & 2 deletions src/include/storage/local_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class LocalStorage {
void lookup(common::table_id_t tableID, common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void update(common::table_id_t tableID, common::property_id_t propertyID,
void update(common::table_id_t tableID, common::column_id_t columnID,
common::ValueVector* nodeIDVector, common::ValueVector* propertyVector);
void update(common::table_id_t tableID, common::property_id_t propertyID,
void update(common::table_id_t tableID, common::column_id_t columnID,
common::offset_t nodeOffset, common::ValueVector* propertyVector,
common::sel_t posInPropertyVector);

Expand Down
6 changes: 3 additions & 3 deletions src/include/storage/local_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,15 @@ class LocalTable {
void lookup(common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void update(common::property_id_t propertyID, common::ValueVector* nodeIDVector,
void update(common::column_id_t columnID, common::ValueVector* nodeIDVector,
common::ValueVector* propertyVector, MemoryManager* mm);
void update(common::property_id_t propertyID, common::offset_t nodeOffset,
void update(common::column_id_t columnID, common::offset_t nodeOffset,
common::ValueVector* propertyVector, common::sel_t posInPropertyVector, MemoryManager* mm);

void prepareCommit();

private:
std::map<common::property_id_t, std::unique_ptr<LocalColumn>> columns;
std::map<common::column_id_t, std::unique_ptr<LocalColumn>> columns;
NodeTable* table;
};

Expand Down
35 changes: 17 additions & 18 deletions src/include/storage/store/node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,34 @@ class NodeTable {
inline BMFileHandle* getDataFH() const { return dataFH; }

void read(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
const std::vector<common::column_id_t>& columnIds,
const std::vector<common::column_id_t>& columnIDs,
const std::vector<common::ValueVector*>& outputVectors);
void insert(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
const std::vector<common::ValueVector*>& propertyVectors,
const std::unordered_map<common::property_id_t, common::vector_idx_t>&
propertyIDToVectorIdx);
void update(transaction::Transaction* transaction, common::property_id_t propertyID,
const std::vector<common::ValueVector*>& propertyVectors);
void update(transaction::Transaction* transaction, common::column_id_t columnID,
common::ValueVector* nodeIDVector, common::ValueVector* propertyVector);
void update(transaction::Transaction* transaction, common::property_id_t propertyID,
void update(transaction::Transaction* transaction, common::column_id_t columnID,
common::offset_t nodeOffset, common::ValueVector* propertyVector,
common::sel_t posInPropertyVector);
common::sel_t posInPropertyVector) const;
void delete_(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
DeleteState* deleteState);
void append(NodeGroup* nodeGroup);

std::unordered_set<common::property_id_t> getPropertyIDs() const;

inline NodeColumn* getPropertyColumn(common::property_id_t propertyIdx) {
assert(propertyColumns.contains(propertyIdx));
return propertyColumns.at(propertyIdx).get();
inline common::column_id_t getNumColumns() const { return columns.size(); }
inline NodeColumn* getColumn(common::column_id_t columnID) {
assert(columnID < columns.size());
return columns[columnID].get();
}
inline common::column_id_t getPKColumnID() const { return pkColumnID; }
inline PrimaryKeyIndex* getPKIndex() const { return pkIndex.get(); }
inline NodesStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs() const {
return nodesStatisticsAndDeletedIDs;
}
inline common::property_id_t getPKPropertyID() const { return pkPropertyID; }
inline common::table_id_t getTableID() const { return tableID; }

inline void dropColumn(common::property_id_t propertyID) { propertyColumns.erase(propertyID); }
inline void dropColumn(common::column_id_t columnID) {
columns.erase(columns.begin() + columnID);
}
void addColumn(const catalog::Property& property, common::ValueVector* defaultValueVector,
transaction::Transaction* transaction);

Expand All @@ -89,16 +88,16 @@ class NodeTable {

void insertPK(common::ValueVector* nodeIDVector, common::ValueVector* primaryKeyVector);
inline uint64_t getNumNodeGroups(transaction::Transaction* transaction) const {
return propertyColumns.begin()->second->getNumNodeGroups(transaction);
assert(!columns.empty());
return columns[0]->getNumNodeGroups(transaction);
}

private:
NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs;
// TODO(Guodong): use vector here
std::map<common::property_id_t, std::unique_ptr<NodeColumn>> propertyColumns;
std::vector<std::unique_ptr<NodeColumn>> columns;
BMFileHandle* dataFH;
BMFileHandle* metadataFH;
common::property_id_t pkPropertyID;
common::column_id_t pkColumnID;
std::unique_ptr<PrimaryKeyIndex> pkIndex;
common::table_id_t tableID;
BufferManager& bufferManager;
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/store/nodes_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class NodesStore {

inline NodeColumn* getNodePropertyColumn(
common::table_id_t tableID, uint64_t propertyIdx) const {
return nodeTables.at(tableID)->getPropertyColumn(propertyIdx);
return nodeTables.at(tableID)->getColumn(propertyIdx);
}
inline PrimaryKeyIndex* getPKIndex(common::table_id_t tableID) {
return nodeTables[tableID]->getPKIndex();
Expand Down
2 changes: 1 addition & 1 deletion src/main/storage_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ void StorageDriver::scan(const std::string& nodeName, const std::string& propert
auto nodeTableID = catalogContent->getTableID(nodeName);
auto propertyID = catalogContent->getTableSchema(nodeTableID)->getPropertyID(propertyName);
auto nodeTable = storageManager->getNodesStore().getNodeTable(nodeTableID);
auto column = nodeTable->getPropertyColumn(propertyID);
auto column = nodeTable->getColumn(propertyID);
auto current_buffer = result;
std::vector<std::thread> threads;
auto numElementsPerThread = size / numThreads + 1;
Expand Down
10 changes: 3 additions & 7 deletions src/processor/map/map_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ static std::vector<DataPos> populateLhsVectorPositions(
std::unique_ptr<NodeInsertExecutor> PlanMapper::getNodeInsertExecutor(
storage::NodesStore* nodesStore, storage::RelsStore* relsStore,
planner::LogicalCreateNodeInfo* info, const planner::Schema& inSchema,
const planner::Schema& outSchema) {
const planner::Schema& outSchema) const {
auto node = info->node;
auto nodeTableID = node->getSingleTableID();
auto table = nodesStore->getNodeTable(nodeTableID);
Expand All @@ -41,15 +41,11 @@ std::unique_ptr<NodeInsertExecutor> PlanMapper::getNodeInsertExecutor(
auto nodeIDPos = DataPos(outSchema.getExpressionPos(*node->getInternalIDProperty()));
std::vector<DataPos> lhsVectorPositions = populateLhsVectorPositions(info->setItems, outSchema);
std::vector<std::unique_ptr<ExpressionEvaluator>> evaluators;
std::unordered_map<common::property_id_t, common::vector_idx_t> propertyIDToVectorIdx;
for (auto i = 0u; i < info->setItems.size(); ++i) {
auto& [lhs, rhs] = info->setItems[i];
auto propertyExpression = (binder::PropertyExpression*)lhs.get();
for (auto& [_, rhs] : info->setItems) {
evaluators.push_back(ExpressionMapper::getEvaluator(rhs, &inSchema));
propertyIDToVectorIdx.insert({propertyExpression->getPropertyID(nodeTableID), i});
}
return std::make_unique<NodeInsertExecutor>(table, std::move(relTablesToInit), nodeIDPos,
std::move(lhsVectorPositions), std::move(evaluators), std::move(propertyIDToVectorIdx));
std::move(lhsVectorPositions), std::move(evaluators));
}

std::unique_ptr<PhysicalOperator> PlanMapper::mapCreateNode(LogicalOperator* logicalOperator) {
Expand Down
13 changes: 8 additions & 5 deletions src/processor/map/map_scan_node_property.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
outVectorsPos.emplace_back(outSchema->getExpressionPos(*expression));
}
if (node->isMultiLabeled()) {
std::unordered_map<table_id_t, std::vector<uint32_t>> tableIDToColumns;
std::unordered_map<table_id_t, std::vector<column_id_t>> tableIDToColumns;
std::unordered_map<table_id_t, storage::NodeTable*> tables;
for (auto& tableID : node->getTableIDs()) {
tables.insert({tableID, nodeStore.getNodeTable(tableID)});
Expand All @@ -33,7 +33,9 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
if (!property->hasPropertyID(tableID)) {
columns.push_back(UINT32_MAX);
} else {
columns.push_back(property->getPropertyID(tableID));
columns.push_back(
catalog->getReadOnlyVersion()->getTableSchema(tableID)->getColumnID(
property->getPropertyID(tableID)));
}
}
tableIDToColumns.insert({tableID, std::move(columns)});
Expand All @@ -43,13 +45,14 @@ std::unique_ptr<PhysicalOperator> PlanMapper::mapScanNodeProperty(
getOperatorID(), scanProperty.getExpressionsForPrinting());
} else {
auto tableID = node->getSingleTableID();
std::vector<uint32_t> columnIds;
auto tableSchema = catalog->getReadOnlyVersion()->getTableSchema(tableID);
std::vector<column_id_t> columnIDs;
for (auto& expression : scanProperty.getProperties()) {
auto property = static_pointer_cast<PropertyExpression>(expression);
columnIds.push_back(property->getPropertyID(tableID));
columnIDs.push_back(tableSchema->getColumnID(property->getPropertyID(tableID)));
}
return std::make_unique<ScanSingleNodeTable>(inputNodeIDVectorPos, std::move(outVectorsPos),
nodeStore.getNodeTable(tableID), std::move(columnIds), std::move(prevOperator),
nodeStore.getNodeTable(tableID), std::move(columnIDs), std::move(prevOperator),
getOperatorID(), scanProperty.getExpressionsForPrinting());
}
}
Expand Down
Loading

0 comments on commit 2c20885

Please sign in to comment.