diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 61a9ac4f2d..194c6ccf4c 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -50,7 +50,7 @@ uint64_t SerDeser::deserializeValue( if (value.typeID == LIST) { auto childDataType = make_unique(); offset = SerDeser::deserializeValue(*childDataType, fileInfo, offset); - value.childType = move(childDataType); + value.childType = std::move(childDataType); return offset; } return offset; @@ -129,7 +129,7 @@ uint64_t SerDeser::serializeValue( offset = SerDeser::serializeValue(value.tableName, fileInfo, offset); offset = SerDeser::serializeValue(value.tableID, fileInfo, offset); offset = SerDeser::serializeValue(value.primaryKeyPropertyIdx, fileInfo, offset); - offset = SerDeser::serializeVector(value.structuredProperties, fileInfo, offset); + offset = SerDeser::serializeVector(value.properties, fileInfo, offset); offset = SerDeser::serializeUnorderedSet(value.fwdRelTableIDSet, fileInfo, offset); return SerDeser::serializeUnorderedSet(value.bwdRelTableIDSet, fileInfo, offset); } @@ -140,7 +140,7 @@ uint64_t SerDeser::deserializeValue( offset = SerDeser::deserializeValue(value.tableName, fileInfo, offset); offset = SerDeser::deserializeValue(value.tableID, fileInfo, offset); offset = SerDeser::deserializeValue(value.primaryKeyPropertyIdx, fileInfo, offset); - offset = SerDeser::deserializeVector(value.structuredProperties, fileInfo, offset); + offset = SerDeser::deserializeVector(value.properties, fileInfo, offset); offset = SerDeser::deserializeUnorderedSet(value.fwdRelTableIDSet, fileInfo, offset); return SerDeser::deserializeUnorderedSet(value.bwdRelTableIDSet, fileInfo, offset); @@ -188,59 +188,58 @@ CatalogContent::CatalogContent(const string& directory) { CatalogContent::CatalogContent(const CatalogContent& other) { for (auto& nodeTableSchema : other.nodeTableSchemas) { auto newNodeTableSchema = make_unique(*nodeTableSchema.second); - nodeTableSchemas[newNodeTableSchema->tableID] = move(newNodeTableSchema); + nodeTableSchemas[newNodeTableSchema->tableID] = std::move(newNodeTableSchema); } for (auto& relTableSchema : other.relTableSchemas) { auto newRelTableSchema = make_unique(*relTableSchema.second); - relTableSchemas[newRelTableSchema->tableID] = move(newRelTableSchema); + relTableSchemas[newRelTableSchema->tableID] = std::move(newRelTableSchema); } nodeTableNameToIDMap = other.nodeTableNameToIDMap; relTableNameToIDMap = other.relTableNameToIDMap; nextTableID = other.nextTableID; } -table_id_t CatalogContent::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx, - vector structuredPropertyDefinitions) { +table_id_t CatalogContent::addNodeTableSchema( + string tableName, uint32_t primaryKeyIdx, vector propertyDefinitions) { table_id_t tableID = assignNextTableID(); - vector structuredProperties; - for (auto i = 0u; i < structuredPropertyDefinitions.size(); ++i) { - auto& propertyDefinition = structuredPropertyDefinitions[i]; - structuredProperties.push_back( - Property::constructStructuredNodeProperty(propertyDefinition, i, tableID)); + vector properties; + for (auto i = 0u; i < propertyDefinitions.size(); ++i) { + auto& propertyDefinition = propertyDefinitions[i]; + properties.push_back(Property::constructNodeProperty(propertyDefinition, i, tableID)); } auto nodeTableSchema = make_unique( - move(tableName), tableID, primaryKeyIdx, move(structuredProperties)); + std::move(tableName), tableID, primaryKeyIdx, std::move(properties)); nodeTableNameToIDMap[nodeTableSchema->tableName] = tableID; - nodeTableSchemas[tableID] = move(nodeTableSchema); + nodeTableSchemas[tableID] = std::move(nodeTableSchema); return tableID; } table_id_t CatalogContent::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity, - vector structuredPropertyDefinitions, + const vector& propertyDefinitions, vector> srcDstTableIDs) { table_id_t tableID = assignNextTableID(); for (auto& [srcTableID, dstTableID] : srcDstTableIDs) { nodeTableSchemas[srcTableID]->addFwdRelTableID(tableID); nodeTableSchemas[dstTableID]->addBwdRelTableID(tableID); } - vector structuredProperties; + vector properties; auto propertyID = 0; auto propertyNameDataType = PropertyNameDataType(INTERNAL_ID_SUFFIX, INT64); - structuredProperties.push_back( + properties.push_back( Property::constructRelProperty(propertyNameDataType, propertyID++, tableID)); - for (auto& propertyDefinition : structuredPropertyDefinitions) { - structuredProperties.push_back( + for (auto& propertyDefinition : propertyDefinitions) { + properties.push_back( Property::constructRelProperty(propertyDefinition, propertyID++, tableID)); } - auto relTableSchema = make_unique(move(tableName), tableID, relMultiplicity, - move(structuredProperties), move(srcDstTableIDs)); + auto relTableSchema = make_unique(std::move(tableName), tableID, + relMultiplicity, std::move(properties), std::move(srcDstTableIDs)); relTableNameToIDMap[relTableSchema->tableName] = tableID; - relTableSchemas[tableID] = move(relTableSchema); + relTableSchemas[tableID] = std::move(relTableSchema); return tableID; } bool CatalogContent::containNodeProperty(table_id_t tableID, const string& propertyName) const { - for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) { + for (auto& property : nodeTableSchemas.at(tableID)->properties) { if (propertyName == property.name) { return true; } @@ -259,7 +258,7 @@ bool CatalogContent::containRelProperty(table_id_t tableID, const string& proper const Property& CatalogContent::getNodeProperty( table_id_t tableID, const string& propertyName) const { - for (auto& property : nodeTableSchemas.at(tableID)->structuredProperties) { + for (auto& property : nodeTableSchemas.at(tableID)->properties) { if (propertyName == property.name) { return property; } @@ -358,7 +357,7 @@ void Catalog::checkpointInMemoryIfNecessary() { if (!hasUpdates()) { return; } - catalogContentForReadOnlyTrx = move(catalogContentForWriteTrx); + catalogContentForReadOnlyTrx = std::move(catalogContentForWriteTrx); } ExpressionType Catalog::getFunctionType(const string& name) const { @@ -371,21 +370,21 @@ ExpressionType Catalog::getFunctionType(const string& name) const { } } -table_id_t Catalog::addNodeTableSchema(string tableName, uint32_t primaryKeyIdx, - vector structuredPropertyDefinitions) { +table_id_t Catalog::addNodeTableSchema( + string tableName, uint32_t primaryKeyIdx, vector propertyDefinitions) { initCatalogContentForWriteTrxIfNecessary(); auto tableID = catalogContentForWriteTrx->addNodeTableSchema( - move(tableName), primaryKeyIdx, move(structuredPropertyDefinitions)); + std::move(tableName), primaryKeyIdx, std::move(propertyDefinitions)); wal->logNodeTableRecord(tableID); return tableID; } table_id_t Catalog::addRelTableSchema(string tableName, RelMultiplicity relMultiplicity, - vector structuredPropertyDefinitions, + vector propertyDefinitions, vector> srcDstTableIDs) { initCatalogContentForWriteTrxIfNecessary(); - auto tableID = catalogContentForWriteTrx->addRelTableSchema(move(tableName), relMultiplicity, - move(structuredPropertyDefinitions), move(srcDstTableIDs)); + auto tableID = catalogContentForWriteTrx->addRelTableSchema(std::move(tableName), + relMultiplicity, std::move(propertyDefinitions), std::move(srcDstTableIDs)); wal->logRelTableRecord(tableID); return tableID; } diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index be0eba5d77..8fddf1b975 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -40,11 +40,11 @@ class CatalogContent { /** * Node and Rel table functions. */ - table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx, - vector structuredPropertyDefinitions); + table_id_t addNodeTableSchema( + string tableName, uint32_t primaryKeyIdx, vector propertyDefinitions); table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity, - vector structuredPropertyDefinitions, + const vector& propertyDefinitions, vector> srcDstTableIDs); virtual inline string getNodeTableName(table_id_t tableID) const { @@ -172,7 +172,7 @@ class Catalog { } inline void writeCatalogForWALRecord(string directory) { - catalogContentForWriteTrx->saveToFile(move(directory), DBFileType::WAL_VERSION); + catalogContentForWriteTrx->saveToFile(std::move(directory), DBFileType::WAL_VERSION); } static inline void saveInitialCatalogToFile(const string& directory) { @@ -181,11 +181,11 @@ class Catalog { ExpressionType getFunctionType(const string& name) const; - table_id_t addNodeTableSchema(string tableName, uint32_t primaryKeyIdx, - vector structuredPropertyDefinitions); + table_id_t addNodeTableSchema( + string tableName, uint32_t primaryKeyIdx, vector propertyDefinitions); table_id_t addRelTableSchema(string tableName, RelMultiplicity relMultiplicity, - vector structuredPropertyDefinitions, + vector propertyDefinitions, vector> srcDstTableIDs); inline void removeTableSchema(TableSchema* tableSchema) { diff --git a/src/include/catalog/catalog_structs.h b/src/include/catalog/catalog_structs.h index d76865c12f..a7838cc62e 100644 --- a/src/include/catalog/catalog_structs.h +++ b/src/include/catalog/catalog_structs.h @@ -35,14 +35,14 @@ struct PropertyNameDataType { struct Property : PropertyNameDataType { private: Property(string name, DataType dataType, uint32_t propertyID, table_id_t tableID) - : PropertyNameDataType{move(name), move(dataType)}, propertyID{propertyID}, tableID{ - tableID} {} + : PropertyNameDataType{std::move(name), std::move(dataType)}, + propertyID{propertyID}, tableID{tableID} {} public: // This constructor is needed for ser/deser functions Property() {} - static Property constructStructuredNodeProperty( + static Property constructNodeProperty( const PropertyNameDataType& nameDataType, uint32_t propertyID, table_id_t tableID) { return Property(nameDataType.name, nameDataType.dataType, propertyID, tableID); } @@ -59,7 +59,7 @@ struct Property : PropertyNameDataType { struct TableSchema { public: TableSchema(string tableName, table_id_t tableID, bool isNodeTable) - : tableName{move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {} + : tableName{std::move(tableName)}, tableID{tableID}, isNodeTable{isNodeTable} {} virtual ~TableSchema() = default; @@ -74,40 +74,43 @@ struct TableSchema { }; struct NodeTableSchema : TableSchema { - NodeTableSchema() : NodeTableSchema{"", UINT64_MAX, UINT64_MAX, vector{}} {} + NodeTableSchema() + : NodeTableSchema{"", INVALID_TABLE_ID, INVALID_PROPERTY_ID, vector{}} {} NodeTableSchema(string tableName, table_id_t tableID, uint64_t primaryPropertyId, - vector structuredProperties) - : TableSchema{move(tableName), tableID, true /* isNodeTable */}, - primaryKeyPropertyIdx{primaryPropertyId}, structuredProperties{ - move(structuredProperties)} {} + vector properties) + : TableSchema{std::move(tableName), tableID, true /* isNodeTable */}, + primaryKeyPropertyIdx{primaryPropertyId}, properties{std::move(properties)} {} - inline uint64_t getNumStructuredProperties() const { return structuredProperties.size(); } + inline uint64_t getNumStructuredProperties() const { return properties.size(); } inline void addFwdRelTableID(table_id_t tableID) { fwdRelTableIDSet.insert(tableID); } inline void addBwdRelTableID(table_id_t tableID) { bwdRelTableIDSet.insert(tableID); } - inline Property getPrimaryKey() const { return structuredProperties[primaryKeyPropertyIdx]; } + inline Property getPrimaryKey() const { return properties[primaryKeyPropertyIdx]; } - inline vector getAllNodeProperties() const { return structuredProperties; } + inline vector getAllNodeProperties() const { return properties; } // TODO(Semih): When we support updating the schemas, we need to update this or, we need // a more robust mechanism to keep track of which property is the primary key (e.g., store this // information with the property). This is an idx, not an ID, so as the columns/properties of // the table change, the idx can change. uint64_t primaryKeyPropertyIdx; - vector structuredProperties; + vector properties; unordered_set fwdRelTableIDSet; // srcNode->rel unordered_set bwdRelTableIDSet; // dstNode->rel }; struct RelTableSchema : TableSchema { +public: + static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0; + RelTableSchema() - : TableSchema{"", UINT64_MAX, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {} + : TableSchema{"", INVALID_TABLE_ID, false /* isNodeTable */}, relMultiplicity{MANY_MANY} {} RelTableSchema(string tableName, table_id_t tableID, RelMultiplicity relMultiplicity, vector properties, vector> srcDstTableIDs) - : TableSchema{move(tableName), tableID, false /* isNodeTable */}, - relMultiplicity{relMultiplicity}, properties{move(properties)}, srcDstTableIDs{move( - srcDstTableIDs)} {} + : TableSchema{std::move(tableName), tableID, false /* isNodeTable */}, + relMultiplicity{relMultiplicity}, properties{std::move(properties)}, + srcDstTableIDs{std::move(srcDstTableIDs)} {} inline Property& getRelIDDefinition() { for (auto& property : properties) { @@ -127,7 +130,7 @@ struct RelTableSchema : TableSchema { inline uint32_t getNumProperties() const { return properties.size(); } - inline uint32_t getNumUserDefinedProperties() { + inline uint32_t getNumUserDefinedProperties() const { // Note: the first column stores the relID property. return properties.size() - 1; } @@ -153,7 +156,6 @@ struct RelTableSchema : TableSchema { unordered_set getUniqueNbrTableIDsForBoundTableIDDirection( RelDirection direction, table_id_t boundTableID) const; - static constexpr uint64_t INTERNAL_REL_ID_PROPERTY_IDX = 0; RelMultiplicity relMultiplicity; vector properties; vector> srcDstTableIDs; diff --git a/src/include/common/types/node_id_t.h b/src/include/common/types/node_id_t.h index 3317cc5efd..f681583c1a 100644 --- a/src/include/common/types/node_id_t.h +++ b/src/include/common/types/node_id_t.h @@ -1,10 +1,13 @@ #pragma once +#include + namespace kuzu { namespace common { typedef uint64_t table_id_t; typedef uint64_t node_offset_t; +constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX; constexpr node_offset_t INVALID_NODE_OFFSET = UINT64_MAX; // System representation for nodeID. diff --git a/src/include/processor/operator/base_extend.h b/src/include/processor/operator/base_extend.h deleted file mode 100644 index 82e339b5fd..0000000000 --- a/src/include/processor/operator/base_extend.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "processor/operator/physical_operator.h" - -namespace kuzu { -namespace processor { - -class BaseExtendAndScanRelProperties : public PhysicalOperator { -protected: - BaseExtendAndScanRelProperties(PhysicalOperatorType operatorType, - const DataPos& inNodeIDVectorPos, const DataPos& outNodeIDVectorPos, - vector outPropertyVectorsPos, unique_ptr child, uint32_t id, - const string& paramsString) - : PhysicalOperator{operatorType, std::move(child), id, paramsString}, - inNodeIDVectorPos{inNodeIDVectorPos}, outNodeIDVectorPos{outNodeIDVectorPos}, - outPropertyVectorsPos{std::move(outPropertyVectorsPos)} {} - virtual ~BaseExtendAndScanRelProperties() override = default; - - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - -protected: - // vector positions - DataPos inNodeIDVectorPos; - DataPos outNodeIDVectorPos; - vector outPropertyVectorsPos; - // vectors - shared_ptr inNodeIDVector; - shared_ptr outNodeIDVector; - vector> outPropertyVectors; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/generic_extend.h b/src/include/processor/operator/generic_extend.h deleted file mode 100644 index f948948ab5..0000000000 --- a/src/include/processor/operator/generic_extend.h +++ /dev/null @@ -1,112 +0,0 @@ -#pragma once - -#include "processor/operator/base_extend.h" -#include "storage/storage_structure/column.h" -#include "storage/storage_structure/lists/lists.h" - -namespace kuzu { -namespace processor { - -// Each output vector is scanned from a collection of Columns and Lists -struct ColumnAndListCollection { - vector columns; - vector lists; - vector> listHandles; - - ColumnAndListCollection(vector columns, vector lists) - : columns{std::move(columns)}, lists{std::move(lists)} {} - - void populateListHandles(ListSyncState& syncState); -}; - -class AdjAndPropertyCollection { -public: - AdjAndPropertyCollection(unique_ptr adjCollection, - vector> propertyCollections) - : adjCollection{std::move(adjCollection)}, propertyCollections{ - std::move(propertyCollections)} {} - - void populateListHandles(); - - void resetState(node_offset_t nodeOffset); - - bool scan(const shared_ptr& inVector, const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction); - - unique_ptr clone() const; - -private: - bool scanColumns(const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction); - bool scanLists(const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction); - - inline bool hasColumnToScan() const { return nextColumnIdx < adjCollection->columns.size(); } - inline bool hasListToScan() const { return nextListIdx < adjCollection->lists.size(); } - - bool scanColumn(uint32_t idx, const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction); - bool scanList(uint32_t idx, const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction); - void scanPropertyList(uint32_t idx, const vector>& outPropertyVectors, - Transaction* transaction); - -private: - unique_ptr adjCollection; - vector> propertyCollections; - - // Next column idx to scan. - uint32_t nextColumnIdx = UINT32_MAX; - // Next list idx to scan. - uint32_t nextListIdx = UINT32_MAX; - // Current node offset to extend from. - node_offset_t currentNodeOffset = INVALID_NODE_OFFSET; - // Current list idx to scan. Note that a list may be scanned multiple times. - uint32_t currentListIdx = UINT32_MAX; - // Sync between adjList and propertyLists - unique_ptr listSyncState = nullptr; -}; - -class GenericExtendAndScanRelProperties : public BaseExtendAndScanRelProperties { -public: - GenericExtendAndScanRelProperties(const DataPos& inNodeIDVectorPos, - const DataPos& outNodeIDVectorPos, vector outPropertyVectorsPos, - unordered_map> - adjAndPropertyCollectionPerNodeTable, - unique_ptr child, uint32_t id, const string& paramsString) - : BaseExtendAndScanRelProperties{PhysicalOperatorType::GENERIC_EXTEND, inNodeIDVectorPos, - outNodeIDVectorPos, std::move(outPropertyVectorsPos), std::move(child), id, - paramsString}, - adjAndPropertyCollectionPerNodeTable{std::move(adjAndPropertyCollectionPerNodeTable)} {} - ~GenericExtendAndScanRelProperties() override = default; - - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - - bool getNextTuplesInternal() override; - - unique_ptr clone() override { - unordered_map> clonedCollections; - for (auto& [tableID, adjAndPropertyCollection] : adjAndPropertyCollectionPerNodeTable) { - clonedCollections.insert({tableID, adjAndPropertyCollection->clone()}); - } - return make_unique(inNodeIDVectorPos, outNodeIDVectorPos, - outPropertyVectorsPos, std::move(clonedCollections), children[0]->clone(), id, - paramsString); - } - -private: - bool scanCurrentAdjAndPropertyCollection(); - void initCurrentAdjAndPropertyCollection(const nodeID_t& nodeID); - -private: - unordered_map> - adjAndPropertyCollectionPerNodeTable; - AdjAndPropertyCollection* currentAdjAndPropertyCollection = nullptr; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/physical_operator.h b/src/include/processor/operator/physical_operator.h index 171bc2c55a..0428132b78 100644 --- a/src/include/processor/operator/physical_operator.h +++ b/src/include/processor/operator/physical_operator.h @@ -11,7 +11,6 @@ namespace processor { enum class PhysicalOperatorType : uint8_t { AGGREGATE, AGGREGATE_SCAN, - COLUMN_EXTEND, COPY_NODE_CSV, COPY_REL_CSV, CREATE_NODE, @@ -25,20 +24,21 @@ enum class PhysicalOperatorType : uint8_t { FACTORIZED_TABLE_SCAN, FILTER, FLATTEN, - GENERIC_EXTEND, + GENERIC_SCAN_REL_TABLES, HASH_JOIN_BUILD, HASH_JOIN_PROBE, INDEX_SCAN, INTERSECT_BUILD, INTERSECT, LIMIT, - LIST_EXTEND, MULTIPLICITY_REDUCER, PROJECTION, SCAN_REL_PROPERTY, RESULT_COLLECTOR, SCAN_NODE_ID, SCAN_NODE_PROPERTY, + SCAN_REL_TABLE_COLUMNS, + SCAN_REL_TABLE_LISTS, SEMI_MASKER, SET_NODE_PROPERTY, SET_REL_PROPERTY, diff --git a/src/include/processor/operator/scan/generic_scan_rel_tables.h b/src/include/processor/operator/scan/generic_scan_rel_tables.h new file mode 100644 index 0000000000..cbf1ca5723 --- /dev/null +++ b/src/include/processor/operator/scan/generic_scan_rel_tables.h @@ -0,0 +1,64 @@ +#pragma once + +#include "processor/operator/scan/scan_rel_table.h" +#include "storage/storage_structure/column.h" +#include "storage/storage_structure/lists/lists.h" + +namespace kuzu { +namespace processor { + +class RelTableCollection { +public: + RelTableCollection( + vector tables, vector> tableScanStates) + : tables{std::move(tables)}, tableScanStates{std::move(tableScanStates)} {} + + void resetState(); + inline uint32_t getNumTablesInCollection() { return tables.size(); } + + bool scan(const shared_ptr& inVector, + vector>& outputVectors, Transaction* transaction); + + unique_ptr clone() const; + +private: + vector tables; + vector> tableScanStates; + + uint32_t currentRelTableIdxToScan = UINT32_MAX; + uint32_t nextRelTableIdxToScan = 0; +}; + +class GenericScanRelTables : public ScanRelTable { +public: + GenericScanRelTables(const DataPos& inNodeIDVectorPos, vector outputVectorsPos, + unordered_map> relTableCollectionPerNodeTable, + unique_ptr child, uint32_t id, const string& paramsString) + : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), + PhysicalOperatorType::GENERIC_SCAN_REL_TABLES, std::move(child), id, paramsString}, + relTableCollectionPerNodeTable{std::move(relTableCollectionPerNodeTable)} {} + + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; + + bool getNextTuplesInternal() override; + + unique_ptr clone() override { + unordered_map> clonedCollections; + for (auto& [tableID, propertyCollection] : relTableCollectionPerNodeTable) { + clonedCollections.insert({tableID, propertyCollection->clone()}); + } + return make_unique(inNodeIDVectorPos, outputVectorsPos, + std::move(clonedCollections), children[0]->clone(), id, paramsString); + } + +private: + bool scanCurrentRelTableCollection(); + void initCurrentRelTableCollection(const nodeID_t& nodeID); + +private: + unordered_map> relTableCollectionPerNodeTable; + RelTableCollection* currentRelTableCollection = nullptr; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan/scan_columns.h b/src/include/processor/operator/scan/scan_columns.h new file mode 100644 index 0000000000..46bba1a5fa --- /dev/null +++ b/src/include/processor/operator/scan/scan_columns.h @@ -0,0 +1,29 @@ +#pragma once + +#include "processor/operator/physical_operator.h" + +using namespace kuzu::storage; + +namespace kuzu { +namespace processor { + +class ScanColumns : public PhysicalOperator { +protected: + ScanColumns(const DataPos& inVectorPos, vector outPropertyVectorsPos, + unique_ptr child, uint32_t id, const string& paramsString) + : PhysicalOperator{PhysicalOperatorType::SCAN_NODE_PROPERTY, std::move(child), id, + paramsString}, + inputNodeIDVectorPos{inVectorPos}, outPropertyVectorsPos{ + std::move(outPropertyVectorsPos)} {} + + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; + +protected: + DataPos inputNodeIDVectorPos; + shared_ptr inputNodeIDVector; + vector outPropertyVectorsPos; + vector> outPropertyVectors; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan/scan_node_table.h b/src/include/processor/operator/scan/scan_node_table.h new file mode 100644 index 0000000000..e017edc169 --- /dev/null +++ b/src/include/processor/operator/scan/scan_node_table.h @@ -0,0 +1,53 @@ +#pragma once + +#include "processor/operator/scan/scan_columns.h" +#include "storage/store/node_table.h" + +namespace kuzu { +namespace processor { + +class ScanSingleNodeTable : public ScanColumns { +public: + ScanSingleNodeTable(const DataPos& inVectorPos, vector outVectorsPos, NodeTable* table, + vector propertyColumnIds, unique_ptr prevOperator, uint32_t id, + const string& paramsString) + : ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id, + paramsString}, + table{table}, propertyColumnIds{std::move(propertyColumnIds)} {} + + bool getNextTuplesInternal() override; + + inline unique_ptr clone() override { + return make_unique(inputNodeIDVectorPos, outPropertyVectorsPos, table, + propertyColumnIds, children[0]->clone(), id, paramsString); + } + +private: + NodeTable* table; + vector propertyColumnIds; +}; + +class ScanMultiNodeTables : public ScanColumns { +public: + ScanMultiNodeTables(const DataPos& inVectorPos, vector outVectorsPos, + unordered_map tables, + unordered_map> tableIDToScanColumnIds, + unique_ptr prevOperator, uint32_t id, const string& paramsString) + : ScanColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id, + paramsString}, + tables{std::move(tables)}, tableIDToScanColumnIds{std::move(tableIDToScanColumnIds)} {} + + bool getNextTuplesInternal() override; + + inline unique_ptr clone() override { + return make_unique(inputNodeIDVectorPos, outPropertyVectorsPos, tables, + tableIDToScanColumnIds, children[0]->clone(), id, paramsString); + } + +private: + unordered_map tables; + unordered_map> tableIDToScanColumnIds; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan/scan_rel_table.h b/src/include/processor/operator/scan/scan_rel_table.h new file mode 100644 index 0000000000..730bba47f2 --- /dev/null +++ b/src/include/processor/operator/scan/scan_rel_table.h @@ -0,0 +1,29 @@ +#pragma once + +#include "processor/operator/physical_operator.h" +#include "storage/store/rel_table.h" + +namespace kuzu { +namespace processor { + +class ScanRelTable : public PhysicalOperator { +protected: + ScanRelTable(const DataPos& inNodeIDVectorPos, vector outputVectorsPos, + PhysicalOperatorType operatorType, unique_ptr child, uint32_t id, + const string& paramsString) + : PhysicalOperator{operatorType, std::move(child), id, paramsString}, + inNodeIDVectorPos{inNodeIDVectorPos}, outputVectorsPos{std::move(outputVectorsPos)} {} + + void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; + +protected: + // vector positions + DataPos inNodeIDVectorPos; + vector outputVectorsPos; + // vectors + shared_ptr inNodeIDVector; + vector> outputVectors; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan/scan_rel_table_columns.h b/src/include/processor/operator/scan/scan_rel_table_columns.h new file mode 100644 index 0000000000..fa4d87fdf8 --- /dev/null +++ b/src/include/processor/operator/scan/scan_rel_table_columns.h @@ -0,0 +1,36 @@ +#pragma once + +#include "processor/operator/filtering_operator.h" +#include "processor/operator/scan/scan_rel_table.h" + +namespace kuzu { +namespace processor { + +class ScanRelTableColumns : public ScanRelTable, public SelVectorOverWriter { +public: + ScanRelTableColumns(table_id_t boundNodeTableID, DirectedRelTableData* tableData, + vector propertyIds, const DataPos& inNodeIDVectorPos, + vector outputVectorsPos, unique_ptr child, uint32_t id, + const string& paramsString) + : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), + PhysicalOperatorType::SCAN_REL_TABLE_COLUMNS, std::move(child), id, paramsString}, + tableData{tableData} { + scanState = make_unique( + boundNodeTableID, std::move(propertyIds), RelTableDataType::COLUMNS); + } + + bool getNextTuplesInternal() override; + + inline unique_ptr clone() override { + return make_unique(scanState->boundNodeTableID, tableData, + scanState->propertyIds, inNodeIDVectorPos, outputVectorsPos, children[0]->clone(), id, + paramsString); + } + +private: + DirectedRelTableData* tableData; + unique_ptr scanState; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan/scan_rel_table_lists.h b/src/include/processor/operator/scan/scan_rel_table_lists.h new file mode 100644 index 0000000000..a185673302 --- /dev/null +++ b/src/include/processor/operator/scan/scan_rel_table_lists.h @@ -0,0 +1,36 @@ +#pragma once + +#include "processor/operator/scan/scan_rel_table.h" +#include "storage/storage_structure/lists/lists.h" + +namespace kuzu { +namespace processor { + +class ScanRelTableLists : public ScanRelTable { +public: + ScanRelTableLists(table_id_t boundNodeTableID, DirectedRelTableData* tableData, + vector propertyIds, const DataPos& inNodeIDVectorPos, + vector outputVectorsPos, unique_ptr child, uint32_t id, + const string& paramsString) + : ScanRelTable{inNodeIDVectorPos, std::move(outputVectorsPos), + PhysicalOperatorType::SCAN_REL_TABLE_LISTS, std::move(child), id, paramsString}, + tableData{tableData} { + scanState = make_unique( + boundNodeTableID, std::move(propertyIds), RelTableDataType::LISTS); + } + + bool getNextTuplesInternal() override; + + inline unique_ptr clone() override { + return make_unique(scanState->boundNodeTableID, tableData, + scanState->propertyIds, inNodeIDVectorPos, outputVectorsPos, children[0]->clone(), id, + paramsString); + } + +private: + DirectedRelTableData* tableData; + unique_ptr scanState; +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/include/processor/operator/scan_column/adj_column_extend.h b/src/include/processor/operator/scan_column/adj_column_extend.h deleted file mode 100644 index 105749c3ca..0000000000 --- a/src/include/processor/operator/scan_column/adj_column_extend.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include "processor/operator/base_extend.h" -#include "processor/operator/filtering_operator.h" -#include "storage/storage_structure/column.h" - -namespace kuzu { -namespace processor { - -class ColumnExtendAndScanRelProperties : public BaseExtendAndScanRelProperties, - public SelVectorOverWriter { -public: - ColumnExtendAndScanRelProperties(const DataPos& inNodeIDVectorPos, - const DataPos& outNodeIDVectorPos, vector outPropertyVectorsPos, Column* adjColumn, - vector propertyColumns, unique_ptr child, uint32_t id, - const string& paramsString) - : BaseExtendAndScanRelProperties{PhysicalOperatorType::COLUMN_EXTEND, inNodeIDVectorPos, - outNodeIDVectorPos, std::move(outPropertyVectorsPos), std::move(child), id, - paramsString}, - adjColumn{adjColumn}, propertyColumns{std::move(propertyColumns)} {} - ~ColumnExtendAndScanRelProperties() override = default; - - bool getNextTuplesInternal() override; - - inline unique_ptr clone() override { - return make_unique(inNodeIDVectorPos, outNodeIDVectorPos, - outPropertyVectorsPos, adjColumn, propertyColumns, children[0]->clone(), id, - paramsString); - } - -private: - Column* adjColumn; - vector propertyColumns; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/scan_column/scan_column.h b/src/include/processor/operator/scan_column/scan_column.h deleted file mode 100644 index 4b8b8ac635..0000000000 --- a/src/include/processor/operator/scan_column/scan_column.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "processor/operator/physical_operator.h" - -using namespace kuzu::storage; - -namespace kuzu { -namespace processor { - -class BaseScanColumn : public PhysicalOperator { -public: - BaseScanColumn(PhysicalOperatorType operatorType, const DataPos& inputNodeIDVectorPos, - unique_ptr child, uint32_t id, const string& paramsString) - : PhysicalOperator{operatorType, std::move(child), id, paramsString}, - inputNodeIDVectorPos{inputNodeIDVectorPos} {} - - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - -protected: - DataPos inputNodeIDVectorPos; - shared_ptr inputNodeIDVector; -}; - -class ScanMultipleColumns : public BaseScanColumn { -protected: - ScanMultipleColumns(const DataPos& inVectorPos, vector outPropertyVectorsPos, - unique_ptr child, uint32_t id, const string& paramsString) - : BaseScanColumn{PhysicalOperatorType::SCAN_NODE_PROPERTY, inVectorPos, std::move(child), - id, paramsString}, - outPropertyVectorsPos{std::move(outPropertyVectorsPos)} {} - - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - -protected: - vector outPropertyVectorsPos; - vector> outPropertyVectors; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/scan_column/scan_node_properties.h b/src/include/processor/operator/scan_column/scan_node_properties.h deleted file mode 100644 index 896c0fbbdb..0000000000 --- a/src/include/processor/operator/scan_column/scan_node_properties.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "processor/operator/scan_column/scan_column.h" -#include "storage/storage_structure/column.h" - -namespace kuzu { -namespace processor { - -class ScanSingleNodeTableProperties : public ScanMultipleColumns { -public: - ScanSingleNodeTableProperties(const DataPos& inVectorPos, vector outVectorsPos, - vector propertyColumns, unique_ptr prevOperator, uint32_t id, - const string& paramsString) - : ScanMultipleColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id, - paramsString}, - propertyColumns{std::move(propertyColumns)} {} - - bool getNextTuplesInternal() override; - - inline unique_ptr clone() override { - return make_unique(inputNodeIDVectorPos, - outPropertyVectorsPos, propertyColumns, children[0]->clone(), id, paramsString); - } - -private: - vector propertyColumns; -}; - -class ScanMultiNodeTableProperties : public ScanMultipleColumns { -public: - ScanMultiNodeTableProperties(const DataPos& inVectorPos, vector outVectorsPos, - unordered_map> tableIDToPropertyColumns, - unique_ptr prevOperator, uint32_t id, const string& paramsString) - : ScanMultipleColumns{inVectorPos, std::move(outVectorsPos), std::move(prevOperator), id, - paramsString}, - tableIDToPropertyColumns{std::move(tableIDToPropertyColumns)} {} - - bool getNextTuplesInternal() override; - - inline unique_ptr clone() override { - return make_unique(inputNodeIDVectorPos, - outPropertyVectorsPos, tableIDToPropertyColumns, children[0]->clone(), id, - paramsString); - } - -private: - unordered_map> tableIDToPropertyColumns; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/scan_list/scan_rel_table_lists.h b/src/include/processor/operator/scan_list/scan_rel_table_lists.h deleted file mode 100644 index 5f31070c19..0000000000 --- a/src/include/processor/operator/scan_list/scan_rel_table_lists.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include "processor/operator/base_extend.h" -#include "storage/storage_structure/lists/lists.h" - -namespace kuzu { -namespace processor { - -class ScanRelTableLists : public BaseExtendAndScanRelProperties { -public: - ScanRelTableLists(const DataPos& inNodeIDVectorPos, const DataPos& outNodeIDVectorPos, - vector outPropertyVectorsPos, Lists* adjList, vector propertyLists, - unique_ptr child, uint32_t id, const string& paramsString) - : BaseExtendAndScanRelProperties{PhysicalOperatorType::LIST_EXTEND, inNodeIDVectorPos, - outNodeIDVectorPos, std::move(outPropertyVectorsPos), std::move(child), id, - paramsString}, - adjList{adjList}, propertyLists{std::move(propertyLists)} {} - ~ScanRelTableLists() override = default; - - void initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) override; - - bool getNextTuplesInternal() override; - - inline unique_ptr clone() override { - return make_unique(inNodeIDVectorPos, outNodeIDVectorPos, - outPropertyVectorsPos, adjList, propertyLists, children[0]->clone(), id, paramsString); - } - -private: - void scanPropertyLists(); - -private: - // lists - Lists* adjList; - vector propertyLists; - // list handles - shared_ptr adjListHandle; - vector> propertyListHandles; - // sync state between adj and property lists - unique_ptr syncState; -}; - -} // namespace processor -} // namespace kuzu diff --git a/src/include/processor/operator/var_length_extend/var_length_adj_list_extend.h b/src/include/processor/operator/var_length_extend/var_length_adj_list_extend.h index 4069b67037..8b4f55d20f 100644 --- a/src/include/processor/operator/var_length_extend/var_length_adj_list_extend.h +++ b/src/include/processor/operator/var_length_extend/var_length_adj_list_extend.h @@ -16,8 +16,8 @@ struct AdjListExtendDFSLevelInfo : DFSLevelInfo { uint64_t parent; uint64_t childrenIdx; - shared_ptr listSyncState; - shared_ptr listHandle; + unique_ptr listSyncState; + unique_ptr listHandle; }; class VarLengthAdjListExtend : public VarLengthExtend { diff --git a/src/include/processor/result/result_set.h b/src/include/processor/result/result_set.h index 5a8aa6e9f7..a9ec98e875 100644 --- a/src/include/processor/result/result_set.h +++ b/src/include/processor/result/result_set.h @@ -4,7 +4,6 @@ #include "common/data_chunk/data_chunk.h" #include "processor/data_pos.h" -#include "storage/storage_structure/lists/list_sync_state.h" using namespace kuzu::common; using namespace kuzu::storage; diff --git a/src/include/storage/in_mem_csv_copier/in_mem_node_csv_copier.h b/src/include/storage/in_mem_csv_copier/in_mem_node_csv_copier.h index cf8128b06d..89f711bc1e 100644 --- a/src/include/storage/in_mem_csv_copier/in_mem_node_csv_copier.h +++ b/src/include/storage/in_mem_csv_copier/in_mem_node_csv_copier.h @@ -21,7 +21,7 @@ class InMemNodeCSVCopier : public InMemStructuresCSVCopier { private: void initializeColumnsAndList(); - void countLinesPerBlock(uint64_t numStructuredProperties); + void countLinesPerBlock(uint64_t numProperties); template void populateColumns(); @@ -39,7 +39,7 @@ class InMemNodeCSVCopier : public InMemStructuresCSVCopier { // Concurrent tasks. // Note that primaryKeyPropertyIdx is *NOT* the property ID of the primary key property. - // Instead, it is the index in the structured columns that we expect it to appear. + // Instead, it is the index in the columns that we expect it to appear. template static void populateColumnsTask(uint64_t primaryKeyPropertyIdx, uint64_t blockId, uint64_t offsetStart, HashIndexBuilder* pkIndex, InMemNodeCSVCopier* copier); @@ -47,7 +47,7 @@ class InMemNodeCSVCopier : public InMemStructuresCSVCopier { private: NodeTableSchema* nodeTableSchema; uint64_t numNodes; - vector> structuredColumns; + vector> columns; NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs; }; diff --git a/src/include/storage/in_mem_storage_structure/in_mem_column.h b/src/include/storage/in_mem_storage_structure/in_mem_column.h index 77bec7b658..7affb8619e 100644 --- a/src/include/storage/in_mem_storage_structure/in_mem_column.h +++ b/src/include/storage/in_mem_storage_structure/in_mem_column.h @@ -9,7 +9,7 @@ namespace storage { class InMemColumn { public: - // For structured property columns. + // For property columns. InMemColumn(string fName, DataType dataType, uint64_t numBytesForElement, uint64_t numElements); virtual ~InMemColumn() = default; diff --git a/src/include/storage/node_id_compression_scheme.h b/src/include/storage/node_id_compression_scheme.h index dca525c348..09b9fb8ccc 100644 --- a/src/include/storage/node_id_compression_scheme.h +++ b/src/include/storage/node_id_compression_scheme.h @@ -10,15 +10,15 @@ namespace common { class NodeIDCompressionScheme { public: - NodeIDCompressionScheme() : commonTableID{UINT64_MAX} {} + NodeIDCompressionScheme() : commonTableID{INVALID_TABLE_ID} {} explicit NodeIDCompressionScheme(const unordered_set& nbrNodeTableIDs) - : commonTableID{nbrNodeTableIDs.size() == 1 ? *nbrNodeTableIDs.begin() : UINT64_MAX} {} + : commonTableID{nbrNodeTableIDs.size() == 1 ? *nbrNodeTableIDs.begin() : INVALID_TABLE_ID} { + } inline uint64_t getNumBytesForNodeIDAfterCompression() const { - return commonTableID == UINT64_MAX ? Types::getDataTypeSize(NODE_ID) : - sizeof(node_offset_t); + return commonTableID == INVALID_TABLE_ID ? Types::getDataTypeSize(NODE_ID) : + sizeof(node_offset_t); } - inline table_id_t getCommonTableID() const { return commonTableID; } void readNodeID(uint8_t* data, nodeID_t* nodeID) const; void writeNodeID(uint8_t* data, const nodeID_t& nodeID) const; diff --git a/src/include/storage/storage_structure/column.h b/src/include/storage/storage_structure/column.h index 12b17bd866..073bc8ad63 100644 --- a/src/include/storage/storage_structure/column.h +++ b/src/include/storage/storage_structure/column.h @@ -85,6 +85,11 @@ class PropertyColumnWithOverflow : public Column { : Column{structureIDAndFNameOfMainColumn, dataType, bufferManager, isInMemory, wal}, diskOverflowFile{structureIDAndFNameOfMainColumn, bufferManager, isInMemory, wal} {} + inline void read(Transaction* transaction, const shared_ptr& nodeIDVector, + const shared_ptr& resultVector) { + resultVector->resetOverflowBuffer(); + Column::read(transaction, nodeIDVector, resultVector); + } inline DiskOverflowFile* getDiskOverflowFile() { return &diskOverflowFile; } inline VersionedFileHandle* getDiskOverflowFileHandle() { diff --git a/src/include/storage/storage_structure/lists/list_handle.h b/src/include/storage/storage_structure/lists/list_handle.h new file mode 100644 index 0000000000..dc2b2255ce --- /dev/null +++ b/src/include/storage/storage_structure/lists/list_handle.h @@ -0,0 +1,126 @@ +#pragma once + +#include "common/types/node_id_t.h" +#include "common/types/types.h" +#include "storage/storage_structure/lists/list_headers.h" +#include "storage/storage_structure/lists/lists_metadata.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +enum class ListSourceStore : uint8_t { + PERSISTENT_STORE = 0, + UPDATE_STORE = 1, +}; + +struct ListHandle; +// ListSyncState holds the data that is required to synchronize reading from multiple Lists that +// have related data and hence share same AdjListHeaders. The Lists that share a single copy of +// AdjListHeaders are - edges, i.e., adjLists of a rel table in a particular direction, e.g., +// forward or backward, and the properties of those edges. For the case of reading from a large +// list, we do not / cannot read the entire list in a single operation since it can be very big, +// hence we read in batches from a definite start point to an end point. List Sync holds this +// information and helps in co-ordinating all related lists so that all of them read the correct +// portion of data. +class ListSyncState { + + friend class ListHandle; + +public: + explicit ListSyncState() { resetState(); }; + + inline bool isBoundNodeOffsetInValid() const { return boundNodeOffset == INVALID_NODE_OFFSET; } + + bool hasMoreAndSwitchSourceIfNecessary(); + + void resetState(); + +private: + inline bool hasValidRangeToRead() const { return UINT32_MAX != startElemOffset; } + inline uint32_t getNumValuesInList() { + return sourceStore == ListSourceStore::PERSISTENT_STORE ? numValuesInPersistentStore : + numValuesInUpdateStore; + } + inline bool hasMoreLeftInList() { + return (startElemOffset + numValuesToRead) < getNumValuesInList(); + } + inline void switchToUpdateStore() { + sourceStore = ListSourceStore::UPDATE_STORE; + startElemOffset = UINT32_MAX; + } + +private: + node_offset_t boundNodeOffset; + list_header_t listHeader; + uint32_t numValuesInUpdateStore; + uint32_t numValuesInPersistentStore; + uint32_t startElemOffset; + uint32_t numValuesToRead; + ListSourceStore sourceStore; +}; + +struct ListHandle { + explicit ListHandle(ListSyncState& listSyncState) : listSyncState{listSyncState} {} + + static inline std::function getPageMapper( + ListsMetadata& listMetadata, list_header_t listHeader, node_offset_t nodeOffset) { + return ListHeaders::isALargeList(listHeader) ? + listMetadata.getPageMapperForLargeListIdx( + ListHeaders::getLargeListIdx(listHeader)) : + listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset)); + } + static inline PageElementCursor getPageCursor( + list_header_t listHeader, uint64_t numElementsPerPage) { + return ListHeaders::isALargeList(listHeader) ? + PageUtils::getPageElementCursorForPos(0, numElementsPerPage) : + PageUtils::getPageElementCursorForPos( + ListHeaders::getSmallListCSROffset(listHeader), numElementsPerPage); + } + + inline void setMapper(ListsMetadata& listMetadata) { + mapper = + getPageMapper(listMetadata, listSyncState.listHeader, listSyncState.boundNodeOffset); + } + inline void resetSyncState() { listSyncState.resetState(); } + inline void initSyncState(node_offset_t boundNodeOffset, list_header_t listHeader, + uint64_t numValuesInUpdateStore, uint64_t numValuesInPersistentStore, + ListSourceStore sourceStore) { + listSyncState.boundNodeOffset = boundNodeOffset; + listSyncState.listHeader = listHeader; + listSyncState.numValuesInUpdateStore = numValuesInUpdateStore; + listSyncState.numValuesInPersistentStore = numValuesInPersistentStore; + listSyncState.sourceStore = sourceStore; + } + inline list_header_t getListHeader() const { return listSyncState.listHeader; } + inline node_offset_t getBoundNodeOffset() const { return listSyncState.boundNodeOffset; } + inline ListSourceStore getListSourceStore() { return listSyncState.sourceStore; } + inline uint32_t getStartElemOffset() const { return listSyncState.startElemOffset; } + inline uint32_t getEndElemOffset() const { + return listSyncState.startElemOffset + listSyncState.numValuesToRead; + } + inline uint32_t getNumValuesToRead() const { return listSyncState.numValuesToRead; } + inline uint32_t getNumValuesInList() { + return listSyncState.sourceStore == ListSourceStore::PERSISTENT_STORE ? + listSyncState.numValuesInPersistentStore : + listSyncState.numValuesInUpdateStore; + } + inline bool hasValidRangeToRead() { return listSyncState.hasValidRangeToRead(); } + inline void setRangeToRead(uint32_t startIdx, uint32_t numValuesToRead) { + listSyncState.startElemOffset = startIdx; + listSyncState.numValuesToRead = numValuesToRead; + } + inline bool hasMoreAndSwitchSourceIfNecessary() { + return listSyncState.hasMoreAndSwitchSourceIfNecessary(); + } + +private: + ListSyncState& listSyncState; + +public: + std::function mapper; +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/storage_structure/lists/list_sync_state.h b/src/include/storage/storage_structure/lists/list_sync_state.h deleted file mode 100644 index 863cd6252c..0000000000 --- a/src/include/storage/storage_structure/lists/list_sync_state.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include - -#include "common/types/node_id_t.h" -#include "common/types/types.h" - -using namespace kuzu::common; - -namespace kuzu { -namespace storage { - -enum class ListSourceStore : uint8_t { - PERSISTENT_STORE = 0, - UPDATE_STORE = 1, -}; - -// ListSyncState holds the data that is required to synchronize reading from multiple Lists that -// have related data and hence share same AdjListHeaders. The Lists that share a single copy of -// AdjListHeaders are - edges, i.e., adjLists of a rel table in a particular direction, e.g., -// forward or backward, and the properties of those edges. For the case of reading from a large -// list, we do not / cannot read the entire list in a single operation since it can be very big, -// hence we read in batches from a definite start point to an end point. List Sync holds this -// information and helps in co-ordinating all related lists so that all of them read the correct -// portion of data. -class ListSyncState { - -public: - ListSyncState() { reset(); }; - - inline void init(node_offset_t boundNodeOffset_, list_header_t listHeader_, - uint64_t numValuesInUpdateStore_, uint64_t numValuesInPersistentStore_, - ListSourceStore sourceStore_) { - this->boundNodeOffset = boundNodeOffset_; - this->listHeader = listHeader_; - this->numValuesInUpdateStore = numValuesInUpdateStore_; - this->numValuesInPersistentStore = numValuesInPersistentStore_; - this->sourceStore = sourceStore_; - } - - inline void setRangeToRead(uint32_t startIdx_, uint32_t numValuesToRead_) { - this->startElemOffset = startIdx_; - this->numValuesToRead = numValuesToRead_; - } - inline node_offset_t getBoundNodeOffset() const { return boundNodeOffset; }; - inline uint32_t getStartElemOffset() const { return startElemOffset; } - inline uint32_t getEndElemOffset() const { return startElemOffset + numValuesToRead; } - inline bool hasValidRangeToRead() const { return UINT32_MAX != startElemOffset; } - inline uint32_t getNumValuesToRead() const { return numValuesToRead; } - inline ListSourceStore getListSourceStore() const { return sourceStore; } - inline list_header_t getListHeader() const { return listHeader; } - inline uint32_t getNumValuesInList() { - return sourceStore == ListSourceStore::PERSISTENT_STORE ? numValuesInPersistentStore : - numValuesInUpdateStore; - } - - bool hasMoreAndSwitchSourceIfNecessary(); - void reset(); - -private: - inline bool hasMoreLeftInList() { - return (startElemOffset + numValuesToRead) < getNumValuesInList(); - } - inline void switchToUpdateStore() { - sourceStore = ListSourceStore::UPDATE_STORE; - startElemOffset = UINT32_MAX; - } - -private: - node_offset_t boundNodeOffset; - list_header_t listHeader; - uint32_t numValuesInUpdateStore; - uint32_t numValuesInPersistentStore; - uint32_t startElemOffset; - uint32_t numValuesToRead; - ListSourceStore sourceStore; -}; - -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/storage_structure/lists/lists.h b/src/include/storage/storage_structure/lists/lists.h index e800101c0e..20e3c052ae 100644 --- a/src/include/storage/storage_structure/lists/lists.h +++ b/src/include/storage/storage_structure/lists/lists.h @@ -3,17 +3,9 @@ #include "common/types/literal.h" #include "lists_update_store.h" #include "storage/storage_structure/disk_overflow_file.h" -#include "storage/storage_structure/lists/list_headers.h" -#include "storage/storage_structure/lists/list_sync_state.h" -#include "storage/storage_structure/lists/lists_metadata.h" +#include "storage/storage_structure/lists/list_handle.h" #include "storage/storage_structure/storage_structure.h" -namespace kuzu { -namespace testing { -class CopyCSVEmptyListsTest; -} // namespace testing -} // namespace kuzu - namespace kuzu { namespace storage { @@ -35,38 +27,6 @@ struct InMemList { unique_ptr nullMask; }; -struct CursorAndMapper { - void reset(ListsMetadata& listMetadata, uint64_t numElementsPerPage, list_header_t listHeader, - node_offset_t nodeOffset) { - if (ListHeaders::isALargeList(listHeader)) { - cursor = PageUtils::getPageElementCursorForPos(0, numElementsPerPage); - mapper = - listMetadata.getPageMapperForLargeListIdx(ListHeaders::getLargeListIdx(listHeader)); - } else { - cursor = PageUtils::getPageElementCursorForPos( - ListHeaders::getSmallListCSROffset(listHeader), numElementsPerPage); - mapper = - listMetadata.getPageMapperForChunkIdx(StorageUtils::getListChunkIdx(nodeOffset)); - } - } - - std::function mapper; - PageElementCursor cursor; -}; - -struct ListHandle { - explicit ListHandle(ListSyncState& listSyncState) : listSyncState{listSyncState} {} - - inline void resetCursorMapper(ListsMetadata& listMetadata, uint64_t numElementsPerPage) { - cursorAndMapper.reset(listMetadata, numElementsPerPage, listSyncState.getListHeader(), - listSyncState.getBoundNodeOffset()); - } - inline void reset() { listSyncState.reset(); } - - ListSyncState& listSyncState; - CursorAndMapper cursorAndMapper; -}; - /** * A lists data structure holds a list of homogeneous values for each offset in it. Lists are used * for storing Adjacency List, Rel Property Lists. @@ -80,7 +40,6 @@ struct ListHandle { * actual physical location in the Lists file on disk. * */ class Lists : public BaseColumnOrList { - friend class kuzu::testing::CopyCSVEmptyListsTest; friend class ListsUpdateIterator; friend class ListsUpdateIteratorFactory; @@ -118,8 +77,10 @@ class Lists : public BaseColumnOrList { } virtual inline void rollbackInMemoryIfNecessary() { metadata.rollbackInMemoryIfNecessary(); } virtual inline bool mayContainNulls() const { return true; } - virtual inline void setDeletedRelsIfNecessary(Transaction* transaction, - ListSyncState& listSyncState, const shared_ptr& valueVector) {} + virtual inline void setDeletedRelsIfNecessary(Transaction* transaction, ListHandle& listHandle, + const shared_ptr& valueVector) { + // DO NOTHING. + } virtual void readValues(const shared_ptr& valueVector, ListHandle& listHandle); virtual void readFromSmallList( const shared_ptr& valueVector, ListHandle& listHandle); @@ -130,15 +91,15 @@ class Lists : public BaseColumnOrList { TransactionType transactionType, node_offset_t nodeOffset); void initListReadingState( node_offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType); - unique_ptr getInMemListWithDataFromUpdateStoreOnly( + unique_ptr createInMemListWithDataFromUpdateStoreOnly( node_offset_t nodeOffset, vector& insertedRelsTupleIdxInFT); // This function writes the persistent store data (skipping over the deleted rels) and update // store data to the inMemList. unique_ptr writeToInMemList(node_offset_t nodeOffset, const vector& insertedRelTupleIdxesInFT, const unordered_set& deletedRelOffsetsForList); - void fillInMemListsFromPersistentStore(CursorAndMapper& cursorAndMapper, - uint64_t numElfementsInPersistentStore, InMemList& inMemList, + void fillInMemListsFromPersistentStore(node_offset_t nodeOffset, + uint64_t numElementsInPersistentStore, InMemList& inMemList, const unordered_set& deletedRelOffsetsInList); protected: @@ -228,7 +189,7 @@ class AdjLists : public Lists { inline bool mayContainNulls() const override { return false; } - void readValues(const shared_ptr& valueVector, ListHandle& listSyncState) override; + void readValues(const shared_ptr& valueVector, ListHandle& listHandle) override; // Currently, used only in copyCSV tests. unique_ptr> readAdjacencyListOfNode(node_offset_t nodeOffset); @@ -253,7 +214,7 @@ class AdjLists : public Lists { void readFromSmallList( const shared_ptr& valueVector, ListHandle& listHandle) override; void readFromListsUpdateStore( - ListSyncState& listSyncState, const shared_ptr& valueVector); + ListHandle& listHandle, const shared_ptr& valueVector); void readFromListsPersistentStore( ListHandle& listHandle, const shared_ptr& valueVector); @@ -269,7 +230,7 @@ class RelIDList : public Lists { BufferManager& bufferManager, bool isInMemory, WAL* wal, ListsUpdateStore* listsUpdateStore) : Lists{storageStructureIDAndFName, dataType, elementSize, std::move(headers), bufferManager, isInMemory, wal, listsUpdateStore} {} - void setDeletedRelsIfNecessary(Transaction* transaction, ListSyncState& listSyncState, + void setDeletedRelsIfNecessary(Transaction* transaction, ListHandle& listHandle, const shared_ptr& relIDVector) override; unordered_set getDeletedRelOffsetsInListForNodeOffset(node_offset_t nodeOffset); }; diff --git a/src/include/storage/storage_structure/lists/lists_metadata.h b/src/include/storage/storage_structure/lists/lists_metadata.h index b668323553..ca94be48c7 100644 --- a/src/include/storage/storage_structure/lists/lists_metadata.h +++ b/src/include/storage/storage_structure/lists/lists_metadata.h @@ -3,19 +3,12 @@ #include "storage/storage_structure/disk_array.h" #include "storage/storage_structure/storage_structure.h" -using namespace std; using namespace kuzu::common; namespace spdlog { class logger; } -namespace kuzu { -namespace testing { -class CopyCSVEmptyListsTest; -} // namespace testing -} // namespace kuzu - namespace kuzu { namespace storage { @@ -41,7 +34,6 @@ class BaseListsMetadata { }; class ListsMetadata : public BaseListsMetadata { - friend class kuzu::testing::CopyCSVEmptyListsTest; friend class ListsUpdateIterator; public: diff --git a/src/include/storage/storage_structure/lists/lists_update_store.h b/src/include/storage/storage_structure/lists/lists_update_store.h index a4c4ba36a3..57f9cb41b5 100644 --- a/src/include/storage/storage_structure/lists/lists_update_store.h +++ b/src/include/storage/storage_structure/lists/lists_update_store.h @@ -7,7 +7,7 @@ #include "common/types/node_id_t.h" #include "common/types/types.h" #include "processor/result/factorized_table.h" -#include "storage/storage_structure/lists/list_sync_state.h" +#include "storage/storage_structure/lists/list_handle.h" #include "storage/storage_utils.h" namespace kuzu { @@ -18,14 +18,14 @@ using namespace catalog; struct ListUpdates { public: - ListUpdates() : newlyAddedNode{false} {} + ListUpdates() : isNewlyAddedNode{false} {} inline bool hasUpdates() const { - return newlyAddedNode || !insertedRelsTupleIdxInFT.empty() || !deletedRelIDs.empty(); + return isNewlyAddedNode || !insertedRelsTupleIdxInFT.empty() || !deletedRelIDs.empty(); } public: - bool newlyAddedNode; + bool isNewlyAddedNode; vector insertedRelsTupleIdxInFT; unordered_set deletedRelIDs; }; @@ -45,13 +45,9 @@ class ListsUpdateStore { factorizedTable->clear(); initListUpdatesPerTablePerDirection(); } - inline ListUpdatesPerChunk& getListUpdatesPerChunk(ListFileID& listFileID) { - auto relNodeTableAndDir = getRelNodeTableAndDirFromListFileID(listFileID); - return listUpdatesPerTablePerDirection[relNodeTableAndDir.dir].at( - relNodeTableAndDir.srcNodeTableID); - } - inline vector>& getListUpdatesPerTablePerDirection() { - return listUpdatesPerTablePerDirection; + inline map& getListUpdatesPerBoundNodeTableOfDirection( + RelDirection relDirection) { + return listUpdatesPerTablePerDirection[relDirection]; } uint64_t getNumDeletedRels(ListFileID& listFileID, node_offset_t nodeOffset) const; @@ -80,8 +76,8 @@ class ListsUpdateStore { uint64_t getNumInsertedRelsForNodeOffset( ListFileID& listFileID, node_offset_t nodeOffset) const; - void readValues(ListFileID& listFileID, ListSyncState& listSyncState, - shared_ptr valueVector) const; + void readValues( + ListFileID& listFileID, ListHandle& listHandle, shared_ptr valueVector) const; void initNewlyAddedNodes(nodeID_t& nodeID) { for (auto direction : REL_DIRECTIONS) { @@ -89,7 +85,7 @@ class ListsUpdateStore { listUpdatesPerTablePerDirection[direction][nodeID.tableID] [StorageUtils::getListChunkIdx(nodeID.offset)] [nodeID.offset] - .newlyAddedNode = true; + .isNewlyAddedNode = true; } } } @@ -120,9 +116,6 @@ class ListsUpdateStore { static constexpr uint64_t DST_TABLE_ID_IDX_IN_FT = 1; static constexpr uint64_t INTERNAL_REL_ID_IDX_IN_FT = 2; unique_ptr factorizedTable; - shared_ptr srcNodeVector; - shared_ptr dstNodeVector; - shared_ptr nodeDataChunk; vector> listUpdatesPerTablePerDirection; unordered_map propertyIDToColIdxMap; RelTableSchema relTableSchema; diff --git a/src/include/storage/storage_structure/lists/rel_update_store.h b/src/include/storage/storage_structure/lists/rel_update_store.h index 4171dfb9e7..39babc7582 100644 --- a/src/include/storage/storage_structure/lists/rel_update_store.h +++ b/src/include/storage/storage_structure/lists/rel_update_store.h @@ -5,7 +5,7 @@ #include "common/types/node_id_t.h" #include "common/types/types.h" #include "processor/result/factorized_table.h" -#include "storage/storage_structure/lists/list_sync_state.h" +#include "storage/storage_structure/lists/list_handle.h" #include "storage/storage_utils.h" namespace kuzu { diff --git a/src/include/storage/store/node_table.h b/src/include/storage/store/node_table.h index 7e77f54faa..17fe9d5ffd 100644 --- a/src/include/storage/store/node_table.h +++ b/src/include/storage/store/node_table.h @@ -6,10 +6,6 @@ #include "storage/store/nodes_statistics_and_deleted_ids.h" #include "storage/wal/wal.h" -namespace spdlog { -class logger; -} - namespace kuzu { namespace storage { @@ -19,27 +15,27 @@ class NodeTable { NodeTable(NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, bool isInMemory, WAL* wal, NodeTableSchema* nodeTableSchema); - inline node_offset_t getMaxNodeOffset(Transaction* trx) { + void initializeData(NodeTableSchema* nodeTableSchema, BufferManager& bufferManager, WAL* wal); + + inline node_offset_t getMaxNodeOffset(Transaction* trx) const { return nodesStatisticsAndDeletedIDs->getMaxNodeOffset(trx, tableID); } - - inline void setSelVectorForDeletedOffsets(Transaction* trx, shared_ptr& vector) { + inline void setSelVectorForDeletedOffsets( + Transaction* trx, shared_ptr& vector) const { assert(vector->isSequential()); nodesStatisticsAndDeletedIDs->setDeletedNodeOffsetsForMorsel(trx, vector, tableID); } - void loadColumnsAndListsFromDisk( - NodeTableSchema* nodeTableSchema, BufferManager& bufferManager, WAL* wal); + void scan(Transaction* transaction, const shared_ptr& inputIDVector, + const vector& columnIdxes, vector> outputVectors); inline Column* getPropertyColumn(uint64_t propertyIdx) { return propertyColumns[propertyIdx].get(); } inline PrimaryKeyIndex* getPKIndex() const { return pkIndex.get(); } - inline NodesStatisticsAndDeletedIDs* getNodeStatisticsAndDeletedIDs() const { return nodesStatisticsAndDeletedIDs; } - inline table_id_t getTableID() const { return tableID; } node_offset_t addNodeAndResetProperties(ValueVector* primaryKeyVector); @@ -53,14 +49,12 @@ class NodeTable { private: void deleteNode(node_offset_t nodeOffset, ValueVector* primaryKeyVector, uint32_t pos) const; -public: - NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs; - private: - // This is for structured properties. + // TODO(Guodong): Consider moving statistics and deleted ids to catalog. + NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs; + // This is for properties. vector> propertyColumns; // The index for ID property. - // TODO(Guodong): rename this to primary key index unique_ptr pkIndex; table_id_t tableID; bool isInMemory; diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index 55aa266e3c..bdfb080c53 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -9,42 +9,153 @@ namespace kuzu { namespace storage { +// TODO(Guodong): remove the distinction between AdjColumn and Column, also AdjLists and Lists. using table_adj_columns_map_t = unordered_map>; using table_adj_lists_map_t = unordered_map>; using table_property_columns_map_t = unordered_map>>; using table_property_lists_map_t = unordered_map>>; -class RelTable { +enum class RelTableDataType : uint8_t { + COLUMNS = 0, + LISTS = 1, +}; + +struct RelTableScanState { +public: + explicit RelTableScanState(table_id_t boundNodeTableID, vector propertyIds, + RelTableDataType relTableDataType) + : relTableDataType{relTableDataType}, boundNodeTableID{boundNodeTableID}, + propertyIds{std::move(propertyIds)} { + if (relTableDataType == RelTableDataType::LISTS) { + syncState = make_unique(); + // The first listHandle is for adj lists. + listHandles.resize(this->propertyIds.size() + 1); + for (auto i = 0u; i < this->propertyIds.size() + 1; i++) { + listHandles[i] = make_unique(*syncState); + } + } + } + + bool hasMoreAndSwitchSourceIfNecessary() const { + return relTableDataType == RelTableDataType::LISTS && + syncState->hasMoreAndSwitchSourceIfNecessary(); + } + + RelTableDataType relTableDataType; + table_id_t boundNodeTableID; + vector propertyIds; + // sync state between adj and property lists + unique_ptr syncState; + vector> listHandles; +}; +class DirectedRelTableData { +public: + DirectedRelTableData(table_id_t tableID, RelDirection direction, + ListsUpdateStore* listsUpdateStore, bool isInMemoryMode) + : tableID{tableID}, direction{direction}, listsUpdateStore{listsUpdateStore}, + isInMemoryMode{isInMemoryMode} {} + + inline bool hasAdjColumn(table_id_t boundNodeTableID) { + return adjColumns.contains(boundNodeTableID); + } + inline bool hasAdjLists(table_id_t boundNodeTableID) { + return adjLists.contains(boundNodeTableID); + } + inline uint32_t getNumPropertyLists(table_id_t boundNodeTableID) { + return propertyLists.at(boundNodeTableID).size(); + } + + void initializeData(RelTableSchema* tableSchema, BufferManager& bufferManager, WAL* wal); + void initializeColumnsForBoundNodeTable(RelTableSchema* tableSchema, + table_id_t boundNodeTableID, NodeIDCompressionScheme& nodeIDCompressionScheme, + BufferManager& bufferManager, WAL* wal); + void initializeListsForBoundNodeTabl(RelTableSchema* tableSchema, table_id_t boundNodeTableID, + NodeIDCompressionScheme& nodeIDCompressionScheme, BufferManager& bufferManager, WAL* wal); + Column* getPropertyColumn(table_id_t boundNodeTableID, uint64_t propertyIdx); + Lists* getPropertyLists(table_id_t boundNodeTableID, uint64_t propertyIdx); + AdjColumn* getAdjColumn(table_id_t boundNodeTableID); + AdjLists* getAdjLists(table_id_t boundNodeTableID); + + inline void scan(Transaction* transaction, RelTableScanState& scanState, + const shared_ptr& inNodeIDVector, + vector>& outputVectors) { + if (scanState.relTableDataType == RelTableDataType::COLUMNS) { + scanColumns(transaction, scanState, inNodeIDVector, outputVectors); + } else { + scanLists(transaction, scanState, inNodeIDVector, outputVectors); + } + } + + void insertRel(table_id_t boundTableID, const shared_ptr& boundVector, + const shared_ptr& nbrVector, + const vector>& relPropertyVectors); + void deleteRel(table_id_t boundTableID, const shared_ptr& boundVector); + void performOpOnListsWithUpdates(const std::function& opOnListsWithUpdates); + vector> getListsUpdateIterators(table_id_t srcTableID); + +private: + void scanColumns(Transaction* transaction, RelTableScanState& scanState, + const shared_ptr& inNodeIDVector, + vector>& outputVectors); + void scanLists(Transaction* transaction, RelTableScanState& scanState, + const shared_ptr& inNodeIDVector, + vector>& outputVectors); + +private: + table_property_columns_map_t propertyColumns; + table_adj_columns_map_t adjColumns; + table_property_lists_map_t propertyLists; + table_adj_lists_map_t adjLists; + table_id_t tableID; + RelDirection direction; + ListsUpdateStore* listsUpdateStore; + bool isInMemoryMode; +}; + +class RelTable { public: RelTable(const catalog::Catalog& catalog, table_id_t tableID, BufferManager& bufferManager, MemoryManager& memoryManager, bool isInMemoryMode, WAL* wal); - void loadColumnsAndListsFromDisk(const catalog::Catalog& catalog, BufferManager& bufferManager); + void initializeData(RelTableSchema* tableSchema, BufferManager& bufferManager); -public: inline Column* getPropertyColumn( RelDirection relDirection, table_id_t boundNodeTableID, uint64_t propertyIdx) { - return propertyColumns[relDirection].at(boundNodeTableID)[propertyIdx].get(); + return relDirection == FWD ? + fwdRelTableData->getPropertyColumn(boundNodeTableID, propertyIdx) : + bwdRelTableData->getPropertyColumn(boundNodeTableID, propertyIdx); } inline Lists* getPropertyLists( RelDirection relDirection, table_id_t boundNodeTableID, uint64_t propertyIdx) { - return propertyLists[relDirection].at(boundNodeTableID)[propertyIdx].get(); + return relDirection == FWD ? + fwdRelTableData->getPropertyLists(boundNodeTableID, propertyIdx) : + bwdRelTableData->getPropertyLists(boundNodeTableID, propertyIdx); + } + inline uint32_t getNumPropertyLists(RelDirection relDirection, table_id_t boundNodeTableID) { + return relDirection == FWD ? fwdRelTableData->getNumPropertyLists(boundNodeTableID) : + bwdRelTableData->getNumPropertyLists(boundNodeTableID); } inline bool hasAdjColumn(RelDirection relDirection, table_id_t boundNodeTableID) { - return adjColumns[relDirection].contains(boundNodeTableID); + return relDirection == FWD ? fwdRelTableData->hasAdjColumn(boundNodeTableID) : + bwdRelTableData->hasAdjColumn(boundNodeTableID); } inline AdjColumn* getAdjColumn(RelDirection relDirection, table_id_t boundNodeTableID) { - return adjColumns[relDirection].at(boundNodeTableID).get(); + return relDirection == FWD ? fwdRelTableData->getAdjColumn(boundNodeTableID) : + bwdRelTableData->getAdjColumn(boundNodeTableID); } inline bool hasAdjList(RelDirection relDirection, table_id_t boundNodeTableID) { - return adjLists[relDirection].contains(boundNodeTableID); + return relDirection == FWD ? fwdRelTableData->hasAdjLists(boundNodeTableID) : + bwdRelTableData->hasAdjLists(boundNodeTableID); } inline AdjLists* getAdjLists(RelDirection relDirection, table_id_t boundNodeTableID) { - return adjLists[relDirection].at(boundNodeTableID).get(); + return relDirection == FWD ? fwdRelTableData->getAdjLists(boundNodeTableID) : + bwdRelTableData->getAdjLists(boundNodeTableID); } - inline ListsUpdateStore* getListsUpdateStore() { return listsUpdateStore.get(); } inline table_id_t getRelTableID() const { return tableID; } + inline DirectedRelTableData* getDirectedTableData(RelDirection relDirection) { + return relDirection == FWD ? fwdRelTableData.get() : bwdRelTableData.get(); + } vector getAdjListsForNodeTable(table_id_t tableID); vector getAdjColumnsForNodeTable(table_id_t tableID); @@ -63,28 +174,16 @@ class RelTable { private: inline void addToUpdatedRelTables() { wal->addToUpdatedRelTables(tableID); } inline void clearListsUpdateStore() { listsUpdateStore->clear(); } - void initAdjColumnOrLists( - const catalog::Catalog& catalog, BufferManager& bufferManager, WAL* wal); - void initPropertyListsAndColumns( - const catalog::Catalog& catalog, BufferManager& bufferManager, WAL* wal); - void initPropertyColumnsForRelTable(const catalog::Catalog& catalog, RelDirection relDirection, - BufferManager& bufferManager, WAL* wal); - void initPropertyListsForRelTable(const catalog::Catalog& catalog, RelDirection relDirection, - BufferManager& bufferManager, WAL* wal); - void performOpOnListsWithUpdates( - std::function opOnListsWithUpdates, std::function opIfHasUpdates); - string inferRelMultiplicity(table_id_t srcTableID, table_id_t dstTableID); + void performOpOnListsWithUpdates(const std::function& opOnListsWithUpdates, + const std::function& opIfHasUpdates); vector> getListsUpdateIterators( RelDirection relDirection, table_id_t srcTableID) const; + void prepareCommitForDirection(RelDirection relDirection); private: - shared_ptr logger; table_id_t tableID; - vector propertyColumns; - vector adjColumns; - vector propertyLists; - vector adjLists; - bool isInMemoryMode; + unique_ptr fwdRelTableData; + unique_ptr bwdRelTableData; unique_ptr listsUpdateStore; WAL* wal; }; diff --git a/src/include/storage/wal/wal_record.h b/src/include/storage/wal/wal_record.h index a9704caf15..6454d5fc60 100644 --- a/src/include/storage/wal/wal_record.h +++ b/src/include/storage/wal/wal_record.h @@ -99,6 +99,7 @@ struct ListFileID { } }; +// TODO(Guodong): Rename to NodePropertyColumnID struct StructuredNodePropertyColumnID { table_id_t tableID; uint32_t propertyID; @@ -133,7 +134,7 @@ struct RelPropertyColumnID { RelPropertyColumnID() = default; RelPropertyColumnID(RelNodeTableAndDir relNodeTableAndDir, uint32_t propertyID) - : relNodeTableAndDir{relNodeTableAndDir}, propertyID{move(propertyID)} {} + : relNodeTableAndDir{relNodeTableAndDir}, propertyID{std::move(propertyID)} {} inline bool operator==(const RelPropertyColumnID& rhs) const { return relNodeTableAndDir == rhs.relNodeTableAndDir && propertyID == rhs.propertyID; @@ -151,11 +152,11 @@ struct ColumnFileID { ColumnFileID() = default; explicit ColumnFileID(StructuredNodePropertyColumnID structuredNodePropertyColumnID) - : columnType{STRUCTURED_NODE_PROPERTY_COLUMN}, structuredNodePropertyColumnID{ - move(structuredNodePropertyColumnID)} {} + : columnType{STRUCTURED_NODE_PROPERTY_COLUMN}, structuredNodePropertyColumnID{std::move( + structuredNodePropertyColumnID)} {} explicit ColumnFileID(AdjColumnID adjColumnID) - : columnType{ADJ_COLUMN}, adjColumnID{move(adjColumnID)} {} + : columnType{ADJ_COLUMN}, adjColumnID{std::move(adjColumnID)} {} explicit ColumnFileID(RelPropertyColumnID relPropertyColumnID) : columnType{REL_PROPERTY_COLUMN}, relPropertyColumnID{relPropertyColumnID} {} diff --git a/src/include/storage/wal_replayer_utils.h b/src/include/storage/wal_replayer_utils.h index b50ddb3607..7e4b86ed40 100644 --- a/src/include/storage/wal_replayer_utils.h +++ b/src/include/storage/wal_replayer_utils.h @@ -18,7 +18,7 @@ class WALReplayerUtils { const string& directory, const map& maxNodeOffsetsPerTable); static void createEmptyDBFilesForNewNodeTable( - Catalog* catalog, table_id_t tableID, string directory); + Catalog* catalog, table_id_t tableID, const string& directory); static inline void replaceNodeFilesWithVersionFromWALIfExists( NodeTableSchema* nodeTableSchema, string directory) { @@ -60,19 +60,19 @@ class WALReplayerUtils { const map& maxNodeOffsetsPerTable, RelDirection relDirection, const string& directory, RelTableSchema* relTableSchema); - static void replaceOriginalColumnFilesWithWALVersionIfExists(string originalColFileName); + static void replaceOriginalColumnFilesWithWALVersionIfExists(const string& originalColFileName); - static void replaceOriginalListFilesWithWALVersionIfExists(string originalListFileName); + static void replaceOriginalListFilesWithWALVersionIfExists(const string& originalListFileName); - static void removeListFilesIfExists(string fileName); + static void removeListFilesIfExists(const string& fileName); - static void removeColumnFilesIfExists(string fileName); + static void removeColumnFilesIfExists(const string& fileName); - static void fileOperationOnNodeFiles(NodeTableSchema* nodeTableSchema, string directory, + static void fileOperationOnNodeFiles(NodeTableSchema* nodeTableSchema, const string& directory, std::function columnFileOperation, std::function listFileOperation); - static void fileOperationOnRelFiles(RelTableSchema* relTableSchema, string directory, + static void fileOperationOnRelFiles(RelTableSchema* relTableSchema, const string& directory, const Catalog* catalog, std::function columnFileOperation, std::function listFileOperation); diff --git a/src/processor/mapper/map_extend.cpp b/src/processor/mapper/map_extend.cpp index 705d83e458..1dbcb32473 100644 --- a/src/processor/mapper/map_extend.cpp +++ b/src/processor/mapper/map_extend.cpp @@ -1,90 +1,50 @@ #include "planner/logical_plan/logical_operator/logical_extend.h" #include "processor/mapper/plan_mapper.h" -#include "processor/operator/generic_extend.h" -#include "processor/operator/scan_column/adj_column_extend.h" -#include "processor/operator/scan_list/scan_rel_table_lists.h" +#include "processor/operator/scan/generic_scan_rel_tables.h" +#include "processor/operator/scan/scan_rel_table_columns.h" +#include "processor/operator/scan/scan_rel_table_lists.h" #include "processor/operator/var_length_extend/var_length_adj_list_extend.h" #include "processor/operator/var_length_extend/var_length_column_extend.h" namespace kuzu { namespace processor { -static vector populatePropertyColumns(table_id_t boundNodeTableID, table_id_t relID, - RelDirection direction, const expression_vector& properties, const RelsStore& relsStore) { - vector propertyColumns; +static vector populatePropertyIds( + table_id_t relID, const expression_vector& properties) { + vector outputColumns; for (auto& expression : properties) { auto propertyExpression = (PropertyExpression*)expression.get(); - auto column = relsStore.getRelPropertyColumn( - direction, relID, boundNodeTableID, propertyExpression->getPropertyID(relID)); - propertyColumns.push_back(column); + outputColumns.push_back(propertyExpression->getPropertyID(relID)); } - return propertyColumns; + return outputColumns; } -static vector populatePropertyLists(table_id_t boundNodeTableID, table_id_t relID, - RelDirection direction, const expression_vector& properties, const RelsStore& relsStore) { - vector propertyLists; - for (auto& expression : properties) { - auto propertyExpression = (PropertyExpression*)expression.get(); - auto list = relsStore.getRelPropertyLists( - direction, boundNodeTableID, relID, propertyExpression->getPropertyID(relID)); - propertyLists.push_back(list); - } - return propertyLists; -} - -static unique_ptr populateAdjCollection(table_id_t boundNodeTableID, - const RelExpression& rel, RelDirection direction, const RelsStore& relsStore) { - vector adjColumns; - vector adjLists; - for (auto relTableID : rel.getTableIDs()) { - if (relsStore.hasAdjColumn(direction, boundNodeTableID, relTableID)) { - adjColumns.push_back(relsStore.getAdjColumn(direction, boundNodeTableID, relTableID)); - } - if (relsStore.hasAdjList(direction, boundNodeTableID, relTableID)) { - adjLists.push_back(relsStore.getAdjLists(direction, boundNodeTableID, relTableID)); - } - } - return make_unique(std::move(adjColumns), std::move(adjLists)); -} - -static unique_ptr populatePropertyCollection(table_id_t boundNodeTableID, - const RelExpression& rel, RelDirection direction, const PropertyExpression& propertyExpression, +static unique_ptr populateRelTableCollection(table_id_t boundNodeTableID, + const RelExpression& rel, RelDirection direction, const expression_vector& properties, const RelsStore& relsStore) { - vector propertyColumns; - vector propertyLists; + vector tables; + vector> tableScanStates; for (auto relTableID : rel.getTableIDs()) { - if (relsStore.hasAdjColumn(direction, boundNodeTableID, relTableID)) { - Column* propertyColumn = nullptr; - if (propertyExpression.hasPropertyID(relTableID)) { - propertyColumn = relsStore.getRelPropertyColumn(direction, relTableID, - boundNodeTableID, propertyExpression.getPropertyID(relTableID)); - } - propertyColumns.push_back(propertyColumn); + auto relTable = relsStore.getRelTable(relTableID); + if (!relTable->hasAdjColumn(direction, boundNodeTableID) && + !relTable->hasAdjList(direction, boundNodeTableID)) { + continue; } - if (relsStore.hasAdjList(direction, boundNodeTableID, relTableID)) { - Lists* propertyList = nullptr; - if (propertyExpression.hasPropertyID(relTableID)) { - propertyList = relsStore.getRelPropertyLists(direction, boundNodeTableID, - relTableID, propertyExpression.getPropertyID(relTableID)); - } - propertyLists.push_back(propertyList); + tables.push_back(relsStore.getRelTable(relTableID)->getDirectedTableData(direction)); + vector propertyIds; + for (auto& property : properties) { + auto propertyExpression = reinterpret_cast(property.get()); + propertyIds.push_back(propertyExpression->hasPropertyID(relTableID) ? + propertyExpression->getPropertyID(relTableID) : + INVALID_PROPERTY_ID); } + tableScanStates.push_back( + make_unique(boundNodeTableID, std::move(propertyIds), + relsStore.hasAdjColumn(direction, boundNodeTableID, relTableID) ? + RelTableDataType::COLUMNS : + RelTableDataType::LISTS)); } - return make_unique( - std::move(propertyColumns), std::move(propertyLists)); -} - -static vector> populatePropertyCollections( - table_id_t boundNodeTableID, const RelExpression& rel, RelDirection direction, - const expression_vector& properties, const RelsStore& relsStore) { - vector> propertyCollections; - for (auto& expression : properties) { - auto propertyExpression = (PropertyExpression*)expression.get(); - propertyCollections.push_back(populatePropertyCollection( - boundNodeTableID, rel, direction, *propertyExpression, relsStore)); - } - return propertyCollections; + return make_unique(std::move(tables), std::move(tableScanStates)); } unique_ptr PlanMapper::mapLogicalExtendToPhysical( @@ -101,9 +61,10 @@ unique_ptr PlanMapper::mapLogicalExtendToPhysical( DataPos(inSchema->getExpressionPos(*boundNode->getInternalIDProperty())); auto outNodeIDVectorPos = DataPos(outSchema->getExpressionPos(*nbrNode->getInternalIDProperty())); - vector outPropertyVectorsPos; + vector outputVectorsPos; + outputVectorsPos.push_back(outNodeIDVectorPos); for (auto& expression : extend->getProperties()) { - outPropertyVectorsPos.emplace_back(outSchema->getExpressionPos(*expression)); + outputVectorsPos.emplace_back(outSchema->getExpressionPos(*expression)); } auto& relsStore = storageManager.getRelsStore(); if (!rel->isMultiLabeled() && !boundNode->isMultiLabeled()) { @@ -116,12 +77,11 @@ unique_ptr PlanMapper::mapLogicalExtendToPhysical( adjColumn, rel->getLowerBound(), rel->getUpperBound(), std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } else { - auto propertyColumns = populatePropertyColumns( - boundNodeTableID, relTableID, direction, extend->getProperties(), relsStore); - return make_unique(inNodeIDVectorPos, - outNodeIDVectorPos, std::move(outPropertyVectorsPos), adjColumn, - std::move(propertyColumns), std::move(prevOperator), getOperatorID(), - extend->getExpressionsForPrinting()); + auto propertyIds = populatePropertyIds(relTableID, extend->getProperties()); + return make_unique(boundNodeTableID, + relsStore.getRelTable(relTableID)->getDirectedTableData(direction), + std::move(propertyIds), inNodeIDVectorPos, std::move(outputVectorsPos), + std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } } else { assert(relsStore.hasAdjList(direction, boundNodeTableID, relTableID)); @@ -131,28 +91,26 @@ unique_ptr PlanMapper::mapLogicalExtendToPhysical( adjList, rel->getLowerBound(), rel->getUpperBound(), std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } else { - auto propertyLists = populatePropertyLists( - boundNodeTableID, relTableID, direction, extend->getProperties(), relsStore); - return make_unique(inNodeIDVectorPos, outNodeIDVectorPos, - std::move(outPropertyVectorsPos), adjList, std::move(propertyLists), + auto propertyIds = populatePropertyIds(relTableID, extend->getProperties()); + return make_unique(boundNodeTableID, + relsStore.getRelTable(relTableID)->getDirectedTableData(direction), + std::move(propertyIds), inNodeIDVectorPos, std::move(outputVectorsPos), std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); } } } else { // map to generic extend - unordered_map> - adjAndPropertyCollectionPerNodeTable; + unordered_map> relTableCollectionPerNodeTable; for (auto boundNodeTableID : boundNode->getTableIDs()) { - auto adjCollection = - populateAdjCollection(boundNodeTableID, *rel, direction, relsStore); - auto propertyCollections = populatePropertyCollections( + auto relTableCollection = populateRelTableCollection( boundNodeTableID, *rel, direction, extend->getProperties(), relsStore); - adjAndPropertyCollectionPerNodeTable.insert( - {boundNodeTableID, make_unique( - std::move(adjCollection), std::move(propertyCollections))}); + if (relTableCollection->getNumTablesInCollection() > 0) { + relTableCollectionPerNodeTable.insert( + {boundNodeTableID, std::move(relTableCollection)}); + } } - return make_unique(inNodeIDVectorPos, outNodeIDVectorPos, - outPropertyVectorsPos, std::move(adjAndPropertyCollectionPerNodeTable), - std::move(prevOperator), getOperatorID(), extend->getExpressionsForPrinting()); + return make_unique(inNodeIDVectorPos, outputVectorsPos, + std::move(relTableCollectionPerNodeTable), std::move(prevOperator), getOperatorID(), + extend->getExpressionsForPrinting()); } } diff --git a/src/processor/mapper/map_scan_node_property.cpp b/src/processor/mapper/map_scan_node_property.cpp index 02285d1faa..1784f1193e 100644 --- a/src/processor/mapper/map_scan_node_property.cpp +++ b/src/processor/mapper/map_scan_node_property.cpp @@ -1,6 +1,6 @@ #include "planner/logical_plan/logical_operator/logical_scan_node_property.h" #include "processor/mapper/plan_mapper.h" -#include "processor/operator/scan_column/scan_node_properties.h" +#include "processor/operator/scan/scan_node_table.h" namespace kuzu { namespace processor { @@ -19,34 +19,34 @@ unique_ptr PlanMapper::mapLogicalScanNodePropertyToPhysical( outVectorsPos.emplace_back(outSchema->getExpressionPos(*expression)); } if (node->isMultiLabeled()) { - unordered_map> tableIDToColumns; + unordered_map> tableIDToColumns; + unordered_map tables; for (auto& tableID : node->getTableIDs()) { - vector columns; + tables.insert({tableID, nodeStore.getNodeTable(tableID)}); + vector columns; for (auto& expression : scanProperty.getProperties()) { auto property = static_pointer_cast(expression); if (!property->hasPropertyID(tableID)) { - columns.push_back(nullptr); + columns.push_back(UINT32_MAX); } else { - columns.push_back( - nodeStore.getNodePropertyColumn(tableID, property->getPropertyID(tableID))); + columns.push_back(property->getPropertyID(tableID)); } } tableIDToColumns.insert({tableID, std::move(columns)}); } - return make_unique(inputNodeIDVectorPos, - std::move(outVectorsPos), std::move(tableIDToColumns), std::move(prevOperator), + return make_unique(inputNodeIDVectorPos, std::move(outVectorsPos), + std::move(tables), std::move(tableIDToColumns), std::move(prevOperator), getOperatorID(), scanProperty.getExpressionsForPrinting()); } else { auto tableID = node->getSingleTableID(); - vector columns; + vector columnIds; for (auto& expression : scanProperty.getProperties()) { auto property = static_pointer_cast(expression); - columns.push_back( - nodeStore.getNodePropertyColumn(tableID, property->getPropertyID(tableID))); + columnIds.push_back(property->getPropertyID(tableID)); } - return make_unique(inputNodeIDVectorPos, - std::move(outVectorsPos), std::move(columns), std::move(prevOperator), getOperatorID(), - scanProperty.getExpressionsForPrinting()); + return make_unique(inputNodeIDVectorPos, std::move(outVectorsPos), + nodeStore.getNodeTable(tableID), std::move(columnIds), std::move(prevOperator), + getOperatorID(), scanProperty.getExpressionsForPrinting()); } } diff --git a/src/processor/operator/CMakeLists.txt b/src/processor/operator/CMakeLists.txt index c666879762..2ae8ef29b7 100644 --- a/src/processor/operator/CMakeLists.txt +++ b/src/processor/operator/CMakeLists.txt @@ -4,21 +4,17 @@ add_subdirectory(ddl) add_subdirectory(hash_join) add_subdirectory(intersect) add_subdirectory(order_by) -add_subdirectory(scan_column) -add_subdirectory(scan_list) +add_subdirectory(scan) add_subdirectory(table_scan) add_subdirectory(update) add_subdirectory(var_length_extend) add_library(kuzu_processor_operator OBJECT - base_extend.cpp - base_table_scan.cpp cross_product.cpp filter.cpp filtering_operator.cpp flatten.cpp - generic_extend.cpp index_scan.cpp limit.cpp multiplicity_reducer.cpp diff --git a/src/processor/operator/base_extend.cpp b/src/processor/operator/base_extend.cpp deleted file mode 100644 index adceb3ce89..0000000000 --- a/src/processor/operator/base_extend.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "processor/operator/base_extend.h" - -namespace kuzu { -namespace processor { - -void BaseExtendAndScanRelProperties::initLocalStateInternal( - ResultSet* resultSet, ExecutionContext* context) { - inNodeIDVector = resultSet->getValueVector(inNodeIDVectorPos); - outNodeIDVector = resultSet->getValueVector(outNodeIDVectorPos); - for (auto& dataPos : outPropertyVectorsPos) { - auto vector = resultSet->getValueVector(dataPos); - outPropertyVectors.push_back(vector); - } -} - -} // namespace processor -} // namespace kuzu diff --git a/src/processor/operator/generic_extend.cpp b/src/processor/operator/generic_extend.cpp deleted file mode 100644 index 6bad0ecb2c..0000000000 --- a/src/processor/operator/generic_extend.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include "processor/operator/generic_extend.h" - -namespace kuzu { -namespace processor { - -void ColumnAndListCollection::populateListHandles(ListSyncState& syncState) { - assert(listHandles.empty()); - for (auto _ : lists) { - listHandles.push_back(make_shared(syncState)); - } -} - -void AdjAndPropertyCollection::populateListHandles() { - listSyncState = make_unique(); - adjCollection->populateListHandles(*listSyncState); - for (auto& propertyCollection : propertyCollections) { - assert(propertyCollection->lists.size() == adjCollection->lists.size()); - propertyCollection->populateListHandles(*listSyncState); - } -} - -void AdjAndPropertyCollection::resetState(node_offset_t nodeOffset) { - nextColumnIdx = 0; - nextListIdx = 0; - currentNodeOffset = nodeOffset; - currentListIdx = UINT32_MAX; -} - -bool AdjAndPropertyCollection::scan(const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction) { - if (scanColumns(inVector, outNodeVector, outPropertyVectors, transaction)) { - return true; - } - if (scanLists(inVector, outNodeVector, outPropertyVectors, transaction)) { - return true; - } - return false; -} - -bool AdjAndPropertyCollection::scanColumns(const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction) { - while (hasColumnToScan()) { - if (scanColumn(nextColumnIdx, inVector, outNodeVector, outPropertyVectors, transaction)) { - nextColumnIdx++; - return true; - } - nextColumnIdx++; - } - return false; -} - -bool AdjAndPropertyCollection::scanLists(const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction) { - if (currentListIdx != UINT32_MAX) { // check current list - auto currentAdjList = adjCollection->lists[currentListIdx]; - auto currentAdjListHandle = adjCollection->listHandles[currentListIdx].get(); - if (currentAdjListHandle->listSyncState.hasMoreAndSwitchSourceIfNecessary()) { - // scan current adjList - currentAdjList->readValues(outNodeVector, *currentAdjListHandle); - scanPropertyList(currentListIdx, outPropertyVectors, transaction); - return true; - } else { - // no more to scan on current list, move to next list. - nextListIdx++; - currentListIdx = UINT32_MAX; - } - } - while (hasListToScan()) { - if (scanList(nextListIdx, inVector, outNodeVector, outPropertyVectors, transaction)) { - return true; - } - nextListIdx++; - } - return false; -} - -bool AdjAndPropertyCollection::scanColumn(uint32_t idx, const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction) { - auto selVector = outNodeVector->state->selVector.get(); - if (selVector->isUnfiltered()) { - selVector->resetSelectorToValuePosBuffer(); - } - // We need to sync output vector state with input vector because we always write output to a new - // data chunk and thus they don't share state. - auto inVectorCurrentIdx = inVector->state->selVector->selectedPositions[0]; - selVector->selectedPositions[0] = inVectorCurrentIdx; - selVector->selectedSize = 1; - auto adjColumn = adjCollection->columns[idx]; - // scan adjColumn - adjColumn->read(transaction, inVector, outNodeVector); - if (outNodeVector->isNull(selVector->selectedPositions[0])) { - return false; - } - // scan propertyColumns - for (auto i = 0u; i < propertyCollections.size(); ++i) { - auto propertyColumn = propertyCollections[i]->columns[idx]; - auto& propertyVector = outPropertyVectors[i]; - propertyVector->resetOverflowBuffer(); - if (propertyColumn == nullptr) { - propertyVector->setAllNull(); - } else { - propertyColumn->read(transaction, inVector, propertyVector); - } - } - return true; -} - -bool AdjAndPropertyCollection::scanList(uint32_t idx, const shared_ptr& inVector, - const shared_ptr& outNodeVector, - const vector>& outPropertyVectors, Transaction* transaction) { - auto selVector = outNodeVector->state->selVector.get(); - if (!selVector->isUnfiltered()) { - selVector->resetSelectorToUnselected(); - } - auto adjList = (AdjLists*)adjCollection->lists[idx]; - auto adjListHandle = adjCollection->listHandles[idx].get(); - // scan adjList - adjList->initListReadingState(currentNodeOffset, *adjListHandle, transaction->getType()); - adjList->readValues(outNodeVector, *adjListHandle); - if (selVector->selectedSize == 0) { - return false; - } - currentListIdx = idx; - scanPropertyList(idx, outPropertyVectors, transaction); - return selVector->selectedSize != 0; -} - -void AdjAndPropertyCollection::scanPropertyList(uint32_t idx, - const vector>& outPropertyVectors, Transaction* transaction) { - for (auto i = 0u; i < propertyCollections.size(); ++i) { - auto propertyList = propertyCollections[i]->lists[idx]; - auto propertyListHandle = propertyCollections[i]->listHandles[idx].get(); - auto& propertyVector = outPropertyVectors[i]; - propertyVector->resetOverflowBuffer(); - if (propertyList == nullptr) { - outPropertyVectors[i]->setAllNull(); - } else { - propertyList->readValues(propertyVector, *propertyListHandle); - propertyList->setDeletedRelsIfNecessary( - transaction, propertyListHandle->listSyncState, propertyVector); - } - } -} - -unique_ptr AdjAndPropertyCollection::clone() const { - auto clonedAdjCollection = - make_unique(adjCollection->columns, adjCollection->lists); - vector> clonedPropertyCollections; - for (auto& propertyCollection : propertyCollections) { - clonedPropertyCollections.push_back(make_unique( - propertyCollection->columns, propertyCollection->lists)); - } - return make_unique( - std::move(clonedAdjCollection), std::move(clonedPropertyCollections)); -} - -void GenericExtendAndScanRelProperties::initLocalStateInternal( - ResultSet* resultSet, ExecutionContext* context) { - BaseExtendAndScanRelProperties::initLocalStateInternal(resultSet, context); - for (auto& [_, adjAndPropertyCollection] : adjAndPropertyCollectionPerNodeTable) { - adjAndPropertyCollection->populateListHandles(); - } - // config local state - currentAdjAndPropertyCollection = nullptr; -} - -bool GenericExtendAndScanRelProperties::getNextTuplesInternal() { - while (true) { - if (scanCurrentAdjAndPropertyCollection()) { - metrics->numOutputTuple.increase(outNodeIDVector->state->selVector->selectedSize); - return true; - } - if (!children[0]->getNextTuple()) { - return false; - } - auto currentIdx = inNodeIDVector->state->selVector->selectedPositions[0]; - if (inNodeIDVector->isNull(currentIdx)) { - outNodeIDVector->state->selVector->selectedSize = 0; - continue; - } - auto nodeID = inNodeIDVector->getValue(currentIdx); - initCurrentAdjAndPropertyCollection(nodeID); - } -} - -bool GenericExtendAndScanRelProperties::scanCurrentAdjAndPropertyCollection() { - if (currentAdjAndPropertyCollection == nullptr) { - return false; - } - return currentAdjAndPropertyCollection->scan( - inNodeIDVector, outNodeIDVector, outPropertyVectors, transaction); -} - -void GenericExtendAndScanRelProperties::initCurrentAdjAndPropertyCollection( - const nodeID_t& nodeID) { - if (adjAndPropertyCollectionPerNodeTable.contains(nodeID.tableID)) { - currentAdjAndPropertyCollection = - adjAndPropertyCollectionPerNodeTable.at(nodeID.tableID).get(); - currentAdjAndPropertyCollection->resetState(nodeID.offset); - } else { - currentAdjAndPropertyCollection = nullptr; - } -} - -} // namespace processor -} // namespace kuzu diff --git a/src/processor/operator/physical_operator.cpp b/src/processor/operator/physical_operator.cpp index fee82ce91a..5739784bb7 100644 --- a/src/processor/operator/physical_operator.cpp +++ b/src/processor/operator/physical_operator.cpp @@ -15,9 +15,6 @@ std::string PhysicalOperatorUtils::operatorTypeToString(PhysicalOperatorType ope case PhysicalOperatorType::AGGREGATE_SCAN: { return "AGGREGATE_SCAN"; } - case PhysicalOperatorType::COLUMN_EXTEND: { - return "COLUMN_EXTEND"; - } case PhysicalOperatorType::COPY_NODE_CSV: { return "COPY_NODE_CSV"; } @@ -57,8 +54,14 @@ std::string PhysicalOperatorUtils::operatorTypeToString(PhysicalOperatorType ope case PhysicalOperatorType::FLATTEN: { return "FLATTEN"; } - case PhysicalOperatorType::GENERIC_EXTEND: { - return "GENERIC_EXTEND"; + case PhysicalOperatorType::SCAN_REL_TABLE_COLUMNS: { + return "SCAN_REL_TABLE_COLUMNS"; + } + case PhysicalOperatorType::SCAN_REL_TABLE_LISTS: { + return "SCAN_REL_TABLE_LISTS"; + } + case PhysicalOperatorType::GENERIC_SCAN_REL_TABLES: { + return "GENERIC_SCAN_REL_TABLES"; } case PhysicalOperatorType::HASH_JOIN_BUILD: { return "HASH_JOIN_BUILD"; @@ -78,9 +81,6 @@ std::string PhysicalOperatorUtils::operatorTypeToString(PhysicalOperatorType ope case PhysicalOperatorType::LIMIT: { return "LIMIT"; } - case PhysicalOperatorType::LIST_EXTEND: { - return "LIST_EXTEND"; - } case PhysicalOperatorType::MULTIPLICITY_REDUCER: { return "MULTIPLICITY_REDUCER"; } diff --git a/src/processor/operator/scan/CMakeLists.txt b/src/processor/operator/scan/CMakeLists.txt new file mode 100644 index 0000000000..03e401451f --- /dev/null +++ b/src/processor/operator/scan/CMakeLists.txt @@ -0,0 +1,13 @@ +add_library(kuzu_processor_operator_scan + OBJECT + generic_scan_rel_tables.cpp + scan_columns.cpp + scan_node_table.cpp + scan_rel_table.cpp + scan_rel_table_columns.cpp + scan_rel_table_lists.cpp + ) + +set(ALL_OBJECT_FILES + ${ALL_OBJECT_FILES} $ + PARENT_SCOPE) diff --git a/src/processor/operator/scan/generic_scan_rel_tables.cpp b/src/processor/operator/scan/generic_scan_rel_tables.cpp new file mode 100644 index 0000000000..06a6913698 --- /dev/null +++ b/src/processor/operator/scan/generic_scan_rel_tables.cpp @@ -0,0 +1,90 @@ +#include "processor/operator/scan/generic_scan_rel_tables.h" + +namespace kuzu { +namespace processor { + +void RelTableCollection::resetState() { + currentRelTableIdxToScan = 0; + nextRelTableIdxToScan = 0; +} + +bool RelTableCollection::scan(const shared_ptr& inVector, + vector>& outputVectors, Transaction* transaction) { + do { + if (tableScanStates[currentRelTableIdxToScan]->hasMoreAndSwitchSourceIfNecessary()) { + assert(tableScanStates[currentRelTableIdxToScan]->relTableDataType == + storage::RelTableDataType::LISTS); + tables[currentRelTableIdxToScan]->scan( + transaction, *tableScanStates[currentRelTableIdxToScan], inVector, outputVectors); + } else { + currentRelTableIdxToScan = nextRelTableIdxToScan; + if (currentRelTableIdxToScan == tableScanStates.size()) { + return false; + } + if (tableScanStates[currentRelTableIdxToScan]->relTableDataType == + storage::RelTableDataType::COLUMNS) { + outputVectors[0]->state->selVector->resetSelectorToValuePosBufferWithSize(1); + outputVectors[0]->state->selVector->selectedPositions[0] = + inVector->state->selVector->selectedPositions[0]; + } else { + tableScanStates[currentRelTableIdxToScan]->syncState->resetState(); + } + tables[currentRelTableIdxToScan]->scan( + transaction, *tableScanStates[currentRelTableIdxToScan], inVector, outputVectors); + nextRelTableIdxToScan++; + } + } while (outputVectors[0]->state->selVector->selectedSize == 0); + return true; +} + +unique_ptr RelTableCollection::clone() const { + vector> clonedScanStates; + for (auto& scanState : tableScanStates) { + clonedScanStates.push_back(make_unique( + scanState->boundNodeTableID, scanState->propertyIds, scanState->relTableDataType)); + } + return make_unique(tables, std::move(clonedScanStates)); +} + +void GenericScanRelTables::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { + ScanRelTable::initLocalStateInternal(resultSet, context); + currentRelTableCollection = nullptr; +} + +bool GenericScanRelTables::getNextTuplesInternal() { + while (true) { + if (scanCurrentRelTableCollection()) { + metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + return true; + } + if (!children[0]->getNextTuple()) { + return false; + } + auto currentIdx = inNodeIDVector->state->selVector->selectedPositions[0]; + if (inNodeIDVector->isNull(currentIdx)) { + outputVectors[0]->state->selVector->selectedSize = 0; + continue; + } + auto nodeID = inNodeIDVector->getValue(currentIdx); + initCurrentRelTableCollection(nodeID); + } +} + +bool GenericScanRelTables::scanCurrentRelTableCollection() { + if (currentRelTableCollection == nullptr) { + return false; + } + return currentRelTableCollection->scan(inNodeIDVector, outputVectors, transaction); +} + +void GenericScanRelTables::initCurrentRelTableCollection(const nodeID_t& nodeID) { + if (relTableCollectionPerNodeTable.contains(nodeID.tableID)) { + currentRelTableCollection = relTableCollectionPerNodeTable.at(nodeID.tableID).get(); + currentRelTableCollection->resetState(); + } else { + currentRelTableCollection = nullptr; + } +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/scan_column/scan_column.cpp b/src/processor/operator/scan/scan_columns.cpp similarity index 50% rename from src/processor/operator/scan_column/scan_column.cpp rename to src/processor/operator/scan/scan_columns.cpp index f1326483bd..21e8fbafc1 100644 --- a/src/processor/operator/scan_column/scan_column.cpp +++ b/src/processor/operator/scan/scan_columns.cpp @@ -1,14 +1,10 @@ -#include "processor/operator/scan_column/scan_column.h" +#include "processor/operator/scan/scan_columns.h" namespace kuzu { namespace processor { -void BaseScanColumn::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { +void ScanColumns::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { inputNodeIDVector = resultSet->getValueVector(inputNodeIDVectorPos); -} - -void ScanMultipleColumns::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { - BaseScanColumn::initLocalStateInternal(resultSet, context); for (auto& dataPos : outPropertyVectorsPos) { auto vector = resultSet->getValueVector(dataPos); outPropertyVectors.push_back(vector); diff --git a/src/processor/operator/scan/scan_node_table.cpp b/src/processor/operator/scan/scan_node_table.cpp new file mode 100644 index 0000000000..734d0ce459 --- /dev/null +++ b/src/processor/operator/scan/scan_node_table.cpp @@ -0,0 +1,30 @@ +#include "processor/operator/scan/scan_node_table.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace processor { + +bool ScanSingleNodeTable::getNextTuplesInternal() { + if (!children[0]->getNextTuple()) { + return false; + } + table->scan(transaction, inputNodeIDVector, propertyColumnIds, outPropertyVectors); + return true; +} + +bool ScanMultiNodeTables::getNextTuplesInternal() { + if (!children[0]->getNextTuple()) { + return false; + } + auto tableID = + inputNodeIDVector + ->getValue(inputNodeIDVector->state->selVector->selectedPositions[0]) + .tableID; + tables.at(tableID)->scan( + transaction, inputNodeIDVector, tableIDToScanColumnIds.at(tableID), outPropertyVectors); + return true; +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/scan/scan_rel_table.cpp b/src/processor/operator/scan/scan_rel_table.cpp new file mode 100644 index 0000000000..faed76fa95 --- /dev/null +++ b/src/processor/operator/scan/scan_rel_table.cpp @@ -0,0 +1,15 @@ +#include "processor/operator/scan/scan_rel_table.h" + +namespace kuzu { +namespace processor { + +void ScanRelTable::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { + inNodeIDVector = resultSet->getValueVector(inNodeIDVectorPos); + for (auto& dataPos : outputVectorsPos) { + auto vector = resultSet->getValueVector(dataPos); + outputVectors.push_back(vector); + } +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/scan/scan_rel_table_columns.cpp b/src/processor/operator/scan/scan_rel_table_columns.cpp new file mode 100644 index 0000000000..502bc6e237 --- /dev/null +++ b/src/processor/operator/scan/scan_rel_table_columns.cpp @@ -0,0 +1,20 @@ +#include "processor/operator/scan/scan_rel_table_columns.h" + +namespace kuzu { +namespace processor { + +bool ScanRelTableColumns::getNextTuplesInternal() { + do { + restoreSelVector(inNodeIDVector->state->selVector); + if (!children[0]->getNextTuple()) { + return false; + } + saveSelVector(inNodeIDVector->state->selVector); + tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); + } while (inNodeIDVector->state->selVector->selectedSize == 0); + metrics->numOutputTuple.increase(inNodeIDVector->state->selVector->selectedSize); + return true; +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/scan/scan_rel_table_lists.cpp b/src/processor/operator/scan/scan_rel_table_lists.cpp new file mode 100644 index 0000000000..b2c0a195b2 --- /dev/null +++ b/src/processor/operator/scan/scan_rel_table_lists.cpp @@ -0,0 +1,24 @@ +#include "processor/operator/scan/scan_rel_table_lists.h" + +namespace kuzu { +namespace processor { + +bool ScanRelTableLists::getNextTuplesInternal() { + do { + if (scanState->syncState->hasMoreAndSwitchSourceIfNecessary()) { + tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); + metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + return true; + } + if (!children[0]->getNextTuple()) { + return false; + } + scanState->syncState->resetState(); + tableData->scan(transaction, *scanState, inNodeIDVector, outputVectors); + } while (outputVectors[0]->state->selVector->selectedSize == 0); + metrics->numOutputTuple.increase(outputVectors[0]->state->selVector->selectedSize); + return true; +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/scan_column/CMakeLists.txt b/src/processor/operator/scan_column/CMakeLists.txt deleted file mode 100644 index 6ef286f32a..0000000000 --- a/src/processor/operator/scan_column/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_library(kuzu_processor_operator_scan_column - OBJECT - adj_column_extend.cpp - scan_column.cpp - scan_node_properties.cpp) - -set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) diff --git a/src/processor/operator/scan_column/adj_column_extend.cpp b/src/processor/operator/scan_column/adj_column_extend.cpp deleted file mode 100644 index 655f0c9b5e..0000000000 --- a/src/processor/operator/scan_column/adj_column_extend.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "processor/operator/scan_column/adj_column_extend.h" - -namespace kuzu { -namespace processor { - -bool ColumnExtendAndScanRelProperties::getNextTuplesInternal() { - bool hasAtLeastOneNonNullValue; - // join with adjColumn - do { - restoreSelVector(inNodeIDVector->state->selVector); - if (!children[0]->getNextTuple()) { - return false; - } - saveSelVector(inNodeIDVector->state->selVector); - outNodeIDVector->setAllNull(); - adjColumn->read(transaction, inNodeIDVector, outNodeIDVector); - hasAtLeastOneNonNullValue = NodeIDVector::discardNull(*outNodeIDVector); - } while (!hasAtLeastOneNonNullValue); - // scan column properties - for (auto i = 0u; i < propertyColumns.size(); ++i) { - auto vector = outPropertyVectors[i]; - vector->resetOverflowBuffer(); - propertyColumns[i]->read(transaction, inNodeIDVector, vector); - } - metrics->numOutputTuple.increase(inNodeIDVector->state->selVector->selectedSize); - return true; -} - -} // namespace processor -} // namespace kuzu diff --git a/src/processor/operator/scan_column/scan_node_properties.cpp b/src/processor/operator/scan_column/scan_node_properties.cpp deleted file mode 100644 index 25bd8a9a9f..0000000000 --- a/src/processor/operator/scan_column/scan_node_properties.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "processor/operator/scan_column/scan_node_properties.h" - -using namespace kuzu::common; - -namespace kuzu { -namespace processor { - -bool ScanSingleNodeTableProperties::getNextTuplesInternal() { - if (!children[0]->getNextTuple()) { - return false; - } - for (auto i = 0u; i < propertyColumns.size(); ++i) { - auto vector = outPropertyVectors[i]; - // TODO(Everyone): move resetOverflowBuffer to column & list read? - vector->resetOverflowBuffer(); - propertyColumns[i]->read(transaction, inputNodeIDVector, vector); - } - return true; -} - -bool ScanMultiNodeTableProperties::getNextTuplesInternal() { - if (!children[0]->getNextTuple()) { - return false; - } - auto state = inputNodeIDVector->state; - assert(!state->isFlat()); // Property scans should be sequential and thus on unflat vector only. - auto tableID = - inputNodeIDVector->getValue(state->selVector->selectedPositions[0]).tableID; - auto& columns = tableIDToPropertyColumns.at(tableID); - for (auto i = 0u; i < outPropertyVectors.size(); ++i) { - auto vector = outPropertyVectors[i]; - vector->resetOverflowBuffer(); - if (columns[i] != nullptr) { - columns[i]->read(transaction, inputNodeIDVector, vector); - } else { - vector->setAllNull(); - } - } - return true; -} - -} // namespace processor -} // namespace kuzu diff --git a/src/processor/operator/scan_list/CMakeLists.txt b/src/processor/operator/scan_list/CMakeLists.txt deleted file mode 100644 index 6d5715cebf..0000000000 --- a/src/processor/operator/scan_list/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -add_library(kuzu_processor_operator_scan_list - OBJECT - scan_rel_table_lists.cpp - ) - -set(ALL_OBJECT_FILES - ${ALL_OBJECT_FILES} $ - PARENT_SCOPE) diff --git a/src/processor/operator/scan_list/scan_rel_table_lists.cpp b/src/processor/operator/scan_list/scan_rel_table_lists.cpp deleted file mode 100644 index 81af69812e..0000000000 --- a/src/processor/operator/scan_list/scan_rel_table_lists.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "processor/operator/scan_list/scan_rel_table_lists.h" - -namespace kuzu { -namespace processor { - -void ScanRelTableLists::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { - BaseExtendAndScanRelProperties::initLocalStateInternal(resultSet, context); - syncState = make_unique(); - adjListHandle = make_shared(*syncState); - for (auto& _ : propertyLists) { - propertyListHandles.push_back(make_shared(*syncState)); - } -} - -bool ScanRelTableLists::getNextTuplesInternal() { - if (adjListHandle->listSyncState.hasMoreAndSwitchSourceIfNecessary()) { - adjList->readValues(outNodeIDVector, *adjListHandle); - } else { - do { - if (!children[0]->getNextTuple()) { - return false; - } - auto currentIdx = inNodeIDVector->state->selVector->selectedPositions[0]; - if (inNodeIDVector->isNull(currentIdx)) { - outNodeIDVector->state->selVector->selectedSize = 0; - continue; - } - auto currentNodeOffset = inNodeIDVector->readNodeOffset(currentIdx); - ((AdjLists*)adjList) - ->initListReadingState(currentNodeOffset, *adjListHandle, transaction->getType()); - adjList->readValues(outNodeIDVector, *adjListHandle); - } while (outNodeIDVector->state->selVector->selectedSize == 0); - } - // TODO(Ziyi/Guodong): this is a hidden bug found in this refactor but also exists in master. - // Our protocol is that an operator cannot output empty result. This is violated when - // introducing setDeletedRelsIfNecessary() which might set selectedSize = 0. Let me know if my - // understanding is correct about this. - scanPropertyLists(); - metrics->numOutputTuple.increase(outNodeIDVector->state->selVector->selectedSize); - return true; -} - -void ScanRelTableLists::scanPropertyLists() { - for (auto i = 0u; i < propertyLists.size(); ++i) { - outPropertyVectors[i]->resetOverflowBuffer(); - propertyLists[i]->readValues(outPropertyVectors[i], *propertyListHandles[i]); - propertyLists[i]->setDeletedRelsIfNecessary(transaction, *syncState, outPropertyVectors[i]); - } -} - -} // namespace processor -} // namespace kuzu diff --git a/src/processor/operator/table_scan/CMakeLists.txt b/src/processor/operator/table_scan/CMakeLists.txt index c4767ab9ab..f409eea458 100644 --- a/src/processor/operator/table_scan/CMakeLists.txt +++ b/src/processor/operator/table_scan/CMakeLists.txt @@ -1,5 +1,6 @@ add_library(kuzu_processor_operator_table_scan OBJECT + base_table_scan.cpp union_all_scan.cpp) set(ALL_OBJECT_FILES diff --git a/src/processor/operator/base_table_scan.cpp b/src/processor/operator/table_scan/base_table_scan.cpp similarity index 100% rename from src/processor/operator/base_table_scan.cpp rename to src/processor/operator/table_scan/base_table_scan.cpp diff --git a/src/processor/operator/var_length_extend/var_length_adj_list_extend.cpp b/src/processor/operator/var_length_extend/var_length_adj_list_extend.cpp index 53a29cf2df..e758b893f7 100644 --- a/src/processor/operator/var_length_extend/var_length_adj_list_extend.cpp +++ b/src/processor/operator/var_length_extend/var_length_adj_list_extend.cpp @@ -12,8 +12,8 @@ AdjListExtendDFSLevelInfo::AdjListExtendDFSLevelInfo(uint8_t level, ExecutionCon // DataChunkState to write how many nodes it has read, we create a new DataChunkState and assign // it to children. children->state = make_shared(); - listSyncState = make_shared(); - listHandle = make_shared(*listSyncState); + listSyncState = make_unique(); + listHandle = make_unique(*listSyncState); } void AdjListExtendDFSLevelInfo::reset(uint64_t parent_) { @@ -91,7 +91,7 @@ bool VarLengthAdjListExtend::addDFSLevelToStackIfParentExtends(uint64_t parent, bool VarLengthAdjListExtend::getNextBatchOfNbrNodes( shared_ptr& dfsLevel) const { - if (dfsLevel->listHandle->listSyncState.hasMoreAndSwitchSourceIfNecessary()) { + if (dfsLevel->listHandle->hasMoreAndSwitchSourceIfNecessary()) { ((AdjLists*)storage)->readValues(dfsLevel->children, *dfsLevel->listHandle); return true; } diff --git a/src/storage/buffer_manager/file_handle.cpp b/src/storage/buffer_manager/file_handle.cpp index bef98cb6b2..5ae8490d81 100644 --- a/src/storage/buffer_manager/file_handle.cpp +++ b/src/storage/buffer_manager/file_handle.cpp @@ -62,7 +62,8 @@ bool FileHandle::acquirePageLock(page_idx_t pageIdx, bool block) { bool FileHandle::acquire(page_idx_t pageIdx) { if (pageIdx >= pageLocks.size()) { - throw RuntimeException("pageIdx is >= pageLocks.size()"); + throw RuntimeException( + StringUtils::string_format("pageIdx %d is >= pageLocks.size()", pageIdx)); } auto retVal = !pageLocks[pageIdx]->test_and_set(memory_order_acquire); return retVal; diff --git a/src/storage/in_mem_csv_copier/in_mem_node_csv_copier.cpp b/src/storage/in_mem_csv_copier/in_mem_node_csv_copier.cpp index a4554412ba..25fa2cffda 100644 --- a/src/storage/in_mem_csv_copier/in_mem_node_csv_copier.cpp +++ b/src/storage/in_mem_csv_copier/in_mem_node_csv_copier.cpp @@ -10,7 +10,7 @@ namespace storage { InMemNodeCSVCopier::InMemNodeCSVCopier(CSVDescription& csvDescription, string outputDirectory, TaskScheduler& taskScheduler, Catalog& catalog, table_id_t tableID, NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs) - : InMemStructuresCSVCopier{csvDescription, move(outputDirectory), taskScheduler, catalog}, + : InMemStructuresCSVCopier{csvDescription, std::move(outputDirectory), taskScheduler, catalog}, numNodes{UINT64_MAX}, nodesStatisticsAndDeletedIDs{nodesStatisticsAndDeletedIDs} { nodeTableSchema = catalog.getReadOnlyVersion()->getNodeTableSchema(tableID); } @@ -22,7 +22,7 @@ uint64_t InMemNodeCSVCopier::copy() { countLinesPerBlock(nodeTableSchema->getNumStructuredProperties()); numNodes = calculateNumRows(csvDescription.csvReaderConfig.hasHeader); initializeColumnsAndList(); - // Populate structured columns with the ID hash index. + // Populate columns with the ID hash index. switch (nodeTableSchema->getPrimaryKey().dataType.typeID) { case INT64: { populateColumns(); @@ -44,15 +44,15 @@ uint64_t InMemNodeCSVCopier::copy() { } void InMemNodeCSVCopier::initializeColumnsAndList() { - logger->info("Initializing in memory structured columns."); - structuredColumns.resize(nodeTableSchema->getNumStructuredProperties()); - for (auto& property : nodeTableSchema->structuredProperties) { + logger->info("Initializing in memory columns."); + columns.resize(nodeTableSchema->getNumStructuredProperties()); + for (auto& property : nodeTableSchema->properties) { auto fName = StorageUtils::getNodePropertyColumnFName(outputDirectory, nodeTableSchema->tableID, property.propertyID, DBFileType::WAL_VERSION); - structuredColumns[property.propertyID] = + columns[property.propertyID] = InMemColumnFactory::getInMemPropertyColumn(fName, property.dataType, numNodes); } - logger->info("Done initializing in memory structured columns."); + logger->info("Done initializing in memory columns."); } void InMemNodeCSVCopier::countLinesPerBlock(uint64_t numStructuredProperties) { @@ -68,7 +68,7 @@ void InMemNodeCSVCopier::countLinesPerBlock(uint64_t numStructuredProperties) { template void InMemNodeCSVCopier::populateColumns() { - logger->info("Populating structured properties."); + logger->info("Populating properties."); auto pkIndex = make_unique>(StorageUtils::getNodeIndexFName(this->outputDirectory, nodeTableSchema->tableID, DBFileType::WAL_VERSION), @@ -83,7 +83,7 @@ void InMemNodeCSVCopier::populateColumns() { taskScheduler.waitAllTasksToCompleteOrError(); logger->info("Flush the pk index to disk."); pkIndex->flush(); - logger->info("Done populating structured properties, constructing the pk index."); + logger->info("Done populating properties, constructing the pk index."); } template @@ -129,23 +129,21 @@ void InMemNodeCSVCopier::populateColumnsTask(uint64_t primaryKeyPropertyIdx, uin skipFirstRowIfNecessary(blockId, copier->csvDescription, reader); auto bufferOffset = 0u; while (reader.hasNextLine()) { - putPropsOfLineIntoColumns(copier->structuredColumns, - copier->nodeTableSchema->structuredProperties, overflowCursors, reader, - offsetStart + bufferOffset); + putPropsOfLineIntoColumns(copier->columns, copier->nodeTableSchema->properties, + overflowCursors, reader, offsetStart + bufferOffset); bufferOffset++; } - populatePKIndex(copier->structuredColumns[primaryKeyPropertyIdx].get(), pkIndex, offsetStart, + populatePKIndex(copier->columns[primaryKeyPropertyIdx].get(), pkIndex, offsetStart, copier->numLinesPerBlock[blockId]); copier->logger->trace("End: path={0} blkIdx={1}", copier->csvDescription.filePath, blockId); } -void InMemNodeCSVCopier::putPropsOfLineIntoColumns( - vector>& structuredColumns, - const vector& structuredProperties, vector& overflowCursors, - CSVReader& reader, uint64_t nodeOffset) { - for (auto columnIdx = 0u; columnIdx < structuredColumns.size(); columnIdx++) { +void InMemNodeCSVCopier::putPropsOfLineIntoColumns(vector>& columns, + const vector& properties, vector& overflowCursors, CSVReader& reader, + uint64_t nodeOffset) { + for (auto columnIdx = 0u; columnIdx < columns.size(); columnIdx++) { reader.hasNextTokenOrError(); - auto column = structuredColumns[columnIdx].get(); + auto column = columns[columnIdx].get(); switch (column->getDataType().typeID) { case INT64: { if (!reader.skipTokenIfNull()) { @@ -208,14 +206,14 @@ void InMemNodeCSVCopier::putPropsOfLineIntoColumns( } void InMemNodeCSVCopier::saveToFile() { - logger->debug("Writing node structured columns to disk."); - assert(!structuredColumns.empty()); - for (auto& column : structuredColumns) { + logger->debug("Writing node columns to disk."); + assert(!columns.empty()); + for (auto& column : columns) { taskScheduler.scheduleTask(CopyCSVTaskFactory::createCopyCSVTask( [&](InMemColumn* x) { x->saveToFile(); }, column.get())); } taskScheduler.waitAllTasksToCompleteOrError(); - logger->debug("Done writing node structured columns to disk."); + logger->debug("Done writing node columns to disk."); } } // namespace storage diff --git a/src/storage/node_id_compression_scheme.cpp b/src/storage/node_id_compression_scheme.cpp index 70b88c68dd..ba2e5b2bbb 100644 --- a/src/storage/node_id_compression_scheme.cpp +++ b/src/storage/node_id_compression_scheme.cpp @@ -4,7 +4,7 @@ namespace kuzu { namespace common { void NodeIDCompressionScheme::readNodeID(uint8_t* data, nodeID_t* nodeID) const { - if (commonTableID == UINT64_MAX) { + if (commonTableID == INVALID_TABLE_ID) { memcpy(&*nodeID, data, sizeof(nodeID_t)); } else { nodeID->tableID = commonTableID; @@ -13,7 +13,7 @@ void NodeIDCompressionScheme::readNodeID(uint8_t* data, nodeID_t* nodeID) const } void NodeIDCompressionScheme::writeNodeID(uint8_t* data, const nodeID_t& nodeID) const { - if (commonTableID == UINT64_MAX) { + if (commonTableID == INVALID_TABLE_ID) { memcpy(data, &nodeID, sizeof(nodeID_t)); } else { memcpy(data, &nodeID.offset, sizeof(node_offset_t)); diff --git a/src/storage/storage_structure/lists/CMakeLists.txt b/src/storage/storage_structure/lists/CMakeLists.txt index e68f991e9d..e174a4b47c 100644 --- a/src/storage/storage_structure/lists/CMakeLists.txt +++ b/src/storage/storage_structure/lists/CMakeLists.txt @@ -1,7 +1,7 @@ add_library(kuzu_storage_lists OBJECT list_headers.cpp - list_sync_state.cpp + list_handle.cpp lists.cpp lists_metadata.cpp lists_update_iterator.cpp diff --git a/src/storage/storage_structure/lists/list_sync_state.cpp b/src/storage/storage_structure/lists/list_handle.cpp similarity index 83% rename from src/storage/storage_structure/lists/list_sync_state.cpp rename to src/storage/storage_structure/lists/list_handle.cpp index 9e05a57135..9935a9105f 100644 --- a/src/storage/storage_structure/lists/list_sync_state.cpp +++ b/src/storage/storage_structure/lists/list_handle.cpp @@ -1,4 +1,4 @@ -#include "storage/storage_structure/lists/list_sync_state.h" +#include "storage/storage_structure/lists/list_handle.h" namespace kuzu { namespace storage { @@ -16,8 +16,8 @@ bool ListSyncState::hasMoreAndSwitchSourceIfNecessary() { return false; } -void ListSyncState::reset() { - boundNodeOffset = UINT64_MAX; +void ListSyncState::resetState() { + boundNodeOffset = INVALID_NODE_OFFSET; startElemOffset = UINT32_MAX; numValuesToRead = UINT32_MAX; numValuesInUpdateStore = 0; diff --git a/src/storage/storage_structure/lists/lists.cpp b/src/storage/storage_structure/lists/lists.cpp index 14f3ae3dfd..5d7430d0d7 100644 --- a/src/storage/storage_structure/lists/lists.cpp +++ b/src/storage/storage_structure/lists/lists.cpp @@ -14,15 +14,14 @@ namespace storage { // has a small list then largeListHandle does not contain anything specific to v3 (it would likely // be containing information about the last portion of the last large list that was read). void Lists::readValues(const shared_ptr& valueVector, ListHandle& listHandle) { - auto& listSyncState = listHandle.listSyncState; - if (listSyncState.getListSourceStore() == ListSourceStore::UPDATE_STORE) { + if (listHandle.getListSourceStore() == ListSourceStore::UPDATE_STORE) { listsUpdateStore->readValues( - storageStructureIDAndFName.storageStructureID.listFileID, listSyncState, valueVector); + storageStructureIDAndFName.storageStructureID.listFileID, listHandle, valueVector); } else { // If the startElementOffset is 0, it means that this is the first time that we read from // the list. As a result, we need to reset the cursor and mapper. - if (listHandle.listSyncState.getStartElemOffset() == 0) { - listHandle.resetCursorMapper(metadata, numElementsPerPage); + if (listHandle.getStartElemOffset() == 0) { + listHandle.setMapper(metadata); } readFromList(valueVector, listHandle); } @@ -30,21 +29,21 @@ void Lists::readValues(const shared_ptr& valueVector, ListHandle& l void Lists::readFromSmallList(const shared_ptr& valueVector, ListHandle& listHandle) { auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); - readBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, listHandle.cursorAndMapper.cursor, - listHandle.cursorAndMapper.mapper); + auto pageCursor = PageUtils::getPageElementCursorForPos( + ListHeaders::getSmallListCSROffset(listHandle.getListHeader()), numElementsPerPage); + readBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper); } void Lists::readFromLargeList(const shared_ptr& valueVector, ListHandle& listHandle) { // Assumes that the associated adjList has already updated the syncState. - auto pageCursor = PageUtils::getPageElementCursorForPos( - listHandle.listSyncState.getStartElemOffset(), numElementsPerPage); + auto pageCursor = + PageUtils::getPageElementCursorForPos(listHandle.getStartElemOffset(), numElementsPerPage); auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); - readBySequentialCopy( - dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.cursorAndMapper.mapper); + readBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper); } void Lists::readFromList(const shared_ptr& valueVector, ListHandle& listHandle) { - if (ListHeaders::isALargeList(listHandle.listSyncState.getListHeader())) { + if (ListHeaders::isALargeList(listHandle.getListHeader())) { readFromLargeList(valueVector, listHandle); } else { readFromSmallList(valueVector, listHandle); @@ -63,11 +62,10 @@ uint64_t Lists::getNumElementsInPersistentStore( void Lists::initListReadingState( node_offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType) { - auto& listSyncState = listHandle.listSyncState; - listSyncState.reset(); + listHandle.resetSyncState(); auto isNewlyAddedNode = listsUpdateStore->isNewlyAddedNode( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset); - uint64_t numElementsInPersistentStore = 0, numElementsInUpdateStore = 0; + uint64_t numElementsInPersistentStore, numElementsInUpdateStore = 0; list_header_t listHeader; if (transactionType == TransactionType::WRITE) { numElementsInUpdateStore = listsUpdateStore->getNumInsertedRelsForNodeOffset( @@ -85,11 +83,11 @@ void Lists::initListReadingState( // reading from listsUpdateStore directly. auto sourceStore = numElementsInPersistentStore == 0 ? ListSourceStore::UPDATE_STORE : ListSourceStore::PERSISTENT_STORE; - listSyncState.init(nodeOffset, listHeader, numElementsInUpdateStore, + listHandle.initSyncState(nodeOffset, listHeader, numElementsInUpdateStore, numElementsInPersistentStore, sourceStore); } -unique_ptr Lists::getInMemListWithDataFromUpdateStoreOnly( +unique_ptr Lists::createInMemListWithDataFromUpdateStoreOnly( node_offset_t nodeOffset, vector& insertedRelsTupleIdxInFT) { auto inMemList = make_unique( getNumElementsInListsUpdateStore(nodeOffset), elementSize, mayContainNulls()); @@ -105,11 +103,9 @@ unique_ptr Lists::writeToInMemList(node_offset_t nodeOffset, auto inMemList = make_unique(getTotalNumElementsInList(TransactionType::WRITE, nodeOffset), elementSize, mayContainNulls()); - CursorAndMapper cursorAndMapper; - cursorAndMapper.reset(metadata, numElementsPerPage, headers->getHeader(nodeOffset), nodeOffset); auto numElementsInPersistentStore = getNumElementsFromListHeader(nodeOffset); fillInMemListsFromPersistentStore( - cursorAndMapper, numElementsInPersistentStore, *inMemList, deletedRelOffsetsForList); + nodeOffset, numElementsInPersistentStore, *inMemList, deletedRelOffsetsForList); listsUpdateStore->readInsertionsToList(storageStructureIDAndFName.storageStructureID.listFileID, insertedRelTupleIdxesInFT, *inMemList, numElementsInPersistentStore - deletedRelOffsetsForList.size(), @@ -117,23 +113,26 @@ unique_ptr Lists::writeToInMemList(node_offset_t nodeOffset, return inMemList; } -void Lists::fillInMemListsFromPersistentStore(CursorAndMapper& cursorAndMapper, +void Lists::fillInMemListsFromPersistentStore(node_offset_t nodeOffset, uint64_t numElementsInPersistentStore, InMemList& inMemList, const unordered_set& deletedRelOffsetsInList) { + auto listHeader = headers->getHeader(nodeOffset); + auto pageMapper = ListHandle::getPageMapper(metadata, listHeader, nodeOffset); + auto pageCursor = ListHandle::getPageCursor(listHeader, numElementsPerPage); uint64_t numElementsRead = 0; uint64_t nextPosToWriteToInMemList = 0; auto numElementsToRead = numElementsInPersistentStore; while (numElementsRead < numElementsToRead) { auto numElementsToReadInCurPage = min(numElementsToRead - numElementsRead, - (uint64_t)(numElementsPerPage - cursorAndMapper.cursor.elemPosInPage)); - auto physicalPageIdx = cursorAndMapper.mapper(cursorAndMapper.cursor.pageIdx); + (uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); + auto physicalPageIdx = pageMapper(pageCursor.pageIdx); auto frame = bufferManager.pin(fileHandle, physicalPageIdx); - fillInMemListsFromFrame(inMemList, frame, cursorAndMapper.cursor.elemPosInPage, + fillInMemListsFromFrame(inMemList, frame, pageCursor.elemPosInPage, numElementsToReadInCurPage, deletedRelOffsetsInList, numElementsRead, nextPosToWriteToInMemList); bufferManager.unpin(fileHandle, physicalPageIdx); numElementsRead += numElementsToReadInCurPage; - cursorAndMapper.cursor.nextPage(); + pageCursor.nextPage(); } } @@ -173,33 +172,36 @@ void Lists::fillInMemListsFromFrame(InMemList& inMemList, const uint8_t* frame, void StringPropertyLists::readFromLargeList( const shared_ptr& valueVector, ListHandle& listHandle) { + valueVector->resetOverflowBuffer(); Lists::readFromLargeList(valueVector, listHandle); diskOverflowFile.readStringsToVector(TransactionType::READ_ONLY, *valueVector); } void StringPropertyLists::readFromSmallList( const shared_ptr& valueVector, ListHandle& listHandle) { + valueVector->resetOverflowBuffer(); Lists::readFromSmallList(valueVector, listHandle); diskOverflowFile.readStringsToVector(TransactionType::READ_ONLY, *valueVector); } void ListPropertyLists::readFromLargeList( const shared_ptr& valueVector, ListHandle& listHandle) { + valueVector->resetOverflowBuffer(); Lists::readFromLargeList(valueVector, listHandle); diskOverflowFile.readListsToVector(TransactionType::READ_ONLY, *valueVector); } void ListPropertyLists::readFromSmallList( const shared_ptr& valueVector, ListHandle& listHandle) { + valueVector->resetOverflowBuffer(); Lists::readFromSmallList(valueVector, listHandle); diskOverflowFile.readListsToVector(TransactionType::READ_ONLY, *valueVector); } void AdjLists::readValues(const shared_ptr& valueVector, ListHandle& listHandle) { - auto& listSyncState = listHandle.listSyncState; valueVector->state->selVector->resetSelectorToUnselected(); - if (listSyncState.getListSourceStore() == ListSourceStore::UPDATE_STORE) { - readFromListsUpdateStore(listSyncState, valueVector); + if (listHandle.getListSourceStore() == ListSourceStore::UPDATE_STORE) { + readFromListsUpdateStore(listHandle, valueVector); } else { readFromListsPersistentStore(listHandle, valueVector); } @@ -211,8 +213,8 @@ unique_ptr> AdjLists::readAdjacencyListOfNode( // nodeIDCompressionScheme into a vector of nodeID_t. node_offset_t nodeOffset) { auto header = headers->getHeader(nodeOffset); - CursorAndMapper cursorAndMapper; - cursorAndMapper.reset(getListsMetadata(), numElementsPerPage, header, nodeOffset); + auto pageMapper = ListHandle::getPageMapper(metadata, header, nodeOffset); + auto pageCursor = ListHandle::getPageCursor(header, numElementsPerPage); // Step 1 auto numElementsInList = getNumElementsFromListHeader(nodeOffset); auto listLenInBytes = numElementsInList * elementSize; @@ -220,18 +222,17 @@ unique_ptr> AdjLists::readAdjacencyListOfNode( auto sizeLeftToCopy = listLenInBytes; auto bufferPtr = buffer.get(); while (sizeLeftToCopy) { - auto physicalPageIdx = cursorAndMapper.mapper(cursorAndMapper.cursor.pageIdx); - auto sizeToCopyInPage = min( - ((uint64_t)(numElementsPerPage - cursorAndMapper.cursor.elemPosInPage) * elementSize), - sizeLeftToCopy); + auto physicalPageIdx = pageMapper(pageCursor.pageIdx); + auto sizeToCopyInPage = + min(((uint64_t)(numElementsPerPage - pageCursor.elemPosInPage) * elementSize), + sizeLeftToCopy); auto frame = bufferManager.pin(fileHandle, physicalPageIdx); - memcpy(bufferPtr, frame + mapElementPosToByteOffset(cursorAndMapper.cursor.elemPosInPage), + memcpy(bufferPtr, frame + mapElementPosToByteOffset(pageCursor.elemPosInPage), sizeToCopyInPage); bufferManager.unpin(fileHandle, physicalPageIdx); bufferPtr += sizeToCopyInPage; sizeLeftToCopy -= sizeToCopyInPage; - cursorAndMapper.cursor.elemPosInPage = 0; - cursorAndMapper.cursor.pageIdx++; + pageCursor.nextPage(); } // Step 2 @@ -250,40 +251,33 @@ unique_ptr> AdjLists::readAdjacencyListOfNode( void AdjLists::readFromLargeList( const shared_ptr& valueVector, ListHandle& listHandle) { - uint64_t nextPartBeginElemOffset; - auto& listSyncState = listHandle.listSyncState; - if (!listSyncState.hasValidRangeToRead()) { - nextPartBeginElemOffset = 0; - } else { - nextPartBeginElemOffset = listSyncState.getEndElemOffset(); - listHandle.cursorAndMapper.cursor = - PageUtils::getPageElementCursorForPos(nextPartBeginElemOffset, numElementsPerPage); - } + uint64_t nextPartBeginElemOffset = + listHandle.hasValidRangeToRead() ? listHandle.getEndElemOffset() : 0; + auto pageCursor = + PageUtils::getPageElementCursorForPos(nextPartBeginElemOffset, numElementsPerPage); // The number of edges to read is the minimum of: (i) how may edges are left to read // (info.listLen - nextPartBeginElemOffset); and (ii) how many elements are left in the current // page that's being read (nextPartBeginElemOffset above should be set to the beginning of the // next page. Note that because of case (ii), this computation guarantees that what we read fits // into a single page. That's why we can call copyFromAPage. auto numValuesToCopy = - min((uint32_t)(listSyncState.getNumValuesInList() - nextPartBeginElemOffset), + min((uint32_t)(listHandle.getNumValuesInList() - nextPartBeginElemOffset), numElementsPerPage - (uint32_t)(nextPartBeginElemOffset % numElementsPerPage)); valueVector->state->initOriginalAndSelectedSize(numValuesToCopy); - listSyncState.setRangeToRead(nextPartBeginElemOffset, numValuesToCopy); + listHandle.setRangeToRead(nextPartBeginElemOffset, numValuesToCopy); // map logical pageIdx to physical pageIdx - auto physicalPageId = - listHandle.cursorAndMapper.mapper(listHandle.cursorAndMapper.cursor.pageIdx); + auto physicalPageId = listHandle.mapper(pageCursor.pageIdx); // See comments for AdjLists::readFromSmallList. auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); readNodeIDsFromAPageBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, 0, physicalPageId, - listHandle.cursorAndMapper.cursor.elemPosInPage, numValuesToCopy, nodeIDCompressionScheme, - true /*isAdjLists*/); + pageCursor.elemPosInPage, numValuesToCopy, nodeIDCompressionScheme, true /*isAdjLists*/); } // Note: This function sets the original and selected size of the DataChunk into which it will // read a list of nodes and edges. void AdjLists::readFromSmallList( const shared_ptr& valueVector, ListHandle& listHandle) { - valueVector->state->initOriginalAndSelectedSize(listHandle.listSyncState.getNumValuesInList()); + valueVector->state->initOriginalAndSelectedSize(listHandle.getNumValuesInList()); // We store the updates for adjLists in listsUpdateStore, however we store the // updates for adjColumn in the WAL version of the page. The adjColumn needs to pass a // transaction to readNodeIDsBySequentialCopy, so readNodeIDsBySequentialCopy can know whether @@ -291,64 +285,64 @@ void AdjLists::readFromSmallList( // version of the page(since its updates are stored in listsUpdateStore), so we // simply pass a dummy read-only transaction to readNodeIDsBySequentialCopy. auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); - readNodeIDsBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, - listHandle.cursorAndMapper.cursor, listHandle.cursorAndMapper.mapper, + auto pageCursor = PageUtils::getPageElementCursorForPos( + ListHeaders::getSmallListCSROffset(listHandle.getListHeader()), numElementsPerPage); + readNodeIDsBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper, nodeIDCompressionScheme, true /*isAdjLists*/); // We set the startIdx + numValuesToRead == numValuesInList in listSyncState to indicate to the // callers (e.g., the adj_list_extend or var_len_extend) that we have read the small list // already. This allows the callers to know when to switch to reading from the update store if // there is any updates. - listHandle.listSyncState.setRangeToRead(0, listHandle.listSyncState.getNumValuesInList()); + listHandle.setRangeToRead(0, listHandle.getNumValuesInList()); } void AdjLists::readFromListsUpdateStore( - ListSyncState& listSyncState, const shared_ptr& valueVector) { - assert(listSyncState.getListSourceStore() == ListSourceStore::UPDATE_STORE); - if (!listSyncState.hasValidRangeToRead()) { + ListHandle& listHandle, const shared_ptr& valueVector) { + if (!listHandle.hasValidRangeToRead()) { // We have read all values from persistent store or the persistent store is empty, we should // reset listSyncState to indicate ranges in listsUpdateStore and start // reading from it. - listSyncState.setRangeToRead( - 0, min(DEFAULT_VECTOR_CAPACITY, (uint64_t)listSyncState.getNumValuesInList())); + listHandle.setRangeToRead( + 0, min(DEFAULT_VECTOR_CAPACITY, (uint64_t)listHandle.getNumValuesInList())); } else { - listSyncState.setRangeToRead(listSyncState.getEndElemOffset(), + listHandle.setRangeToRead(listHandle.getEndElemOffset(), min(DEFAULT_VECTOR_CAPACITY, - (uint64_t)listSyncState.getNumValuesInList() - listSyncState.getEndElemOffset())); + (uint64_t)listHandle.getNumValuesInList() - listHandle.getEndElemOffset())); } // Note that: we always store nbr node in the second column of factorizedTable. listsUpdateStore->readValues( - storageStructureIDAndFName.storageStructureID.listFileID, listSyncState, valueVector); + storageStructureIDAndFName.storageStructureID.listFileID, listHandle, valueVector); } void AdjLists::readFromListsPersistentStore( ListHandle& listHandle, const shared_ptr& valueVector) { // If the startElemOffset is invalid, it means that we never read from the list. As a // result, we need to reset the cursor and mapper. - if (!listHandle.listSyncState.hasValidRangeToRead()) { - listHandle.resetCursorMapper(metadata, numElementsPerPage); + if (!listHandle.hasValidRangeToRead()) { + listHandle.setMapper(metadata); } readFromList(valueVector, listHandle); } // Note: this function will always be called right after scanRelID, so we have the // guarantee that the relIDVector is always unselected. -void RelIDList::setDeletedRelsIfNecessary(Transaction* transaction, ListSyncState& listSyncState, - const shared_ptr& relIDVector) { +void RelIDList::setDeletedRelsIfNecessary( + Transaction* transaction, ListHandle& listHandle, const shared_ptr& relIDVector) { // We only need to unselect the positions for deleted rels when we are reading from the // persistent store in a write transaction and the current nodeOffset has deleted rels in // persistent store. if (!transaction->isReadOnly() && - listSyncState.getListSourceStore() != ListSourceStore::UPDATE_STORE && + listHandle.getListSourceStore() != ListSourceStore::UPDATE_STORE && listsUpdateStore->hasAnyDeletedRelsInPersistentStore( storageStructureIDAndFName.storageStructureID.listFileID, - listSyncState.getBoundNodeOffset())) { + listHandle.getBoundNodeOffset())) { relIDVector->state->selVector->resetSelectorToValuePosBuffer(); auto& selVector = relIDVector->state->selVector; auto nextSelectedPos = 0u; for (auto pos = 0; pos < relIDVector->state->originalSize; ++pos) { if (!listsUpdateStore->isRelDeletedInPersistentStore( storageStructureIDAndFName.storageStructureID.listFileID, - listSyncState.getBoundNodeOffset(), relIDVector->getValue(pos))) { + listHandle.getBoundNodeOffset(), relIDVector->getValue(pos))) { selVector->selectedPositions[nextSelectedPos++] = pos; } } @@ -359,16 +353,17 @@ void RelIDList::setDeletedRelsIfNecessary(Transaction* transaction, ListSyncStat unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( node_offset_t nodeOffset) { unordered_set deletedRelOffsetsInList; - CursorAndMapper cursorAndMapper; - cursorAndMapper.reset(metadata, numElementsPerPage, headers->getHeader(nodeOffset), nodeOffset); + auto listHeader = headers->getHeader(nodeOffset); + auto pageMapper = ListHandle::getPageMapper(metadata, listHeader, nodeOffset); + auto pageCursor = ListHandle::getPageCursor(listHeader, numElementsPerPage); auto numElementsInPersistentStore = getNumElementsFromListHeader(nodeOffset); uint64_t numElementsRead = 0; while (numElementsRead < numElementsInPersistentStore) { auto numElementsToReadInCurPage = min(numElementsInPersistentStore - numElementsRead, - (uint64_t)(numElementsPerPage - cursorAndMapper.cursor.elemPosInPage)); - auto physicalPageIdx = cursorAndMapper.mapper(cursorAndMapper.cursor.pageIdx); + (uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); + auto physicalPageIdx = pageMapper(pageCursor.pageIdx); auto frame = bufferManager.pin(fileHandle, physicalPageIdx) + - getElemByteOffset(cursorAndMapper.cursor.elemPosInPage); + getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { auto relID = *(int64_t*)frame; if (listsUpdateStore->isRelDeletedInPersistentStore( @@ -379,7 +374,7 @@ unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( frame += elementSize; } bufferManager.unpin(fileHandle, physicalPageIdx); - cursorAndMapper.cursor.nextPage(); + pageCursor.nextPage(); } return deletedRelOffsetsInList; } diff --git a/src/storage/storage_structure/lists/lists_update_iterator.cpp b/src/storage/storage_structure/lists/lists_update_iterator.cpp index 970fe85cde..425c765687 100644 --- a/src/storage/storage_structure/lists/lists_update_iterator.cpp +++ b/src/storage/storage_structure/lists/lists_update_iterator.cpp @@ -88,11 +88,8 @@ void ListsUpdateIterator::slideListsIfNecessary(uint64_t endNodeOffsetInclusive) list_header_t newHeader = ListHeaders::getSmallListHeader(curCSROffset, listLen); if (newHeader != oldHeader) { InMemList inMemList{listLen, lists->elementSize, lists->mayContainNulls()}; - CursorAndMapper cursorAndMapper; - cursorAndMapper.reset(lists->getListsMetadata(), lists->numElementsPerPage, - lists->getHeaders()->getHeader(nodeOffsetToSlide), nodeOffsetToSlide); const unordered_set deletedRelOffsetsInList; - lists->fillInMemListsFromPersistentStore(cursorAndMapper, + lists->fillInMemListsFromPersistentStore(nodeOffsetToSlide, lists->getNumElementsFromListHeader(nodeOffsetToSlide), inMemList, deletedRelOffsetsInList); updateSmallListAndCurCSROffset(oldHeader, inMemList); diff --git a/src/storage/storage_structure/lists/lists_update_store.cpp b/src/storage/storage_structure/lists/lists_update_store.cpp index 433698013f..2eb1d7010d 100644 --- a/src/storage/storage_structure/lists/lists_update_store.cpp +++ b/src/storage/storage_structure/lists/lists_update_store.cpp @@ -19,12 +19,6 @@ ListsUpdateStore::ListsUpdateStore(MemoryManager& memoryManager, RelTableSchema& factorizedTableSchema->appendColumn(make_unique(false /* isUnflat */, 0 /* dataChunkPos */, Types::getDataTypeSize(relProperty.dataType))); } - nodeDataChunk = make_shared(2); - nodeDataChunk->state->currIdx = 0; - srcNodeVector = make_shared(NODE_ID, &memoryManager); - nodeDataChunk->insert(0 /* pos */, srcNodeVector); - dstNodeVector = make_shared(NODE_ID, &memoryManager); - nodeDataChunk->insert(1 /* pos */, dstNodeVector); factorizedTable = make_unique(&memoryManager, std::move(factorizedTableSchema)); initListUpdatesPerTablePerDirection(); @@ -52,7 +46,7 @@ bool ListsUpdateStore::isNewlyAddedNode(ListFileID& listFileID, node_offset_t no !listUpdatesPerChunk.at(chunkIdx).contains(nodeOffset)) { return false; } - return listUpdatesPerChunk.at(chunkIdx).at(nodeOffset).newlyAddedNode; + return listUpdatesPerChunk.at(chunkIdx).at(nodeOffset).isNewlyAddedNode; } bool ListsUpdateStore::isRelDeletedInPersistentStore( @@ -168,10 +162,10 @@ uint64_t ListsUpdateStore::getNumInsertedRelsForNodeOffset( return listUpdatesPerTable.at(chunkIdx).at(nodeOffset).insertedRelsTupleIdxInFT.size(); } -void ListsUpdateStore::readValues(ListFileID& listFileID, ListSyncState& listSyncState, - shared_ptr valueVector) const { - auto numTuplesToRead = listSyncState.getNumValuesToRead(); - auto nodeOffset = listSyncState.getBoundNodeOffset(); +void ListsUpdateStore::readValues( + ListFileID& listFileID, ListHandle& listHandle, shared_ptr valueVector) const { + auto numTuplesToRead = listHandle.getNumValuesToRead(); + auto nodeOffset = listHandle.getBoundNodeOffset(); if (numTuplesToRead == 0) { valueVector->state->initOriginalAndSelectedSize(0); return; @@ -184,7 +178,7 @@ void ListsUpdateStore::readValues(ListFileID& listFileID, ListSyncState& listSyn .at(StorageUtils::getListChunkIdx(nodeOffset)) .at(nodeOffset); factorizedTable->lookup(vectorsToRead, columnsToRead, listUpdates.insertedRelsTupleIdxInFT, - listSyncState.getStartElemOffset(), numTuplesToRead); + listHandle.getStartElemOffset(), numTuplesToRead); valueVector->state->originalSize = numTuplesToRead; } diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index cde69af058..9c75d3530c 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -7,10 +7,10 @@ NodeTable::NodeTable(NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs, BufferManager& bufferManager, bool isInMemory, WAL* wal, NodeTableSchema* nodeTableSchema) : nodesStatisticsAndDeletedIDs{nodesStatisticsAndDeletedIDs}, tableID{nodeTableSchema->tableID}, isInMemory{isInMemory} { - loadColumnsAndListsFromDisk(nodeTableSchema, bufferManager, wal); + initializeData(nodeTableSchema, bufferManager, wal); } -void NodeTable::loadColumnsAndListsFromDisk( +void NodeTable::initializeData( NodeTableSchema* nodeTableSchema, BufferManager& bufferManager, WAL* wal) { propertyColumns.resize(nodeTableSchema->getAllNodeProperties().size()); for (auto i = 0u; i < nodeTableSchema->getAllNodeProperties().size(); i++) { @@ -25,15 +25,26 @@ void NodeTable::loadColumnsAndListsFromDisk( nodeTableSchema->getPrimaryKey().dataType, bufferManager, wal); } +void NodeTable::scan(Transaction* transaction, const shared_ptr& inputIDVector, + const vector& columnIds, vector> outputVectors) { + assert(columnIds.size() == outputVectors.size()); + for (auto i = 0u; i < columnIds.size(); i++) { + if (columnIds[i] == UINT32_MAX) { + outputVectors[i]->setAllNull(); + } else { + propertyColumns[columnIds[i]]->read(transaction, inputIDVector, outputVectors[i]); + } + } +} + node_offset_t NodeTable::addNodeAndResetProperties(ValueVector* primaryKeyVector) { auto nodeOffset = nodesStatisticsAndDeletedIDs->addNode(tableID); - assert(primaryKeyVector->state->isFlat()); - if (primaryKeyVector->isNull(primaryKeyVector->state->selVector->selectedPositions[0])) { + assert(primaryKeyVector->state->selVector->selectedSize == 1); + auto pkValPos = primaryKeyVector->state->selVector->selectedPositions[0]; + if (primaryKeyVector->isNull(pkValPos)) { throw RuntimeException("Null is not allowed as a primary key value."); } - if (!pkIndex->insert(primaryKeyVector, primaryKeyVector->state->selVector->selectedPositions[0], - nodeOffset)) { - auto pkValPos = primaryKeyVector->state->selVector->selectedPositions[0]; + if (!pkIndex->insert(primaryKeyVector, pkValPos, nodeOffset)) { string pkStr = primaryKeyVector->dataType.typeID == INT64 ? to_string(primaryKeyVector->getValue(pkValPos)) : primaryKeyVector->getValue(pkValPos).getAsString(); @@ -48,7 +59,7 @@ node_offset_t NodeTable::addNodeAndResetProperties(ValueVector* primaryKeyVector void NodeTable::deleteNodes(ValueVector* nodeIDVector, ValueVector* primaryKeyVector) { assert(nodeIDVector->state == primaryKeyVector->state && nodeIDVector->hasNoNullsGuarantee() && primaryKeyVector->hasNoNullsGuarantee()); - if (nodeIDVector->state->isFlat()) { + if (nodeIDVector->state->selVector->selectedSize == 1) { auto pos = nodeIDVector->state->selVector->selectedPositions[0]; deleteNode(nodeIDVector->readNodeOffset(pos), primaryKeyVector, pos); } else { diff --git a/src/storage/store/rel_table.cpp b/src/storage/store/rel_table.cpp index a801699888..37bbf3e03b 100644 --- a/src/storage/store/rel_table.cpp +++ b/src/storage/store/rel_table.cpp @@ -8,145 +8,240 @@ using namespace kuzu::catalog; namespace kuzu { namespace storage { +Column* DirectedRelTableData::getPropertyColumn(table_id_t boundNodeTableID, uint64_t propertyIdx) { + if (propertyColumns.contains(boundNodeTableID) && + propertyIdx < propertyColumns[boundNodeTableID].size()) { + return propertyColumns[boundNodeTableID][propertyIdx].get(); + } + return nullptr; +} + +Lists* DirectedRelTableData::getPropertyLists(table_id_t boundNodeTableID, uint64_t propertyIdx) { + if (propertyLists.contains(boundNodeTableID) && + propertyIdx < propertyLists[boundNodeTableID].size()) { + return propertyLists[boundNodeTableID][propertyIdx].get(); + } + return nullptr; +} + +AdjColumn* DirectedRelTableData::getAdjColumn(table_id_t boundNodeTableID) { + if (adjColumns.contains(boundNodeTableID)) { + return adjColumns[boundNodeTableID].get(); + } + return nullptr; +} + +AdjLists* DirectedRelTableData::getAdjLists(table_id_t boundNodeTableID) { + if (adjLists.contains(boundNodeTableID)) { + return adjLists[boundNodeTableID].get(); + } + return nullptr; +} + +void DirectedRelTableData::initializeData( + RelTableSchema* tableSchema, BufferManager& bufferManager, WAL* wal) { + for (auto& [srcTableID, dstTableID] : tableSchema->getSrcDstTableIDs()) { + auto boundNodeTableID = direction == FWD ? srcTableID : dstTableID; + NodeIDCompressionScheme nodeIDCompressionScheme( + tableSchema->getUniqueNbrTableIDsForBoundTableIDDirection(direction, boundNodeTableID)); + if (tableSchema->isSingleMultiplicityInDirection(direction)) { + initializeColumnsForBoundNodeTable( + tableSchema, boundNodeTableID, nodeIDCompressionScheme, bufferManager, wal); + } else { + initializeListsForBoundNodeTabl( + tableSchema, boundNodeTableID, nodeIDCompressionScheme, bufferManager, wal); + } + } +} + +void DirectedRelTableData::initializeColumnsForBoundNodeTable(RelTableSchema* tableSchema, + table_id_t boundNodeTableID, NodeIDCompressionScheme& nodeIDCompressionScheme, + BufferManager& bufferManager, WAL* wal) { + adjColumns[boundNodeTableID] = + make_unique(StorageUtils::getAdjColumnStructureIDAndFName(wal->getDirectory(), + tableSchema->tableID, boundNodeTableID, direction), + bufferManager, nodeIDCompressionScheme, isInMemoryMode, wal); + propertyColumns[boundNodeTableID].resize(tableSchema->getNumProperties()); + for (auto& property : tableSchema->properties) { + propertyColumns.at(boundNodeTableID)[property.propertyID] = ColumnFactory::getColumn( + StorageUtils::getRelPropertyColumnStructureIDAndFName(wal->getDirectory(), + tableSchema->tableID, boundNodeTableID, direction, property.propertyID), + property.dataType, bufferManager, isInMemoryMode, wal); + } +} + +void DirectedRelTableData::initializeListsForBoundNodeTabl(RelTableSchema* tableSchema, + table_id_t boundNodeTableID, NodeIDCompressionScheme& nodeIDCompressionScheme, + BufferManager& bufferManager, WAL* wal) { + adjLists[boundNodeTableID] = + make_unique(StorageUtils::getAdjListsStructureIDAndFName(wal->getDirectory(), + tableSchema->tableID, boundNodeTableID, direction), + bufferManager, nodeIDCompressionScheme, isInMemoryMode, wal, listsUpdateStore); + propertyLists[boundNodeTableID].resize(tableSchema->getNumProperties()); + for (auto& property : tableSchema->properties) { + propertyLists.at(boundNodeTableID)[property.propertyID] = ListsFactory::getLists( + StorageUtils::getRelPropertyListsStructureIDAndFName( + wal->getDirectory(), tableSchema->tableID, boundNodeTableID, direction, property), + property.dataType, adjLists[boundNodeTableID]->getHeaders(), bufferManager, + isInMemoryMode, wal, listsUpdateStore); + } +} + +void DirectedRelTableData::scanColumns(Transaction* transaction, RelTableScanState& scanState, + const shared_ptr& inNodeIDVector, vector>& outputVectors) { + auto adjColumn = adjColumns.at(scanState.boundNodeTableID).get(); + // Note: The scan operator should guarantee that the first property in the output is adj column. + adjColumn->read(transaction, inNodeIDVector, outputVectors[0]); + NodeIDVector::discardNull(*outputVectors[0]); + if (outputVectors[0]->state->selVector->selectedSize == 0) { + return; + } + for (auto i = 0u; i < scanState.propertyIds.size(); i++) { + auto propertyId = scanState.propertyIds[i]; + auto outputVectorId = i + 1; + if (propertyId == INVALID_PROPERTY_ID) { + outputVectors[outputVectorId]->setAllNull(); + continue; + } + auto propertyColumn = getPropertyColumn(scanState.boundNodeTableID, propertyId); + propertyColumn->read(transaction, inNodeIDVector, outputVectors[outputVectorId]); + } +} + +void DirectedRelTableData::scanLists(Transaction* transaction, RelTableScanState& scanState, + const shared_ptr& inNodeIDVector, vector>& outputVectors) { + auto adjList = getAdjLists(scanState.boundNodeTableID); + if (scanState.syncState->isBoundNodeOffsetInValid()) { + auto currentIdx = inNodeIDVector->state->selVector->selectedPositions[0]; + if (inNodeIDVector->isNull(currentIdx)) { + outputVectors[0]->state->selVector->selectedSize = 0; + return; + } + auto currentNodeOffset = inNodeIDVector->readNodeOffset(currentIdx); + adjList->initListReadingState( + currentNodeOffset, *scanState.listHandles[0], transaction->getType()); + } + adjList->readValues(outputVectors[0], *scanState.listHandles[0]); + for (auto i = 0u; i < scanState.propertyIds.size(); i++) { + auto propertyId = scanState.propertyIds[i]; + auto outputVectorId = i + 1; + if (propertyId == INVALID_PROPERTY_ID) { + outputVectors[outputVectorId]->setAllNull(); + continue; + } + auto propertyList = getPropertyLists(scanState.boundNodeTableID, propertyId); + propertyList->readValues( + outputVectors[outputVectorId], *scanState.listHandles[outputVectorId]); + propertyList->setDeletedRelsIfNecessary( + transaction, *scanState.listHandles[outputVectorId], outputVectors[outputVectorId]); + } +} + +void DirectedRelTableData::insertRel(table_id_t boundTableID, + const shared_ptr& boundVector, const shared_ptr& nbrVector, + const vector>& relPropertyVectors) { + if (!adjColumns.contains(boundTableID)) { + return; + } + auto adjColumn = adjColumns.at(boundTableID).get(); + auto nodeOffset = + boundVector->readNodeOffset(boundVector->state->selVector->selectedPositions[0]); + // TODO(Guodong): We should pass a write transaction pointer down. + if (!adjColumn->isNull(nodeOffset, Transaction::getDummyWriteTrx().get())) { + throw RuntimeException( + StringUtils::string_format("Node(nodeOffset: %d, tableID: %d) in RelTable %d cannot " + "have more than one neighbour in the %s direction.", + nodeOffset, boundTableID, tableID, getRelDirectionAsString(direction).c_str())); + } + adjColumn->writeValues(boundVector, nbrVector); + for (auto i = 0u; i < relPropertyVectors.size(); i++) { + auto propertyColumn = getPropertyColumn(boundTableID, i); + propertyColumn->writeValues(boundVector, relPropertyVectors[i]); + } +} + +void DirectedRelTableData::deleteRel( + table_id_t boundTableID, const shared_ptr& boundVector) { + if (!adjColumns.contains(boundTableID)) { + return; + } + auto adjColumn = adjColumns.at(boundTableID).get(); + auto nodeOffset = + boundVector->readNodeOffset(boundVector->state->selVector->selectedPositions[0]); + adjColumn->setNodeOffsetToNull(nodeOffset); + for (auto& propertyColumn : propertyColumns.at(boundTableID)) { + propertyColumn->setNodeOffsetToNull(nodeOffset); + } +} + +void DirectedRelTableData::performOpOnListsWithUpdates( + const std::function& opOnListsWithUpdates) { + for (auto& [boundNodeTableID, listsUpdatePerTable] : + listsUpdateStore->getListUpdatesPerBoundNodeTableOfDirection(direction)) { + opOnListsWithUpdates(adjLists.at(boundNodeTableID).get()); + for (auto& propertyList : propertyLists.at(boundNodeTableID)) { + opOnListsWithUpdates(propertyList.get()); + } + } +} + +vector> DirectedRelTableData::getListsUpdateIterators( + table_id_t boundNodeTableID) { + vector> listsUpdateIterators; + listsUpdateIterators.push_back( + ListsUpdateIteratorFactory::getListsUpdateIterator(adjLists.at(boundNodeTableID).get())); + for (auto& propList : propertyLists.at(boundNodeTableID)) { + listsUpdateIterators.push_back( + ListsUpdateIteratorFactory::getListsUpdateIterator(propList.get())); + } + return listsUpdateIterators; +} + RelTable::RelTable(const Catalog& catalog, table_id_t tableID, BufferManager& bufferManager, MemoryManager& memoryManager, bool isInMemoryMode, WAL* wal) - : logger{LoggerUtils::getOrCreateLogger("storage")}, tableID{tableID}, - isInMemoryMode{isInMemoryMode}, listsUpdateStore{make_unique(memoryManager, - *catalog.getReadOnlyVersion()->getRelTableSchema( - tableID))}, - wal{wal} { - loadColumnsAndListsFromDisk(catalog, bufferManager); + : tableID{tableID}, wal{wal} { + auto tableSchema = catalog.getReadOnlyVersion()->getRelTableSchema(tableID); + listsUpdateStore = make_unique(memoryManager, *tableSchema); + fwdRelTableData = + make_unique(tableID, FWD, listsUpdateStore.get(), isInMemoryMode); + bwdRelTableData = + make_unique(tableID, BWD, listsUpdateStore.get(), isInMemoryMode); + initializeData(tableSchema, bufferManager); } -void RelTable::loadColumnsAndListsFromDisk( - const catalog::Catalog& catalog, BufferManager& bufferManager) { - initAdjColumnOrLists(catalog, bufferManager, wal); - initPropertyListsAndColumns(catalog, bufferManager, wal); +void RelTable::initializeData(RelTableSchema* tableSchema, BufferManager& bufferManager) { + fwdRelTableData->initializeData(tableSchema, bufferManager, wal); + bwdRelTableData->initializeData(tableSchema, bufferManager, wal); } -vector RelTable::getAdjListsForNodeTable(table_id_t tableID) { +vector RelTable::getAdjListsForNodeTable(table_id_t boundNodeTableID) { vector retVal; - auto it = adjLists[FWD].find(tableID); - if (it != adjLists[FWD].end()) { - retVal.push_back(it->second.get()); + if (fwdRelTableData->hasAdjLists(boundNodeTableID)) { + retVal.push_back(fwdRelTableData->getAdjLists(boundNodeTableID)); } - it = adjLists[BWD].find(tableID); - if (it != adjLists[BWD].end()) { - retVal.push_back(it->second.get()); + if (bwdRelTableData->hasAdjLists(boundNodeTableID)) { + retVal.push_back(bwdRelTableData->getAdjLists(boundNodeTableID)); } return retVal; } -vector RelTable::getAdjColumnsForNodeTable(table_id_t tableID) { +vector RelTable::getAdjColumnsForNodeTable(table_id_t boundNodeTableID) { vector retVal; - auto it = adjColumns[FWD].find(tableID); - if (it != adjColumns[FWD].end()) { - retVal.push_back(it->second.get()); + if (fwdRelTableData->hasAdjColumn(boundNodeTableID)) { + retVal.push_back(fwdRelTableData->getAdjColumn(boundNodeTableID)); } - it = adjColumns[BWD].find(tableID); - if (it != adjColumns[BWD].end()) { - retVal.push_back(it->second.get()); + if (bwdRelTableData->hasAdjColumn(boundNodeTableID)) { + retVal.push_back(bwdRelTableData->getAdjColumn(boundNodeTableID)); } return retVal; } // Prepares all the db file changes necessary to update the "persistent" store of lists with the -// listsUpdateStore, which stores the updates by the write trx locally. +// listsUpdateStore, which stores the updates by the write transaction locally. void RelTable::prepareCommitOrRollbackIfNecessary(bool isCommit) { - auto& listUpdatesPerDirection = listsUpdateStore->getListUpdatesPerTablePerDirection(); - for (auto& relDirection : REL_DIRECTIONS) { - for (auto& listUpdatesPerTable : listUpdatesPerDirection[relDirection]) { - if (isCommit && !listUpdatesPerTable.second.empty()) { - auto srcTableID = listUpdatesPerTable.first; - auto listsUpdateIterators = getListsUpdateIterators(relDirection, srcTableID); - // Note: In C++ iterating through maps happens in non-descending order of the keys. - // This property is critical when using listsUpdateIterator, which requires the user - // to make calls to writeInMemListToListPages in ascending order of nodeOffsets. - auto& listUpdatesPerChunk = listUpdatesPerTable.second; - for (auto updatedChunkItr = listUpdatesPerChunk.begin(); - updatedChunkItr != listUpdatesPerChunk.end(); ++updatedChunkItr) { - for (auto updatedNodeOffsetItr = updatedChunkItr->second.begin(); - updatedNodeOffsetItr != updatedChunkItr->second.end(); - updatedNodeOffsetItr++) { - auto nodeOffset = updatedNodeOffsetItr->first; - auto& listUpdates = updatedNodeOffsetItr->second; - // Note: An empty listUpdates can exist for a nodeOffset, because we don't - // fix the listUpdates, listUpdatesPerNode and ListUpdatesPerChunk indices - // after we insert or delete a rel. For example: a user inserts 1 rel to - // nodeOffset1, and then deletes that rel. We will end up getting an empty - // listUpdates for nodeOffset1. - if (!listUpdates.hasUpdates()) { - continue; - } - if (listUpdates.newlyAddedNode) { - listsUpdateIterators[0]->updateList(nodeOffset, - *adjLists[relDirection] - .at(srcTableID) - ->getInMemListWithDataFromUpdateStoreOnly( - nodeOffset, listUpdates.insertedRelsTupleIdxInFT)); - for (auto i = 0u; i < propertyLists[relDirection].at(srcTableID).size(); - i++) { - listsUpdateIterators[i + 1]->updateList(nodeOffset, - *propertyLists[relDirection] - .at(srcTableID)[i] - ->getInMemListWithDataFromUpdateStoreOnly( - nodeOffset, listUpdates.insertedRelsTupleIdxInFT)); - } - } else if (ListHeaders::isALargeList(adjLists[relDirection] - .at(srcTableID) - ->getHeaders() - ->headersDiskArray->get(nodeOffset, - TransactionType::READ_ONLY)) && - listUpdates.deletedRelIDs.empty()) { - // We do an optimization for relPropertyList and adjList : - // If the initial list is a largeList and we don't delete any rel from - // the persistentStore, we can simply append the newly inserted rels - // from the relUpdateStore to largeList. In this case, we can skip - // reading the data from persistentStore to InMemList and only need to - // read the data from relUpdateStore to InMemList. - listsUpdateIterators[0]->appendToLargeList(nodeOffset, - *adjLists[relDirection] - .at(srcTableID) - ->getInMemListWithDataFromUpdateStoreOnly( - nodeOffset, listUpdates.insertedRelsTupleIdxInFT)); - for (auto i = 0u; i < propertyLists[relDirection].at(srcTableID).size(); - i++) { - listsUpdateIterators[i + 1]->appendToLargeList(nodeOffset, - *propertyLists[relDirection] - .at(srcTableID)[i] - ->getInMemListWithDataFromUpdateStoreOnly( - nodeOffset, listUpdates.insertedRelsTupleIdxInFT)); - } - } else { - auto deletedRelOffsetsForList = - ((RelIDList*)(propertyLists[relDirection] - .at(srcTableID) - [RelTableSchema::INTERNAL_REL_ID_PROPERTY_IDX] - .get())) - ->getDeletedRelOffsetsInListForNodeOffset(nodeOffset); - listsUpdateIterators[0]->updateList( - nodeOffset, *adjLists[relDirection] - .at(srcTableID) - ->writeToInMemList(nodeOffset, - listUpdates.insertedRelsTupleIdxInFT, - deletedRelOffsetsForList)); - for (auto i = 0u; i < propertyLists[relDirection].at(srcTableID).size(); - i++) { - listsUpdateIterators[i + 1]->updateList( - nodeOffset, *propertyLists[relDirection] - .at(srcTableID)[i] - ->writeToInMemList(nodeOffset, - listUpdates.insertedRelsTupleIdxInFT, - deletedRelOffsetsForList)); - } - } - } - for (auto& listsUpdateIterator : listsUpdateIterators) { - listsUpdateIterator->doneUpdating(); - } - } - } - } + if (isCommit) { + prepareCommitForDirection(FWD); + prepareCommitForDirection(BWD); } if (listsUpdateStore->hasUpdates()) { addToUpdatedRelTables(); @@ -177,29 +272,8 @@ void RelTable::insertRel(const shared_ptr& srcNodeIDVector, auto dstTableID = dstNodeIDVector->getValue(dstNodeIDVector->state->selVector->selectedPositions[0]) .tableID; - for (auto direction : REL_DIRECTIONS) { - auto boundTableID = (direction == RelDirection::FWD ? srcTableID : dstTableID); - auto boundVector = (direction == RelDirection::FWD ? srcNodeIDVector : dstNodeIDVector); - auto nbrVector = (direction == RelDirection::FWD ? dstNodeIDVector : srcNodeIDVector); - if (adjColumns[direction].contains(boundTableID)) { - auto nodeOffset = - boundVector->readNodeOffset(boundVector->state->selVector->selectedPositions[0]); - if (!adjColumns[direction] - .at(boundTableID) - ->isNull(nodeOffset, Transaction::getDummyWriteTrx().get())) { - throw RuntimeException(StringUtils::string_format( - "RelTable %d is a %s table, but node(nodeOffset: %d, tableID: %d) has " - "more than one neighbour in the %s direction.", - tableID, inferRelMultiplicity(srcTableID, dstTableID).c_str(), nodeOffset, - boundTableID, getRelDirectionAsString(direction).c_str())); - } - adjColumns[direction].at(boundTableID)->writeValues(boundVector, nbrVector); - for (auto i = 0; i < relPropertyVectors.size(); i++) { - propertyColumns[direction].at(boundTableID)[i]->writeValues( - boundVector, relPropertyVectors[i]); - } - } - } + fwdRelTableData->insertRel(srcTableID, srcNodeIDVector, dstNodeIDVector, relPropertyVectors); + bwdRelTableData->insertRel(dstTableID, dstNodeIDVector, srcNodeIDVector, relPropertyVectors); listsUpdateStore->insertRelIfNecessary(srcNodeIDVector, dstNodeIDVector, relPropertyVectors); } @@ -213,167 +287,109 @@ void RelTable::deleteRel(const shared_ptr& srcNodeIDVector, auto dstTableID = dstNodeIDVector->getValue(dstNodeIDVector->state->selVector->selectedPositions[0]) .tableID; - for (auto direction : REL_DIRECTIONS) { - auto boundTableID = (direction == RelDirection::FWD ? srcTableID : dstTableID); - auto boundVector = (direction == RelDirection::FWD ? srcNodeIDVector : dstNodeIDVector); - if (adjColumns[direction].contains(boundTableID)) { - auto nodeOffset = - boundVector->readNodeOffset(boundVector->state->selVector->selectedPositions[0]); - adjColumns[direction].at(boundTableID)->setNodeOffsetToNull(nodeOffset); - for (auto i = 0; i < propertyColumns[direction].size(); i++) { - propertyColumns[direction].at(boundTableID)[i]->setNodeOffsetToNull(nodeOffset); - } - } - } + fwdRelTableData->deleteRel(srcTableID, srcNodeIDVector); + bwdRelTableData->deleteRel(dstTableID, dstNodeIDVector); listsUpdateStore->deleteRelIfNecessary(srcNodeIDVector, dstNodeIDVector, relIDVector); } void RelTable::initEmptyRelsForNewNode(nodeID_t& nodeID) { - for (auto direction : REL_DIRECTIONS) { - if (adjColumns[direction].contains(nodeID.tableID)) { - adjColumns[direction].at(nodeID.tableID)->setNodeOffsetToNull(nodeID.offset); - } + if (fwdRelTableData->hasAdjColumn(nodeID.tableID)) { + fwdRelTableData->getAdjColumn(nodeID.tableID)->setNodeOffsetToNull(nodeID.offset); } - listsUpdateStore->initNewlyAddedNodes(nodeID); -} - -void RelTable::initAdjColumnOrLists( - const Catalog& catalog, BufferManager& bufferManager, WAL* wal) { - logger->info("Initializing AdjColumns and AdjLists for rel {}.", tableID); - adjColumns = vector{2}; - adjLists = vector{2}; - for (auto relDirection : REL_DIRECTIONS) { - for (auto& srcDstTableID : - catalog.getReadOnlyVersion()->getRelTableSchema(tableID)->getSrcDstTableIDs()) { - auto boundTableID = relDirection == FWD ? srcDstTableID.first : srcDstTableID.second; - NodeIDCompressionScheme nodeIDCompressionScheme( - catalog.getReadOnlyVersion() - ->getRelTableSchema(tableID) - ->getUniqueNbrTableIDsForBoundTableIDDirection(relDirection, boundTableID)); - if (catalog.getReadOnlyVersion()->isSingleMultiplicityInDirection( - tableID, relDirection)) { - // Add adj column. - auto adjColumn = make_unique( - StorageUtils::getAdjColumnStructureIDAndFName( - wal->getDirectory(), tableID, boundTableID, relDirection), - bufferManager, nodeIDCompressionScheme, isInMemoryMode, wal); - adjColumns[relDirection].emplace(boundTableID, move(adjColumn)); - } else { - // Add adj list. - auto adjList = make_unique( - StorageUtils::getAdjListsStructureIDAndFName( - wal->getDirectory(), tableID, boundTableID, relDirection), - bufferManager, nodeIDCompressionScheme, isInMemoryMode, wal, - listsUpdateStore.get()); - adjLists[relDirection].emplace(boundTableID, move(adjList)); - } - } + if (bwdRelTableData->hasAdjColumn(nodeID.tableID)) { + bwdRelTableData->getAdjColumn(nodeID.tableID)->setNodeOffsetToNull(nodeID.offset); } - logger->info("Initializing AdjColumns and AdjLists for rel {} done.", tableID); + listsUpdateStore->initNewlyAddedNodes(nodeID); } -void RelTable::initPropertyListsAndColumns( - const Catalog& catalog, BufferManager& bufferManager, WAL* wal) { - logger->info("Initializing PropertyLists and PropertyColumns for rel {}.", tableID); - propertyLists = vector{2}; - propertyColumns = vector{2}; - if (!catalog.getReadOnlyVersion()->getRelProperties(tableID).empty()) { - for (auto relDirection : REL_DIRECTIONS) { - if (catalog.getReadOnlyVersion()->isSingleMultiplicityInDirection( - tableID, relDirection)) { - initPropertyColumnsForRelTable(catalog, relDirection, bufferManager, wal); - } else { - initPropertyListsForRelTable(catalog, relDirection, bufferManager, wal); - } - } +void RelTable::performOpOnListsWithUpdates(const std::function& opOnListsWithUpdates, + const std::function& opIfHasUpdates) { + fwdRelTableData->performOpOnListsWithUpdates(opOnListsWithUpdates); + bwdRelTableData->performOpOnListsWithUpdates(opOnListsWithUpdates); + if (listsUpdateStore->hasUpdates()) { + opIfHasUpdates(); } - logger->info("Initializing PropertyLists and PropertyColumns for rel {} Done.", tableID); } -void RelTable::initPropertyColumnsForRelTable( - const Catalog& catalog, RelDirection relDirection, BufferManager& bufferManager, WAL* wal) { - logger->debug("Initializing PropertyColumns: relTable {}", tableID); - for (auto& boundTableID : - catalog.getReadOnlyVersion()->getNodeTableIDsForRelTableDirection(tableID, relDirection)) { - auto& properties = catalog.getReadOnlyVersion()->getRelProperties(tableID); - propertyColumns[relDirection].emplace( - boundTableID, vector>(properties.size())); - for (auto& property : properties) { - propertyColumns[relDirection].at(boundTableID)[property.propertyID] = - ColumnFactory::getColumn( - StorageUtils::getRelPropertyColumnStructureIDAndFName(wal->getDirectory(), - tableID, boundTableID, relDirection, property.propertyID), - property.dataType, bufferManager, isInMemoryMode, wal); - } - } - logger->debug("Initializing PropertyColumns done."); +vector> RelTable::getListsUpdateIterators( + RelDirection relDirection, table_id_t boundNodeTableID) const { + return relDirection == FWD ? fwdRelTableData->getListsUpdateIterators(boundNodeTableID) : + bwdRelTableData->getListsUpdateIterators(boundNodeTableID); } -void RelTable::initPropertyListsForRelTable( - const Catalog& catalog, RelDirection relDirection, BufferManager& bufferManager, WAL* wal) { - logger->debug("Initializing PropertyLists for rel {}", tableID); - for (auto& boundTableID : - catalog.getReadOnlyVersion()->getNodeTableIDsForRelTableDirection(tableID, relDirection)) { - auto& properties = catalog.getReadOnlyVersion()->getRelProperties(tableID); - auto adjListsHeaders = adjLists[relDirection].at(boundTableID)->getHeaders(); - propertyLists[relDirection].emplace( - boundTableID, vector>(properties.size())); - for (auto& property : properties) { - auto propertyID = property.propertyID; - propertyLists[relDirection].at(boundTableID)[property.propertyID] = - ListsFactory::getLists( - StorageUtils::getRelPropertyListsStructureIDAndFName( - wal->getDirectory(), tableID, boundTableID, relDirection, property), - property.dataType, adjListsHeaders, bufferManager, isInMemoryMode, wal, - listsUpdateStore.get()); +void RelTable::prepareCommitForDirection(RelDirection relDirection) { + for (auto& [boundNodeTableID, listsUpdates] : + listsUpdateStore->getListUpdatesPerBoundNodeTableOfDirection(relDirection)) { + if (listsUpdates.empty()) { + continue; } - } - logger->debug("Initializing PropertyLists for rel {} done.", tableID); -} - -void RelTable::performOpOnListsWithUpdates( - std::function opOnListsWithUpdates, std::function opIfHasUpdates) { - auto& listUpdatesPerDirection = listsUpdateStore->getListUpdatesPerTablePerDirection(); - for (auto& relDirection : REL_DIRECTIONS) { - for (auto& listUpdatesPerTable : listUpdatesPerDirection[relDirection]) { - if (!listUpdatesPerTable.second.empty()) { - auto tableID = listUpdatesPerTable.first; - opOnListsWithUpdates(adjLists[relDirection].at(tableID).get()); - for (auto& propertyList : propertyLists[relDirection].at(tableID)) { - opOnListsWithUpdates(propertyList.get()); + auto listsUpdateIterators = getListsUpdateIterators(relDirection, boundNodeTableID); + // Note: call writeInMemListToListPages in ascending order of nodeOffsets is critical here. + for (auto& [chunkId, listsUpdatesOfChunk] : listsUpdates) { + for (auto& [nodeOffset, listUpdatesOfNode] : listsUpdatesOfChunk) { + // Note: An empty listUpdates can exist for a nodeOffset, because we don't fix the + // listUpdates, listUpdatesPerNode and ListUpdatesPerChunk indices after we insert + // or delete a rel. For example: a user inserts 1 rel to nodeOffset1, and then + // deletes that rel. We will end up getting an empty listUpdates for nodeOffset1. + if (!listUpdatesOfNode.hasUpdates()) { + continue; + } + auto adjLists = getAdjLists(relDirection, boundNodeTableID); + if (listUpdatesOfNode.isNewlyAddedNode) { + auto inMemAdjLists = adjLists->createInMemListWithDataFromUpdateStoreOnly( + nodeOffset, listUpdatesOfNode.insertedRelsTupleIdxInFT); + listsUpdateIterators[0]->updateList(nodeOffset, *inMemAdjLists); + auto numPropertyLists = getNumPropertyLists(relDirection, boundNodeTableID); + for (auto i = 0u; i < numPropertyLists; i++) { + auto inMemPropLists = + getPropertyLists(relDirection, boundNodeTableID, i) + ->createInMemListWithDataFromUpdateStoreOnly( + nodeOffset, listUpdatesOfNode.insertedRelsTupleIdxInFT); + listsUpdateIterators[i + 1]->updateList(nodeOffset, *inMemPropLists); + } + // TODO(Guodong): Do we need to access the header in this way? + } else if (ListHeaders::isALargeList(adjLists->getHeaders()->headersDiskArray->get( + nodeOffset, TransactionType::READ_ONLY)) && + listUpdatesOfNode.deletedRelIDs.empty()) { + // We do an optimization for relPropertyList and adjList : If the initial list + // is a largeList and we didn't delete any rel from the persistentStore, we can + // simply append the newly inserted rels from the relUpdateStore to largeList. + // In this case, we can skip reading the data from persistentStore to InMemList + // and only need to read the data from relUpdateStore to InMemList. + auto inMemAdjLists = adjLists->createInMemListWithDataFromUpdateStoreOnly( + nodeOffset, listUpdatesOfNode.insertedRelsTupleIdxInFT); + listsUpdateIterators[0]->appendToLargeList(nodeOffset, *inMemAdjLists); + auto numPropertyLists = getNumPropertyLists(relDirection, boundNodeTableID); + for (auto i = 0u; i < numPropertyLists; i++) { + auto inMemPropLists = + getPropertyLists(relDirection, boundNodeTableID, i) + ->createInMemListWithDataFromUpdateStoreOnly( + nodeOffset, listUpdatesOfNode.insertedRelsTupleIdxInFT); + listsUpdateIterators[i + 1]->appendToLargeList(nodeOffset, *inMemPropLists); + } + } else { + auto relIDLists = (RelIDList*)getPropertyLists(relDirection, boundNodeTableID, + RelTableSchema::INTERNAL_REL_ID_PROPERTY_IDX); + auto deletedRelOffsets = + relIDLists->getDeletedRelOffsetsInListForNodeOffset(nodeOffset); + auto inMemAdjLists = adjLists->writeToInMemList( + nodeOffset, listUpdatesOfNode.insertedRelsTupleIdxInFT, deletedRelOffsets); + listsUpdateIterators[0]->updateList(nodeOffset, *inMemAdjLists); + auto numPropertyLists = getNumPropertyLists(relDirection, boundNodeTableID); + for (auto i = 0u; i < numPropertyLists; i++) { + auto inMemPropLists = + getPropertyLists(relDirection, boundNodeTableID, i) + ->writeToInMemList(nodeOffset, + listUpdatesOfNode.insertedRelsTupleIdxInFT, deletedRelOffsets); + listsUpdateIterators[i + 1]->updateList(nodeOffset, *inMemPropLists); + } } } + for (auto& listsUpdateIterator : listsUpdateIterators) { + listsUpdateIterator->doneUpdating(); + } } } - if (listsUpdateStore->hasUpdates()) { - opIfHasUpdates(); - } -} - -string RelTable::inferRelMultiplicity(table_id_t srcTableID, table_id_t dstTableID) { - auto isFWDColumn = adjColumns[RelDirection::FWD].contains(srcTableID); - auto isBWDColumn = adjColumns[RelDirection::BWD].contains(dstTableID); - if (isFWDColumn && isBWDColumn) { - return "ONE_ONE"; - } else if (isFWDColumn && !isBWDColumn) { - return "MANY_ONE"; - } else if (!isFWDColumn && isBWDColumn) { - return "ONE_MANY"; - } else { - return "MANY_MANY"; - } -} - -vector> RelTable::getListsUpdateIterators( - RelDirection relDirection, table_id_t srcTableID) const { - vector> listsUpdateIterators; - listsUpdateIterators.push_back(ListsUpdateIteratorFactory::getListsUpdateIterator( - adjLists[relDirection].at(srcTableID).get())); - for (auto& propList : propertyLists[relDirection].at(srcTableID)) { - listsUpdateIterators.push_back( - ListsUpdateIteratorFactory::getListsUpdateIterator(propList.get())); - } - return listsUpdateIterators; } } // namespace storage diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index e6b43a63af..8809716ce9 100644 --- a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -132,7 +132,7 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { case OVERFLOW_FILE_NEXT_BYTE_POS_RECORD: { // If we are recovering we do not replay OVERFLOW_FILE_NEXT_BYTE_POS_RECORD because // this record is intended for rolling back a transaction to ensure that we can - // recover the overflow space allocated for the write trx by calling + // recover the overflow space allocated for the write transaction by calling // DiskOverflowFile::resetNextBytePosToWriteTo(...). However during recovery, storageManager // is null, so we cannot construct this value. if (isRecovering) { @@ -220,7 +220,7 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { // files have been changed during checkpoint. So the in memory // fileHandles are obsolete and should be reconstructed (e.g. since the numPages // have likely changed they need to reconstruct their page locks). - storageManager->getNodesStore().getNodeTable(tableID)->loadColumnsAndListsFromDisk( + storageManager->getNodesStore().getNodeTable(tableID)->initializeData( nodeTableSchema, *bufferManager, wal); } else { auto catalogForCheckpointing = make_unique(); @@ -245,8 +245,8 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { catalog->getReadOnlyVersion()->getRelTableSchema(tableID), wal->getDirectory(), catalog); // See comments for COPY_NODE_CSV_RECORD. - storageManager->getRelsStore().getRelTable(tableID)->loadColumnsAndListsFromDisk( - *catalog, *bufferManager); + storageManager->getRelsStore().getRelTable(tableID)->initializeData( + catalog->getReadOnlyVersion()->getRelTableSchema(tableID), *bufferManager); storageManager->getNodesStore() .getNodesStatisticsAndDeletedIDs() .setAdjListsAndColumns(&storageManager->getRelsStore()); diff --git a/src/storage/wal_replayer_utils.cpp b/src/storage/wal_replayer_utils.cpp index e54aba9c2f..232ec20f7b 100644 --- a/src/storage/wal_replayer_utils.cpp +++ b/src/storage/wal_replayer_utils.cpp @@ -22,9 +22,9 @@ void WALReplayerUtils::createEmptyDBFilesForNewRelTable(Catalog* catalog, table_ } void WALReplayerUtils::createEmptyDBFilesForNewNodeTable( - Catalog* catalog, table_id_t tableID, string directory) { + Catalog* catalog, table_id_t tableID, const string& directory) { auto nodeTableSchema = catalog->getReadOnlyVersion()->getNodeTableSchema(tableID); - for (auto& property : nodeTableSchema->structuredProperties) { + for (auto& property : nodeTableSchema->properties) { auto fName = StorageUtils::getNodePropertyColumnFName( directory, nodeTableSchema->tableID, property.propertyID, DBFileType::ORIGINAL); InMemColumnFactory::getInMemPropertyColumn(fName, property.dataType, 0 /* numNodes */) @@ -77,7 +77,7 @@ void WALReplayerUtils::createEmptyDBFilesForColumns(const unordered_set& maxNodeOffsetsPerTable, RelDirection relDirection, const string& directory, RelTableSchema* relTableSchema) { for (auto boundTableID : boundTableIDs) { - auto numNodes = maxNodeOffsetsPerTable.at(boundTableID) == UINT64_MAX ? + auto numNodes = maxNodeOffsetsPerTable.at(boundTableID) == INVALID_NODE_OFFSET ? 0 : maxNodeOffsetsPerTable.at(boundTableID) + 1; make_unique( @@ -96,7 +96,7 @@ void WALReplayerUtils::createEmptyDBFilesForLists(const unordered_set& maxNodeOffsetsPerTable, RelDirection relDirection, const string& directory, RelTableSchema* relTableSchema) { for (auto boundTableID : boundTableIDs) { - auto numNodes = maxNodeOffsetsPerTable.at(boundTableID) == UINT64_MAX ? + auto numNodes = maxNodeOffsetsPerTable.at(boundTableID) == INVALID_NODE_OFFSET ? 0 : maxNodeOffsetsPerTable.at(boundTableID) + 1; auto adjLists = make_unique( @@ -112,7 +112,7 @@ void WALReplayerUtils::createEmptyDBFilesForLists(const unordered_set columnFileOperation, +void WALReplayerUtils::fileOperationOnNodeFiles(NodeTableSchema* nodeTableSchema, + const string& directory, std::function columnFileOperation, std::function listFileOperation) { - for (auto& property : nodeTableSchema->structuredProperties) { + for (auto& property : nodeTableSchema->properties) { columnFileOperation(StorageUtils::getNodePropertyColumnFName( directory, nodeTableSchema->tableID, property.propertyID, DBFileType::ORIGINAL)); } @@ -157,8 +158,9 @@ void WALReplayerUtils::fileOperationOnNodeFiles(NodeTableSchema* nodeTableSchema StorageUtils::getNodeIndexFName(directory, nodeTableSchema->tableID, DBFileType::ORIGINAL)); } -void WALReplayerUtils::fileOperationOnRelFiles(RelTableSchema* relTableSchema, string directory, - const Catalog* catalog, std::function columnFileOperation, +void WALReplayerUtils::fileOperationOnRelFiles(RelTableSchema* relTableSchema, + const string& directory, const Catalog* catalog, + std::function columnFileOperation, std::function listFileOperation) { for (auto relDirection : REL_DIRECTIONS) { auto boundTableIDs = catalog->getReadOnlyVersion()->getNodeTableIDsForRelTableDirection( diff --git a/test/include/mock_catalog/mock_catalog.h b/test/include/mock_catalog/mock_catalog.h index c3b4ce2d6a..be1d225381 100644 --- a/test/include/mock_catalog/mock_catalog.h +++ b/test/include/mock_catalog/mock_catalog.h @@ -187,30 +187,30 @@ class TinySnbCatalogContent : public MockCatalogContent { void setSrcNodeTableToRelTables() { unordered_set personToRelTableIDs = {KNOWS_TABLE_ID, WORKAT_TABLE_ID}; unordered_set organisationToRelTableIDs = {}; - srcNodeIDToRelIDs.push_back(move(personToRelTableIDs)); - srcNodeIDToRelIDs.push_back(move(organisationToRelTableIDs)); + srcNodeIDToRelIDs.push_back(std::move(personToRelTableIDs)); + srcNodeIDToRelIDs.push_back(std::move(organisationToRelTableIDs)); } void setDstNodeTableToRelTables() { unordered_set personToRelTableIDs = {KNOWS_TABLE_ID}; unordered_set organisationToRelTableIDs = {WORKAT_TABLE_ID}; - dstNodeIDToRelIDs.push_back(move(personToRelTableIDs)); - dstNodeIDToRelIDs.push_back(move(organisationToRelTableIDs)); + dstNodeIDToRelIDs.push_back(std::move(personToRelTableIDs)); + dstNodeIDToRelIDs.push_back(std::move(organisationToRelTableIDs)); } void setProperties() { PropertyNameDataType agePropertyDefinition(AGE_PROPERTY_KEY_STR, INT64); - ageProperty = Property::constructStructuredNodeProperty( + ageProperty = Property::constructNodeProperty( agePropertyDefinition, AGE_PROPERTY_KEY_ID, PERSON_TABLE_ID); PropertyNameDataType namePropertyDefinition(NAME_PROPERTY_KEY_STR, STRING); - nameProperty = Property::constructStructuredNodeProperty( + nameProperty = Property::constructNodeProperty( namePropertyDefinition, NAME_PROPERTY_KEY_ID, PERSON_TABLE_ID); PropertyNameDataType birthDatePropertyDefinition(BIRTHDATE_PROPERTY_KEY_STR, DATE); - birthDateProperty = Property::constructStructuredNodeProperty( + birthDateProperty = Property::constructNodeProperty( birthDatePropertyDefinition, BIRTHDATE_PROPERTY_KEY_ID, PERSON_TABLE_ID); PropertyNameDataType registerTimePropertyDefinition( REGISTERTIME_PROPERTY_KEY_STR, TIMESTAMP); - registerTimeProperty = Property::constructStructuredNodeProperty( + registerTimeProperty = Property::constructNodeProperty( registerTimePropertyDefinition, REGISTERTIME_PROPERTY_KEY_ID, PERSON_TABLE_ID); PropertyNameDataType descriptionPropertyDefinition(DESCRIPTION_PROPERTY_KEY_STR, STRING); descriptionProperty = Property::constructRelProperty( @@ -245,6 +245,6 @@ class TinySnbCatalog : public Catalog { void setUp() { auto catalogContent = make_unique>(); catalogContent->setUp(); - catalogContentForReadOnlyTrx = move(catalogContent); + catalogContentForReadOnlyTrx = std::move(catalogContent); } }; diff --git a/test/runner/e2e_delete_create_transaction_test.cpp b/test/runner/e2e_delete_create_transaction_test.cpp index 0ff613aa0b..3e565731a7 100644 --- a/test/runner/e2e_delete_create_transaction_test.cpp +++ b/test/runner/e2e_delete_create_transaction_test.cpp @@ -750,16 +750,16 @@ TEST_F(CreateRelTrxTest, ViolateManyOneMultiplicityError) { validateExceptionMessage( "MATCH (p1:person), (p2:person) WHERE p1.ID = 10 AND p2.ID = 10 CREATE " "(p1)-[:teaches]->(p2);", - "Runtime exception: RelTable 5 is a MANY_ONE table, but node(nodeOffset: 10, tableID: 1) " - "has more than one neighbour in the forward direction."); + "Runtime exception: Node(nodeOffset: 10, tableID: 1) in RelTable 5 cannot have more than " + "one neighbour in the forward direction."); } TEST_F(CreateRelTrxTest, ViolateOneOneMultiplicityError) { conn->beginWriteTransaction(); validateExceptionMessage("MATCH (a:animal), (p:person) WHERE a.ID = 10 AND p.ID = 10 CREATE " "(a)-[:hasOwner]->(p);", - "Runtime exception: RelTable 4 is a ONE_ONE table, but node(nodeOffset: 10, tableID: 0) " - "has more than one neighbour in the forward direction."); + "Runtime exception: Node(nodeOffset: 10, tableID: 0) in RelTable 4 cannot have more than " + "one neighbour in the forward direction."); } TEST_F(CreateRelTrxTest, CreateRelToEmptyRelTable) { diff --git a/test/runner/e2e_delete_rel_test.cpp b/test/runner/e2e_delete_rel_test.cpp index 39f3d81fe2..ecfeefd30e 100644 --- a/test/runner/e2e_delete_rel_test.cpp +++ b/test/runner/e2e_delete_rel_test.cpp @@ -86,8 +86,8 @@ class DeleteRelTest : public DBTest { conn->beginWriteTransaction(); // We delete all person->person rels whose dst nodeID offset is between 100-200 (inclusive); for (auto i = 100; i <= 200; i++) { - ASSERT_TRUE(conn->query(getDeleteKnowsRelQuery("person", "person", 0 /* srcID */, i)) - ->isSuccess()); + auto result = conn->query(getDeleteKnowsRelQuery("person", "person", 0 /* srcID */, i)); + ASSERT_TRUE(result->isSuccess()); } commitOrRollbackConnectionAndInitDBIfNecessary(isCommit, transactionTestType); auto result = conn->query("MATCH (p1:person)-[e:knows]->(p2:person) return e.length"); @@ -447,13 +447,13 @@ TEST_F(DeleteRelTest, MixedDeleteAndCreateRels) { ASSERT_EQ( TestHelper::convertResultToString(*conn->query(knowsRelQuery), true /* checkOutputOrder */), expectedResult); - ASSERT_TRUE( - conn->query(getInsertKnowsRelQuery("animal", "person", 7 /* srcID */, 0 /* dstID */)) - ->isSuccess()); + auto result = + conn->query(getInsertKnowsRelQuery("animal", "person", 7 /* srcID */, 0 /* dstID */)); + ASSERT_TRUE(result->isSuccess()); expectedResult = {"0", "1", "2", "3", "4", "5", "6", "7", "9", "10"}; + result = conn->query(knowsRelQuery); ASSERT_EQ( - TestHelper::convertResultToString(*conn->query(knowsRelQuery), true /* checkOutputOrder */), - expectedResult); + TestHelper::convertResultToString(*result, true /* checkOutputOrder */), expectedResult); ASSERT_TRUE( conn->query(getDeleteKnowsRelQuery("animal", "person", 7 /* srcID */, 0 /* dstID */)) ->isSuccess()); diff --git a/test/test_files/tinysnb/agg/simple.test b/test/test_files/tinysnb/agg/simple.test index cbf872b4a6..e83d180fcc 100644 --- a/test/test_files/tinysnb/agg/simple.test +++ b/test/test_files/tinysnb/agg/simple.test @@ -1,6 +1,6 @@ -NAME OneHopSimpleAggTest -QUERY MATCH (a:person)-[:knows]->(b:person)-[:knows]->(c:person) RETURN COUNT(a.ID), MIN(a.fName), MAX(c.ID) ---PARALLELISM 8 +-PARALLELISM 8 -ENUMERATE ---- 1 36|Alice|5 diff --git a/test/test_helper/test_helper.cpp b/test/test_helper/test_helper.cpp index eb8f9c2eb7..0bbda1de22 100644 --- a/test/test_helper/test_helper.cpp +++ b/test/test_helper/test_helper.cpp @@ -151,7 +151,7 @@ void BaseGraphTest::validateListFilesExistence( void BaseGraphTest::validateNodeColumnFilesExistence( NodeTableSchema* nodeTableSchema, DBFileType dbFileType, bool existence) { - for (auto& property : nodeTableSchema->structuredProperties) { + for (auto& property : nodeTableSchema->properties) { validateColumnFilesExistence( StorageUtils::getNodePropertyColumnFName(databaseConfig->databasePath, nodeTableSchema->tableID, property.propertyID, dbFileType),