diff --git a/src/binder/bind/bind_projection_clause.cpp b/src/binder/bind/bind_projection_clause.cpp index d04d9a68916..2d6d247246f 100644 --- a/src/binder/bind/bind_projection_clause.cpp +++ b/src/binder/bind/bind_projection_clause.cpp @@ -26,7 +26,10 @@ unique_ptr Binder::bindReturnClause(const ReturnClause& retur auto projectionBody = returnClause.getProjectionBody(); auto boundProjectionExpressions = bindProjectionExpressions( projectionBody->getProjectionExpressions(), projectionBody->containsStar()); - validateProjectionColumnHasNoInternalType(boundProjectionExpressions); + // TODO(Xiyang): Our id function will return the internal id of a node/rel, however the + // following function disallows the system to return an internal type. Either removes the + // constraints or remove the id function. + // validateProjectionColumnHasNoInternalType(boundProjectionExpressions); // expand node/rel to all of its properties. auto statementResult = make_unique(); for (auto& expression : boundProjectionExpressions) { diff --git a/src/binder/binder.cpp b/src/binder/binder.cpp index 9898fd36195..8c85ccc1673 100644 --- a/src/binder/binder.cpp +++ b/src/binder/binder.cpp @@ -89,7 +89,7 @@ void Binder::validateProjectionColumnNamesAreUnique(const expression_vector& exp } void Binder::validateProjectionColumnHasNoInternalType(const expression_vector& expressions) { - auto internalTypes = unordered_set{NODE_ID}; + auto internalTypes = unordered_set{INTERNAL_ID}; for (auto& expression : expressions) { if (internalTypes.contains(expression->dataType.typeID)) { throw BinderException("Cannot return expression " + expression->getRawName() + diff --git a/src/binder/expression_binder.cpp b/src/binder/expression_binder.cpp index c91b722e778..37d44fcaecc 100644 --- a/src/binder/expression_binder.cpp +++ b/src/binder/expression_binder.cpp @@ -324,7 +324,7 @@ unique_ptr ExpressionBinder::createInternalNodeIDExpression( propertyIDPerTable.insert({tableID, INVALID_PROPERTY_ID}); } auto result = make_unique( - DataType(NODE_ID), INTERNAL_ID_SUFFIX, node, std::move(propertyIDPerTable)); + DataType(INTERNAL_ID), INTERNAL_ID_SUFFIX, node, std::move(propertyIDPerTable)); return result; } diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 9c55c114a9d..79ed41c8c3d 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -225,7 +225,7 @@ table_id_t CatalogContent::addRelTableSchema(string tableName, RelMultiplicity r } vector properties; auto propertyID = 0; - auto propertyNameDataType = PropertyNameDataType(INTERNAL_ID_SUFFIX, INT64); + auto propertyNameDataType = PropertyNameDataType(INTERNAL_ID_SUFFIX, INTERNAL_ID); properties.push_back( Property::constructRelProperty(propertyNameDataType, propertyID++, tableID)); for (auto& propertyDefinition : propertyDefinitions) { diff --git a/src/common/types/types.cpp b/src/common/types/types.cpp index a5151790797..197e5e90ac4 100644 --- a/src/common/types/types.cpp +++ b/src/common/types/types.cpp @@ -54,8 +54,8 @@ DataType Types::dataTypeFromString(const string& dataTypeString) { } DataTypeID Types::dataTypeIDFromString(const std::string& dataTypeIDString) { - if ("NODE_ID" == dataTypeIDString) { - return NODE_ID; + if ("INTERNAL_ID" == dataTypeIDString) { + return INTERNAL_ID; } else if ("INT64" == dataTypeIDString) { return INT64; } else if ("DOUBLE" == dataTypeIDString) { @@ -93,8 +93,8 @@ string Types::dataTypeToString(DataTypeID dataTypeID) { return "NODE"; case REL: return "REL"; - case NODE_ID: - return "NODE_ID"; + case INTERNAL_ID: + return "INTERNAL_ID"; case BOOL: return "BOOL"; case INT64: @@ -138,8 +138,8 @@ string Types::dataTypesToString(const vector& dataTypeIDs) { uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) { switch (dataTypeID) { - case NODE_ID: - return sizeof(nodeID_t); + case INTERNAL_ID: + return sizeof(internalID_t); case BOOL: return sizeof(uint8_t); case INT64: diff --git a/src/common/types/value.cpp b/src/common/types/value.cpp index 51834817b23..8c44a235a1a 100644 --- a/src/common/types/value.cpp +++ b/src/common/types/value.cpp @@ -27,7 +27,7 @@ Value Value::createDefaultValue(const DataType& dataType) { return Value(timestamp_t()); case INTERVAL: return Value(interval_t()); - case NODE_ID: + case INTERNAL_ID: return Value(nodeID_t()); case STRING: return Value(string("")); @@ -67,8 +67,8 @@ Value::Value(kuzu::common::interval_t val_) : dataType{INTERVAL}, isNull_{false} val.intervalVal = val_; } -Value::Value(kuzu::common::nodeID_t val_) : dataType{NODE_ID}, isNull_{false} { - val.nodeIDVal = val_; +Value::Value(kuzu::common::internalID_t val_) : dataType{INTERNAL_ID}, isNull_{false} { + val.internalIDVal = val_; } Value::Value(const char* val_) : dataType{STRING}, isNull_{false} { @@ -121,8 +121,8 @@ void Value::copyValueFrom(const uint8_t* value) { case INTERVAL: { val.intervalVal = *((interval_t*)value); } break; - case NODE_ID: { - val.nodeIDVal = *((nodeID_t*)value); + case INTERNAL_ID: { + val.internalIDVal = *((nodeID_t*)value); } break; case STRING: { strVal = ((ku_string_t*)value)->getAsString(); @@ -162,8 +162,8 @@ void Value::copyValueFrom(const Value& other) { case INTERVAL: { val.intervalVal = other.val.intervalVal; } break; - case NODE_ID: { - val.nodeIDVal = other.val.nodeIDVal; + case INTERNAL_ID: { + val.internalIDVal = other.val.internalIDVal; } break; case STRING: { strVal = other.strVal; @@ -202,8 +202,8 @@ string Value::toString() const { return TypeUtils::toString(val.timestampVal); case INTERVAL: return TypeUtils::toString(val.intervalVal); - case NODE_ID: - return TypeUtils::toString(val.nodeIDVal); + case INTERNAL_ID: + return TypeUtils::toString(val.internalIDVal); case STRING: return strVal; case LIST: { diff --git a/src/function/aggregate_function.cpp b/src/function/aggregate_function.cpp index 3cd926fb6fe..4d8363ccd50 100644 --- a/src/function/aggregate_function.cpp +++ b/src/function/aggregate_function.cpp @@ -96,7 +96,7 @@ unique_ptr AggregateFunctionUtil::getMinMaxFunction( MinMaxFunction::updatePos, MinMaxFunction::combine, MinMaxFunction::finalize, inputType, isDistinct); - case NODE_ID: + case INTERNAL_ID: return make_unique(MinMaxFunction::initialize, MinMaxFunction::updateAll, MinMaxFunction::updatePos, MinMaxFunction::combine, MinMaxFunction::finalize, inputType, diff --git a/src/function/built_in_vector_operations.cpp b/src/function/built_in_vector_operations.cpp index ba84e38499d..4b5b73538a8 100644 --- a/src/function/built_in_vector_operations.cpp +++ b/src/function/built_in_vector_operations.cpp @@ -326,10 +326,10 @@ void BuiltInVectorOperations::registerListOperations() { void BuiltInVectorOperations::registerInternalIDOperation() { vector> definitions; definitions.push_back(make_unique( - ID_FUNC_NAME, vector{NODE}, NODE_ID, nullptr)); + ID_FUNC_NAME, vector{NODE}, INTERNAL_ID, nullptr)); definitions.push_back(make_unique( - ID_FUNC_NAME, vector{REL}, INT64, nullptr)); - vectorOperations.insert({ID_FUNC_NAME, move(definitions)}); + ID_FUNC_NAME, vector{REL}, INTERNAL_ID, nullptr)); + vectorOperations.insert({ID_FUNC_NAME, std::move(definitions)}); } } // namespace function diff --git a/src/function/vector_hash_operations.cpp b/src/function/vector_hash_operations.cpp index a591a7d6a4f..41a172bae35 100644 --- a/src/function/vector_hash_operations.cpp +++ b/src/function/vector_hash_operations.cpp @@ -10,8 +10,8 @@ void VectorHashOperations::computeHash(ValueVector* operand, ValueVector* result result->state = operand->state; assert(result->dataType.typeID == INT64); switch (operand->dataType.typeID) { - case NODE_ID: { - UnaryHashOperationExecutor::execute(*operand, *result); + case INTERNAL_ID: { + UnaryHashOperationExecutor::execute(*operand, *result); } break; case BOOL: { UnaryHashOperationExecutor::execute(*operand, *result); diff --git a/src/include/common/types/node_id_t.h b/src/include/common/types/internal_id_t.h similarity index 54% rename from src/include/common/types/node_id_t.h rename to src/include/common/types/internal_id_t.h index f681583c1a6..144f106dc69 100644 --- a/src/include/common/types/node_id_t.h +++ b/src/include/common/types/internal_id_t.h @@ -5,37 +5,40 @@ namespace kuzu { namespace common { +struct internalID_t; +typedef internalID_t nodeID_t; +typedef internalID_t relID_t; + typedef uint64_t table_id_t; -typedef uint64_t node_offset_t; +typedef uint64_t offset_t; constexpr table_id_t INVALID_TABLE_ID = UINT64_MAX; -constexpr node_offset_t INVALID_NODE_OFFSET = UINT64_MAX; +constexpr offset_t INVALID_NODE_OFFSET = UINT64_MAX; // System representation for nodeID. -struct nodeID_t { - node_offset_t offset; +struct internalID_t { + offset_t offset; table_id_t tableID; - nodeID_t() = default; - explicit inline nodeID_t(node_offset_t _offset, table_id_t tableID) - : offset(_offset), tableID(tableID) {} + internalID_t() = default; + internalID_t(offset_t offset, table_id_t tableID) : offset(offset), tableID(tableID) {} // comparison operators - inline bool operator==(const nodeID_t& rhs) const { + inline bool operator==(const internalID_t& rhs) const { return offset == rhs.offset && tableID == rhs.tableID; }; - inline bool operator!=(const nodeID_t& rhs) const { + inline bool operator!=(const internalID_t& rhs) const { return offset != rhs.offset || tableID != rhs.tableID; }; - inline bool operator>(const nodeID_t& rhs) const { + inline bool operator>(const internalID_t& rhs) const { return (tableID > rhs.tableID) || (tableID == rhs.tableID && offset > rhs.offset); }; - inline bool operator>=(const nodeID_t& rhs) const { + inline bool operator>=(const internalID_t& rhs) const { return (tableID > rhs.tableID) || (tableID == rhs.tableID && offset >= rhs.offset); }; - inline bool operator<(const nodeID_t& rhs) const { + inline bool operator<(const internalID_t& rhs) const { return (tableID < rhs.tableID) || (tableID == rhs.tableID && offset < rhs.offset); }; - inline bool operator<=(const nodeID_t& rhs) const { + inline bool operator<=(const internalID_t& rhs) const { return (tableID < rhs.tableID) || (tableID == rhs.tableID && offset <= rhs.offset); }; }; diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h index d27cd202d3a..2b80db1de7f 100644 --- a/src/include/common/types/types.h +++ b/src/include/common/types/types.h @@ -53,7 +53,7 @@ enum DataTypeID : uint8_t { TIMESTAMP = 26, INTERVAL = 27, - NODE_ID = 40, + INTERNAL_ID = 40, // variable size types STRING = 50, @@ -80,7 +80,7 @@ class DataType { } static inline std::vector getAllValidTypeIDs() { return std::vector{ - NODE_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, LIST}; + INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, LIST}; } DataType& operator=(const DataType& other); diff --git a/src/include/common/types/types_include.h b/src/include/common/types/types_include.h index a435a3ca91f..bc010a0fec6 100644 --- a/src/include/common/types/types_include.h +++ b/src/include/common/types/types_include.h @@ -2,9 +2,9 @@ #include "date_t.h" #include "dtime_t.h" +#include "internal_id_t.h" #include "interval_t.h" #include "ku_list.h" #include "ku_string.h" -#include "node_id_t.h" #include "timestamp_t.h" #include "types.h" diff --git a/src/include/common/types/value.h b/src/include/common/types/value.h index e775254d65b..f8d38d043b9 100644 --- a/src/include/common/types/value.h +++ b/src/include/common/types/value.h @@ -26,7 +26,7 @@ class Value { explicit Value(date_t val_); explicit Value(timestamp_t val_); explicit Value(interval_t val_); - explicit Value(nodeID_t val_); + explicit Value(internalID_t val_); explicit Value(const char* val_); explicit Value(const string& val_); explicit Value(DataType dataType, vector> vals); @@ -91,7 +91,7 @@ class Value { common::date_t dateVal; common::timestamp_t timestampVal; common::interval_t intervalVal; - common::nodeID_t nodeIDVal; + common::internalID_t internalIDVal; } val; std::string strVal; vector> listVal; @@ -191,8 +191,8 @@ inline interval_t Value::getValue() const { template<> inline nodeID_t Value::getValue() const { - validateType(NODE_ID); - return val.nodeIDVal; + validateType(INTERNAL_ID); + return val.internalIDVal; } template<> @@ -251,8 +251,8 @@ inline interval_t& Value::getValueReference() { template<> inline nodeID_t& Value::getValueReference() { - assert(dataType.typeID == NODE_ID); - return val.nodeIDVal; + assert(dataType.typeID == INTERNAL_ID); + return val.internalIDVal; } template<> diff --git a/src/include/common/vector/value_vector.h b/src/include/common/vector/value_vector.h index 7039a2a9678..e9ec54d7e44 100644 --- a/src/include/common/vector/value_vector.h +++ b/src/include/common/vector/value_vector.h @@ -53,8 +53,8 @@ class ValueVector { inline uint8_t* getData() const { return valueBuffer.get(); } - inline node_offset_t readNodeOffset(uint32_t pos) const { - assert(dataType.typeID == NODE_ID); + inline offset_t readNodeOffset(uint32_t pos) const { + assert(dataType.typeID == INTERNAL_ID); return getValue(pos).offset; } diff --git a/src/include/function/comparison/vector_comparison_operations.h b/src/include/function/comparison/vector_comparison_operations.h index 2ccd77379f7..0ed4785d85f 100644 --- a/src/include/function/comparison/vector_comparison_operations.h +++ b/src/include/function/comparison/vector_comparison_operations.h @@ -18,7 +18,8 @@ class VectorComparisonOperations : public VectorOperations { definitions.push_back(getDefinition(name, leftTypeID, rightTypeID)); } } - for (auto& typeID : vector{BOOL, STRING, NODE_ID, DATE, TIMESTAMP, INTERVAL}) { + for (auto& typeID : + vector{BOOL, STRING, INTERNAL_ID, DATE, TIMESTAMP, INTERVAL}) { definitions.push_back(getDefinition(name, typeID, typeID)); } definitions.push_back(getDefinition(name, DATE, TIMESTAMP)); @@ -75,8 +76,8 @@ class VectorComparisonOperations : public VectorOperations { assert(rightTypeID == STRING); return BinaryExecFunction; } - case NODE_ID: { - assert(rightTypeID == NODE_ID); + case INTERNAL_ID: { + assert(rightTypeID == INTERNAL_ID); return BinaryExecFunction; } case DATE: { @@ -157,8 +158,8 @@ class VectorComparisonOperations : public VectorOperations { assert(rightTypeID == STRING); return BinarySelectFunction; } - case NODE_ID: { - assert(rightTypeID == NODE_ID); + case INTERNAL_ID: { + assert(rightTypeID == INTERNAL_ID); return BinarySelectFunction; } case DATE: { diff --git a/src/include/function/hash/hash_operations.h b/src/include/function/hash/hash_operations.h index c80f534808b..643c3808979 100644 --- a/src/include/function/hash/hash_operations.h +++ b/src/include/function/hash/hash_operations.h @@ -45,6 +45,11 @@ struct CombineHash { } }; +template<> +inline void Hash::operation(const internalID_t& key, hash_t& result) { + result = murmurhash64(key.offset) ^ murmurhash64(key.tableID); +} + template<> inline void Hash::operation(const bool& key, hash_t& result) { result = murmurhash64(key); @@ -96,11 +101,6 @@ inline void Hash::operation(const interval_t& key, hash_t& result) { combineHashScalar(murmurhash64(key.days), murmurhash64(key.micros))); } -template<> -inline void Hash::operation(const nodeID_t& key, hash_t& result) { - result = murmurhash64(key.offset) ^ murmurhash64(key.tableID); -} - template<> inline void Hash::operation(const unordered_set& key, hash_t& result) { for (auto&& s : key) { diff --git a/src/include/processor/operator/scan_node_id.h b/src/include/processor/operator/scan_node_id.h index 86f067f5a39..6135aba3d7b 100644 --- a/src/include/processor/operator/scan_node_id.h +++ b/src/include/processor/operator/scan_node_id.h @@ -36,7 +36,7 @@ struct Mask { struct ScanNodeIDSemiMask { public: - ScanNodeIDSemiMask(node_offset_t maxNodeOffset, uint8_t maskedFlag) { + ScanNodeIDSemiMask(offset_t maxNodeOffset, uint8_t maskedFlag) { nodeMask = make_unique(maxNodeOffset + 1, maskedFlag); morselMask = make_unique((maxNodeOffset >> DEFAULT_VECTOR_CAPACITY_LOG_2) + 1, maskedFlag); @@ -78,7 +78,7 @@ class ScanTableNodeIDSharedState { inline uint8_t getNumMaskers() { return numMaskers; } inline void incrementNumMaskers() { numMaskers++; } - pair getNextRangeToRead(); + pair getNextRangeToRead(); private: NodeTable* table; @@ -107,7 +107,7 @@ class ScanNodeIDSharedState { } } - tuple getNextRangeToRead(); + tuple getNextRangeToRead(); private: mutex mtx; @@ -140,7 +140,7 @@ class ScanNodeID : public PhysicalOperator { void initGlobalStateInternal(ExecutionContext* context) override; void setSelVector( - ScanTableNodeIDSharedState* tableState, node_offset_t startOffset, node_offset_t endOffset); + ScanTableNodeIDSharedState* tableState, offset_t startOffset, offset_t endOffset); private: string nodeID; diff --git a/src/include/processor/operator/var_length_extend/var_length_extend.h b/src/include/processor/operator/var_length_extend/var_length_extend.h index 3f1a5248bb5..31b3fa22d55 100644 --- a/src/include/processor/operator/var_length_extend/var_length_extend.h +++ b/src/include/processor/operator/var_length_extend/var_length_extend.h @@ -12,7 +12,7 @@ namespace processor { struct DFSLevelInfo { DFSLevelInfo(uint8_t level, ExecutionContext& context) : level{level}, hasBeenOutput{false}, children{make_shared( - NODE_ID, context.memoryManager)} {}; + INTERNAL_ID, context.memoryManager)} {}; const uint8_t level; bool hasBeenOutput; shared_ptr children; diff --git a/src/include/storage/copy_arrow/copy_node_arrow.h b/src/include/storage/copy_arrow/copy_node_arrow.h index 5c4e40d9796..7ed48fb750a 100644 --- a/src/include/storage/copy_arrow/copy_node_arrow.h +++ b/src/include/storage/copy_arrow/copy_node_arrow.h @@ -42,7 +42,7 @@ class CopyNodeArrow : public CopyStructuresArrow { template static void populatePKIndex(InMemColumn* column, HashIndexBuilder* pkIndex, - node_offset_t startOffset, uint64_t numValues); + offset_t startOffset, uint64_t numValues); // Concurrent tasks. // Note that primaryKeyPropertyIdx is *NOT* the property ID of the primary key property. diff --git a/src/include/storage/copy_arrow/copy_rel_arrow.h b/src/include/storage/copy_arrow/copy_rel_arrow.h index 16c361dd328..3e7c40c61b0 100644 --- a/src/include/storage/copy_arrow/copy_rel_arrow.h +++ b/src/include/storage/copy_arrow/copy_rel_arrow.h @@ -18,7 +18,7 @@ class CopyRelArrow : public CopyStructuresArrow { public: CopyRelArrow(CopyDescription& copyDescription, string outputDirectory, TaskScheduler& taskScheduler, Catalog& catalog, - map maxNodeOffsetsPerNodeTable, BufferManager* bufferManager, + map maxNodeOffsetsPerNodeTable, BufferManager* bufferManager, table_id_t tableID, RelsStatistics* relsStatistics); ~CopyRelArrow() override = default; @@ -109,17 +109,16 @@ class CopyRelArrow : public CopyStructuresArrow { const vector>& batchColumns, CopyDescription& copyDescription); static void sortOverflowValuesOfPropertyColumnTask(const DataType& dataType, - node_offset_t offsetStart, node_offset_t offsetEnd, InMemColumn* propertyColumn, + offset_t offsetStart, offset_t offsetEnd, InMemColumn* propertyColumn, InMemOverflowFile* unorderedInMemOverflowFile, InMemOverflowFile* orderedInMemOverflowFile); static void sortOverflowValuesOfPropertyListsTask(const DataType& dataType, - node_offset_t offsetStart, node_offset_t offsetEnd, InMemAdjLists* adjLists, + offset_t offsetStart, offset_t offsetEnd, InMemAdjLists* adjLists, InMemLists* propertyLists, InMemOverflowFile* unorderedInMemOverflowFile, InMemOverflowFile* orderedInMemOverflowFile); private: - const map maxNodeOffsetsPerTable; - uint64_t startRelID; + const map maxNodeOffsetsPerTable; RelTableSchema* relTableSchema; RelsStatistics* relsStatistics; unique_ptr dummyReadOnlyTrx; diff --git a/src/include/storage/copy_arrow/copy_structures_arrow.h b/src/include/storage/copy_arrow/copy_structures_arrow.h index 8b3a58f6b6a..c21a599d0fc 100644 --- a/src/include/storage/copy_arrow/copy_structures_arrow.h +++ b/src/include/storage/copy_arrow/copy_structures_arrow.h @@ -38,7 +38,7 @@ class CopyStructuresArrow { // Initializes (in listHeadersBuilder) the header of each list in a Lists structure, from the // listSizes. ListSizes is used to determine if the list is small or large, based on which, // information is encoded in the 4 byte header. - static void calculateListHeadersTask(node_offset_t numNodes, uint32_t elementSize, + static void calculateListHeadersTask(offset_t numNodes, uint32_t elementSize, atomic_uint64_vec_t* listSizes, ListHeadersBuilder* listHeadersBuilder, const shared_ptr& logger); diff --git a/src/include/storage/in_mem_storage_structure/in_mem_column.h b/src/include/storage/in_mem_storage_structure/in_mem_column.h index bf20e7ec176..7ee5963e3e3 100644 --- a/src/include/storage/in_mem_storage_structure/in_mem_column.h +++ b/src/include/storage/in_mem_storage_structure/in_mem_column.h @@ -12,7 +12,7 @@ class InMemColumn; using fill_in_mem_column_function_t = std::function; + PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType)>; class InMemColumn { @@ -26,8 +26,8 @@ class InMemColumn { virtual void saveToFile(); - virtual void setElement(node_offset_t offset, const uint8_t* val); - inline uint8_t* getElement(node_offset_t offset) { + virtual void setElement(offset_t offset, const uint8_t* val); + inline uint8_t* getElement(offset_t offset) { auto cursor = getPageElementCursorForOffset(offset); return inMemFile->getPage(cursor.pageIdx)->data + (cursor.elemPosInPage * numBytesForElement); @@ -37,29 +37,29 @@ class InMemColumn { inline DataType getDataType() { return dataType; } - inline bool isNullAtNodeOffset(node_offset_t nodeOffset) { + inline bool isNullAtNodeOffset(offset_t nodeOffset) { auto cursor = getPageElementCursorForOffset(nodeOffset); return inMemFile->getPage(cursor.pageIdx)->isElemPosNull(cursor.elemPosInPage); } protected: - inline PageElementCursor getPageElementCursorForOffset(node_offset_t offset) const { + inline PageElementCursor getPageElementCursorForOffset(offset_t offset) const { return PageElementCursor{ (page_idx_t)(offset / numElementsInAPage), (uint16_t)(offset % numElementsInAPage)}; } private: static inline void fillInMemColumnWithNonOverflowValFunc(InMemColumn* inMemColumn, - uint8_t* defaultVal, PageByteCursor& pageByteCursor, node_offset_t nodeOffset, + uint8_t* defaultVal, PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType) { inMemColumn->setElement(nodeOffset, defaultVal); } static void fillInMemColumnWithStrValFunc(InMemColumn* inMemColumn, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, const DataType& dataType); + PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType); static void fillInMemColumnWithListValFunc(InMemColumn* inMemColumn, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, const DataType& dataType); + PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType); static fill_in_mem_column_function_t getFillInMemColumnFunc(const DataType& dataType); @@ -86,18 +86,25 @@ class InMemColumnWithOverflow : public InMemColumn { unique_ptr inMemOverflowFile; }; +class InMemRelIDColumn : public InMemColumn { + +public: + // Note: we only store the rel offset in the rel ID column since all rels in a column must share + // the same relTableID. + InMemRelIDColumn(string fName, uint64_t numElements) + : InMemColumn{std::move(fName), DataType(INTERNAL_ID), sizeof(offset_t), numElements} {} +}; + class InMemAdjColumn : public InMemColumn { public: InMemAdjColumn( string fName, const NodeIDCompressionScheme& nodeIDCompressionScheme, uint64_t numElements) - : InMemColumn{move(fName), DataType(NODE_ID), + : InMemColumn{std::move(fName), DataType(INTERNAL_ID), nodeIDCompressionScheme.getNumBytesForNodeIDAfterCompression(), numElements}, - nodeIDCompressionScheme{nodeIDCompressionScheme} {}; - - ~InMemAdjColumn() override = default; + nodeIDCompressionScheme{nodeIDCompressionScheme} {} - void setElement(node_offset_t offset, const uint8_t* val) override; + void setElement(offset_t offset, const uint8_t* val) override; private: NodeIDCompressionScheme nodeIDCompressionScheme; diff --git a/src/include/storage/in_mem_storage_structure/in_mem_lists.h b/src/include/storage/in_mem_storage_structure/in_mem_lists.h index d4a9e2355ad..c195a8ef72d 100644 --- a/src/include/storage/in_mem_storage_structure/in_mem_lists.h +++ b/src/include/storage/in_mem_storage_structure/in_mem_lists.h @@ -12,9 +12,9 @@ typedef vector> atomic_uint64_vec_t; class InMemLists; class AdjLists; -using fill_in_mem_lists_function_t = std::function; +using fill_in_mem_lists_function_t = + std::function; class InMemListsUtils { @@ -33,7 +33,7 @@ class InMemListsUtils { // Calculates the page id and offset in page where the data of a particular list has to be put // in the in-mem pages. static PageElementCursor calcPageElementCursor(uint32_t header, uint64_t reversePos, - uint8_t numBytesPerElement, node_offset_t nodeOffset, ListsMetadataBuilder& metadataBuilder, + uint8_t numBytesPerElement, offset_t nodeOffset, ListsMetadataBuilder& metadataBuilder, bool hasNULLBytes); }; @@ -48,7 +48,7 @@ class InMemLists { virtual ~InMemLists() = default; virtual void saveToFile(); - virtual void setElement(uint32_t header, node_offset_t nodeOffset, uint64_t pos, uint8_t* val); + virtual void setElement(uint32_t header, offset_t nodeOffset, uint64_t pos, uint8_t* val); virtual inline InMemOverflowFile* getInMemOverflowFile() { return nullptr; } inline ListsMetadataBuilder* getListsMetadataBuilder() { return listsMetadataBuilder.get(); } inline uint8_t* getMemPtrToLoc(uint64_t pageIdx, uint16_t posInPage) { @@ -68,17 +68,17 @@ class InMemLists { uint64_t numElementsInList, uint64_t numElementsPerPage); static inline void fillInMemListsWithNonOverflowValFunc(InMemLists* inMemLists, - uint8_t* defaultVal, PageByteCursor& pageByteCursor, node_offset_t nodeOffset, + uint8_t* defaultVal, PageByteCursor& pageByteCursor, offset_t nodeOffset, list_header_t header, uint64_t posInList, const DataType& dataType) { inMemLists->setElement(header, nodeOffset, posInList, defaultVal); } static void fillInMemListsWithStrValFunc(InMemLists* inMemLists, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, list_header_t header, + PageByteCursor& pageByteCursor, offset_t nodeOffset, list_header_t header, uint64_t posInList, const DataType& dataType); static void fillInMemListsWithListValFunc(InMemLists* inMemLists, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, list_header_t header, + PageByteCursor& pageByteCursor, offset_t nodeOffset, list_header_t header, uint64_t posInList, const DataType& dataType); static fill_in_mem_lists_function_t getFillInMemListsFunc(const DataType& dataType); @@ -93,13 +93,17 @@ class InMemLists { unique_ptr listsMetadataBuilder; }; +class InMemRelIDLists : public InMemLists { +public: + InMemRelIDLists(string fName, uint64_t numNodes) + : InMemLists{std::move(fName), DataType{INTERNAL_ID}, sizeof(offset_t), numNodes} {} +}; + class InMemListsWithOverflow : public InMemLists { protected: InMemListsWithOverflow(string fName, DataType dataType, uint64_t numNodes); - ~InMemListsWithOverflow() override = default; - InMemOverflowFile* getInMemOverflowFile() override { return overflowInMemFile.get(); } void saveToFile() override; @@ -112,15 +116,13 @@ class InMemAdjLists : public InMemLists { public: InMemAdjLists( string fName, const NodeIDCompressionScheme& nodeIDCompressionScheme, uint64_t numNodes) - : InMemLists{move(fName), DataType(NODE_ID), + : InMemLists{move(fName), DataType(INTERNAL_ID), nodeIDCompressionScheme.getNumBytesForNodeIDAfterCompression(), numNodes}, nodeIDCompressionScheme{nodeIDCompressionScheme} { listHeadersBuilder = make_unique(this->fName, numNodes); }; - ~InMemAdjLists() override = default; - - void setElement(uint32_t header, node_offset_t nodeOffset, uint64_t pos, uint8_t* val) override; + void setElement(uint32_t header, offset_t nodeOffset, uint64_t pos, uint8_t* val) override; void saveToFile() override; diff --git a/src/include/storage/index/hash_index.h b/src/include/storage/index/hash_index.h index fcde8c0a8e6..d254f15e7d9 100644 --- a/src/include/storage/index/hash_index.h +++ b/src/include/storage/index/hash_index.h @@ -15,9 +15,9 @@ enum class ChainedSlotsAction : uint8_t { LOOKUP_IN_SLOTS, DELETE_IN_SLOTS, FIND template class TemplatedHashIndexLocalStorage { public: - HashIndexLocalLookupState lookup(const T& key, node_offset_t& result); + HashIndexLocalLookupState lookup(const T& key, offset_t& result); void deleteKey(const T& key); - bool insert(const T& key, node_offset_t value); + bool insert(const T& key, offset_t value); inline bool hasUpdates() const { return !(localInsertions.empty() && localDeletions.empty()); } inline void clear() { @@ -25,7 +25,7 @@ class TemplatedHashIndexLocalStorage { localDeletions.clear(); } - unordered_map localInsertions; + unordered_map localInsertions; unordered_set localDeletions; }; @@ -40,11 +40,11 @@ class HashIndexLocalStorage { // never happen concurrently. Thus, lookup requires no local storage lock. Writes are // coordinated to execute in serial with the help of the localStorageMutex. This is a // simplification to the lock scheme, but can be relaxed later if necessary. - HashIndexLocalLookupState lookup(const uint8_t* key, node_offset_t& result); + HashIndexLocalLookupState lookup(const uint8_t* key, offset_t& result); void deleteKey(const uint8_t* key); - bool insert(const uint8_t* key, node_offset_t value); + bool insert(const uint8_t* key, offset_t value); void applyLocalChanges(const std::function& deleteOp, - const std::function& insertOp); + const std::function& insertOp); bool hasUpdates() const; void clear(); @@ -84,9 +84,9 @@ class HashIndex : public BaseHashIndex { const DataType& keyDataType, BufferManager& bufferManager, WAL* wal); public: - bool lookupInternal(Transaction* transaction, const uint8_t* key, node_offset_t& result); + bool lookupInternal(Transaction* transaction, const uint8_t* key, offset_t& result); void deleteInternal(const uint8_t* key) const; - bool insertInternal(const uint8_t* key, node_offset_t value); + bool insertInternal(const uint8_t* key, offset_t value); void prepareCommitOrRollbackIfNecessary(bool isCommit); void checkpointInMemoryIfNecessary(); @@ -96,17 +96,16 @@ class HashIndex : public BaseHashIndex { private: template bool performActionInChainedSlots(TransactionType trxType, HashIndexHeader& header, - SlotInfo& slotInfo, const uint8_t* key, node_offset_t& result); - bool lookupInPersistentIndex( - TransactionType trxType, const uint8_t* key, node_offset_t& result); + SlotInfo& slotInfo, const uint8_t* key, offset_t& result); + bool lookupInPersistentIndex(TransactionType trxType, const uint8_t* key, offset_t& result); // The following two functions are only used in prepareCommit, and are not thread-safe. - void insertIntoPersistentIndex(const uint8_t* key, node_offset_t value); + void insertIntoPersistentIndex(const uint8_t* key, offset_t value); void deleteFromPersistentIndex(const uint8_t* key); - void copyAndUpdateSlotHeader(bool isCopyEntry, Slot& slot, entry_pos_t entryPos, - const uint8_t* key, node_offset_t value); + void copyAndUpdateSlotHeader( + bool isCopyEntry, Slot& slot, entry_pos_t entryPos, const uint8_t* key, offset_t value); void copyKVOrEntryToSlot(bool isCopyEntry, const SlotInfo& slotInfo, Slot& slot, - const uint8_t* key, node_offset_t value); + const uint8_t* key, offset_t value); void splitSlot(HashIndexHeader& header); void rehashSlots(HashIndexHeader& header); vector>> getChainedSlots(slot_id_t pSlotId); @@ -160,20 +159,19 @@ class PrimaryKeyIndex { } } - bool lookup( - Transaction* trx, ValueVector* keyVector, uint64_t vectorPos, node_offset_t& result); + bool lookup(Transaction* trx, ValueVector* keyVector, uint64_t vectorPos, offset_t& result); void deleteKey(ValueVector* keyVector, uint64_t vectorPos); - bool insert(ValueVector* keyVector, uint64_t vectorPos, node_offset_t value); + bool insert(ValueVector* keyVector, uint64_t vectorPos, offset_t value); // These two lookups are used by InMemRelCSVCopier. - inline bool lookup(Transaction* transaction, int64_t key, node_offset_t& result) { + inline bool lookup(Transaction* transaction, int64_t key, offset_t& result) { assert(keyDataTypeID == INT64); return hashIndexForInt64->lookupInternal( transaction, reinterpret_cast(&key), result); } - inline bool lookup(Transaction* transaction, const char* key, node_offset_t& result) { + inline bool lookup(Transaction* transaction, const char* key, offset_t& result) { assert(keyDataTypeID == STRING); return hashIndexForString->lookupInternal( transaction, reinterpret_cast(key), result); @@ -209,11 +207,11 @@ class PrimaryKeyIndex { assert(keyDataTypeID == STRING); hashIndexForString->deleteInternal(reinterpret_cast(key)); } - inline bool insert(int64_t key, node_offset_t value) { + inline bool insert(int64_t key, offset_t value) { assert(keyDataTypeID == INT64); return hashIndexForInt64->insertInternal(reinterpret_cast(&key), value); } - inline bool insert(const char* key, node_offset_t value) { + inline bool insert(const char* key, offset_t value) { assert(keyDataTypeID == STRING); return hashIndexForString->insertInternal(reinterpret_cast(key), value); } diff --git a/src/include/storage/index/hash_index_builder.h b/src/include/storage/index/hash_index_builder.h index 3ff6d928ce6..ec7e5329203 100644 --- a/src/include/storage/index/hash_index_builder.h +++ b/src/include/storage/index/hash_index_builder.h @@ -82,13 +82,13 @@ class HashIndexBuilder : public BaseHashIndex { // Note: append assumes that bulkRserve has been called before it and the index has reserved // enough space already. - inline bool append(int64_t key, node_offset_t value) { + inline bool append(int64_t key, offset_t value) { return appendInternal(reinterpret_cast(&key), value); } - inline bool append(const char* key, node_offset_t value) { + inline bool append(const char* key, offset_t value) { return appendInternal(reinterpret_cast(key), value); } - inline bool lookup(int64_t key, node_offset_t& result) { + inline bool lookup(int64_t key, offset_t& result) { return lookupInternalWithoutLock(reinterpret_cast(&key), result); } @@ -96,13 +96,13 @@ class HashIndexBuilder : public BaseHashIndex { void flush(); private: - bool appendInternal(const uint8_t* key, node_offset_t value); - bool lookupInternalWithoutLock(const uint8_t* key, node_offset_t& result); + bool appendInternal(const uint8_t* key, offset_t value); + bool lookupInternalWithoutLock(const uint8_t* key, offset_t& result); template bool lookupOrExistsInSlotWithoutLock( - Slot* slot, const uint8_t* key, node_offset_t* result = nullptr); - void insertToSlotWithoutLock(Slot* slot, const uint8_t* key, node_offset_t value); + Slot* slot, const uint8_t* key, offset_t* result = nullptr); + void insertToSlotWithoutLock(Slot* slot, const uint8_t* key, offset_t value); Slot* getSlot(const SlotInfo& slotInfo); uint32_t allocatePSlots(uint32_t numSlotsToAllocate); uint32_t allocateAOSlot(); diff --git a/src/include/storage/index/hash_index_header.h b/src/include/storage/index/hash_index_header.h index 468322112be..8b6bf0b3e99 100644 --- a/src/include/storage/index/hash_index_header.h +++ b/src/include/storage/index/hash_index_header.h @@ -11,8 +11,8 @@ class HashIndexHeader { explicit HashIndexHeader(common::DataTypeID keyDataTypeID) : currentLevel{1}, levelHashMask{1}, higherLevelHashMask{3}, nextSplitSlotId{0}, numEntries{0}, numBytesPerKey{common::Types::getDataTypeSize(keyDataTypeID)}, - numBytesPerEntry{(uint32_t)( - common::Types::getDataTypeSize(keyDataTypeID) + sizeof(common::node_offset_t))}, + numBytesPerEntry{ + (uint32_t)(common::Types::getDataTypeSize(keyDataTypeID) + sizeof(common::offset_t))}, keyDataTypeID{keyDataTypeID} {} // Used for element initialization in disk array only. diff --git a/src/include/storage/index/hash_index_slot.h b/src/include/storage/index/hash_index_slot.h index 2da1ae97924..834b33c6eed 100644 --- a/src/include/storage/index/hash_index_slot.h +++ b/src/include/storage/index/hash_index_slot.h @@ -3,8 +3,8 @@ #include #include "common/configs.h" +#include "common/types/internal_id_t.h" #include "common/types/ku_string.h" -#include "common/types/node_id_t.h" namespace kuzu { namespace storage { @@ -40,7 +40,7 @@ class SlotHeader { template struct SlotEntry { - uint8_t data[sizeof(T) + sizeof(common::node_offset_t)]; + uint8_t data[sizeof(T) + sizeof(common::offset_t)]; }; template diff --git a/src/include/storage/index/hash_index_utils.h b/src/include/storage/index/hash_index_utils.h index e4ee6c380b4..4af6c9da0e5 100644 --- a/src/include/storage/index/hash_index_utils.h +++ b/src/include/storage/index/hash_index_utils.h @@ -12,7 +12,7 @@ namespace kuzu { namespace storage { using insert_function_t = - std::function; + std::function; using hash_function_t = std::function; using equals_function_t = std::function; @@ -21,7 +21,7 @@ static const uint32_t NUM_BYTES_FOR_INT64_KEY = Types::getDataTypeSize(INT64); static const uint32_t NUM_BYTES_FOR_STRING_KEY = Types::getDataTypeSize(STRING); using in_mem_insert_function_t = - std::function; + std::function; using in_mem_equals_function_t = std::function; @@ -32,16 +32,16 @@ class InMemHashIndexUtils { private: // InsertFunc - inline static void insertFuncForInt64(const uint8_t* key, node_offset_t offset, uint8_t* entry, + inline static void insertFuncForInt64(const uint8_t* key, offset_t offset, uint8_t* entry, InMemOverflowFile* inMemOverflowFile = nullptr) { memcpy(entry, key, NUM_BYTES_FOR_INT64_KEY); - memcpy(entry + NUM_BYTES_FOR_INT64_KEY, &offset, sizeof(node_offset_t)); + memcpy(entry + NUM_BYTES_FOR_INT64_KEY, &offset, sizeof(offset_t)); } - inline static void insertFuncForString(const uint8_t* key, node_offset_t offset, uint8_t* entry, - InMemOverflowFile* inMemOverflowFile) { + inline static void insertFuncForString( + const uint8_t* key, offset_t offset, uint8_t* entry, InMemOverflowFile* inMemOverflowFile) { auto kuString = inMemOverflowFile->appendString(reinterpret_cast(key)); memcpy(entry, &kuString, NUM_BYTES_FOR_STRING_KEY); - memcpy(entry + NUM_BYTES_FOR_STRING_KEY, &offset, sizeof(node_offset_t)); + memcpy(entry + NUM_BYTES_FOR_STRING_KEY, &offset, sizeof(offset_t)); } inline static bool equalsFuncForInt64(const uint8_t* keyToLookup, const uint8_t* keyInEntry, const InMemOverflowFile* inMemOverflowFile = nullptr) { @@ -55,16 +55,16 @@ class HashIndexUtils { public: // InsertFunc - inline static void insertFuncForInt64(const uint8_t* key, node_offset_t offset, uint8_t* entry, + inline static void insertFuncForInt64(const uint8_t* key, offset_t offset, uint8_t* entry, DiskOverflowFile* overflowFile = nullptr) { memcpy(entry, key, NUM_BYTES_FOR_INT64_KEY); - memcpy(entry + NUM_BYTES_FOR_INT64_KEY, &offset, sizeof(node_offset_t)); + memcpy(entry + NUM_BYTES_FOR_INT64_KEY, &offset, sizeof(offset_t)); } inline static void insertFuncForString( - const uint8_t* key, node_offset_t offset, uint8_t* entry, DiskOverflowFile* overflowFile) { + const uint8_t* key, offset_t offset, uint8_t* entry, DiskOverflowFile* overflowFile) { auto kuString = overflowFile->writeString((const char*)key); memcpy(entry, &kuString, NUM_BYTES_FOR_STRING_KEY); - memcpy(entry + NUM_BYTES_FOR_STRING_KEY, &offset, sizeof(node_offset_t)); + memcpy(entry + NUM_BYTES_FOR_STRING_KEY, &offset, sizeof(offset_t)); } static insert_function_t initializeInsertFunc(DataTypeID dataTypeID); diff --git a/src/include/storage/node_id_compression_scheme.h b/src/include/storage/node_id_compression_scheme.h index 09b9fb8ccc9..b11a26a6769 100644 --- a/src/include/storage/node_id_compression_scheme.h +++ b/src/include/storage/node_id_compression_scheme.h @@ -16,8 +16,8 @@ class NodeIDCompressionScheme { } inline uint64_t getNumBytesForNodeIDAfterCompression() const { - return commonTableID == INVALID_TABLE_ID ? Types::getDataTypeSize(NODE_ID) : - sizeof(node_offset_t); + return commonTableID == INVALID_TABLE_ID ? Types::getDataTypeSize(INTERNAL_ID) : + sizeof(offset_t); } void readNodeID(uint8_t* data, nodeID_t* nodeID) const; diff --git a/src/include/storage/storage_structure/column.h b/src/include/storage/storage_structure/column.h index 0565af7ef91..b7df63bdab1 100644 --- a/src/include/storage/storage_structure/column.h +++ b/src/include/storage/storage_structure/column.h @@ -32,9 +32,9 @@ class Column : public BaseColumnOrList { const shared_ptr& vectorToWriteFrom); // Currently, used only in CopyCSV tests. - virtual Value readValue(node_offset_t offset); - bool isNull(node_offset_t nodeOffset, Transaction* transaction); - void setNodeOffsetToNull(node_offset_t nodeOffset); + virtual Value readValue(offset_t offset); + bool isNull(offset_t nodeOffset, Transaction* transaction); + void setNodeOffsetToNull(offset_t nodeOffset); protected: void lookup(Transaction* transaction, const shared_ptr& nodeIDVector, @@ -50,9 +50,9 @@ class Column : public BaseColumnOrList { const shared_ptr& resultVector, PageElementCursor& cursor) { readBySequentialCopyWithSelState(transaction, resultVector, cursor, identityMapper); } - virtual void writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, + virtual void writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom); - WALPageIdxPosInPageAndFrame beginUpdatingPage(node_offset_t nodeOffset, + WALPageIdxPosInPageAndFrame beginUpdatingPage(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom); private: @@ -71,7 +71,7 @@ class Column : public BaseColumnOrList { // Note that caller must ensure to unpin and release the WAL version of the page by calling // StorageStructure::unpinWALPageAndReleaseOriginalPageLock. WALPageIdxPosInPageAndFrame beginUpdatingPageAndWriteOnlyNullBit( - node_offset_t nodeOffset, bool isNull); + offset_t nodeOffset, bool isNull); protected: // no logical-physical page mapping is required for columns @@ -108,11 +108,11 @@ class StringPropertyColumn : public PropertyColumnWithOverflow { : PropertyColumnWithOverflow{ structureIDAndFNameOfMainColumn, dataType, bufferManager, isInMemory, wal} {}; - void writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, + void writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) override; // Currently, used only in CopyCSV tests. - Value readValue(node_offset_t offset) override; + Value readValue(offset_t offset) override; private: inline void lookup(Transaction* transaction, const shared_ptr& resultVector, @@ -143,10 +143,10 @@ class ListPropertyColumn : public PropertyColumnWithOverflow { : PropertyColumnWithOverflow{ structureIDAndFNameOfMainColumn, dataType, bufferManager, isInMemory, wal} {}; - void writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, + void writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) override; - Value readValue(node_offset_t offset) override; + Value readValue(offset_t offset) override; private: inline void lookup(Transaction* transaction, const shared_ptr& resultVector, @@ -169,12 +169,57 @@ class ListPropertyColumn : public PropertyColumnWithOverflow { } }; +class RelIDColumn : public Column { + +public: + RelIDColumn(const StorageStructureIDAndFName& structureIDAndFName, BufferManager& bufferManager, + bool isInMemory, WAL* wal) + : Column{structureIDAndFName, DataType(INTERNAL_ID), sizeof(offset_t), bufferManager, + isInMemory, wal}, + commonTableID{structureIDAndFName.storageStructureID.columnFileID.relPropertyColumnID + .relNodeTableAndDir.relTableID} { + assert(structureIDAndFName.storageStructureID.columnFileID.columnType == + ColumnType::REL_PROPERTY_COLUMN); + assert(structureIDAndFName.storageStructureID.storageStructureType == + StorageStructureType::COLUMN); + } + +private: + inline void lookup(Transaction* transaction, const shared_ptr& resultVector, + uint32_t vectorPos, PageElementCursor& cursor) override { + auto set_vec_null_func = + BaseColumnOrList::getSetVectorNullFunc(false /* hasNoNullGuarantee */); + readRelIDsFromAPageBySequentialCopy(transaction, resultVector, vectorPos, cursor.pageIdx, + cursor.elemPosInPage, 1 /* numValuesToCopy */, commonTableID, set_vec_null_func); + } + inline void scan(Transaction* transaction, const shared_ptr& resultVector, + PageElementCursor& cursor) override { + readRelIDsBySequentialCopy(transaction, resultVector, cursor, identityMapper, commonTableID, + false /* hasNoNullGuarantee */); + } + inline void scanWithSelState(Transaction* transaction, + const shared_ptr& resultVector, PageElementCursor& cursor) override { + readRelIDsBySequentialCopyWithSelState( + transaction, resultVector, cursor, identityMapper, commonTableID); + } + inline void writeToPage(WALPageIdxPosInPageAndFrame& walPageInfo, + const shared_ptr& vectorToWriteFrom, + uint32_t posInVectorToWriteFrom) override { + auto relID = vectorToWriteFrom->getValue(posInVectorToWriteFrom); + memcpy(walPageInfo.frame + mapElementPosToByteOffset(walPageInfo.posInPage), &relID.offset, + sizeof(relID.offset)); + } + +private: + table_id_t commonTableID; +}; + class AdjColumn : public Column { public: AdjColumn(const StorageStructureIDAndFName& structureIDAndFName, BufferManager& bufferManager, const NodeIDCompressionScheme& nodeIDCompressionScheme, bool isInMemory, WAL* wal) - : Column{structureIDAndFName, DataType(NODE_ID), + : Column{structureIDAndFName, DataType(INTERNAL_ID), nodeIDCompressionScheme.getNumBytesForNodeIDAfterCompression(), bufferManager, isInMemory, wal}, nodeIDCompressionScheme(nodeIDCompressionScheme){}; @@ -184,12 +229,12 @@ class AdjColumn : public Column { uint32_t vectorPos, PageElementCursor& cursor) override { readNodeIDsFromAPageBySequentialCopy(transaction, resultVector, vectorPos, cursor.pageIdx, cursor.elemPosInPage, 1 /* numValuesToCopy */, nodeIDCompressionScheme, - false /*isAdjLists*/); + BaseColumnOrList::getSetVectorNullFunc(false /* hasNoNullGuarantee */)); } inline void scan(Transaction* transaction, const shared_ptr& resultVector, PageElementCursor& cursor) override { readNodeIDsBySequentialCopy(transaction, resultVector, cursor, identityMapper, - nodeIDCompressionScheme, false /*isAdjLists*/); + nodeIDCompressionScheme, false /* hasNoNullGuarantee */); } inline void scanWithSelState(Transaction* transaction, const shared_ptr& resultVector, PageElementCursor& cursor) override { @@ -228,6 +273,12 @@ class ColumnFactory { case LIST: return make_unique( structureIDAndFName, dataType, bufferManager, isInMemory, wal); + case INTERNAL_ID: + assert(structureIDAndFName.storageStructureID.storageStructureType == + StorageStructureType::COLUMN); + assert(structureIDAndFName.storageStructureID.columnFileID.columnType == + ColumnType::REL_PROPERTY_COLUMN); + return make_unique(structureIDAndFName, bufferManager, isInMemory, wal); default: throw StorageException("Invalid type for property column creation."); } diff --git a/src/include/storage/storage_structure/in_mem_page.h b/src/include/storage/storage_structure/in_mem_page.h index ddb9014da23..85d25551ceb 100644 --- a/src/include/storage/storage_structure/in_mem_page.h +++ b/src/include/storage/storage_structure/in_mem_page.h @@ -5,7 +5,6 @@ #include "common/configs.h" #include "common/null_mask.h" -#include "common/types/node_id_t.h" #include "storage/node_id_compression_scheme.h" using namespace kuzu::common; diff --git a/src/include/storage/storage_structure/lists/list_handle.h b/src/include/storage/storage_structure/lists/list_handle.h index dc2b2255cef..00aa7823d13 100644 --- a/src/include/storage/storage_structure/lists/list_handle.h +++ b/src/include/storage/storage_structure/lists/list_handle.h @@ -1,6 +1,5 @@ #pragma once -#include "common/types/node_id_t.h" #include "common/types/types.h" #include "storage/storage_structure/lists/list_headers.h" #include "storage/storage_structure/lists/lists_metadata.h" @@ -52,7 +51,7 @@ class ListSyncState { } private: - node_offset_t boundNodeOffset; + offset_t boundNodeOffset; list_header_t listHeader; uint32_t numValuesInUpdateStore; uint32_t numValuesInPersistentStore; @@ -65,7 +64,7 @@ struct ListHandle { explicit ListHandle(ListSyncState& listSyncState) : listSyncState{listSyncState} {} static inline std::function getPageMapper( - ListsMetadata& listMetadata, list_header_t listHeader, node_offset_t nodeOffset) { + ListsMetadata& listMetadata, list_header_t listHeader, offset_t nodeOffset) { return ListHeaders::isALargeList(listHeader) ? listMetadata.getPageMapperForLargeListIdx( ListHeaders::getLargeListIdx(listHeader)) : @@ -84,7 +83,7 @@ struct ListHandle { getPageMapper(listMetadata, listSyncState.listHeader, listSyncState.boundNodeOffset); } inline void resetSyncState() { listSyncState.resetState(); } - inline void initSyncState(node_offset_t boundNodeOffset, list_header_t listHeader, + inline void initSyncState(offset_t boundNodeOffset, list_header_t listHeader, uint64_t numValuesInUpdateStore, uint64_t numValuesInPersistentStore, ListSourceStore sourceStore) { listSyncState.boundNodeOffset = boundNodeOffset; @@ -94,7 +93,7 @@ struct ListHandle { listSyncState.sourceStore = sourceStore; } inline list_header_t getListHeader() const { return listSyncState.listHeader; } - inline node_offset_t getBoundNodeOffset() const { return listSyncState.boundNodeOffset; } + inline offset_t getBoundNodeOffset() const { return listSyncState.boundNodeOffset; } inline ListSourceStore getListSourceStore() { return listSyncState.sourceStore; } inline uint32_t getStartElemOffset() const { return listSyncState.startElemOffset; } inline uint32_t getEndElemOffset() const { diff --git a/src/include/storage/storage_structure/lists/list_headers.h b/src/include/storage/storage_structure/lists/list_headers.h index 058241048cb..9cb18e4d024 100644 --- a/src/include/storage/storage_structure/lists/list_headers.h +++ b/src/include/storage/storage_structure/lists/list_headers.h @@ -99,9 +99,9 @@ class ListHeadersBuilder : public BaseListHeaders { public: explicit ListHeadersBuilder(const string& baseListFName, uint64_t numElements); - inline list_header_t getHeader(node_offset_t offset) { return (*headersBuilder)[offset]; }; + inline list_header_t getHeader(offset_t offset) { return (*headersBuilder)[offset]; }; - inline void setHeader(node_offset_t offset, list_header_t header) { + inline void setHeader(offset_t offset, list_header_t header) { (*headersBuilder)[offset] = header; } void saveToDisk(); @@ -117,7 +117,7 @@ class ListHeaders : public BaseListHeaders { explicit ListHeaders(const StorageStructureIDAndFName storageStructureIDAndFNameForBaseList, BufferManager* bufferManager, WAL* wal); - inline list_header_t getHeader(node_offset_t offset) { return (*headersDiskArray)[offset]; }; + inline list_header_t getHeader(offset_t offset) { return (*headersDiskArray)[offset]; }; inline VersionedFileHandle* getFileHandle() { return versionedFileHandle.get(); } diff --git a/src/include/storage/storage_structure/lists/lists.h b/src/include/storage/storage_structure/lists/lists.h index 6311f94418c..b2f1cfb87e6 100644 --- a/src/include/storage/storage_structure/lists/lists.h +++ b/src/include/storage/storage_structure/lists/lists.h @@ -52,18 +52,18 @@ class Lists : public BaseColumnOrList { inline ListsMetadata& getListsMetadata() { return metadata; }; inline shared_ptr getHeaders() const { return headers; }; // TODO(Guodong): change the input to header. - inline uint64_t getNumElementsFromListHeader(node_offset_t nodeOffset) const { + inline uint64_t getNumElementsFromListHeader(offset_t nodeOffset) const { auto header = headers->getHeader(nodeOffset); return ListHeaders::isALargeList(header) ? metadata.getNumElementsInLargeLists(ListHeaders::getLargeListIdx(header)) : ListHeaders::getSmallListLen(header); } - inline uint64_t getNumElementsInListsUpdatesStore(node_offset_t nodeOffset) { + inline uint64_t getNumElementsInListsUpdatesStore(offset_t nodeOffset) { return listsUpdatesStore->getNumInsertedRelsForNodeOffset( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset); } inline uint64_t getTotalNumElementsInList( - TransactionType transactionType, node_offset_t nodeOffset) { + TransactionType transactionType, offset_t nodeOffset) { return getNumElementsInPersistentStore(transactionType, nodeOffset) + (transactionType == TransactionType::WRITE ? getNumElementsInListsUpdatesStore(nodeOffset) - @@ -88,19 +88,18 @@ class Lists : public BaseColumnOrList { virtual void readFromLargeList( const shared_ptr& valueVector, ListHandle& listHandle); void readFromList(const shared_ptr& valueVector, ListHandle& listHandle); - uint64_t getNumElementsInPersistentStore( - TransactionType transactionType, node_offset_t nodeOffset); + uint64_t getNumElementsInPersistentStore(TransactionType transactionType, offset_t nodeOffset); void initListReadingState( - node_offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType); + offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType); unique_ptr createInMemListWithDataFromUpdateStoreOnly( - node_offset_t nodeOffset, vector& insertedRelsTupleIdxInFT); + offset_t nodeOffset, vector& insertedRelsTupleIdxInFT); // This function writes the persistent store data (skipping over the deleted rels) and update // store data to the inMemList. - unique_ptr writeToInMemList(node_offset_t nodeOffset, + unique_ptr writeToInMemList(offset_t nodeOffset, const vector& insertedRelTupleIdxesInFT, const unordered_set& deletedRelOffsetsForList, UpdatedPersistentListOffsets* updatedPersistentListOffsets); - void fillInMemListsFromPersistentStore(node_offset_t nodeOffset, + void fillInMemListsFromPersistentStore(offset_t nodeOffset, uint64_t numElementsInPersistentStore, InMemList& inMemList, const unordered_set& deletedRelOffsetsInList, UpdatedPersistentListOffsets* updatedPersistentListOffsets = nullptr); @@ -189,7 +188,7 @@ class AdjLists : public Lists { AdjLists(const StorageStructureIDAndFName& storageStructureIDAndFName, BufferManager& bufferManager, NodeIDCompressionScheme nodeIDCompressionScheme, bool isInMemory, WAL* wal, ListsUpdatesStore* listsUpdatesStore) - : Lists{storageStructureIDAndFName, DataType(NODE_ID), + : Lists{storageStructureIDAndFName, DataType(INTERNAL_ID), nodeIDCompressionScheme.getNumBytesForNodeIDAfterCompression(), make_shared(storageStructureIDAndFName, &bufferManager, wal), bufferManager, false /* hasNullBytes */, isInMemory, wal, listsUpdatesStore}, @@ -201,7 +200,7 @@ class AdjLists : public Lists { ListHandle& listHandle) override; // Currently, used only in copyCSV tests. - unique_ptr> readAdjacencyListOfNode(node_offset_t nodeOffset); + unique_ptr> readAdjacencyListOfNode(offset_t nodeOffset); void checkpointInMemoryIfNecessary() override { headers->checkpointInMemoryIfNecessary(); @@ -235,16 +234,35 @@ class RelIDList : public Lists { public: RelIDList(const StorageStructureIDAndFName& storageStructureIDAndFName, - const DataType& dataType, const size_t& elementSize, shared_ptr headers, - BufferManager& bufferManager, bool isInMemory, WAL* wal, + shared_ptr headers, BufferManager& bufferManager, bool isInMemory, WAL* wal, ListsUpdatesStore* listsUpdatesStore) - : Lists{storageStructureIDAndFName, dataType, elementSize, std::move(headers), - bufferManager, isInMemory, wal, listsUpdatesStore} {} + : Lists{storageStructureIDAndFName, DataType{INTERNAL_ID}, sizeof(offset_t), + std::move(headers), bufferManager, isInMemory, wal, listsUpdatesStore} {} + void setDeletedRelsIfNecessary(Transaction* transaction, ListHandle& listHandle, const shared_ptr& relIDVector) override; - unordered_set getDeletedRelOffsetsInListForNodeOffset(node_offset_t nodeOffset); - list_offset_t getListOffset(node_offset_t nodeOffset, int64_t relID); -}; + + unordered_set getDeletedRelOffsetsInListForNodeOffset(offset_t nodeOffset); + + list_offset_t getListOffset(offset_t nodeOffset, offset_t relIDOffset); + + // void readValues(Transaction* transaction, const shared_ptr& valueVector, + // ListHandle& listHandle) override; + + void readFromSmallList( + const shared_ptr& valueVector, ListHandle& listHandle) override; + + void readFromLargeList( + const shared_ptr& valueVector, ListHandle& listHandle) override; + +private: + inline bool mayContainNulls() const override { return false; } + + inline table_id_t getRelTableID() const { + return storageStructureIDAndFName.storageStructureID.listFileID.relPropertyListID + .relNodeTableAndDir.relTableID; + } +}; // namespace storage class ListsFactory { @@ -254,15 +272,6 @@ class ListsFactory { BufferManager& bufferManager, bool isInMemory, WAL* wal, ListsUpdatesStore* listsUpdatesStore) { assert(listsUpdatesStore != nullptr); - // TODO(Ziyi): this is a super hacky design. Consider storing a relIDColumn/List in relTable - // just like adjColumn/List and we can have Extend read from both relIDColumn/List and - // adjColumn/List. - if (structureIDAndFName.storageStructureID.listFileID.relPropertyListID.propertyID == - RelTableSchema::INTERNAL_REL_ID_PROPERTY_IDX) { - return make_unique(structureIDAndFName, dataType, - Types::getDataTypeSize(dataType), adjListsHeaders, bufferManager, isInMemory, wal, - listsUpdatesStore); - } switch (dataType.typeID) { case INT64: case DOUBLE: @@ -279,6 +288,12 @@ class ListsFactory { case LIST: return make_unique(structureIDAndFName, dataType, adjListsHeaders, bufferManager, isInMemory, wal, listsUpdatesStore); + case INTERNAL_ID: + // TODO(Ziyi): this is a super hacky design. Consider storing a relIDColumn/List in + // relTable just like adjColumn/List and we can have Extend read from both + // relIDColumn/List and adjColumn/List. + return make_unique(structureIDAndFName, adjListsHeaders, bufferManager, + isInMemory, wal, listsUpdatesStore); default: throw StorageException("Invalid type for property list creation."); } diff --git a/src/include/storage/storage_structure/lists/lists_update_iterator.h b/src/include/storage/storage_structure/lists/lists_update_iterator.h index 24f306797f0..40807865be9 100644 --- a/src/include/storage/storage_structure/lists/lists_update_iterator.h +++ b/src/include/storage/storage_structure/lists/lists_update_iterator.h @@ -43,9 +43,9 @@ class ListsUpdateIterator { virtual ~ListsUpdateIterator() { assert(finishCalled); } - void updateList(node_offset_t nodeOffset, InMemList& inMemList); + void updateList(offset_t nodeOffset, InMemList& inMemList); - void appendToLargeList(node_offset_t nodeOffset, InMemList& inMemList); + void appendToLargeList(offset_t nodeOffset, InMemList& inMemList); void doneUpdating(); @@ -60,7 +60,7 @@ class ListsUpdateIterator { void slideListsIfNecessary(uint64_t endNodeOffsetInclusive); - void seekToNodeOffsetAndSlideListsIfNecessary(node_offset_t nodeOffsetToSeekTo); + void seekToNodeOffsetAndSlideListsIfNecessary(offset_t nodeOffsetToSeekTo); void writeInMemListToListPages( InMemList& inMemList, page_idx_t pageListHeadIdx, bool isSmallList); diff --git a/src/include/storage/storage_structure/lists/lists_update_store.h b/src/include/storage/storage_structure/lists/lists_update_store.h index 8e308940830..2af0f77182c 100644 --- a/src/include/storage/storage_structure/lists/lists_update_store.h +++ b/src/include/storage/storage_structure/lists/lists_update_store.h @@ -4,7 +4,6 @@ #include "catalog/catalog_structs.h" #include "common/data_chunk/data_chunk.h" -#include "common/types/node_id_t.h" #include "common/types/types.h" #include "processor/result/factorized_table.h" #include "storage/storage_structure/lists/list_handle.h" @@ -34,8 +33,8 @@ struct ListsUpdatesForNodeOffset { explicit ListsUpdatesForNodeOffset(const RelTableSchema& relTableSchema); inline bool hasUpdates() const { - return isNewlyAddedNode || !insertedRelsTupleIdxInFT.empty() || !deletedRelIDs.empty() || - hasAnyUpdatedPersistentListOffsets(); + return isNewlyAddedNode || !insertedRelsTupleIdxInFT.empty() || + !deletedRelOffsets.empty() || hasAnyUpdatedPersistentListOffsets(); } bool hasAnyUpdatedPersistentListOffsets() const; @@ -44,7 +43,7 @@ struct ListsUpdatesForNodeOffset { bool isNewlyAddedNode; vector insertedRelsTupleIdxInFT; unordered_map updatedPersistentListOffsets; - unordered_set deletedRelIDs; + unordered_set deletedRelOffsets; }; struct ListsUpdateInfo { @@ -64,7 +63,7 @@ struct ListsUpdateInfo { list_offset_t bwdListOffset; }; -using ListsUpdatesPerNode = map>; +using ListsUpdatesPerNode = map>; using ListsUpdatesPerChunk = map; struct InMemList; @@ -84,12 +83,12 @@ class ListsUpdatesStore { return listsUpdatesPerTablePerDirection[relDirection]; } - bool isNewlyAddedNode(ListFileID& listFileID, node_offset_t nodeOffset) const; + bool isNewlyAddedNode(ListFileID& listFileID, offset_t nodeOffset) const; - uint64_t getNumDeletedRels(ListFileID& listFileID, node_offset_t nodeOffset) const; + uint64_t getNumDeletedRels(ListFileID& listFileID, offset_t nodeOffset) const; bool isRelDeletedInPersistentStore( - ListFileID& listFileID, node_offset_t nodeOffset, int64_t relID) const; + ListFileID& listFileID, offset_t nodeOffset, offset_t relOffset) const; bool hasUpdates() const; @@ -107,13 +106,12 @@ class ListsUpdatesStore { void deleteRelIfNecessary(const shared_ptr& srcNodeIDVector, const shared_ptr& dstNodeIDVector, const shared_ptr& relIDVector); - uint64_t getNumInsertedRelsForNodeOffset( - ListFileID& listFileID, node_offset_t nodeOffset) const; + uint64_t getNumInsertedRelsForNodeOffset(ListFileID& listFileID, offset_t nodeOffset) const; void readValues(ListFileID& listFileID, ListHandle& listSyncState, shared_ptr valueVector) const; - bool hasAnyDeletedRelsInPersistentStore(ListFileID& listFileID, node_offset_t nodeOffset) const; + bool hasAnyDeletedRelsInPersistentStore(ListFileID& listFileID, offset_t nodeOffset) const; // This function is called ifNecessary because it only handles the updates to a propertyList. // If the property is stored as a column in both direction(e.g. we are updating a ONE-ONE rel @@ -121,7 +119,7 @@ class ListsUpdatesStore { void updateRelIfNecessary(const shared_ptr& srcNodeIDVector, const shared_ptr& dstNodeIDVector, const ListsUpdateInfo& listsUpdateInfo); - void readUpdatesToPropertyVectorIfExists(ListFileID& listFileID, node_offset_t nodeOffset, + void readUpdatesToPropertyVectorIfExists(ListFileID& listFileID, offset_t nodeOffset, const shared_ptr& valueVector, list_offset_t startListOffset); void readPropertyUpdateToInMemList(ListFileID& listFileID, ft_tuple_idx_t ftTupleIdx, @@ -150,7 +148,7 @@ class ListsUpdatesStore { RelDirection relDirection, nodeID_t nodeID); ListsUpdatesForNodeOffset* getListsUpdatesForNodeOffsetIfExists( - ListFileID& listFileID, node_offset_t nodeOffset) const; + ListFileID& listFileID, offset_t nodeOffset) const; private: /* ListsUpdatesStore stores all inserted edges in a factorizedTable in the format: diff --git a/src/include/storage/storage_structure/storage_structure.h b/src/include/storage/storage_structure/storage_structure.h index 694dce4e469..c6daa4a4903 100644 --- a/src/include/storage/storage_structure/storage_structure.h +++ b/src/include/storage/storage_structure/storage_structure.h @@ -22,6 +22,12 @@ typedef uint64_t chunk_idx_t; class ListsUpdateIterator; +class BaseColumnOrList; + +using set_vec_null_func = std::function vector, uint8_t* frame, uint16_t pagePosOfFirstElement, + uint64_t vectorStartPos, uint64_t numValuesToRead)>; + class StorageStructure { friend class ListsUpdateIterator; @@ -83,6 +89,10 @@ class BaseColumnOrList : public StorageStructure { return pageFrame + numElementsPerPage * elementSize; } + static inline set_vec_null_func getSetVectorNullFunc(bool hasNoNullGuarantee) { + return hasNoNullGuarantee ? setVectorRangeNoNull : setVectorNullBits; + } + protected: inline uint64_t getElemByteOffset(uint64_t elemPosInPage) const { return elemPosInPage * elementSize; @@ -96,6 +106,21 @@ class BaseColumnOrList : public StorageStructure { PageElementCursor& cursor, const std::function& logicalToPhysicalPageMapper); + void readRelIDsBySequentialCopy(Transaction* transaction, const shared_ptr& vector, + PageElementCursor& cursor, + const std::function& logicalToPhysicalPageMapper, + table_id_t commonTableID, bool hasNoNullGuarantee); + + void readRelIDsFromAPageBySequentialCopy(Transaction* transaction, + const shared_ptr& vector, uint64_t vectorStartPos, page_idx_t physicalPageIdx, + uint16_t pagePosOfFirstElement, uint64_t numValuesToRead, table_id_t commonTableID, + const set_vec_null_func& setVecNullFunc); + + void readRelIDsBySequentialCopyWithSelState(Transaction* transaction, + const shared_ptr& vector, PageElementCursor& cursor, + const std::function& logicalToPhysicalPageMapper, + table_id_t commonTableID); + void readBySequentialCopyWithSelState(Transaction* transaction, const shared_ptr& vector, PageElementCursor& cursor, const std::function& logicalToPhysicalPageMapper); @@ -103,7 +128,7 @@ class BaseColumnOrList : public StorageStructure { void readNodeIDsBySequentialCopy(Transaction* transaction, const shared_ptr& valueVector, PageElementCursor& cursor, const std::function& logicalToPhysicalPageMapper, - NodeIDCompressionScheme nodeIDCompressionScheme, bool isAdjLists); + NodeIDCompressionScheme nodeIDCompressionScheme, bool hasNoNullGuarantee); void readNodeIDsBySequentialCopyWithSelState(Transaction* transaction, const shared_ptr& valueVector, PageElementCursor& cursor, @@ -113,7 +138,7 @@ class BaseColumnOrList : public StorageStructure { void readNodeIDsFromAPageBySequentialCopy(Transaction* transaction, const shared_ptr& vector, uint64_t vectorStartPos, page_idx_t physicalPageIdx, uint16_t pagePosOfFirstElement, uint64_t numValuesToRead, - NodeIDCompressionScheme& nodeIDCompressionScheme, bool isAdjLists); + NodeIDCompressionScheme& nodeIDCompressionScheme, const set_vec_null_func& setVecNullFunc); void readSingleNullBit(const shared_ptr& valueVector, const uint8_t* frame, uint64_t elementPos, uint64_t offsetInVector) const; @@ -121,6 +146,19 @@ class BaseColumnOrList : public StorageStructure { void setNullBitOfAPosInFrame(const uint8_t* frame, uint16_t elementPos, bool isNull) const; private: + static inline void setVectorRangeNoNull(BaseColumnOrList* baseColumnOrList, + shared_ptr vector, uint8_t* frame, uint16_t pagePosOfFirstElement, + uint64_t vectorStartPos, uint64_t numValuesToRead) { + vector->setRangeNonNull(vectorStartPos, numValuesToRead); + } + + static inline void setVectorNullBits(BaseColumnOrList* baseColumnOrList, + shared_ptr vector, uint8_t* frame, uint16_t pagePosOfFirstElement, + uint64_t vectorStartPos, uint64_t numValuesToRead) { + baseColumnOrList->readNullBitsFromAPage( + vector, frame, pagePosOfFirstElement, vectorStartPos, numValuesToRead); + } + void readAPageBySequentialCopy(Transaction* transaction, const shared_ptr& vector, uint64_t vectorStartPos, page_idx_t physicalPageIdx, uint16_t pagePosOfFirstElement, uint64_t numValuesToRead); diff --git a/src/include/storage/storage_utils.h b/src/include/storage/storage_utils.h index 00cac106ded..172c1a6f508 100644 --- a/src/include/storage/storage_utils.h +++ b/src/include/storage/storage_utils.h @@ -211,7 +211,7 @@ class StorageUtils { common::StorageConfig::RELS_METADATA_FILE_NAME_FOR_WAL); } - static inline uint64_t getNumChunks(node_offset_t numNodes) { + static inline uint64_t getNumChunks(offset_t numNodes) { auto numChunks = StorageUtils::getListChunkIdx(numNodes); if (0 != (numNodes & (ListsMetadataConfig::LISTS_CHUNK_SIZE - 1))) { numChunks++; @@ -219,15 +219,15 @@ class StorageUtils { return numChunks; } - static inline uint64_t getListChunkIdx(node_offset_t nodeOffset) { + static inline uint64_t getListChunkIdx(offset_t nodeOffset) { return nodeOffset >> ListsMetadataConfig::LISTS_CHUNK_SIZE_LOG_2; } - static inline node_offset_t getChunkIdxBeginNodeOffset(uint64_t chunkIdx) { + static inline offset_t getChunkIdxBeginNodeOffset(uint64_t chunkIdx) { return chunkIdx << ListsMetadataConfig::LISTS_CHUNK_SIZE_LOG_2; } - static inline node_offset_t getChunkIdxEndNodeOffsetInclusive(uint64_t chunkIdx) { + static inline offset_t getChunkIdxEndNodeOffsetInclusive(uint64_t chunkIdx) { return ((chunkIdx + 1) << ListsMetadataConfig::LISTS_CHUNK_SIZE_LOG_2) - 1; } diff --git a/src/include/storage/store/node_table.h b/src/include/storage/store/node_table.h index 27971562395..7a5e1e91544 100644 --- a/src/include/storage/store/node_table.h +++ b/src/include/storage/store/node_table.h @@ -17,7 +17,7 @@ class NodeTable { void initializeData(NodeTableSchema* nodeTableSchema); - inline node_offset_t getMaxNodeOffset(Transaction* trx) const { + inline offset_t getMaxNodeOffset(Transaction* trx) const { return nodesStatisticsAndDeletedIDs->getMaxNodeOffset(trx, tableID); } inline void setSelVectorForDeletedOffsets( @@ -48,13 +48,13 @@ class NodeTable { property.dataType, bufferManager, isInMemory, wal)); } - node_offset_t addNodeAndResetProperties(ValueVector* primaryKeyVector); + offset_t addNodeAndResetProperties(ValueVector* primaryKeyVector); void deleteNodes(ValueVector* nodeIDVector, ValueVector* primaryKeyVector); void prepareCommitOrRollbackIfNecessary(bool isCommit); private: - void deleteNode(node_offset_t nodeOffset, ValueVector* primaryKeyVector, uint32_t pos) const; + void deleteNode(offset_t nodeOffset, ValueVector* primaryKeyVector, uint32_t pos) const; private: NodesStatisticsAndDeletedIDs* nodesStatisticsAndDeletedIDs; diff --git a/src/include/storage/store/nodes_statistics_and_deleted_ids.h b/src/include/storage/store/nodes_statistics_and_deleted_ids.h index 2f960afd5e2..fd5afedbcd6 100644 --- a/src/include/storage/store/nodes_statistics_and_deleted_ids.h +++ b/src/include/storage/store/nodes_statistics_and_deleted_ids.h @@ -11,12 +11,12 @@ namespace storage { class NodeStatisticsAndDeletedIDs : public TableStatistics { public: - NodeStatisticsAndDeletedIDs(table_id_t tableID, node_offset_t maxNodeOffset) + NodeStatisticsAndDeletedIDs(table_id_t tableID, offset_t maxNodeOffset) : NodeStatisticsAndDeletedIDs(tableID, maxNodeOffset, - vector() /* no deleted node offsets during initial loading */) {} + vector() /* no deleted node offsets during initial loading */) {} - NodeStatisticsAndDeletedIDs(table_id_t tableID, node_offset_t maxNodeOffset, - const vector& deletedNodeOffsets); + NodeStatisticsAndDeletedIDs( + table_id_t tableID, offset_t maxNodeOffset, const vector& deletedNodeOffsets); NodeStatisticsAndDeletedIDs(const NodeStatisticsAndDeletedIDs& other) : TableStatistics{other.getNumTuples()}, tableID{other.tableID}, @@ -24,18 +24,16 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics { hasDeletedNodesPerMorsel{other.hasDeletedNodesPerMorsel}, deletedNodeOffsetsPerMorsel{other.deletedNodeOffsetsPerMorsel} {} - inline node_offset_t getMaxNodeOffset() { - return getMaxNodeOffsetFromNumTuples(getNumTuples()); - } + inline offset_t getMaxNodeOffset() { return getMaxNodeOffsetFromNumTuples(getNumTuples()); } inline void setAdjListsAndColumns( pair, vector> adjListsAndColumns_) { adjListsAndColumns = adjListsAndColumns_; } - node_offset_t addNode(); + offset_t addNode(); - void deleteNode(node_offset_t nodeOffset); + void deleteNode(offset_t nodeOffset); // This function assumes that it is being called right after ScanNodeID has obtained a // morsel and that the nodeID structs in nodeOffsetVector.values have consecutive node @@ -44,9 +42,9 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics { void setNumTuples(uint64_t numTuples) override; - vector getDeletedNodeOffsets(); + vector getDeletedNodeOffsets(); - static inline uint64_t geNumTuplesFromMaxNodeOffset(node_offset_t maxNodeOffset) { + static inline uint64_t geNumTuplesFromMaxNodeOffset(offset_t maxNodeOffset) { return (maxNodeOffset == UINT64_MAX) ? 0ull : maxNodeOffset + 1ull; } @@ -55,10 +53,10 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics { } private: - void errorIfNodeHasEdges(node_offset_t nodeOffset); + void errorIfNodeHasEdges(offset_t nodeOffset); // We pass the morselIdx to not do the division nodeOffset/DEFAULT_VECTOR_CAPACITY again - bool isDeleted(node_offset_t nodeOffset, uint64_t morselIdx); + bool isDeleted(offset_t nodeOffset, uint64_t morselIdx); private: table_id_t tableID; @@ -66,7 +64,7 @@ class NodeStatisticsAndDeletedIDs : public TableStatistics { // construction. pair, vector> adjListsAndColumns; vector hasDeletedNodesPerMorsel; - map> deletedNodeOffsetsPerMorsel; + map> deletedNodeOffsetsPerMorsel; }; // Manages the disk image of the maxNodeOffsets and deleted node IDs (per node table). @@ -113,14 +111,14 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { ->setNumTuples(numTuples); } - inline node_offset_t getMaxNodeOffset(Transaction* transaction, table_id_t tableID) { + inline offset_t getMaxNodeOffset(Transaction* transaction, table_id_t tableID) { return getMaxNodeOffset(transaction == nullptr || transaction->isReadOnly() ? TransactionType::READ_ONLY : TransactionType::WRITE, tableID); } - inline node_offset_t getMaxNodeOffset(TransactionType transactionType, table_id_t tableID) { + inline offset_t getMaxNodeOffset(TransactionType transactionType, table_id_t tableID) { return (transactionType == TransactionType::READ_ONLY || tablesStatisticsContentForWriteTrx == nullptr) ? getNodeStatisticsAndDeletedIDs(tableID)->getMaxNodeOffset() : @@ -139,7 +137,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { // This function assumes that there is a single write transaction. That is why for now we // keep the interface simple and no transaction is passed. - node_offset_t addNode(table_id_t tableID) { + offset_t addNode(table_id_t tableID) { lock_t lck{mtx}; initTableStatisticPerTableForWriteTrxIfNecessary(); return ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx @@ -149,7 +147,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { } // Refer to the comments for addNode. - void deleteNode(table_id_t tableID, node_offset_t nodeOffset) { + void deleteNode(table_id_t tableID, offset_t nodeOffset) { lock_t lck{mtx}; initTableStatisticPerTableForWriteTrxIfNecessary(); ((NodeStatisticsAndDeletedIDs*)tablesStatisticsContentForWriteTrx @@ -160,7 +158,7 @@ class NodesStatisticsAndDeletedIDs : public TablesStatistics { // This function is only used by storageManager to construct relsStore during start-up, so // we can just safely return the maxNodeOffsetPerTable for readOnlyVersion. - map getMaxNodeOffsetPerTable() const; + map getMaxNodeOffsetPerTable() const; void setDeletedNodeOffsetsForMorsel(Transaction* transaction, const shared_ptr& nodeOffsetVector, table_id_t tableID); diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index 83868be8f5c..bfea8642ca9 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -224,21 +224,21 @@ class RelTable { inline void addToUpdatedRelTables() { wal->addToUpdatedRelTables(tableID); } inline void clearListsUpdatesStore() { listsUpdatesStore->clear(); } static void appendInMemListToLargeListOP( - ListsUpdateIterator* listsUpdateIterator, node_offset_t nodeOffset, InMemList& inMemList); + ListsUpdateIterator* listsUpdateIterator, offset_t nodeOffset, InMemList& inMemList); static void updateListOP( - ListsUpdateIterator* listsUpdateIterator, node_offset_t nodeOffset, InMemList& inMemList); + ListsUpdateIterator* listsUpdateIterator, offset_t nodeOffset, InMemList& inMemList); void performOpOnListsWithUpdates(const std::function& opOnListsWithUpdates, const std::function& opIfHasUpdates); unique_ptr getListsUpdateIteratorsForDirection( RelDirection relDirection, table_id_t boundNodeTableID) const; void prepareCommitForDirection(RelDirection relDirection); - void prepareCommitForListWithUpdateStoreDataOnly(AdjLists* adjLists, node_offset_t nodeOffset, + void prepareCommitForListWithUpdateStoreDataOnly(AdjLists* adjLists, offset_t nodeOffset, ListsUpdatesForNodeOffset* listsUpdatesForNodeOffset, RelDirection relDirection, ListsUpdateIteratorsForDirection* listsUpdateIteratorsForDirection, table_id_t boundNodeTableID, - const std::function& opOnListsUpdateIterators); - void prepareCommitForList(AdjLists* adjLists, node_offset_t nodeOffset, + void prepareCommitForList(AdjLists* adjLists, offset_t nodeOffset, ListsUpdatesForNodeOffset* listsUpdatesForNodeOffset, RelDirection relDirection, ListsUpdateIteratorsForDirection* listsUpdateIteratorsForDirection, table_id_t boundNodeTableID); diff --git a/src/include/storage/store/rels_statistics.h b/src/include/storage/store/rels_statistics.h index c1463ee4b21..4db19aace11 100644 --- a/src/include/storage/store/rels_statistics.h +++ b/src/include/storage/store/rels_statistics.h @@ -12,10 +12,12 @@ class RelStatistics : public TableStatistics { friend class RelsStatistics; public: - RelStatistics( - uint64_t numRels, vector> numRelsPerDirectionBoundTable) - : TableStatistics{numRels}, numRelsPerDirectionBoundTable{ - move(numRelsPerDirectionBoundTable)} {} + RelStatistics(uint64_t numRels, + vector> numRelsPerDirectionBoundTable, + offset_t nextRelOffset) + : TableStatistics{numRels}, numRelsPerDirectionBoundTable{std::move( + numRelsPerDirectionBoundTable)}, + nextRelOffset{nextRelOffset} {} RelStatistics(vector> srcDstTableIDs); inline uint64_t getNumRelsForDirectionBoundTable( @@ -31,8 +33,11 @@ class RelStatistics : public TableStatistics { numRelsPerDirectionBoundTable[relDirection][boundTableID] = numRels; } + inline offset_t getNextRelOffset() { return nextRelOffset; } + private: vector> numRelsPerDirectionBoundTable; + offset_t nextRelOffset; }; // Manages the disk image of the numRels and numRelsPerDirectionBoundTable. @@ -76,7 +81,7 @@ class RelsStatistics : public TablesStatistics { void setNumRelsPerDirectionBoundTableID( table_id_t tableID, vector>>& directionNumRelsPerTable); - uint64_t getNextRelID(Transaction* transaction); + offset_t getNextRelOffset(Transaction* transaction, table_id_t tableID); protected: inline string getTableTypeForPrinting() const override { return "RelsStatistics"; } @@ -95,6 +100,12 @@ class RelsStatistics : public TablesStatistics { return StorageUtils::getRelsStatisticsFilePath(directory, dbFileType); } + inline void increaseNextRelOffset(table_id_t relTableID, uint64_t numTuples) { + ((RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable.at(relTableID) + .get()) + ->nextRelOffset += numTuples; + } + unique_ptr deserializeTableStatistics( uint64_t numTuples, uint64_t& offset, FileInfo* fileInfo, uint64_t tableID) override; diff --git a/src/include/storage/store/table_statistics.h b/src/include/storage/store/table_statistics.h index 8ab650e3d62..b70e12c7f69 100644 --- a/src/include/storage/store/table_statistics.h +++ b/src/include/storage/store/table_statistics.h @@ -41,11 +41,8 @@ class TableStatistics { }; struct TablesStatisticsContent { - TablesStatisticsContent() : nextRelID{0} {} + TablesStatisticsContent() {} unordered_map> tableStatisticPerTable; - // This is only needed for RelsStatistics and is a temporary solution until we move to a - // uniform node and edge ID scheme (and then open an issue about this.) - uint64_t nextRelID; }; class TablesStatistics { diff --git a/src/include/storage/wal/wal_record.h b/src/include/storage/wal/wal_record.h index e1f0bce62fa..95252cd1559 100644 --- a/src/include/storage/wal/wal_record.h +++ b/src/include/storage/wal/wal_record.h @@ -54,16 +54,16 @@ struct AdjListsID { } }; -struct RelPropertyListID { +struct RelPropertyListsID { RelNodeTableAndDir relNodeTableAndDir; property_id_t propertyID; - RelPropertyListID() = default; + RelPropertyListsID() = default; - RelPropertyListID(RelNodeTableAndDir relNodeTableAndDir, property_id_t propertyID) + RelPropertyListsID(RelNodeTableAndDir relNodeTableAndDir, property_id_t propertyID) : relNodeTableAndDir{relNodeTableAndDir}, propertyID{propertyID} {} - inline bool operator==(const RelPropertyListID& rhs) const { + inline bool operator==(const RelPropertyListsID& rhs) const { return relNodeTableAndDir == rhs.relNodeTableAndDir && propertyID == rhs.propertyID; } }; @@ -73,7 +73,7 @@ struct ListFileID { ListFileType listFileType; union { AdjListsID adjListsID; - RelPropertyListID relPropertyListID; + RelPropertyListsID relPropertyListID; }; ListFileID() = default; @@ -81,7 +81,7 @@ struct ListFileID { ListFileID(ListFileType listFileType, AdjListsID adjListsID) : listType{ListType::ADJ_LISTS}, listFileType{listFileType}, adjListsID{adjListsID} {} - ListFileID(ListFileType listFileType, RelPropertyListID relPropertyListID) + ListFileID(ListFileType listFileType, RelPropertyListsID relPropertyListID) : listType{ListType::REL_PROPERTY_LISTS}, listFileType{listFileType}, relPropertyListID{relPropertyListID} {} diff --git a/src/include/storage/wal_replayer_utils.h b/src/include/storage/wal_replayer_utils.h index a8ac367fafc..5330c1da393 100644 --- a/src/include/storage/wal_replayer_utils.h +++ b/src/include/storage/wal_replayer_utils.h @@ -56,7 +56,7 @@ class WALReplayerUtils { const string& directory, RelTableSchema* relTableSchema, property_id_t propertyID); static void createEmptyDBFilesForNewRelTable(RelTableSchema* relTableSchema, - const string& directory, const map& maxNodeOffsetsPerTable); + const string& directory, const map& maxNodeOffsetsPerTable); static void createEmptyDBFilesForNewNodeTable( NodeTableSchema* nodeTableSchema, const string& directory); diff --git a/src/main/query_result.cpp b/src/main/query_result.cpp index 34762365f9a..a0a74c7c4a9 100644 --- a/src/main/query_result.cpp +++ b/src/main/query_result.cpp @@ -27,8 +27,8 @@ void QueryResult::initResultTableAndIterator( unique_ptr value; if (columnType.typeID == common::NODE) { // first expression is node ID. - assert(expressionsToCollect[0]->dataType.typeID == common::NODE_ID); - auto nodeIDVal = make_unique(Value::createDefaultValue(DataType(NODE_ID))); + assert(expressionsToCollect[0]->dataType.typeID == common::INTERNAL_ID); + auto nodeIDVal = make_unique(Value::createDefaultValue(DataType(INTERNAL_ID))); valuesToCollect.push_back(nodeIDVal.get()); // second expression is node label function. assert(expressionsToCollect[1]->dataType.typeID == common::STRING); @@ -46,12 +46,14 @@ void QueryResult::initResultTableAndIterator( value = make_unique(std::move(nodeVal)); } else if (columnType.typeID == common::REL) { // first expression is src node ID. - assert(expressionsToCollect[0]->dataType.typeID == common::NODE_ID); - auto srcNodeIDVal = make_unique(Value::createDefaultValue(DataType(NODE_ID))); + assert(expressionsToCollect[0]->dataType.typeID == common::INTERNAL_ID); + auto srcNodeIDVal = + make_unique(Value::createDefaultValue(DataType(INTERNAL_ID))); valuesToCollect.push_back(srcNodeIDVal.get()); // second expression is dst node ID. - assert(expressionsToCollect[1]->dataType.typeID == common::NODE_ID); - auto dstNodeIDVal = make_unique(Value::createDefaultValue(DataType(NODE_ID))); + assert(expressionsToCollect[1]->dataType.typeID == common::INTERNAL_ID); + auto dstNodeIDVal = + make_unique(Value::createDefaultValue(DataType(INTERNAL_ID))); valuesToCollect.push_back(dstNodeIDVal.get()); auto relVal = make_unique(std::move(srcNodeIDVal), std::move(dstNodeIDVal)); for (auto j = 2u; j < expressionsToCollect.size(); ++j) { diff --git a/src/planner/join_order_enumerator.cpp b/src/planner/join_order_enumerator.cpp index 0b16795a140..136e12e9cf7 100644 --- a/src/planner/join_order_enumerator.cpp +++ b/src/planner/join_order_enumerator.cpp @@ -112,7 +112,7 @@ void JoinOrderEnumerator::planLevel(uint32_t level) { void JoinOrderEnumerator::planOuterExpressionsScan(expression_vector& expressions) { auto newSubgraph = context->getEmptySubqueryGraph(); for (auto& expression : expressions) { - if (expression->getDataType().typeID == NODE_ID) { + if (expression->getDataType().typeID == INTERNAL_ID) { auto node = static_pointer_cast(expression->getChild(0)); auto nodePos = context->getQueryGraph()->getQueryNodePos(*node); newSubgraph.addQueryNode(nodePos); diff --git a/src/planner/query_planner.cpp b/src/planner/query_planner.cpp index 43f82115d69..fae973570e1 100644 --- a/src/planner/query_planner.cpp +++ b/src/planner/query_planner.cpp @@ -166,7 +166,7 @@ static expression_vector getCorrelatedExpressions( static expression_vector getJoinNodeIDs(expression_vector& expressions) { expression_vector joinNodeIDs; for (auto& expression : expressions) { - if (expression->dataType.typeID == NODE_ID) { + if (expression->dataType.typeID == INTERNAL_ID) { joinNodeIDs.push_back(expression); } } @@ -180,7 +180,7 @@ void QueryPlanner::planOptionalMatch(const QueryGraphCollection& queryGraphColle if (correlatedExpressions.empty()) { throw NotImplementedException("Optional match is disconnected with previous MATCH clause."); } - if (ExpressionUtil::allExpressionsHaveDataType(correlatedExpressions, NODE_ID)) { + if (ExpressionUtil::allExpressionsHaveDataType(correlatedExpressions, INTERNAL_ID)) { auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions); // When correlated variables are all NODE IDs, the subquery can be un-nested as left join. // Join nodes are scanned twice in both outer and inner. However, we make sure inner table @@ -239,7 +239,7 @@ void QueryPlanner::planExistsSubquery(shared_ptr& expression, Logica if (correlatedExpressions.empty()) { throw NotImplementedException("Subquery is disconnected with outer query."); } - if (ExpressionUtil::allExpressionsHaveDataType(correlatedExpressions, NODE_ID)) { + if (ExpressionUtil::allExpressionsHaveDataType(correlatedExpressions, INTERNAL_ID)) { auto joinNodeIDs = getJoinNodeIDs(correlatedExpressions); // Unnest as mark join. See planOptionalMatch for unnesting logic. auto prevContext = joinOrderEnumerator.enterSubquery( diff --git a/src/processor/mapper/map_hash_join.cpp b/src/processor/mapper/map_hash_join.cpp index bb0ca95992b..94f47b17d54 100644 --- a/src/processor/mapper/map_hash_join.cpp +++ b/src/processor/mapper/map_hash_join.cpp @@ -97,7 +97,7 @@ BuildDataInfo PlanMapper::generateBuildDataInfo(const Schema& buildSideSchema, for (auto& key : keys) { auto buildSideKeyPos = DataPos(buildSideSchema.getExpressionPos(*key)); isBuildDataChunkContainKeys[buildSideKeyPos.dataChunkPos] = true; - buildKeysPosAndType.emplace_back(buildSideKeyPos, NODE_ID); + buildKeysPosAndType.emplace_back(buildSideKeyPos, INTERNAL_ID); joinKeyNames.insert(key->getUniqueName()); } for (auto& payload : payloads) { diff --git a/src/processor/operator/aggregate/aggregate_hash_table.cpp b/src/processor/operator/aggregate/aggregate_hash_table.cpp index ed32b21e796..c10861776ce 100644 --- a/src/processor/operator/aggregate/aggregate_hash_table.cpp +++ b/src/processor/operator/aggregate/aggregate_hash_table.cpp @@ -664,7 +664,7 @@ bool AggregateHashTable::compareEntryWithKeys(const uint8_t* keyValue, const uin compare_function_t AggregateHashTable::getCompareEntryWithKeysFunc(DataTypeID typeId) { switch (typeId) { - case NODE_ID: { + case INTERNAL_ID: { return compareEntryWithKeys; } case BOOL: { diff --git a/src/processor/operator/index_scan.cpp b/src/processor/operator/index_scan.cpp index 52f47e10f96..af2f02671fe 100644 --- a/src/processor/operator/index_scan.cpp +++ b/src/processor/operator/index_scan.cpp @@ -18,7 +18,7 @@ bool IndexScan::getNextTuplesInternal() { indexKeyEvaluator->evaluate(); auto indexKeyVector = indexKeyEvaluator->resultVector.get(); assert(indexKeyVector->state->isFlat()); - node_offset_t nodeOffset; + offset_t nodeOffset; bool isSuccessfulLookup = pkIndex->lookup(transaction, indexKeyVector, indexKeyVector->state->selVector->selectedPositions[0], nodeOffset); if (isSuccessfulLookup) { diff --git a/src/processor/operator/scan_node_id.cpp b/src/processor/operator/scan_node_id.cpp index d3d45db1fea..7e779a46b1e 100644 --- a/src/processor/operator/scan_node_id.cpp +++ b/src/processor/operator/scan_node_id.cpp @@ -8,7 +8,7 @@ void ScanNodeIDSemiMask::setMask(uint64_t nodeOffset, uint8_t maskerIdx) { morselMask->setMask(nodeOffset >> DEFAULT_VECTOR_CAPACITY_LOG_2, maskerIdx, maskerIdx + 1); } -pair ScanTableNodeIDSharedState::getNextRangeToRead() { +pair ScanTableNodeIDSharedState::getNextRangeToRead() { // Note: we use maxNodeOffset=UINT64_MAX to represent an empty table. if (currentNodeOffset > maxNodeOffset || maxNodeOffset == UINT64_MAX) { return make_pair(currentNodeOffset, currentNodeOffset); @@ -27,8 +27,7 @@ pair ScanTableNodeIDSharedState::getNextRangeToRea return make_pair(startOffset, startOffset + range); } -tuple -ScanNodeIDSharedState::getNextRangeToRead() { +tuple ScanNodeIDSharedState::getNextRangeToRead() { unique_lock lck{mtx}; if (currentStateIdx == tableStates.size()) { return make_tuple(nullptr, INVALID_NODE_OFFSET, INVALID_NODE_OFFSET); @@ -76,7 +75,7 @@ void ScanNodeID::initGlobalStateInternal(ExecutionContext* context) { } void ScanNodeID::setSelVector( - ScanTableNodeIDSharedState* tableState, node_offset_t startOffset, node_offset_t endOffset) { + ScanTableNodeIDSharedState* tableState, offset_t startOffset, offset_t endOffset) { if (tableState->isSemiMaskEnabled()) { outValueVector->state->selVector->resetSelectorToValuePosBuffer(); // Fill selected positions based on node mask for nodes between the given startOffset and diff --git a/src/processor/operator/semi_masker.cpp b/src/processor/operator/semi_masker.cpp index c4867707be9..17c980c666a 100644 --- a/src/processor/operator/semi_masker.cpp +++ b/src/processor/operator/semi_masker.cpp @@ -5,7 +5,7 @@ namespace processor { void SemiMasker::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) { keyValueVector = resultSet->getValueVector(keyDataPos); - assert(keyValueVector->dataType.typeID == NODE_ID); + assert(keyValueVector->dataType.typeID == INTERNAL_ID); } bool SemiMasker::getNextTuplesInternal() { diff --git a/src/processor/operator/update/create.cpp b/src/processor/operator/update/create.cpp index eaaf82e2502..cc19cb2f8ba 100644 --- a/src/processor/operator/update/create.cpp +++ b/src/processor/operator/update/create.cpp @@ -56,9 +56,10 @@ bool CreateRel::getNextTuplesInternal() { // Rel ID is our interval property, so we overwrite relID=$expr with system ID. if (j == createRelInfo->relIDEvaluatorIdx) { auto relIDVector = evaluator->resultVector; - assert(relIDVector->dataType.typeID == INT64 && + assert(relIDVector->dataType.typeID == INTERNAL_ID && relIDVector->state->selVector->selectedPositions[0] == 0); - relIDVector->setValue(0, relsStatistics.getNextRelID(transaction)); + relIDVector->setValue(0, relsStatistics.getNextRelOffset( + transaction, createRelInfo->table->getRelTableID())); relIDVector->setNull(0, false); } else { createRelInfo->evaluators[j]->evaluate(); diff --git a/src/processor/result/factorized_table.cpp b/src/processor/result/factorized_table.cpp index 9b7b536483b..d9bfd1b5dc1 100644 --- a/src/processor/result/factorized_table.cpp +++ b/src/processor/result/factorized_table.cpp @@ -284,9 +284,10 @@ void FactorizedTable::copyToInMemList(ft_col_idx_t colIdx, vectorgetColumn(colIdx); assert(column->isFlat() == true); - auto numBytesPerValue = nodeIDCompressionScheme == nullptr ? - Types::getDataTypeSize(type) : - nodeIDCompressionScheme->getNumBytesForNodeIDAfterCompression(); + auto numBytesPerValue = + nodeIDCompressionScheme == nullptr ? + (type.typeID == INTERNAL_ID ? sizeof(offset_t) : Types::getDataTypeSize(type)) : + nodeIDCompressionScheme->getNumBytesForNodeIDAfterCompression(); auto colOffset = tableSchema->getColOffset(colIdx); auto listToFill = data + startElemPosInList * numBytesPerValue; for (auto i = 0u; i < tupleIdxesToRead.size(); i++) { diff --git a/src/storage/copy_arrow/copy_node_arrow.cpp b/src/storage/copy_arrow/copy_node_arrow.cpp index 4ce57224c69..58e39a90c37 100644 --- a/src/storage/copy_arrow/copy_node_arrow.cpp +++ b/src/storage/copy_arrow/copy_node_arrow.cpp @@ -101,7 +101,7 @@ arrow::Status CopyNodeArrow::populateColumns() { template arrow::Status CopyNodeArrow::populateColumnsFromCSV(unique_ptr>& pkIndex) { - node_offset_t offsetStart = 0; + offset_t offsetStart = 0; shared_ptr csv_streaming_reader; auto status = initCSVReader(csv_streaming_reader, copyDescription.filePath); @@ -134,7 +134,7 @@ arrow::Status CopyNodeArrow::populateColumnsFromCSV(unique_ptr arrow::Status CopyNodeArrow::populateColumnsFromArrow(unique_ptr>& pkIndex) { - node_offset_t offsetStart = 0; + offset_t offsetStart = 0; std::shared_ptr ipc_reader; auto status = initArrowReader(ipc_reader, copyDescription.filePath); @@ -165,7 +165,7 @@ arrow::Status CopyNodeArrow::populateColumnsFromArrow(unique_ptr arrow::Status CopyNodeArrow::populateColumnsFromParquet(unique_ptr>& pkIndex) { - node_offset_t offsetStart = 0; + offset_t offsetStart = 0; std::unique_ptr reader; auto status = initParquetReader(reader, copyDescription.filePath); @@ -195,8 +195,8 @@ arrow::Status CopyNodeArrow::populateColumnsFromParquet(unique_ptr -void CopyNodeArrow::populatePKIndex(InMemColumn* column, HashIndexBuilder* pkIndex, - node_offset_t startOffset, uint64_t numValues) { +void CopyNodeArrow::populatePKIndex( + InMemColumn* column, HashIndexBuilder* pkIndex, offset_t startOffset, uint64_t numValues) { for (auto i = 0u; i < numValues; i++) { auto offset = i + startOffset; if constexpr (is_same::value) { diff --git a/src/storage/copy_arrow/copy_rel_arrow.cpp b/src/storage/copy_arrow/copy_rel_arrow.cpp index 70267ad39c3..bd249e7b728 100644 --- a/src/storage/copy_arrow/copy_rel_arrow.cpp +++ b/src/storage/copy_arrow/copy_rel_arrow.cpp @@ -8,13 +8,12 @@ namespace storage { CopyRelArrow::CopyRelArrow(CopyDescription& copyDescription, string outputDirectory, TaskScheduler& taskScheduler, Catalog& catalog, - map maxNodeOffsetsPerNodeTable, BufferManager* bufferManager, + map maxNodeOffsetsPerNodeTable, BufferManager* bufferManager, table_id_t tableID, RelsStatistics* relsStatistics) : CopyStructuresArrow{copyDescription, std::move(outputDirectory), taskScheduler, catalog}, maxNodeOffsetsPerTable{std::move(maxNodeOffsetsPerNodeTable)}, relsStatistics{ relsStatistics} { dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); - startRelID = relsStatistics->getNextRelID(dummyReadOnlyTrx.get()); relTableSchema = catalog.getReadOnlyVersion()->getRelTableSchema(tableID); for (auto& nodeTableID : relTableSchema->getAllNodeTableIDs()) { assert(!pkIndexes.contains(nodeTableID)); @@ -180,7 +179,7 @@ arrow::Status CopyRelArrow::populateFromCSV(PopulateTaskType populateTaskType) { } ARROW_ASSIGN_OR_RAISE(currBatch, *it); taskScheduler.scheduleTask(CopyTaskFactory::createCopyTask(populateTask, blockIdx, - startRelID + blockStartOffset, this, currBatch->columns(), copyDescription)); + blockStartOffset, this, currBatch->columns(), copyDescription)); blockStartOffset += numLinesPerBlock[blockIdx]; ++it; ++blockIdx; @@ -212,7 +211,7 @@ arrow::Status CopyRelArrow::populateFromArrow(PopulateTaskType populateTaskType) } ARROW_ASSIGN_OR_RAISE(currBatch, ipc_reader->ReadRecordBatch(blockIdx)); taskScheduler.scheduleTask(CopyTaskFactory::createCopyTask(populateTask, blockIdx, - startRelID + blockStartOffset, this, currBatch->columns(), copyDescription)); + blockStartOffset, this, currBatch->columns(), copyDescription)); blockStartOffset += numLinesPerBlock[blockIdx]; ++blockIdx; } @@ -244,7 +243,7 @@ arrow::Status CopyRelArrow::populateFromParquet(PopulateTaskType populateTaskTyp } ARROW_RETURN_NOT_OK(reader->RowGroup(blockIdx)->ReadTable(&currTable)); taskScheduler.scheduleTask(CopyTaskFactory::createCopyTask(populateTask, blockIdx, - startRelID + blockStartOffset, this, currTable->columns(), copyDescription)); + blockStartOffset, this, currTable->columns(), copyDescription)); blockStartOffset += numLinesPerBlock[blockIdx]; ++blockIdx; } @@ -672,7 +671,7 @@ void CopyRelArrow::copyListOverflowFromUnorderedToOrderedPages(ku_list_t* kuList } void CopyRelArrow::sortOverflowValuesOfPropertyColumnTask(const DataType& dataType, - node_offset_t offsetStart, node_offset_t offsetEnd, InMemColumn* propertyColumn, + offset_t offsetStart, offset_t offsetEnd, InMemColumn* propertyColumn, InMemOverflowFile* unorderedInMemOverflowFile, InMemOverflowFile* orderedInMemOverflowFile) { PageByteCursor unorderedOverflowCursor, orderedOverflowCursor; for (; offsetStart < offsetEnd; offsetStart++) { @@ -691,9 +690,8 @@ void CopyRelArrow::sortOverflowValuesOfPropertyColumnTask(const DataType& dataTy } void CopyRelArrow::sortOverflowValuesOfPropertyListsTask(const DataType& dataType, - node_offset_t offsetStart, node_offset_t offsetEnd, InMemAdjLists* adjLists, - InMemLists* propertyLists, InMemOverflowFile* unorderedInMemOverflowFile, - InMemOverflowFile* orderedInMemOverflowFile) { + offset_t offsetStart, offset_t offsetEnd, InMemAdjLists* adjLists, InMemLists* propertyLists, + InMemOverflowFile* unorderedInMemOverflowFile, InMemOverflowFile* orderedInMemOverflowFile) { PageByteCursor unorderedOverflowCursor, orderedOverflowCursor; PageElementCursor propertyListCursor; for (; offsetStart < offsetEnd; offsetStart++) { @@ -734,7 +732,7 @@ void CopyRelArrow::sortAndCopyOverflowValues() { numBuckets += (numNodes % 256 != 0); for (auto& property : relTableSchema->properties) { if (property.dataType.typeID == STRING || property.dataType.typeID == LIST) { - node_offset_t offsetStart = 0, offsetEnd = 0; + offset_t offsetStart = 0, offsetEnd = 0; for (auto bucketIdx = 0u; bucketIdx < numBuckets; bucketIdx++) { offsetStart = offsetEnd; offsetEnd = min(offsetStart + 256, numNodes); @@ -760,7 +758,7 @@ void CopyRelArrow::sortAndCopyOverflowValues() { numBuckets += (numNodes % 256 != 0); for (auto& property : relTableSchema->properties) { if (property.dataType.typeID == STRING || property.dataType.typeID == LIST) { - node_offset_t offsetStart = 0, offsetEnd = 0; + offset_t offsetStart = 0, offsetEnd = 0; for (auto bucketIdx = 0u; bucketIdx < numBuckets; bucketIdx++) { offsetStart = offsetEnd; offsetEnd = min(offsetStart + 256, numNodes); diff --git a/src/storage/copy_arrow/copy_structures_arrow.cpp b/src/storage/copy_arrow/copy_structures_arrow.cpp index ccc97cbcf10..6b360d3db36 100644 --- a/src/storage/copy_arrow/copy_structures_arrow.cpp +++ b/src/storage/copy_arrow/copy_structures_arrow.cpp @@ -13,13 +13,13 @@ CopyStructuresArrow::CopyStructuresArrow(CopyDescription& copyDescription, strin taskScheduler{taskScheduler}, catalog{catalog}, numRows{0} {} // Lists headers are created for only AdjLists, which store data in the page without NULL bits. -void CopyStructuresArrow::calculateListHeadersTask(node_offset_t numNodes, uint32_t elementSize, +void CopyStructuresArrow::calculateListHeadersTask(offset_t numNodes, uint32_t elementSize, atomic_uint64_vec_t* listSizes, ListHeadersBuilder* listHeadersBuilder, const shared_ptr& logger) { logger->trace("Start: ListHeadersBuilder={0:p}", (void*)listHeadersBuilder); auto numElementsPerPage = PageUtils::getNumElementsInAPage(elementSize, false /* hasNull */); auto numChunks = StorageUtils::getNumChunks(numNodes); - node_offset_t nodeOffset = 0u; + offset_t nodeOffset = 0u; uint64_t lAdjListsIdx = 0u; for (auto chunkId = 0u; chunkId < numChunks; chunkId++) { auto csrOffset = 0u; @@ -47,7 +47,7 @@ void CopyStructuresArrow::calculateListsMetadataAndAllocateInMemListPagesTask(ui logger->trace("Start: listsMetadataBuilder={0:p} adjListHeadersBuilder={1:p}", (void*)inMemList->getListsMetadataBuilder(), (void*)listHeadersBuilder); auto numChunks = StorageUtils::getNumChunks(numNodes); - node_offset_t nodeOffset = 0u; + offset_t nodeOffset = 0u; auto largeListIdx = 0u; for (auto chunkId = 0u; chunkId < numChunks; chunkId++) { auto lastNodeOffsetInChunk = diff --git a/src/storage/in_mem_storage_structure/in_mem_column.cpp b/src/storage/in_mem_storage_structure/in_mem_column.cpp index 2e505dcd6cf..4e4229baf1a 100644 --- a/src/storage/in_mem_storage_structure/in_mem_column.cpp +++ b/src/storage/in_mem_storage_structure/in_mem_column.cpp @@ -26,7 +26,7 @@ void InMemColumn::saveToFile() { inMemFile->flush(); } -void InMemColumn::setElement(node_offset_t offset, const uint8_t* val) { +void InMemColumn::setElement(offset_t offset, const uint8_t* val) { auto cursor = getPageElementCursorForOffset(offset); inMemFile->getPage(cursor.pageIdx) ->write(cursor.elemPosInPage * numBytesForElement, cursor.elemPosInPage, val, @@ -34,7 +34,7 @@ void InMemColumn::setElement(node_offset_t offset, const uint8_t* val) { } void InMemColumn::fillInMemColumnWithStrValFunc(InMemColumn* inMemColumn, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, const DataType& dataType) { + PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType) { auto strVal = *reinterpret_cast(defaultVal); if (strVal.len > ku_string_t::SHORT_STR_LENGTH) { inMemColumn->getInMemOverflowFile()->copyStringOverflow( @@ -44,7 +44,7 @@ void InMemColumn::fillInMemColumnWithStrValFunc(InMemColumn* inMemColumn, uint8_ } void InMemColumn::fillInMemColumnWithListValFunc(InMemColumn* inMemColumn, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, const DataType& dataType) { + PageByteCursor& pageByteCursor, offset_t nodeOffset, const DataType& dataType) { auto listVal = *reinterpret_cast(defaultVal); inMemColumn->getInMemOverflowFile()->copyListOverflowToFile( pageByteCursor, &listVal, dataType.childType.get()); @@ -87,7 +87,7 @@ void InMemColumnWithOverflow::saveToFile() { InMemColumn::saveToFile(); } -void InMemAdjColumn::setElement(node_offset_t offset, const uint8_t* val) { +void InMemAdjColumn::setElement(offset_t offset, const uint8_t* val) { auto node = (nodeID_t*)val; auto cursor = getPageElementCursorForOffset(offset); inMemFile->getPage(cursor.pageIdx) @@ -110,6 +110,8 @@ unique_ptr InMemColumnFactory::getInMemPropertyColumn( return make_unique(fName, numElements); case LIST: return make_unique(fName, dataType, numElements); + case INTERNAL_ID: + return make_unique(fName, numElements); default: throw CopyException("Invalid type for property column creation."); } diff --git a/src/storage/in_mem_storage_structure/in_mem_lists.cpp b/src/storage/in_mem_storage_structure/in_mem_lists.cpp index 6adb7b2d6b5..30cfb2ece76 100644 --- a/src/storage/in_mem_storage_structure/in_mem_lists.cpp +++ b/src/storage/in_mem_storage_structure/in_mem_lists.cpp @@ -6,7 +6,7 @@ namespace kuzu { namespace storage { PageElementCursor InMemListsUtils::calcPageElementCursor(uint32_t header, uint64_t reversePos, - uint8_t numBytesPerElement, node_offset_t nodeOffset, ListsMetadataBuilder& metadataBuilder, + uint8_t numBytesPerElement, offset_t nodeOffset, ListsMetadataBuilder& metadataBuilder, bool hasNULLBytes) { PageElementCursor cursor; auto numElementsInAPage = PageUtils::getNumElementsInAPage(numBytesPerElement, hasNULLBytes); @@ -36,8 +36,8 @@ InMemLists::InMemLists( numChunks++; } listsMetadataBuilder->initChunkPageLists(numChunks); - inMemFile = - make_unique(this->fName, numBytesForElement, this->dataType.typeID != NODE_ID); + inMemFile = make_unique( + this->fName, numBytesForElement, this->dataType.typeID != INTERNAL_ID); } void InMemLists::fillWithDefaultVal( @@ -59,7 +59,7 @@ void InMemLists::saveToFile() { inMemFile->flush(); } -void InMemLists::setElement(uint32_t header, node_offset_t nodeOffset, uint64_t pos, uint8_t* val) { +void InMemLists::setElement(uint32_t header, offset_t nodeOffset, uint64_t pos, uint8_t* val) { auto cursor = InMemListsUtils::calcPageElementCursor(header, pos, numBytesForElement, nodeOffset, *listsMetadataBuilder, true /* hasNULLBytes */); inMemFile->getPage(cursor.pageIdx) @@ -67,8 +67,7 @@ void InMemLists::setElement(uint32_t header, node_offset_t nodeOffset, uint64_t numBytesForElement); } -void InMemAdjLists::setElement( - uint32_t header, node_offset_t nodeOffset, uint64_t pos, uint8_t* val) { +void InMemAdjLists::setElement(uint32_t header, offset_t nodeOffset, uint64_t pos, uint8_t* val) { auto cursor = InMemListsUtils::calcPageElementCursor(header, pos, numBytesForElement, nodeOffset, *listsMetadataBuilder, false /* hasNULLBytes */); auto node = (nodeID_t*)val; @@ -80,7 +79,7 @@ void InMemAdjLists::setElement( void InMemLists::initListsMetadataAndAllocatePages( uint64_t numNodes, ListHeaders* listHeaders, ListsMetadata* listsMetadata) { initLargeListPageLists(numNodes, listHeaders); - node_offset_t nodeOffset = 0u; + offset_t nodeOffset = 0u; auto largeListIdx = 0u; auto numElementsPerPage = PageUtils::getNumElementsInAPage(numBytesForElement, true /* hasNull */); @@ -113,7 +112,7 @@ void InMemLists::initListsMetadataAndAllocatePages( void InMemLists::initLargeListPageLists(uint64_t numNodes, ListHeaders* listHeaders) { auto largeListIdx = 0u; - for (node_offset_t nodeOffset = 0; nodeOffset < numNodes; nodeOffset++) { + for (offset_t nodeOffset = 0; nodeOffset < numNodes; nodeOffset++) { if (ListHeaders::isALargeList(listHeaders->getHeader(nodeOffset))) { largeListIdx++; } @@ -142,8 +141,8 @@ void InMemLists::calculatePagesForSmallList(uint64_t& numPages, uint64_t& offset } void InMemLists::fillInMemListsWithStrValFunc(InMemLists* inMemLists, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, list_header_t header, - uint64_t posInList, const DataType& dataType) { + PageByteCursor& pageByteCursor, offset_t nodeOffset, list_header_t header, uint64_t posInList, + const DataType& dataType) { auto strVal = *(ku_string_t*)defaultVal; inMemLists->getInMemOverflowFile()->copyStringOverflow( pageByteCursor, reinterpret_cast(strVal.overflowPtr), &strVal); @@ -151,8 +150,8 @@ void InMemLists::fillInMemListsWithStrValFunc(InMemLists* inMemLists, uint8_t* d } void InMemLists::fillInMemListsWithListValFunc(InMemLists* inMemLists, uint8_t* defaultVal, - PageByteCursor& pageByteCursor, node_offset_t nodeOffset, list_header_t header, - uint64_t posInList, const DataType& dataType) { + PageByteCursor& pageByteCursor, offset_t nodeOffset, list_header_t header, uint64_t posInList, + const DataType& dataType) { auto listVal = *reinterpret_cast(defaultVal); inMemLists->getInMemOverflowFile()->copyListOverflowToFile( pageByteCursor, &listVal, dataType.childType.get()); @@ -187,7 +186,8 @@ void InMemAdjLists::saveToFile() { } InMemListsWithOverflow::InMemListsWithOverflow(string fName, DataType dataType, uint64_t numNodes) - : InMemLists{move(fName), move(dataType), Types::getDataTypeSize(dataType), numNodes} { + : InMemLists{ + std::move(fName), std::move(dataType), Types::getDataTypeSize(dataType), numNodes} { assert(this->dataType.typeID == STRING || this->dataType.typeID == LIST); overflowInMemFile = make_unique(StorageUtils::getOverflowFileName(this->fName)); @@ -212,6 +212,8 @@ unique_ptr InMemListsFactory::getInMemPropertyLists( return make_unique(fName, numNodes); case LIST: return make_unique(fName, dataType, numNodes); + case INTERNAL_ID: + return make_unique(fName, numNodes); default: throw CopyException("Invalid type for property list creation."); } diff --git a/src/storage/index/hash_index.cpp b/src/storage/index/hash_index.cpp index 83437aefe0c..3a4e16600b1 100644 --- a/src/storage/index/hash_index.cpp +++ b/src/storage/index/hash_index.cpp @@ -10,7 +10,7 @@ namespace storage { template HashIndexLocalLookupState TemplatedHashIndexLocalStorage::lookup( - const T& key, node_offset_t& result) { + const T& key, offset_t& result) { if (localDeletions.contains(key)) { return HashIndexLocalLookupState::KEY_DELETED; } else if (localInsertions.contains(key)) { @@ -31,7 +31,7 @@ void TemplatedHashIndexLocalStorage::deleteKey(const T& key) { } template -bool TemplatedHashIndexLocalStorage::insert(const T& key, node_offset_t value) { +bool TemplatedHashIndexLocalStorage::insert(const T& key, offset_t value) { if (localDeletions.contains(key)) { localDeletions.erase(key); } @@ -45,7 +45,7 @@ bool TemplatedHashIndexLocalStorage::insert(const T& key, node_offset_t value template class TemplatedHashIndexLocalStorage; template class TemplatedHashIndexLocalStorage; -HashIndexLocalLookupState HashIndexLocalStorage::lookup(const uint8_t* key, node_offset_t& result) { +HashIndexLocalLookupState HashIndexLocalStorage::lookup(const uint8_t* key, offset_t& result) { shared_lock sLck{localStorageSharedMutex}; if (keyDataType.typeID == INT64) { auto keyVal = *(int64_t*)key; @@ -69,7 +69,7 @@ void HashIndexLocalStorage::deleteKey(const uint8_t* key) { } } -bool HashIndexLocalStorage::insert(const uint8_t* key, node_offset_t value) { +bool HashIndexLocalStorage::insert(const uint8_t* key, offset_t value) { unique_lock xLck{localStorageSharedMutex}; if (keyDataType.typeID == INT64) { auto keyVal = *(int64_t*)key; @@ -82,7 +82,7 @@ bool HashIndexLocalStorage::insert(const uint8_t* key, node_offset_t value) { } void HashIndexLocalStorage::applyLocalChanges(const std::function& deleteOp, - const std::function& insertOp) { + const std::function& insertOp) { if (keyDataType.typeID == INT64) { for (auto& key : templatedLocalStorageForInt.localDeletions) { deleteOp((uint8_t*)&key); @@ -154,8 +154,7 @@ HashIndex::HashIndex(const StorageStructureIDAndFName& storageStructureIDAndF // - the key is neither deleted nor found in the local storage, lookup in the persistent // storage. template -bool HashIndex::lookupInternal( - Transaction* transaction, const uint8_t* key, node_offset_t& result) { +bool HashIndex::lookupInternal(Transaction* transaction, const uint8_t* key, offset_t& result) { if (transaction->isReadOnly()) { return lookupInPersistentIndex(transaction->getType(), key, result); } else { @@ -186,8 +185,8 @@ void HashIndex::deleteInternal(const uint8_t* key) const { // index, if // so, return false, else insert the key to the local storage. template -bool HashIndex::insertInternal(const uint8_t* key, node_offset_t value) { - node_offset_t tmpResult; +bool HashIndex::insertInternal(const uint8_t* key, offset_t value) { + offset_t tmpResult; auto localLookupState = localStorage->lookup(key, tmpResult); if (localLookupState == HashIndexLocalLookupState::KEY_FOUND) { return false; @@ -202,7 +201,7 @@ bool HashIndex::insertInternal(const uint8_t* key, node_offset_t value) { template template bool HashIndex::performActionInChainedSlots(TransactionType trxType, HashIndexHeader& header, - SlotInfo& slotInfo, const uint8_t* key, node_offset_t& result) { + SlotInfo& slotInfo, const uint8_t* key, offset_t& result) { while (slotInfo.slotType == SlotType::PRIMARY || slotInfo.slotId != 0) { auto slot = getSlot(trxType, slotInfo); if constexpr (action == ChainedSlotsAction::FIND_FREE_SLOT) { @@ -215,8 +214,8 @@ bool HashIndex::performActionInChainedSlots(TransactionType trxType, HashInde auto entryPos = findMatchedEntryInSlot(trxType, slot, key); if (entryPos != SlotHeader::INVALID_ENTRY_POS) { if constexpr (action == ChainedSlotsAction::LOOKUP_IN_SLOTS) { - result = *( - node_offset_t*)(slot.entries[entryPos].data + indexHeader->numBytesPerKey); + result = + *(offset_t*)(slot.entries[entryPos].data + indexHeader->numBytesPerKey); } else if constexpr (action == ChainedSlotsAction::DELETE_IN_SLOTS) { slot.header.setEntryInvalid(entryPos); slot.header.numEntries--; @@ -234,7 +233,7 @@ bool HashIndex::performActionInChainedSlots(TransactionType trxType, HashInde template bool HashIndex::lookupInPersistentIndex( - TransactionType trxType, const uint8_t* key, node_offset_t& result) { + TransactionType trxType, const uint8_t* key, offset_t& result) { auto header = trxType == TransactionType::READ_ONLY ? *indexHeader : headerArray->get(INDEX_HEADER_IDX_IN_ARRAY, TransactionType::WRITE); @@ -244,7 +243,7 @@ bool HashIndex::lookupInPersistentIndex( } template -void HashIndex::insertIntoPersistentIndex(const uint8_t* key, node_offset_t value) { +void HashIndex::insertIntoPersistentIndex(const uint8_t* key, offset_t value) { auto header = headerArray->get(INDEX_HEADER_IDX_IN_ARRAY, TransactionType::WRITE); slot_id_t numRequiredEntries = getNumRequiredEntries(header.numEntries, 1); while (numRequiredEntries > @@ -253,7 +252,7 @@ void HashIndex::insertIntoPersistentIndex(const uint8_t* key, node_offset_t v } auto pSlotId = getPrimarySlotIdForKey(header, key); SlotInfo slotInfo{pSlotId, SlotType::PRIMARY}; - node_offset_t result; + offset_t result; performActionInChainedSlots( TransactionType::WRITE, header, slotInfo, key, result); Slot slot = getSlot(TransactionType::WRITE, slotInfo); @@ -266,7 +265,7 @@ template void HashIndex::deleteFromPersistentIndex(const uint8_t* key) { auto header = headerArray->get(INDEX_HEADER_IDX_IN_ARRAY, TransactionType::WRITE); SlotInfo slotInfo{getPrimarySlotIdForKey(header, key), SlotType::PRIMARY}; - node_offset_t result; + offset_t result; performActionInChainedSlots( TransactionType::WRITE, header, slotInfo, key, result); headerArray->update(INDEX_HEADER_IDX_IN_ARRAY, header); @@ -341,8 +340,8 @@ vector>> HashIndex::getChainedSlots(slot_id_t pSlotId) } template -void HashIndex::copyAndUpdateSlotHeader(bool isCopyEntry, Slot& slot, entry_pos_t entryPos, - const uint8_t* key, node_offset_t value) { +void HashIndex::copyAndUpdateSlotHeader( + bool isCopyEntry, Slot& slot, entry_pos_t entryPos, const uint8_t* key, offset_t value) { if (isCopyEntry) { memcpy(slot.entries[entryPos].data, key, indexHeader->numBytesPerEntry); } else { @@ -353,8 +352,8 @@ void HashIndex::copyAndUpdateSlotHeader(bool isCopyEntry, Slot& slot, entr } template -void HashIndex::copyKVOrEntryToSlot(bool isCopyEntry, const SlotInfo& slotInfo, Slot& slot, - const uint8_t* key, node_offset_t value) { +void HashIndex::copyKVOrEntryToSlot( + bool isCopyEntry, const SlotInfo& slotInfo, Slot& slot, const uint8_t* key, offset_t value) { if (slot.header.numEntries == HashIndexConfig::SLOT_CAPACITY) { // Allocate a new oSlot, insert the entry to the new oSlot, and update slot's // nextOvfSlotId. @@ -391,7 +390,7 @@ template void HashIndex::prepareCommit() { localStorage->applyLocalChanges( [this](const uint8_t* key) -> void { this->deleteFromPersistentIndex(key); }, - [this](const uint8_t* key, node_offset_t value) -> void { + [this](const uint8_t* key, offset_t value) -> void { this->insertIntoPersistentIndex(key, value); }); } @@ -436,7 +435,7 @@ template class HashIndex; template class HashIndex; bool PrimaryKeyIndex::lookup( - Transaction* trx, ValueVector* keyVector, uint64_t vectorPos, node_offset_t& result) { + Transaction* trx, ValueVector* keyVector, uint64_t vectorPos, offset_t& result) { assert(!keyVector->isNull(vectorPos)); if (keyDataTypeID == INT64) { auto key = keyVector->getValue(vectorPos); @@ -460,7 +459,7 @@ void PrimaryKeyIndex::deleteKey(ValueVector* keyVector, uint64_t vectorPos) { } } -bool PrimaryKeyIndex::insert(ValueVector* keyVector, uint64_t vectorPos, node_offset_t value) { +bool PrimaryKeyIndex::insert(ValueVector* keyVector, uint64_t vectorPos, offset_t value) { assert(!keyVector->isNull(vectorPos)); if (keyDataTypeID == INT64) { auto key = keyVector->getValue(vectorPos); diff --git a/src/storage/index/hash_index_builder.cpp b/src/storage/index/hash_index_builder.cpp index 208d8a0b854..a91c4b15f51 100644 --- a/src/storage/index/hash_index_builder.cpp +++ b/src/storage/index/hash_index_builder.cpp @@ -55,7 +55,7 @@ void HashIndexBuilder::bulkReserve(uint32_t numEntries_) { } template -bool HashIndexBuilder::appendInternal(const uint8_t* key, node_offset_t value) { +bool HashIndexBuilder::appendInternal(const uint8_t* key, offset_t value) { SlotInfo pSlotInfo{getPrimarySlotIdForKey(*indexHeader, key), SlotType::PRIMARY}; auto currentSlotInfo = pSlotInfo; Slot* currentSlot = nullptr; @@ -81,7 +81,7 @@ bool HashIndexBuilder::appendInternal(const uint8_t* key, node_offset_t value } template -bool HashIndexBuilder::lookupInternalWithoutLock(const uint8_t* key, node_offset_t& result) { +bool HashIndexBuilder::lookupInternalWithoutLock(const uint8_t* key, offset_t& result) { SlotInfo pSlotInfo{getPrimarySlotIdForKey(*indexHeader, key), SlotType::PRIMARY}; SlotInfo currentSlotInfo = pSlotInfo; Slot* currentSlot; @@ -132,7 +132,7 @@ Slot* HashIndexBuilder::getSlot(const SlotInfo& slotInfo) { template template bool HashIndexBuilder::lookupOrExistsInSlotWithoutLock( - Slot* slot, const uint8_t* key, node_offset_t* result) { + Slot* slot, const uint8_t* key, offset_t* result) { for (auto entryPos = 0u; entryPos < HashIndexConfig::SLOT_CAPACITY; entryPos++) { if (!slot->header.isEntryValid(entryPos)) { continue; @@ -140,7 +140,7 @@ bool HashIndexBuilder::lookupOrExistsInSlotWithoutLock( auto& entry = slot->entries[entryPos]; if (keyEqualsFunc(key, entry.data, inMemOverflowFile.get())) { if constexpr (IS_LOOKUP) { - memcpy(result, entry.data + indexHeader->numBytesPerKey, sizeof(node_offset_t)); + memcpy(result, entry.data + indexHeader->numBytesPerKey, sizeof(offset_t)); } return true; } @@ -150,7 +150,7 @@ bool HashIndexBuilder::lookupOrExistsInSlotWithoutLock( template void HashIndexBuilder::insertToSlotWithoutLock( - Slot* slot, const uint8_t* key, node_offset_t value) { + Slot* slot, const uint8_t* key, offset_t value) { if (slot->header.numEntries == HashIndexConfig::SLOT_CAPACITY) { // Allocate a new oSlot and change the nextOvfSlotId. auto ovfSlotId = allocateAOSlot(); diff --git a/src/storage/node_id_compression_scheme.cpp b/src/storage/node_id_compression_scheme.cpp index ba2e5b2bbb7..61060f3578a 100644 --- a/src/storage/node_id_compression_scheme.cpp +++ b/src/storage/node_id_compression_scheme.cpp @@ -8,7 +8,7 @@ void NodeIDCompressionScheme::readNodeID(uint8_t* data, nodeID_t* nodeID) const memcpy(&*nodeID, data, sizeof(nodeID_t)); } else { nodeID->tableID = commonTableID; - memcpy(&nodeID->offset, data, sizeof(node_offset_t)); + memcpy(&nodeID->offset, data, sizeof(offset_t)); } } @@ -16,7 +16,7 @@ void NodeIDCompressionScheme::writeNodeID(uint8_t* data, const nodeID_t& nodeID) if (commonTableID == INVALID_TABLE_ID) { memcpy(data, &nodeID, sizeof(nodeID_t)); } else { - memcpy(data, &nodeID.offset, sizeof(node_offset_t)); + memcpy(data, &nodeID.offset, sizeof(offset_t)); } } diff --git a/src/storage/storage_structure/column.cpp b/src/storage/storage_structure/column.cpp index da9ed08f0f3..2c0f0baeffc 100644 --- a/src/storage/storage_structure/column.cpp +++ b/src/storage/storage_structure/column.cpp @@ -56,7 +56,7 @@ void Column::writeValues( } } -Value Column::readValue(node_offset_t offset) { +Value Column::readValue(offset_t offset) { auto cursor = PageUtils::getPageElementCursorForPos(offset, numElementsPerPage); auto frame = bufferManager.pin(fileHandle, cursor.pageIdx); auto retVal = Value(dataType, frame + mapElementPosToByteOffset(cursor.elemPosInPage)); @@ -64,7 +64,7 @@ Value Column::readValue(node_offset_t offset) { return retVal; } -bool Column::isNull(node_offset_t nodeOffset, Transaction* transaction) { +bool Column::isNull(offset_t nodeOffset, Transaction* transaction) { auto cursor = PageUtils::getPageElementCursorForPos(nodeOffset, numElementsPerPage); auto originalPageIdx = cursor.pageIdx; fileHandle.acquirePageLock(originalPageIdx, true /* block */); @@ -91,7 +91,7 @@ bool Column::isNull(node_offset_t nodeOffset, Transaction* transaction) { return isNull; } -void Column::setNodeOffsetToNull(node_offset_t nodeOffset) { +void Column::setNodeOffsetToNull(offset_t nodeOffset) { auto updatedPageInfoAndWALPageFrame = beginUpdatingPageAndWriteOnlyNullBit(nodeOffset, true /* isNull */); StorageStructureUtils::unpinWALPageAndReleaseOriginalPageLock( @@ -122,7 +122,7 @@ void Column::lookup(Transaction* transaction, const shared_ptr& res bufferManager.unpin(*fileHandleToPin, pageIdxToPin); } -WALPageIdxPosInPageAndFrame Column::beginUpdatingPage(node_offset_t nodeOffset, +WALPageIdxPosInPageAndFrame Column::beginUpdatingPage(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) { auto isNull = vectorToWriteFrom->isNull(posInVectorToWriteFrom); auto walPageInfo = beginUpdatingPageAndWriteOnlyNullBit(nodeOffset, isNull); @@ -133,13 +133,13 @@ WALPageIdxPosInPageAndFrame Column::beginUpdatingPage(node_offset_t nodeOffset, } WALPageIdxPosInPageAndFrame Column::beginUpdatingPageAndWriteOnlyNullBit( - node_offset_t nodeOffset, bool isNull) { + offset_t nodeOffset, bool isNull) { auto walPageInfo = createWALVersionOfPageIfNecessaryForElement(nodeOffset, numElementsPerPage); setNullBitOfAPosInFrame(walPageInfo.frame, walPageInfo.posInPage, isNull); return walPageInfo; } -void Column::writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, +void Column::writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) { auto updatedPageInfoAndWALPageFrame = beginUpdatingPage(nodeOffset, vectorToWriteFrom, posInVectorToWriteFrom); @@ -147,7 +147,7 @@ void Column::writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, updatedPageInfoAndWALPageFrame, fileHandle, bufferManager, *wal); } -void StringPropertyColumn::writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, +void StringPropertyColumn::writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) { auto updatedPageInfoAndWALPageFrame = beginUpdatingPage(nodeOffset, vectorToWriteFrom, posInVectorToWriteFrom); @@ -167,7 +167,7 @@ void StringPropertyColumn::writeValueForSingleNodeIDPosition(node_offset_t nodeO updatedPageInfoAndWALPageFrame, fileHandle, bufferManager, *wal); } -Value StringPropertyColumn::readValue(node_offset_t offset) { +Value StringPropertyColumn::readValue(offset_t offset) { auto cursor = PageUtils::getPageElementCursorForPos(offset, numElementsPerPage); ku_string_t kuString; auto frame = bufferManager.pin(fileHandle, cursor.pageIdx); @@ -176,7 +176,7 @@ Value StringPropertyColumn::readValue(node_offset_t offset) { return Value(diskOverflowFile.readString(TransactionType::READ_ONLY, kuString)); } -void ListPropertyColumn::writeValueForSingleNodeIDPosition(node_offset_t nodeOffset, +void ListPropertyColumn::writeValueForSingleNodeIDPosition(offset_t nodeOffset, const shared_ptr& vectorToWriteFrom, uint32_t posInVectorToWriteFrom) { assert(vectorToWriteFrom->dataType.typeID == LIST); auto updatedPageInfoAndWALPageFrame = @@ -193,7 +193,7 @@ void ListPropertyColumn::writeValueForSingleNodeIDPosition(node_offset_t nodeOff updatedPageInfoAndWALPageFrame, fileHandle, bufferManager, *wal); } -Value ListPropertyColumn::readValue(node_offset_t offset) { +Value ListPropertyColumn::readValue(offset_t offset) { auto cursor = PageUtils::getPageElementCursorForPos(offset, numElementsPerPage); ku_list_t kuList; auto frame = bufferManager.pin(fileHandle, cursor.pageIdx); diff --git a/src/storage/storage_structure/lists/lists.cpp b/src/storage/storage_structure/lists/lists.cpp index f858830b3d8..db2fa421f17 100644 --- a/src/storage/storage_structure/lists/lists.cpp +++ b/src/storage/storage_structure/lists/lists.cpp @@ -34,18 +34,18 @@ void Lists::readValues( } void Lists::readFromSmallList(const shared_ptr& valueVector, ListHandle& listHandle) { - auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); auto pageCursor = PageUtils::getPageElementCursorForPos( ListHeaders::getSmallListCSROffset(listHandle.getListHeader()), numElementsPerPage); - readBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper); + readBySequentialCopy( + Transaction::getDummyReadOnlyTrx().get(), valueVector, pageCursor, listHandle.mapper); } void Lists::readFromLargeList(const shared_ptr& valueVector, ListHandle& listHandle) { // Assumes that the associated adjList has already updated the syncState. auto pageCursor = PageUtils::getPageElementCursorForPos(listHandle.getStartElemOffset(), numElementsPerPage); - auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); - readBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper); + readBySequentialCopy( + Transaction::getDummyReadOnlyTrx().get(), valueVector, pageCursor, listHandle.mapper); } void Lists::readFromList(const shared_ptr& valueVector, ListHandle& listHandle) { @@ -57,7 +57,7 @@ void Lists::readFromList(const shared_ptr& valueVector, ListHandle& } uint64_t Lists::getNumElementsInPersistentStore( - TransactionType transactionType, node_offset_t nodeOffset) { + TransactionType transactionType, offset_t nodeOffset) { if (transactionType == TransactionType::WRITE && listsUpdatesStore->isNewlyAddedNode( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset)) { @@ -67,7 +67,7 @@ uint64_t Lists::getNumElementsInPersistentStore( } void Lists::initListReadingState( - node_offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType) { + offset_t nodeOffset, ListHandle& listHandle, TransactionType transactionType) { listHandle.resetSyncState(); auto isNewlyAddedNode = listsUpdatesStore->isNewlyAddedNode( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset); @@ -94,7 +94,7 @@ void Lists::initListReadingState( } unique_ptr Lists::createInMemListWithDataFromUpdateStoreOnly( - node_offset_t nodeOffset, vector& insertedRelsTupleIdxInFT) { + offset_t nodeOffset, vector& insertedRelsTupleIdxInFT) { auto inMemList = make_unique( getNumElementsInListsUpdatesStore(nodeOffset), elementSize, mayContainNulls()); listsUpdatesStore->readInsertedRelsToList( @@ -104,7 +104,7 @@ unique_ptr Lists::createInMemListWithDataFromUpdateStoreOnly( return inMemList; } -unique_ptr Lists::writeToInMemList(node_offset_t nodeOffset, +unique_ptr Lists::writeToInMemList(offset_t nodeOffset, const vector& insertedRelTupleIdxesInFT, const unordered_set& deletedRelOffsetsForList, UpdatedPersistentListOffsets* updatedPersistentListOffsets) { @@ -121,7 +121,7 @@ unique_ptr Lists::writeToInMemList(node_offset_t nodeOffset, return inMemList; } -void Lists::fillInMemListsFromPersistentStore(node_offset_t nodeOffset, +void Lists::fillInMemListsFromPersistentStore(offset_t nodeOffset, uint64_t numElementsInPersistentStore, InMemList& inMemList, const unordered_set& deletedRelOffsetsInList, UpdatedPersistentListOffsets* updatedPersistentListOffsets) { @@ -255,7 +255,7 @@ unique_ptr> AdjLists::readAdjacencyListOfNode( // We read the adjacency list of a node in 2 steps: i) we read all the bytes from the pages // that hold the list into a buffer; and (ii) we interpret the bytes in the buffer based on the // nodeIDCompressionScheme into a vector of nodeID_t. - node_offset_t nodeOffset) { + offset_t nodeOffset) { auto header = headers->getHeader(nodeOffset); auto pageMapper = ListHandle::getPageMapper(metadata, header, nodeOffset); auto pageCursor = ListHandle::getPageCursor(header, numElementsPerPage); @@ -308,12 +308,12 @@ void AdjLists::readFromLargeList( (uint32_t)DEFAULT_VECTOR_CAPACITY); valueVector->state->initOriginalAndSelectedSize(numValuesToCopy); listHandle.setRangeToRead(nextPartBeginElemOffset, numValuesToCopy); - // map logical pageIdx to physical pageIdx + // Map logical pageIdx to physical pageIdx. auto physicalPageId = listHandle.mapper(pageCursor.pageIdx); // See comments for AdjLists::readFromSmallList. auto dummyReadOnlyTrx = Transaction::getDummyReadOnlyTrx(); readNodeIDsBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper, - nodeIDCompressionScheme, true /*isAdjLists*/); + nodeIDCompressionScheme, true /* hasNoNullGuarantee */); } // Note: This function sets the original and selected size of the DataChunk into which it will @@ -331,7 +331,7 @@ void AdjLists::readFromSmallList( auto pageCursor = PageUtils::getPageElementCursorForPos( ListHeaders::getSmallListCSROffset(listHandle.getListHeader()), numElementsPerPage); readNodeIDsBySequentialCopy(dummyReadOnlyTrx.get(), valueVector, pageCursor, listHandle.mapper, - nodeIDCompressionScheme, true /*isAdjLists*/); + nodeIDCompressionScheme, true /* hasNoNullGuarantee */); // We set the startIdx + numValuesToRead == numValuesInList in listSyncState to indicate to the // callers (e.g., the adj_list_extend or var_len_extend) that we have read the small list // already. This allows the callers to know when to switch to reading from the update store if @@ -383,9 +383,10 @@ void RelIDList::setDeletedRelsIfNecessary( auto& selVector = relIDVector->state->selVector; auto nextSelectedPos = 0u; for (auto pos = 0; pos < relIDVector->state->originalSize; ++pos) { + auto relID = relIDVector->getValue(pos); if (!listsUpdatesStore->isRelDeletedInPersistentStore( storageStructureIDAndFName.storageStructureID.listFileID, - listHandle.getBoundNodeOffset(), relIDVector->getValue(pos))) { + listHandle.getBoundNodeOffset(), relID.offset)) { selVector->selectedPositions[nextSelectedPos++] = pos; } } @@ -393,8 +394,7 @@ void RelIDList::setDeletedRelsIfNecessary( } } -unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( - node_offset_t nodeOffset) { +unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset(offset_t nodeOffset) { unordered_set deletedRelOffsetsInList; auto listHeader = headers->getHeader(nodeOffset); auto pageMapper = ListHandle::getPageMapper(metadata, listHeader, nodeOffset); @@ -422,7 +422,7 @@ unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( return deletedRelOffsetsInList; } -list_offset_t RelIDList::getListOffset(node_offset_t nodeOffset, int64_t relID) { +list_offset_t RelIDList::getListOffset(offset_t nodeOffset, offset_t relIDOffset) { auto listHeader = headers->getHeader(nodeOffset); auto pageMapper = ListHandle::getPageMapper(metadata, listHeader, nodeOffset); auto pageCursor = ListHandle::getPageCursor(listHeader, numElementsPerPage); @@ -436,7 +436,7 @@ list_offset_t RelIDList::getListOffset(node_offset_t nodeOffset, int64_t relID) getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { auto relIDInList = *(int64_t*)frame; - if (relIDInList == relID) { + if (relIDInList == relIDOffset) { bufferManager.unpin(fileHandle, physicalPageIdx); return numElementsRead; } @@ -451,5 +451,37 @@ list_offset_t RelIDList::getListOffset(node_offset_t nodeOffset, int64_t relID) return UINT64_MAX; } +// void RelIDList::readValues( +// Transaction* transaction, const shared_ptr& valueVector, ListHandle& listHandle) +// { if (listHandle.getListSourceStore() == ListSourceStore::UPDATE_STORE) { +// listsUpdatesStore->readValues( +// storageStructureIDAndFName.storageStructureID.listFileID, listHandle, valueVector); +// } else { +// // If the startElementOffset is 0, it means that this is the first time that we read +// // from the list. As a result, we need to reset the cursor and mapper. +// if (listHandle.getStartElemOffset() == 0) { +// listHandle.setMapper(metadata); +// } +// readFromList(valueVector, listHandle); +// } +//} + +void RelIDList::readFromSmallList( + const shared_ptr& valueVector, ListHandle& listHandle) { + auto pageCursor = PageUtils::getPageElementCursorForPos( + ListHeaders::getSmallListCSROffset(listHandle.getListHeader()), numElementsPerPage); + readRelIDsBySequentialCopy(Transaction::getDummyReadOnlyTrx().get(), valueVector, pageCursor, + listHandle.mapper, getRelTableID(), true /* hasNoNullGuarantee */); +} + +void RelIDList::readFromLargeList( + const shared_ptr& valueVector, ListHandle& listHandle) { + // Assumes that the associated adjList has already updated the syncState. + auto pageCursor = + PageUtils::getPageElementCursorForPos(listHandle.getStartElemOffset(), numElementsPerPage); + readRelIDsBySequentialCopy(Transaction::getDummyReadOnlyTrx().get(), valueVector, pageCursor, + listHandle.mapper, getRelTableID(), true /* hasNoNullGuarantee */); +} + } // namespace storage } // namespace kuzu diff --git a/src/storage/storage_structure/lists/lists_update_iterator.cpp b/src/storage/storage_structure/lists/lists_update_iterator.cpp index 425c7656878..ea30e3dae8a 100644 --- a/src/storage/storage_structure/lists/lists_update_iterator.cpp +++ b/src/storage/storage_structure/lists/lists_update_iterator.cpp @@ -5,7 +5,7 @@ namespace kuzu { namespace storage { -void ListsUpdateIterator::updateList(node_offset_t nodeOffset, InMemList& inMemList) { +void ListsUpdateIterator::updateList(offset_t nodeOffset, InMemList& inMemList) { seekToNodeOffsetAndSlideListsIfNecessary(nodeOffset); list_header_t oldHeader; if (nodeOffset >= @@ -33,7 +33,7 @@ void ListsUpdateIterator::updateList(node_offset_t nodeOffset, InMemList& inMemL // If the initial list is a largeList, we can simply append the data in inMemList to the // largeList. -void ListsUpdateIterator::appendToLargeList(node_offset_t nodeOffset, InMemList& inMemList) { +void ListsUpdateIterator::appendToLargeList(offset_t nodeOffset, InMemList& inMemList) { seekToNodeOffsetAndSlideListsIfNecessary(nodeOffset); auto largeListIdx = ListHeaders::getLargeListIdx( lists->headers->headersDiskArray->get(nodeOffset, TransactionType::READ_ONLY)); @@ -75,7 +75,7 @@ void ListsUpdateIterator::seekToBeginningOfChunkIdx(uint64_t chunkIdx) { } void ListsUpdateIterator::slideListsIfNecessary(uint64_t endNodeOffsetInclusive) { - for (node_offset_t nodeOffsetToSlide = curUnprocessedNodeOffset; + for (offset_t nodeOffsetToSlide = curUnprocessedNodeOffset; nodeOffsetToSlide <= endNodeOffsetInclusive; ++nodeOffsetToSlide) { list_header_t oldHeader = lists->getHeaders()->headersDiskArray->get( nodeOffsetToSlide, TransactionType::READ_ONLY); @@ -101,8 +101,7 @@ void ListsUpdateIterator::slideListsIfNecessary(uint64_t endNodeOffsetInclusive) } } -void ListsUpdateIterator::seekToNodeOffsetAndSlideListsIfNecessary( - node_offset_t nodeOffsetToSeekTo) { +void ListsUpdateIterator::seekToNodeOffsetAndSlideListsIfNecessary(offset_t nodeOffsetToSeekTo) { auto chunkIdxOfNode = StorageUtils::getListChunkIdx(nodeOffsetToSeekTo); if (curChunkIdx == UINT64_MAX) { seekToBeginningOfChunkIdx(chunkIdxOfNode); diff --git a/src/storage/storage_structure/lists/lists_update_store.cpp b/src/storage/storage_structure/lists/lists_update_store.cpp index b670fa313a5..2a3aa89908f 100644 --- a/src/storage/storage_structure/lists/lists_update_store.cpp +++ b/src/storage/storage_structure/lists/lists_update_store.cpp @@ -28,7 +28,7 @@ ListsUpdatesStore::ListsUpdatesStore(MemoryManager& memoryManager, RelTableSchem initListsUpdatesPerTablePerDirection(); } -bool ListsUpdatesStore::isNewlyAddedNode(ListFileID& listFileID, node_offset_t nodeOffset) const { +bool ListsUpdatesStore::isNewlyAddedNode(ListFileID& listFileID, offset_t nodeOffset) const { auto listsUpdatesForNodeOffset = getListsUpdatesForNodeOffsetIfExists(listFileID, nodeOffset); if (listsUpdatesForNodeOffset == nullptr) { return false; @@ -36,22 +36,21 @@ bool ListsUpdatesStore::isNewlyAddedNode(ListFileID& listFileID, node_offset_t n return listsUpdatesForNodeOffset->isNewlyAddedNode; } -uint64_t ListsUpdatesStore::getNumDeletedRels( - ListFileID& listFileID, node_offset_t nodeOffset) const { +uint64_t ListsUpdatesStore::getNumDeletedRels(ListFileID& listFileID, offset_t nodeOffset) const { auto listsUpdatesForNodeOffset = getListsUpdatesForNodeOffsetIfExists(listFileID, nodeOffset); if (listsUpdatesForNodeOffset == nullptr) { return 0; } - return listsUpdatesForNodeOffset->deletedRelIDs.size(); + return listsUpdatesForNodeOffset->deletedRelOffsets.size(); } bool ListsUpdatesStore::isRelDeletedInPersistentStore( - ListFileID& listFileID, node_offset_t nodeOffset, int64_t relID) const { + ListFileID& listFileID, offset_t nodeOffset, offset_t relOffset) const { auto listsUpdatesForNodeOffset = getListsUpdatesForNodeOffsetIfExists(listFileID, nodeOffset); if (listsUpdatesForNodeOffset == nullptr) { return false; } - return listsUpdatesForNodeOffset->deletedRelIDs.contains(relID); + return listsUpdatesForNodeOffset->deletedRelOffsets.contains(relOffset); } bool ListsUpdatesStore::hasUpdates() const { @@ -111,8 +110,8 @@ void ListsUpdatesStore::deleteRelIfNecessary(const shared_ptr& srcN auto dstNodeID = dstNodeIDVector->getValue( dstNodeIDVector->state->selVector->selectedPositions[0]); auto relID = - relIDVector->getValue(relIDVector->state->selVector->selectedPositions[0]); - auto tupleIdx = getTupleIdxIfInsertedRel(relID); + relIDVector->getValue(relIDVector->state->selVector->selectedPositions[0]); + auto tupleIdx = getTupleIdxIfInsertedRel(relID.offset); if (tupleIdx != -1) { // If the rel that we are going to delete is a newly inserted rel, we need to delete // its tupleIdx from the insertedRelsTupleIdxInFT of listsUpdatesStore in FWD and BWD @@ -137,14 +136,14 @@ void ListsUpdatesStore::deleteRelIfNecessary(const shared_ptr& srcN auto boundNodeID = direction == RelDirection::FWD ? srcNodeID : dstNodeID; if (listsUpdatesPerTablePerDirection[direction].contains(boundNodeID.tableID)) { getOrCreateListsUpdatesForNodeOffset(direction, boundNodeID) - ->deletedRelIDs.insert(relID); + ->deletedRelOffsets.insert(relID.offset); } } } } uint64_t ListsUpdatesStore::getNumInsertedRelsForNodeOffset( - ListFileID& listFileID, node_offset_t nodeOffset) const { + ListFileID& listFileID, offset_t nodeOffset) const { auto listsUpdatesForNodeOffset = getListsUpdatesForNodeOffsetIfExists(listFileID, nodeOffset); if (listsUpdatesForNodeOffset == nullptr) { return 0; @@ -173,12 +172,12 @@ void ListsUpdatesStore::readValues( } bool ListsUpdatesStore::hasAnyDeletedRelsInPersistentStore( - ListFileID& listFileID, node_offset_t nodeOffset) const { + ListFileID& listFileID, offset_t nodeOffset) const { auto listsUpdatesForNodeOffset = getListsUpdatesForNodeOffsetIfExists(listFileID, nodeOffset); if (listsUpdatesForNodeOffset == nullptr) { return false; } - return !listsUpdatesForNodeOffset->deletedRelIDs.empty(); + return !listsUpdatesForNodeOffset->deletedRelOffsets.empty(); } void ListsUpdatesStore::updateRelIfNecessary(const shared_ptr& srcNodeIDVector, @@ -227,7 +226,7 @@ void ListsUpdatesStore::updateRelIfNecessary(const shared_ptr& srcN } void ListsUpdatesStore::readUpdatesToPropertyVectorIfExists(ListFileID& listFileID, - node_offset_t nodeOffset, const shared_ptr& valueVector, + offset_t nodeOffset, const shared_ptr& valueVector, list_offset_t startListOffset) { // Note: only rel property lists can have updates. assert(listFileID.listType == ListType::REL_PROPERTY_LISTS); @@ -287,7 +286,10 @@ void ListsUpdatesStore::initInsertedRels() { factorizedTableSchema->appendColumn( make_unique(false /* isUnflat */, 0 /* dataChunkPos */, sizeof(nodeID_t))); for (auto& relProperty : relTableSchema.properties) { - auto numBytesForProperty = Types::getDataTypeSize(relProperty.dataType); + auto numBytesForProperty = + relProperty.propertyID == RelTableSchema::INTERNAL_REL_ID_PROPERTY_IDX ? + sizeof(offset_t) : + Types::getDataTypeSize(relProperty.dataType); propertyIDToColIdxMap.emplace( relProperty.propertyID, factorizedTableSchema->getNumColumns()); factorizedTableSchema->appendColumn(make_unique( @@ -341,7 +343,7 @@ ListsUpdatesForNodeOffset* ListsUpdatesStore::getOrCreateListsUpdatesForNodeOffs } ListsUpdatesForNodeOffset* ListsUpdatesStore::getListsUpdatesForNodeOffsetIfExists( - ListFileID& listFileID, node_offset_t nodeOffset) const { + ListFileID& listFileID, offset_t nodeOffset) const { auto relNodeTableAndDir = getRelNodeTableAndDirFromListFileID(listFileID); auto& listsUpdatesPerChunk = listsUpdatesPerTablePerDirection[relNodeTableAndDir.dir].at( relNodeTableAndDir.srcNodeTableID); diff --git a/src/storage/storage_structure/storage_structure.cpp b/src/storage/storage_structure/storage_structure.cpp index 3ed3de09cce..164838df0c1 100644 --- a/src/storage/storage_structure/storage_structure.cpp +++ b/src/storage/storage_structure/storage_structure.cpp @@ -63,6 +63,72 @@ void BaseColumnOrList::readBySequentialCopy(Transaction* transaction, } } +void BaseColumnOrList::readRelIDsBySequentialCopy(Transaction* transaction, + const shared_ptr& vector, PageElementCursor& cursor, + const std::function& logicalToPhysicalPageMapper, + table_id_t commonTableID, bool hasNoNullGuarantee) { + uint64_t numValuesToRead = vector->state->originalSize; + uint64_t vectorPos = 0; + auto setVectorNullFunc = getSetVectorNullFunc(hasNoNullGuarantee); + while (vectorPos != numValuesToRead) { + uint64_t numValuesInPage = numElementsPerPage - cursor.elemPosInPage; + uint64_t numValuesToReadInPage = min(numValuesInPage, numValuesToRead - vectorPos); + auto physicalPageIdx = logicalToPhysicalPageMapper(cursor.pageIdx); + readRelIDsFromAPageBySequentialCopy(transaction, vector, vectorPos, physicalPageIdx, + cursor.elemPosInPage, numValuesToReadInPage, commonTableID, setVectorNullFunc); + vectorPos += numValuesToReadInPage; + cursor.nextPage(); + } +} + +void BaseColumnOrList::readRelIDsFromAPageBySequentialCopy(Transaction* transaction, + const shared_ptr& vector, uint64_t vectorStartPos, page_idx_t physicalPageIdx, + uint16_t pagePosOfFirstElement, uint64_t numValuesToRead, table_id_t commonTableID, + const set_vec_null_func& setVectorNullFunc) { + auto [fileHandleToPin, pageIdxToPin] = + StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin( + fileHandle, physicalPageIdx, *wal, transaction->getType()); + auto frame = bufferManager.pin(*fileHandleToPin, pageIdxToPin); + setVectorNullFunc(this, vector, frame, pagePosOfFirstElement, vectorStartPos, numValuesToRead); + auto currentFrameHead = frame + getElemByteOffset(pagePosOfFirstElement); + for (auto i = 0u; i < numValuesToRead; i++) { + relID_t relID{0, commonTableID}; + relID.offset = *(offset_t*)currentFrameHead; + currentFrameHead += sizeof(offset_t); + vector->setValue(vectorStartPos + i, relID); + } + bufferManager.unpin(*fileHandleToPin, pageIdxToPin); +} + +void BaseColumnOrList::readRelIDsBySequentialCopyWithSelState(Transaction* transaction, + const shared_ptr& vector, PageElementCursor& cursor, + const std::function& logicalToPhysicalPageMapper, + table_id_t commonTableID) { + auto selectedState = vector->state; + uint64_t numValuesToRead = vector->state->originalSize; + uint64_t selectedStatePos = 0; + uint64_t vectorPos = 0; + auto set_vec_null_func = getSetVectorNullFunc(false /* hasNoNullGuarantee */); + while (true) { + uint64_t numValuesInPage = numElementsPerPage - cursor.elemPosInPage; + uint64_t numValuesToReadInPage = min(numValuesInPage, numValuesToRead - vectorPos); + if (isInRange(selectedState->selVector->selectedPositions[selectedStatePos], vectorPos, + vectorPos + numValuesToReadInPage)) { + auto physicalPageIdx = logicalToPhysicalPageMapper(cursor.pageIdx); + readRelIDsFromAPageBySequentialCopy(transaction, vector, vectorPos, physicalPageIdx, + cursor.elemPosInPage, numValuesToReadInPage, commonTableID, set_vec_null_func); + } + vectorPos += numValuesToReadInPage; + while (selectedState->selVector->selectedPositions[selectedStatePos] < vectorPos) { + selectedStatePos++; + if (selectedStatePos == selectedState->selVector->selectedSize) { + return; + } + } + cursor.nextPage(); + } +} + void BaseColumnOrList::readBySequentialCopyWithSelState(Transaction* transaction, const shared_ptr& vector, PageElementCursor& cursor, const std::function& logicalToPhysicalPageMapper) { @@ -93,15 +159,16 @@ void BaseColumnOrList::readBySequentialCopyWithSelState(Transaction* transaction void BaseColumnOrList::readNodeIDsBySequentialCopy(Transaction* transaction, const shared_ptr& valueVector, PageElementCursor& cursor, const std::function& logicalToPhysicalPageMapper, - NodeIDCompressionScheme nodeIDCompressionScheme, bool isAdjLists) { + NodeIDCompressionScheme nodeIDCompressionScheme, bool hasNoNullGuarantee) { uint64_t numValuesToRead = valueVector->state->originalSize; uint64_t vectorPos = 0; + auto setVecNullFunc = getSetVectorNullFunc(hasNoNullGuarantee); while (vectorPos != numValuesToRead) { uint64_t numValuesInPage = numElementsPerPage - cursor.elemPosInPage; uint64_t numValuesToReadInPage = min(numValuesInPage, numValuesToRead - vectorPos); auto physicalPageId = logicalToPhysicalPageMapper(cursor.pageIdx); readNodeIDsFromAPageBySequentialCopy(transaction, valueVector, vectorPos, physicalPageId, - cursor.elemPosInPage, numValuesToReadInPage, nodeIDCompressionScheme, isAdjLists); + cursor.elemPosInPage, numValuesToReadInPage, nodeIDCompressionScheme, setVecNullFunc); vectorPos += numValuesToReadInPage; cursor.nextPage(); } @@ -115,6 +182,7 @@ void BaseColumnOrList::readNodeIDsBySequentialCopyWithSelState(Transaction* tran uint64_t numValuesToRead = vector->state->originalSize; uint64_t selectedStatePos = 0; uint64_t vectorPos = 0; + auto setVecNullFunc = getSetVectorNullFunc(false /* hasNoNullGuarantee */); while (true) { uint64_t numValuesInPage = numElementsPerPage - cursor.elemPosInPage; uint64_t numValuesToReadInPage = min(numValuesInPage, numValuesToRead - vectorPos); @@ -123,7 +191,7 @@ void BaseColumnOrList::readNodeIDsBySequentialCopyWithSelState(Transaction* tran auto physicalPageIdx = logicalToPhysicalPageMapper(cursor.pageIdx); readNodeIDsFromAPageBySequentialCopy(transaction, vector, vectorPos, physicalPageIdx, cursor.elemPosInPage, numValuesToReadInPage, nodeIDCompressionScheme, - false /* isAdjList */); + setVecNullFunc); } vectorPos += numValuesToReadInPage; while (selectedState->selVector->selectedPositions[selectedStatePos] < vectorPos) { @@ -139,17 +207,12 @@ void BaseColumnOrList::readNodeIDsBySequentialCopyWithSelState(Transaction* tran void BaseColumnOrList::readNodeIDsFromAPageBySequentialCopy(Transaction* transaction, const shared_ptr& vector, uint64_t vectorStartPos, page_idx_t physicalPageIdx, uint16_t pagePosOfFirstElement, uint64_t numValuesToRead, - NodeIDCompressionScheme& nodeIDCompressionScheme, bool isAdjLists) { + NodeIDCompressionScheme& nodeIDCompressionScheme, const set_vec_null_func& setVecNullFunc) { auto [fileHandleToPin, pageIdxToPin] = StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin( fileHandle, physicalPageIdx, *wal, transaction->getType()); auto frame = bufferManager.pin(*fileHandleToPin, pageIdxToPin); - if (isAdjLists) { - vector->setRangeNonNull(vectorStartPos, numValuesToRead); - } else { - readNullBitsFromAPage( - vector, frame, pagePosOfFirstElement, vectorStartPos, numValuesToRead); - } + setVecNullFunc(this, vector, frame, pagePosOfFirstElement, vectorStartPos, numValuesToRead); auto currentFrameHead = frame + getElemByteOffset(pagePosOfFirstElement); for (auto i = 0u; i < numValuesToRead; i++) { nodeID_t nodeID{0, 0}; diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index 8447cfe2f8b..8dbadff4b7d 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -33,7 +33,7 @@ void NodeTable::scan(Transaction* transaction, const shared_ptr& in } } -node_offset_t NodeTable::addNodeAndResetProperties(ValueVector* primaryKeyVector) { +offset_t NodeTable::addNodeAndResetProperties(ValueVector* primaryKeyVector) { auto nodeOffset = nodesStatisticsAndDeletedIDs->addNode(tableID); assert(primaryKeyVector->state->selVector->selectedSize == 1); auto pkValPos = primaryKeyVector->state->selVector->selectedPositions[0]; @@ -70,8 +70,7 @@ void NodeTable::prepareCommitOrRollbackIfNecessary(bool isCommit) { pkIndex->prepareCommitOrRollbackIfNecessary(isCommit); } -void NodeTable::deleteNode( - node_offset_t nodeOffset, ValueVector* primaryKeyVector, uint32_t pos) const { +void NodeTable::deleteNode(offset_t nodeOffset, ValueVector* primaryKeyVector, uint32_t pos) const { nodesStatisticsAndDeletedIDs->deleteNode(tableID, nodeOffset); pkIndex->deleteKey(primaryKeyVector, pos); } diff --git a/src/storage/store/nodes_statistics_and_deleted_ids.cpp b/src/storage/store/nodes_statistics_and_deleted_ids.cpp index e0805074413..164fa4d682f 100644 --- a/src/storage/store/nodes_statistics_and_deleted_ids.cpp +++ b/src/storage/store/nodes_statistics_and_deleted_ids.cpp @@ -8,37 +8,37 @@ using namespace std; namespace kuzu { namespace storage { -NodeStatisticsAndDeletedIDs::NodeStatisticsAndDeletedIDs(table_id_t tableID, - node_offset_t maxNodeOffset, const vector& deletedNodeOffsets) +NodeStatisticsAndDeletedIDs::NodeStatisticsAndDeletedIDs( + table_id_t tableID, offset_t maxNodeOffset, const vector& deletedNodeOffsets) : tableID{tableID} { auto numTuples = geNumTuplesFromMaxNodeOffset(maxNodeOffset); TableStatistics::setNumTuples(numTuples); if (numTuples > 0) { hasDeletedNodesPerMorsel.resize((numTuples / DEFAULT_VECTOR_CAPACITY) + 1, false); } - for (node_offset_t deletedNodeOffset : deletedNodeOffsets) { + for (offset_t deletedNodeOffset : deletedNodeOffsets) { auto morselIdxAndOffset = StorageUtils::getQuotientRemainder(deletedNodeOffset, DEFAULT_VECTOR_CAPACITY); hasDeletedNodesPerMorsel[morselIdxAndOffset.first] = true; if (!deletedNodeOffsetsPerMorsel.contains(morselIdxAndOffset.first)) { - deletedNodeOffsetsPerMorsel.insert({morselIdxAndOffset.first, set()}); + deletedNodeOffsetsPerMorsel.insert({morselIdxAndOffset.first, set()}); } deletedNodeOffsetsPerMorsel.find(morselIdxAndOffset.first) ->second.insert(deletedNodeOffset); } } -node_offset_t NodeStatisticsAndDeletedIDs::addNode() { +offset_t NodeStatisticsAndDeletedIDs::addNode() { if (deletedNodeOffsetsPerMorsel.empty()) { setNumTuples(getNumTuples() + 1); return getMaxNodeOffset(); } // We return the last element in the first non-empty morsel we find auto iter = deletedNodeOffsetsPerMorsel.begin(); - set deletedNodeOffsets = iter->second; + set deletedNodeOffsets = iter->second; auto nodeOffsetIter = iter->second.end(); nodeOffsetIter--; - node_offset_t retVal = *nodeOffsetIter; + offset_t retVal = *nodeOffsetIter; iter->second.erase(nodeOffsetIter); if (iter->second.empty()) { hasDeletedNodesPerMorsel[iter->first] = false; @@ -47,7 +47,7 @@ node_offset_t NodeStatisticsAndDeletedIDs::addNode() { return retVal; } -void NodeStatisticsAndDeletedIDs::deleteNode(node_offset_t nodeOffset) { +void NodeStatisticsAndDeletedIDs::deleteNode(offset_t nodeOffset) { // TODO(Semih/Guodong): This check can go into nodeOffsetsInfoForWriteTrx->deleteNode // once errorIfNodeHasEdges is removed. This function would then just be a wrapper to init // nodeOffsetsInfoForWriteTrx before calling delete on it. @@ -66,7 +66,7 @@ void NodeStatisticsAndDeletedIDs::deleteNode(node_offset_t nodeOffset) { } errorIfNodeHasEdges(nodeOffset); if (!hasDeletedNodesPerMorsel[morselIdxAndOffset.first]) { - set deletedNodeOffsets; + set deletedNodeOffsets; deletedNodeOffsetsPerMorsel.insert({morselIdxAndOffset.first, deletedNodeOffsets}); } deletedNodeOffsetsPerMorsel.find(morselIdxAndOffset.first)->second.insert(nodeOffset); @@ -112,8 +112,8 @@ void NodeStatisticsAndDeletedIDs::setNumTuples(uint64_t numTuples) { } } -vector NodeStatisticsAndDeletedIDs::getDeletedNodeOffsets() { - vector retVal; +vector NodeStatisticsAndDeletedIDs::getDeletedNodeOffsets() { + vector retVal; auto morselIter = deletedNodeOffsetsPerMorsel.begin(); while (morselIter != deletedNodeOffsetsPerMorsel.end()) { retVal.insert(retVal.cend(), morselIter->second.begin(), morselIter->second.end()); @@ -122,7 +122,7 @@ vector NodeStatisticsAndDeletedIDs::getDeletedNodeOffsets() { return retVal; } -void NodeStatisticsAndDeletedIDs::errorIfNodeHasEdges(node_offset_t nodeOffset) { +void NodeStatisticsAndDeletedIDs::errorIfNodeHasEdges(offset_t nodeOffset) { for (AdjLists* adjList : adjListsAndColumns.first) { auto numElementsInList = adjList->getTotalNumElementsInList(TransactionType::WRITE, nodeOffset); @@ -144,7 +144,7 @@ void NodeStatisticsAndDeletedIDs::errorIfNodeHasEdges(node_offset_t nodeOffset) } } -bool NodeStatisticsAndDeletedIDs::isDeleted(node_offset_t nodeOffset, uint64_t morselIdx) { +bool NodeStatisticsAndDeletedIDs::isDeleted(offset_t nodeOffset, uint64_t morselIdx) { auto iter = deletedNodeOffsetsPerMorsel.find(morselIdx); if (iter != deletedNodeOffsetsPerMorsel.end()) { return iter->second.contains(nodeOffset); @@ -174,8 +174,8 @@ void NodesStatisticsAndDeletedIDs::setAdjListsAndColumns(RelsStore* relsStore) { } } -map NodesStatisticsAndDeletedIDs::getMaxNodeOffsetPerTable() const { - map retVal; +map NodesStatisticsAndDeletedIDs::getMaxNodeOffsetPerTable() const { + map retVal; for (auto& tableIDStatistics : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { retVal[tableIDStatistics.first] = getNodeStatisticsAndDeletedIDs(tableIDStatistics.first)->getMaxNodeOffset(); @@ -212,7 +212,7 @@ void NodesStatisticsAndDeletedIDs::addNodeStatisticsAndDeletedIDs(NodeTableSchem unique_ptr NodesStatisticsAndDeletedIDs::deserializeTableStatistics( uint64_t numTuples, uint64_t& offset, FileInfo* fileInfo, uint64_t tableID) { - vector deletedNodeIDs; + vector deletedNodeIDs; offset = SerDeser::deserializeVector(deletedNodeIDs, fileInfo, offset); return make_unique(tableID, NodeStatisticsAndDeletedIDs::getMaxNodeOffsetFromNumTuples(numTuples), deletedNodeIDs); diff --git a/src/storage/store/rel_table.cpp b/src/storage/store/rel_table.cpp index 911a464f92d..603bff6c101 100644 --- a/src/storage/store/rel_table.cpp +++ b/src/storage/store/rel_table.cpp @@ -346,12 +346,12 @@ void RelTable::addProperty(Property property) { } void RelTable::appendInMemListToLargeListOP( - ListsUpdateIterator* listsUpdateIterator, node_offset_t nodeOffset, InMemList& inMemList) { + ListsUpdateIterator* listsUpdateIterator, offset_t nodeOffset, InMemList& inMemList) { listsUpdateIterator->appendToLargeList(nodeOffset, inMemList); } void RelTable::updateListOP( - ListsUpdateIterator* listsUpdateIterator, node_offset_t nodeOffset, InMemList& inMemList) { + ListsUpdateIterator* listsUpdateIterator, offset_t nodeOffset, InMemList& inMemList) { listsUpdateIterator->updateList(nodeOffset, inMemList); } @@ -436,7 +436,7 @@ void RelTable::prepareCommitForDirection(RelDirection relDirection) { // TODO(Guodong): Do we need to access the header in this way? } else if (ListHeaders::isALargeList(adjLists->getHeaders()->headersDiskArray->get( nodeOffset, TransactionType::READ_ONLY)) && - listsUpdatesForNodeOffset->deletedRelIDs.empty() && + listsUpdatesForNodeOffset->deletedRelOffsets.empty() && !listsUpdatesForNodeOffset->hasUpdates()) { // We do an optimization for relPropertyList and adjList : If the initial list // is a largeList and we didn't delete or update any rel from the @@ -458,11 +458,10 @@ void RelTable::prepareCommitForDirection(RelDirection relDirection) { } } -void RelTable::prepareCommitForListWithUpdateStoreDataOnly(AdjLists* adjLists, - node_offset_t nodeOffset, ListsUpdatesForNodeOffset* listsUpdatesForNodeOffset, - RelDirection relDirection, ListsUpdateIteratorsForDirection* listsUpdateIteratorsForDirection, - table_id_t boundNodeTableID, - const std::function& opOnListsUpdateIterators) { auto inMemAdjLists = adjLists->createInMemListWithDataFromUpdateStoreOnly( nodeOffset, listsUpdatesForNodeOffset->insertedRelsTupleIdxInFT); @@ -478,7 +477,7 @@ void RelTable::prepareCommitForListWithUpdateStoreDataOnly(AdjLists* adjLists, } } -void RelTable::prepareCommitForList(AdjLists* adjLists, node_offset_t nodeOffset, +void RelTable::prepareCommitForList(AdjLists* adjLists, offset_t nodeOffset, ListsUpdatesForNodeOffset* listsUpdatesForNodeOffset, RelDirection relDirection, ListsUpdateIteratorsForDirection* listsUpdateIteratorsForDirection, table_id_t boundNodeTableID) { diff --git a/src/storage/store/rels_statistics.cpp b/src/storage/store/rels_statistics.cpp index 0185ae7160b..ac754304edc 100644 --- a/src/storage/store/rels_statistics.cpp +++ b/src/storage/store/rels_statistics.cpp @@ -4,7 +4,7 @@ namespace kuzu { namespace storage { RelStatistics::RelStatistics(vector> srcDstTableIDs) - : TableStatistics{0} { + : TableStatistics{0}, nextRelOffset{0} { numRelsPerDirectionBoundTable.resize(2); for (auto& [srcTableID, dstTableID] : srcDstTableIDs) { numRelsPerDirectionBoundTable[RelDirection::FWD].emplace(srcTableID, 0); @@ -32,7 +32,7 @@ void RelsStatistics::setNumRelsForTable(table_id_t relTableID, uint64_t numRels) auto relStatistics = (RelStatistics*)tablesStatisticsContentForWriteTrx->tableStatisticPerTable[relTableID] .get(); - tablesStatisticsContentForWriteTrx->nextRelID += (numRels - relStatistics->getNumTuples()); + increaseNextRelOffset(relTableID, numRels - relStatistics->getNumTuples()); relStatistics->setNumTuples(numRels); assertNumRelsIsSound(relStatistics->numRelsPerDirectionBoundTable[FWD], numRels); assertNumRelsIsSound(relStatistics->numRelsPerDirectionBoundTable[BWD], numRels); @@ -66,7 +66,7 @@ void RelsStatistics::updateNumRelsByValue( } // Update the nextRelID only when we are inserting rels. if (value > 0) { - tablesStatisticsContentForWriteTrx->nextRelID += value; + increaseNextRelOffset(relTableID, value); } assertNumRelsIsSound(relStatistics->numRelsPerDirectionBoundTable[FWD], numRelsAfterUpdate); assertNumRelsIsSound(relStatistics->numRelsPerDirectionBoundTable[BWD], numRelsAfterUpdate); @@ -86,21 +86,25 @@ void RelsStatistics::setNumRelsPerDirectionBoundTableID( } } -uint64_t RelsStatistics::getNextRelID(Transaction* transaction) { +offset_t RelsStatistics::getNextRelOffset(Transaction* transaction, table_id_t tableID) { lock_t lck{mtx}; auto& tableStatisticContent = (transaction->isReadOnly() || tablesStatisticsContentForWriteTrx == nullptr) ? tablesStatisticsContentForReadOnlyTrx : tablesStatisticsContentForWriteTrx; - return tableStatisticContent->nextRelID; + return ((RelStatistics*)tableStatisticContent->tableStatisticPerTable.at(tableID).get()) + ->getNextRelOffset(); } unique_ptr RelsStatistics::deserializeTableStatistics( uint64_t numTuples, uint64_t& offset, FileInfo* fileInfo, uint64_t tableID) { vector> numRelsPerDirectionBoundTable{2}; + offset_t nextRelOffset; offset = SerDeser::deserializeUnorderedMap(numRelsPerDirectionBoundTable[0], fileInfo, offset); offset = SerDeser::deserializeUnorderedMap(numRelsPerDirectionBoundTable[1], fileInfo, offset); - return make_unique(numTuples, move(numRelsPerDirectionBoundTable)); + offset = SerDeser::deserializeValue(nextRelOffset, fileInfo, offset); + return make_unique( + numTuples, std::move(numRelsPerDirectionBoundTable), nextRelOffset); } void RelsStatistics::serializeTableStatistics( @@ -110,6 +114,7 @@ void RelsStatistics::serializeTableStatistics( relStatistic->numRelsPerDirectionBoundTable[0], fileInfo, offset); offset = SerDeser::serializeUnorderedMap( relStatistic->numRelsPerDirectionBoundTable[1], fileInfo, offset); + offset = SerDeser::serializeValue(relStatistic->nextRelOffset, fileInfo, offset); } } // namespace storage diff --git a/src/storage/store/table_statistics.cpp b/src/storage/store/table_statistics.cpp index d2f44f143e3..f582bf19a85 100644 --- a/src/storage/store/table_statistics.cpp +++ b/src/storage/store/table_statistics.cpp @@ -16,8 +16,6 @@ void TablesStatistics::readFromFile(const string& directory) { logger->info("Reading {} from {}.", getTableTypeForPrinting(), filePath); uint64_t offset = 0; uint64_t numTables; - offset = SerDeser::deserializeValue( - tablesStatisticsContentForReadOnlyTrx->nextRelID, fileInfo.get(), offset); offset = SerDeser::deserializeValue(numTables, fileInfo.get(), offset); for (auto i = 0u; i < numTables; i++) { uint64_t numTuples; @@ -39,7 +37,6 @@ void TablesStatistics::saveToFile( tablesStatisticsContentForWriteTrx == nullptr) ? tablesStatisticsContentForReadOnlyTrx : tablesStatisticsContentForWriteTrx; - offset = SerDeser::serializeValue(tablesStatisticsContent->nextRelID, fileInfo.get(), offset); offset = SerDeser::serializeValue( tablesStatisticsContent->tableStatisticPerTable.size(), fileInfo.get(), offset); for (auto& tableStatistic : tablesStatisticsContent->tableStatisticPerTable) { @@ -54,8 +51,6 @@ void TablesStatistics::saveToFile( void TablesStatistics::initTableStatisticPerTableForWriteTrxIfNecessary() { if (tablesStatisticsContentForWriteTrx == nullptr) { tablesStatisticsContentForWriteTrx = make_unique(); - tablesStatisticsContentForWriteTrx->nextRelID = - tablesStatisticsContentForReadOnlyTrx->nextRelID; for (auto& tableStatistic : tablesStatisticsContentForReadOnlyTrx->tableStatisticPerTable) { tablesStatisticsContentForWriteTrx->tableStatisticPerTable[tableStatistic.first] = constructTableStatistic(tableStatistic.second.get()); diff --git a/src/storage/wal/wal_record.cpp b/src/storage/wal/wal_record.cpp index 62df58ec57b..ce99a6d0301 100644 --- a/src/storage/wal/wal_record.cpp +++ b/src/storage/wal/wal_record.cpp @@ -54,7 +54,7 @@ StorageStructureID StorageStructureID::newRelPropertyListsID(table_id_t nodeTabl retVal.isOverflow = false; retVal.storageStructureType = StorageStructureType::LISTS; retVal.listFileID = ListFileID(listFileType, - RelPropertyListID(RelNodeTableAndDir(nodeTableID, relTableID, dir), propertyID)); + RelPropertyListsID(RelNodeTableAndDir(nodeTableID, relTableID, dir), propertyID)); return retVal; } diff --git a/test/binder/binder_error_test.cpp b/test/binder/binder_error_test.cpp index 0d23d756625..2963438288c 100644 --- a/test/binder/binder_error_test.cpp +++ b/test/binder/binder_error_test.cpp @@ -91,8 +91,8 @@ TEST_F(BinderErrorTest, BindPropertyNotExist2) { TEST_F(BinderErrorTest, BindIDArithmetic) { string expectedException = - "Binder exception: Cannot match a built-in function for given function +(NODE_ID,INT64). " - "Supported inputs " + "Binder exception: Cannot match a built-in function for given function " + "+(INTERNAL_ID,INT64). Supported inputs " "are\n(INT64,INT64) -> INT64\n(INT64,DOUBLE) -> DOUBLE\n(DOUBLE,INT64) -> " "DOUBLE\n(DOUBLE,DOUBLE) -> DOUBLE\n(DATE,INT64) -> DATE\n(INT64,DATE) -> " "DATE\n(DATE,INTERVAL) -> DATE\n(INTERVAL,DATE) -> DATE\n(TIMESTAMP,INTERVAL) -> " @@ -373,7 +373,7 @@ TEST_F(BinderErrorTest, DuplicateVariableName) { TEST_F(BinderErrorTest, MaxNodeID) { string expectedException = - "Binder exception: Cannot match a built-in function for given function MIN(NODE_ID). " + "Binder exception: Cannot match a built-in function for given function MIN(INTERNAL_ID). " "Supported inputs are\nDISTINCT (BOOL) -> BOOL\n(BOOL) -> BOOL\nDISTINCT (INT64) -> " "INT64\n(INT64) -> INT64\nDISTINCT (DOUBLE) -> DOUBLE\n(DOUBLE) -> DOUBLE\nDISTINCT " "(DATE) -> DATE\n(DATE) -> DATE\nDISTINCT (STRING) -> STRING\n(STRING) -> " @@ -389,12 +389,12 @@ TEST_F(BinderErrorTest, OrderByNodeID) { ASSERT_STREQ(expectedException.c_str(), getBindingError(input).c_str()); } -TEST_F(BinderErrorTest, ReturnInternalType) { - string expectedException = - "Binder exception: Cannot return expression ID(p) with internal type NODE_ID"; - auto input = "match (p:person) return ID(p);"; - ASSERT_STREQ(expectedException.c_str(), getBindingError(input).c_str()); -} +// TEST_F(BinderErrorTest, ReturnInternalType) { +// string expectedException = +// "Binder exception: Cannot return expression ID(p) with internal type INTERNAL_ID"; +// auto input = "match (p:person) return ID(p);"; +// ASSERT_STREQ(expectedException.c_str(), getBindingError(input).c_str()); +//} TEST_F(BinderErrorTest, DropColumnFromNonExistedTable) { string expectedException = "Binder exception: Node/Rel person1 does not exist."; diff --git a/test/copy/copy_test.cpp b/test/copy/copy_test.cpp index aaeb8748a40..c1bfcb4bcb6 100644 --- a/test/copy/copy_test.cpp +++ b/test/copy/copy_test.cpp @@ -126,7 +126,7 @@ TEST_F(CopyNodePropertyTest, NodeStructuredStringPropertyTest) { void verifyP0ToP5999(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists) { // p0 has 5001 fwd edges to p0...p5000 - node_offset_t p0Offset = 0; + offset_t p0Offset = 0; auto pOFwdList = knowsTablePTablePKnowsLists.fwdPKnowsLists->readAdjacencyListOfNode(p0Offset); EXPECT_EQ(5001, pOFwdList->size()); for (int nodeOffset = 0; nodeOffset <= 5000; ++nodeOffset) { @@ -142,7 +142,7 @@ void verifyP0ToP5999(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists) { // p1,p2,...,p5000 have a single fwd edge to p5000 and 1 bwd edge from node p0 nodeID_t nodeIDP5000(5000ul, knowsTablePTablePKnowsLists.pNodeTableID); - for (node_offset_t nodeOffset = 1; nodeOffset <= 5000; ++nodeOffset) { + for (offset_t nodeOffset = 1; nodeOffset <= 5000; ++nodeOffset) { auto fwdAdjList = knowsTablePTablePKnowsLists.fwdPKnowsLists->readAdjacencyListOfNode(nodeOffset); EXPECT_EQ(1, fwdAdjList->size()); @@ -154,7 +154,7 @@ void verifyP0ToP5999(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists) { } // p5001 to p6000 are singletons - for (node_offset_t nodeOffset = 5001; nodeOffset < 6000; ++nodeOffset) { + for (offset_t nodeOffset = 5001; nodeOffset < 6000; ++nodeOffset) { EXPECT_TRUE(knowsTablePTablePKnowsLists.fwdPKnowsLists->readAdjacencyListOfNode(nodeOffset) ->empty()); EXPECT_TRUE(knowsTablePTablePKnowsLists.bwdPKnowsLists->readAdjacencyListOfNode(nodeOffset) @@ -166,8 +166,8 @@ void verifya0Andp6000(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists, const Catalog& catalog, StorageManager* storageManager) { auto aTableAKnowsLists = getATableAKnowsLists(catalog, storageManager); // a0 has 1 fwd edge to p6000, and no backward edges. - node_offset_t a0NodeOffset = 0; - node_offset_t p6000NodeOffset = 6000; + offset_t a0NodeOffset = 0; + offset_t p6000NodeOffset = 6000; auto a0FwdList = aTableAKnowsLists.fwdAKnowsLists->readAdjacencyListOfNode(a0NodeOffset); EXPECT_EQ(1, a0FwdList->size()); nodeID_t p6000NodeID(p6000NodeOffset, knowsTablePTablePKnowsLists.pNodeTableID); @@ -186,7 +186,7 @@ void verifya0Andp6000(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists, } void verifyP6001ToP65999(KnowsTablePTablePKnowsLists& knowsTablePTablePKnowsLists) { - for (node_offset_t node_offset_t = 6001; node_offset_t < 66000; ++node_offset_t) { + for (offset_t node_offset_t = 6001; node_offset_t < 66000; ++node_offset_t) { EXPECT_TRUE( knowsTablePTablePKnowsLists.fwdPKnowsLists->readAdjacencyListOfNode(node_offset_t) ->empty()); diff --git a/test/runner/e2e_copy_transaction_test.cpp b/test/runner/e2e_copy_transaction_test.cpp index 91b1678d9d5..6df71d50186 100644 --- a/test/runner/e2e_copy_transaction_test.cpp +++ b/test/runner/e2e_copy_transaction_test.cpp @@ -131,8 +131,8 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { validateRelColumnAndListFilesExistence( relTableSchema, DBFileType::ORIGINAL, true /* existence */); auto dummyWriteTrx = Transaction::getDummyWriteTrx(); - ASSERT_EQ(getStorageManager(*database)->getRelsStore().getRelsStatistics().getNextRelID( - dummyWriteTrx.get()), + ASSERT_EQ(getStorageManager(*database)->getRelsStore().getRelsStatistics().getNextRelOffset( + dummyWriteTrx.get(), tableID), 14); } @@ -148,7 +148,7 @@ class TinySnbCopyCSVTransactionTest : public EmptyDBTest { validateTinysnbKnowsDateProperty(); auto& relsStatistics = getStorageManager(*database)->getRelsStore().getRelsStatistics(); auto dummyWriteTrx = Transaction::getDummyWriteTrx(); - ASSERT_EQ(relsStatistics.getNextRelID(dummyWriteTrx.get()), 14); + ASSERT_EQ(relsStatistics.getNextRelOffset(dummyWriteTrx.get(), knowsTableID), 14); ASSERT_EQ(relsStatistics.getReadOnlyVersion()->tableStatisticPerTable.size(), 1); auto knowsRelStatistics = (RelStatistics*)relsStatistics.getReadOnlyVersion() ->tableStatisticPerTable.at(knowsTableID) diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index daa4790b71e..f038f6fb840 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -339,7 +339,7 @@ class TinySnbDDLTest : public DBTest { auto result = conn->query( "MATCH (:person)-[s:studyAt]->(:organisation) RETURN * ORDER BY s.year DESC LIMIT 1"); ASSERT_EQ(TestHelper::convertResultToString(*result), - vector{"(0:0)-[{_id:14, year:2021}]->(1:0)"}); + vector{"(0:0)-[{_id:4:0, year:2021}]->(1:0)"}); } void ddlStatementsInsideActiveTransactionErrorTest(string query) { diff --git a/test/runner/e2e_update_node_test.cpp b/test/runner/e2e_update_node_test.cpp index 1d0998d64c5..5dab02d5e9f 100644 --- a/test/runner/e2e_update_node_test.cpp +++ b/test/runner/e2e_update_node_test.cpp @@ -227,8 +227,8 @@ TEST_F(TinySnbUpdateTest, InsertSingleNToNRelTest) { "CREATE (a)-[:knows {meetTime:timestamp('1976-12-23 11:21:42'), validInterval:interval('2 " "years'), comments:['A', 'k'], date:date('1997-03-22')}]->(b);"); auto groundTruth = - vector{"9|10|(0:6)-[{_id:40, date:1997-03-22, meetTime:1976-12-23 11:21:42, " - "validInterval:2 years, comments:[A,k]}]->(0:7)|40"}; + vector{"9|10|(0:6)-[{_id:3:14, date:1997-03-22, meetTime:1976-12-23 11:21:42, " + "validInterval:2 years, comments:[A,k]}]->(0:7)|3:14"}; auto result = conn->query( "MATCH (a:person)-[e:knows]->(b:person) WHERE a.ID > 8 RETURN a.ID, b.ID, e, ID(e)"); ASSERT_EQ(TestHelper::convertResultToString(*result), groundTruth); @@ -238,9 +238,9 @@ TEST_F(TinySnbUpdateTest, InsertSingleNTo1RelTest) { // insert studyAt edge between Greg and CsWork conn->query("MATCH (a:person), (b:organisation) WHERE a.ID = 9 AND b.orgCode = 934 " "CREATE (a)-[:studyAt {year:2022}]->(b);"); - auto groundTruth = vector{ - "8|325|(0:5)-[{_id:16, year:2020, places:[awndsnjwejwen,isuhuwennjnuhuhuwewe]}]->(1:0)|16", - "9|934|(0:6)-[{_id:40, year:2022, places:}]->(1:1)|40"}; + auto groundTruth = vector{"8|325|(0:5)-[{_id:4:2, year:2020, " + "places:[awndsnjwejwen,isuhuwennjnuhuhuwewe]}]->(1:0)|4:2", + "9|934|(0:6)-[{_id:4:3, year:2022, places:}]->(1:1)|4:3"}; auto result = conn->query("MATCH (a:person)-[e:studyAt]->(b:organisation) WHERE a.ID > 5 " "RETURN a.ID, b.orgCode, e, ID(e)"); ASSERT_EQ(TestHelper::convertResultToString(*result), groundTruth); @@ -257,6 +257,7 @@ TEST_F(TinySnbUpdateTest, InsertRepeatedNToNRelTest) { TEST_F(TinySnbUpdateTest, InsertMixedRelTest) { conn->query( + "MATCH (a:person), (b:person), (c:organisation) WHERE a.ID = 0 AND b.ID = 9 AND c.ID = 4 " "MATCH (a:person), (b:person), (c:organisation) WHERE a.ID = 0 AND b.ID = 9 AND c.ID = 4 " "CREATE (b)-[:studyAt]->(c), (a)<-[:knows]-(b)"); auto groundTruth = vector{"9"}; @@ -268,7 +269,7 @@ TEST_F(TinySnbUpdateTest, InsertMixedRelTest) { TEST_F(TinySnbUpdateTest, InsertMultipleRelsTest) { conn->query("MATCH (a:person)-[:knows]->(b:person) WHERE a.ID = 7 " "CREATE (a)<-[:knows]-(b);"); - auto groundTruth = vector{"7|8|12", "7|9|13", "8|7|40", "9|7|41"}; + auto groundTruth = vector{"7|8|3:12", "7|9|3:13", "8|7|3:14", "9|7|3:15"}; auto result = conn->query("MATCH (a:person)-[e:knows]->(b:person) WHERE a.ID > 6 RETURN a.ID, " "b.ID, ID(e)"); ASSERT_EQ(TestHelper::convertResultToString(*result), groundTruth); @@ -286,7 +287,7 @@ TEST_F(TinySnbUpdateTest, InsertNodeAndRelTest) { TEST_F(TinySnbUpdateTest, InsertNodeAndRelTest2) { conn->query( "CREATE (c:organisation {ID:50})<-[:workAt]-(a:person {ID:100}), (a)-[:studyAt]->(c)"); - auto groundTruth = vector{"100|50|41|40"}; + auto groundTruth = vector{"100|50|4:3|5:3"}; auto result = conn->query( "MATCH (a:person)-[e1:studyAt]->(b:organisation), (a)-[e2:workAt]->(b) RETURN a.ID, " "b.ID, ID(e1), ID(e2)"); diff --git a/test/storage/node_insertion_deletion_test.cpp b/test/storage/node_insertion_deletion_test.cpp index 9f2b3b902c9..2c13eca47d2 100644 --- a/test/storage/node_insertion_deletion_test.cpp +++ b/test/storage/node_insertion_deletion_test.cpp @@ -33,19 +33,19 @@ class NodeInsertionDeletionTests : public DBTest { conn->beginWriteTransaction(); } - node_offset_t addNode() { + offset_t addNode() { // TODO(Guodong/Semih/Xiyang): Currently it is not clear when and from where the hash index, // structured columns, adjacency Lists, and adj columns of a // newly added node should be informed that a new node is being inserted, so these data // structures either write values or NULLs or empty Lists etc. Within the scope of these // tests we only have an ID column and we are manually from outside // NodesStatisticsAndDeletedIDs adding a NULL value for the ID. This should change later. - node_offset_t nodeOffset = personNodeTable->getNodeStatisticsAndDeletedIDs()->addNode( + offset_t nodeOffset = personNodeTable->getNodeStatisticsAndDeletedIDs()->addNode( personNodeTable->getTableID()); auto dataChunk = make_shared(2); // Flatten the data chunk dataChunk->state->currIdx = 0; - auto nodeIDVector = make_shared(NODE_ID, getMemoryManager(*database)); + auto nodeIDVector = make_shared(INTERNAL_ID, getMemoryManager(*database)); dataChunk->insert(0, nodeIDVector); auto idVector = make_shared(INT64, getMemoryManager(*database)); dataChunk->insert(1, idVector); @@ -73,7 +73,7 @@ TEST_F(NodeInsertionDeletionTests, DeletingSameNodeOffsetErrorsTest) { } TEST_F(NodeInsertionDeletionTests, DeleteAddMixedTest) { - for (node_offset_t nodeOffset = 1000; nodeOffset < 9000; ++nodeOffset) { + for (offset_t nodeOffset = 1000; nodeOffset < 9000; ++nodeOffset) { personNodeTable->getNodeStatisticsAndDeletedIDs()->deleteNode( personNodeTable->getTableID(), nodeOffset); } @@ -96,7 +96,7 @@ TEST_F(NodeInsertionDeletionTests, DeleteAddMixedTest) { ASSERT_EQ(conn->query(query)->getNext()->getValue(0)->getValue(), 10010); ASSERT_EQ(readConn->query(query)->getNext()->getValue(0)->getValue(), 10010); - for (node_offset_t nodeOffset = 0; nodeOffset < 10010; ++nodeOffset) { + for (offset_t nodeOffset = 0; nodeOffset < 10010; ++nodeOffset) { personNodeTable->getNodeStatisticsAndDeletedIDs()->deleteNode( personNodeTable->getTableID(), nodeOffset); } diff --git a/test/storage/rel_insertion_test.cpp b/test/storage/rel_insertion_test.cpp index 182a3386549..feab22ad9f4 100644 --- a/test/storage/rel_insertion_test.cpp +++ b/test/storage/rel_insertion_test.cpp @@ -26,8 +26,8 @@ class RelInsertionTest : public DBTest { lengthValues = (int64_t*)lengthPropertyVector->getData(); placePropertyVector = make_shared(STRING, memoryManager.get()); placeValues = (ku_string_t*)placePropertyVector->getData(); - srcNodeVector = make_shared(NODE_ID, memoryManager.get()); - dstNodeVector = make_shared(NODE_ID, memoryManager.get()); + srcNodeVector = make_shared(INTERNAL_ID, memoryManager.get()); + dstNodeVector = make_shared(INTERNAL_ID, memoryManager.get()); tagPropertyVector = make_shared( DataType{LIST, make_unique(STRING)}, memoryManager.get()); tagValues = (ku_list_t*)tagPropertyVector->getData(); @@ -165,7 +165,7 @@ class RelInsertionTest : public DBTest { } } - void insertRelsToNode(node_offset_t srcNodeOffset) { + void insertRelsToNode(offset_t srcNodeOffset) { auto placeStr = ku_string_t(); auto tagList = ku_list_t(); tagList.overflowPtr = diff --git a/test/test_files/read_list/2-bytes-per-edge.test b/test/test_files/read_list/2-bytes-per-edge.test index 82072055455..fcd0e1d4422 100644 --- a/test/test_files/read_list/2-bytes-per-edge.test +++ b/test/test_files/read_list/2-bytes-per-edge.test @@ -21,7 +21,7 @@ -QUERY MATCH (a:person)-[r:knows]->(b:person) WHERE a.ID = 5000 RETURN ID(r) -ENUMERATE ---- 1 -10000 +1:10000 -NAME CrossProduct1 -QUERY MATCH (a:person), (b:person) RETURN COUNT(*) diff --git a/test/test_files/read_list/4-bytes-per-edge.test b/test/test_files/read_list/4-bytes-per-edge.test index cac450f9d7a..fa689fa4877 100644 --- a/test/test_files/read_list/4-bytes-per-edge.test +++ b/test/test_files/read_list/4-bytes-per-edge.test @@ -24,7 +24,7 @@ 0 -NAME EdgeID --QUERY MATCH (a:person)-[r:knows]->(b:person) RETURN COUNT(DISTINCT ID(r)) +-QUERY MATCH (a:person)-[r:knows]->(b:person) RETURN COUNT(DISTINCT(ID(r))) -ENUMERATE ---- 1 10001 @@ -42,4 +42,3 @@ -ENUMERATE ---- 1 5001 - diff --git a/test/test_files/tinysnb/agg/multi_label.test b/test/test_files/tinysnb/agg/multi_label.test index 0610c280d25..4b13ce27827 100644 --- a/test/test_files/tinysnb/agg/multi_label.test +++ b/test/test_files/tinysnb/agg/multi_label.test @@ -7,8 +7,8 @@ -NAME MultiLabelAggTest2 -QUERY MATCH (a:person)<-[e1:marries|:mixed|studyAt]-(b:person)-[e2:knows]->(c:person) WHERE b.ID = 7 RETURN ID(e1), COUNT(*), MIN(e2.date) ---- 2 -30|2|1905-12-12 -39|2|1905-12-12 +7:3|2|1905-12-12 +8:2|2|1905-12-12 -NAME MultiLabelAggTest3 -QUERY MATCH (a:person)-[e1:marries|:mixed|studyAt]->(b:person) RETURN e1.year, COUNT(*) diff --git a/test/test_files/tinysnb/match/multi_label.test b/test/test_files/tinysnb/match/multi_label.test index 21e80758880..e0376e785c9 100644 --- a/test/test_files/tinysnb/match/multi_label.test +++ b/test/test_files/tinysnb/match/multi_label.test @@ -44,16 +44,16 @@ -QUERY MATCH (a:person)-[e:mixed|:marries]->(b:person) RETURN a.ID, ID(e), e.note, e.year, b.ID -ENUMERATE ---- 10 -0|27||1930|2 -2|28||1945|5 -3|29||2088|7 -7|30||2066|3 -8|31||2120|3 -9|32||2022|3 -10|33||2020|2 -0|37|||2 -3|38|long long long string||5 -7|39|short str||8 +0|7:0||1930|2 +2|7:1||1945|5 +3|7:2||2088|7 +7|7:3||2066|3 +8|7:4||2120|3 +9|7:5||2022|3 +10|7:6||2020|2 +0|8:0|||2 +3|8:1|long long long string||5 +7|8:2|short str||8 -NAME MultiLabelOneHopTest5 -QUERY MATCH (a:person)-[e:mixed|:studyAt|:knows]->(b:person:organisation) RETURN COUNT(*) diff --git a/test/test_files/tinysnb/order_by/multi_label.test b/test/test_files/tinysnb/order_by/multi_label.test index 50d75c2c026..344da510319 100644 --- a/test/test_files/tinysnb/order_by/multi_label.test +++ b/test/test_files/tinysnb/order_by/multi_label.test @@ -12,14 +12,14 @@ 325 325 --NAME MultiLabelTest2 --QUERY MATCH (a:person)-[e:marries|:workAt|:mixed]->(b:person) return a.ID, b.ID order by ID(e) desc LIMIT 4 --ENUMERATE ----- 4 -7|8 -3|5 -0|2 -10|2 +#-NAME MultiLabelTest2 +#-QUERY MATCH (a:person)-[e:marries|:workAt|:mixed]->(b:person) return a.ID, b.ID order by ID(e) desc LIMIT 4 +#-ENUMERATE +#---- 4 +#7|8 +#3|5 +#0|2 +#10|2 -NAME MultiLabelTest3 -QUERY MATCH (a:person:organisation)-[:mixed]->(b:person:organisation) return b.name, b.fName ORDER BY b.ID diff --git a/test/test_files/tinysnb/projection/multi_label.test b/test/test_files/tinysnb/projection/multi_label.test index a03e9138b6f..ea58fc955b5 100644 --- a/test/test_files/tinysnb/projection/multi_label.test +++ b/test/test_files/tinysnb/projection/multi_label.test @@ -34,20 +34,20 @@ -QUERY MATCH (a:person)-[e:mixed|:marries]->(b:person) RETURN e -ENUMERATE ---- 10 -(0:0)-[{_id:27, year:1930, usedAddress:, note:}]->(0:1) -(0:0)-[{_id:37, year:, usedAddress:[toronto], note:}]->(0:1) -(0:1)-[{_id:28, year:1945, usedAddress:, note:}]->(0:3) -(0:2)-[{_id:29, year:2088, usedAddress:, note:}]->(0:4) -(0:2)-[{_id:38, year:, usedAddress:, note:long long long string}]->(0:3) -(0:4)-[{_id:30, year:2066, usedAddress:, note:}]->(0:2) -(0:4)-[{_id:39, year:, usedAddress:[vancouver], note:short str}]->(0:5) -(0:5)-[{_id:31, year:2120, usedAddress:, note:}]->(0:2) -(0:6)-[{_id:32, year:2022, usedAddress:, note:}]->(0:2) -(0:7)-[{_id:33, year:2020, usedAddress:, note:}]->(0:1) +(0:0)-[{_id:7:0, year:1930, usedAddress:, note:}]->(0:1) +(0:0)-[{_id:8:0, year:, usedAddress:[toronto], note:}]->(0:1) +(0:1)-[{_id:7:1, year:1945, usedAddress:, note:}]->(0:3) +(0:2)-[{_id:7:2, year:2088, usedAddress:, note:}]->(0:4) +(0:2)-[{_id:8:1, year:, usedAddress:, note:long long long string}]->(0:3) +(0:4)-[{_id:7:3, year:2066, usedAddress:, note:}]->(0:2) +(0:4)-[{_id:8:2, year:, usedAddress:[vancouver], note:short str}]->(0:5) +(0:5)-[{_id:7:4, year:2120, usedAddress:, note:}]->(0:2) +(0:6)-[{_id:7:5, year:2022, usedAddress:, note:}]->(0:2) +(0:7)-[{_id:7:6, year:2020, usedAddress:, note:}]->(0:1) -NAME MultiLabelReturnTest -QUERY MATCH (a:person:organisation)-[e:mixed|:studyAt]->(b:person:organisation) WHERE a.fName='Alice' RETURN e, b.fName, b.name -ENUMERATE ---- 2 -(0:0)-[{_id:14, year:2021, places:[wwAewsdndweusd,wek]}]->(1:0)||ABFsUni -(0:0)-[{_id:27, year:1930, places:}]->(0:1)|Bob| +(0:0)-[{_id:4:0, year:2021, places:[wwAewsdndweusd,wek]}]->(1:0)||ABFsUni +(0:0)-[{_id:7:0, year:1930, places:}]->(0:1)|Bob| diff --git a/test/test_files/tinysnb/projection/single_label.test b/test/test_files/tinysnb/projection/single_label.test index 2ee34c8d04c..9c95ef8d1ed 100644 --- a/test/test_files/tinysnb/projection/single_label.test +++ b/test/test_files/tinysnb/projection/single_label.test @@ -291,52 +291,52 @@ Dan|Carol -QUERY MATCH (a:person)-[r:knows]->(b:person) RETURN id(r) -ENUMERATE ---- 14 -0 -1 -10 -11 -12 -13 -2 -3 -4 -5 -6 -7 -8 -9 +3:0 +3:1 +3:10 +3:11 +3:12 +3:13 +3:2 +3:3 +3:4 +3:5 +3:6 +3:7 +3:8 +3:9 -NAME RelID2 -QUERY MATCH (a:person)-[r:studyAt]->(o:organisation) RETURN id(r) -ENUMERATE ---- 3 -14 -15 -16 +4:0 +4:1 +4:2 -NAME RelID3 -QUERY MATCH (a:person)-[r:workAt]->(o:organisation) RETURN id(r) -ENUMERATE ---- 3 -17 -18 -19 +5:0 +5:1 +5:2 -NAME QueryOneToOneRelTable -QUERY MATCH (:person)-[m:marries]->(:person) RETURN m ---- 3 -(0:0)-[{_id:37, usedAddress:[toronto], note:}]->(0:1) -(0:2)-[{_id:38, usedAddress:, note:long long long string}]->(0:3) -(0:4)-[{_id:39, usedAddress:[vancouver], note:short str}]->(0:5) +(0:0)-[{_id:8:0, usedAddress:[toronto], note:}]->(0:1) +(0:2)-[{_id:8:1, usedAddress:, note:long long long string}]->(0:3) +(0:4)-[{_id:8:2, usedAddress:[vancouver], note:short str}]->(0:5) -NAME OneHopMixedTest -QUERY MATCH (a:person)-[e:mixed]->(b:person) RETURN a.ID, e, b.ID, b.fName -ENUMERATE ---- 7 -0|(0:0)-[{_id:27, year:1930}]->(0:1)|2|Bob -10|(0:7)-[{_id:33, year:2020}]->(0:1)|2|Bob -2|(0:1)-[{_id:28, year:1945}]->(0:3)|5|Dan -3|(0:2)-[{_id:29, year:2088}]->(0:4)|7|Elizabeth -7|(0:4)-[{_id:30, year:2066}]->(0:2)|3|Carol -8|(0:5)-[{_id:31, year:2120}]->(0:2)|3|Carol -9|(0:6)-[{_id:32, year:2022}]->(0:2)|3|Carol +0|(0:0)-[{_id:7:0, year:1930}]->(0:1)|2|Bob +10|(0:7)-[{_id:7:6, year:2020}]->(0:1)|2|Bob +2|(0:1)-[{_id:7:1, year:1945}]->(0:3)|5|Dan +3|(0:2)-[{_id:7:2, year:2088}]->(0:4)|7|Elizabeth +7|(0:4)-[{_id:7:3, year:2066}]->(0:2)|3|Carol +8|(0:5)-[{_id:7:4, year:2120}]->(0:2)|3|Carol +9|(0:6)-[{_id:7:5, year:2022}]->(0:2)|3|Carol diff --git a/test/transaction/transaction_test.cpp b/test/transaction/transaction_test.cpp index b0f3aa162f7..af5c1cdeb13 100644 --- a/test/transaction/transaction_test.cpp +++ b/test/transaction/transaction_test.cpp @@ -32,7 +32,7 @@ class TransactionTests : public DBTest { .propertyID; dataChunk = make_shared(3); - nodeVector = make_shared(NODE_ID, getMemoryManager(*database)); + nodeVector = make_shared(INTERNAL_ID, getMemoryManager(*database)); dataChunk->insert(0, nodeVector); ((nodeID_t*)nodeVector->getData())[0].offset = 0; ((nodeID_t*)nodeVector->getData())[1].offset = 1; diff --git a/tools/python_api/src_cpp/py_query_result.cpp b/tools/python_api/src_cpp/py_query_result.cpp index 805d8d20ced..bab14ced331 100644 --- a/tools/python_api/src_cpp/py_query_result.cpp +++ b/tools/python_api/src_cpp/py_query_result.cpp @@ -122,7 +122,7 @@ py::object PyQueryResult::convertValueToPyObject(const Value& value) { dict["_dst"] = convertNodeIdToPyDict(relVal.getDstNodeID()); return move(dict); } - case NODE_ID: { + case INTERNAL_ID: { return convertNodeIdToPyDict(value.getValue()); } default: