diff --git a/CMakeLists.txt b/CMakeLists.txt index 8520f18aaa3..bac30420ab4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.15) -project(Kuzu VERSION 0.3.1.1 LANGUAGES CXX C) +project(Kuzu VERSION 0.3.1.2 LANGUAGES CXX C) find_package(Threads REQUIRED) diff --git a/src/catalog/catalog_entry/rel_table_catalog_entry.cpp b/src/catalog/catalog_entry/rel_table_catalog_entry.cpp index 82922f68708..0ea59ba5e59 100644 --- a/src/catalog/catalog_entry/rel_table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/rel_table_catalog_entry.cpp @@ -2,12 +2,14 @@ #include "catalog/catalog.h" +using namespace kuzu::common; + namespace kuzu { namespace catalog { -RelTableCatalogEntry::RelTableCatalogEntry(std::string name, common::table_id_t tableID, +RelTableCatalogEntry::RelTableCatalogEntry(std::string name, table_id_t tableID, common::RelMultiplicity srcMultiplicity, common::RelMultiplicity dstMultiplicity, - common::table_id_t srcTableID, common::table_id_t dstTableID) + table_id_t srcTableID, table_id_t dstTableID) : TableCatalogEntry{CatalogEntryType::REL_TABLE_ENTRY, std::move(name), tableID}, srcMultiplicity{srcMultiplicity}, dstMultiplicity{dstMultiplicity}, srcTableID{srcTableID}, dstTableID{dstTableID} {} @@ -20,27 +22,32 @@ RelTableCatalogEntry::RelTableCatalogEntry(const RelTableCatalogEntry& other) dstTableID = other.dstTableID; } -bool RelTableCatalogEntry::isParent(common::table_id_t tableID) { +bool RelTableCatalogEntry::isParent(table_id_t tableID) { return srcTableID == tableID || dstTableID == tableID; } -bool RelTableCatalogEntry::isSingleMultiplicity(common::RelDataDirection direction) const { +column_id_t RelTableCatalogEntry::getColumnID(property_id_t propertyID) const { + auto it = std::find_if(properties.begin(), properties.end(), + [&propertyID](const auto& property) { return property.getPropertyID() == propertyID; }); + // Skip the first column in the rel table, which is reserved for nbrID. 
+ return it == properties.end() ? common::INVALID_COLUMN_ID : + std::distance(properties.begin(), it) + 1; +} + +bool RelTableCatalogEntry::isSingleMultiplicity(RelDataDirection direction) const { return getMultiplicity(direction) == common::RelMultiplicity::ONE; } -common::RelMultiplicity RelTableCatalogEntry::getMultiplicity( - common::RelDataDirection direction) const { - return direction == common::RelDataDirection::FWD ? dstMultiplicity : srcMultiplicity; +common::RelMultiplicity RelTableCatalogEntry::getMultiplicity(RelDataDirection direction) const { + return direction == RelDataDirection::FWD ? dstMultiplicity : srcMultiplicity; } -common::table_id_t RelTableCatalogEntry::getBoundTableID( - common::RelDataDirection relDirection) const { - return relDirection == common::RelDataDirection::FWD ? srcTableID : dstTableID; +table_id_t RelTableCatalogEntry::getBoundTableID(RelDataDirection relDirection) const { + return relDirection == RelDataDirection::FWD ? srcTableID : dstTableID; } -common::table_id_t RelTableCatalogEntry::getNbrTableID( - common::RelDataDirection relDirection) const { - return relDirection == common::RelDataDirection::FWD ? dstTableID : srcTableID; +table_id_t RelTableCatalogEntry::getNbrTableID(RelDataDirection relDirection) const { + return relDirection == RelDataDirection::FWD ? 
dstTableID : srcTableID; } -void RelTableCatalogEntry::serialize(common::Serializer& serializer) const { +void RelTableCatalogEntry::serialize(Serializer& serializer) const { TableCatalogEntry::serialize(serializer); serializer.write(srcMultiplicity); serializer.write(dstMultiplicity); @@ -49,11 +56,11 @@ void RelTableCatalogEntry::serialize(common::Serializer& serializer) const { } std::unique_ptr RelTableCatalogEntry::deserialize( - common::Deserializer& deserializer) { + Deserializer& deserializer) { common::RelMultiplicity srcMultiplicity; common::RelMultiplicity dstMultiplicity; - common::table_id_t srcTableID; - common::table_id_t dstTableID; + table_id_t srcTableID; + table_id_t dstTableID; deserializer.deserializeValue(srcMultiplicity); deserializer.deserializeValue(dstMultiplicity); deserializer.deserializeValue(srcTableID); diff --git a/src/common/data_chunk/data_chunk_collection.cpp b/src/common/data_chunk/data_chunk_collection.cpp index c0a142d8dd8..510e9020c54 100644 --- a/src/common/data_chunk/data_chunk_collection.cpp +++ b/src/common/data_chunk/data_chunk_collection.cpp @@ -7,35 +7,35 @@ DataChunkCollection::DataChunkCollection(storage::MemoryManager* mm) : mm{mm} {} void DataChunkCollection::append(DataChunk& chunk) { auto numTuplesToAppend = chunk.state->selVector->selectedSize; - auto chunkToAppendInfo = chunks.empty() ? 
allocateChunk(chunk) : chunks.back().get(); auto numTuplesAppended = 0u; while (numTuplesAppended < numTuplesToAppend) { - if (chunkToAppendInfo->state->selVector->selectedSize == DEFAULT_VECTOR_CAPACITY) { - chunkToAppendInfo = allocateChunk(chunk); + if (chunks.empty() || + chunks.back().state->selVector->selectedSize == DEFAULT_VECTOR_CAPACITY) { + allocateChunk(chunk); } + auto& chunkToAppend = chunks.back(); auto numTuplesToCopy = std::min(numTuplesToAppend - numTuplesAppended, - DEFAULT_VECTOR_CAPACITY - chunkToAppendInfo->state->selVector->selectedSize); + DEFAULT_VECTOR_CAPACITY - chunkToAppend.state->selVector->selectedSize); for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) { for (auto i = 0u; i < numTuplesToCopy; i++) { auto srcPos = chunk.state->selVector->selectedPositions[numTuplesAppended + i]; - auto dstPos = chunkToAppendInfo->state->selVector->selectedSize + i; - chunkToAppendInfo->getValueVector(vectorIdx)->copyFromVectorData( + auto dstPos = chunkToAppend.state->selVector->selectedSize + i; + chunkToAppend.getValueVector(vectorIdx)->copyFromVectorData( dstPos, chunk.getValueVector(vectorIdx).get(), srcPos); } } - chunkToAppendInfo->state->selVector->selectedSize += numTuplesToCopy; + chunkToAppend.state->selVector->selectedSize += numTuplesToCopy; numTuplesAppended += numTuplesToCopy; } } -void DataChunkCollection::append(std::unique_ptr chunk) { - KU_ASSERT(chunk); +void DataChunkCollection::merge(DataChunk chunk) { if (chunks.empty()) { - initTypes(*chunk); + initTypes(chunk); } - KU_ASSERT(chunk->getNumValueVectors() == types.size()); - for (auto vectorIdx = 0u; vectorIdx < chunk->getNumValueVectors(); vectorIdx++) { - KU_ASSERT(chunk->getValueVector(vectorIdx)->dataType == types[vectorIdx]); + KU_ASSERT(chunk.getNumValueVectors() == types.size()); + for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) { + KU_ASSERT(chunk.getValueVector(vectorIdx)->dataType == types[vectorIdx]); } 
chunks.push_back(std::move(chunk)); } @@ -47,28 +47,19 @@ void DataChunkCollection::initTypes(DataChunk& chunk) { } } -std::vector DataChunkCollection::getChunks() const { - std::vector ret; - ret.reserve(chunks.size()); - for (auto& chunk : chunks) { - ret.push_back(chunk.get()); - } - return ret; -} - -DataChunk* DataChunkCollection::allocateChunk(DataChunk& chunk) { +DataChunk& DataChunkCollection::allocateChunk(DataChunk& chunk) { if (chunks.empty()) { types.reserve(chunk.getNumValueVectors()); for (auto vectorIdx = 0u; vectorIdx < chunk.getNumValueVectors(); vectorIdx++) { types.push_back(chunk.getValueVector(vectorIdx)->dataType); } } - auto newChunk = std::make_unique(types.size(), std::make_shared()); + DataChunk newChunk(types.size(), std::make_shared()); for (auto i = 0u; i < types.size(); i++) { - newChunk->insert(i, std::make_shared(types[i], mm)); + newChunk.insert(i, std::make_shared(types[i], mm)); } chunks.push_back(std::move(newChunk)); - return chunks.back().get(); + return chunks.back(); } } // namespace common diff --git a/src/common/types/types.cpp b/src/common/types/types.cpp index 5931e80da4b..c907a711201 100644 --- a/src/common/types/types.cpp +++ b/src/common/types/types.cpp @@ -443,6 +443,15 @@ std::vector> LogicalType::copy( return typesCopy; } +std::vector LogicalType::copy(const std::vector& types) { + std::vector typesCopy; + typesCopy.reserve(types.size()); + for (auto& type : types) { + typesCopy.push_back(*type->copy()); + } + return typesCopy; +} + PhysicalTypeID LogicalType::getPhysicalType(LogicalTypeID typeID) { switch (typeID) { case LogicalTypeID::ANY: { diff --git a/src/function/table/call/storage_info.cpp b/src/function/table/call/storage_info.cpp index 6cdd3026345..3a66c378ad8 100644 --- a/src/function/table/call/storage_info.cpp +++ b/src/function/table/call/storage_info.cpp @@ -48,8 +48,6 @@ struct StorageInfoSharedState final : public CallFuncSharedState { 
columns.push_back(relTable->getCSRLengthColumn(RelDataDirection::FWD)); columns.push_back(relTable->getCSROffsetColumn(RelDataDirection::BWD)); columns.push_back(relTable->getCSRLengthColumn(RelDataDirection::BWD)); - columns.push_back(relTable->getAdjColumn(RelDataDirection::FWD)); - columns.push_back(relTable->getAdjColumn(RelDataDirection::BWD)); for (auto columnID = 0u; columnID < relTable->getNumColumns(); columnID++) { auto column = relTable->getColumn(columnID, RelDataDirection::FWD); auto collectedColumns = collectColumns(column); @@ -167,10 +165,10 @@ static common::offset_t tableFunc(TableFuncInput& input, TableFuncOutput& output while (true) { if (localState->currChunkIdx < localState->dataChunkCollection->getNumChunks()) { // Copy from local state chunk. - auto chunk = localState->dataChunkCollection->getChunk(localState->currChunkIdx); - auto numValuesToOutput = chunk->state->selVector->selectedSize; + auto& chunk = localState->dataChunkCollection->getChunkUnSafe(localState->currChunkIdx); + auto numValuesToOutput = chunk.state->selVector->selectedSize; for (auto columnIdx = 0u; columnIdx < dataChunk.getNumValueVectors(); columnIdx++) { - auto localVector = chunk->getValueVector(columnIdx); + auto localVector = chunk.getValueVector(columnIdx); auto outputVector = dataChunk.getValueVector(columnIdx); for (auto i = 0u; i < numValuesToOutput; i++) { outputVector->copyFromVectorData(i, localVector.get(), i); diff --git a/src/include/catalog/catalog_entry/rel_table_catalog_entry.h b/src/include/catalog/catalog_entry/rel_table_catalog_entry.h index 5fedd1dc5bd..7fa776b4bc7 100644 --- a/src/include/catalog/catalog_entry/rel_table_catalog_entry.h +++ b/src/include/catalog/catalog_entry/rel_table_catalog_entry.h @@ -23,6 +23,7 @@ class RelTableCatalogEntry final : public TableCatalogEntry { //===--------------------------------------------------------------------===// bool isParent(common::table_id_t tableID) override; common::TableType getTableType() const 
override { return common::TableType::REL; } + common::column_id_t getColumnID(common::property_id_t propertyID) const override; common::table_id_t getSrcTableID() const { return srcTableID; } common::table_id_t getDstTableID() const { return dstTableID; } bool isSingleMultiplicity(common::RelDataDirection direction) const; diff --git a/src/include/catalog/catalog_entry/table_catalog_entry.h b/src/include/catalog/catalog_entry/table_catalog_entry.h index e3c0ba6d283..93990ea4d6f 100644 --- a/src/include/catalog/catalog_entry/table_catalog_entry.h +++ b/src/include/catalog/catalog_entry/table_catalog_entry.h @@ -39,7 +39,7 @@ class TableCatalogEntry : public CatalogEntry { bool containProperty(const std::string& propertyName) const; common::property_id_t getPropertyID(const std::string& propertyName) const; const Property* getProperty(common::property_id_t propertyID) const; - common::column_id_t getColumnID(common::property_id_t propertyID) const; + virtual common::column_id_t getColumnID(common::property_id_t propertyID) const; bool containPropertyType(const common::LogicalType& logicalType) const; void addProperty(std::string propertyName, std::unique_ptr dataType); void dropProperty(common::property_id_t propertyID); @@ -52,7 +52,7 @@ class TableCatalogEntry : public CatalogEntry { static std::unique_ptr deserialize( common::Deserializer& deserializer, CatalogEntryType type); -private: +protected: common::table_id_t tableID; std::string comment; common::property_id_t nextPID; diff --git a/src/include/common/column_data_format.h b/src/include/common/column_data_format.h index 5f4b3ad8803..ebaa367658c 100644 --- a/src/include/common/column_data_format.h +++ b/src/include/common/column_data_format.h @@ -7,5 +7,5 @@ namespace common { enum class ColumnDataFormat : uint8_t { REGULAR = 0, CSR = 1 }; -} +} // namespace common } // namespace kuzu diff --git a/src/include/common/data_chunk/data_chunk_collection.h b/src/include/common/data_chunk/data_chunk_collection.h 
index e6b0aa89e9f..cb3ce1bc597 100644 --- a/src/include/common/data_chunk/data_chunk_collection.h +++ b/src/include/common/data_chunk/data_chunk_collection.h @@ -5,36 +5,39 @@ namespace kuzu { namespace common { -// TODO(Guodong/Ziyi): We should extend this to ColumnDataCollection, which takes ResultSet into -// consideration for storage and scan. +// TODO(Guodong): Should rework this to use ColumnChunk. class DataChunkCollection { public: explicit DataChunkCollection(storage::MemoryManager* mm); void append(DataChunk& chunk); - void append(std::unique_ptr chunk); - std::vector getChunks() const; - + inline const std::vector& getChunks() const { return chunks; } + inline std::vector& getChunksUnSafe() { return chunks; } inline uint64_t getNumChunks() const { return chunks.size(); } - inline DataChunk* getChunk(uint64_t idx) const { + inline const DataChunk& getChunk(uint64_t idx) const { + KU_ASSERT(idx < chunks.size()); + return chunks[idx]; + } + inline DataChunk& getChunkUnSafe(uint64_t idx) { KU_ASSERT(idx < chunks.size()); - return chunks[idx].get(); + return chunks[idx]; } inline void merge(DataChunkCollection* other) { for (auto& chunk : other->chunks) { - append(std::move(chunk)); + merge(std::move(chunk)); } } + void merge(DataChunk chunk); private: - DataChunk* allocateChunk(DataChunk& chunk); + DataChunk& allocateChunk(DataChunk& chunk); void initTypes(DataChunk& chunk); private: storage::MemoryManager* mm; std::vector types; - std::vector> chunks; + std::vector chunks; }; } // namespace common diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h index a0b90450d98..2d02312cbdb 100644 --- a/src/include/common/types/types.h +++ b/src/include/common/types/types.h @@ -301,6 +301,7 @@ class LogicalType { static std::vector> copy( const std::vector>& types); + static std::vector copy(const std::vector& types); static std::unique_ptr ANY() { return std::make_unique(LogicalTypeID::ANY); diff --git 
a/src/include/processor/operator/partitioner.h b/src/include/processor/operator/partitioner.h index 65800727cc6..0f1e98a711f 100644 --- a/src/include/processor/operator/partitioner.h +++ b/src/include/processor/operator/partitioner.h @@ -1,7 +1,8 @@ #pragma once #include "common/data_chunk/data_chunk_collection.h" -#include "processor/operator/sink.h" +#include "processor/data_pos.h" +#include "sink.h" namespace kuzu { namespace storage { @@ -105,10 +106,13 @@ class Partitioner : public Sink { std::vector numPartitions, storage::MemoryManager* mm); private: + common::DataChunk constructDataChunk(const std::vector& columnPositions, + const common::logical_types_t& columnTypes, const ResultSet& resultSet, + const std::shared_ptr& state); // TODO: For now, RelBatchInsert will guarantee all data are inside one data chunk. Should be // generalized to resultSet later if needed. void copyDataToPartitions( - common::partition_idx_t partitioningIdx, common::DataChunk* chunkToCopyFrom); + common::partition_idx_t partitioningIdx, common::DataChunk chunkToCopyFrom); private: std::vector> infos; diff --git a/src/include/storage/local_storage/local_node_table.h b/src/include/storage/local_storage/local_node_table.h index 8672e313c31..42343b40ec0 100644 --- a/src/include/storage/local_storage/local_node_table.h +++ b/src/include/storage/local_storage/local_node_table.h @@ -11,35 +11,26 @@ class LocalNodeNG final : public LocalNodeGroup { public: LocalNodeNG(common::offset_t nodeGroupStartOffset, const std::vector& dataTypes, MemoryManager* mm) - : LocalNodeGroup{nodeGroupStartOffset, dataTypes, mm} { - insertInfo.resize(dataTypes.size()); - updateInfo.resize(dataTypes.size()); - } + : LocalNodeGroup{nodeGroupStartOffset, dataTypes, mm} {} void scan(common::ValueVector* nodeIDVector, const std::vector& columnIDs, const std::vector& outputVectors); void lookup(common::offset_t nodeOffset, common::column_id_t columnID, common::ValueVector* outputVector, common::sel_t 
posInOutputVector); - void insert(common::ValueVector* nodeIDVector, - const std::vector& propertyVectors); - void update(common::ValueVector* nodeIDVector, common::column_id_t columnID, - common::ValueVector* propertyVector); - void delete_(common::ValueVector* nodeIDVector); - common::row_idx_t getRowIdx(common::column_id_t columnID, common::offset_t nodeOffset); + bool insert(std::vector nodeIDVectors, + std::vector propertyVectors) override; + bool update(std::vector nodeIDVectors, common::column_id_t columnID, + common::ValueVector* propertyVector) override; + bool delete_( + common::ValueVector* nodeIDVector, common::ValueVector* /*extraVector*/ = nullptr) override; - inline const offset_to_row_idx_t& getInsertInfoRef(common::column_id_t columnID) { - KU_ASSERT(columnID < insertInfo.size()); - return insertInfo[columnID]; + inline const offset_to_row_idx_t& getInsertInfoRef() { + return insertChunks.getOffsetToRowIdx(); } inline const offset_to_row_idx_t& getUpdateInfoRef(common::column_id_t columnID) { - KU_ASSERT(columnID < updateInfo.size()); - return updateInfo[columnID]; + return getUpdateChunks(columnID).getOffsetToRowIdx(); } - -private: - std::vector insertInfo; - std::vector updateInfo; }; class LocalNodeTableData final : public LocalTableData { @@ -52,11 +43,6 @@ class LocalNodeTableData final : public LocalTableData { void lookup(common::ValueVector* nodeIDVector, const std::vector& columnIDs, const std::vector& outputVectors); - void insert(common::ValueVector* nodeIDVector, - const std::vector& propertyVectors); - void update(common::ValueVector* nodeIDVector, common::column_id_t columnID, - common::ValueVector* propertyVector); - void delete_(common::ValueVector* nodeIDVector); private: LocalNodeGroup* getOrCreateLocalNodeGroup(common::ValueVector* nodeIDVector) override; diff --git a/src/include/storage/local_storage/local_rel_table.h b/src/include/storage/local_storage/local_rel_table.h index 553b083ea8c..96c49be7c0f 100644 --- 
a/src/include/storage/local_storage/local_rel_table.h +++ b/src/include/storage/local_storage/local_rel_table.h @@ -7,89 +7,46 @@ namespace kuzu { namespace storage { -static constexpr common::column_id_t REL_ID_COLUMN_ID = 0; - -// Info of node groups with CSR chunks for rel tables. -// Note that srcNodeOffset here are the relative offset within each node group. -struct RelNGInfo { - update_insert_info_t adjInsertInfo; - std::vector insertInfoPerChunk; - std::vector updateInfoPerChunk; - delete_info_t deleteInfo; - common::RelMultiplicity multiplicity; - - RelNGInfo(common::RelMultiplicity multiplicity, common::column_id_t numChunks) - : multiplicity{multiplicity} { - insertInfoPerChunk.resize(numChunks); - updateInfoPerChunk.resize(numChunks); - } - - bool insert(common::offset_t srcOffsetInChunk, common::offset_t relOffset, - common::row_idx_t adjNodeRowIdx, const std::vector& propertyNodesRowIdx); - void update(common::offset_t srcOffsetInChunk, common::offset_t relOffset, - common::column_id_t columnID, common::row_idx_t rowIdx); - bool delete_(common::offset_t srcOffsetInChunk, common::offset_t relOffset); - - bool hasUpdates(); - - uint64_t getNumInsertedTuples(common::offset_t srcOffsetInChunk); - - const update_insert_info_t& getUpdateInfo(common::column_id_t columnID) { - KU_ASSERT(columnID == common::INVALID_COLUMN_ID || columnID < updateInfoPerChunk.size()); - return columnID == common::INVALID_COLUMN_ID ? getEmptyInfo() : - updateInfoPerChunk[columnID]; - } - const update_insert_info_t& getInsertInfo(common::column_id_t columnID) { - KU_ASSERT(columnID == common::INVALID_COLUMN_ID || columnID < insertInfoPerChunk.size()); - return columnID == common::INVALID_COLUMN_ID ? 
adjInsertInfo : insertInfoPerChunk[columnID]; - } - const delete_info_t& getDeleteInfo() const { return deleteInfo; } - - const update_insert_info_t& getEmptyInfo(); - -private: - inline static bool contains( - const std::unordered_set& set, common::offset_t value) { - return set.find(value) != set.end(); - } -}; +static constexpr common::column_id_t LOCAL_NBR_ID_COLUMN_ID = 0; +static constexpr common::column_id_t LOCAL_REL_ID_COLUMN_ID = 1; class LocalRelNG final : public LocalNodeGroup { + friend class RelTableData; + public: LocalRelNG(common::offset_t nodeGroupStartOffset, std::vector dataTypes, MemoryManager* mm, common::RelMultiplicity multiplicity); - common::row_idx_t scanCSR(common::offset_t srcOffsetInChunk, - common::offset_t posToReadForOffset, const std::vector& columnIDs, + common::row_idx_t scanCSR(common::offset_t srcOffset, common::offset_t posToReadForOffset, + const std::vector& columnIDs, const std::vector& outputVector); // For CSR, we need to apply updates and deletions here, while insertions are handled by // `scanCSR`. 
- void applyLocalChangesForCSRColumns(common::offset_t srcOffsetInChunk, + void applyLocalChangesToScannedVectors(common::offset_t srcOffset, const std::vector& columnIDs, common::ValueVector* relIDVector, const std::vector& outputVectors); - bool insert(common::ValueVector* srcNodeIDVector, common::ValueVector* dstNodeIDVector, - const std::vector& propertyVectors); - void update(common::ValueVector* srcNodeIDVector, common::ValueVector* relIDVector, - common::column_id_t columnID, common::ValueVector* propertyVector); - bool delete_(common::ValueVector* srcNodeIDVector, common::ValueVector* relIDVector); + bool insert(std::vector nodeIDVectors, + std::vector vectors) override; + bool update(std::vector nodeIDVectors, common::column_id_t columnID, + common::ValueVector* propertyVector) override; + bool delete_(common::ValueVector* srcNodeVector, common::ValueVector* relIDVector) override; - inline LocalVectorCollection* getAdjChunk() { return adjChunk.get(); } - inline LocalVectorCollection* getPropertyChunk(common::column_id_t columnID) { - KU_ASSERT(columnID < chunks.size()); - return chunks[columnID].get(); - } - inline RelNGInfo* getRelNGInfo() { return relNGInfo.get(); } + common::offset_t getNumInsertedRels(common::offset_t srcOffset) const; + void getChangesPerCSRSegment( + std::vector& sizeChangesPerSegment, std::vector& hasChangesPerSegment); private: - void applyCSRUpdates(common::offset_t srcOffsetInChunk, common::column_id_t columnID, - common::ValueVector* relIDVector, common::ValueVector* outputVector); - void applyCSRDeletions(common::offset_t srcOffsetInChunk, const delete_info_t& deleteInfo, - common::ValueVector* relIDVector); + static common::vector_idx_t getSegmentIdx(common::offset_t offset) { + return offset >> common::StorageConstants::CSR_SEGMENT_SIZE_LOG2; + } + + void applyCSRUpdates(common::column_id_t columnID, common::ValueVector* relIDVector, + common::ValueVector* outputVector); + void applyCSRDeletions(common::offset_t 
srcOffsetInChunk, common::ValueVector* relIDVector); private: - std::unique_ptr adjChunk; - std::unique_ptr relNGInfo; + common::RelMultiplicity multiplicity; }; class LocalRelTableData final : public LocalTableData { @@ -100,13 +57,6 @@ class LocalRelTableData final : public LocalTableData { std::vector dataTypes, MemoryManager* mm) : LocalTableData{std::move(dataTypes), mm}, multiplicity{multiplicity} {} - bool insert(common::ValueVector* srcNodeIDVector, common::ValueVector* dstNodeIDVector, - const std::vector& propertyVectors); - void update(common::ValueVector* srcNodeIDVector, common::ValueVector* relIDVector, - common::column_id_t columnID, common::ValueVector* propertyVector); - bool delete_(common::ValueVector* srcNodeIDVector, common::ValueVector* dstNodeIDVector, - common::ValueVector* relIDVector); - private: LocalNodeGroup* getOrCreateLocalNodeGroup(common::ValueVector* nodeIDVector) override; diff --git a/src/include/storage/local_storage/local_table.h b/src/include/storage/local_storage/local_table.h index cb0f26a6150..3e471d417b4 100644 --- a/src/include/storage/local_storage/local_table.h +++ b/src/include/storage/local_storage/local_table.h @@ -1,95 +1,177 @@ #pragma once -#include +#include +#include "common/data_chunk/data_chunk_collection.h" #include "common/enums/rel_multiplicity.h" #include "common/enums/table_type.h" #include "common/vector/value_vector.h" namespace kuzu { -namespace catalog { -class TableCatalogEntry; -} // namespace catalog namespace storage { -class TableData; -using offset_to_row_idx_t = std::map; +using offset_to_row_idx_t = std::unordered_map; +using offset_to_row_idx_vec_t = + std::unordered_map>; using offset_set_t = std::unordered_set; -using update_insert_info_t = std::map; -using delete_info_t = std::map>; -// TODO(Guodong): Instead of using ValueVector, we should switch to ColumnChunk. -// This class is used to store a chunk of local changes to a column in a node group. -// Values are stored inside `vector`. 
-class LocalVector { -public: - LocalVector(const common::LogicalType& dataType, MemoryManager* mm) : numValues{0} { - vector = std::make_unique(dataType, mm); - vector->setState(std::make_shared()); - vector->state->selVector->resetSelectorToValuePosBufferWithSize(1); - } +static constexpr common::column_id_t NBR_ID_COLUMN_ID = 0; +static constexpr common::column_id_t REL_ID_COLUMN_ID = 1; - void read(common::sel_t offsetInLocalVector, common::ValueVector* resultVector, - common::sel_t offsetInResultVector); - void append(common::ValueVector* valueVector); +struct LocalVectorCollection { + std::vector vectors; - inline common::ValueVector* getVector() { return vector.get(); } - inline bool isFull() const { return numValues == common::DEFAULT_VECTOR_CAPACITY; } + static LocalVectorCollection empty() { return LocalVectorCollection{}; } -private: - std::unique_ptr vector; - common::sel_t numValues; + inline bool isEmpty() const { return vectors.empty(); } + inline void appendVector(common::ValueVector* vector) { vectors.push_back(vector); } + inline common::ValueVector* getLocalVector(common::row_idx_t rowIdx) const { + auto vectorIdx = rowIdx >> common::DEFAULT_VECTOR_CAPACITY_LOG_2; + KU_ASSERT(vectorIdx < vectors.size()); + return vectors[vectorIdx]; + } + + LocalVectorCollection getStructChildVectorCollection(common::struct_field_idx_t idx) const; }; -// This class is used to store local changes of a column in a node group. -// It consists of a collection of LocalVector, each of which is a chunk of the local changes. -// By default, the size of each vector (chunk) is DEFAULT_VECTOR_CAPACITY, and the collection -// contains 64 vectors (chunks). 
-class LocalVectorCollection { +class LocalDataChunkCollection { public: - LocalVectorCollection(std::unique_ptr dataType, MemoryManager* mm) - : dataType{std::move(dataType)}, mm{mm}, numRows{0} {} + LocalDataChunkCollection(MemoryManager* mm, std::vector dataTypes) + : dataChunkCollection{mm}, mm{mm}, dataTypes{std::move(dataTypes)}, numRows{0} {} - void read(common::row_idx_t rowIdx, common::ValueVector* outputVector, - common::sel_t posInOutputVector); - inline uint64_t getNumRows() const { return numRows; } - inline LocalVector* getLocalVector(common::row_idx_t rowIdx) { - auto vectorIdx = rowIdx >> common::DEFAULT_VECTOR_CAPACITY_LOG_2; - KU_ASSERT(vectorIdx < vectors.size()); - return vectors[vectorIdx].get(); + inline common::row_idx_t getRowIdxFromOffset(common::offset_t offset) { + KU_ASSERT(offsetToRowIdx.contains(offset)); + return offsetToRowIdx.at(offset); + } + inline std::vector& getRelOffsetsFromSrcOffset(common::offset_t srcOffset) { + KU_ASSERT(srcNodeOffsetToRelOffsets.contains(srcOffset)); + return srcNodeOffsetToRelOffsets.at(srcOffset); } + inline bool hasOffset(common::offset_t offset) const { return offsetToRowIdx.contains(offset); } + inline bool hasRelOffsetsFromSrcOffset(common::offset_t srcOffset) const { + return srcNodeOffsetToRelOffsets.contains(srcOffset); + } + inline uint64_t getNumRelsFromSrcOffset(common::offset_t srcOffset) const { + return srcNodeOffsetToRelOffsets.at(srcOffset).size(); + } + inline const offset_to_row_idx_vec_t& getSrcNodeOffsetToRelOffsets() const { + return srcNodeOffsetToRelOffsets; + } + inline const offset_to_row_idx_t& getOffsetToRowIdx() const { return offsetToRowIdx; } - std::unique_ptr getStructChildVectorCollection( - common::struct_field_idx_t idx); + bool isEmpty() const { return offsetToRowIdx.empty() && srcNodeOffsetToRelOffsets.empty(); } + void readValueAtRowIdx(common::row_idx_t rowIdx, common::column_id_t columnID, + common::ValueVector* outputVector, common::sel_t posInOutputVector); + bool 
read(common::offset_t offset, common::column_id_t columnID, + common::ValueVector* outputVector, common::sel_t posInOutputVector); - // TODO(Guodong): Change this interface to take an extra `SelVector` or `DataChunkState`. - common::row_idx_t append(common::ValueVector* vector); + inline void append(common::offset_t offset, std::vector vectors) { + offsetToRowIdx[offset] = appendToDataChunkCollection(vectors); + } + // Only used for rel tables. Should be moved out later. + inline void append(common::offset_t nodeOffset, common::offset_t relOffset, + std::vector vectors) { + append(relOffset, vectors); + srcNodeOffsetToRelOffsets[nodeOffset].push_back(relOffset); + } + void update( + common::offset_t offset, common::column_id_t columnID, common::ValueVector* propertyVector); + void remove(common::offset_t offset) { + if (offsetToRowIdx.contains(offset)) { + offsetToRowIdx.erase(offset); + } + } + // Only used for rel tables. Should be moved out later. + void remove(common::offset_t srcNodeOffset, common::offset_t relOffset); + + inline LocalVectorCollection getLocalChunk(common::column_id_t columnID) { + LocalVectorCollection localVectorCollection; + for (auto& chunk : dataChunkCollection.getChunksUnSafe()) { + localVectorCollection.appendVector(chunk.getValueVector(columnID).get()); + } + return localVectorCollection; + } private: - void prepareAppend(); + common::row_idx_t appendToDataChunkCollection(std::vector vectors); + common::DataChunk createNewDataChunk(); private: - std::unique_ptr dataType; - MemoryManager* mm; - std::vector> vectors; + common::DataChunkCollection dataChunkCollection; + // The offset here can either be nodeOffset ( for node table) or relOffset (for rel table). + offset_to_row_idx_t offsetToRowIdx; + storage::MemoryManager* mm; + std::vector dataTypes; common::row_idx_t numRows; + + // Only used for rel tables. Should be moved out later. 
+ offset_to_row_idx_vec_t srcNodeOffsetToRelOffsets; }; -class LocalNodeGroup { - friend class NodeTableData; +class LocalDeletionInfo { +public: + bool isEmpty() const { return deletedOffsets.empty() && srcNodeOffsetToRelOffsetVec.empty(); } + bool isEmpty(common::offset_t srcOffset) const { + return !srcNodeOffsetToRelOffsetVec.contains(srcOffset) || + srcNodeOffsetToRelOffsetVec.at(srcOffset).empty(); + } + bool containsOffset(common::offset_t offset) { return deletedOffsets.contains(offset); } + bool deleteOffset(common::offset_t offset) { + if (deletedOffsets.contains(offset)) { + return false; + } + deletedOffsets.insert(offset); + return true; + } + // For rel tables only. + void deleteRelAux(common::offset_t srcNodeOffset, common::offset_t relOffset) { + srcNodeOffsetToRelOffsetVec[srcNodeOffset].push_back(relOffset); + } + const offset_to_row_idx_vec_t& getSrcNodeOffsetToRelOffsetVec() const { + return srcNodeOffsetToRelOffsetVec; + } + uint64_t getNumDeletedRelsFromSrcOffset(common::offset_t srcOffset) const { + return srcNodeOffsetToRelOffsetVec.contains(srcOffset) ? + srcNodeOffsetToRelOffsetVec.at(srcOffset).size() : + 0; + } + +private: + // The offset here can either be nodeOffset ( for node table) or relOffset (for rel table). + offset_set_t deletedOffsets; + + // Only used for rel tables. Should be moved out later. 
+ offset_to_row_idx_vec_t srcNodeOffsetToRelOffsetVec; +}; + +class LocalNodeGroup { public: LocalNodeGroup(common::offset_t nodeGroupStartOffset, std::vector dataTypes, MemoryManager* mm); virtual ~LocalNodeGroup() = default; - inline LocalVectorCollection* getLocalColumnChunk(common::column_id_t columnID) { - return chunks[columnID].get(); + virtual bool insert(std::vector nodeIDVectors, + std::vector propertyVectors) = 0; + virtual bool update(std::vector nodeIDVectors, + common::column_id_t columnID, common::ValueVector* propertyVector) = 0; + virtual bool delete_(common::ValueVector* IDVector, common::ValueVector* extraVector) = 0; + + LocalDataChunkCollection& getUpdateChunks(common::column_id_t columnID) { + KU_ASSERT(columnID < updateChunks.size()); + return updateChunks[columnID]; } + LocalDataChunkCollection& getInsesrtChunks() { return insertChunks; } + + bool hasUpdatesOrDeletions() const; protected: common::offset_t nodeGroupStartOffset; - std::vector> chunks; + storage::MemoryManager* mm; + + LocalDataChunkCollection insertChunks; + LocalDeletionInfo deleteInfo; + std::vector updateChunks; }; class LocalTableData { @@ -102,12 +184,19 @@ class LocalTableData { inline void clear() { nodeGroups.clear(); } + bool insert(std::vector nodeIDVectors, + std::vector propertyVectors); + bool update(std::vector nodeIDVectors, common::column_id_t columnID, + common::ValueVector* propertyVector); + bool delete_(common::ValueVector* nodeIDVector, common::ValueVector* extraVector = nullptr); + protected: virtual LocalNodeGroup* getOrCreateLocalNodeGroup(common::ValueVector* nodeIDVector) = 0; protected: std::vector dataTypes; MemoryManager* mm; + std::unordered_map> nodeGroups; }; diff --git a/src/include/storage/stats/rel_table_statistics.h b/src/include/storage/stats/rel_table_statistics.h index 2a9348630e3..7249688006b 100644 --- a/src/include/storage/stats/rel_table_statistics.h +++ b/src/include/storage/stats/rel_table_statistics.h @@ -30,12 +30,12 @@ class 
RelTableStats : public TableStatistics { inline void addMetadataDAHInfoForColumn( std::unique_ptr metadataDAHInfo, common::RelDataDirection direction) { - auto& metadataDAHInfos = getDirectedPropertyMetadataDAHInfosRef(direction); + auto& metadataDAHInfos = getDirectedMetadataDAHInfosRef(direction); metadataDAHInfos.push_back(std::move(metadataDAHInfo)); } inline void removeMetadataDAHInfoForColumn( common::column_id_t columnID, common::RelDataDirection direction) { - auto& metadataDAHInfos = getDirectedPropertyMetadataDAHInfosRef(direction); + auto& metadataDAHInfos = getDirectedMetadataDAHInfosRef(direction); KU_ASSERT(columnID < metadataDAHInfos.size()); metadataDAHInfos.erase(metadataDAHInfos.begin() + columnID); } @@ -47,13 +47,9 @@ class RelTableStats : public TableStatistics { return direction == common::RelDataDirection::FWD ? fwdCSRLengthMetadataDAHInfo.get() : bwdCSRLengthMetadataDAHInfo.get(); } - inline MetadataDAHInfo* getAdjMetadataDAHInfo(common::RelDataDirection direction) { - return direction == common::RelDataDirection::FWD ? fwdAdjMetadataDAHInfo.get() : - bwdAdjMetadataDAHInfo.get(); - } - inline MetadataDAHInfo* getPropertyMetadataDAHInfo( + inline MetadataDAHInfo* getColumnMetadataDAHInfo( common::column_id_t columnID, common::RelDataDirection direction) { - auto& metadataDAHInfos = getDirectedPropertyMetadataDAHInfosRef(direction); + auto& metadataDAHInfos = getDirectedMetadataDAHInfosRef(direction); KU_ASSERT(columnID < metadataDAHInfos.size()); return metadataDAHInfos[columnID].get(); } @@ -67,10 +63,10 @@ class RelTableStats : public TableStatistics { } private: - inline std::vector>& getDirectedPropertyMetadataDAHInfosRef( + inline std::vector>& getDirectedMetadataDAHInfosRef( common::RelDataDirection direction) { - return direction == common::RelDataDirection::FWD ? fwdPropertyMetadataDAHInfos : - bwdPropertyMetadataDAHInfos; + return direction == common::RelDataDirection::FWD ? 
fwdMetadataDAHInfos : + bwdMetadataDAHInfos; } private: @@ -80,10 +76,8 @@ class RelTableStats : public TableStatistics { std::unique_ptr bwdCSROffsetMetadataDAHInfo; std::unique_ptr fwdCSRLengthMetadataDAHInfo; std::unique_ptr bwdCSRLengthMetadataDAHInfo; - std::unique_ptr fwdAdjMetadataDAHInfo; - std::unique_ptr bwdAdjMetadataDAHInfo; - std::vector> fwdPropertyMetadataDAHInfos; - std::vector> bwdPropertyMetadataDAHInfos; + std::vector> fwdMetadataDAHInfos; + std::vector> bwdMetadataDAHInfos; }; } // namespace storage diff --git a/src/include/storage/stats/rels_store_statistics.h b/src/include/storage/stats/rels_store_statistics.h index 48c79f4dcc7..d4faab037cb 100644 --- a/src/include/storage/stats/rels_store_statistics.h +++ b/src/include/storage/stats/rels_store_statistics.h @@ -48,9 +48,7 @@ class RelsStoreStats : public TablesStatistics { common::table_id_t tableID, common::RelDataDirection direction); MetadataDAHInfo* getCSRLengthMetadataDAHInfo(transaction::Transaction* transaction, common::table_id_t tableID, common::RelDataDirection direction); - MetadataDAHInfo* getAdjMetadataDAHInfo(transaction::Transaction* transaction, - common::table_id_t tableID, common::RelDataDirection direction); - MetadataDAHInfo* getPropertyMetadataDAHInfo(transaction::Transaction* transaction, + MetadataDAHInfo* getColumnMetadataDAHInfo(transaction::Transaction* transaction, common::table_id_t tableID, common::column_id_t columnID, common::RelDataDirection direction); diff --git a/src/include/storage/storage_utils.h b/src/include/storage/storage_utils.h index 7d55457cf6b..9ac95a005f9 100644 --- a/src/include/storage/storage_utils.h +++ b/src/include/storage/storage_utils.h @@ -69,9 +69,8 @@ class StorageUtils { DATA = 3, // This is used for data columns in VAR_LIST and STRING columns. CSR_OFFSET = 4, CSR_LENGTH = 5, - ADJ = 6, - STRUCT_CHILD = 7, - NULL_MASK = 8, + STRUCT_CHILD = 6, + NULL_MASK = 7, }; // TODO: Constrain T1 and T2 to numerics. 
diff --git a/src/include/storage/store/column.h b/src/include/storage/store/column.h index f691db1e477..69277a3b010 100644 --- a/src/include/storage/store/column.h +++ b/src/include/storage/store/column.h @@ -28,7 +28,7 @@ using batch_lookup_func_t = read_values_to_page_func_t; class NullColumn; class StructColumn; -class LocalVectorCollection; +struct LocalVectorCollection; class Column { friend class StringColumn; friend class VarListLocalColumn; @@ -63,7 +63,8 @@ class Column { virtual void lookup(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, common::ValueVector* resultVector); - virtual void append(ColumnChunk* columnChunk, uint64_t nodeGroupIdx); + // Append column chunk in a new node group. + virtual void append(ColumnChunk* columnChunk, common::node_group_idx_t nodeGroupIdx); inline common::LogicalType& getDataType() { return dataType; } inline const common::LogicalType& getDataType() const { return dataType; } @@ -75,9 +76,9 @@ class Column { Column* getNullColumn(); virtual void prepareCommitForChunk(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localColumnChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo, - const offset_set_t& deleteInfo); + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo); virtual void prepareCommitForChunk(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t startSrcOffset); @@ -113,7 +114,8 @@ class Column { ReadState getReadState( transaction::TransactionType transactionType, common::node_group_idx_t nodeGroupIdx) const; - static void applyLocalChunkToColumnChunk(LocalVectorCollection* localChunk, + virtual std::unique_ptr 
getEmptyChunkForCommit(uint64_t capacity); + static void applyLocalChunkToColumnChunk(const LocalVectorCollection& localChunk, ColumnChunk* columnChunk, const offset_to_row_idx_t& info); protected: @@ -148,8 +150,6 @@ class Column { void updatePageWithCursor( PageCursor cursor, const std::function& writeOp); - virtual std::unique_ptr getEmptyChunkForCommit(uint64_t capacity); - inline common::offset_t getMaxOffset(const std::vector& offsets) { common::offset_t maxOffset = 0u; for (auto offset : offsets) { @@ -163,22 +163,24 @@ class Column { const ColumnChunkMetadata& metadata, const offset_to_row_idx_t& insertInfo); bool isMaxOffsetOutOfPagesCapacity( const ColumnChunkMetadata& metadata, common::offset_t maxOffset); - bool checkUpdateInPlace(const ColumnChunkMetadata& metadata, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo); + bool checkUpdateInPlace(const ColumnChunkMetadata& metadata, + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& writeInfo); virtual bool canCommitInPlace(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo); + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo); virtual bool canCommitInPlace(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t srcOffset); virtual void commitLocalChunkInPlace(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo, - const offset_set_t& deleteInfo); - virtual void 
commitLocalChunkOutOfPlace(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - bool isNewNodeGroup, const offset_to_row_idx_t& insertInfo, + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo); + virtual void commitLocalChunkOutOfPlace(transaction::Transaction* transaction, + common::node_group_idx_t nodeGroupIdx, bool isNewNodeGroup, + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, + const offset_set_t& deleteInfo); virtual void commitColumnChunkInPlace(common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t srcOffset); @@ -188,7 +190,7 @@ class Column { common::offset_t srcOffset); void applyLocalChunkToColumn(common::node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& info); + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& info); // check if val is in range [start, end) static inline bool isInRange(uint64_t val, uint64_t start, uint64_t end) { diff --git a/src/include/storage/store/node_group.h b/src/include/storage/store/node_group.h index 1343b10a6fd..7e08f9d6981 100644 --- a/src/include/storage/store/node_group.h +++ b/src/include/storage/store/node_group.h @@ -32,15 +32,15 @@ class NodeGroup { uint64_t append(const std::vector& columnVectors, common::DataChunkState* columnState, uint64_t numValuesToAppend); common::offset_t append(NodeGroup* other, common::offset_t offsetInOtherNodeGroup); - void write(common::DataChunk* dataChunk, common::vector_idx_t offsetVector); + void write(common::DataChunk& dataChunk, common::vector_idx_t offsetVector); void finalize(uint64_t 
nodeGroupIdx_); virtual inline void writeToColumnChunk(common::vector_idx_t chunkIdx, - common::vector_idx_t vectorIdx, common::DataChunk* dataChunk, + common::vector_idx_t vectorIdx, common::DataChunk& dataChunk, common::ValueVector* offsetVector) { chunks[chunkIdx]->write( - dataChunk->getValueVector(vectorIdx).get(), offsetVector, false /* isCSR */); + dataChunk.getValueVector(vectorIdx).get(), offsetVector, false /* isCSR */); } protected: @@ -81,9 +81,9 @@ class CSRNodeGroup : public NodeGroup { const CSRHeaderChunks& getCSRHeader() const { return csrHeaderChunks; } inline void writeToColumnChunk(common::vector_idx_t chunkIdx, common::vector_idx_t vectorIdx, - common::DataChunk* dataChunk, common::ValueVector* offsetVector) override { + common::DataChunk& dataChunk, common::ValueVector* offsetVector) override { chunks[chunkIdx]->write( - dataChunk->getValueVector(vectorIdx).get(), offsetVector, true /* isCSR */); + dataChunk.getValueVector(vectorIdx).get(), offsetVector, true /* isCSR */); } private: diff --git a/src/include/storage/store/node_table_data.h b/src/include/storage/store/node_table_data.h index 75617ae8e4a..370e524fe13 100644 --- a/src/include/storage/store/node_table_data.h +++ b/src/include/storage/store/node_table_data.h @@ -27,13 +27,15 @@ class NodeTableData final : public TableData { common::ValueVector* nodeIDVector, const std::vector& outputVectors) override; - // These two interfaces are node table specific, as rel table requires also relIDVector. + // These interfaces are node table specific, as rel table requires also relIDVector. + // insert/update/delete_ keeps changes inside the local storage. 
void insert(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, const std::vector& propertyVectors); void update(transaction::Transaction* transaction, common::column_id_t columnID, common::ValueVector* nodeIDVector, common::ValueVector* propertyVector); void delete_(transaction::Transaction* transaction, common::ValueVector* nodeIDVector); + // Flush the nodeGroup to disk and update metadataDAs. void append(NodeGroup* nodeGroup) override; void prepareLocalTableToCommit( @@ -43,6 +45,12 @@ class NodeTableData final : public TableData { transaction::Transaction* transaction) const override { return columns[0]->getNumNodeGroups(transaction); } + +private: + void append(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, + LocalNodeGroup* localNodeGroup); + void merge(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, + LocalNodeGroup* nodeGroup); }; } // namespace storage diff --git a/src/include/storage/store/null_column.h b/src/include/storage/store/null_column.h index 4a1a71ef184..5a507a90bc1 100644 --- a/src/include/storage/store/null_column.h +++ b/src/include/storage/store/null_column.h @@ -44,16 +44,20 @@ class NullColumn final : public Column { ColumnChunk* data, common::offset_t dataOffset, common::length_t numValues) override; bool canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) override; bool canCommitInPlace(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t srcOffset) override; - void commitLocalChunkInPlace(Transaction* /*transaction*/, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const 
offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) override; + void commitLocalChunkInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, + const offset_set_t& deleteInfo) override; private: + bool checkUpdateInPlace(const ColumnChunkMetadata& metadata, + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& writeInfo); std::unique_ptr getEmptyChunkForCommit(uint64_t capacity) override { return ColumnChunkFactory::createNullColumnChunk(enableCompression, capacity); } diff --git a/src/include/storage/store/rel_table.h b/src/include/storage/store/rel_table.h index 27f8c517728..305ca0d7399 100644 --- a/src/include/storage/store/rel_table.h +++ b/src/include/storage/store/rel_table.h @@ -54,10 +54,6 @@ class RelTable final : public Table { fwdRelTableData->dropColumn(columnID); bwdRelTableData->dropColumn(columnID); } - inline Column* getAdjColumn(common::RelDataDirection direction) { - return direction == common::RelDataDirection::FWD ? fwdRelTableData->getAdjColumn() : - bwdRelTableData->getAdjColumn(); - } inline Column* getCSROffsetColumn(common::RelDataDirection direction) { return direction == common::RelDataDirection::FWD ? 
fwdRelTableData->getCSROffsetColumn() : bwdRelTableData->getCSROffsetColumn(); diff --git a/src/include/storage/store/rel_table_data.h b/src/include/storage/store/rel_table_data.h index ab9a7fca0c4..dd37d955fa6 100644 --- a/src/include/storage/store/rel_table_data.h +++ b/src/include/storage/store/rel_table_data.h @@ -123,12 +123,9 @@ class RelTableData final : public TableData { common::offset_t rightCSROffset = common::INVALID_OFFSET; bool needSliding = false; - explicit LocalState(LocalRelNG* localNG) : localNG{localNG} { initChangesPerSegment(); } + explicit LocalState(LocalRelNG* localNG); inline void setRegion(PackedCSRRegion& region_) { region = region_; } - - private: - void initChangesPerSegment(); }; RelTableData(BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, @@ -152,7 +149,7 @@ class RelTableData final : public TableData { common::ValueVector* srcNodeIDVector, common::ValueVector* relIDVector, common::ValueVector* propertyVector); bool delete_(transaction::Transaction* transaction, common::ValueVector* srcNodeIDVector, - common::ValueVector* dstNodeIDVector, common::ValueVector* relIDVector); + common::ValueVector* relIDVector); void checkRelMultiplicityConstraint( transaction::Transaction* transaction, common::ValueVector* srcNodeIDVector) const; @@ -160,10 +157,9 @@ class RelTableData final : public TableData { transaction::Transaction* transaction, common::offset_t nodeOffset) const; void append(NodeGroup* nodeGroup) override; - inline Column* getAdjColumn() const { return adjColumn.get(); } + inline Column* getNbrIDColumn() const { return columns[NBR_ID_COLUMN_ID].get(); } inline Column* getCSROffsetColumn() const { return csrHeaderColumns.offset.get(); } inline Column* getCSRLengthColumn() const { return csrHeaderColumns.length.get(); } - Column* getColumn(common::column_id_t columnID) override; void prepareLocalTableToCommit( transaction::Transaction* transaction, LocalTableData* localTable) override; @@ -173,10 
+169,13 @@ class RelTableData final : public TableData { inline common::node_group_idx_t getNumNodeGroups( transaction::Transaction* transaction) const override { - return adjColumn->getNumNodeGroups(transaction); + return columns[NBR_ID_COLUMN_ID]->getNumNodeGroups(transaction); } private: + static common::offset_t getMaxNumNodesInRegion( + const CSRHeaderChunks& header, const PackedCSRRegion& region, const LocalRelNG* localNG); + std::vector findRegions( const CSRHeaderChunks& headerChunks, LocalState& localState); common::length_t getNewRegionSize(const CSRHeaderChunks& header, @@ -211,14 +210,12 @@ class RelTableData final : public TableData { void slideRightForInsertions(common::offset_t nodeOffset, common::offset_t rightBoundary, LocalState& localState, uint64_t numValuesToInsert); - void applyUpdatesToChunk(const PersistentState& persistentState, const PackedCSRRegion& region, - LocalVectorCollection* localChunk, const update_insert_info_t& updateInfo, - ColumnChunk* chunk); + void applyUpdatesToChunk(const PersistentState& persistentState, LocalState& localState, + const LocalVectorCollection& localChunk, ColumnChunk* chunk, common::column_id_t columnID); void applyInsertionsToChunk(const PersistentState& persistentState, - const LocalState& localState, LocalVectorCollection* localChunk, - const update_insert_info_t& insertInfo, ColumnChunk* chunk); - void applyDeletionsToChunk(const PersistentState& persistentState, const LocalState& localState, - const delete_info_t& deleteInfo, ColumnChunk* chunk); + const LocalState& localState, const LocalVectorCollection& localChunk, ColumnChunk* chunk); + void applyDeletionsToChunk( + const PersistentState& persistentState, const LocalState& localState, ColumnChunk* chunk); void applyUpdatesToColumn(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, common::column_id_t columnID, @@ -233,25 +230,22 @@ class RelTableData final : public TableData { LocalState& localState, const 
PersistentState& persistentState, Column* column); std::vector> getSlidesForDeletions( - const PersistentState& persistentState, const LocalState& localState, - const delete_info_t& deleteInfo); + const PersistentState& persistentState, const LocalState& localState); LocalRelNG* getLocalNodeGroup( transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx); - // TODO: Constrain T1 and T2 to numerics. template static double divideNoRoundUp(T1 v1, T2 v2) { + static_assert(std::is_arithmetic::value && std::is_arithmetic::value); return (double)v1 / (double)v2; } template static uint64_t multiplyAndRoundUpTo(T1 v1, T2 v2) { + static_assert(std::is_arithmetic::value && std::is_arithmetic::value); return std::ceil((double)v1 * (double)v2); } - LocalVectorCollection* getLocalChunk( - const LocalState& localState, common::column_id_t columnID); - inline void fillSequence(std::span offsets, common::offset_t startOffset) { for (auto i = 0u; i < offsets.size(); i++) { offsets[i] = i + startOffset; @@ -268,7 +262,6 @@ class RelTableData final : public TableData { private: PackedCSRInfo packedCSRInfo; CSRHeaderColumns csrHeaderColumns; - std::unique_ptr adjColumn; common::RelDataDirection direction; common::RelMultiplicity multiplicity; }; diff --git a/src/include/storage/store/string_column.h b/src/include/storage/store/string_column.h index fac763194cc..4b1eff6a8ae 100644 --- a/src/include/storage/store/string_column.h +++ b/src/include/storage/store/string_column.h @@ -49,8 +49,9 @@ class StringColumn final : public Column { private: bool canCommitInPlace(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo) override; + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& 
updateInfo) override; bool canCommitInPlace(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t srcOffset) override; @@ -58,6 +59,10 @@ class StringColumn final : public Column { bool canIndexCommitInPlace(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, uint64_t numStrings, common::offset_t maxOffset); + bool checkUpdateInPlace(transaction::Transaction* transaction, + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localChunk, + const offset_to_row_idx_t& writeInfo); + private: // Main column stores indices of values in the dictionary DictionaryColumn dictionary; diff --git a/src/include/storage/store/struct_column.h b/src/include/storage/store/struct_column.h index 696e1002b0a..613dfff7c96 100644 --- a/src/include/storage/store/struct_column.h +++ b/src/include/storage/store/struct_column.h @@ -34,9 +34,9 @@ class StructColumn final : public Column { ColumnChunk* data, common::offset_t dataOffset, common::length_t numValues) override; void prepareCommitForChunk(transaction::Transaction* transaction, - common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localColumnChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo, - const offset_set_t& deleteInfo) override; + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) override; void prepareCommitForChunk(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t startSrcOffset) override; @@ -48,8 +48,9 @@ class StructColumn final : public Column { common::ValueVector* resultVector) override; bool canCommitInPlace(transaction::Transaction* transaction, - 
common::node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo) override; + common::node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo) override; bool canCommitInPlace(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, const std::vector& dstOffsets, ColumnChunk* chunk, common::offset_t dataOffset) override; diff --git a/src/include/storage/store/table_data.h b/src/include/storage/store/table_data.h index 13f3cc80b65..c9ba360cb39 100644 --- a/src/include/storage/store/table_data.h +++ b/src/include/storage/store/table_data.h @@ -35,8 +35,8 @@ class TableData { TablesStatistics* tableStats); inline common::vector_idx_t getNumColumns() const { return columns.size(); } - inline virtual Column* getColumn(common::column_id_t columnID) { - KU_ASSERT(columnID < columns.size()); + inline Column* getColumn(common::column_id_t columnID) { + KU_ASSERT(columnID < columns.size() && columnID != common::INVALID_COLUMN_ID); return columns[columnID].get(); } diff --git a/src/include/storage/store/var_list_column.h b/src/include/storage/store/var_list_column.h index 9fc7cdcb420..186ee43c7ff 100644 --- a/src/include/storage/store/var_list_column.h +++ b/src/include/storage/store/var_list_column.h @@ -82,10 +82,9 @@ class VarListColumn : public Column { void scanFiltered(transaction::Transaction* transaction, common::node_group_idx_t nodeGroupIdx, common::ValueVector* offsetVector, const ListOffsetInfoInStorage& listOffsetInfoInStorage); - inline bool canCommitInPlace(transaction::Transaction* /*transaction*/, - common::node_group_idx_t /*nodeGroupIdx*/, LocalVectorCollection* /*localChunk*/, - const offset_to_row_idx_t& /*insertInfo*/, - const offset_to_row_idx_t& /*updateInfo*/) override { + inline bool 
canCommitInPlace(transaction::Transaction*, common::node_group_idx_t, + const LocalVectorCollection&, const offset_to_row_idx_t&, const LocalVectorCollection&, + const offset_to_row_idx_t&) override { // Always perform out-of-place commit for VAR_LIST columns. return false; } diff --git a/src/processor/map/map_copy_from.cpp b/src/processor/map/map_copy_from.cpp index 11af63835af..565d907b7f2 100644 --- a/src/processor/map/map_copy_from.cpp +++ b/src/processor/map/map_copy_from.cpp @@ -196,7 +196,7 @@ physical_op_vector_t PlanMapper::mapCopyRelFrom(LogicalOperator* logicalOperator storageManager.getNodeTable(relTableEntry->getDstTableID()); // TODO(Xiyang): Move binding of column types to binder. std::vector> columnTypes; - columnTypes.push_back(LogicalType::INTERNAL_ID()); // ADJ COLUMN. + columnTypes.push_back(LogicalType::INTERNAL_ID()); // NBR_ID COLUMN. for (auto& property : relTableEntry->getPropertiesRef()) { columnTypes.push_back(property.getDataType()->copy()); } diff --git a/src/processor/operator/partitioner.cpp b/src/processor/operator/partitioner.cpp index 8db8df79e23..c7368f54ede 100644 --- a/src/processor/operator/partitioner.cpp +++ b/src/processor/operator/partitioner.cpp @@ -97,19 +97,22 @@ void Partitioner::initLocalStateInternal(ResultSet* /*resultSet*/, ExecutionCont context->clientContext->getMemoryManager()); } -static void constructDataChunk(DataChunk* dataChunk, const std::vector& columnPositions, - const logical_types_t& columnTypes, const ResultSet& resultSet) { +DataChunk Partitioner::constructDataChunk(const std::vector& columnPositions, + const logical_types_t& columnTypes, const ResultSet& resultSet, + const std::shared_ptr& state) { + DataChunk dataChunk(columnTypes.size(), state); for (auto i = 0u; i < columnPositions.size(); i++) { auto pos = columnPositions[i]; if (pos.isValid()) { - dataChunk->insert(i, resultSet.getValueVector(pos)); + dataChunk.insert(i, resultSet.getValueVector(pos)); } else { auto columnType = 
columnTypes[i].get(); auto nullVector = std::make_shared(*columnType); nullVector->setAllNull(); - dataChunk->insert(i, nullVector); + dataChunk.insert(i, nullVector); } } + return dataChunk; } void Partitioner::initializePartitioningStates( @@ -134,21 +137,18 @@ void Partitioner::executeInternal(ExecutionContext* context) { auto keyVector = resultSet->getValueVector(info->keyDataPos); partitionIdxes->state = resultSet->getValueVector(info->keyDataPos)->state; info->partitionerFunc(keyVector.get(), partitionIdxes.get()); - auto columnDataChunk = - std::make_unique(info->columnTypes.size(), keyVector->state); - constructDataChunk( - columnDataChunk.get(), info->columnDataPositions, info->columnTypes, *resultSet); - copyDataToPartitions(partitioningIdx, columnDataChunk.get()); + auto chunkToCopyFrom = constructDataChunk( + info->columnDataPositions, info->columnTypes, *resultSet, keyVector->state); + copyDataToPartitions(partitioningIdx, std::move(chunkToCopyFrom)); } } sharedState->merge(std::move(localState->partitioningBuffers)); } -void Partitioner::copyDataToPartitions( - partition_idx_t partitioningIdx, DataChunk* chunkToCopyFrom) { - auto originalChunkState = chunkToCopyFrom->state; - chunkToCopyFrom->state = std::make_shared(1 /* capacity */); - chunkToCopyFrom->state->selVector->resetSelectorToValuePosBufferWithSize(1 /* size */); +void Partitioner::copyDataToPartitions(partition_idx_t partitioningIdx, DataChunk chunkToCopyFrom) { + auto originalChunkState = chunkToCopyFrom.state; + chunkToCopyFrom.state = std::make_shared(1 /* capacity */); + chunkToCopyFrom.state->selVector->resetSelectorToValuePosBufferWithSize(1 /* size */); for (auto i = 0u; i < originalChunkState->selVector->selectedSize; i++) { auto posToCopyFrom = originalChunkState->selVector->selectedPositions[i]; auto partitionIdx = partitionIdxes->getValue(posToCopyFrom); @@ -156,8 +156,8 @@ void Partitioner::copyDataToPartitions( partitionIdx < 
localState->getPartitioningBuffer(partitioningIdx)->partitions.size()); auto partition = localState->getPartitioningBuffer(partitioningIdx)->partitions[partitionIdx].get(); - chunkToCopyFrom->state->selVector->selectedPositions[0] = posToCopyFrom; - partition->append(*chunkToCopyFrom); + chunkToCopyFrom.state->selVector->selectedPositions[0] = posToCopyFrom; + partition->append(chunkToCopyFrom); } } diff --git a/src/processor/operator/persistent/delete_executor.cpp b/src/processor/operator/persistent/delete_executor.cpp index 59abfe16a75..5cdf314dba1 100644 --- a/src/processor/operator/persistent/delete_executor.cpp +++ b/src/processor/operator/persistent/delete_executor.cpp @@ -82,6 +82,12 @@ void MultiLabelNodeDeleteExecutor::delete_(ExecutionContext* context) { detachDeleteState.get()); } for (auto& relTable : bwdRelTables) { + // TODO(Guodong): For detach delete, there can possibly be a case where the same relTable is + // in both fwd and bwd rel tables set. the rels can be deleted twice. This is a temporary + // hack. + if (deleteType == DeleteNodeType::DETACH_DELETE && fwdRelTables.contains(relTable)) { + continue; + } deleteFromRelTable(context, deleteType, RelDataDirection::BWD, relTable, nodeIDVector, detachDeleteState.get()); } diff --git a/src/processor/operator/persistent/rel_batch_insert.cpp b/src/processor/operator/persistent/rel_batch_insert.cpp index 0b83ab14a9a..95adbd51372 100644 --- a/src/processor/operator/persistent/rel_batch_insert.cpp +++ b/src/processor/operator/persistent/rel_batch_insert.cpp @@ -34,15 +34,19 @@ void RelBatchInsert::executeInternal(ExecutionContext* /*context*/) { relLocalState->nodeGroupIdx = partitionerSharedState->getNextPartition(relInfo->partitioningIdx); if (relLocalState->nodeGroupIdx == INVALID_PARTITION_IDX) { + // No more partitions left in the partitioning buffer. break; } + // TODO: Multiple copy. + // If the node group is a new one. Construct the node group and flush it to table. 
+ // Else, append data to the local rel node group, and merge it to the table. // Read the whole partition, and set to node group. auto partitioningBuffer = partitionerSharedState->getPartitionBuffer( relInfo->partitioningIdx, relLocalState->nodeGroupIdx); auto startNodeOffset = StorageUtils::getStartOffsetOfNodeGroup(relLocalState->nodeGroupIdx); - for (auto dataChunk : partitioningBuffer->getChunks()) { + for (auto& dataChunk : partitioningBuffer->getChunksUnSafe()) { setOffsetToWithinNodeGroup( - dataChunk->getValueVector(relInfo->offsetVectorIdx).get(), startNodeOffset); + dataChunk.getValueVector(relInfo->offsetVectorIdx).get(), startNodeOffset); } // Calculate num of source nodes in this node group. // This will be used to set the num of values of the node group. @@ -50,7 +54,7 @@ void RelBatchInsert::executeInternal(ExecutionContext* /*context*/) { partitionerSharedState->maxNodeOffsets[relInfo->partitioningIdx] - startNodeOffset + 1); prepareCSRNodeGroup( partitioningBuffer, startNodeOffset, relInfo->offsetVectorIdx, numNodes); - for (auto dataChunk : partitioningBuffer->getChunks()) { + for (auto& dataChunk : partitioningBuffer->getChunksUnSafe()) { localState->nodeGroup->write(dataChunk, relInfo->offsetVectorIdx); } localState->nodeGroup->finalize(relLocalState->nodeGroupIdx); @@ -79,8 +83,8 @@ void RelBatchInsert::prepareCSRNodeGroup(DataChunkCollection* partition, offset_t csrChunkCapacity = csrHeader.getEndCSROffset(numNodes - 1) + csrHeader.getCSRLength(numNodes - 1); localState->nodeGroup->resizeChunks(csrChunkCapacity); - for (auto dataChunk : partition->getChunks()) { - auto offsetVector = dataChunk->getValueVector(offsetVectorIdx).get(); + for (auto& dataChunk : partition->getChunksUnSafe()) { + auto offsetVector = dataChunk.getValueVector(offsetVectorIdx).get(); setOffsetFromCSROffsets(offsetVector, csrHeader.offset.get()); } populateEndCSROffsets(csrHeader, gaps); @@ -113,8 +117,8 @@ std::vector 
RelBatchInsert::populateStartCSROffsetsAndLengths(CSRHeade auto csrLengths = (length_t*)csrHeader.length->getData(); std::fill(csrLengths, csrLengths + numNodes, 0); // Calculate length for each node. Store the num of tuples of node i at csrLengths[i]. - for (auto chunk : partition->getChunks()) { - auto offsetVector = chunk->getValueVector(offsetVectorIdx); + for (auto& chunk : partition->getChunksUnSafe()) { + auto offsetVector = chunk.getValueVector(offsetVectorIdx); for (auto i = 0u; i < offsetVector->state->selVector->selectedSize; i++) { auto pos = offsetVector->state->selVector->selectedPositions[i]; auto nodeOffset = offsetVector->getValue(pos); diff --git a/src/storage/local_storage/local_node_table.cpp b/src/storage/local_storage/local_node_table.cpp index b9356602922..543cbb78f47 100644 --- a/src/storage/local_storage/local_node_table.cpp +++ b/src/storage/local_storage/local_node_table.cpp @@ -13,7 +13,6 @@ void LocalNodeNG::scan(ValueVector* nodeIDVector, const std::vector KU_ASSERT(columnIDs.size() == outputVectors.size()); for (auto i = 0u; i < columnIDs.size(); i++) { auto columnID = columnIDs[i]; - KU_ASSERT(columnID < chunks.size()); for (auto pos = 0u; pos < nodeIDVector->state->selVector->selectedSize; pos++) { auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[pos]; auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset; @@ -25,73 +24,71 @@ void LocalNodeNG::scan(ValueVector* nodeIDVector, const std::vector void LocalNodeNG::lookup( offset_t nodeOffset, column_id_t columnID, ValueVector* outputVector, sel_t posInOutputVector) { - KU_ASSERT(columnID < chunks.size()); - row_idx_t rowIdx = getRowIdx(columnID, nodeOffset - nodeGroupStartOffset); - if (rowIdx != INVALID_ROW_IDX) { - chunks[columnID]->read(rowIdx, outputVector, posInOutputVector); + if (deleteInfo.containsOffset(nodeOffset)) { + // Node has been deleted. 
+ return; + } + if (insertChunks.read(nodeOffset, columnID, outputVector, posInOutputVector)) { + // Node has been newly inserted. + return; } + updateChunks[columnID].read(nodeOffset, 0 /*columnID*/, outputVector, posInOutputVector); } -void LocalNodeNG::insert( - ValueVector* nodeIDVector, const std::vector& propertyVectors) { - KU_ASSERT(propertyVectors.size() == chunks.size() && - nodeIDVector->state->selVector->selectedSize == 1); +bool LocalNodeNG::insert(std::vector nodeIDVectors, + std::vector propertyVectors) { + KU_ASSERT(nodeIDVectors.size() == 1); + auto nodeIDVector = nodeIDVectors[0]; + KU_ASSERT(nodeIDVector->state->selVector->selectedSize == 1); auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[0]; if (nodeIDVector->isNull(nodeIDPos)) { - return; + return false; } + // The nodeOffset here should be the offset within the node group. auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset - nodeGroupStartOffset; KU_ASSERT(nodeOffset < StorageConstants::NODE_GROUP_SIZE); - for (auto columnID = 0u; columnID < chunks.size(); columnID++) { - auto rowIdx = chunks[columnID]->append(propertyVectors[columnID]); - KU_ASSERT(!updateInfo[columnID].contains(nodeOffset)); - insertInfo[columnID][nodeOffset] = rowIdx; - } + insertChunks.append(nodeOffset, propertyVectors); + return true; } -void LocalNodeNG::update( - ValueVector* nodeIDVector, column_id_t columnID, ValueVector* propertyVector) { - KU_ASSERT(columnID < chunks.size() && nodeIDVector->state->selVector->selectedSize == 1); +bool LocalNodeNG::update(std::vector nodeIDVectors, + common::column_id_t columnID, common::ValueVector* propertyVector) { + KU_ASSERT(nodeIDVectors.size() == 1); + auto nodeIDVector = nodeIDVectors[0]; + KU_ASSERT(nodeIDVector->state->selVector->selectedSize == 1); auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[0]; if (nodeIDVector->isNull(nodeIDPos)) { - return; + return false; } auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset - 
nodeGroupStartOffset; - KU_ASSERT(nodeOffset < StorageConstants::NODE_GROUP_SIZE); - auto rowIdx = chunks[columnID]->append(propertyVector); - if (insertInfo[columnID].contains(nodeOffset)) { - // This node is in local storage, and had been newly inserted. - insertInfo.at(columnID)[nodeOffset] = rowIdx; + KU_ASSERT(nodeOffset < StorageConstants::NODE_GROUP_SIZE && columnID < updateChunks.size()); + // Check if the node is newly inserted or in persistent storage. + if (insertChunks.hasOffset(nodeOffset)) { + insertChunks.update(nodeOffset, columnID, propertyVector); } else { - updateInfo[columnID][nodeOffset] = rowIdx; + updateChunks[columnID].append(nodeOffset, {propertyVector}); } + return true; } -void LocalNodeNG::delete_(ValueVector* nodeIDVector) { +bool LocalNodeNG::delete_(common::ValueVector* nodeIDVector, common::ValueVector* /*extraVector*/) { KU_ASSERT(nodeIDVector->state->selVector->selectedSize == 1); auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[0]; if (nodeIDVector->isNull(nodeIDPos)) { - return; + return false; } auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset - nodeGroupStartOffset; KU_ASSERT(nodeOffset < StorageConstants::NODE_GROUP_SIZE); - for (auto i = 0u; i < chunks.size(); i++) { - insertInfo[i].erase(nodeOffset); - updateInfo[i].erase(nodeOffset); - } -} - -row_idx_t LocalNodeNG::getRowIdx(column_id_t columnID, offset_t offsetInChunk) { - KU_ASSERT(columnID < chunks.size()); - if (updateInfo[columnID].contains(offsetInChunk)) { - // This node is in persistent storage, and had been updated. - return updateInfo[columnID][offsetInChunk]; - } else if (insertInfo[columnID].contains(offsetInChunk)) { - // This node is in local storage, and had been newly inserted. - return insertInfo[columnID][offsetInChunk]; + // Check if the node is newly inserted or in persistent storage. 
+ if (insertChunks.hasOffset(nodeOffset)) { + insertChunks.remove(nodeOffset); } else { - return INVALID_ROW_IDX; + for (auto i = 0u; i < updateChunks.size(); i++) { + updateChunks[i].remove(nodeOffset); + } + deleteInfo.deleteOffset(nodeOffset); } + return true; } void LocalNodeTableData::scan(ValueVector* nodeIDVector, const std::vector& columnIDs, @@ -125,35 +122,6 @@ void LocalNodeTableData::lookup(ValueVector* nodeIDVector, } } -void LocalNodeTableData::insert( - ValueVector* nodeIDVector, const std::vector& propertyVectors) { - KU_ASSERT(nodeIDVector->state->selVector->selectedSize == 1); - auto localNodeGroup = - ku_dynamic_cast(getOrCreateLocalNodeGroup(nodeIDVector)); - KU_ASSERT(localNodeGroup); - localNodeGroup->insert(nodeIDVector, propertyVectors); -} - -void LocalNodeTableData::update( - ValueVector* nodeIDVector, column_id_t columnID, ValueVector* propertyVector) { - auto localNodeGroup = - ku_dynamic_cast(getOrCreateLocalNodeGroup(nodeIDVector)); - KU_ASSERT(localNodeGroup); - localNodeGroup->update(nodeIDVector, columnID, propertyVector); -} - -void LocalNodeTableData::delete_(ValueVector* nodeIDVector) { - auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[0]; - auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset; - auto nodeGroupIdx = StorageUtils::getNodeGroupIdx(nodeOffset); - if (!nodeGroups.contains(nodeGroupIdx)) { - return; - } - auto localNodeGroup = - ku_dynamic_cast(nodeGroups.at(nodeGroupIdx).get()); - localNodeGroup->delete_(nodeIDVector); -} - LocalNodeGroup* LocalNodeTableData::getOrCreateLocalNodeGroup(common::ValueVector* nodeIDVector) { auto nodeIDPos = nodeIDVector->state->selVector->selectedPositions[0]; auto nodeOffset = nodeIDVector->getValue(nodeIDPos).offset; diff --git a/src/storage/local_storage/local_rel_table.cpp b/src/storage/local_storage/local_rel_table.cpp index ffdc53a8a07..7f40321acb8 100644 --- a/src/storage/local_storage/local_rel_table.cpp +++ 
b/src/storage/local_storage/local_rel_table.cpp @@ -1,6 +1,5 @@ #include "storage/local_storage/local_rel_table.h" -#include "common/cast.h" #include "storage/storage_utils.h" using namespace kuzu::common; @@ -8,181 +7,69 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -bool RelNGInfo::insert(offset_t srcOffsetInChunk, offset_t relOffset, row_idx_t adjNodeRowIdx, - const std::vector& propertyNodesRowIdx) { - KU_ASSERT(propertyNodesRowIdx.size() == insertInfoPerChunk.size()); - if (deleteInfo.contains(srcOffsetInChunk) && - contains(deleteInfo.at(srcOffsetInChunk), relOffset)) { - deleteInfo.at(srcOffsetInChunk).erase(relOffset); - } - if (adjInsertInfo.contains(srcOffsetInChunk)) { - if (multiplicity == RelMultiplicity::ONE) { - throw RuntimeException("Inserting multiple edges to a single node in a " - "ONE_ONE/MANY_ONE relationship is not allowed."); - } - adjInsertInfo.at(srcOffsetInChunk)[relOffset] = adjNodeRowIdx; - } else { - adjInsertInfo[srcOffsetInChunk] = {{relOffset, adjNodeRowIdx}}; - } - for (auto i = 0u; i < propertyNodesRowIdx.size(); ++i) { - if (insertInfoPerChunk[i].contains(srcOffsetInChunk)) { - insertInfoPerChunk[i].at(srcOffsetInChunk)[relOffset] = propertyNodesRowIdx[i]; - } else { - insertInfoPerChunk[i][srcOffsetInChunk] = {{relOffset, propertyNodesRowIdx[i]}}; - } - } - return false; -} - -void RelNGInfo::update( - offset_t srcOffsetInChunk, offset_t relOffset, column_id_t columnID, row_idx_t rowIdx) { - // REL_ID_COLUMN_ID is immutable. - KU_ASSERT(columnID != REL_ID_COLUMN_ID && columnID < updateInfoPerChunk.size()); - if (deleteInfo.contains(srcOffsetInChunk) && - contains(deleteInfo.at(srcOffsetInChunk), relOffset)) { - // We choose to ignore the update operation if the node is deleted. - return; - } - if (insertInfoPerChunk[columnID].contains(srcOffsetInChunk) && - insertInfoPerChunk[columnID].at(srcOffsetInChunk).contains(relOffset)) { - // Update newly inserted value. 
- insertInfoPerChunk[columnID].at(srcOffsetInChunk)[relOffset] = rowIdx; - } else { - if (updateInfoPerChunk[columnID].contains(srcOffsetInChunk)) { - updateInfoPerChunk[columnID].at(srcOffsetInChunk)[relOffset] = rowIdx; - } else { - updateInfoPerChunk[columnID][srcOffsetInChunk] = {{relOffset, rowIdx}}; - } - } -} - -bool RelNGInfo::delete_(offset_t srcOffsetInChunk, offset_t relOffset) { - if (adjInsertInfo.contains(srcOffsetInChunk) && - adjInsertInfo.at(srcOffsetInChunk).contains(relOffset)) { - // Delete newly inserted tuple. - adjInsertInfo.at(srcOffsetInChunk).erase(relOffset); - for (auto& insertInfo : insertInfoPerChunk) { - insertInfo.at(srcOffsetInChunk).erase(relOffset); - } - } else { - if (deleteInfo.contains(srcOffsetInChunk)) { - if (deleteInfo.at(srcOffsetInChunk).contains(relOffset)) { - // The node is already deleted. - return false; - } else { - deleteInfo.at(srcOffsetInChunk).insert(relOffset); - } - } else { - deleteInfo[srcOffsetInChunk] = {relOffset}; - } - } - return true; -} - -bool RelNGInfo::hasUpdates() { - for (auto& updateInfo : updateInfoPerChunk) { - if (!updateInfo.empty()) { - return true; - } - } - return false; -} - -const update_insert_info_t& RelNGInfo::getEmptyInfo() { - static update_insert_info_t emptyInfo; - return emptyInfo; -} - -uint64_t RelNGInfo::getNumInsertedTuples(offset_t srcOffsetInChunk) { - return adjInsertInfo.contains(srcOffsetInChunk) ? 
adjInsertInfo.at(srcOffsetInChunk).size() : 0; -} - LocalRelNG::LocalRelNG(offset_t nodeGroupStartOffset, std::vector dataTypes, - MemoryManager* mm, common::RelMultiplicity multiplicity) - : LocalNodeGroup{nodeGroupStartOffset, std::move(dataTypes), mm} { - relNGInfo = std::make_unique(multiplicity, chunks.size()); - adjChunk = std::make_unique(LogicalType::INTERNAL_ID(), mm); -} + MemoryManager* mm, RelMultiplicity multiplicity) + : LocalNodeGroup{nodeGroupStartOffset, std::move(dataTypes), mm}, multiplicity{multiplicity} {} -// TODO(Guodong): We should change the map between relID and rowIdx to a vector of pairs, which is -// more friendly for scan. row_idx_t LocalRelNG::scanCSR(offset_t srcOffsetInChunk, offset_t posToReadForOffset, const std::vector& columnIDs, const std::vector& outputVectors) { - KU_ASSERT(columnIDs.size() + 1 == outputVectors.size()); - KU_ASSERT(relNGInfo->adjInsertInfo.contains(srcOffsetInChunk)); - uint64_t posInVector = 0; - auto iteratorIdx = 0u; - for (auto& [relID, rowIdx] : relNGInfo->adjInsertInfo.at(srcOffsetInChunk)) { - if (iteratorIdx++ < posToReadForOffset) { - continue; + KU_ASSERT(columnIDs.size() == outputVectors.size()); + std::vector rowIdxesToRead; + rowIdxesToRead.reserve(DEFAULT_VECTOR_CAPACITY); + auto& insertedRelOffsets = insertChunks.getRelOffsetsFromSrcOffset(srcOffsetInChunk); + for (auto i = posToReadForOffset; i < insertedRelOffsets.size(); i++) { + if (rowIdxesToRead.size() == DEFAULT_VECTOR_CAPACITY) { + break; } - auto posInLocalVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - outputVectors[0]->copyFromVectorData( - posInVector++, adjChunk->getLocalVector(rowIdx)->getVector(), posInLocalVector); + rowIdxesToRead.push_back(insertChunks.getRowIdxFromOffset(insertedRelOffsets[i])); } - for (auto i = 0u; i < columnIDs.size(); ++i) { - auto columnID = columnIDs[i]; - posInVector = 0; - iteratorIdx = 0u; - auto& insertInfo = relNGInfo->insertInfoPerChunk[columnID]; - 
KU_ASSERT(insertInfo.contains(srcOffsetInChunk)); - for (auto& [relID, rowIdx] : insertInfo.at(srcOffsetInChunk)) { - if (iteratorIdx++ < posToReadForOffset) { - continue; - } - auto posInLocalVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - outputVectors[i + 1]->copyFromVectorData(posInVector++, - chunks[columnID]->getLocalVector(rowIdx)->getVector(), posInLocalVector); + for (auto i = 0u; i < columnIDs.size(); i++) { + uint64_t posInOutputVector = 0; + for (auto rowIdx : rowIdxesToRead) { + insertChunks.readValueAtRowIdx( + rowIdx, columnIDs[i], outputVectors[i], posInOutputVector++); } } - outputVectors[0]->state->selVector->resetSelectorToUnselectedWithSize(posInVector); - return posInVector; + auto numRelsRead = rowIdxesToRead.size(); + outputVectors[0]->state->selVector->resetSelectorToUnselectedWithSize(numRelsRead); + return numRelsRead; } -void LocalRelNG::applyLocalChangesForCSRColumns(offset_t srcOffsetInChunk, +void LocalRelNG::applyLocalChangesToScannedVectors(offset_t srcOffset, const std::vector& columnIDs, ValueVector* relIDVector, const std::vector& outputVectors) { - KU_ASSERT(columnIDs.size() + 1 == outputVectors.size()); + KU_ASSERT(columnIDs.size() == outputVectors.size()); // Apply updates first, as applying deletions might change selected state. for (auto i = 0u; i < columnIDs.size(); ++i) { - applyCSRUpdates(srcOffsetInChunk, columnIDs[i], relIDVector, outputVectors[i + 1]); + applyCSRUpdates(columnIDs[i], relIDVector, outputVectors[i]); } // Apply deletions and update selVector if necessary. 
- if (relNGInfo->deleteInfo.contains(srcOffsetInChunk) && - relNGInfo->deleteInfo.at(srcOffsetInChunk).size() > 0) { - applyCSRDeletions(srcOffsetInChunk, relNGInfo->deleteInfo, relIDVector); - } + applyCSRDeletions(srcOffset, relIDVector); } -void LocalRelNG::applyCSRUpdates(offset_t srcOffsetInChunk, column_id_t columnID, - ValueVector* relIDVector, ValueVector* outputVector) { - auto updateInfo = relNGInfo->updateInfoPerChunk[columnID]; - if (!updateInfo.contains(srcOffsetInChunk) || updateInfo.at(srcOffsetInChunk).empty()) { - return; - } - auto& updateInfoForOffset = updateInfo.at(srcOffsetInChunk); +void LocalRelNG::applyCSRUpdates( + column_id_t columnID, ValueVector* relIDVector, ValueVector* outputVector) { + auto& updateChunk = updateChunks[columnID]; for (auto i = 0u; i < relIDVector->state->selVector->selectedSize; i++) { auto pos = relIDVector->state->selVector->selectedPositions[i]; auto relOffset = relIDVector->getValue(pos).offset; - if (updateInfoForOffset.contains(relOffset)) { - auto rowIdx = updateInfoForOffset.at(relOffset); - auto posInLocalVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - outputVector->copyFromVectorData( - pos, chunks[columnID]->getLocalVector(rowIdx)->getVector(), posInLocalVector); + if (updateChunk.hasOffset(relOffset)) { + updateChunk.read(relOffset, 0, outputVector, pos); } } } -void LocalRelNG::applyCSRDeletions( - offset_t srcOffsetInChunk, const delete_info_t& deleteInfo, ValueVector* relIDVector) { - auto& deleteInfoForOffset = deleteInfo.at(srcOffsetInChunk); +void LocalRelNG::applyCSRDeletions(offset_t srcOffset, ValueVector* relIDVector) { + if (deleteInfo.isEmpty(srcOffset)) { + return; + } auto selectPos = 0u; auto selVector = std::make_unique(DEFAULT_VECTOR_CAPACITY); selVector->resetSelectorToValuePosBuffer(); for (auto i = 0u; i < relIDVector->state->selVector->selectedSize; i++) { auto relIDPos = relIDVector->state->selVector->selectedPositions[i]; auto relOffset = relIDVector->getValue(relIDPos).offset; - 
if (deleteInfoForOffset.contains(relOffset)) { + if (deleteInfo.containsOffset(relOffset)) { continue; } selVector->selectedPositions[selectPos++] = relIDPos; @@ -195,88 +82,116 @@ void LocalRelNG::applyCSRDeletions( } } -bool LocalRelNG::insert(ValueVector* srcNodeIDVector, ValueVector* dstNodeIDVector, - const std::vector& propertyVectors) { - KU_ASSERT(propertyVectors.size() == chunks.size() && propertyVectors.size() >= 1); - auto adjNodeIDRowIdx = adjChunk->append(dstNodeIDVector); - std::vector propertyValuesRowIdx; - propertyValuesRowIdx.reserve(propertyVectors.size()); - for (auto i = 0u; i < propertyVectors.size(); ++i) { - propertyValuesRowIdx.push_back(chunks[i]->append(propertyVectors[i])); - } +// nodeIDVectors: srcNodeIDVector, dstNodeIDVector. +bool LocalRelNG::insert( + std::vector nodeIDVectors, std::vector propertyVectors) { + KU_ASSERT(nodeIDVectors.size() == 2); + auto srcNodeIDVector = nodeIDVectors[0]; + auto dstNodeIDVector = nodeIDVectors[1]; + KU_ASSERT(srcNodeIDVector->state->selVector->selectedSize == 1 && + dstNodeIDVector->state->selVector->selectedSize == 1); auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; + auto dstNodeIDPos = dstNodeIDVector->state->selVector->selectedPositions[0]; + if (srcNodeIDVector->isNull(srcNodeIDPos) || dstNodeIDVector->isNull(dstNodeIDPos)) { + return false; + } auto srcNodeOffset = srcNodeIDVector->getValue(srcNodeIDPos).offset - nodeGroupStartOffset; KU_ASSERT(srcNodeOffset < StorageConstants::NODE_GROUP_SIZE); - auto relIDPos = propertyVectors[REL_ID_COLUMN_ID]->state->selVector->selectedPositions[0]; - auto relOffset = propertyVectors[REL_ID_COLUMN_ID]->getValue(relIDPos).offset; - return relNGInfo->insert(srcNodeOffset, relOffset, adjNodeIDRowIdx, propertyValuesRowIdx); + std::vector vectorsToInsert; + vectorsToInsert.push_back(dstNodeIDVector); + for (auto i = 0u; i < propertyVectors.size(); i++) { + vectorsToInsert.push_back(propertyVectors[i]); + } + auto relIDPos = 
vectorsToInsert[LOCAL_REL_ID_COLUMN_ID]->state->selVector->selectedPositions[0]; + auto relOffset = vectorsToInsert[LOCAL_REL_ID_COLUMN_ID]->getValue(relIDPos).offset; + insertChunks.append(srcNodeOffset, relOffset, vectorsToInsert); + return true; } -void LocalRelNG::update(ValueVector* srcNodeIDVector, ValueVector* relIDVector, - column_id_t columnID, ValueVector* propertyVector) { - KU_ASSERT(columnID < chunks.size()); - auto rowIdx = chunks[columnID]->append(propertyVector); +// IDVectors: srcNodeIDVector, relIDVector. +bool LocalRelNG::update( + std::vector IDVectors, column_id_t columnID, ValueVector* propertyVector) { + KU_ASSERT(IDVectors.size() == 2); + auto srcNodeIDVector = IDVectors[0]; + auto relIDVector = IDVectors[1]; + KU_ASSERT(srcNodeIDVector->state->selVector->selectedSize == 1 && + relIDVector->state->selVector->selectedSize == 1); auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; + auto relIDPos = relIDVector->state->selVector->selectedPositions[0]; + if (srcNodeIDVector->isNull(srcNodeIDPos) || relIDVector->isNull(relIDPos)) { + return false; + } auto srcNodeOffset = srcNodeIDVector->getValue(srcNodeIDPos).offset - nodeGroupStartOffset; - KU_ASSERT(srcNodeOffset < StorageConstants::NODE_GROUP_SIZE); - auto relIDPos = relIDVector->state->selVector->selectedPositions[0]; + KU_ASSERT(srcNodeOffset < StorageConstants::NODE_GROUP_SIZE && columnID < updateChunks.size()); auto relOffset = relIDVector->getValue(relIDPos).offset; - relNGInfo->update(srcNodeOffset, relOffset, columnID, rowIdx); + // Check if the rel is newly inserted or in persistent storage. 
+ if (insertChunks.hasOffset(relOffset)) { + insertChunks.update(relOffset, columnID, propertyVector); + } else { + updateChunks[columnID].append(srcNodeOffset, relOffset, {propertyVector}); + } + return true; } -bool LocalRelNG::delete_(ValueVector* srcNodeIDVector, ValueVector* relIDVector) { - auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; - auto srcNodeOffset = - srcNodeIDVector->getValue(srcNodeIDPos).offset - nodeGroupStartOffset; - KU_ASSERT(srcNodeOffset < StorageConstants::NODE_GROUP_SIZE); +bool LocalRelNG::delete_(ValueVector* srcNodeVector, ValueVector* relIDVector) { + KU_ASSERT(srcNodeVector->state->selVector->selectedSize == 1 && + relIDVector->state->selVector->selectedSize == 1); + auto srcNodePos = srcNodeVector->state->selVector->selectedPositions[0]; auto relIDPos = relIDVector->state->selVector->selectedPositions[0]; + if (srcNodeVector->isNull(srcNodePos) || relIDVector->isNull(relIDPos)) { + return false; + } + auto srcNodeOffset = + srcNodeVector->getValue(srcNodePos).offset - nodeGroupStartOffset; auto relOffset = relIDVector->getValue(relIDPos).offset; - return relNGInfo->delete_(srcNodeOffset, relOffset); -} - -bool LocalRelTableData::insert(ValueVector* srcNodeIDVector, ValueVector* dstNodeIDVector, - const std::vector& propertyVectors) { - KU_ASSERT(srcNodeIDVector->state->selVector->selectedSize == 1 && - dstNodeIDVector->state->selVector->selectedSize == 1); - auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; - auto dstNodeIDPos = dstNodeIDVector->state->selVector->selectedPositions[0]; - if (srcNodeIDVector->isNull(srcNodeIDPos) || dstNodeIDVector->isNull(dstNodeIDPos)) { + // If the rel is newly inserted, remove the rel from insertChunks. + if (insertChunks.hasOffset(relOffset)) { + insertChunks.remove(srcNodeOffset, relOffset); + return true; + } + // If the rel is updated, remove the rel from updateChunks if exists. 
+ for (auto i = 0u; i < updateChunks.size(); i++) { + if (updateChunks[i].hasOffset(relOffset)) { + updateChunks[i].remove(srcNodeOffset, relOffset); + } + } + if (!deleteInfo.deleteOffset(relOffset)) { return false; } - auto localNodeGroup = - ku_dynamic_cast(getOrCreateLocalNodeGroup(srcNodeIDVector)); - return localNodeGroup->insert(srcNodeIDVector, dstNodeIDVector, propertyVectors); + deleteInfo.deleteRelAux(srcNodeOffset, relOffset); + return true; } -void LocalRelTableData::update(ValueVector* srcNodeIDVector, ValueVector* relIDVector, - column_id_t columnID, ValueVector* propertyVector) { - KU_ASSERT(srcNodeIDVector->state->selVector->selectedSize == 1 && - relIDVector->state->selVector->selectedSize == 1); - auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; - auto relIDPos = relIDVector->state->selVector->selectedPositions[0]; - if (srcNodeIDVector->isNull(srcNodeIDPos) || relIDVector->isNull(relIDPos)) { - return; +offset_t LocalRelNG::getNumInsertedRels(offset_t srcOffset) const { + if (!insertChunks.hasRelOffsetsFromSrcOffset(srcOffset)) { + return 0; } - auto localNodeGroup = - ku_dynamic_cast(getOrCreateLocalNodeGroup(srcNodeIDVector)); - localNodeGroup->update(srcNodeIDVector, relIDVector, columnID, propertyVector); + return insertChunks.getNumRelsFromSrcOffset(srcOffset); } -bool LocalRelTableData::delete_( - ValueVector* srcNodeIDVector, ValueVector* dstNodeIDVector, ValueVector* relIDVector) { - KU_ASSERT(srcNodeIDVector->state->selVector->selectedSize == 1 && - dstNodeIDVector->state->selVector->selectedSize == 1 && - relIDVector->state->selVector->selectedSize == 1); - auto srcNodeIDPos = srcNodeIDVector->state->selVector->selectedPositions[0]; - auto dstNodeIDPos = dstNodeIDVector->state->selVector->selectedPositions[0]; - if (srcNodeIDVector->isNull(srcNodeIDPos) || dstNodeIDVector->isNull(dstNodeIDPos)) { - return false; +void LocalRelNG::getChangesPerCSRSegment( + std::vector& sizeChangesPerSegment, std::vector& 
hasChangesPerSegment) { + auto numSegments = StorageConstants::NODE_GROUP_SIZE / StorageConstants::CSR_SEGMENT_SIZE; + sizeChangesPerSegment.resize(numSegments, 0 /*initValue*/); + hasChangesPerSegment.resize(numSegments, false /*initValue*/); + for (auto& [srcOffset, insertions] : insertChunks.getSrcNodeOffsetToRelOffsets()) { + auto segmentIdx = getSegmentIdx(srcOffset); + sizeChangesPerSegment[segmentIdx] += insertions.size(); + hasChangesPerSegment[segmentIdx] = true; + } + for (auto& [srcOffset, deletions] : deleteInfo.getSrcNodeOffsetToRelOffsetVec()) { + auto segmentIdx = getSegmentIdx(srcOffset); + sizeChangesPerSegment[segmentIdx] -= deletions.size(); + hasChangesPerSegment[segmentIdx] = true; + } + for (auto& updateChunk : updateChunks) { + for (auto& [srcOffset, _] : updateChunk.getSrcNodeOffsetToRelOffsets()) { + auto segmentIdx = getSegmentIdx(srcOffset); + hasChangesPerSegment[segmentIdx] = true; + } } - auto localNodeGroup = - ku_dynamic_cast(getOrCreateLocalNodeGroup(srcNodeIDVector)); - return localNodeGroup->delete_(srcNodeIDVector, relIDVector); } LocalNodeGroup* LocalRelTableData::getOrCreateLocalNodeGroup(ValueVector* nodeIDVector) { diff --git a/src/storage/local_storage/local_table.cpp b/src/storage/local_storage/local_table.cpp index 1310e2904d7..78ba0d62673 100644 --- a/src/storage/local_storage/local_table.cpp +++ b/src/storage/local_storage/local_table.cpp @@ -9,67 +9,131 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -void LocalVector::read( - sel_t offsetInLocalVector, ValueVector* resultVector, sel_t offsetInResultVector) { - resultVector->copyFromVectorData(offsetInResultVector, vector.get(), offsetInLocalVector); +LocalVectorCollection LocalVectorCollection::getStructChildVectorCollection( + struct_field_idx_t idx) const { + LocalVectorCollection childCollection; + for (auto vector : vectors) { + auto fieldVector = StructVector::getFieldVector(vector, idx).get(); + 
childCollection.vectors.push_back(fieldVector); + } + return childCollection; } -void LocalVector::append(ValueVector* valueVector) { - KU_ASSERT(valueVector->state->selVector->selectedSize == 1); - auto pos = valueVector->state->selVector->selectedPositions[0]; - vector->copyFromVectorData(numValues, valueVector, pos); - numValues++; +LocalNodeGroup::LocalNodeGroup( + offset_t nodeGroupStartOffset, std::vector dataTypes, MemoryManager* mm) + : nodeGroupStartOffset{nodeGroupStartOffset}, insertChunks{mm, LogicalType::copy(dataTypes)} { + updateChunks.reserve(dataTypes.size()); + for (auto i = 0u; i < dataTypes.size(); i++) { + std::vector chunkCollectionTypes; + chunkCollectionTypes.push_back(*dataTypes[i]->copy()); + LocalDataChunkCollection localDataChunkCollection(mm, std::move(chunkCollectionTypes)); + updateChunks.push_back(std::move(localDataChunkCollection)); + } } -void LocalVectorCollection::read( - row_idx_t rowIdx, ValueVector* outputVector, sel_t posInOutputVector) { - auto vectorIdx = rowIdx >> DEFAULT_VECTOR_CAPACITY_LOG_2; - auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - KU_ASSERT(vectorIdx < vectors.size()); - vectors[vectorIdx]->read(offsetInVector, outputVector, posInOutputVector); +bool LocalNodeGroup::hasUpdatesOrDeletions() const { + if (!deleteInfo.isEmpty()) { + return true; + } + for (auto& updateChunk : updateChunks) { + if (!updateChunk.isEmpty()) { + return true; + } + } + return false; } -row_idx_t LocalVectorCollection::append(ValueVector* vector) { - prepareAppend(); - auto lastVector = vectors.back().get(); - KU_ASSERT(!lastVector->isFull()); - lastVector->append(vector); - return numRows++; +void LocalDataChunkCollection::readValueAtRowIdx( + row_idx_t rowIdx, column_id_t columnID, ValueVector* outputVector, sel_t posInOutputVector) { + outputVector->copyFromVectorData(posInOutputVector, + dataChunkCollection.getChunkUnSafe(rowIdx >> DEFAULT_VECTOR_CAPACITY_LOG_2) + .getValueVector(columnID) + .get(), + rowIdx % 
DEFAULT_VECTOR_CAPACITY); } -void LocalVectorCollection::prepareAppend() { - if (vectors.empty()) { - vectors.emplace_back(std::make_unique(*dataType, mm)); - } - auto lastVector = vectors.back().get(); - if (lastVector->isFull()) { - vectors.emplace_back(std::make_unique(*dataType, mm)); +bool LocalDataChunkCollection::read( + offset_t offset, column_id_t columnID, ValueVector* outputVector, sel_t posInOutputVector) { + if (!offsetToRowIdx.contains(offset)) { + return false; } + auto rowIdx = offsetToRowIdx.at(offset); + readValueAtRowIdx(rowIdx, columnID, outputVector, posInOutputVector); + return true; } -std::unique_ptr LocalVectorCollection::getStructChildVectorCollection( - common::struct_field_idx_t idx) { - auto childCollection = std::make_unique( - StructType::getField(dataType.get(), idx)->getType()->copy(), mm); +void LocalDataChunkCollection::update( + offset_t offset, column_id_t columnID, ValueVector* propertyVector) { + KU_ASSERT(offsetToRowIdx.contains(offset)); + auto rowIdx = offsetToRowIdx.at(offset); + dataChunkCollection.getChunkUnSafe(rowIdx >> DEFAULT_VECTOR_CAPACITY_LOG_2) + .getValueVector(columnID) + ->copyFromVectorData(rowIdx % DEFAULT_VECTOR_CAPACITY, propertyVector, + propertyVector->state->selVector->selectedPositions[0]); +} - for (auto i = 0u; i < numRows; i++) { - auto fieldVector = - common::StructVector::getFieldVector(getLocalVector(i)->getVector(), idx); - fieldVector->state->selVector->selectedPositions[0] = i & (DEFAULT_VECTOR_CAPACITY - 1); - childCollection->append(fieldVector.get()); +void LocalDataChunkCollection::remove(offset_t srcNodeOffset, offset_t relOffset) { + KU_ASSERT(srcNodeOffsetToRelOffsets.contains(srcNodeOffset)); + remove(relOffset); + offsetToRowIdx.erase(relOffset); + auto& vec = srcNodeOffsetToRelOffsets.at(srcNodeOffset); + vec.erase(std::remove(vec.begin(), vec.end(), relOffset), vec.end()); + if (vec.empty()) { + srcNodeOffsetToRelOffsets.erase(srcNodeOffset); } - return childCollection; } 
-LocalNodeGroup::LocalNodeGroup( - offset_t nodeGroupStartOffset, std::vector dataTypes, MemoryManager* mm) - : nodeGroupStartOffset{nodeGroupStartOffset} { - chunks.resize(dataTypes.size()); - for (auto i = 0u; i < dataTypes.size(); ++i) { - // To avoid unnecessary memory consumption, we chunk local changes of each column in the - // node group into chunks of size DEFAULT_VECTOR_CAPACITY. - chunks[i] = std::make_unique(dataTypes[i]->copy(), mm); +row_idx_t LocalDataChunkCollection::appendToDataChunkCollection(std::vector vectors) { + KU_ASSERT(vectors.size() == dataTypes.size()); + if (dataChunkCollection.getNumChunks() == 0 || + dataChunkCollection.getChunkUnSafe(dataChunkCollection.getNumChunks() - 1) + .state->selVector->selectedSize == DEFAULT_VECTOR_CAPACITY) { + auto newDataChunk = createNewDataChunk(); + dataChunkCollection.merge(std::move(newDataChunk)); + } + auto& lastDataChunk = + dataChunkCollection.getChunkUnSafe(dataChunkCollection.getNumChunks() - 1); + for (auto i = 0u; i < vectors.size(); i++) { + auto localVector = lastDataChunk.getValueVector(i); + KU_ASSERT(vectors[i]->state->selVector->selectedSize == 1); + auto pos = vectors[i]->state->selVector->selectedPositions[0]; + localVector->copyFromVectorData( + lastDataChunk.state->selVector->selectedSize, vectors[i], pos); + } + lastDataChunk.state->selVector->selectedSize++; + KU_ASSERT((dataChunkCollection.getNumChunks() - 1) * DEFAULT_VECTOR_CAPACITY + + lastDataChunk.state->selVector->selectedSize == + numRows + 1); + return numRows++; +} + +common::DataChunk LocalDataChunkCollection::createNewDataChunk() { + DataChunk newDataChunk(dataTypes.size()); + for (auto i = 0u; i < dataTypes.size(); i++) { + auto valueVector = std::make_unique(dataTypes[i], mm); + newDataChunk.insert(i, std::move(valueVector)); } + newDataChunk.state->selVector->resetSelectorToValuePosBuffer(); + return newDataChunk; +} + +bool LocalTableData::insert( + std::vector nodeIDVectors, std::vector propertyVectors) { + 
KU_ASSERT(nodeIDVectors.size() >= 1); + auto localNodeGroup = getOrCreateLocalNodeGroup(nodeIDVectors[0]); + return localNodeGroup->insert(nodeIDVectors, propertyVectors); +} + +bool LocalTableData::update( + std::vector nodeIDVectors, column_id_t columnID, ValueVector* propertyVector) { + KU_ASSERT(nodeIDVectors.size() >= 1); + auto localNodeGroup = getOrCreateLocalNodeGroup(nodeIDVectors[0]); + return localNodeGroup->update(nodeIDVectors, columnID, propertyVector); +} + +bool LocalTableData::delete_(ValueVector* nodeIDVector, ValueVector* extraVector) { + auto localNodeGroup = getOrCreateLocalNodeGroup(nodeIDVector); + return localNodeGroup->delete_(nodeIDVector, extraVector); } LocalTableData* LocalTable::getOrCreateLocalTableData( diff --git a/src/storage/stats/property_statistics.cpp b/src/storage/stats/property_statistics.cpp index 23c59ed0da3..fb808b30a16 100644 --- a/src/storage/stats/property_statistics.cpp +++ b/src/storage/stats/property_statistics.cpp @@ -27,7 +27,7 @@ std::unique_ptr PropertyStatistics::deserialize( bool RWPropertyStats::mayHaveNull(const transaction::Transaction& transaction) { // Columns internal to the storage, i.e., not mapping to a property in table schema, are not // tracked in statistics. For example, offset of var list column, csr offset column, etc. - // TODO(Guodong): INVALID_PROPERTY_ID is used here because we have a column, i.e., adjColumn, + // TODO(Guodong): INVALID_PROPERTY_ID is used here because we have a column, i.e., nbrIDColumn, // not exposed as property in table schema, but still have nullColumn. Should be fixed once we // properly align properties and chunks. 
if (propertyID == common::INVALID_PROPERTY_ID) { @@ -40,7 +40,7 @@ bool RWPropertyStats::mayHaveNull(const transaction::Transaction& transaction) { } void RWPropertyStats::setHasNull(const transaction::Transaction& transaction) { - // TODO(Guodong): INVALID_PROPERTY_ID is used here because we have a column, i.e., adjColumn, + // TODO(Guodong): INVALID_PROPERTY_ID is used here because we have a column, i.e., nbrIDColumn, // not exposed as property in table schema, but still have nullColumn. Should be fixed once we // properly align properties and chunks. if (propertyID != common::INVALID_PROPERTY_ID) { diff --git a/src/storage/stats/rel_table_statistics.cpp b/src/storage/stats/rel_table_statistics.cpp index bc85da64365..8bd6e054127 100644 --- a/src/storage/stats/rel_table_statistics.cpp +++ b/src/storage/stats/rel_table_statistics.cpp @@ -22,18 +22,17 @@ RelTableStats::RelTableStats(BMFileHandle* metadataFH, const catalog::TableCatal LogicalType{LogicalTypeID::INT64}, *metadataFH, bufferManager, wal); bwdCSRLengthMetadataDAHInfo = TablesStatistics::createMetadataDAHInfo( LogicalType{LogicalTypeID::INT64}, *metadataFH, bufferManager, wal); - fwdAdjMetadataDAHInfo = TablesStatistics::createMetadataDAHInfo( - LogicalType{LogicalTypeID::INTERNAL_ID}, *metadataFH, bufferManager, wal); - bwdAdjMetadataDAHInfo = TablesStatistics::createMetadataDAHInfo( - LogicalType{LogicalTypeID::INTERNAL_ID}, *metadataFH, bufferManager, wal); - fwdPropertyMetadataDAHInfos.clear(); - bwdPropertyMetadataDAHInfos.clear(); - fwdPropertyMetadataDAHInfos.reserve(tableEntry.getNumProperties()); - bwdPropertyMetadataDAHInfos.reserve(tableEntry.getNumProperties()); + KU_ASSERT(fwdMetadataDAHInfos.empty() && bwdMetadataDAHInfos.empty()); + fwdMetadataDAHInfos.reserve(tableEntry.getNumProperties() + 1); + bwdMetadataDAHInfos.reserve(tableEntry.getNumProperties() + 1); + fwdMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + LogicalType{LogicalTypeID::INTERNAL_ID}, *metadataFH, 
bufferManager, wal)); + bwdMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + LogicalType{LogicalTypeID::INTERNAL_ID}, *metadataFH, bufferManager, wal)); for (auto& property : tableEntry.getPropertiesRef()) { - fwdPropertyMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + fwdMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( *property.getDataType(), *metadataFH, bufferManager, wal)); - bwdPropertyMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( + bwdMetadataDAHInfos.push_back(TablesStatistics::createMetadataDAHInfo( *property.getDataType(), *metadataFH, bufferManager, wal)); } } @@ -48,17 +47,15 @@ RelTableStats::RelTableStats(const RelTableStats& other) : TableStatistics{other bwdCSROffsetMetadataDAHInfo = other.bwdCSROffsetMetadataDAHInfo->copy(); bwdCSRLengthMetadataDAHInfo = other.bwdCSRLengthMetadataDAHInfo->copy(); } - fwdAdjMetadataDAHInfo = other.fwdAdjMetadataDAHInfo->copy(); - bwdAdjMetadataDAHInfo = other.bwdAdjMetadataDAHInfo->copy(); - fwdPropertyMetadataDAHInfos.clear(); - fwdPropertyMetadataDAHInfos.reserve(other.fwdPropertyMetadataDAHInfos.size()); - for (auto& metadataDAHInfo : other.fwdPropertyMetadataDAHInfos) { - fwdPropertyMetadataDAHInfos.push_back(metadataDAHInfo->copy()); + fwdMetadataDAHInfos.clear(); + fwdMetadataDAHInfos.reserve(other.fwdMetadataDAHInfos.size()); + for (auto& metadataDAHInfo : other.fwdMetadataDAHInfos) { + fwdMetadataDAHInfos.push_back(metadataDAHInfo->copy()); } - bwdPropertyMetadataDAHInfos.clear(); - bwdPropertyMetadataDAHInfos.reserve(other.bwdPropertyMetadataDAHInfos.size()); - for (auto& metadataDAHInfo : other.bwdPropertyMetadataDAHInfos) { - bwdPropertyMetadataDAHInfos.push_back(metadataDAHInfo->copy()); + bwdMetadataDAHInfos.clear(); + bwdMetadataDAHInfos.reserve(other.bwdMetadataDAHInfos.size()); + for (auto& metadataDAHInfo : other.bwdMetadataDAHInfos) { + bwdMetadataDAHInfos.push_back(metadataDAHInfo->copy()); } } @@ -68,10 +65,8 @@ 
void RelTableStats::serializeInternal(Serializer& serializer) { serializer.serializeOptionalValue(bwdCSROffsetMetadataDAHInfo); serializer.serializeOptionalValue(fwdCSRLengthMetadataDAHInfo); serializer.serializeOptionalValue(bwdCSRLengthMetadataDAHInfo); - fwdAdjMetadataDAHInfo->serialize(serializer); - bwdAdjMetadataDAHInfo->serialize(serializer); - serializer.serializeVectorOfPtrs(fwdPropertyMetadataDAHInfos); - serializer.serializeVectorOfPtrs(bwdPropertyMetadataDAHInfos); + serializer.serializeVectorOfPtrs(fwdMetadataDAHInfos); + serializer.serializeVectorOfPtrs(bwdMetadataDAHInfos); } std::unique_ptr RelTableStats::deserialize( @@ -84,8 +79,6 @@ std::unique_ptr RelTableStats::deserialize( deserializer.deserializeOptionalValue(bwdCSROffsetMetadataDAHInfo); deserializer.deserializeOptionalValue(fwdCSRLengthMetadataDAHInfo); deserializer.deserializeOptionalValue(bwdCSRLengthMetadataDAHInfo); - auto fwdNbrIDMetadataDAHInfo = MetadataDAHInfo::deserialize(deserializer); - auto bwdNbrIDMetadataDAHInfo = MetadataDAHInfo::deserialize(deserializer); std::vector> fwdPropertyMetadataDAHInfos; std::vector> bwdPropertyMetadataDAHInfos; deserializer.deserializeVectorOfPtrs(fwdPropertyMetadataDAHInfos); @@ -95,10 +88,8 @@ std::unique_ptr RelTableStats::deserialize( result->bwdCSROffsetMetadataDAHInfo = std::move(bwdCSROffsetMetadataDAHInfo); result->fwdCSRLengthMetadataDAHInfo = std::move(fwdCSRLengthMetadataDAHInfo); result->bwdCSRLengthMetadataDAHInfo = std::move(bwdCSRLengthMetadataDAHInfo); - result->fwdAdjMetadataDAHInfo = std::move(fwdNbrIDMetadataDAHInfo); - result->bwdAdjMetadataDAHInfo = std::move(bwdNbrIDMetadataDAHInfo); - result->fwdPropertyMetadataDAHInfos = std::move(fwdPropertyMetadataDAHInfos); - result->bwdPropertyMetadataDAHInfos = std::move(bwdPropertyMetadataDAHInfos); + result->fwdMetadataDAHInfos = std::move(fwdPropertyMetadataDAHInfos); + result->bwdMetadataDAHInfos = std::move(bwdPropertyMetadataDAHInfos); return result; } diff --git 
a/src/storage/stats/rels_store_statistics.cpp b/src/storage/stats/rels_store_statistics.cpp index 26d17385593..660bae4bded 100644 --- a/src/storage/stats/rels_store_statistics.cpp +++ b/src/storage/stats/rels_store_statistics.cpp @@ -93,22 +93,13 @@ MetadataDAHInfo* RelsStoreStats::getCSRLengthMetadataDAHInfo( return tableStats->getCSRLengthMetadataDAHInfo(direction); } -MetadataDAHInfo* RelsStoreStats::getAdjMetadataDAHInfo( - Transaction* transaction, table_id_t tableID, RelDataDirection direction) { - if (transaction->isWriteTransaction()) { - initTableStatisticsForWriteTrx(); - } - auto tableStats = getRelStatistics(tableID, transaction); - return tableStats->getAdjMetadataDAHInfo(direction); -} - -MetadataDAHInfo* RelsStoreStats::getPropertyMetadataDAHInfo(transaction::Transaction* transaction, - table_id_t tableID, column_id_t columnID, RelDataDirection direction) { +MetadataDAHInfo* RelsStoreStats::getColumnMetadataDAHInfo(transaction::Transaction* transaction, + common::table_id_t tableID, common::column_id_t columnID, common::RelDataDirection direction) { if (transaction->isWriteTransaction()) { initTableStatisticsForWriteTrx(); } auto relTableStats = getRelStatistics(tableID, transaction); - return relTableStats->getPropertyMetadataDAHInfo(columnID, direction); + return relTableStats->getColumnMetadataDAHInfo(columnID, direction); } } // namespace storage diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index 8cf5f99e047..f70ba829d79 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -37,8 +37,10 @@ static void setCommonTableIDToRdfRelTable( for (auto rdfEntry : rdfEntries) { if (rdfEntry->isParent(relTable->getTableID())) { std::vector columns; - columns.push_back(relTable->getDirectedTableData(RelDataDirection::FWD)->getColumn(1)); - columns.push_back(relTable->getDirectedTableData(RelDataDirection::BWD)->getColumn(1)); + // TODO(Guodong): This is a hack. 
We should not use constant 2 and should move the + // setting logic inside RelTableData. + columns.push_back(relTable->getDirectedTableData(RelDataDirection::FWD)->getColumn(2)); + columns.push_back(relTable->getDirectedTableData(RelDataDirection::BWD)->getColumn(2)); for (auto& column : columns) { ku_dynamic_cast(column) ->setCommonTableID(rdfEntry->getResourceTableID()); diff --git a/src/storage/storage_utils.cpp b/src/storage/storage_utils.cpp index b29197608ba..d346c0f54cf 100644 --- a/src/storage/storage_utils.cpp +++ b/src/storage/storage_utils.cpp @@ -37,9 +37,6 @@ std::string StorageUtils::getColumnName( case StorageUtils::ColumnType::CSR_LENGTH: { return stringFormat("{}_csr_length", prefix); } - case StorageUtils::ColumnType::ADJ: { - return stringFormat("{}_adj", prefix); - } case StorageUtils::ColumnType::STRUCT_CHILD: { return stringFormat("{}_{}_child", propertyName, prefix); } diff --git a/src/storage/store/column.cpp b/src/storage/store/column.cpp index e9e78bfa7f6..36be1b987c1 100644 --- a/src/storage/store/column.cpp +++ b/src/storage/store/column.cpp @@ -122,8 +122,8 @@ class SerialColumn final : public Column { } } - bool canCommitInPlace(Transaction* /*transaction*/, node_group_idx_t /*nodeGroupIdx*/, - LocalVectorCollection* /*localChunk*/, const offset_to_row_idx_t& /*insertInfo*/, + bool canCommitInPlace(Transaction*, node_group_idx_t, const LocalVectorCollection&, + const offset_to_row_idx_t&, const LocalVectorCollection&, const offset_to_row_idx_t& updateInfo) override { KU_ASSERT(updateInfo.empty()); return true; @@ -139,8 +139,9 @@ class SerialColumn final : public Column { } void commitLocalChunkInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* /*localChunk*/, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) override { + const LocalVectorCollection& /*localChunk*/, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection&, 
const offset_to_row_idx_t& updateInfo, + const offset_set_t& deleteInfo) override { KU_ASSERT(updateInfo.empty() && deleteInfo.empty()); auto chunkMeta = metadataDA->get(nodeGroupIdx, transaction->getType()); auto numValues = chunkMeta.numValues; @@ -156,8 +157,8 @@ class SerialColumn final : public Column { } void commitLocalChunkOutOfPlace(Transaction* /*transaction*/, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* /*localChunk*/, bool isNewNodeGroup, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo, + bool isNewNodeGroup, const LocalVectorCollection&, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection&, const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) override { KU_ASSERT(isNewNodeGroup && updateInfo.empty() && deleteInfo.empty()); // Only when a new node group is created, we need to commit out of place. @@ -200,7 +201,7 @@ InternalIDColumn::InternalIDColumn(std::string name, const MetadataDAHInfo& meta BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal, transaction::Transaction* transaction, RWPropertyStats stats) : Column{name, *LogicalType::INTERNAL_ID(), metaDAHeaderInfo, dataFH, metadataFH, bufferManager, - wal, transaction, stats, false /* enableCompression */}, + wal, transaction, stats, false /*enableCompression*/}, commonTableID{INVALID_TABLE_ID} {} void InternalIDColumn::populateCommonTableID(ValueVector* resultVector) const { @@ -466,7 +467,7 @@ static bool sanityCheckForWrites(const ColumnChunkMetadata& metadata, const Logi } void Column::append(ColumnChunk* columnChunk, uint64_t nodeGroupIdx) { - KU_ASSERT(enableCompression == columnChunk->isCompressionEnabled()); + // KU_ASSERT(enableCompression == columnChunk->isCompressionEnabled()); // Main column chunk. 
auto preScanMetadata = columnChunk->getMetadataToFlush(); auto startPageIdx = dataFH->addNewPages(preScanMetadata.numPages); @@ -573,41 +574,43 @@ Column::ReadState Column::getReadState( } void Column::prepareCommitForChunk(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localColumnChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) { + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, + const offset_set_t& deleteInfo) { auto currentNumNodeGroups = metadataDA->getNumElements(transaction->getType()); auto isNewNodeGroup = nodeGroupIdx >= currentNumNodeGroups; if (isNewNodeGroup) { // If this is a new node group, updateInfo should be empty. We should perform out-of-place // commit with a new column chunk. - commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, localColumnChunk, isNewNodeGroup, - insertInfo, updateInfo, deleteInfo); + commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, isNewNodeGroup, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo, deleteInfo); } else { bool didInPlaceCommit = false; // If this is not a new node group, we should first check if we can perform in-place commit. 
- if (canCommitInPlace(transaction, nodeGroupIdx, localColumnChunk, insertInfo, updateInfo)) { - commitLocalChunkInPlace( - transaction, nodeGroupIdx, localColumnChunk, insertInfo, updateInfo, deleteInfo); + if (canCommitInPlace(transaction, nodeGroupIdx, localInsertChunk, insertInfo, + localUpdateChunk, updateInfo)) { + commitLocalChunkInPlace(transaction, nodeGroupIdx, localInsertChunk, insertInfo, + localUpdateChunk, updateInfo, deleteInfo); didInPlaceCommit = true; } else { - commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, localColumnChunk, isNewNodeGroup, - insertInfo, updateInfo, deleteInfo); + commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, isNewNodeGroup, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo, deleteInfo); } // TODO(Guodong/Ben): The logic here on NullColumn is confusing as out-of-place commits and // in-place commits handle it differently. See if we can unify them. if (nullColumn) { // Uses functions written for the null chunk which only access the localColumnChunk's // null information - if (nullColumn->canCommitInPlace( - transaction, nodeGroupIdx, localColumnChunk, insertInfo, updateInfo)) { - nullColumn->commitLocalChunkInPlace(transaction, nodeGroupIdx, localColumnChunk, - insertInfo, updateInfo, deleteInfo); + if (nullColumn->canCommitInPlace(transaction, nodeGroupIdx, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo)) { + nullColumn->commitLocalChunkInPlace(transaction, nodeGroupIdx, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo, deleteInfo); } else if (didInPlaceCommit) { // Out-of-place commits also commit the null chunk out of place, // so we only need to do a separate out of place commit for the null chunk if the // main chunk did an in-place commit. 
- nullColumn->commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, localColumnChunk, - isNewNodeGroup, insertInfo, updateInfo, deleteInfo); + nullColumn->commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, isNewNodeGroup, + localInsertChunk, insertInfo, localUpdateChunk, updateInfo, deleteInfo); } } } @@ -667,24 +670,20 @@ bool Column::isMaxOffsetOutOfPagesCapacity( } bool Column::checkUpdateInPlace(const ColumnChunkMetadata& metadata, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo) { + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& writeInfo) { std::vector rowIdxesToRead; - for (auto& [_, rowIdx] : updateInfo) { - rowIdxesToRead.push_back(rowIdx); - } - for (auto& [_, rowIdx] : insertInfo) { + for (auto& [_, rowIdx] : writeInfo) { rowIdxesToRead.push_back(rowIdx); } std::sort(rowIdxesToRead.begin(), rowIdxesToRead.end()); for (auto rowIdx : rowIdxesToRead) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - if (localVector->getVector()->isNull(offsetInVector)) { + if (localVector->isNull(offsetInVector)) { continue; } if (!metadata.compMeta.canUpdateInPlace( - localVector->getVector()->getData(), offsetInVector, dataType.getPhysicalType())) { + localVector->getData(), offsetInVector, dataType.getPhysicalType())) { return false; } } @@ -692,8 +691,8 @@ bool Column::checkUpdateInPlace(const ColumnChunkMetadata& metadata, } bool Column::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo) { + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) { auto metadata = getMetadata(nodeGroupIdx, 
transaction->getType()); if (isInsertionsOutOfPagesCapacity(metadata, insertInfo)) { return false; @@ -701,7 +700,8 @@ bool Column::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGro if (metadata.compMeta.canAlwaysUpdateInPlace()) { return true; } - return checkUpdateInPlace(metadata, localChunk, insertInfo, updateInfo); + return checkUpdateInPlace(metadata, localInsertChunk, insertInfo) && + checkUpdateInPlace(metadata, localUpdateChunk, updateInfo); } bool Column::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, @@ -725,10 +725,11 @@ bool Column::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGro } void Column::commitLocalChunkInPlace(Transaction* /*transaction*/, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo, const offset_set_t& /*deleteInfo*/) { - applyLocalChunkToColumn(nodeGroupIdx, localChunk, updateInfo); - applyLocalChunkToColumn(nodeGroupIdx, localChunk, insertInfo); + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, + const offset_set_t& /*deleteInfo*/) { + applyLocalChunkToColumn(nodeGroupIdx, localUpdateChunk, updateInfo); + applyLocalChunkToColumn(nodeGroupIdx, localInsertChunk, insertInfo); } std::unique_ptr Column::getEmptyChunkForCommit(uint64_t capacity) { @@ -736,20 +737,21 @@ std::unique_ptr Column::getEmptyChunkForCommit(uint64_t capacity) { } void Column::commitLocalChunkOutOfPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, bool isNewNodeGroup, const offset_to_row_idx_t& insertInfo, + bool isNewNodeGroup, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) { auto 
columnChunk = getEmptyChunkForCommit(common::StorageConstants::NODE_GROUP_SIZE); if (isNewNodeGroup) { KU_ASSERT(updateInfo.empty() && deleteInfo.empty()); // Apply inserts from the local chunk. - applyLocalChunkToColumnChunk(localChunk, columnChunk.get(), insertInfo); + applyLocalChunkToColumnChunk(localInsertChunk, columnChunk.get(), insertInfo); } else { // First, scan the whole column chunk from persistent storage. scan(transaction, nodeGroupIdx, columnChunk.get()); // Then, apply updates from the local chunk. - applyLocalChunkToColumnChunk(localChunk, columnChunk.get(), updateInfo); + applyLocalChunkToColumnChunk(localUpdateChunk, columnChunk.get(), updateInfo); // Lastly, apply inserts from the local chunk. - applyLocalChunkToColumnChunk(localChunk, columnChunk.get(), insertInfo); + applyLocalChunkToColumnChunk(localInsertChunk, columnChunk.get(), insertInfo); if (columnChunk->getNullChunk()) { // Set nulls based on deleteInfo. for (auto offsetInChunk : deleteInfo) { @@ -794,23 +796,23 @@ void Column::commitColumnChunkOutOfPlace(Transaction* transaction, node_group_id } } -void Column::applyLocalChunkToColumnChunk(LocalVectorCollection* localChunk, - ColumnChunk* columnChunk, const std::map& updateInfo) { +void Column::applyLocalChunkToColumnChunk(const LocalVectorCollection& localChunk, + ColumnChunk* columnChunk, const offset_to_row_idx_t& updateInfo) { for (auto& [offsetInChunk, rowIdx] : updateInfo) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - localVector->getVector()->state->selVector->selectedPositions[0] = offsetInVector; - columnChunk->write(localVector->getVector(), offsetInVector, offsetInChunk); + localVector->state->selVector->selectedPositions[0] = offsetInVector; + columnChunk->write(localVector, offsetInVector, offsetInChunk); } } void Column::applyLocalChunkToColumn(node_group_idx_t nodeGroupIdx, - 
LocalVectorCollection* localChunk, const offset_to_row_idx_t& updateInfo) { + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& updateInfo) { for (auto& [offsetInChunk, rowIdx] : updateInfo) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - if (!localVector->getVector()->isNull(offsetInVector)) { - write(nodeGroupIdx, offsetInChunk, localVector->getVector(), offsetInVector); + if (!localVector->isNull(offsetInVector)) { + write(nodeGroupIdx, offsetInChunk, localVector, offsetInVector); } } } diff --git a/src/storage/store/node_group.cpp b/src/storage/store/node_group.cpp index d50c64a0d53..6bd4eb50885 100644 --- a/src/storage/store/node_group.cpp +++ b/src/storage/store/node_group.cpp @@ -125,16 +125,16 @@ offset_t NodeGroup::append(NodeGroup* other, offset_t offsetInOtherNodeGroup) { return numNodesToAppend; } -void NodeGroup::write(DataChunk* dataChunk, vector_idx_t offsetVectorIdx) { - KU_ASSERT(dataChunk->getNumValueVectors() == chunks.size() + 1); - auto offsetVector = dataChunk->getValueVector(offsetVectorIdx).get(); +void NodeGroup::write(DataChunk& dataChunk, vector_idx_t offsetVectorIdx) { + KU_ASSERT(dataChunk.getNumValueVectors() == chunks.size() + 1); + auto offsetVector = dataChunk.getValueVector(offsetVectorIdx).get(); vector_idx_t vectorIdx = 0, chunkIdx = 0; - for (auto i = 0u; i < dataChunk->getNumValueVectors(); i++) { + for (auto i = 0u; i < dataChunk.getNumValueVectors(); i++) { if (i == offsetVectorIdx) { vectorIdx++; continue; } - KU_ASSERT(vectorIdx < dataChunk->getNumValueVectors()); + KU_ASSERT(vectorIdx < dataChunk.getNumValueVectors()); writeToColumnChunk(chunkIdx, vectorIdx, dataChunk, offsetVector); chunkIdx++; vectorIdx++; diff --git a/src/storage/store/node_table_data.cpp b/src/storage/store/node_table_data.cpp index 575a0b2cc64..5fc80c5b0e4 100644 --- 
a/src/storage/store/node_table_data.cpp +++ b/src/storage/store/node_table_data.cpp @@ -45,9 +45,9 @@ void NodeTableData::scan(Transaction* transaction, TableReadState& readState, if (transaction->isWriteTransaction()) { auto localTableData = transaction->getLocalStorage()->getLocalTableData(tableID); if (localTableData) { - auto localRelTableData = + auto localNodeTableData = ku_dynamic_cast(localTableData); - localRelTableData->scan(nodeIDVector, readState.columnIDs, outputVectors); + localNodeTableData->scan(nodeIDVector, readState.columnIDs, outputVectors); } } } @@ -56,22 +56,22 @@ void NodeTableData::insert(Transaction* transaction, ValueVector* nodeIDVector, const std::vector& propertyVectors) { // We assume that offsets are given in the ascending order, thus lastOffset is the max one. KU_ASSERT(nodeIDVector->state->selVector->selectedSize == 1); - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns)); - localTableData->insert(nodeIDVector, propertyVectors); + auto localTableData = + transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns); + localTableData->insert({nodeIDVector}, propertyVectors); } void NodeTableData::update(Transaction* transaction, column_id_t columnID, ValueVector* nodeIDVector, ValueVector* propertyVector) { KU_ASSERT(columnID < columns.size()); - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns)); - localTableData->update(nodeIDVector, columnID, propertyVector); + auto localTableData = + transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns); + localTableData->update({nodeIDVector}, columnID, propertyVector); } void NodeTableData::delete_(Transaction* transaction, ValueVector* nodeIDVector) { - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns)); + auto localTableData = + 
transaction->getLocalStorage()->getOrCreateLocalTableData(tableID, columns); localTableData->delete_(nodeIDVector); } @@ -97,7 +97,7 @@ void NodeTableData::lookup(Transaction* transaction, TableReadState& readState, } } -void NodeTableData::append(kuzu::storage::NodeGroup* nodeGroup) { +void NodeTableData::append(NodeGroup* nodeGroup) { for (auto columnID = 0u; columnID < columns.size(); columnID++) { auto columnChunk = nodeGroup->getColumnChunk(columnID); KU_ASSERT(columnID < columns.size()); @@ -107,17 +107,18 @@ void NodeTableData::append(kuzu::storage::NodeGroup* nodeGroup) { void NodeTableData::prepareLocalTableToCommit( Transaction* transaction, LocalTableData* localTable) { - for (auto& [nodeGroupIdx, nodeGroup] : localTable->nodeGroups) { + for (auto& [nodeGroupIdx, localNodeGroup] : localTable->nodeGroups) { for (auto columnID = 0u; columnID < columns.size(); columnID++) { auto column = columns[columnID].get(); - auto columnChunk = nodeGroup->getLocalColumnChunk(columnID); - if (columnChunk->getNumRows() == 0) { + auto localInsertChunk = localNodeGroup->getInsesrtChunks().getLocalChunk(columnID); + auto localUpdateChunk = localNodeGroup->getUpdateChunks(columnID).getLocalChunk(0); + if (localInsertChunk.isEmpty() && localUpdateChunk.isEmpty()) { continue; } - auto localNodeGroup = ku_dynamic_cast(nodeGroup.get()); - column->prepareCommitForChunk(transaction, nodeGroupIdx, columnChunk, - localNodeGroup->getInsertInfoRef(columnID), - localNodeGroup->getUpdateInfoRef(columnID), {} /* deleteInfo */); + auto localNodeNG = ku_dynamic_cast(localNodeGroup.get()); + column->prepareCommitForChunk(transaction, nodeGroupIdx, localInsertChunk, + localNodeNG->getInsertInfoRef(), localUpdateChunk, + localNodeNG->getUpdateInfoRef(columnID), {} /* deleteInfo */); } } } diff --git a/src/storage/store/null_column.cpp b/src/storage/store/null_column.cpp index 3dbd9761cc2..06e699d14dd 100644 --- a/src/storage/store/null_column.cpp +++ b/src/storage/store/null_column.cpp @@ 
-164,24 +164,27 @@ void NullColumn::write(node_group_idx_t nodeGroupIdx, offset_t offsetInChunk, } bool NullColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo) { + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) { auto metadata = getMetadata(nodeGroupIdx, transaction->getType()); if (metadata.compMeta.canAlwaysUpdateInPlace()) { return true; } + return checkUpdateInPlace(metadata, localInsertChunk, insertInfo) && + checkUpdateInPlace(metadata, localUpdateChunk, updateInfo); +} + +bool NullColumn::checkUpdateInPlace(const ColumnChunkMetadata& metadata, + const LocalVectorCollection& localChunk, const offset_to_row_idx_t& writeInfo) { std::vector rowIdxesToRead; - for (auto& [_, rowIdx] : updateInfo) { - rowIdxesToRead.push_back(rowIdx); - } - for (auto& [_, rowIdx] : insertInfo) { + for (auto& [_, rowIdx] : writeInfo) { rowIdxesToRead.push_back(rowIdx); } std::sort(rowIdxesToRead.begin(), rowIdxesToRead.end()); for (auto rowIdx : rowIdxesToRead) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - bool value = localVector->getVector()->isNull(offsetInVector); + bool value = localVector->isNull(offsetInVector); if (!metadata.compMeta.canUpdateInPlace( reinterpret_cast(&value), 0, dataType.getPhysicalType())) { return false; @@ -217,18 +220,18 @@ bool NullColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nod } void NullColumn::commitLocalChunkInPlace(Transaction* /*transaction*/, - node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo, - const offset_set_t& deleteInfo) 
{ + node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) { for (auto& [offsetInChunk, rowIdx] : updateInfo) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localUpdateChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - write(nodeGroupIdx, offsetInChunk, localVector->getVector(), offsetInVector); + write(nodeGroupIdx, offsetInChunk, localVector, offsetInVector); } for (auto& [offsetInChunk, rowIdx] : insertInfo) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localInsertChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - write(nodeGroupIdx, offsetInChunk, localVector->getVector(), offsetInVector); + write(nodeGroupIdx, offsetInChunk, localVector, offsetInVector); } // Set nulls based on deleteInfo. Note that this code path actually only gets executed when // the column is a regular format one. 
This is not a good design, should be unified with csr diff --git a/src/storage/store/rel_table.cpp b/src/storage/store/rel_table.cpp index def92c61c50..48c1be3023c 100644 --- a/src/storage/store/rel_table.cpp +++ b/src/storage/store/rel_table.cpp @@ -53,10 +53,8 @@ void RelTable::update(transaction::Transaction* transaction, column_id_t columnI void RelTable::delete_(Transaction* transaction, ValueVector* srcNodeIDVector, ValueVector* dstNodeIDVector, ValueVector* relIDVector) { - auto fwdDeleted = - fwdRelTableData->delete_(transaction, srcNodeIDVector, dstNodeIDVector, relIDVector); - auto bwdDeleted = - bwdRelTableData->delete_(transaction, dstNodeIDVector, srcNodeIDVector, relIDVector); + auto fwdDeleted = fwdRelTableData->delete_(transaction, srcNodeIDVector, relIDVector); + auto bwdDeleted = bwdRelTableData->delete_(transaction, dstNodeIDVector, relIDVector); KU_ASSERT(fwdDeleted == bwdDeleted); if (fwdDeleted && bwdDeleted) { auto relsStats = ku_dynamic_cast(tablesStatistics); @@ -72,7 +70,8 @@ void RelTable::detachDelete(Transaction* transaction, RelDataDirection direction auto reverseTableData = direction == RelDataDirection::FWD ? 
bwdRelTableData.get() : fwdRelTableData.get(); auto relDataReadState = std::make_unique(); - initializeReadState(transaction, direction, {0}, srcNodeIDVector, relDataReadState.get()); + initializeReadState( + transaction, direction, {REL_ID_COLUMN_ID}, srcNodeIDVector, relDataReadState.get()); row_idx_t numRelsDeleted = detachDeleteForCSRRels(transaction, tableData, reverseTableData, srcNodeIDVector, relDataReadState.get(), deleteState); auto relsStats = ku_dynamic_cast(tablesStatistics); @@ -106,11 +105,10 @@ row_idx_t RelTable::detachDeleteForCSRRels(Transaction* transaction, RelTableDat tempState->selVector->resetSelectorToValuePosBufferWithSize(1); for (auto i = 0u; i < numRelsScanned; i++) { tempState->selVector->selectedPositions[0] = i; - auto deleted = tableData->delete_(transaction, srcNodeIDVector, - deleteState->dstNodeIDVector.get(), deleteState->relIDVector.get()); - auto reverseDeleted = - reverseTableData->delete_(transaction, deleteState->dstNodeIDVector.get(), - srcNodeIDVector, deleteState->relIDVector.get()); + auto deleted = + tableData->delete_(transaction, srcNodeIDVector, deleteState->relIDVector.get()); + auto reverseDeleted = reverseTableData->delete_( + transaction, deleteState->dstNodeIDVector.get(), deleteState->relIDVector.get()); KU_ASSERT(deleted == reverseDeleted); numRelsDeleted += (deleted && reverseDeleted); } @@ -133,14 +131,14 @@ void RelTable::addColumn( relsStats->addMetadataDAHInfo(tableID, *property.getDataType()); fwdRelTableData->addColumn(transaction, RelDataDirectionUtils::relDirectionToString(RelDataDirection::FWD), - fwdRelTableData->getAdjColumn()->getMetadataDA(), - *relsStats->getPropertyMetadataDAHInfo( + fwdRelTableData->getNbrIDColumn()->getMetadataDA(), + *relsStats->getColumnMetadataDAHInfo( transaction, tableID, fwdRelTableData->getNumColumns(), RelDataDirection::FWD), property, defaultValueVector, relsStats); bwdRelTableData->addColumn(transaction, 
RelDataDirectionUtils::relDirectionToString(RelDataDirection::BWD), - bwdRelTableData->getAdjColumn()->getMetadataDA(), - *relsStats->getPropertyMetadataDAHInfo( + bwdRelTableData->getNbrIDColumn()->getMetadataDA(), + *relsStats->getColumnMetadataDAHInfo( transaction, tableID, bwdRelTableData->getNumColumns(), RelDataDirection::BWD), property, defaultValueVector, relsStats); // TODO(Guodong): addColumn is not going through localStorage design for now. So it needs to add diff --git a/src/storage/store/rel_table_data.cpp b/src/storage/store/rel_table_data.cpp index ef3bcea03c8..61b8c894ea1 100644 --- a/src/storage/store/rel_table_data.cpp +++ b/src/storage/store/rel_table_data.cpp @@ -21,13 +21,13 @@ RelDataReadState::RelDataReadState() bool RelDataReadState::hasMoreToReadFromLocalStorage() const { KU_ASSERT(localNodeGroup); - return posInCurrentCSR < localNodeGroup->getRelNGInfo()->getNumInsertedTuples( - currentNodeOffset - startNodeOffset); + return posInCurrentCSR < + localNodeGroup->getNumInsertedRels(currentNodeOffset - startNodeOffset); } bool RelDataReadState::trySwitchToLocalStorage() { - if (localNodeGroup && localNodeGroup->getRelNGInfo()->getNumInsertedTuples( - currentNodeOffset - startNodeOffset) > 0) { + if (localNodeGroup && + localNodeGroup->getNumInsertedRels(currentNodeOffset - startNodeOffset) > 0) { readFromLocalStorage = true; posInCurrentCSR = 0; return true; @@ -139,21 +139,23 @@ RelTableData::RelTableData(BMFileHandle* dataFH, BMFileHandle* metadataFH, csrHeaderColumns.length = std::make_unique(csrLengthColumnName, *LogicalType::UINT64(), *csrLengthMetadataDAHInfo, dataFH, metadataFH, bufferManager, wal, &DUMMY_WRITE_TRANSACTION, RWPropertyStats::empty(), enableCompression, false /* requireNUllColumn */); - // Adj column. 
- auto adjMetadataDAHInfo = - relsStoreStats->getAdjMetadataDAHInfo(&DUMMY_WRITE_TRANSACTION, tableID, direction); - auto adjColName = StorageUtils::getColumnName( - "", StorageUtils::ColumnType::ADJ, RelDataDirectionUtils::relDirectionToString(direction)); - adjColumn = ColumnFactory::createColumn(adjColName, *LogicalType::INTERNAL_ID(), - *adjMetadataDAHInfo, dataFH, metadataFH, bufferManager, wal, &DUMMY_WRITE_TRANSACTION, + // Columns (nbrID + properties). + auto& properties = tableEntry->getPropertiesRef(); + columns.reserve(properties.size() + 1); + auto nbrIDMetadataDAHInfo = relsStoreStats->getColumnMetadataDAHInfo( + &DUMMY_WRITE_TRANSACTION, tableID, NBR_ID_COLUMN_ID, direction); + auto nbrIDColName = StorageUtils::getColumnName("NBR_ID", StorageUtils::ColumnType::DEFAULT, + RelDataDirectionUtils::relDirectionToString(direction)); + auto nbrIDColumn = ColumnFactory::createColumn(nbrIDColName, *LogicalType::INTERNAL_ID(), + *nbrIDMetadataDAHInfo, dataFH, metadataFH, bufferManager, wal, &DUMMY_WRITE_TRANSACTION, RWPropertyStats::empty(), enableCompression); + columns.push_back(std::move(nbrIDColumn)); // Property columns. 
- auto& properties = tableEntry->getPropertiesRef(); - columns.reserve(properties.size()); for (auto i = 0u; i < properties.size(); i++) { auto& property = properties[i]; - auto metadataDAHInfo = relsStoreStats->getPropertyMetadataDAHInfo( - &DUMMY_WRITE_TRANSACTION, tableID, i, direction); + auto columnID = tableEntry->getColumnID(property.getPropertyID()); + auto metadataDAHInfo = relsStoreStats->getColumnMetadataDAHInfo( + &DUMMY_WRITE_TRANSACTION, tableID, columnID, direction); auto colName = StorageUtils::getColumnName(property.getName(), StorageUtils::ColumnType::DEFAULT, RelDataDirectionUtils::relDirectionToString(direction)); @@ -161,18 +163,22 @@ RelTableData::RelTableData(BMFileHandle* dataFH, BMFileHandle* metadataFH, *metadataDAHInfo, dataFH, metadataFH, bufferManager, wal, &DUMMY_WRITE_TRANSACTION, RWPropertyStats(relsStoreStats, tableID, property.getPropertyID()), enableCompression)); } - // Set common tableID for adjColumn and relIDColumn. + // Set common tableID for nbrIDColumn and relIDColumn. auto nbrTableID = ku_dynamic_cast(tableEntry) ->getNbrTableID(direction); - dynamic_cast(adjColumn.get())->setCommonTableID(nbrTableID); - dynamic_cast(columns[REL_ID_COLUMN_ID].get())->setCommonTableID(tableID); + ku_dynamic_cast(columns[NBR_ID_COLUMN_ID].get()) + ->setCommonTableID(nbrTableID); + ku_dynamic_cast(columns[REL_ID_COLUMN_ID].get()) + ->setCommonTableID(tableID); packedCSRInfo = PackedCSRInfo(); } void RelTableData::initializeReadState(Transaction* transaction, std::vector columnIDs, ValueVector* inNodeIDVector, RelDataReadState* readState) { readState->direction = direction; - readState->columnIDs = std::move(columnIDs); + readState->columnIDs.clear(); + readState->columnIDs.push_back(NBR_ID_COLUMN_ID); + readState->columnIDs.insert(readState->columnIDs.end(), columnIDs.begin(), columnIDs.end()); // Reset to read from persistent storage. 
readState->readFromLocalStorage = false; auto nodeOffset = @@ -214,12 +220,10 @@ void RelTableData::scan(Transaction* transaction, TableReadState& readState, outputVectors[0]->state->selVector->resetSelectorToUnselectedWithSize(numRowsToRead); outputVectors[0]->state->setOriginalSize(numRowsToRead); auto nodeGroupIdx = StorageUtils::getNodeGroupIdx(relReadState.currentNodeOffset); - adjColumn->scan(transaction, nodeGroupIdx, startOffset, endOffset, outputVectors[0], - 0 /* offsetInVector */); auto relIDVectorIdx = INVALID_VECTOR_IDX; for (auto i = 0u; i < relReadState.columnIDs.size(); i++) { auto columnID = relReadState.columnIDs[i]; - auto outputVectorId = i + 1; // Skip output from adj column. + auto outputVectorId = i; // Skip output from nbrID column. if (columnID == INVALID_COLUMN_ID) { outputVectors[outputVectorId]->setAllNull(); continue; @@ -227,7 +231,7 @@ void RelTableData::scan(Transaction* transaction, TableReadState& readState, if (columnID == REL_ID_COLUMN_ID) { relIDVectorIdx = outputVectorId; } - columns[relReadState.columnIDs[i]]->scan(transaction, nodeGroupIdx, startOffset, endOffset, + getColumn(columnID)->scan(transaction, nodeGroupIdx, startOffset, endOffset, outputVectors[outputVectorId], 0 /* offsetInVector */); } if (transaction->isWriteTransaction() && relReadState.localNodeGroup) { @@ -235,7 +239,7 @@ void RelTableData::scan(Transaction* transaction, TableReadState& readState, inNodeIDVector->readNodeOffset(inNodeIDVector->state->selVector->selectedPositions[0]); KU_ASSERT(relIDVectorIdx != INVALID_VECTOR_IDX); auto relIDVector = outputVectors[relIDVectorIdx]; - relReadState.localNodeGroup->applyLocalChangesForCSRColumns( + relReadState.localNodeGroup->applyLocalChangesToScannedVectors( nodeOffset - relReadState.startNodeOffset, relReadState.columnIDs, relIDVector, outputVectors); } @@ -248,11 +252,10 @@ void RelTableData::lookup(Transaction* /*transaction*/, TableReadState& /*readSt void RelTableData::insert(transaction::Transaction* 
transaction, ValueVector* srcNodeIDVector, ValueVector* dstNodeIDVector, const std::vector& propertyVectors) { - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData( - tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity)); + auto localTableData = transaction->getLocalStorage()->getOrCreateLocalTableData( + tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity); auto checkPersistent = - localTableData->insert(srcNodeIDVector, dstNodeIDVector, propertyVectors); + localTableData->insert({srcNodeIDVector, dstNodeIDVector}, propertyVectors); if (checkPersistent && multiplicity == common::RelMultiplicity::ONE) { checkRelMultiplicityConstraint(transaction, srcNodeIDVector); } @@ -261,18 +264,16 @@ void RelTableData::insert(transaction::Transaction* transaction, ValueVector* sr void RelTableData::update(transaction::Transaction* transaction, column_id_t columnID, ValueVector* srcNodeIDVector, ValueVector* relIDVector, ValueVector* propertyVector) { KU_ASSERT(columnID < columns.size() && columnID != REL_ID_COLUMN_ID); - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData( - tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity)); - localTableData->update(srcNodeIDVector, relIDVector, columnID, propertyVector); + auto localTableData = transaction->getLocalStorage()->getOrCreateLocalTableData( + tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity); + localTableData->update({srcNodeIDVector, relIDVector}, columnID, propertyVector); } -bool RelTableData::delete_(Transaction* transaction, ValueVector* srcNodeIDVector, - ValueVector* dstNodeIDVector, ValueVector* relIDVector) { - auto localTableData = ku_dynamic_cast( - transaction->getLocalStorage()->getOrCreateLocalTableData( - tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity)); - 
return localTableData->delete_(srcNodeIDVector, dstNodeIDVector, relIDVector); +bool RelTableData::delete_( + Transaction* transaction, ValueVector* srcNodeIDVector, ValueVector* relIDVector) { + auto localTableData = transaction->getLocalStorage()->getOrCreateLocalTableData( + tableID, columns, TableType::REL, getDataIdxFromDirection(direction), multiplicity); + return localTableData->delete_(srcNodeIDVector, relIDVector); } void RelTableData::checkRelMultiplicityConstraint( @@ -304,10 +305,9 @@ bool RelTableData::checkIfNodeHasRels(Transaction* transaction, offset_t nodeOff void RelTableData::append(NodeGroup* nodeGroup) { auto csrNodeGroup = ku_dynamic_cast(nodeGroup); csrHeaderColumns.append(csrNodeGroup->getCSRHeader(), nodeGroup->getNodeGroupIdx()); - adjColumn->append(nodeGroup->getColumnChunk(0), nodeGroup->getNodeGroupIdx()); for (auto columnID = 0u; columnID < columns.size(); columnID++) { - columns[columnID]->append( - nodeGroup->getColumnChunk(columnID + 1), nodeGroup->getNodeGroupIdx()); + getColumn(columnID)->append( + nodeGroup->getColumnChunk(columnID), nodeGroup->getNodeGroupIdx()); } } @@ -385,66 +385,44 @@ double RelTableData::getHighDensity(uint64_t level) const { (packedCSRInfo.highDensityStep * (double)(packedCSRInfo.calibratorTreeHeight - level)); } -static vector_idx_t getSegmentIdx(offset_t offset) { - return offset >> StorageConstants::CSR_SEGMENT_SIZE_LOG2; -} - -void RelTableData::LocalState::initChangesPerSegment() { - auto numSegments = StorageConstants::NODE_GROUP_SIZE / StorageConstants::CSR_SEGMENT_SIZE; - sizeChangesPerSegment.resize(numSegments, 0 /*initValue*/); - hasChangesPerSegment.resize(numSegments, false /*initValue*/); - auto relNGInfo = localNG->getRelNGInfo(); - for (auto& [offset, insertions] : relNGInfo->adjInsertInfo) { - auto segmentIdx = getSegmentIdx(offset); - sizeChangesPerSegment[segmentIdx] += insertions.size(); - hasChangesPerSegment[segmentIdx] = true; - } - for (auto& [offset, deletions] : 
relNGInfo->deleteInfo) { - auto segmentIdx = getSegmentIdx(offset); - sizeChangesPerSegment[segmentIdx] -= deletions.size(); - hasChangesPerSegment[segmentIdx] = true; - } - for (auto& updateInfoPerColumn : relNGInfo->updateInfoPerChunk) { - for (auto& [offset, updates] : updateInfoPerColumn) { - auto segmentIdx = getSegmentIdx(offset); - hasChangesPerSegment[segmentIdx] = true; - } - } +RelTableData::LocalState::LocalState(LocalRelNG* localNG) : localNG{localNG} { + localNG->getChangesPerCSRSegment(sizeChangesPerSegment, hasChangesPerSegment); } void RelTableData::applyUpdatesToChunk(const PersistentState& persistentState, - const PackedCSRRegion& region, LocalVectorCollection* localChunk, - const update_insert_info_t& updateInfo, ColumnChunk* chunk) { - std::map csrOffsetInRegionToRowIdx; - auto [leftNodeBoundary, rightNodeBoundary] = region.getNodeOffsetBoundaries(); - for (auto& [nodeOffset, updates] : updateInfo) { + LocalState& localState, const LocalVectorCollection& localChunk, ColumnChunk* chunk, + column_id_t columnID) { + offset_to_row_idx_t csrOffsetInRegionToRowIdx; + auto [leftNodeBoundary, rightNodeBoundary] = localState.region.getNodeOffsetBoundaries(); + auto& updateChunk = localState.localNG->getUpdateChunks(columnID); + for (auto& [nodeOffset, updates] : updateChunk.getSrcNodeOffsetToRelOffsets()) { if (nodeOffset < leftNodeBoundary || nodeOffset > rightNodeBoundary) { continue; } - for (auto [relID, rowIdx] : updates) { - auto csrOffsetInRegion = findCSROffsetInRegion(persistentState, nodeOffset, relID); - csrOffsetInRegionToRowIdx[csrOffsetInRegion] = rowIdx; + for (auto relOffset : updates) { + auto csrOffsetInRegion = findCSROffsetInRegion(persistentState, nodeOffset, relOffset); + csrOffsetInRegionToRowIdx[csrOffsetInRegion] = + updateChunk.getRowIdxFromOffset(relOffset); } } Column::applyLocalChunkToColumnChunk(localChunk, chunk, csrOffsetInRegionToRowIdx); } void RelTableData::applyInsertionsToChunk(const PersistentState& persistentState, 
- const LocalState& localState, LocalVectorCollection* localChunk, - const update_insert_info_t& insertInfo, ColumnChunk* newChunk) { - std::map csrOffsetToRowIdx; + const LocalState& localState, const LocalVectorCollection& localChunk, ColumnChunk* newChunk) { + offset_to_row_idx_t csrOffsetToRowIdx; auto [leftNodeBoundary, rightNodeBoundary] = localState.region.getNodeOffsetBoundaries(); - for (auto& [nodeOffset, insertions] : insertInfo) { + auto& insertChunks = localState.localNG->insertChunks; + for (auto& [nodeOffset, insertions] : insertChunks.getSrcNodeOffsetToRelOffsets()) { if (nodeOffset < leftNodeBoundary || nodeOffset > rightNodeBoundary) { continue; } - // TODO: Separate this into a function. auto csrOffsetInRegion = localState.header.getStartCSROffset(nodeOffset) + persistentState.header.getCSRLength(nodeOffset) - localState.leftCSROffset; - for (auto& [_, rowIdx] : insertions) { + for (auto relOffset : insertions) { KU_ASSERT(csrOffsetInRegion != UINT64_MAX); - csrOffsetToRowIdx[csrOffsetInRegion++] = rowIdx; + csrOffsetToRowIdx[csrOffsetInRegion++] = insertChunks.getRowIdxFromOffset(relOffset); } } Column::applyLocalChunkToColumnChunk(localChunk, newChunk, csrOffsetToRowIdx); @@ -452,9 +430,10 @@ void RelTableData::applyInsertionsToChunk(const PersistentState& persistentState // TODO(Guodong): This should be refactored to share the same control logic with // `applyDeletionsToColumn`. 
-void RelTableData::applyDeletionsToChunk(const PersistentState& persistentState, - const LocalState& localState, const delete_info_t& deleteInfo, ColumnChunk* chunk) { - for (auto& [offset, deletions] : deleteInfo) { +void RelTableData::applyDeletionsToChunk( + const PersistentState& persistentState, const LocalState& localState, ColumnChunk* chunk) { + auto& deleteInfo = localState.localNG->deleteInfo; + for (auto& [offset, deletions] : deleteInfo.getSrcNodeOffsetToRelOffsetVec()) { if (localState.region.isOutOfBoundary(offset)) { continue; } @@ -488,9 +467,8 @@ void RelTableData::applyDeletionsToChunk(const PersistentState& persistentState, void RelTableData::distributeAndUpdateColumn(Transaction* transaction, node_group_idx_t nodeGroupIdx, column_id_t columnID, const PersistentState& persistentState, LocalState& localState) { - KU_ASSERT(columnID < columns.size() || columnID == INVALID_COLUMN_ID); + auto column = getColumn(columnID); auto [leftNodeBoundary, rightNodeBoundary] = localState.region.getNodeOffsetBoundaries(); - auto column = columnID == INVALID_COLUMN_ID ? adjColumn.get() : columns[columnID].get(); KU_ASSERT(localState.regionCapacity >= (localState.rightCSROffset - localState.leftCSROffset)); // First, scan the whole region to a temp chunk. 
auto oldSize = persistentState.rightCSROffset - persistentState.leftCSROffset + 1; @@ -498,11 +476,10 @@ void RelTableData::distributeAndUpdateColumn(Transaction* transaction, *column->getDataType().copy(), enableCompression, oldSize); column->scan(transaction, nodeGroupIdx, chunk.get(), persistentState.leftCSROffset, persistentState.rightCSROffset + 1); - auto relNGInfo = localState.localNG->getRelNGInfo(); - auto& updateInfo = relNGInfo->getUpdateInfo(columnID); - auto localChunk = getLocalChunk(localState, columnID); - applyUpdatesToChunk(persistentState, localState.region, localChunk, updateInfo, chunk.get()); - applyDeletionsToChunk(persistentState, localState, relNGInfo->deleteInfo, chunk.get()); + auto localUpdateChunk = + localState.localNG->getUpdateChunks(columnID).getLocalChunk(0 /*columnID*/); + applyUpdatesToChunk(persistentState, localState, localUpdateChunk, chunk.get(), columnID); + applyDeletionsToChunk(persistentState, localState, chunk.get()); // Second, create a new temp chunk for the region. 
auto newSize = localState.rightCSROffset - localState.leftCSROffset + 1; auto newChunk = ColumnChunkFactory::createColumnChunk( @@ -520,12 +497,11 @@ void RelTableData::distributeAndUpdateColumn(Transaction* transaction, } auto newCSROffsetInRegion = localState.header.getStartCSROffset(nodeOffset) - localState.leftCSROffset; - KU_ASSERT(!relNGInfo->deleteInfo.contains(nodeOffset)); KU_ASSERT(newCSROffsetInRegion >= newChunk->getNumValues()); newChunk->copy(chunk.get(), csrOffsetInRegion, newCSROffsetInRegion, length); } - auto& insertInfo = relNGInfo->getInsertInfo(columnID); - applyInsertionsToChunk(persistentState, localState, localChunk, insertInfo, newChunk.get()); + auto insertLocalChunk = localState.localNG->insertChunks.getLocalChunk(columnID); + applyInsertionsToChunk(persistentState, localState, insertLocalChunk, newChunk.get()); std::vector dstOffsets; dstOffsets.resize(newChunk->getNumValues()); fillSequence(dstOffsets, localState.leftCSROffset); @@ -566,26 +542,23 @@ std::vector RelTableData::findRegions( void RelTableData::updateRegion(Transaction* transaction, node_group_idx_t nodeGroupIdx, PersistentState& persistentState, LocalState& localState) { - auto localInfo = localState.localNG->getRelNGInfo(); // Scan RelID column chunk when there are updates or deletions. // TODO(Guodong): Should track for each region if it has updates or deletions. - if (localInfo->hasUpdates() || !localInfo->deleteInfo.empty()) { + if (localState.localNG->hasUpdatesOrDeletions()) { // NOTE: There is an implicit trick happening. Due to the mismatch of storage type and // in-memory representation of INTERNAL_ID, we only store offset as INT64 on disk. Here // we directly read relID's offset part from disk into an INT64 column chunk. 
persistentState.relIDChunk = ColumnChunkFactory::createColumnChunk( *LogicalType::INT64(), enableCompression, localState.regionCapacity); - columns[REL_ID_COLUMN_ID]->scan(transaction, nodeGroupIdx, persistentState.relIDChunk.get(), - persistentState.leftCSROffset, persistentState.rightCSROffset + 1); + getColumn(REL_ID_COLUMN_ID) + ->scan(transaction, nodeGroupIdx, persistentState.relIDChunk.get(), + persistentState.leftCSROffset, persistentState.rightCSROffset + 1); } if (localState.region.level == 0) { - updateColumn(transaction, nodeGroupIdx, INVALID_COLUMN_ID, persistentState, localState); for (auto columnID = 0u; columnID < columns.size(); columnID++) { updateColumn(transaction, nodeGroupIdx, columnID, persistentState, localState); } } else { - distributeAndUpdateColumn( - transaction, nodeGroupIdx, INVALID_COLUMN_ID, persistentState, localState); for (auto columnID = 0u; columnID < columns.size(); columnID++) { distributeAndUpdateColumn( transaction, nodeGroupIdx, columnID, persistentState, localState); @@ -701,16 +674,6 @@ void RelTableData::slideRightForInsertions(offset_t nodeOffset, offset_t rightBo } } -LocalVectorCollection* RelTableData::getLocalChunk( - const RelTableData::LocalState& localState, column_id_t columnID) { - return columnID == INVALID_COLUMN_ID ? localState.localNG->getAdjChunk() : - localState.localNG->getPropertyChunk(columnID); -} - -Column* RelTableData::getColumn(column_id_t columnID) { - return columnID == INVALID_COLUMN_ID ? 
adjColumn.get() : TableData::getColumn(columnID); -} - void RelTableData::updateColumn(Transaction* transaction, node_group_idx_t nodeGroupIdx, column_id_t columnID, const RelTableData::PersistentState& persistentState, LocalState& localState) { @@ -725,34 +688,32 @@ void RelTableData::updateColumn(Transaction* transaction, node_group_idx_t nodeG void RelTableData::applyUpdatesToColumn(Transaction* transaction, node_group_idx_t nodeGroupIdx, column_id_t columnID, const PersistentState& persistentState, LocalState& localState, Column* column) { - std::map writeInfo; - auto relNGInfo = localState.localNG->getRelNGInfo(); - auto& updateInfo = relNGInfo->getUpdateInfo(columnID); - for (auto& [offset, updatesPerNode] : updateInfo) { - if (localState.region.isOutOfBoundary(offset)) { + offset_to_row_idx_t writeInfo; + auto& updateChunk = localState.localNG->getUpdateChunks(columnID); + for (auto& [srcOffset, updatesPerNode] : updateChunk.getSrcNodeOffsetToRelOffsets()) { + if (localState.region.isOutOfBoundary(srcOffset)) { // TODO: Should also partition local storage into regions. So we can avoid this check. 
continue; } - for (auto& [relID, rowIdx] : updatesPerNode) { - auto csrOffsetInRegion = findCSROffsetInRegion(persistentState, offset, relID); - writeInfo[csrOffsetInRegion] = rowIdx; + for (auto relOffset : updatesPerNode) { + auto csrOffsetInRegion = findCSROffsetInRegion(persistentState, srcOffset, relOffset); + writeInfo[csrOffsetInRegion] = updateChunk.getRowIdxFromOffset(relOffset); } } if (!writeInfo.empty()) { - auto localChunk = getLocalChunk(localState, columnID); - column->prepareCommitForChunk( - transaction, nodeGroupIdx, localChunk, {} /*insertInfo*/, writeInfo, {} /*deleteInfo*/); + auto localChunk = updateChunk.getLocalChunk(0 /*columnID*/); + column->prepareCommitForChunk(transaction, nodeGroupIdx, LocalVectorCollection::empty(), + {} /*insertInfo*/, localChunk, writeInfo, {} /*deleteInfo*/); } } void RelTableData::applyInsertionsToColumn(Transaction* transaction, node_group_idx_t nodeGroupIdx, column_id_t columnID, LocalState& localState, const PersistentState& persistentState, Column* column) { - std::map writeInfo; - auto relNGInfo = localState.localNG->getRelNGInfo(); - auto& insertInfo = relNGInfo->getInsertInfo(columnID); - auto& deleteInfo = relNGInfo->getDeleteInfo(); - for (auto& [offset, insertions] : insertInfo) { + offset_to_row_idx_t writeInfo; + auto& deleteInfo = localState.localNG->deleteInfo; + auto& insertChunks = localState.localNG->insertChunks; + for (auto& [offset, insertions] : insertChunks.getSrcNodeOffsetToRelOffsets()) { if (localState.region.isOutOfBoundary(offset)) { continue; } @@ -760,22 +721,23 @@ void RelTableData::applyInsertionsToColumn(Transaction* transaction, node_group_ auto length = localState.header.getCSRLength(offset); KU_ASSERT(length >= insertions.size()); KU_ASSERT((startCSROffset + persistentState.header.getCSRLength(offset) - - (deleteInfo.contains(offset) ? 
deleteInfo.at(offset).size() : 0) + - insertions.size()) <= localState.header.getEndCSROffset(offset)); + deleteInfo.getNumDeletedRelsFromSrcOffset(offset) + insertions.size()) <= + localState.header.getEndCSROffset(offset)); auto idx = startCSROffset + length - insertions.size(); - for (auto& [relID, rowIdx] : insertions) { - writeInfo[idx++] = rowIdx; + for (auto relOffset : insertions) { + writeInfo[idx++] = insertChunks.getRowIdxFromOffset(relOffset); } } - auto localChunk = getLocalChunk(localState, columnID); - column->prepareCommitForChunk(transaction, nodeGroupIdx, localChunk, writeInfo, {}, {}); + auto localChunk = insertChunks.getLocalChunk(columnID); + column->prepareCommitForChunk( + transaction, nodeGroupIdx, localChunk, writeInfo, LocalVectorCollection::empty(), {}, {}); } std::vector> RelTableData::getSlidesForDeletions( - const PersistentState& persistentState, const LocalState& localState, - const delete_info_t& deleteInfo) { + const PersistentState& persistentState, const LocalState& localState) { std::vector> slides; - for (auto& [offset, deletions] : deleteInfo) { + for (auto& [offset, deletions] : + localState.localNG->deleteInfo.getSrcNodeOffsetToRelOffsetVec()) { if (localState.region.isOutOfBoundary(offset)) { continue; } @@ -817,9 +779,7 @@ std::vector> RelTableData::getSlidesForDeletions( // 3. `getSlidesForDeletions` can be done once for all columns. 
void RelTableData::applyDeletionsToColumn(Transaction* transaction, node_group_idx_t nodeGroupIdx, LocalState& localState, const PersistentState& persistentState, Column* column) { - auto relNGInfo = localState.localNG->getRelNGInfo(); - auto& deleteInfo = relNGInfo->getDeleteInfo(); - auto slides = getSlidesForDeletions(persistentState, localState, deleteInfo); + auto slides = getSlidesForDeletions(persistentState, localState); if (slides.empty()) { return; } @@ -879,11 +839,11 @@ void RelTableData::applySliding(Transaction* transaction, node_group_idx_t nodeG column->prepareCommitForChunk(transaction, nodeGroupIdx, dstOffsets, chunk.get(), 0); } -static offset_t getMaxNumNodesInRegion( - const CSRHeaderChunks& header, const PackedCSRRegion& region, const RelNGInfo* localInfo) { +offset_t RelTableData::getMaxNumNodesInRegion( + const CSRHeaderChunks& header, const PackedCSRRegion& region, const LocalRelNG* localNG) { auto numNodes = header.offset->getNumValues(); KU_ASSERT(numNodes == header.length->getNumValues()); - for (auto& [offset, _] : localInfo->adjInsertInfo) { + for (auto& [offset, _] : localNG->insertChunks.getSrcNodeOffsetToRelOffsets()) { if (!region.isOutOfBoundary(offset) && offset >= numNodes) { numNodes = offset + 1; } @@ -893,10 +853,10 @@ static offset_t getMaxNumNodesInRegion( void RelTableData::updateCSRHeader(Transaction* transaction, node_group_idx_t nodeGroupIdx, PersistentState& persistentState, LocalState& localState) { - auto localInfo = localState.localNG->getRelNGInfo(); auto [leftBoundary, rightBoundary] = localState.region.getNodeOffsetBoundaries(); auto& header = persistentState.header; - auto maxNumNodesInRegion = getMaxNumNodesInRegion(header, localState.region, localInfo); + auto maxNumNodesInRegion = + getMaxNumNodesInRegion(header, localState.region, localState.localNG); // Update the region boundary based on actual num nodes in the region. 
localState.region.leftBoundary = std::min(leftBoundary, header.offset->getNumValues()); localState.region.rightBoundary = std::min(rightBoundary, maxNumNodesInRegion - 1); @@ -906,13 +866,14 @@ void RelTableData::updateCSRHeader(Transaction* transaction, node_group_idx_t no auto& newHeader = localState.header; newHeader.copyFrom(header); newHeader.fillDefaultValues(localState.region.rightBoundary + 1); - if (localInfo->adjInsertInfo.empty() && localInfo->deleteInfo.empty()) { + if (localState.localNG->insertChunks.isEmpty() && localState.localNG->deleteInfo.isEmpty()) { // No need to update the csr header. localState.leftCSROffset = persistentState.leftCSROffset; localState.rightCSROffset = persistentState.rightCSROffset; return; } - for (auto& [offset, deletions] : localInfo->deleteInfo) { + for (auto& [offset, deletions] : + localState.localNG->deleteInfo.getSrcNodeOffsetToRelOffsetVec()) { if (localState.region.isOutOfBoundary(offset)) { continue; } @@ -921,12 +882,12 @@ void RelTableData::updateCSRHeader(Transaction* transaction, node_group_idx_t no KU_ASSERT(newLength >= 0); newHeader.length->setValue(newLength, offset); } - for (auto& [offset, _] : localInfo->adjInsertInfo) { + for (auto& [offset, _] : localState.localNG->insertChunks.getSrcNodeOffsetToRelOffsets()) { if (localState.region.isOutOfBoundary(offset)) { continue; } auto oldLength = newHeader.getCSRLength(offset); - auto numInsertions = localInfo->adjInsertInfo.at(offset).size(); + auto numInsertions = localState.localNG->getNumInsertedRels(offset); if (localState.region.level == 0) { findPositionsForInsertions(offset, numInsertions, localState); } @@ -1017,14 +978,12 @@ LocalRelNG* RelTableData::getLocalNodeGroup( void RelTableData::checkpointInMemory() { csrHeaderColumns.offset->checkpointInMemory(); csrHeaderColumns.length->checkpointInMemory(); - adjColumn->checkpointInMemory(); TableData::checkpointInMemory(); } void RelTableData::rollbackInMemory() { 
csrHeaderColumns.offset->rollbackInMemory(); csrHeaderColumns.length->rollbackInMemory(); - adjColumn->rollbackInMemory(); TableData::rollbackInMemory(); } diff --git a/src/storage/store/string_column.cpp b/src/storage/store/string_column.cpp index eb333fbc6e8..a6a47a4bf96 100644 --- a/src/storage/store/string_column.cpp +++ b/src/storage/store/string_column.cpp @@ -170,28 +170,27 @@ void StringColumn::lookupInternal( } bool StringColumn::canCommitInPlace(transaction::Transaction* transaction, - node_group_idx_t nodeGroupIdx, LocalVectorCollection* localChunk, - const offset_to_row_idx_t& insertInfo, const offset_to_row_idx_t& updateInfo) { - std::vector rowIdxesToRead; - for (auto& [offset, rowIdx] : updateInfo) { - rowIdxesToRead.push_back(rowIdx); + node_group_idx_t nodeGroupIdx, const LocalVectorCollection& localInsertChunk, + const offset_to_row_idx_t& insertInfo, const LocalVectorCollection& localUpdateChunk, + const offset_to_row_idx_t& updateInfo) { + auto strLenToAdd = 0u; + for (auto& [_, rowIdx] : updateInfo) { + auto localVector = localUpdateChunk.getLocalVector(rowIdx); + auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); + auto kuStr = localVector->getValue(offsetInVector); + strLenToAdd += kuStr.len; } offset_t maxOffset = 0u; for (auto& [offset, rowIdx] : insertInfo) { - rowIdxesToRead.push_back(rowIdx); if (offset > maxOffset) { maxOffset = offset; } - } - std::sort(rowIdxesToRead.begin(), rowIdxesToRead.end()); - auto strLenToAdd = 0u; - for (auto rowIdx : rowIdxesToRead) { - auto localVector = localChunk->getLocalVector(rowIdx); + auto localVector = localInsertChunk.getLocalVector(rowIdx); auto offsetInVector = rowIdx & (DEFAULT_VECTOR_CAPACITY - 1); - auto kuStr = localVector->getVector()->getValue(offsetInVector); + auto kuStr = localVector->getValue(offsetInVector); strLenToAdd += kuStr.len; } - auto numStrings = rowIdxesToRead.size(); + auto numStrings = insertInfo.size() + updateInfo.size(); if 
(!dictionary.canCommitInPlace(transaction, nodeGroupIdx, numStrings, strLenToAdd)) { return false; } diff --git a/src/storage/store/struct_column.cpp b/src/storage/store/struct_column.cpp index 70427e381d0..19c27e9e5e4 100644 --- a/src/storage/store/struct_column.cpp +++ b/src/storage/store/struct_column.cpp @@ -126,12 +126,12 @@ void StructColumn::rollbackInMemory() { } bool StructColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo) { + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo) { // STRUCT column doesn't have actual data stored in buffer. Only need to check the null column. // Children columns are committed separately. return nullColumn->canCommitInPlace( - transaction, nodeGroupIdx, localChunk, insertInfo, updateInfo); + transaction, nodeGroupIdx, localInsertChunk, insertInfo, localUpdateChunk, updateInfo); } bool StructColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t nodeGroupIdx, @@ -141,31 +141,33 @@ bool StructColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t n } void StructColumn::prepareCommitForChunk(Transaction* transaction, node_group_idx_t nodeGroupIdx, - LocalVectorCollection* localColumnChunk, const offset_to_row_idx_t& insertInfo, - const offset_to_row_idx_t& updateInfo, const offset_set_t& deleteInfo) { + const LocalVectorCollection& localInsertChunk, const offset_to_row_idx_t& insertInfo, + const LocalVectorCollection& localUpdateChunk, const offset_to_row_idx_t& updateInfo, + const offset_set_t& deleteInfo) { auto currentNumNodeGroups = metadataDA->getNumElements(transaction->getType()); auto isNewNodeGroup = nodeGroupIdx >= currentNumNodeGroups; if (isNewNodeGroup) { // If this is a new node group, updateInfo should be empty. 
We should perform out-of-place // commit with a new column chunk. - commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, localColumnChunk, isNewNodeGroup, - insertInfo, updateInfo, deleteInfo); + commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, isNewNodeGroup, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo, deleteInfo); } else { // STRUCT column doesn't have actual data stored in buffer. Only need to update the null // column. - if (canCommitInPlace(transaction, nodeGroupIdx, localColumnChunk, insertInfo, updateInfo)) { - nullColumn->commitLocalChunkInPlace( - transaction, nodeGroupIdx, localColumnChunk, insertInfo, updateInfo, deleteInfo); + if (canCommitInPlace(transaction, nodeGroupIdx, localInsertChunk, insertInfo, + localUpdateChunk, updateInfo)) { + nullColumn->commitLocalChunkInPlace(transaction, nodeGroupIdx, localInsertChunk, + insertInfo, localUpdateChunk, updateInfo, deleteInfo); } else { - nullColumn->commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, localColumnChunk, - isNewNodeGroup, insertInfo, updateInfo, deleteInfo); + nullColumn->commitLocalChunkOutOfPlace(transaction, nodeGroupIdx, isNewNodeGroup, + localInsertChunk, insertInfo, localUpdateChunk, updateInfo, deleteInfo); } // Update each child column separately for (auto i = 0u; i < childColumns.size(); i++) { const auto& childColumn = childColumns[i]; - auto childLocalColumnChunk = localColumnChunk->getStructChildVectorCollection(i); childColumn->prepareCommitForChunk(transaction, nodeGroupIdx, - childLocalColumnChunk.get(), insertInfo, updateInfo, deleteInfo); + localInsertChunk.getStructChildVectorCollection(i), insertInfo, + localUpdateChunk.getStructChildVectorCollection(i), updateInfo, deleteInfo); } } } diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index 011eaa868ba..b5bb2ea5932 100644 --- a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -97,7 +97,7 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { } } -void 
WALReplayer::replayPageUpdateOrInsertRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayPageUpdateOrInsertRecord(const WALRecord& walRecord) { // 1. As the first step we copy over the page on disk, regardless of if we are recovering // (and checkpointing) or checkpointing while during regular execution. auto dbFileID = walRecord.pageInsertOrUpdateRecord.dbFileID; @@ -122,7 +122,7 @@ void WALReplayer::replayPageUpdateOrInsertRecord(const kuzu::storage::WALRecord& } } -void WALReplayer::replayTableStatisticsRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayTableStatisticsRecord(const WALRecord& walRecord) { if (isCheckpoint) { if (walRecord.tableStatisticsRecord.isNodeTable) { auto walFilePath = StorageUtils::getNodesStatisticsAndDeletedIDsFilePath( @@ -219,7 +219,7 @@ void WALReplayer::replayOverflowFileNextBytePosRecord(const WALRecord& walRecord diskOverflowFile->resetLoggedNewOverflowFileNextBytePosRecord(); } -void WALReplayer::replayCopyTableRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayCopyTableRecord(const WALRecord& walRecord) { auto tableID = walRecord.copyTableRecord.tableID; if (isCheckpoint) { if (!isRecovering) { @@ -249,7 +249,7 @@ void WALReplayer::replayCopyTableRecord(const kuzu::storage::WALRecord& walRecor } } -void WALReplayer::replayDropTableRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayDropTableRecord(const WALRecord& walRecord) { if (isCheckpoint) { auto tableID = walRecord.dropTableRecord.tableID; if (!isRecovering) { @@ -298,7 +298,7 @@ void WALReplayer::replayDropTableRecord(const kuzu::storage::WALRecord& walRecor } } -void WALReplayer::replayDropPropertyRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayDropPropertyRecord(const WALRecord& walRecord) { if (isCheckpoint) { auto tableID = walRecord.dropPropertyRecord.tableID; auto propertyID = walRecord.dropPropertyRecord.propertyID; @@ -331,7 +331,7 @@ 
void WALReplayer::replayDropPropertyRecord(const kuzu::storage::WALRecord& walRe } } -void WALReplayer::replayAddPropertyRecord(const kuzu::storage::WALRecord& walRecord) { +void WALReplayer::replayAddPropertyRecord(const WALRecord& walRecord) { auto tableID = walRecord.addPropertyRecord.tableID; auto propertyID = walRecord.addPropertyRecord.propertyID; if (!isCheckpoint) { diff --git a/test/test_files/tinysnb/call/call.test b/test/test_files/tinysnb/call/call.test index f484e75a1c6..dc10b125467 100644 --- a/test/test_files/tinysnb/call/call.test +++ b/test/test_files/tinysnb/call/call.test @@ -198,7 +198,7 @@ Binder exception: Cannot evaluate a.fName as a literal. 37 -STATEMENT CALL storage_info('knows') RETURN COUNT(*) ---- 1 -82 +84 -STATEMENT CALL storage_info('workAt') RETURN COUNT(*) ---- 1 -22 +24 diff --git a/test/test_files/update_node/set_tinysnb.test b/test/test_files/update_node/set_tinysnb.test index f5cfac72c99..4a66f7286e8 100644 --- a/test/test_files/update_node/set_tinysnb.test +++ b/test/test_files/update_node/set_tinysnb.test @@ -3,86 +3,86 @@ -- -#-CASE SetNodeInt64PropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=20 + 50 -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age -#---- 1 -#70 -# -#-CASE SetNodeInt32PropTest -#-STATEMENT MATCH (a:movies) WHERE a.name='Roma' SET a.length=2.2 -#---- ok -#-STATEMENT MATCH (a:movies) WHERE a.name='Roma' RETURN a.length -#---- 1 -#2 -# -#-CASE SetNodeDoublePropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.eyeSight=1.0 -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.eyeSight -#---- 1 -#1.000000 -# -#-CASE SetNodeFloatPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.height=12 -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.height -#---- 1 -#12.000000 -# -#-CASE SetNodeBoolPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.isStudent=false -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.isStudent -#---- 1 -#False -# 
-#-CASE SetNodeDatePropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.birthdate=date('2200-10-10') -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.birthdate -#---- 1 -#2200-10-10 -# -#-CASE SetNodeTimestampPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.registerTime=timestamp('2200-10-10 12:01:01') -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.registerTime -#---- 1 -#2200-10-10 12:01:01 -# -#-CASE SetNodeEmptyStringPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName='' -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName -#---- 1 -# -## end of SetNodeEmptyStringPropTest -# -#-CASE SetNodeShortStringPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName=string(22) -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName -#---- 1 -#22 -# -#-CASE SetNodeLongStringPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName='abcdefghijklmnopqrstuvwxyz' -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName -#---- 1 -#abcdefghijklmnopqrstuvwxyz -# -#-CASE SetLongListTest -#-DEFINE STRING_EXCEEDS_PAGE ARANGE 0 5990 -#-STATEMENT BEGIN TRANSACTION -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName="${STRING_EXCEEDS_PAGE}" -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName; -#---- 1 -#${STRING_EXCEEDS_PAGE} +-CASE SetNodeInt64PropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=20 + 50 +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age +---- 1 +70 + +-CASE SetNodeInt32PropTest +-STATEMENT MATCH (a:movies) WHERE a.name='Roma' SET a.length=2.2 +---- ok +-STATEMENT MATCH (a:movies) WHERE a.name='Roma' RETURN a.length +---- 1 +2 + +-CASE SetNodeDoublePropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.eyeSight=1.0 +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.eyeSight +---- 1 +1.000000 + +-CASE SetNodeFloatPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.height=12 +---- ok +-STATEMENT MATCH 
(a:person) WHERE a.ID=0 RETURN a.height +---- 1 +12.000000 + +-CASE SetNodeBoolPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.isStudent=false +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.isStudent +---- 1 +False + +-CASE SetNodeDatePropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.birthdate=date('2200-10-10') +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.birthdate +---- 1 +2200-10-10 + +-CASE SetNodeTimestampPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.registerTime=timestamp('2200-10-10 12:01:01') +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.registerTime +---- 1 +2200-10-10 12:01:01 + +-CASE SetNodeEmptyStringPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName='' +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName +---- 1 + +# end of SetNodeEmptyStringPropTest + +-CASE SetNodeShortStringPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName=string(22) +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName +---- 1 +22 + +-CASE SetNodeLongStringPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName='abcdefghijklmnopqrstuvwxyz' +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName +---- 1 +abcdefghijklmnopqrstuvwxyz + +-CASE SetLongListTest +-DEFINE STRING_EXCEEDS_PAGE ARANGE 0 5990 +-STATEMENT BEGIN TRANSACTION +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.fName="${STRING_EXCEEDS_PAGE}" +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.fName; +---- 1 +${STRING_EXCEEDS_PAGE} -CASE SetVeryLongListErrorsTest -DEFINE STRING_EXCEEDS_MEMORY_MANAGER_LIMIT ARANGE 0 50000 @@ -95,245 +95,245 @@ ---- hash 1 1c6f2aee653d75dfc2361ff73d5807f7 -#-CASE SetNodeIntervalPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.lastJobDuration=interval('1 years 1 days') -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.lastJobDuration -#---- 1 -#1 year 1 day -# -#-CASE SetNodePropNullTest -#-STATEMENT MATCH (a:person) 
SET a.age=null -#---- ok -#-STATEMENT MATCH (a:person) RETURN a.age -#---- 8 -# -# -# -# -# -# -# -# -## end of SetNodePropNullTest. Empty lines represent the expected null values -# -#-CASE SetBothUnflatTest -#-STATEMENT MATCH (a:person) SET a.age=a.ID -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age -#---- 3 -#0|0 -#2|2 -#3|3 -# -#-CASE SetFlatUnFlatTest -#-STATEMENT MATCH (a:person)-[:knows]->(b:person) WHERE a.ID=0 SET a.age=b.age -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age -#---- 3 -#0|20 -#2|30 -#3|45 -# -#-CASE SetUnFlatFlatTest -#-STATEMENT MATCH (a:person)-[:knows]->(b:person) WHERE b.ID=2 AND a.ID = 0 SET b.age=a.age -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age -#---- 3 -#0|35 -#2|35 -#3|45 -# -#-CASE SetTwoHopTest -#-STATEMENT MATCH (a:person)-[:knows]->(b:person)-[:knows]->(c:person) WHERE b.ID=0 AND c.fName = 'Bob' SET a.age=c.age -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID < 6 RETURN a.ID, a.age -#---- 4 -#0|35 -#2|30 -#3|30 -#5|30 -# -#-CASE SetTwoHopNullTest -#-STATEMENT MATCH (a:person)-[:knows]->(b:person)-[:knows]->(c:person) SET a.age=null -#---- ok -#-STATEMENT MATCH (a:person) RETURN a.ID, a.age -#---- 8 -#0| -#10|83 -#2| -#3| -#5| -#7|20 -#8|25 -#9|40 -# -#-CASE SetIndexNestedLoopJoinTest -#-STATEMENT MATCH (a:person), (b:person) WHERE a.ID = b.ID SET a.age=b.gender -#---- ok -#-STATEMENT MATCH (a:person) RETURN a.ID, a.age -#---- 8 -#0|1 -#10|2 -#2|2 -#3|1 -#5|2 -#7|1 -#8|2 -#9|2 -# -#-CASE SetRelInt16PropTest -#-STATEMENT MATCH (a:person)-[e:studyAt]->(b:organisation) WHERE a.ID = 0 SET e.length=99 -#---- ok -#-STATEMENT MATCH (a:person)-[e:studyAt]->(b:organisation) RETURN e.length -#---- 3 -#22 -#55 -#99 -# -#-CASE SetNodeListOfIntPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.workedHours=[10,20] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.workedHours -#---- 1 -#[10,20] -# -#-CASE SetNodeListOfShortStringPropTest 
-#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.usedNames=['intel','microsoft'] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.usedNames -#---- 1 -#[intel,microsoft] -# -#-CASE SetNodeListOfLongStringPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.usedNames -#---- 1 -#[abcndwjbwesdsd,microsofthbbjuwgedsd] -# -#-CASE SetNodeListofListPropTest -#-STATEMENT MATCH (a:person) WHERE a.ID=8 SET a.courseScoresPerTerm=[[10,20],[0,0,0]] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=8 RETURN a.courseScoresPerTerm -#---- 1 -#[[10,20],[0,0,0]] -# -#-CASE SETMultiLabelNodePropTest -#-STATEMENT MATCH (a) WHERE a.ID < 2 SET a.age = 1; -#---- ok -#-STATEMENT MATCH (a) WHERE a.ID < 2 RETURN a.ID, a.age; -#---- 2 -#0|1 -#1| -#-STATEMENT CREATE NODE TABLE play(ID INT64, name STRING, PRIMARY KEY(ID)); -#---- ok -#-STATEMENT CREATE (a:play {ID: 0, name: 'AA'}); -#---- ok -#-STATEMENT MATCH (a:organisation:play) RETURN a.ID, a.name; -#---- 4 -#0|AA -#1|ABFsUni -#4|CsWork -#6|DEsWork -#-STATEMENT MATCH (a:organisation:play) WHERE a.ID < 2 SET a.name = string(a.ID * 10); -#---- ok -#-STATEMENT MATCH (a:organisation:play) WHERE a.ID < 2 RETURN a.ID, a.name; -#---- 2 -#0|0 -#1|10 -# -#-CASE SETMultiLabelWithPruning -#-STATEMENT MATCH (a:person)-[:knows]->(b) WHERE a.ID=0 SET b.name = "a", b.fName = "XX" RETURN b.name, b.fName; -#---- 3 -#|XX -#|XX -#|XX -#-STATEMENT MATCH (b) RETURN b.name, b.fName -#---- 14 -#ABFsUni| -#CsWork| -#DEsWork| -#Roma| -#Sóló cón tu párejâ| -#The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie| -#|Alice -#|Elizabeth -#|Farooq -#|Greg -#|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff -#|XX -#|XX -#|XX -#-STATEMENT MATCH (a:person)-[e:knows|:studyAt]->(b:person) WHERE a.ID=0 SET e.year = 2023, e.date = date("2023-11-11") RETURN e.year, e.date; -#---- 3 -#|2023-11-11 -#|2023-11-11 -#|2023-11-11 -#-STATEMENT MATCH 
(a:person)-[e:knows|:studyAt]->(b:person:organisation) WHERE a.ID=0 RETURN e.year, e.date; -#---- 4 -#2021| -#|2023-11-11 -#|2023-11-11 -#|2023-11-11 -# -#-CASE SetNonNullValueWithWriteTransaction -#-STATEMENT BEGIN TRANSACTION -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age -#---- 1 -#35 -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=70 -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age -#---- 1 -#70 -# -#-CASE SetNullValueWithWriteTransaction -#-STATEMENT BEGIN TRANSACTION -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age -#---- 1 -#35 -#-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=NULL -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age -#---- 1 -# -#-CASE MultipleSetListValue -#-STATEMENT BEGIN TRANSACTION -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=10 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=5 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=2 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=3 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] -#---- ok -#-STATEMENT COMMIT -#---- ok -#-STATEMENT MATCH (a:person) WHERE a.ID=10 RETURN a.usedNames -#---- 1 -#[abcndwjbwesdsd,microsofthbbjuwgedsd] -#-STATEMENT MATCH (a:person) WHERE a.ID=5 RETURN a.usedNames -#---- 1 -#[abcndwjbwesdsd,microsofthbbjuwgedsd] -# -# -#-CASE OptionalSET -#-STATEMENT OPTIONAL MATCH (a:person) WHERE a.ID > 100 SET a.fName = 'a' RETURN a.fName; -#---- 1 -# -#-STATEMENT MATCH (a:person) WHERE a.ID < 3 RETURN a.fName; -#---- 2 -#Alice -#Bob -#-STATEMENT OPTIONAL MATCH (a) WHERE a.ID > 100 SET a.name, a.fName; -#----1 -# -#-STATEMENT MATCH (a) WHERE a.ID < 3 RETURN a.name, a.fName; -#---- 3 -#ABFsUni| -#|Alice -#|Bob +-CASE SetNodeIntervalPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET 
a.lastJobDuration=interval('1 years 1 days') +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.lastJobDuration +---- 1 +1 year 1 day + +-CASE SetNodePropNullTest +-STATEMENT MATCH (a:person) SET a.age=null +---- ok +-STATEMENT MATCH (a:person) RETURN a.age +---- 8 + + + + + + + + +# end of SetNodePropNullTest. Empty lines represent the expected null values + +-CASE SetBothUnflatTest +-STATEMENT MATCH (a:person) SET a.age=a.ID +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age +---- 3 +0|0 +2|2 +3|3 + +-CASE SetFlatUnFlatTest +-STATEMENT MATCH (a:person)-[:knows]->(b:person) WHERE a.ID=0 SET a.age=b.age +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age +---- 3 +0|20 +2|30 +3|45 + +-CASE SetUnFlatFlatTest +-STATEMENT MATCH (a:person)-[:knows]->(b:person) WHERE b.ID=2 AND a.ID = 0 SET b.age=a.age +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID < 4 RETURN a.ID, a.age +---- 3 +0|35 +2|35 +3|45 + +-CASE SetTwoHopTest +-STATEMENT MATCH (a:person)-[:knows]->(b:person)-[:knows]->(c:person) WHERE b.ID=0 AND c.fName = 'Bob' SET a.age=c.age +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID < 6 RETURN a.ID, a.age +---- 4 +0|35 +2|30 +3|30 +5|30 + +-CASE SetTwoHopNullTest +-STATEMENT MATCH (a:person)-[:knows]->(b:person)-[:knows]->(c:person) SET a.age=null +---- ok +-STATEMENT MATCH (a:person) RETURN a.ID, a.age +---- 8 +0| +10|83 +2| +3| +5| +7|20 +8|25 +9|40 + +-CASE SetIndexNestedLoopJoinTest +-STATEMENT MATCH (a:person), (b:person) WHERE a.ID = b.ID SET a.age=b.gender +---- ok +-STATEMENT MATCH (a:person) RETURN a.ID, a.age +---- 8 +0|1 +10|2 +2|2 +3|1 +5|2 +7|1 +8|2 +9|2 + +-CASE SetRelInt16PropTest +-STATEMENT MATCH (a:person)-[e:studyAt]->(b:organisation) WHERE a.ID = 0 SET e.length=99 +---- ok +-STATEMENT MATCH (a:person)-[e:studyAt]->(b:organisation) RETURN e.length +---- 3 +22 +55 +99 + +-CASE SetNodeListOfIntPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.workedHours=[10,20] +---- ok +-STATEMENT MATCH 
(a:person) WHERE a.ID=0 RETURN a.workedHours +---- 1 +[10,20] + +-CASE SetNodeListOfShortStringPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.usedNames=['intel','microsoft'] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.usedNames +---- 1 +[intel,microsoft] + +-CASE SetNodeListOfLongStringPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.usedNames +---- 1 +[abcndwjbwesdsd,microsofthbbjuwgedsd] + +-CASE SetNodeListofListPropTest +-STATEMENT MATCH (a:person) WHERE a.ID=8 SET a.courseScoresPerTerm=[[10,20],[0,0,0]] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=8 RETURN a.courseScoresPerTerm +---- 1 +[[10,20],[0,0,0]] + +-CASE SETMultiLabelNodePropTest +-STATEMENT MATCH (a) WHERE a.ID < 2 SET a.age = 1; +---- ok +-STATEMENT MATCH (a) WHERE a.ID < 2 RETURN a.ID, a.age; +---- 2 +0|1 +1| +-STATEMENT CREATE NODE TABLE play(ID INT64, name STRING, PRIMARY KEY(ID)); +---- ok +-STATEMENT CREATE (a:play {ID: 0, name: 'AA'}); +---- ok +-STATEMENT MATCH (a:organisation:play) RETURN a.ID, a.name; +---- 4 +0|AA +1|ABFsUni +4|CsWork +6|DEsWork +-STATEMENT MATCH (a:organisation:play) WHERE a.ID < 2 SET a.name = string(a.ID * 10); +---- ok +-STATEMENT MATCH (a:organisation:play) WHERE a.ID < 2 RETURN a.ID, a.name; +---- 2 +0|0 +1|10 + +-CASE SETMultiLabelWithPruning +-STATEMENT MATCH (a:person)-[:knows]->(b) WHERE a.ID=0 SET b.name = "a", b.fName = "XX" RETURN b.name, b.fName; +---- 3 +|XX +|XX +|XX +-STATEMENT MATCH (b) RETURN b.name, b.fName +---- 14 +ABFsUni| +CsWork| +DEsWork| +Roma| +Sóló cón tu párejâ| +The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie| +|Alice +|Elizabeth +|Farooq +|Greg +|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff +|XX +|XX +|XX +-STATEMENT MATCH (a:person)-[e:knows|:studyAt]->(b:person) WHERE a.ID=0 SET e.year = 2023, e.date = date("2023-11-11") RETURN e.year, e.date; +---- 3 +|2023-11-11 +|2023-11-11 +|2023-11-11 +-STATEMENT MATCH 
(a:person)-[e:knows|:studyAt]->(b:person:organisation) WHERE a.ID=0 RETURN e.year, e.date; +---- 4 +2021| +|2023-11-11 +|2023-11-11 +|2023-11-11 + +-CASE SetNonNullValueWithWriteTransaction +-STATEMENT BEGIN TRANSACTION +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age +---- 1 +35 +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=70 +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age +---- 1 +70 + +-CASE SetNullValueWithWriteTransaction +-STATEMENT BEGIN TRANSACTION +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age +---- 1 +35 +-STATEMENT MATCH (a:person) WHERE a.ID=0 SET a.age=NULL +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=0 RETURN a.age +---- 1 + +-CASE MultipleSetListValue +-STATEMENT BEGIN TRANSACTION +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=10 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=5 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=2 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=3 SET a.usedNames=['abcndwjbwesdsd','microsofthbbjuwgedsd'] +---- ok +-STATEMENT COMMIT +---- ok +-STATEMENT MATCH (a:person) WHERE a.ID=10 RETURN a.usedNames +---- 1 +[abcndwjbwesdsd,microsofthbbjuwgedsd] +-STATEMENT MATCH (a:person) WHERE a.ID=5 RETURN a.usedNames +---- 1 +[abcndwjbwesdsd,microsofthbbjuwgedsd] + + +-CASE OptionalSET +-STATEMENT OPTIONAL MATCH (a:person) WHERE a.ID > 100 SET a.fName = 'a' RETURN a.fName; +---- 1 + +-STATEMENT MATCH (a:person) WHERE a.ID < 3 RETURN a.fName; +---- 2 +Alice +Bob +-STATEMENT OPTIONAL MATCH (a) WHERE a.ID > 100 SET a.name, a.fName; +----1 + +-STATEMENT MATCH (a) WHERE a.ID < 3 RETURN a.name, a.fName; +---- 3 +ABFsUni| +|Alice +|Bob diff --git a/test/test_files/update_rel/create_empty.test b/test/test_files/update_rel/create_empty.test index 0703d6d67e2..b8f1f9d31a1 100644 --- 
a/test/test_files/update_rel/create_empty.test +++ b/test/test_files/update_rel/create_empty.test @@ -182,6 +182,9 @@ ---- ok -STATEMENT MATCH (n1:N1), (n2:N2) WHERE n1.ID=10 AND n2.ID=12 CREATE (n1)-[r:Rel1]->(n2) ---- ok +-STATEMENT MATCH (n1:N1)-[r:Rel1]->(n2:N2) WHERE n1.ID=10 AND n2.ID=12 RETURN r +---- 1 +(0:0)-{_LABEL: Rel1, _ID: 0:0}->(1:0) -STATEMENT MATCH (n1:N1)-[r:Rel1]->(n2:N2) WHERE n1.ID=10 AND n2.ID=12 DELETE r ---- ok -STATEMENT MATCH (n:N1)-[r:Rel1]->(m:N2) RETURN n.ID, m.ID