From 2c18ab38e138faef6d4c6faee8fb0bb0e8c4dd1e Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Sun, 12 Mar 2023 22:28:48 -0400 Subject: [PATCH] single pool for bm and mm --- CMakeLists.txt | 1 + src/binder/bind/bind_ddl.cpp | 2 +- src/common/in_mem_overflow_buffer.cpp | 6 +- src/include/common/constants.h | 46 +- src/include/common/in_mem_overflow_buffer.h | 22 +- src/include/common/types/types.h | 3 + src/include/main/database.h | 8 +- .../processor/result/factorized_table.h | 14 +- .../storage/buffer_manager/bm_file_handle.h | 121 + .../buffer_managed_file_handle.h | 73 - .../storage/buffer_manager/buffer_manager.h | 196 +- .../storage/buffer_manager/buffer_pool.h | 141 - .../storage/buffer_manager/memory_manager.h | 67 +- .../storage/buffer_manager/vm_region.h | 46 + src/include/storage/file_handle.h | 4 +- src/include/storage/index/hash_index.h | 6 +- .../storage/storage_structure/column.h | 4 +- .../storage/storage_structure/disk_array.h | 8 +- .../storage_structure/disk_overflow_file.h | 8 +- .../storage_structure/lists/list_headers.h | 2 +- .../storage_structure/lists/lists_metadata.h | 2 +- .../lists/lists_update_iterator.h | 2 +- .../storage_structure/storage_structure.h | 8 +- .../storage_structure_utils.h | 31 +- src/include/storage/wal/wal.h | 8 +- src/include/storage/wal_replayer.h | 8 +- src/main/database.cpp | 16 +- .../aggregate/aggregate_hash_table.cpp | 4 +- .../operator/hash_join/join_hash_table.cpp | 2 +- .../operator/order_by/key_block_merger.cpp | 4 +- .../order_by/order_by_key_encoder.cpp | 4 +- src/processor/result/factorized_table.cpp | 14 +- src/storage/buffer_manager/CMakeLists.txt | 4 +- ...ged_file_handle.cpp => bm_file_handle.cpp} | 127 +- src/storage/buffer_manager/buffer_manager.cpp | 271 +- src/storage/buffer_manager/buffer_pool.cpp | 292 -- src/storage/buffer_manager/memory_manager.cpp | 38 +- src/storage/buffer_manager/vm_region.cpp | 52 + src/storage/copy_arrow/copy_node_arrow.cpp | 2 +- src/storage/copy_arrow/copy_rel_arrow.cpp | 4 +- .../copy_arrow/copy_structures_arrow.cpp | 4 +- src/storage/index/hash_index.cpp | 5 +- src/storage/storage_manager.cpp | 1 - src/storage/storage_structure/column.cpp | 6 +- src/storage/storage_structure/disk_array.cpp | 60 +- .../storage_structure/disk_overflow_file.cpp | 28 +- src/storage/storage_structure/in_mem_file.cpp | 15 +- src/storage/storage_structure/in_mem_page.cpp | 2 +- .../storage_structure/lists/list_headers.cpp | 4 +- src/storage/storage_structure/lists/lists.cpp | 16 +- .../lists/lists_metadata.cpp | 5 +- .../storage_structure/storage_structure.cpp | 2 +- .../storage_structure_utils.cpp | 34 +- src/storage/storage_utils.cpp | 4 +- src/storage/wal/wal.cpp | 12 +- src/storage/wal_replayer.cpp | 16 +- test/include/graph_test/graph_test.h | 9 +- .../main_test_helper/main_test_helper.h | 4 +- .../order_by/key_block_merger_test.cpp | 9 +- .../order_by/order_by_key_encoder_test.cpp | 15 +- test/processor/order_by/radix_sort_test.cpp | 9 +- test/runner/e2e_ddl_test.cpp | 7 +- test/runner/e2e_set_transaction_test.cpp | 2 +- test/storage/CMakeLists.txt | 1 - test/storage/buffer_manager_test.cpp | 53 - test/storage/wal_test.cpp | 7 +- test/transaction/transaction_manager_test.cpp | 7 +- test/transaction/transaction_test.cpp | 2 +- third_party/concurrentqueue/LICENSE.md | 62 + .../concurrentqueue/blockingconcurrentqueue.h | 582 +++ third_party/concurrentqueue/concurrentqueue.h | 3747 +++++++++++++++++ .../concurrentqueue/lightweightsemaphore.h | 425 ++ tools/python_api/src_cpp/py_database.cpp | 5 +- 73 files changed, 
5770 insertions(+), 1061 deletions(-) create mode 100644 src/include/storage/buffer_manager/bm_file_handle.h delete mode 100644 src/include/storage/buffer_manager/buffer_managed_file_handle.h delete mode 100644 src/include/storage/buffer_manager/buffer_pool.h create mode 100644 src/include/storage/buffer_manager/vm_region.h rename src/storage/buffer_manager/{buffer_managed_file_handle.cpp => bm_file_handle.cpp} (60%) delete mode 100644 src/storage/buffer_manager/buffer_pool.cpp create mode 100644 src/storage/buffer_manager/vm_region.cpp delete mode 100644 test/storage/buffer_manager_test.cpp create mode 100644 third_party/concurrentqueue/LICENSE.md create mode 100644 third_party/concurrentqueue/blockingconcurrentqueue.h create mode 100644 third_party/concurrentqueue/concurrentqueue.h create mode 100644 third_party/concurrentqueue/lightweightsemaphore.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ae71c3f76..83407202d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,7 @@ include_directories(third_party/nlohmann_json) include_directories(third_party/utf8proc/include) include_directories(third_party/pybind11/include) include_directories(third_party/re2/include) +include_directories(third_party/concurrentqueue) add_subdirectory(third_party) add_subdirectory(src) diff --git a/src/binder/bind/bind_ddl.cpp b/src/binder/bind/bind_ddl.cpp index 47dd0973a1..9e026ad4c5 100644 --- a/src/binder/bind/bind_ddl.cpp +++ b/src/binder/bind/bind_ddl.cpp @@ -193,7 +193,7 @@ DataType Binder::bindDataType(const std::string& dataType) { "The number of elements in a fixed list must be greater than 0. Given: " + std::to_string(boundType.fixedNumElementsInList) + "."); } - if (Types::getDataTypeSize(boundType) > common::BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (Types::getDataTypeSize(boundType) > common::BufferPoolConstants::PAGE_4KB_SIZE) { throw common::BinderException("The size of fixed list is larger than a " "DEFAULT_PAGE_SIZE, which is not supported yet."); } diff --git a/src/common/in_mem_overflow_buffer.cpp b/src/common/in_mem_overflow_buffer.cpp index 96ddcdcd4e..61eb4d6407 100644 --- a/src/common/in_mem_overflow_buffer.cpp +++ b/src/common/in_mem_overflow_buffer.cpp @@ -4,18 +4,18 @@ namespace kuzu { namespace common { uint8_t* InMemOverflowBuffer::allocateSpace(uint64_t size) { - assert(size <= BufferPoolConstants::LARGE_PAGE_SIZE); + assert(size <= BufferPoolConstants::PAGE_256KB_SIZE); if (requireNewBlock(size)) { allocateNewBlock(); } - auto data = currentBlock->block->data + currentBlock->currentOffset; + auto data = currentBlock->block->buffer + currentBlock->currentOffset; currentBlock->currentOffset += size; return data; } void InMemOverflowBuffer::allocateNewBlock() { auto newBlock = make_unique( - memoryManager->allocateBlock(false /* do not initialize to zero */)); + memoryManager->allocateBuffer(false /* do not initialize to zero */)); currentBlock = newBlock.get(); blocks.push_back(std::move(newBlock)); } diff --git a/src/include/common/constants.h b/src/include/common/constants.h index f34d4f94de..7de0953419 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -21,31 +21,40 @@ constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_FOR_TRANSACTIONS_TO_LEAVE_IN_ const std::string INTERNAL_ID_SUFFIX = "_id"; +enum PageSizeClass : uint8_t { + PAGE_4KB = 0, + PAGE_256KB = 1, +}; + // Currently the system supports files with 2 different pages size, which we refer to as -// DEFAULT_PAGE_SIZE and LARGE_PAGE_SIZE. 
Default size of the page which is the unit of read/write -// to the database files, such as to store columns or lists. For now, this value cannot be changed. -// But technically it can change from 2^12 to 2^16. 2^12 lower bound is assuming the OS page size is -// 4K. 2^16 is because currently we leave 11 fixed number of bits for relOffInPage and the maximum -// number of bytes needed for an edge is 20 bytes so 11 + log_2(20) = 15.xxx, so certainly over -// 2^16-size pages, we cannot utilize the page for storing adjacency lists. +// PAGE_4KB_SIZE and PAGE_256KB_SIZE. PAGE_4KB_SIZE is the default size of the page which is the +// unit of read/write to the database files, such as to store columns or lists. For now, this value +// cannot be changed. But technically it can change from 2^12 to 2^16. 2^12 lower bound is assuming +// the OS page size is 4K. 2^16 is because currently we leave 11 fixed number of bits for +// relOffInPage and the maximum number of bytes needed for an edge is 20 bytes so 11 + log_2(20) +// = 15.xxx, so certainly over 2^16-size pages, we cannot utilize the page for storing adjacency +// lists. struct BufferPoolConstants { - static constexpr uint64_t DEFAULT_PAGE_SIZE_LOG_2 = 12; - static constexpr uint64_t DEFAULT_PAGE_SIZE = 1 << DEFAULT_PAGE_SIZE_LOG_2; + static constexpr uint64_t PAGE_4KB_SIZE_LOG2 = 12; + static constexpr uint64_t PAGE_4KB_SIZE = (std::uint64_t)1 << PAGE_4KB_SIZE_LOG2; // Page size for files with large pages, e.g., temporary files that are used by operators that // may require large amounts of memory. - static constexpr uint64_t LARGE_PAGE_SIZE_LOG_2 = 18; - static constexpr uint64_t LARGE_PAGE_SIZE = 1 << LARGE_PAGE_SIZE_LOG_2; + static constexpr uint64_t PAGE_256KB_SIZE_LOG2 = 18; + static constexpr uint64_t PAGE_256KB_SIZE = (std::uint64_t)1 << PAGE_256KB_SIZE_LOG2; + // If a user does not specify a max size for BM, we by default set the max size of BM to + // maxPhyMemSize * DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM. + static constexpr double DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM = 0.8; + // For each PURGE_EVICTION_QUEUE_INTERVAL candidates added to the eviction queue, we will call + // `removeNonEvictableCandidates` to remove candidates that are not evictable. See + // `EvictionQueue::removeNonEvictableCandidates()` for more details. + static constexpr uint64_t EVICTION_QUEUE_PURGING_INTERVAL = 1024; + // The default max size for a VMRegion. + static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = (uint64_t)1 << 45; // (32TB) + + static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 26; // (64MB) }; struct StorageConstants { - // The default amount of memory pre-allocated to both the default and large pages buffer pool. - static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE = 1ull << 30; // (1GB) - static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 27; // (128MB) - // The default ratio of system memory allocated to buffer pools (including default and large). - static constexpr double DEFAULT_BUFFER_POOL_RATIO = 0.8; - // The default ratio of buffer allocated to default and large pages. - static constexpr double DEFAULT_PAGES_BUFFER_RATIO = 0.75; - static constexpr double LARGE_PAGES_BUFFER_RATIO = 1.0 - DEFAULT_PAGES_BUFFER_RATIO; static constexpr char OVERFLOW_FILE_SUFFIX[] = ".ovf"; static constexpr char COLUMN_FILE_SUFFIX[] = ".col"; static constexpr char LISTS_FILE_SUFFIX[] = ".lists"; @@ -63,6 +72,7 @@ struct StorageConstants { // The number of pages that we add at one time when we need to grow a file. 
static constexpr uint64_t PAGE_GROUP_SIZE_LOG2 = 10; static constexpr uint64_t PAGE_GROUP_SIZE = (uint64_t)1 << PAGE_GROUP_SIZE_LOG2; + static constexpr uint64_t PAGE_IDX_IN_GROUP_MASK = ((uint64_t)1 << PAGE_GROUP_SIZE_LOG2) - 1; }; struct ListsMetadataConstants { diff --git a/src/include/common/in_mem_overflow_buffer.h b/src/include/common/in_mem_overflow_buffer.h index cdd364737c..a7914685a5 100644 --- a/src/include/common/in_mem_overflow_buffer.h +++ b/src/include/common/in_mem_overflow_buffer.h @@ -10,13 +10,13 @@ namespace common { struct BufferBlock { public: - explicit BufferBlock(std::unique_ptr block) - : size{block->size}, currentOffset{0}, block{std::move(block)} {} + explicit BufferBlock(std::unique_ptr block) + : size{block->allocator->getPageSize()}, currentOffset{0}, block{std::move(block)} {} public: uint64_t size; uint64_t currentOffset; - std::unique_ptr block; + std::unique_ptr block; inline void resetCurrentOffset() { currentOffset = 0; } }; @@ -27,15 +27,6 @@ class InMemOverflowBuffer { explicit InMemOverflowBuffer(storage::MemoryManager* memoryManager) : memoryManager{memoryManager}, currentBlock{nullptr} {}; - // The blocks used are allocated through the MemoryManager but are backed by the - // BufferManager. We need to therefore release them back by calling - // memoryManager->freeBlock. - ~InMemOverflowBuffer() { - for (auto& block : blocks) { - memoryManager->freeBlock(block->block->pageIdx); - } - } - uint8_t* allocateSpace(uint64_t size); inline void merge(InMemOverflowBuffer& other) { @@ -54,9 +45,6 @@ class InMemOverflowBuffer { inline void resetBuffer() { if (!blocks.empty()) { auto firstBlock = std::move(blocks[0]); - for (auto i = 1u; i < blocks.size(); ++i) { - memoryManager->freeBlock(blocks[i]->block->pageIdx); - } blocks.clear(); firstBlock->resetCurrentOffset(); blocks.push_back(std::move(firstBlock)); @@ -68,10 +56,10 @@ class InMemOverflowBuffer { private: inline bool requireNewBlock(uint64_t sizeToAllocate) { - if (sizeToAllocate > BufferPoolConstants::LARGE_PAGE_SIZE) { + if (sizeToAllocate > BufferPoolConstants::PAGE_256KB_SIZE) { throw RuntimeException("Require size " + std::to_string(sizeToAllocate) + " greater than single block size " + - std::to_string(BufferPoolConstants::LARGE_PAGE_SIZE) + "."); + std::to_string(BufferPoolConstants::PAGE_256KB_SIZE) + "."); } return currentBlock == nullptr || (currentBlock->currentOffset + sizeToAllocate) > currentBlock->size; diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h index c439e468a3..7e1f43fd36 100644 --- a/src/include/common/types/types.h +++ b/src/include/common/types/types.h @@ -16,8 +16,11 @@ namespace common { using sel_t = uint16_t; using hash_t = uint64_t; using page_idx_t = uint32_t; +using frame_idx_t = page_idx_t; using page_offset_t = uint32_t; constexpr page_idx_t PAGE_IDX_MAX = UINT32_MAX; +using page_group_idx_t = uint32_t; +using frame_group_idx_t = page_group_idx_t; using list_header_t = uint32_t; using property_id_t = uint32_t; constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX; diff --git a/src/include/main/database.h b/src/include/main/database.h index 0d49545def..97cc8116fe 100644 --- a/src/include/main/database.h +++ b/src/include/main/database.h @@ -20,15 +20,11 @@ KUZU_API struct SystemConfig { explicit SystemConfig(); /** * @brief Creates a SystemConfig object. - * @param bufferPoolSize Buffer pool size in bytes. - * @note Currently, we have two internal buffer pools with different frame size of 4KB and - * 256KB. 
When a user sets a customized buffer pool size, it is divided into two internal pools - * based on the DEFAULT_PAGES_BUFFER_RATIO and LARGE_PAGES_BUFFER_RATIO. + * @param bufferPoolSize Max size of the buffer pool in bytes. */ explicit SystemConfig(uint64_t bufferPoolSize); - uint64_t defaultPageBufferPoolSize; - uint64_t largePageBufferPoolSize; + uint64_t bufferPoolSize; uint64_t maxNumThreads; }; diff --git a/src/include/processor/result/factorized_table.h b/src/include/processor/result/factorized_table.h index 8d842189d1..0af4a74e7c 100644 --- a/src/include/processor/result/factorized_table.h +++ b/src/include/processor/result/factorized_table.h @@ -33,21 +33,19 @@ class DataBlock { public: explicit DataBlock(storage::MemoryManager* memoryManager) : numTuples{0}, memoryManager{memoryManager} { - block = memoryManager->allocateBlock(true); - freeSize = block->size; + block = memoryManager->allocateBuffer(true /* initializeToZero */); + freeSize = block->allocator->getPageSize(); } DataBlock(DataBlock&& other) = default; - ~DataBlock() { memoryManager->freeBlock(block->pageIdx); } - - inline uint8_t* getData() const { return block->data; } + inline uint8_t* getData() const { return block->buffer; } inline void resetNumTuplesAndFreeSize() { - freeSize = common::BufferPoolConstants::LARGE_PAGE_SIZE; + freeSize = common::BufferPoolConstants::PAGE_256KB_SIZE; numTuples = 0; } inline void resetToZero() { - memset(block->data, 0, common::BufferPoolConstants::LARGE_PAGE_SIZE); + memset(block->buffer, 0, common::BufferPoolConstants::PAGE_256KB_SIZE); } static void copyTuples(DataBlock* blockToCopyFrom, ft_tuple_idx_t tupleIdxToCopyFrom, @@ -60,7 +58,7 @@ class DataBlock { storage::MemoryManager* memoryManager; private: - std::unique_ptr block; + std::unique_ptr block; }; class DataBlockCollection { diff --git a/src/include/storage/buffer_manager/bm_file_handle.h b/src/include/storage/buffer_manager/bm_file_handle.h new file mode 100644 index 0000000000..fb6e5f752d --- /dev/null +++ b/src/include/storage/buffer_manager/bm_file_handle.h @@ -0,0 +1,121 @@ +#pragma once + +#include "storage/buffer_manager/vm_region.h" +#include "storage/file_handle.h" + +namespace kuzu { +namespace storage { + +static constexpr uint64_t IS_IN_FRAME_MASK = 0x8000000000000000; +static constexpr uint64_t DIRTY_MASK = 0x4000000000000000; +static constexpr uint64_t PAGE_IDX_MASK = 0x3FFFFFFFFFFFFFFF; + +enum class LockMode : uint8_t { SPIN = 0, NON_BLOCKING = 1 }; + +class BMFileHandle; +class BufferManager; + +// Keeps the state information of a page in a file. 
+class PageState { +public: + inline bool isInFrame() const { return pageIdx & IS_IN_FRAME_MASK; } + inline void setDirty() { pageIdx |= DIRTY_MASK; } + inline void clearDirty() { pageIdx &= ~DIRTY_MASK; } + inline bool isDirty() const { return pageIdx & DIRTY_MASK; } + inline common::page_idx_t getPageIdx() const { + return (common::page_idx_t)(pageIdx & PAGE_IDX_MASK); + } + inline uint64_t incrementPinCount() { return pinCount.fetch_add(1); } + inline uint64_t decrementPinCount() { return pinCount.fetch_sub(1); } + inline void setPinCount(uint64_t newPinCount) { pinCount.store(newPinCount); } + inline uint64_t getPinCount() const { return pinCount.load(); } + inline uint64_t getEvictionTimestamp() const { return evictionTimestamp.load(); } + inline uint64_t incrementEvictionTimestamp() { return evictionTimestamp.fetch_add(1); } + inline void releaseLock() { lock.clear(); } + + bool acquireLock(LockMode lockMode); + void setInFrame(common::page_idx_t pageIdx); + void resetState(); + +private: + std::atomic_flag lock = ATOMIC_FLAG_INIT; + // Highest 1st bit indicates if this page is loaded or not, 2nd bit indicates if this + // page is dirty or not. The rest 62 bits records the page idx inside the file. + uint64_t pageIdx = 0; + std::atomic pinCount = 0; + std::atomic evictionTimestamp = 0; +}; + +// BMFileHandle is a file handle that is backed by BufferManager. It holds the state of +// each page in the file. File Handle is the bridge between a Column/Lists/Index and the Buffer +// Manager that abstracts the file in which that Column/Lists/Index is stored. +// BMFileHandle supports two types of files: versioned and non-versioned. Versioned files +// contains mapping from pages that have updates to the versioned pages in the wal file. +// Currently, only MemoryManager and WAL files are non-versioned. +class BMFileHandle : public FileHandle { +public: + enum class FileVersionedType : uint8_t { + VERSIONED_FILE = 0, // The file is backed by versioned pages in wal file. + NON_VERSIONED_FILE = 1 // The file does not have any versioned pages in wal file. + }; + + BMFileHandle(const std::string& path, uint8_t flags, BufferManager* bm, + common::PageSizeClass pageSizeClass, FileVersionedType fileVersionedType); + + void createPageVersionGroupIfNecessary(common::page_idx_t pageIdx); + + // This function is intended to be used after a fileInfo is created and we want the file + // to have not pages and page locks. Should be called after ensuring that the buffer manager + // does not hold any of the pages of the file. 
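[Editorial note, not part of the patch] The new PageState packs the "in frame" flag, the dirty flag, and a 62-bit page index into a single 64-bit word using the three masks declared above. A minimal, standalone C++ sketch of that bit layout (mask values copied from the patch; the rest is illustrative only):

#include <cassert>
#include <cstdint>

static constexpr uint64_t IS_IN_FRAME_MASK = 0x8000000000000000; // bit 63: page is cached in a frame
static constexpr uint64_t DIRTY_MASK = 0x4000000000000000;       // bit 62: page has unflushed writes
static constexpr uint64_t PAGE_IDX_MASK = 0x3FFFFFFFFFFFFFFF;    // low 62 bits: the recorded page index

int main() {
    uint64_t state = 0;
    state = IS_IN_FRAME_MASK | (42 & PAGE_IDX_MASK); // mark as in-frame, record index 42 (cf. setInFrame)
    state |= DIRTY_MASK;                             // cf. setDirty()
    assert(state & IS_IN_FRAME_MASK);                // cf. isInFrame() -> true
    assert(state & DIRTY_MASK);                      // cf. isDirty() -> true
    assert((uint32_t)(state & PAGE_IDX_MASK) == 42); // cf. getPageIdx() -> 42
    state &= ~DIRTY_MASK;                            // cf. clearDirty()
    return 0;
}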
+ void resetToZeroPagesAndPageCapacity(); + void removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx); + + bool hasWALPageVersionNoPageLock(common::page_idx_t pageIdx); + void clearWALPageVersionIfNecessary(common::page_idx_t pageIdx); + common::page_idx_t getWALPageVersionNoPageLock(common::page_idx_t pageIdx); + void setWALPageVersion(common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL); + void setWALPageVersionNoLock(common::page_idx_t pageIdx, common::page_idx_t pageVersion); + + inline bool acquirePageLock(common::page_idx_t pageIdx, LockMode lockMode) { + return getPageState(pageIdx)->acquireLock(lockMode); + } + inline void releasePageLock(common::page_idx_t pageIdx) { + getPageState(pageIdx)->releaseLock(); + } + inline PageState* getPageState(common::page_idx_t pageIdx) { + assert(pageIdx < numPages && pageStates[pageIdx]); + return pageStates[pageIdx].get(); + } + inline void clearPageState(common::page_idx_t pageIdx) { + assert(pageIdx < numPages && pageStates[pageIdx]); + pageStates[pageIdx]->resetState(); + } + inline common::frame_idx_t getFrameIdx(common::page_idx_t pageIdx) { + assert(pageIdx < pageCapacity); + return (frameGroupIdxes[pageIdx >> common::StorageConstants::PAGE_GROUP_SIZE_LOG2] + << common::StorageConstants::PAGE_GROUP_SIZE_LOG2) | + (pageIdx & common::StorageConstants::PAGE_IDX_IN_GROUP_MASK); + } + inline common::PageSizeClass getPageSizeClass() const { return pageSizeClass; } + +private: + void initPageStatesAndGroups(); + common::page_idx_t addNewPageWithoutLock() override; + void addNewPageGroupWithoutLock(); + void removePageIdxAndTruncateIfNecessaryWithoutLock(common::page_idx_t pageIdxToRemove); + inline common::page_group_idx_t getNumPageGroups() { + return ceil((double)numPages / common::StorageConstants::PAGE_GROUP_SIZE); + } + +private: + FileVersionedType fileVersionedType; + BufferManager* bm; + common::PageSizeClass pageSizeClass; + std::vector> pageStates; + // Each file page group corresponds to a frame group in the VMRegion. + std::vector frameGroupIdxes; + std::vector> pageVersions; + std::vector> pageGroupLocks; +}; +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/buffer_manager/buffer_managed_file_handle.h b/src/include/storage/buffer_manager/buffer_managed_file_handle.h deleted file mode 100644 index 401d1069ed..0000000000 --- a/src/include/storage/buffer_manager/buffer_managed_file_handle.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "storage/file_handle.h" - -namespace kuzu { -namespace storage { - -// BufferManagedFileHandle is a file handle that is backed by BufferManager. It holds the state of -// each in the file. File Handle is the bridge between a Column/Lists/Index and the Buffer Manager -// that abstracts the file in which that Column/Lists/Index is stored. -// BufferManagedFileHandle supports two types of files: versioned and non-versioned. Versioned files -// contains mapping from pages that have updates to the versioned pages in the wal file. -// Currently, only MemoryManager and WAL files are non-versioned. -class BufferManagedFileHandle : public FileHandle { -public: - enum class FileVersionedType : uint8_t { - VERSIONED_FILE = 0, // The file is backed by versioned pages in wal file. - NON_VERSIONED_FILE = 1 // The file does not have any versioned pages in wal file. 
- }; - - BufferManagedFileHandle( - const std::string& path, uint8_t flags, FileVersionedType fileVersionedType); - - bool acquirePageLock(common::page_idx_t pageIdx, bool block); - inline void releasePageLock(common::page_idx_t pageIdx) { pageLocks[pageIdx]->clear(); } - - void createPageVersionGroupIfNecessary(common::page_idx_t pageIdx); - - // This function is intended to be used after a fileInfo is created and we want the file - // to have not pages and page locks. Should be called after ensuring that the buffer manager - // does not hold any of the pages of the file. - void resetToZeroPagesAndPageCapacity(); - void removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx); - - bool hasWALPageVersionNoPageLock(common::page_idx_t pageIdx); - void clearWALPageVersionIfNecessary(common::page_idx_t pageIdx); - common::page_idx_t getWALPageVersionNoPageLock(common::page_idx_t pageIdx); - void setWALPageVersion(common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL); - void setWALPageVersionNoLock(common::page_idx_t pageIdx, common::page_idx_t pageVersion); - - inline common::page_idx_t getFrameIdx(common::page_idx_t pageIdx) { - return pageIdxToFrameMap[pageIdx]->load(); - } - inline void swizzle(common::page_idx_t pageIdx, common::page_idx_t swizzledVal) { - pageIdxToFrameMap[pageIdx]->store(swizzledVal); - } - inline void unswizzle(common::page_idx_t pageIdx) { - pageIdxToFrameMap[pageIdx]->store(UINT32_MAX); - } - - static inline bool isAFrame(common::page_idx_t mappedFrameIdx) { - return UINT32_MAX != mappedFrameIdx; - } - -private: - void initPageIdxToFrameMapAndLocks(); - common::page_idx_t addNewPageWithoutLock() override; - bool acquire(common::page_idx_t pageIdx); - void removePageIdxAndTruncateIfNecessaryWithoutLock(common::page_idx_t pageIdxToRemove); - void resizePageGroupLocksAndPageVersionsWithoutLock(); - uint32_t getNumPageGroups() { - return ceil((double)numPages / common::StorageConstants::PAGE_GROUP_SIZE); - } - -private: - FileVersionedType fileVersionedType; - std::vector> pageLocks; - std::vector>> pageIdxToFrameMap; - std::vector> pageVersions; - std::vector> pageGroupLocks; -}; -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/buffer_manager/buffer_manager.h b/src/include/storage/buffer_manager/buffer_manager.h index 7ae12417b9..8b3079dfa1 100644 --- a/src/include/storage/buffer_manager/buffer_manager.h +++ b/src/include/storage/buffer_manager/buffer_manager.h @@ -1,11 +1,9 @@ #pragma once -#include #include -#include "common/metric.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" -#include "storage/buffer_manager/buffer_pool.h" +#include "concurrentqueue.h" +#include "storage/buffer_manager/bm_file_handle.h" namespace spdlog { class logger; @@ -14,97 +12,155 @@ class logger; namespace kuzu { namespace storage { +struct EvictionCandidate { + bool isEvictable() const { + return pageState->getEvictionTimestamp() == evictionTimestamp && + pageState->getPinCount() == 0; + } + + BMFileHandle* fileHandle; + PageState* pageState; + // The eviction timestamp of the corresponding page state at the time the candidate is enqueued. 
+ uint64_t evictionTimestamp = -1u; +}; + +class EvictionQueue { +public: + EvictionQueue() { queue = std::make_unique>(); } + + inline void enqueue( + BMFileHandle* fileHandle, PageState* frameHandle, uint64_t evictionTimestamp) { + queue->enqueue(EvictionCandidate{fileHandle, frameHandle, evictionTimestamp}); + } + inline bool dequeue(EvictionCandidate& candidate) { return queue->try_dequeue(candidate); } + void removeNonEvictableCandidates(); + +private: + std::unique_ptr> queue; +}; + /** + * The Buffer Manager (BM) is a centralized manager of database memory resources. + * It provides two main functionalities: + * 1) it provides the high-level functionality to pin() and unpin() the pages of the database files + * used by storage structures, such as the Column, Lists, or HashIndex in the storage layer, and + * operates via their BMFileHandle to read/write the page data into/out of one of the frames. + * 2) it supports the MemoryManager (MM) to allocate memory buffers that are not backed by + * any disk files. Similar to disk files, MM provides BMFileHandles backed by temp in-mem files to + * the BM to pin/unpin pages. Pin happens when MM tries to allocate a new memory buffer, and unpin + * happens when MM tries to reclaim a memory buffer. * - * The Buffer Manager (BM) is the cache of database file pages. It provides the high-level - * functionality of pin() and unpin() the pages of the database files used by the Column/Lists in - * the storage layer, and operates via their FileHandles to make the page data available in one of - * the frames. BM can also be used by any operator or other components of the system to acquire - * memory blocks and ensure that they do not acquire memory directly from the OS. Depending on how - * the user of the BM pins and unpins pages, operators can ensure either that the memory blocks they - * acquire are safely spilled to disk and read back or always kept in memory (see below.) + * Specifically, in BM's context, page is the basic management unit of data in a file. The file can + * be a disk file, such as a column file, or an in-mem file, such as an temp in-memory file kept by + * the MM. Frame is the basic management unit of data resides in a VMRegion, namely in a virtual + * memory space. Each page is uniquely mapped to a frame, and it can be cached into or evicted from + * the frame. See `VMRegion` for more details. * - * Currently the BM has internal BufferPools to cache pages of 2 size: DEFAULT_PAGE_SIZE and - * LARGE_PAGE_SIZE, both of which are defined in configs.h. We only have a mechanism to control the - * memory size of each BufferPool. So when the BM of the system is constructed, one pool of memory - * is allocated to cache files whose pages are of size DEFAULT_PAGE_SIZE, and a separate pool of - * memory is allocated to cache files whose pages are of size LARGE_PAGE_SIZE. Ideally we should - * move towards allocating a single pool of memory from which different size pages are allocated. - * The Umbra paper (http://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf) describes an - * mmap-based mechanism to do this (where the responsibility to handle memory fragmentation is - * delegated to the OS). + * When users unpin their pages, the BM might spill them to disk. The behavior of what is guaranteed + * to be kept in frame and what can be spilled to disk is directly determined by the pin/unpin + * calls of the users. 
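[Editorial note, not part of the patch] A minimal usage sketch of the pin/unpin contract described in the surrounding comment, written against the interfaces this patch introduces (getBMFileHandle, pin, setPinnedPageDirty, unpin). The file path, page index, and helper function below are hypothetical, and error handling is omitted:

#include <string>
#include "storage/buffer_manager/buffer_manager.h"

using namespace kuzu::common;
using namespace kuzu::storage;

// Hypothetical helper: one pin/modify/unpin round trip on the first page of a column file.
void touchFirstPage(BufferManager& bm, const std::string& colFilePath) {
    auto fileHandle = bm.getBMFileHandle(colFilePath, FileHandle::O_PERSISTENT_FILE_NO_CREATE,
        BMFileHandle::FileVersionedType::VERSIONED_FILE);
    page_idx_t pageIdx = 0;
    // pin() caches the page into its frame (PageReadPolicy::READ_PAGE by default) and returns it.
    uint8_t* frame = bm.pin(*fileHandle, pageIdx);
    frame[0] = 0xFF;                             // mutate the cached page data
    bm.setPinnedPageDirty(*fileHandle, pageIdx); // written back to disk when flushed or evicted
    bm.unpin(*fileHandle, pageIdx);              // the page becomes an eviction candidate again
}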
* - * The BM uses CLOCK replacement policy to evict pages from frames, which is an approximate LRU - * policy that is based of FIFO-like operations. + * Also, BM provides some specialized functionalities: + * 1) it supports the caller to set pinned pages as dirty, which will be safely written back to disk + * when the pages are evicted; + * 2) it supports the caller to flush or remove pages from the BM; + * 3) it supports the caller to directly update the content of a frame. * - * All access to the BM is through a FileHandle. To use the BM to acquire in-memory blocks users can - * pin pages, which will then lead the BM to put these pages in memory, and then never unpin them - * and the BM will never spill those pages to disk. However *make sure to unpin these pages* - * eventually, otherwise this would be a form of internal memory leak. See InMemOverflowBuffer for - * an example, where this is done during the deconstruction of the InMemOverflowBuffer. + * All accesses to the BM are through a FileHandle. This design is to de-centralize the management + * of page states from the BM to each file handle itself. Thus each on-disk file should have a + * unique BMFileHandle, and MM also holds a unique BMFileHandle, which is backed by an temp in-mem + * file, for all memory buffer allocations * - * Users can also unpin their pages and then the BM might spill them to disk. The behavior of what - * is guaranteed to be kept in memory and what can be spilled to disk is directly determined by the - * pin/unpin calls of the users of BM. + * To start a Database, users need to specify the max size of the memory usage (`maxSize`) in BM. + * If users don't specify the value, the system will set maxSize to available physical mem * + * DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM (defined in constants.h). + * The BM relies on virtual memory regions mapped through `mmap` to anonymous address spaces. + * 1) For disk pages, BM allocates a virtual memory region of DEFAULT_VM_REGION_MAX_SIZE (defined in + * constants.h), which is usually much larger than `maxSize`, and is expected to be large enough to + * contain all disk pages. Each disk page in database files is directly mapped to a unique + * PAGE_4KB_SIZE frame in the region. + * 2) For each BMFileHandle backed by a temp in-mem file in MM, BM allocates a virtual memory region + * of `maxSize` for it. Each memory buffer is mapped to a unique PAGE_256KB_SIZE frame in that + * region. Both disk pages and memory buffers are all managed by the BM to make sure that actually + * used physical memory doesn't go beyond max size specified by users. Currently, the BM uses a + * queue based replacement policy and the MADV_DONTNEED hint to explicitly control evictions. See + * comments above `claimAFrame()` for more details. * - * BufferManager supports a special pin function called pinWithoutReadingFromFile. A caller can - * call the common::page_idx_t newPageIdx = fh::addNewPage() function on the FileHandle fh they - * have, and then call bm::pinWithoutReadingFromFile(fh, newPageIdx), and the BM will not try to - * read this page from the file (because the page has not yet been written). + * The design is inspired by vmcache in the paper "Virtual-Memory Assisted Buffer Management" + * (https://www.cs.cit.tum.de/fileadmin/w00cfj/dis/_my_direct_uploads/vmcache.pdf). 
+ * We would also like to thank Fadhil Abubaker for doing the initial research and prototyping of + * Umbra's design in his CS 848 course project: + * https://github.com/fabubaker/kuzu/blob/umbra-bm/final_project_report.pdf. */ -class BufferManager { +class BufferManager { public: - BufferManager(uint64_t maxSizeForDefaultPagePool, uint64_t maxSizeForLargePagePool); + enum class PageReadPolicy : uint8_t { READ_PAGE = 0, DONT_READ_PAGE = 1 }; + + explicit BufferManager(uint64_t bufferPoolSize); ~BufferManager(); - uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + uint8_t* pin(BMFileHandle& fileHandle, common::page_idx_t pageIdx, + PageReadPolicy pageReadPolicy = PageReadPolicy::READ_PAGE); + uint8_t* pinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); - // The caller should ensure that the given pageIdx is indeed a new page, so should not be read - // from disk - uint8_t* pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void setPinnedPageDirty(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - inline uint8_t* pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile) { - return fileHandle.isLargePaged() ? bufferPoolLargePages->pinWithoutAcquiringPageLock( - fileHandle, pageIdx, doNotReadFromFile) : - bufferPoolDefaultPages->pinWithoutAcquiringPageLock( - fileHandle, pageIdx, doNotReadFromFile); - } + // The function assumes that the requested page is already pinned. + void unpin(BMFileHandle& fileHandle, common::page_idx_t pageIdx); + void unpinWithoutAcquiringPageLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void removeFilePagesFromFrames(BMFileHandle& fileHandle); + void flushAllDirtyPagesInFrames(BMFileHandle& fileHandle); + void updateFrameIfPageIsInFrameWithoutLock( + BMFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); + void removePageFromFrameIfNecessary(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - // The function assumes that the requested page is already pinned. - void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - inline void unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? - bufferPoolLargePages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx) : - bufferPoolDefaultPages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx); + // For files that are managed by BM, their FileHandles should be created through this function. + inline std::unique_ptr getBMFileHandle(const std::string& filePath, uint8_t flags, + BMFileHandle::FileVersionedType fileVersionedType, + common::PageSizeClass pageSizeClass = common::PAGE_4KB) { + return std::make_unique( + filePath, flags, this, pageSizeClass, fileVersionedType); + } + inline common::frame_group_idx_t addNewFrameGroup(common::PageSizeClass pageSizeClass) { + return vmRegions[pageSizeClass]->addNewFrameGroup(); } - void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle); +private: + bool claimAFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); + // Return number of bytes freed. 
+ uint64_t tryEvictPage(EvictionCandidate& candidate); - void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle); - void updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); + void cachePageIntoFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); + void flushIfDirtyWithoutLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx); + void removePageFromFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush); - void removePageFromFrameIfNecessary( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void addToEvictionQueue(BMFileHandle* fileHandle, PageState* pageState); - // Note: This function is not thread-safe. - // For files that are managed by BM, their FileHandles should be created through this function. - inline std::unique_ptr getBufferManagedFileHandle( - const std::string& filePath, uint8_t flags, - BufferManagedFileHandle::FileVersionedType fileVersionedType) { - return std::make_unique(filePath, flags, fileVersionedType); + inline uint64_t reserveUsedMemory(uint64_t size) { return usedMemory.fetch_add(size); } + inline uint64_t freeUsedMemory(uint64_t size) { return usedMemory.fetch_sub(size); } + + inline uint8_t* getFrame(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + return vmRegions[fileHandle.getPageSizeClass()]->getFrame(fileHandle.getFrameIdx(pageIdx)); + } + inline void releaseFrameForPage(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + vmRegions[fileHandle.getPageSizeClass()]->releaseFrame(fileHandle.getFrameIdx(pageIdx)); } private: std::shared_ptr logger; - std::unique_ptr bufferPoolDefaultPages; - std::unique_ptr bufferPoolLargePages; + std::atomic usedMemory; + std::atomic bufferPoolSize; + std::atomic numEvictionQueueInsertions; + // Each VMRegion corresponds to a virtual memory region of a specific page size. Currently, we + // hold two sizes of PAGE_4KB and PAGE_256KB. + std::vector> vmRegions; + std::unique_ptr evictionQueue; }; } // namespace storage diff --git a/src/include/storage/buffer_manager/buffer_pool.h b/src/include/storage/buffer_manager/buffer_pool.h deleted file mode 100644 index de03738afe..0000000000 --- a/src/include/storage/buffer_manager/buffer_pool.h +++ /dev/null @@ -1,141 +0,0 @@ -#pragma once - -#include -#include - -#include "common/metric.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" - -namespace spdlog { -class logger; -} - -namespace kuzu { -namespace storage { - -struct BufferManagerMetrics { - uint64_t numPins{0}; - // Number of pinning operations that required eviction from a Frame. - uint64_t numEvicts{0}; - // Number of failed tries to evict the page from a Frame. This is incremented if either the - // eviction routine fails to get the lock on the page that is in the Frame or the pinCount of - // the Frame has increased after taking the locks of Frame and page. - uint64_t numEvictFails{0}; - // Number of failed tried to evict the page frame a Frame because the Frame has been recently - // accessed and hence is given a second chance. - uint64_t numRecentlyAccessedWalkover{0}; - uint64_t numCacheHit{0}; - uint64_t numCacheMiss{0}; - uint64_t numDirtyPageWriteIO{0}; -}; - -// A frame is a unit of buffer space having a fixed size of 4KB, where a single file page is -// read from the disk. Frame also stores other metadata to locate and maintain this buffer in the -// Buffer Manager. 
-class Frame { - friend class BufferPool; - -public: - explicit Frame(common::page_offset_t pageSize, uint8_t* buffer); - ~Frame() noexcept(false); - -private: - void resetFrameWithoutLock(); - bool acquireFrameLock(bool block); - void releaseFrameLock() { frameLock.clear(); } - void setIsDirty(bool _isDirty) { isDirty = _isDirty; } - void releaseBuffer(); - -private: - // fileHandlePtr and pageIdx identify the file and the page in file whose data the buffer is - // maintaining. pageIdx of -1u means that the frame is empty, i.e. it has no data. - std::atomic fileHandlePtr; - std::atomic pageIdx; - std::atomic pinCount; - - bool recentlyAccessed; - bool isDirty; - uint8_t* buffer; - common::page_offset_t pageSize; - std::atomic_flag frameLock; -}; - -// The BufferPool is a cache of file pages of a fixed size. It provides the high-level functionality -// of pin() and unpin() pages of files in memory and operates via their FileHandles -// to make the page data available in one of the frames. It uses CLOCK replacement policy to evict -// pages from frames, which is an approximate LRU policy that is based of FIFO-like operations. -class BufferPool { - friend class BufferManager; - -public: - BufferPool(uint64_t pageSize, uint64_t maxSize); - - uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // Pins a new page that has been added to the file. This means that the BufferManager does not - // need to read the page from the file for now. Ensuring that the given pageIdx is new is the - // responsibility of the caller. - uint8_t* pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - uint8_t* pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // The function assumes that the requested page is already pinned. - void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - void unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // Note: These two functions that remove pages from frames is not designed for concurrency and - // therefore not tested under concurrency. If this is called while other threads are accessing - // the BM, it should work safely but this is not tested. - void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle); - - void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle); - void updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); - - void removePageFromFrameWithoutFlushingIfNecessary( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - -private: - uint8_t* pin( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - common::page_idx_t claimAFrame( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - bool fillEmptyFrame(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - bool tryEvict(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - void moveClockHand(uint64_t newClockHand); - // Performs 2 actions: - // 1) Clears the contents of the frame. - // 2) Unswizzles the pageIdx in the frame. 
- void clearFrameAndUnswizzleWithoutLock(const std::unique_ptr& frame, - BufferManagedFileHandle& fileHandleInFrame, common::page_idx_t pageIdxInFrame); - void readNewPageIntoFrame(Frame& frame, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - void flushIfDirty(const std::unique_ptr& frame); - - void removePageFromFrame( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush); - -private: - std::shared_ptr logger; - uint64_t pageSize; - std::vector> bufferCache; - std::atomic clockHand; - common::page_idx_t numFrames; - BufferManagerMetrics bmMetrics; -}; - -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/buffer_manager/memory_manager.h b/src/include/storage/buffer_manager/memory_manager.h index 3d6208e650..bb10a960c9 100644 --- a/src/include/storage/buffer_manager/memory_manager.h +++ b/src/include/storage/buffer_manager/memory_manager.h @@ -11,42 +11,69 @@ namespace kuzu { namespace storage { -struct MemoryBlock { +class MemoryAllocator; +class MemoryBuffer { public: - explicit MemoryBlock(common::page_idx_t pageIdx, uint8_t* data) - : size(common::BufferPoolConstants::LARGE_PAGE_SIZE), pageIdx(pageIdx), data(data) {} + MemoryBuffer(MemoryAllocator* allocator, common::page_idx_t blockIdx, uint8_t* buffer); + ~MemoryBuffer(); public: - uint64_t size; + uint8_t* buffer; common::page_idx_t pageIdx; - uint8_t* data; + MemoryAllocator* allocator; }; -// Memory manager for allocating/reclaiming large intermediate memory blocks. It can allocate a -// memory block with fixed size of LARGE_PAGE_SIZE from the buffer manager. -class MemoryManager { +class MemoryAllocator { + friend class MemoryBuffer; + public: - explicit MemoryManager(BufferManager* bm) : bm(bm) { - // Because the memory manager only manages blocks in memory, this file should never be - // created, opened, or written to. It's a place_holder name. We keep the name for logging - // purposes. - fh = bm->getBufferManagedFileHandle("mm-place-holder-file-name", - FileHandle::O_IN_MEM_TEMP_FILE, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); - } + explicit MemoryAllocator(BufferManager* bm); + ~MemoryAllocator(); - std::unique_ptr allocateBlock(bool initializeToZero = false); + std::unique_ptr allocateBuffer(bool initializeToZero = false); + inline common::page_offset_t getPageSize() const { return pageSize; } +private: void freeBlock(common::page_idx_t pageIdx); +private: + std::unique_ptr fh; + BufferManager* bm; + common::page_offset_t pageSize; + std::stack freePages; + std::mutex allocatorLock; +}; + +/* + * The Memory Manager (MM) is used for allocating/reclaiming intermediate memory blocks. + * It can allocate a memory buffer of size PAGE_256KB from the buffer manager backed by a + * BMFileHandle with temp in-mem file. + * + * Internally, MM uses a MemoryAllocator. The MemoryAllocator is holding the BMFileHandle backed by + * a temp in-mem file, and responsible for allocating/reclaiming memory buffers of its size class + * from the buffer manager. The MemoryAllocator keeps track of free pages in the BMFileHandle, so + * that it can reuse those freed pages without allocating new pages. The MemoryAllocator is + * thread-safe, so that multiple threads can allocate/reclaim memory blocks with the same size class + * at the same time. 
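[Editorial note, not part of the patch] A short usage sketch of the allocation path described here: the caller asks the MemoryManager for a buffer, and, as the comment below notes, the returned MemoryBuffer hands its page back to the MemoryAllocator for reuse when it is destroyed. The helper function is hypothetical:

#include "storage/buffer_manager/memory_manager.h"

using namespace kuzu::storage;

void useScratchBuffer(MemoryManager& mm) {
    // Allocates one PAGE_256KB buffer, pinned through the BufferManager-backed temp in-mem file.
    auto buffer = mm.allocateBuffer(true /* initializeToZero */);
    buffer->buffer[0] = 42; // write into the frame backing this buffer
    // `buffer` going out of scope reclaims the page: ~MemoryBuffer() calls back into its
    // MemoryAllocator, which records the page as free for later allocations.
}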
+ * + * MM will return a MemoryBuffer to the caller, which is a wrapper of the allocated memory block, + * and it will automatically call its allocator to reclaim the memory block when it is destroyed. + */ +class MemoryManager { +public: + explicit MemoryManager(BufferManager* bm) : bm{bm} { + allocator = std::make_unique(bm); + } + + inline std::unique_ptr allocateBuffer(bool initializeToZero = false) { + return allocator->allocateBuffer(initializeToZero); + } inline BufferManager* getBufferManager() const { return bm; } private: - std::unique_ptr fh; BufferManager* bm; - std::stack freePages; - std::mutex memMgrLock; + std::unique_ptr allocator; }; } // namespace storage } // namespace kuzu diff --git a/src/include/storage/buffer_manager/vm_region.h b/src/include/storage/buffer_manager/vm_region.h new file mode 100644 index 0000000000..9a48db392f --- /dev/null +++ b/src/include/storage/buffer_manager/vm_region.h @@ -0,0 +1,46 @@ +#pragma once + +#include + +#include "common/constants.h" +#include "common/types/types.h" + +namespace kuzu { +namespace storage { + +// A VMRegion holds a virtual memory region of a certain size allocated through mmap. +// The region is divided into frame groups, each of which is a group of frames of the same size. +// Each BMFileHandle should grab a frame group each time when they add a new file page group (see +// `BMFileHandle::addNewPageGroupWithoutLock`). In this way, each file page group uniquely +// corresponds to a frame group, thus, a page also uniquely corresponds to a frame in a VMRegion. +class VMRegion { + friend class BufferManager; + +public: + explicit VMRegion(common::PageSizeClass pageSizeClass, uint64_t maxRegionSize); + ~VMRegion(); + + common::frame_group_idx_t addNewFrameGroup(); + + // Use `MADV_DONTNEED` to release physical memory associated with this frame. + void releaseFrame(common::frame_idx_t frameIdx); + + inline uint8_t* getFrame(common::frame_idx_t frameIdx) { + return region + ((std::uint64_t)frameIdx * frameSize); + } + +private: + inline uint64_t getMaxRegionSize() const { + return maxNumFrameGroups * frameSize * common::StorageConstants::PAGE_GROUP_SIZE; + } + +private: + std::mutex mtx; + uint8_t* region; + uint32_t frameSize; + uint64_t numFrameGroups; + uint64_t maxNumFrameGroups; +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/file_handle.h b/src/include/storage/file_handle.h index cd63b8a757..35d4981803 100644 --- a/src/include/storage/file_handle.h +++ b/src/include/storage/file_handle.h @@ -48,8 +48,8 @@ class FileHandle { inline common::page_idx_t getNumPages() const { return numPages; } inline common::FileInfo* getFileInfo() const { return fileInfo.get(); } inline uint64_t getPageSize() const { - return isLargePaged() ? common::BufferPoolConstants::LARGE_PAGE_SIZE : - common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + return isLargePaged() ? 
common::BufferPoolConstants::PAGE_256KB_SIZE : + common::BufferPoolConstants::PAGE_4KB_SIZE; } protected: diff --git a/src/include/storage/index/hash_index.h b/src/include/storage/index/hash_index.h index 0658c301cb..d5347c1c5f 100644 --- a/src/include/storage/index/hash_index.h +++ b/src/include/storage/index/hash_index.h @@ -93,7 +93,7 @@ class HashIndex : public BaseHashIndex { void prepareCommitOrRollbackIfNecessary(bool isCommit); void checkpointInMemoryIfNecessary(); void rollbackInMemoryIfNecessary() const; - inline BufferManagedFileHandle* getFileHandle() const { return fileHandle.get(); } + inline BMFileHandle* getFileHandle() const { return fileHandle.get(); } private: template @@ -134,7 +134,7 @@ class HashIndex : public BaseHashIndex { StorageStructureIDAndFName storageStructureIDAndFName; BufferManager& bm; WAL* wal; - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; std::unique_ptr> headerArray; std::unique_ptr>> pSlots; std::unique_ptr>> oSlots; @@ -196,7 +196,7 @@ class PrimaryKeyIndex { hashIndexForInt64->prepareCommitOrRollbackIfNecessary(isCommit) : hashIndexForString->prepareCommitOrRollbackIfNecessary(isCommit); } - inline BufferManagedFileHandle* getFileHandle() { + inline BMFileHandle* getFileHandle() { return keyDataTypeID == common::INT64 ? hashIndexForInt64->getFileHandle() : hashIndexForString->getFileHandle(); } diff --git a/src/include/storage/storage_structure/column.h b/src/include/storage/storage_structure/column.h index 2187297aa1..a3b9d16ae7 100644 --- a/src/include/storage/storage_structure/column.h +++ b/src/include/storage/storage_structure/column.h @@ -89,9 +89,7 @@ class PropertyColumnWithOverflow : public Column { } inline DiskOverflowFile* getDiskOverflowFile() { return &diskOverflowFile; } - inline BufferManagedFileHandle* getDiskOverflowFileHandle() { - return diskOverflowFile.getFileHandle(); - } + inline BMFileHandle* getDiskOverflowFileHandle() { return diskOverflowFile.getFileHandle(); } protected: DiskOverflowFile diskOverflowFile; diff --git a/src/include/storage/storage_structure/disk_array.h b/src/include/storage/storage_structure/disk_array.h index 98be02be8b..935a9e614c 100644 --- a/src/include/storage/storage_structure/disk_array.h +++ b/src/include/storage/storage_structure/disk_array.h @@ -2,7 +2,7 @@ #include "common/constants.h" #include "common/types/types.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" #include "storage/wal/wal.h" #include "storage_structure_utils.h" #include "transaction/transaction.h" @@ -13,7 +13,7 @@ namespace storage { class FileHandle; static constexpr uint64_t NUM_PAGE_IDXS_PER_PIP = - (common::BufferPoolConstants::DEFAULT_PAGE_SIZE - sizeof(common::page_idx_t)) / + (common::BufferPoolConstants::PAGE_4KB_SIZE - sizeof(common::page_idx_t)) / sizeof(common::page_idx_t); /** @@ -199,10 +199,10 @@ class BaseInMemDiskArray : public BaseDiskArray { protected: inline uint64_t addInMemoryArrayPage(bool setToZero) { inMemArrayPages.emplace_back( - std::make_unique(common::BufferPoolConstants::DEFAULT_PAGE_SIZE)); + std::make_unique(common::BufferPoolConstants::PAGE_4KB_SIZE)); if (setToZero) { memset(inMemArrayPages[inMemArrayPages.size() - 1].get(), 0, - common::BufferPoolConstants::DEFAULT_PAGE_SIZE); + common::BufferPoolConstants::PAGE_4KB_SIZE); } return inMemArrayPages.size() - 1; } diff --git a/src/include/storage/storage_structure/disk_overflow_file.h b/src/include/storage/storage_structure/disk_overflow_file.h index 
0b858b2aa0..07aed6d498 100644 --- a/src/include/storage/storage_structure/disk_overflow_file.h +++ b/src/include/storage/storage_structure/disk_overflow_file.h @@ -24,7 +24,7 @@ class DiskOverflowFile : public StorageStructure { bufferManager, wal), loggedNewOverflowFileNextBytePosRecord{false} { nextBytePosToWriteTo = - fileHandle->getNumPages() * common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + fileHandle->getNumPages() * common::BufferPoolConstants::PAGE_4KB_SIZE; } static inline StorageStructureIDAndFName constructOverflowStorageStructureIDAndFName( @@ -73,7 +73,7 @@ class DiskOverflowFile : public StorageStructure { private: struct OverflowPageCache { - BufferManagedFileHandle* bufferManagedFileHandle = nullptr; + BMFileHandle* bmFileHandle = nullptr; common::page_idx_t pageIdx = UINT32_MAX; uint8_t* frame = nullptr; }; @@ -89,8 +89,8 @@ class DiskOverflowFile : public StorageStructure { void setListRecursiveIfNestedWithoutLock(const common::ku_list_t& inMemSrcList, common::ku_list_t& diskDstList, const common::DataType& dataType); void logNewOverflowFileNextBytePosRecordIfNecessaryWithoutLock(); - void pinOverflowPageCache(BufferManagedFileHandle* bufferManagedFileHandleToPin, - common::page_idx_t pageIdxToPin, OverflowPageCache& overflowPageCache); + void pinOverflowPageCache(BMFileHandle* bmFileHandleToPin, common::page_idx_t pageIdxToPin, + OverflowPageCache& overflowPageCache); void unpinOverflowPageCache(OverflowPageCache& overflowPageCache); private: diff --git a/src/include/storage/storage_structure/lists/list_headers.h b/src/include/storage/storage_structure/lists/list_headers.h index adce582ac3..45e66e4833 100644 --- a/src/include/storage/storage_structure/lists/list_headers.h +++ b/src/include/storage/storage_structure/lists/list_headers.h @@ -135,7 +135,7 @@ class ListHeaders : public BaseListHeaders { std::unique_ptr> headersDiskArray; private: - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; StorageStructureIDAndFName storageStructureIDAndFName; }; } // namespace storage diff --git a/src/include/storage/storage_structure/lists/lists_metadata.h b/src/include/storage/storage_structure/lists/lists_metadata.h index aecedc7b43..5a1f146deb 100644 --- a/src/include/storage/storage_structure/lists/lists_metadata.h +++ b/src/include/storage/storage_structure/lists/lists_metadata.h @@ -69,7 +69,7 @@ class ListsMetadata : public BaseListsMetadata { } private: - std::unique_ptr metadataVersionedFileHandle; + std::unique_ptr metadataVersionedFileHandle; StorageStructureIDAndFName storageStructureIDAndFName; // chunkToPageListHeadIdxMapBuilder holds pointers to the head of pageList of each chunk. 
// For instance, chunkToPageListHeadIdxMapBuilder[3] is a pointer in `pageLists` from where diff --git a/src/include/storage/storage_structure/lists/lists_update_iterator.h b/src/include/storage/storage_structure/lists/lists_update_iterator.h index cc77d805ca..704930a6d5 100644 --- a/src/include/storage/storage_structure/lists/lists_update_iterator.h +++ b/src/include/storage/storage_structure/lists/lists_update_iterator.h @@ -112,7 +112,7 @@ class AdjListsUpdateIterator : public ListsUpdateIterator { common::list_header_t oldHeader, uint64_t numElementsAfterInsertion) override { return ListHeaders::isALargeList(oldHeader) || numElementsAfterInsertion * lists->elementSize > - common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + common::BufferPoolConstants::PAGE_4KB_SIZE; } }; diff --git a/src/include/storage/storage_structure/storage_structure.h b/src/include/storage/storage_structure/storage_structure.h index a601565a31..0b5e3e82d9 100644 --- a/src/include/storage/storage_structure/storage_structure.h +++ b/src/include/storage/storage_structure/storage_structure.h @@ -26,14 +26,14 @@ class StorageStructure { : logger{common::LoggerUtils::getLogger(common::LoggerConstants::LoggerEnum::STORAGE)}, storageStructureID{storageStructureIDAndFName.storageStructureID}, bufferManager{bufferManager}, wal{wal} { - fileHandle = bufferManager.getBufferManagedFileHandle(storageStructureIDAndFName.fName, + fileHandle = bufferManager.getBMFileHandle(storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + BMFileHandle::FileVersionedType::VERSIONED_FILE); } virtual ~StorageStructure() = default; - inline BufferManagedFileHandle* getFileHandle() { return fileHandle.get(); } + inline BMFileHandle* getFileHandle() { return fileHandle.get(); } protected: void addNewPageToFileHandle(); @@ -50,7 +50,7 @@ class StorageStructure { protected: std::shared_ptr logger; StorageStructureID storageStructureID; - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; BufferManager& bufferManager; WAL* wal; }; diff --git a/src/include/storage/storage_structure/storage_structure_utils.h b/src/include/storage/storage_structure/storage_structure_utils.h index f82f82cc05..c840e335c0 100644 --- a/src/include/storage/storage_structure/storage_structure_utils.h +++ b/src/include/storage/storage_structure/storage_structure_utils.h @@ -6,7 +6,7 @@ #include #include "common/types/types.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" #include "storage/buffer_manager/buffer_manager.h" #include "storage/wal/wal.h" #include "transaction/transaction.h" @@ -41,36 +41,33 @@ class StorageStructureUtils { constexpr static uint32_t NULL_CHUNK_OR_LARGE_LIST_HEAD_IDX = UINT32_MAX; public: - static std::pair - getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHandle& fileHandle, - common::page_idx_t physicalPageIdx, WAL& wal, transaction::TransactionType trxType); + static std::pair getFileHandleAndPhysicalPageIdxToPin( + BMFileHandle& fileHandle, common::page_idx_t physicalPageIdx, WAL& wal, + transaction::TransactionType trxType); static WALPageIdxAndFrame createWALVersionIfNecessaryAndPinPage( - common::page_idx_t originalPageIdx, bool insertingNewPage, - BufferManagedFileHandle& fileHandle, StorageStructureID storageStructureID, - BufferManager& bufferManager, WAL& wal); + common::page_idx_t originalPageIdx, bool insertingNewPage, BMFileHandle& fileHandle, + StorageStructureID 
storageStructureID, BufferManager& bufferManager, WAL& wal); - static void readWALVersionOfPage(BufferManagedFileHandle& fileHandle, - common::page_idx_t originalPageIdx, BufferManager& bufferManager, WAL& wal, - const std::function& readOp); + static void readWALVersionOfPage(BMFileHandle& fileHandle, common::page_idx_t originalPageIdx, + BufferManager& bufferManager, WAL& wal, const std::function& readOp); // Note: This function updates a page "transactionally", i.e., creates the WAL version of the // page if it doesn't exist. For the original page to be updated, the current WRITE trx needs to // commit and checkpoint. - static void updatePage(BufferManagedFileHandle& fileHandle, - StorageStructureID storageStructureID, common::page_idx_t originalPageIdx, - bool isInsertingNewPage, BufferManager& bufferManager, WAL& wal, - const std::function& updateOp); + static void updatePage(BMFileHandle& fileHandle, StorageStructureID storageStructureID, + common::page_idx_t originalPageIdx, bool isInsertingNewPage, BufferManager& bufferManager, + WAL& wal, const std::function& updateOp); // Unpins the WAL version of a page that was updated and releases the lock of the page (recall // we use the same lock to do operations on both the original and WAL versions of the page). static void unpinWALPageAndReleaseOriginalPageLock(WALPageIdxAndFrame& walPageIdxAndFrame, - BufferManagedFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal); + BMFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal); private: static void unpinPageIdxInWALAndReleaseOriginalPageLock(common::page_idx_t pageIdxInWAL, - common::page_idx_t originalPageIdx, BufferManagedFileHandle& fileHandle, - BufferManager& bufferManager, WAL& wal); + common::page_idx_t originalPageIdx, BMFileHandle& fileHandle, BufferManager& bufferManager, + WAL& wal); }; } // namespace storage } // namespace kuzu diff --git a/src/include/storage/wal/wal.h b/src/include/storage/wal/wal.h index 9b01f2bcee..313e870ef0 100644 --- a/src/include/storage/wal/wal.h +++ b/src/include/storage/wal/wal.h @@ -14,7 +14,7 @@ namespace kuzu { namespace storage { using lock_t = std::unique_lock; -constexpr uint64_t WAL_HEADER_PAGE_SIZE = common::BufferPoolConstants::DEFAULT_PAGE_SIZE; +constexpr uint64_t WAL_HEADER_PAGE_SIZE = common::BufferPoolConstants::PAGE_4KB_SIZE; constexpr uint64_t WAL_HEADER_PAGE_NUM_RECORDS_FIELD_SIZE = sizeof(uint64_t); constexpr uint64_t WAL_HEADER_PAGE_NEXT_HEADER_PAGE_IDX_FIELD_SIZE = sizeof(common::page_idx_t); constexpr uint64_t WAL_HEADER_PAGE_PREFIX_FIELD_SIZES = @@ -27,7 +27,7 @@ class BaseWALAndWALIterator { protected: BaseWALAndWALIterator() : BaseWALAndWALIterator(nullptr) {} - explicit BaseWALAndWALIterator(std::shared_ptr fileHandle) + explicit BaseWALAndWALIterator(std::shared_ptr fileHandle) : fileHandle{std::move(fileHandle)}, offsetInCurrentHeaderPage{INT64_MAX}, currentHeaderPageIdx{INT32_MAX} { currentHeaderPageBuffer = std::make_unique(WAL_HEADER_PAGE_SIZE); @@ -60,7 +60,7 @@ class BaseWALAndWALIterator { } public: - std::shared_ptr fileHandle; + std::shared_ptr fileHandle; // Used by WAL as the next offset to write and by WALIterator as the next offset to read uint64_t offsetInCurrentHeaderPage; // First header page of the WAL, if it exists, is always located at page 0 of the WAL. 
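As a side note on the WAL_HEADER_PAGE_* constants above: each WAL header page spends a small fixed prefix on bookkeeping and uses the rest for serialized records. A standalone sketch of that arithmetic (not part of the patch; it assumes page_idx_t is a 32-bit page index):

#include <cstdint>
#include <cstdio>

int main() {
    // Mirrors the constants above: a 4KB header page starts with a record counter
    // and the index of the next header page; the remainder holds serialized records.
    constexpr uint64_t headerPageSize = 4096;                          // PAGE_4KB_SIZE
    constexpr uint64_t numRecordsFieldSize = sizeof(uint64_t);         // 8 bytes
    constexpr uint64_t nextHeaderPageIdxFieldSize = sizeof(uint32_t);  // assumed page_idx_t width
    constexpr uint64_t prefixFieldSizes = numRecordsFieldSize + nextHeaderPageIdxFieldSize;
    std::printf("bytes per header page available for WAL records: %llu\n",
        (unsigned long long)(headerPageSize - prefixFieldSizes));      // 4084
    return 0;
}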
@@ -172,7 +172,7 @@ class WAL : public BaseWALAndWALIterator { class WALIterator : public BaseWALAndWALIterator { public: - explicit WALIterator(std::shared_ptr fileHandle, std::mutex& mtx); + explicit WALIterator(std::shared_ptr fileHandle, std::mutex& mtx); inline bool hasNextRecord() { lock_t lck{mtx}; diff --git a/src/include/storage/wal_replayer.h b/src/include/storage/wal_replayer.h index 7697dc27a3..99cbc21fae 100644 --- a/src/include/storage/wal_replayer.h +++ b/src/include/storage/wal_replayer.h @@ -32,9 +32,9 @@ class WALReplayer { void replayWALRecord(WALRecord& walRecord); void checkpointOrRollbackVersionedFileHandleAndBufferManager( const WALRecord& walRecord, const StorageStructureID& storageStructureID); - void truncateFileIfInsertion(BufferManagedFileHandle* fileHandle, - const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord); - BufferManagedFileHandle* getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( + void truncateFileIfInsertion( + BMFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord); + BMFileHandle* getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( const StorageStructureID& storageStructureID); std::unique_ptr getCatalogForRecovery(common::DBFileType dbFileType); @@ -46,7 +46,7 @@ class WALReplayer { StorageManager* storageManager; BufferManager* bufferManager; MemoryManager* memoryManager; - std::shared_ptr walFileHandle; + std::shared_ptr walFileHandle; std::unique_ptr pageBuffer; std::shared_ptr logger; WAL* wal; diff --git a/src/main/database.cpp b/src/main/database.cpp index ec042a15a1..2a9836ce59 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp @@ -19,17 +19,14 @@ namespace main { SystemConfig::SystemConfig() : SystemConfig(-1u) {} -SystemConfig::SystemConfig(uint64_t bufferPoolSize) { - if (bufferPoolSize == -1u) { +SystemConfig::SystemConfig(uint64_t bufferPoolSize_) { + if (bufferPoolSize_ == -1u) { auto systemMemSize = (std::uint64_t)sysconf(_SC_PHYS_PAGES) * (std::uint64_t)sysconf(_SC_PAGESIZE); - bufferPoolSize = (uint64_t)(StorageConstants::DEFAULT_BUFFER_POOL_RATIO * - (double_t)std::min(systemMemSize, (std::uint64_t)UINTPTR_MAX)); + bufferPoolSize_ = (uint64_t)(BufferPoolConstants::DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM * + (double_t)std::min(systemMemSize, (std::uint64_t)UINTPTR_MAX)); } - defaultPageBufferPoolSize = - (uint64_t)((double_t)bufferPoolSize * StorageConstants::DEFAULT_PAGES_BUFFER_RATIO); - largePageBufferPoolSize = - (uint64_t)((double_t)bufferPoolSize * StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferPoolSize = bufferPoolSize_; maxNumThreads = std::thread::hardware_concurrency(); } @@ -40,8 +37,7 @@ Database::Database(std::string databasePath, SystemConfig systemConfig) initLoggers(); initDBDirAndCoreFilesIfNecessary(); logger = LoggerUtils::getLogger(LoggerConstants::LoggerEnum::DATABASE); - bufferManager = std::make_unique( - this->systemConfig.defaultPageBufferPoolSize, this->systemConfig.largePageBufferPoolSize); + bufferManager = std::make_unique(this->systemConfig.bufferPoolSize); memoryManager = std::make_unique(bufferManager.get()); wal = std::make_unique(this->databasePath, *bufferManager); recoverIfNecessary(); diff --git a/src/processor/operator/aggregate/aggregate_hash_table.cpp b/src/processor/operator/aggregate/aggregate_hash_table.cpp index b9beafb282..dfe85929ad 100644 --- a/src/processor/operator/aggregate/aggregate_hash_table.cpp +++ b/src/processor/operator/aggregate/aggregate_hash_table.cpp @@ -178,9 +178,9 @@ void AggregateHashTable::initializeFT( void 
AggregateHashTable::initializeHashTable(uint64_t numEntriesToAllocate) { maxNumHashSlots = nextPowerOfTwo( - std::max(BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(HashSlot), numEntriesToAllocate)); + std::max(BufferPoolConstants::PAGE_256KB_SIZE / sizeof(HashSlot), numEntriesToAllocate)); bitmask = maxNumHashSlots - 1; - auto numHashSlotsPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(HashSlot); + auto numHashSlotsPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / sizeof(HashSlot); assert(numHashSlotsPerBlock == nextPowerOfTwo(numHashSlotsPerBlock)); numSlotsPerBlockLog2 = log2(numHashSlotsPerBlock); slotIdxInBlockMask = BitmaskUtils::all1sMaskForLeastSignificantBits(numSlotsPerBlockLog2); diff --git a/src/processor/operator/hash_join/join_hash_table.cpp b/src/processor/operator/hash_join/join_hash_table.cpp index 635e131d44..93944c6f8a 100644 --- a/src/processor/operator/hash_join/join_hash_table.cpp +++ b/src/processor/operator/hash_join/join_hash_table.cpp @@ -12,7 +12,7 @@ namespace processor { JoinHashTable::JoinHashTable(MemoryManager& memoryManager, uint64_t numKeyColumns, std::unique_ptr tableSchema) : BaseHashTable{memoryManager}, numKeyColumns{numKeyColumns} { - auto numSlotsPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(uint8_t*); + auto numSlotsPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / sizeof(uint8_t*); assert(numSlotsPerBlock == nextPowerOfTwo(numSlotsPerBlock)); numSlotsPerBlockLog2 = std::log2(numSlotsPerBlock); slotIdxInBlockMask = BitmaskUtils::all1sMaskForLeastSignificantBits(numSlotsPerBlockLog2); diff --git a/src/processor/operator/order_by/key_block_merger.cpp b/src/processor/operator/order_by/key_block_merger.cpp index 51ff0e3e81..0079bd7436 100644 --- a/src/processor/operator/order_by/key_block_merger.cpp +++ b/src/processor/operator/order_by/key_block_merger.cpp @@ -13,7 +13,7 @@ namespace processor { MergedKeyBlocks::MergedKeyBlocks( uint32_t numBytesPerTuple, uint64_t numTuples, MemoryManager* memoryManager) : numBytesPerTuple{numBytesPerTuple}, - numTuplesPerBlock{(uint32_t)(BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple)}, + numTuplesPerBlock{(uint32_t)(BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple)}, numTuples{numTuples}, endTupleOffset{numTuplesPerBlock * numBytesPerTuple} { auto numKeyBlocks = numTuples / numTuplesPerBlock + (numTuples % numTuplesPerBlock ? 1 : 0); for (auto i = 0u; i < numKeyBlocks; i++) { @@ -24,7 +24,7 @@ MergedKeyBlocks::MergedKeyBlocks( // This constructor is used to convert a keyBlock to a MergedKeyBlocks. 
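The numSlotsPerBlockLog2/slotIdxInBlockMask fields set up in the two hash-table hunks above follow one pattern: with a power-of-two number of slots per 256KB block, a global slot index splits into a block index (high bits) and a slot offset inside the block (low bits). A minimal standalone sketch of that arithmetic, assuming 8-byte slots as in JoinHashTable:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr uint64_t blockSize = 256 * 1024;       // BufferPoolConstants::PAGE_256KB_SIZE
    constexpr uint64_t slotSize = sizeof(uint8_t*);  // 8 bytes on a 64-bit build
    constexpr uint64_t numSlotsPerBlock = blockSize / slotSize;
    static_assert((numSlotsPerBlock & (numSlotsPerBlock - 1)) == 0,
        "slots per block must be a power of two");

    uint64_t numSlotsPerBlockLog2 = 0;
    while ((1ull << numSlotsPerBlockLog2) < numSlotsPerBlock) {
        numSlotsPerBlockLog2++;
    }
    const uint64_t slotIdxInBlockMask = numSlotsPerBlock - 1;

    uint64_t globalSlotIdx = 100000;
    uint64_t blockIdx = globalSlotIdx >> numSlotsPerBlockLog2;  // which 256KB block
    uint64_t slotInBlock = globalSlotIdx & slotIdxInBlockMask;  // offset within that block
    std::printf("slot %llu -> block %llu, offset %llu\n", (unsigned long long)globalSlotIdx,
        (unsigned long long)blockIdx, (unsigned long long)slotInBlock);
    assert(blockIdx * numSlotsPerBlock + slotInBlock == globalSlotIdx);
    return 0;
}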
MergedKeyBlocks::MergedKeyBlocks(uint32_t numBytesPerTuple, std::shared_ptr keyBlock) : numBytesPerTuple{numBytesPerTuple}, - numTuplesPerBlock{(uint32_t)(BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple)}, + numTuplesPerBlock{(uint32_t)(BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple)}, numTuples{keyBlock->numTuples}, endTupleOffset{numTuplesPerBlock * numBytesPerTuple} { keyBlocks.emplace_back(keyBlock); } diff --git a/src/processor/operator/order_by/order_by_key_encoder.cpp b/src/processor/operator/order_by/order_by_key_encoder.cpp index a725fd5078..e9f3ed2750 100644 --- a/src/processor/operator/order_by/order_by_key_encoder.cpp +++ b/src/processor/operator/order_by/order_by_key_encoder.cpp @@ -22,11 +22,11 @@ OrderByKeyEncoder::OrderByKeyEncoder(std::vector& orderByVectors, } keyBlocks.emplace_back(std::make_unique(memoryManager)); assert(this->numBytesPerTuple == getNumBytesPerTuple()); - maxNumTuplesPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple; + maxNumTuplesPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple; if (maxNumTuplesPerBlock <= 0) { throw RuntimeException(StringUtils::string_format( "TupleSize({} bytes) is larger than the LARGE_PAGE_SIZE({} bytes)", numBytesPerTuple, - BufferPoolConstants::LARGE_PAGE_SIZE)); + BufferPoolConstants::PAGE_256KB_SIZE)); } encodeFunctions.resize(orderByVectors.size()); for (auto i = 0u; i < orderByVectors.size(); i++) { diff --git a/src/processor/result/factorized_table.cpp b/src/processor/result/factorized_table.cpp index fbf87dcea3..83aa56eca3 100644 --- a/src/processor/result/factorized_table.cpp +++ b/src/processor/result/factorized_table.cpp @@ -87,11 +87,11 @@ void DataBlockCollection::merge(DataBlockCollection& other) { FactorizedTable::FactorizedTable( MemoryManager* memoryManager, std::unique_ptr tableSchema) : memoryManager{memoryManager}, tableSchema{std::move(tableSchema)}, numTuples{0} { - assert(this->tableSchema->getNumBytesPerTuple() <= BufferPoolConstants::LARGE_PAGE_SIZE); + assert(this->tableSchema->getNumBytesPerTuple() <= BufferPoolConstants::PAGE_256KB_SIZE); if (!this->tableSchema->isEmpty()) { inMemOverflowBuffer = std::make_unique(memoryManager); numTuplesPerBlock = - BufferPoolConstants::LARGE_PAGE_SIZE / this->tableSchema->getNumBytesPerTuple(); + BufferPoolConstants::PAGE_256KB_SIZE / this->tableSchema->getNumBytesPerTuple(); flatTupleBlockCollection = std::make_unique( this->tableSchema->getNumBytesPerTuple(), numTuplesPerBlock); unflatTupleBlockCollection = std::make_unique(); @@ -119,7 +119,7 @@ uint8_t* FactorizedTable::appendEmptyTuple() { flatTupleBlockCollection->append(std::make_unique(memoryManager)); } auto& block = flatTupleBlockCollection->getBlocks().back(); - uint8_t* tuplePtr = block->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - block->freeSize; + uint8_t* tuplePtr = block->getData() + BufferPoolConstants::PAGE_256KB_SIZE - block->freeSize; block->freeSize -= tableSchema->getNumBytesPerTuple(); block->numTuples++; numTuples++; @@ -380,7 +380,7 @@ uint64_t FactorizedTable::computeNumTuplesToAppend( std::vector FactorizedTable::allocateFlatTupleBlocks( uint64_t numTuplesToAppend) { auto numBytesPerTuple = tableSchema->getNumBytesPerTuple(); - assert(numBytesPerTuple < BufferPoolConstants::LARGE_PAGE_SIZE); + assert(numBytesPerTuple < BufferPoolConstants::PAGE_256KB_SIZE); std::vector appendingInfos; while (numTuplesToAppend > 0) { if (flatTupleBlockCollection->isEmpty() || @@ -391,7 +391,7 @@ std::vector FactorizedTable::allocateFlatTupleBlocks( 
auto numTuplesToAppendInCurBlock = std::min(numTuplesToAppend, block->freeSize / numBytesPerTuple); appendingInfos.emplace_back( - block->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - block->freeSize, + block->getData() + BufferPoolConstants::PAGE_256KB_SIZE - block->freeSize, numTuplesToAppendInCurBlock); block->freeSize -= numTuplesToAppendInCurBlock * numBytesPerTuple; block->numTuples += numTuplesToAppendInCurBlock; @@ -401,14 +401,14 @@ std::vector FactorizedTable::allocateFlatTupleBlocks( } uint8_t* FactorizedTable::allocateUnflatTupleBlock(uint32_t numBytes) { - assert(numBytes < BufferPoolConstants::LARGE_PAGE_SIZE); + assert(numBytes < BufferPoolConstants::PAGE_256KB_SIZE); if (unflatTupleBlockCollection->isEmpty()) { unflatTupleBlockCollection->append(std::make_unique(memoryManager)); } auto lastBlock = unflatTupleBlockCollection->getBlocks().back().get(); if (lastBlock->freeSize > numBytes) { lastBlock->freeSize -= numBytes; - return lastBlock->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - lastBlock->freeSize - + return lastBlock->getData() + BufferPoolConstants::PAGE_256KB_SIZE - lastBlock->freeSize - numBytes; } unflatTupleBlockCollection->append(std::make_unique(memoryManager)); diff --git a/src/storage/buffer_manager/CMakeLists.txt b/src/storage/buffer_manager/CMakeLists.txt index b3cbc4e4ea..4c52cda0a5 100644 --- a/src/storage/buffer_manager/CMakeLists.txt +++ b/src/storage/buffer_manager/CMakeLists.txt @@ -1,8 +1,8 @@ add_library(kuzu_storage_buffer_manager OBJECT - buffer_managed_file_handle.cpp + vm_region.cpp + bm_file_handle.cpp buffer_manager.cpp - buffer_pool.cpp memory_manager.cpp) set(ALL_OBJECT_FILES diff --git a/src/storage/buffer_manager/buffer_managed_file_handle.cpp b/src/storage/buffer_manager/bm_file_handle.cpp similarity index 60% rename from src/storage/buffer_manager/buffer_managed_file_handle.cpp rename to src/storage/buffer_manager/bm_file_handle.cpp index 216c7a2647..b63272ae81 100644 --- a/src/storage/buffer_manager/buffer_managed_file_handle.cpp +++ b/src/storage/buffer_manager/bm_file_handle.cpp @@ -1,61 +1,74 @@ -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" + +#include "storage/buffer_manager/buffer_manager.h" using namespace kuzu::common; namespace kuzu { namespace storage { -BufferManagedFileHandle::BufferManagedFileHandle( - const std::string& path, uint8_t flags, FileVersionedType fileVersionedType) - : FileHandle{path, flags}, fileVersionedType{fileVersionedType} { - initPageIdxToFrameMapAndLocks(); - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); - } +void PageState::setInFrame(common::page_idx_t pageIdx_) { + pageIdx = 0; + pageIdx = pageIdx_; + pageIdx |= IS_IN_FRAME_MASK; } -void BufferManagedFileHandle::initPageIdxToFrameMapAndLocks() { - pageIdxToFrameMap.resize(pageCapacity); - pageLocks.resize(pageCapacity); - for (auto i = 0ull; i < numPages; i++) { - pageIdxToFrameMap[i] = std::make_unique>(UINT32_MAX); - pageLocks[i] = std::make_unique(); +bool PageState::acquireLock(LockMode lockMode) { + if (lockMode == LockMode::SPIN) { + while (lock.test_and_set()) // spinning + ; + return true; } + return !lock.test_and_set(); } -common::page_idx_t BufferManagedFileHandle::addNewPageWithoutLock() { - if (numPages == pageCapacity) { - pageCapacity += StorageConstants::PAGE_GROUP_SIZE; - pageIdxToFrameMap.resize(pageCapacity); - pageLocks.resize(pageCapacity); +void PageState::resetState() { + pageIdx 
= 0; + pinCount = 0; + evictionTimestamp = 0; +} + +BMFileHandle::BMFileHandle(const std::string& path, uint8_t flags, BufferManager* bm, + common::PageSizeClass pageSizeClass, FileVersionedType fileVersionedType) + : FileHandle{path, flags}, bm{bm}, pageSizeClass{pageSizeClass}, fileVersionedType{ + fileVersionedType} { + initPageStatesAndGroups(); +} + +void BMFileHandle::initPageStatesAndGroups() { + pageStates.resize(pageCapacity); + for (auto i = 0ull; i < numPages; i++) { + pageStates[i] = std::make_unique(); } - pageLocks[numPages] = std::make_unique(); - pageIdxToFrameMap[numPages] = std::make_unique>(UINT32_MAX); - auto newPageIdx = numPages++; - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); + auto numPageGroups = getNumPageGroups(); + frameGroupIdxes.resize(numPageGroups); + pageGroupLocks.resize(numPageGroups); + pageVersions.resize(numPageGroups); + for (auto i = 0u; i < numPageGroups; i++) { + frameGroupIdxes[i] = bm->addNewFrameGroup(pageSizeClass); + pageGroupLocks[i] = std::make_unique(); } - return newPageIdx; } -bool BufferManagedFileHandle::acquirePageLock(page_idx_t pageIdx, bool block) { - if (block) { - while (!acquire(pageIdx)) {} // spinning - return true; +common::page_idx_t BMFileHandle::addNewPageWithoutLock() { + if (numPages == pageCapacity) { + addNewPageGroupWithoutLock(); } - return acquire(pageIdx); + pageStates[numPages] = std::make_unique(); + return numPages++; } -bool BufferManagedFileHandle::acquire(common::page_idx_t pageIdx) { - if (pageIdx >= pageLocks.size()) { - throw RuntimeException( - StringUtils::string_format("pageIdx {} is >= pageLocks.size()", pageIdx)); +void BMFileHandle::addNewPageGroupWithoutLock() { + pageCapacity += StorageConstants::PAGE_GROUP_SIZE; + pageStates.resize(pageCapacity); + frameGroupIdxes.push_back(bm->addNewFrameGroup(pageSizeClass)); + if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { + pageGroupLocks.push_back(std::make_unique()); + pageVersions.emplace_back(); } - auto retVal = !pageLocks[pageIdx]->test_and_set(std::memory_order_acquire); - return retVal; } -void BufferManagedFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageIdx) { +void BMFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); // Note that we do not have to acquire an xlock here because this function assumes that prior to // calling this function, pageVersion and pageGroupLocks have been resized correctly. 
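The new PageState above packs the "page is in a frame" flag and the page index into a single word (setInFrame ORs IS_IN_FRAME_MASK into pageIdx, resetState clears it). A toy sketch of that packing, not the patch's code; the actual mask is defined in bm_file_handle.h and the top bit of a 32-bit index is only assumed here:

#include <cassert>
#include <cstdint>

// Assumed flag bit; the real constant lives in bm_file_handle.h.
constexpr uint32_t IS_IN_FRAME_MASK = 0x80000000u;

struct PackedPageState {
    uint32_t pageIdx = 0;  // flag bit + page index share one word

    void setInFrame(uint32_t idx) { pageIdx = idx | IS_IN_FRAME_MASK; }
    bool isInFrame() const { return (pageIdx & IS_IN_FRAME_MASK) != 0; }
    uint32_t getPageIdx() const { return pageIdx & ~IS_IN_FRAME_MASK; }
    void resetState() { pageIdx = 0; }  // clears both the index and the in-frame flag
};

int main() {
    PackedPageState state;
    state.setInFrame(42);
    assert(state.isInFrame() && state.getPageIdx() == 42);
    state.resetState();
    assert(!state.isInFrame());
    return 0;
}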
@@ -79,50 +92,40 @@ void BufferManagedFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageI pageGroupLocks[pageGroupIdxAndPosInGroup.pageIdx]->clear(); } -void BufferManagedFileHandle::resetToZeroPagesAndPageCapacity() { +void BMFileHandle::resetToZeroPagesAndPageCapacity() { std::unique_lock xlock(fhSharedMutex); numPages = 0; pageCapacity = 0; FileUtils::truncateFileToEmpty(fileInfo.get()); - initPageIdxToFrameMapAndLocks(); + initPageStatesAndGroups(); } -void BufferManagedFileHandle::removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx) { +void BMFileHandle::removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx) { std::unique_lock xLck{fhSharedMutex}; removePageIdxAndTruncateIfNecessaryWithoutLock(pageIdx); - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); - } } -void BufferManagedFileHandle::removePageIdxAndTruncateIfNecessaryWithoutLock( +void BMFileHandle::removePageIdxAndTruncateIfNecessaryWithoutLock( common::page_idx_t pageIdxToRemove) { if (numPages <= pageIdxToRemove) { return; } for (auto pageIdx = pageIdxToRemove; pageIdx < numPages; ++pageIdx) { - pageIdxToFrameMap[pageIdx].reset(); - pageLocks[pageIdx].reset(); + pageStates[pageIdx].reset(); } numPages = pageIdxToRemove; -} - -void BufferManagedFileHandle::resizePageGroupLocksAndPageVersionsWithoutLock() { auto numPageGroups = getNumPageGroups(); - if (pageGroupLocks.size() == numPageGroups) { + if (numPageGroups == frameGroupIdxes.size()) { return; - } else if (pageGroupLocks.size() < numPageGroups) { - for (auto i = pageGroupLocks.size(); i < numPageGroups; ++i) { - pageGroupLocks.push_back(std::make_unique()); - } - } else { - pageGroupLocks.resize(numPageGroups); } + assert(numPageGroups < frameGroupIdxes.size()); + frameGroupIdxes.resize(numPageGroups); + pageGroupLocks.resize(numPageGroups); pageVersions.resize(numPageGroups); } // This function assumes that the caller has already acquired the lock for originalPageIdx. -bool BufferManagedFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { +bool BMFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); auto pageGroupIdxAndPosInGroup = PageUtils::getPageElementCursorForPos(originalPageIdx, StorageConstants::PAGE_GROUP_SIZE); @@ -141,7 +144,7 @@ bool BufferManagedFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t ori return retVal; } -void BufferManagedFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t pageIdx) { +void BMFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t pageIdx) { { std::shared_lock sLck{fhSharedMutex}; if (numPages <= pageIdx) { @@ -150,12 +153,12 @@ void BufferManagedFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t } createPageVersionGroupIfNecessary(pageIdx); setWALPageVersionNoLock(pageIdx, UINT32_MAX); + // TODO(Guodong): Why do we release lock here? Need to understand how the lock was acquired. releasePageLock(pageIdx); } // This function assumes that the caller has already acquired the lock for originalPageIdx. 
-common::page_idx_t BufferManagedFileHandle::getWALPageVersionNoPageLock( - common::page_idx_t originalPageIdx) { +common::page_idx_t BMFileHandle::getWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); // See the comment about a shared lock in hasWALPageVersionNoPageLock std::shared_lock sLck{fhSharedMutex}; @@ -164,14 +167,14 @@ common::page_idx_t BufferManagedFileHandle::getWALPageVersionNoPageLock( return pageVersions[pageGroupIdxAndPosInGroup.pageIdx][pageGroupIdxAndPosInGroup.elemPosInPage]; } -void BufferManagedFileHandle::setWALPageVersion( +void BMFileHandle::setWALPageVersion( common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); std::shared_lock sLck{fhSharedMutex}; setWALPageVersionNoLock(originalPageIdx, pageIdxInWAL); } -void BufferManagedFileHandle::setWALPageVersionNoLock( +void BMFileHandle::setWALPageVersionNoLock( common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL) { auto pageGroupIdxAndPosInGroup = PageUtils::getPageElementCursorForPos(originalPageIdx, StorageConstants::PAGE_GROUP_SIZE); diff --git a/src/storage/buffer_manager/buffer_manager.cpp b/src/storage/buffer_manager/buffer_manager.cpp index 7d202f28a9..eca67841c1 100644 --- a/src/storage/buffer_manager/buffer_manager.cpp +++ b/src/storage/buffer_manager/buffer_manager.cpp @@ -1,5 +1,8 @@ #include "storage/buffer_manager/buffer_manager.h" +#include + +#include "common/constants.h" #include "common/exception.h" #include "spdlog/spdlog.h" @@ -8,13 +11,50 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -BufferManager::BufferManager(uint64_t maxSizeForDefaultPagePool, uint64_t maxSizeForLargePagePool) +// In this function, we try to remove as many non-evictable candidates as possible from the +// eviction queue until we hit a candidate that is evictable. +// Two kinds of candidates are not evictable: 1) the page is currently pinned; 2) the page has been recently +// visited. +// To identify those recently accessed candidates, we use the eviction timestamp. If the +// eviction timestamp of a candidate is different from the timestamp in its corresponding pageState, +// it means that the candidate has been recently visited and we should not evict it. The idea is +// that the eviction timestamp is a logical per-page counter starting from 0, and is incremented each +// time the page is pushed into the eviction queue. For example, the first time p5 is pushed into +// the eviction queue, it will end up with a timestamp 1. When we push a page into the queue, we +// create an EvictionCandidate object for the page. Let's call this object c0 when p5 is first +// pushed. c0 will consist of (ptr to p5, 1), where the latter is the eviction timestamp at the time +// c0 is put into the queue. Suppose p5 is put into the eviction queue again (e.g., because it was +// pinned and unpinned). At this point we create another EvictionCandidate object c1 (ptr to p5, 2) +// where the latter eviction timestamp is now incremented by 1, which makes c0 stale and thus not evictable. +// This idea is inspired by DuckDB's queue-based eviction implementation.
+void EvictionQueue::removeNonEvictableCandidates() { + EvictionCandidate evictionCandidate; + while (true) { + if (!queue->try_dequeue(evictionCandidate)) { + break; + } + if (evictionCandidate.pageState->getPinCount() != 0 || + evictionCandidate.pageState->getEvictionTimestamp() != + evictionCandidate.evictionTimestamp) { + // Remove the candidate from the eviction queue if it is still pinned or if it has + // been recently visited. + continue; + } else { + queue->enqueue(evictionCandidate); + break; + } + } +} + +BufferManager::BufferManager(uint64_t bufferPoolSize) : logger{LoggerUtils::getLogger(common::LoggerConstants::LoggerEnum::BUFFER_MANAGER)}, - bufferPoolDefaultPages(std::make_unique( - BufferPoolConstants::DEFAULT_PAGE_SIZE, maxSizeForDefaultPagePool)), - bufferPoolLargePages(std::make_unique( - BufferPoolConstants::LARGE_PAGE_SIZE, maxSizeForLargePagePool)) { - logger->info("Done Initializing Buffer Manager."); + usedMemory{0}, bufferPoolSize{bufferPoolSize}, numEvictionQueueInsertions{0} { + logger->info("Done initializing buffer manager."); + vmRegions.resize(2); + vmRegions[0] = std::make_unique( + PageSizeClass::PAGE_4KB, BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE); + vmRegions[1] = std::make_unique(PageSizeClass::PAGE_256KB, bufferPoolSize); + evictionQueue = std::make_unique(); } BufferManager::~BufferManager() = default; @@ -28,61 +68,196 @@ BufferManager::~BufferManager() = default; // should be flushed to disk if it is evicted. // (3) If multiple threads are writing to the page, they should coordinate separately because they // both get access to the same piece of memory. -uint8_t* BufferManager::pin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? bufferPoolLargePages->pin(fileHandle, pageIdx) : - bufferPoolDefaultPages->pin(fileHandle, pageIdx); -} - -// Important Note: This function will pin a page but if the page was not yet in a frame, it will -// not read it from the file. So this can be used if the page is a new page of a file, or a page -// of a temporary file that is being re-used and its contents is not important. -// -// If this is the new page of a file: the caller should call this function immediately after a new -// page is added FileHandle, ensuring that no other thread can try to pin the newly created page -// (with serious side effects). See the detailed explanation in FileHandle::addNewPage() for -// details. -uint8_t* BufferManager::pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? 
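A self-contained sketch of the staleness check described in the eviction-queue comment above. The patch itself uses moodycamel::ConcurrentQueue with the real PageState and EvictionCandidate types; this toy version uses plain atomics and std::queue only to show why a candidate enqueued before a later pin/unpin cycle is skipped:

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <queue>

struct ToyPageState {
    std::atomic<uint32_t> pinCount{0};
    std::atomic<uint64_t> evictionTimestamp{0};
};

struct ToyEvictionCandidate {
    ToyPageState* pageState;
    uint64_t evictionTimestamp;  // value captured when the candidate was enqueued
    bool isEvictable() const {
        // Evictable only if the page is unpinned and has not been re-enqueued since.
        return pageState->pinCount.load() == 0 &&
               pageState->evictionTimestamp.load() == evictionTimestamp;
    }
};

int main() {
    ToyPageState p5;
    std::queue<ToyEvictionCandidate> evictionQueue;

    // First unpin of p5: enqueue candidate c0 carrying timestamp 1.
    evictionQueue.push({&p5, ++p5.evictionTimestamp});
    // p5 is pinned and unpinned again: enqueue candidate c1 carrying timestamp 2.
    evictionQueue.push({&p5, ++p5.evictionTimestamp});

    // c0 is stale: its recorded timestamp (1) no longer matches p5's current one (2).
    std::printf("c0 evictable: %d\n", (int)evictionQueue.front().isEvictable());  // 0
    evictionQueue.pop();
    std::printf("c1 evictable: %d\n", (int)evictionQueue.front().isEvictable());  // 1
    return 0;
}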
- bufferPoolLargePages->pinWithoutReadingFromFile(fileHandle, pageIdx) : - bufferPoolDefaultPages->pinWithoutReadingFromFile(fileHandle, pageIdx); +uint8_t* BufferManager::pin( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto retVal = pinWithoutAcquiringPageLock(fileHandle, pageIdx, pageReadPolicy); + fileHandle.releasePageLock(pageIdx); + return retVal; +} + +uint8_t* BufferManager::pinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState->isInFrame()) { + pageState->incrementPinCount(); + } else { + if (!claimAFrame(fileHandle, pageIdx, pageReadPolicy)) { + pageState->releaseLock(); + throw BufferManagerException("Failed to claim a frame."); + } + } + return getFrame(fileHandle, pageIdx); } // Important Note: The caller should make sure that they have pinned the page before calling this. -void BufferManager::setPinnedPageDirty(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? bufferPoolLargePages->setPinnedPageDirty(fileHandle, pageIdx) : - bufferPoolDefaultPages->setPinnedPageDirty(fileHandle, pageIdx); +void BufferManager::setPinnedPageDirty(BMFileHandle& fileHandle, page_idx_t pageIdx) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState && pageState->getPinCount() >= 1) { + pageState->setDirty(); + fileHandle.releasePageLock(pageIdx); + } else { + fileHandle.releasePageLock(pageIdx); + throw BufferManagerException("If a page is not in memory or is not pinned, cannot set " + "it to isDirty = true. filePath: " + + fileHandle.getFileInfo()->path + + " pageIdx: " + std::to_string(pageIdx) + "."); + } +} + +void BufferManager::unpin(BMFileHandle& fileHandle, page_idx_t pageIdx) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + unpinWithoutAcquiringPageLock(fileHandle, pageIdx); + fileHandle.releasePageLock(pageIdx); +} + +void BufferManager::unpinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + // `count` is the value of `pinCount` before sub. + auto count = pageState->decrementPinCount(); + assert(count >= 1); + if (count == 1) { + addToEvictionQueue(&fileHandle, pageState); + } +} + +// This function tries to load the given page into a frame. Due to our design of mmap, each page is +// uniquely mapped to a frame. Thus, claiming a frame is equivalent to ensuring enough physical +// memory is available. +// First, we reserve the memory for the page, which increments the atomic counter `usedMemory`. +// Then, we check if there is enough memory available. If not, we evict pages until we have enough +// or we can find no more pages to be evicted. +// Lastly, we double check if the needed memory is available. If not, we free the memory we reserved +// and return false, otherwise, we load the page to its corresponding frame and return true. +bool BufferManager::claimAFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + page_offset_t pageSizeToClaim = fileHandle.getPageSize(); + // Reserve the memory for the page. + auto currentUsedMem = reserveUsedMemory(pageSizeToClaim); + uint64_t claimedMemory = 0; + // Evict pages if necessary until we have enough memory. 
+ while ((currentUsedMem + pageSizeToClaim - claimedMemory) > bufferPoolSize.load()) { + EvictionCandidate evictionCandidate; + if (!evictionQueue->dequeue(evictionCandidate)) { + // Cannot find more pages to be evicted. Free the memory we reserved and return false. + freeUsedMemory(pageSizeToClaim); + return false; + } + if (!evictionCandidate.isEvictable()) { + continue; + } + // We found a page whose pin count can be 0, and potentially haven't been accessed since + // enqueued. We try to evict the page from its frame by calling `tryEvictPage`, which will + // check if the page's pin count is actually 0 and the page has not been accessed recently, + // if so, we evict the page from its frame. + claimedMemory += tryEvictPage(evictionCandidate); + currentUsedMem = usedMemory.load(); + } + if ((currentUsedMem + pageSizeToClaim - claimedMemory) > bufferPoolSize.load()) { + // Cannot claim the memory needed. Free the memory we reserved and return false. + freeUsedMemory(pageSizeToClaim); + return false; + } + // Have enough memory available now, load the page into its corresponding frame. + cachePageIntoFrame(fileHandle, pageIdx, pageReadPolicy); + freeUsedMemory(claimedMemory); + return true; +} + +void BufferManager::addToEvictionQueue(BMFileHandle* fileHandle, PageState* pageState) { + auto timestampBeforeEvict = pageState->incrementEvictionTimestamp(); + if (++numEvictionQueueInsertions == BufferPoolConstants::EVICTION_QUEUE_PURGING_INTERVAL) { + evictionQueue->removeNonEvictableCandidates(); + numEvictionQueueInsertions = 0; + } + evictionQueue->enqueue(fileHandle, pageState, timestampBeforeEvict + 1); +} + +uint64_t BufferManager::tryEvictPage(EvictionCandidate& candidate) { + auto& pageState = *candidate.pageState; + if (!pageState.acquireLock(LockMode::NON_BLOCKING)) { + return 0; + } + // We check pinCount and evictionTimestamp again after acquiring the lock on page currently + // residing in the frame. At this point in time, no other thread can change the pinCount and the + // evictionTimestamp. + if (!candidate.isEvictable()) { + pageState.releaseLock(); + return 0; + } + // Else, flush out the frame into the file page if the frame is dirty. Then remove the page + // from the frame and release the lock on it. + flushIfDirtyWithoutLock(*candidate.fileHandle, pageState.getPageIdx()); + auto numBytesFreed = candidate.fileHandle->getPageSize(); + releaseFrameForPage(*candidate.fileHandle, pageState.getPageIdx()); + pageState.resetState(); + pageState.releaseLock(); + return numBytesFreed; +} + +void BufferManager::cachePageIntoFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + auto pageState = fileHandle.getPageState(pageIdx); + pageState->setPinCount(1); + pageState->clearDirty(); + if (pageReadPolicy == PageReadPolicy::READ_PAGE) { + FileUtils::readFromFile(fileHandle.getFileInfo(), (void*)getFrame(fileHandle, pageIdx), + fileHandle.getPageSize(), pageIdx * fileHandle.getPageSize()); + } + pageState->setInFrame(pageIdx); +} + +void BufferManager::flushIfDirtyWithoutLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState->isDirty()) { + FileUtils::writeToFile(fileHandle.getFileInfo(), getFrame(fileHandle, pageIdx), + fileHandle.getPageSize(), pageIdx * fileHandle.getPageSize()); + } } -void BufferManager::unpin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? 
bufferPoolLargePages->unpin(fileHandle, pageIdx) : - bufferPoolDefaultPages->unpin(fileHandle, pageIdx); +void BufferManager::removeFilePagesFromFrames(BMFileHandle& fileHandle) { + for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { + removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); + } } -void BufferManager::removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle) { - fileHandle.isLargePaged() ? bufferPoolLargePages->removeFilePagesFromFrames(fileHandle) : - bufferPoolDefaultPages->removeFilePagesFromFrames(fileHandle); +void BufferManager::flushAllDirtyPagesInFrames(BMFileHandle& fileHandle) { + for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { + removePageFromFrame(fileHandle, pageIdx, true /* flush */); + } } -void BufferManager::flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle) { - fileHandle.isLargePaged() ? bufferPoolLargePages->flushAllDirtyPagesInFrames(fileHandle) : - bufferPoolDefaultPages->flushAllDirtyPagesInFrames(fileHandle); +void BufferManager::updateFrameIfPageIsInFrameWithoutLock( + BMFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState) { + memcpy(getFrame(fileHandle, pageIdx), newPage, BufferPoolConstants::PAGE_4KB_SIZE); + } } -void BufferManager::updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? - bufferPoolLargePages->updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - fileHandle, newPage, pageIdx) : - bufferPoolDefaultPages->updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - fileHandle, newPage, pageIdx); +void BufferManager::removePageFromFrameIfNecessary(BMFileHandle& fileHandle, page_idx_t pageIdx) { + if (pageIdx >= fileHandle.getNumPages()) { + return; + } + removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); } -void BufferManager::removePageFromFrameIfNecessary( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? - bufferPoolLargePages->removePageFromFrameWithoutFlushingIfNecessary(fileHandle, pageIdx) : - bufferPoolDefaultPages->removePageFromFrameWithoutFlushingIfNecessary(fileHandle, pageIdx); +// NOTE: We assume the page is not pinned here. 
+void BufferManager::removePageFromFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState && pageState->isInFrame()) { + if (shouldFlush) { + flushIfDirtyWithoutLock(fileHandle, pageIdx); + } + fileHandle.clearPageState(pageIdx); + releaseFrameForPage(fileHandle, pageIdx); + freeUsedMemory(fileHandle.getPageSize()); + } + fileHandle.releasePageLock(pageIdx); } } // namespace storage diff --git a/src/storage/buffer_manager/buffer_pool.cpp b/src/storage/buffer_manager/buffer_pool.cpp deleted file mode 100644 index 6f8c6d23c6..0000000000 --- a/src/storage/buffer_manager/buffer_pool.cpp +++ /dev/null @@ -1,292 +0,0 @@ -#include "storage/buffer_manager/buffer_pool.h" - -#include - -#include "common/constants.h" -#include "common/exception.h" -#include "common/utils.h" -#include "spdlog/spdlog.h" - -using namespace kuzu::common; - -namespace kuzu { -namespace storage { - -Frame::Frame(page_offset_t pageSize, std::uint8_t* buffer) - : frameLock{ATOMIC_FLAG_INIT}, pageSize{pageSize}, buffer{buffer} { - resetFrameWithoutLock(); -} - -Frame::~Frame() noexcept(false) { - auto count = pinCount.load(); - if (0 != count && -1u != count) { - throw BufferManagerException( - "Deleting buffer that is still pinned. pinCount: " + std::to_string(count) + - " pageIdx: " + std::to_string(pageIdx)); - } -} - -void Frame::resetFrameWithoutLock() { - fileHandlePtr = -1u; - pageIdx = -1u; - pinCount = -1u; - recentlyAccessed = false; - isDirty = false; -} - -bool Frame::acquireFrameLock(bool block) { - if (block) { - while (frameLock.test_and_set()) // spinning - ; - return true; - } - return !frameLock.test_and_set(); -} - -void Frame::releaseBuffer() { - int error = madvise(buffer, pageSize, MADV_DONTNEED); - if (error) { - throw BufferManagerException("Releasing frame buffer failed with error code " + - std::to_string(error) + ": " + - std::string(std::strerror(errno))); - } -} - -BufferPool::BufferPool(uint64_t pageSize, uint64_t maxSize) - : logger{LoggerUtils::getLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER)}, - pageSize{pageSize}, clockHand{0}, - numFrames((page_idx_t)(ceil((double)maxSize / (double)pageSize))) { - assert(pageSize == BufferPoolConstants::DEFAULT_PAGE_SIZE || - pageSize == BufferPoolConstants::LARGE_PAGE_SIZE); - auto mmapRegion = (uint8_t*)mmap( - NULL, (numFrames * pageSize), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - for (auto i = 0u; i < numFrames; ++i) { - auto buffer = mmapRegion + (i * pageSize); - bufferCache.emplace_back(std::make_unique(pageSize, buffer)); - } - logger->info("Initialize buffer pool with the max size {}B, #{}byte-pages {}.", maxSize, - pageSize, numFrames); -} - -uint8_t* BufferPool::pin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return pin(fileHandle, pageIdx, false /* read page from file */); -} - -uint8_t* BufferPool::pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return pin(fileHandle, pageIdx, true /* do not read page from file */); -} - -void BufferPool::removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle) { - for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { - removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); - } -} - -void BufferPool::removePageFromFrame( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool shouldFlush) { - fileHandle.acquirePageLock(pageIdx, true 
/*block*/); - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - auto& frame = bufferCache[frameIdx]; - frame->acquireFrameLock(true /* block */); - if (shouldFlush) { - flushIfDirty(frame); - } - clearFrameAndUnswizzleWithoutLock(frame, fileHandle, pageIdx); - frame->releaseBuffer(); - frame->releaseFrameLock(); - } - fileHandle.releasePageLock(pageIdx); -} - -void BufferPool::removePageFromFrameWithoutFlushingIfNecessary( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - if (pageIdx >= fileHandle.getNumPages()) { - return; - } - removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); -} - -void BufferPool::flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle) { - for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { - removePageFromFrame(fileHandle, pageIdx, true /* flush */); - } -} - -void BufferPool::updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - memcpy(bufferCache[frameIdx]->buffer, newPage, BufferPoolConstants::DEFAULT_PAGE_SIZE); - } -} - -uint8_t* BufferPool::pin( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - auto retVal = pinWithoutAcquiringPageLock(fileHandle, pageIdx, doNotReadFromFile); - fileHandle.releasePageLock(pageIdx); - return retVal; -} - -uint8_t* BufferPool::pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - auto& frame = bufferCache[frameIdx]; - frame->pinCount.fetch_add(1); - frame->recentlyAccessed = true; - bmMetrics.numCacheHit += 1; - } else { - frameIdx = claimAFrame(fileHandle, pageIdx, doNotReadFromFile); - fileHandle.swizzle(pageIdx, frameIdx); - if (!doNotReadFromFile) { - bmMetrics.numCacheMiss += 1; - } - } - bmMetrics.numPins += 1; - return bufferCache[fileHandle.getFrameIdx(pageIdx)]->buffer; -} - -void BufferPool::setPinnedPageDirty(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (!BufferManagedFileHandle::isAFrame((frameIdx)) || - (bufferCache[frameIdx]->pinCount.load() < 1)) { - fileHandle.releasePageLock(pageIdx); - throw BufferManagerException("If a page is not in memory or is not pinned, cannot set " - "it to isDirty = true.filePath: " + - fileHandle.getFileInfo()->path + - " pageIdx: " + std::to_string(pageIdx) + "."); - } - bufferCache[frameIdx]->setIsDirty(true /* isDirty */); - fileHandle.releasePageLock(pageIdx); -} - -page_idx_t BufferPool::claimAFrame( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - auto localClockHand = clockHand.load(); - auto startFrame = localClockHand % numFrames; - for (auto i = 0u; i < 2 * numFrames; ++i) { - auto frameIdx = (startFrame + i) % numFrames; - auto pinCount = bufferCache[frameIdx]->pinCount.load(); - if ((-1u == pinCount && fillEmptyFrame(frameIdx, fileHandle, pageIdx, doNotReadFromFile)) || - (0u == pinCount && tryEvict(frameIdx, fileHandle, pageIdx, doNotReadFromFile))) { - moveClockHand(localClockHand + i + 1); - return frameIdx; - } - } - throw BufferManagerException("Cannot find a frame to 
evict from."); -} - -bool BufferPool::fillEmptyFrame(page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - page_idx_t pageIdx, bool doNotReadFromFile) { - auto& frame = bufferCache[frameIdx]; - if (!frame->acquireFrameLock(false)) { - return false; - } - if (-1u == frame->pinCount.load()) { - readNewPageIntoFrame(*frame, fileHandle, pageIdx, doNotReadFromFile); - frame->releaseFrameLock(); - return true; - } - frame->releaseFrameLock(); - return false; -} - -bool BufferPool::tryEvict(page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - page_idx_t pageIdx, bool doNotReadFromFile) { - auto& frame = bufferCache[frameIdx]; - if (frame->recentlyAccessed) { - frame->recentlyAccessed = false; - bmMetrics.numRecentlyAccessedWalkover += 1; - return false; - } - if (!frame->acquireFrameLock(false)) { - return false; - } - auto pageIdxInFrame = frame->pageIdx.load(); - auto fileHandleInFrame = - reinterpret_cast(frame->fileHandlePtr.load()); - if (!fileHandleInFrame->acquirePageLock(pageIdxInFrame, false)) { - bmMetrics.numEvictFails += 1; - frame->releaseFrameLock(); - return false; - } - // We check pinCount again after acquiring the lock on page currently residing in the frame. At - // this point in time, no other thread can change the pinCount. - if (0u != frame->pinCount.load()) { - bmMetrics.numEvictFails += 1; - fileHandleInFrame->releasePageLock(pageIdxInFrame); - frame->releaseFrameLock(); - return false; - } - // Else, flush out the frame into the file page if the frame is dirty. Then remove the page from - // the frame and release the lock on it. - flushIfDirty(frame); - clearFrameAndUnswizzleWithoutLock(frame, *fileHandleInFrame, pageIdxInFrame); - fileHandleInFrame->releasePageLock(pageIdxInFrame); - // Update the frame information and release the lock on frame. 
- readNewPageIntoFrame(*frame, fileHandle, pageIdx, doNotReadFromFile); - frame->releaseFrameLock(); - bmMetrics.numEvicts += 1; - return true; -} - -void BufferPool::flushIfDirty(const std::unique_ptr& frame) { - auto fileHandleInFrame = reinterpret_cast(frame->fileHandlePtr.load()); - auto pageIdxInFrame = frame->pageIdx.load(); - if (frame->isDirty) { - bmMetrics.numDirtyPageWriteIO += 1; - fileHandleInFrame->writePage(frame->buffer, pageIdxInFrame); - } -} - -void BufferPool::clearFrameAndUnswizzleWithoutLock(const std::unique_ptr& frame, - BufferManagedFileHandle& fileHandleInFrame, page_idx_t pageIdxInFrame) { - frame->resetFrameWithoutLock(); - fileHandleInFrame.unswizzle(pageIdxInFrame); -} - -void BufferPool::readNewPageIntoFrame( - Frame& frame, BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - frame.pinCount.store(1); - frame.recentlyAccessed = true; - frame.isDirty = false; - frame.pageIdx.store(pageIdx); - frame.fileHandlePtr.store(reinterpret_cast(&fileHandle)); - if (!doNotReadFromFile) { - fileHandle.readPage(frame.buffer, pageIdx); - } -} - -void BufferPool::moveClockHand(uint64_t newClockHand) { - do { - auto currClockHand = clockHand.load(); - if (currClockHand > newClockHand) { - return; - } - if (clockHand.compare_exchange_strong( - currClockHand, newClockHand, std::memory_order_seq_cst)) { - return; - } - } while (true); -} - -void BufferPool::unpin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - unpinWithoutAcquiringPageLock(fileHandle, pageIdx); - fileHandle.releasePageLock(pageIdx); -} - -void BufferPool::unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - auto& frame = bufferCache[fileHandle.getFrameIdx(pageIdx)]; - // `count` is the value of `pinCount` before sub. 
- auto count = frame->pinCount.fetch_sub(1); - assert(count >= 1); -} - -} // namespace storage -} // namespace kuzu diff --git a/src/storage/buffer_manager/memory_manager.cpp b/src/storage/buffer_manager/memory_manager.cpp index cc70629b1b..0edf34d725 100644 --- a/src/storage/buffer_manager/memory_manager.cpp +++ b/src/storage/buffer_manager/memory_manager.cpp @@ -2,33 +2,49 @@ #include +#include "common/utils.h" + using namespace kuzu::common; namespace kuzu { namespace storage { -std::unique_ptr MemoryManager::allocateBlock(bool initializeToZero) { - std::lock_guard lock(memMgrLock); +MemoryBuffer::MemoryBuffer(MemoryAllocator* allocator, page_idx_t pageIdx, uint8_t* buffer) + : buffer{buffer}, pageIdx{pageIdx}, allocator{allocator} {} + +MemoryBuffer::~MemoryBuffer() { + if (buffer != nullptr) { + allocator->freeBlock(pageIdx); + } +} + +MemoryAllocator::MemoryAllocator(BufferManager* bm) : bm{bm} { + pageSize = BufferPoolConstants::PAGE_256KB_SIZE; + fh = bm->getBMFileHandle("mm-256KB", FileHandle::O_IN_MEM_TEMP_FILE, + BMFileHandle::FileVersionedType::NON_VERSIONED_FILE, PAGE_256KB); +} + +MemoryAllocator::~MemoryAllocator() = default; + +std::unique_ptr MemoryAllocator::allocateBuffer(bool initializeToZero) { + std::unique_lock lock(allocatorLock); page_idx_t pageIdx; - uint8_t* data; if (freePages.empty()) { pageIdx = fh->addNewPage(); } else { pageIdx = freePages.top(); freePages.pop(); } - data = bm->pinWithoutReadingFromFile(*fh, pageIdx); - - auto blockHandle = std::make_unique(pageIdx, data); + auto buffer = bm->pin(*fh, pageIdx, BufferManager::PageReadPolicy::DONT_READ_PAGE); + auto memoryBuffer = std::make_unique(this, pageIdx, buffer); if (initializeToZero) { - memset(blockHandle->data, 0, BufferPoolConstants::LARGE_PAGE_SIZE); + memset(memoryBuffer->buffer, 0, pageSize); } - - return blockHandle; + return memoryBuffer; } -void MemoryManager::freeBlock(page_idx_t pageIdx) { - std::lock_guard lock(memMgrLock); +void MemoryAllocator::freeBlock(page_idx_t pageIdx) { + std::unique_lock lock(allocatorLock); bm->unpin(*fh, pageIdx); freePages.push(pageIdx); } diff --git a/src/storage/buffer_manager/vm_region.cpp b/src/storage/buffer_manager/vm_region.cpp new file mode 100644 index 0000000000..7c3fae1817 --- /dev/null +++ b/src/storage/buffer_manager/vm_region.cpp @@ -0,0 +1,52 @@ +#include "storage/buffer_manager/vm_region.h" + +#include + +#include "common/exception.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +VMRegion::VMRegion(PageSizeClass pageSizeClass, uint64_t maxRegionSize) : numFrameGroups{0} { + if (maxRegionSize > (std::size_t)-1) { + throw BufferManagerException("maxRegionSize is beyond the max available mmap region size."); + } + frameSize = pageSizeClass == PageSizeClass::PAGE_4KB ? BufferPoolConstants::PAGE_4KB_SIZE : + BufferPoolConstants::PAGE_256KB_SIZE; + auto numBytesForFrameGroup = frameSize * StorageConstants::PAGE_GROUP_SIZE; + maxNumFrameGroups = (maxRegionSize + numBytesForFrameGroup - 1) / numBytesForFrameGroup; + // Create a private anonymous mapping. The mapping is not shared with other processes and not + // backed by any file, and its content are initialized to zero. 
+ region = (uint8_t*)mmap(NULL, getMaxRegionSize(), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1 /* fd */, 0 /* offset */); + if (region == MAP_FAILED) { + throw BufferManagerException( + "Mmap for size " + std::to_string(getMaxRegionSize()) + " failed."); + } +} + +VMRegion::~VMRegion() { + munmap(region, getMaxRegionSize()); +} + +void VMRegion::releaseFrame(common::frame_idx_t frameIdx) { + int error = madvise(getFrame(frameIdx), frameSize, MADV_DONTNEED); + if (error != 0) { + throw BufferManagerException( + "Releasing physical memory associated with a frame failed with error code " + + std::to_string(error) + ": " + std::string(std::strerror(errno))); + } +} + +frame_group_idx_t VMRegion::addNewFrameGroup() { + std::unique_lock xLck{mtx}; + if (numFrameGroups >= maxNumFrameGroups) { + throw BufferManagerException("No more frame groups can be added to the allocator."); + } + return numFrameGroups++; +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/copy_arrow/copy_node_arrow.cpp b/src/storage/copy_arrow/copy_node_arrow.cpp index ae05aa5b2b..5103f29f82 100644 --- a/src/storage/copy_arrow/copy_node_arrow.cpp +++ b/src/storage/copy_arrow/copy_node_arrow.cpp @@ -188,7 +188,7 @@ void CopyNodeArrow::putPropsOfLineIntoColumns( column->setElement(nodeOffset, reinterpret_cast(&val)); } break; case STRING: { - stringToken = stringToken.substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + stringToken = stringToken.substr(0, BufferPoolConstants::PAGE_4KB_SIZE); data = stringToken.c_str(); auto val = column->getInMemOverflowFile()->copyString(data, overflowCursors[columnIdx]); diff --git a/src/storage/copy_arrow/copy_rel_arrow.cpp b/src/storage/copy_arrow/copy_rel_arrow.cpp index d9b60a62d9..5c35b0efed 100644 --- a/src/storage/copy_arrow/copy_rel_arrow.cpp +++ b/src/storage/copy_arrow/copy_rel_arrow.cpp @@ -393,7 +393,7 @@ void CopyRelArrow::putPropsOfLineIntoColumns(CopyRelArrow* copier, continue; } auto stringToken = - currentToken->get()->ToString().substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + currentToken->get()->ToString().substr(0, BufferPoolConstants::PAGE_4KB_SIZE); const char* data = stringToken.c_str(); switch (properties[propertyIdx].dataType.typeID) { case INT64: { @@ -489,7 +489,7 @@ void CopyRelArrow::putPropsOfLineIntoLists(CopyRelArrow* copier, continue; } auto stringToken = - currentToken->get()->ToString().substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + currentToken->get()->ToString().substr(0, BufferPoolConstants::PAGE_4KB_SIZE); const char* data = stringToken.c_str(); switch (properties[propertyIdx].dataType.typeID) { case INT64: { diff --git a/src/storage/copy_arrow/copy_structures_arrow.cpp b/src/storage/copy_arrow/copy_structures_arrow.cpp index 89006ea155..4276ba1bc9 100644 --- a/src/storage/copy_arrow/copy_structures_arrow.cpp +++ b/src/storage/copy_arrow/copy_structures_arrow.cpp @@ -241,10 +241,10 @@ std::unique_ptr CopyStructuresArrow::getArrowVarList(std::string& l, int6 values.push_back(std::move(value)); } auto numBytesOfOverflow = values.size() * Types::getDataTypeSize(childDataType.typeID); - if (numBytesOfOverflow >= BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (numBytesOfOverflow >= BufferPoolConstants::PAGE_4KB_SIZE) { throw ReaderException(StringUtils::string_format( "Maximum num bytes of a LIST is {}. 
Input list's num bytes is {}.", - BufferPoolConstants::DEFAULT_PAGE_SIZE, numBytesOfOverflow)); + BufferPoolConstants::PAGE_4KB_SIZE, numBytesOfOverflow)); } return make_unique( DataType(VAR_LIST, std::make_unique(childDataType)), std::move(values)); diff --git a/src/storage/index/hash_index.cpp b/src/storage/index/hash_index.cpp index e266586e58..c344de6279 100644 --- a/src/storage/index/hash_index.cpp +++ b/src/storage/index/hash_index.cpp @@ -125,9 +125,8 @@ HashIndex::HashIndex(const StorageStructureIDAndFName& storageStructureIDAndF const DataType& keyDataType, BufferManager& bufferManager, WAL* wal) : BaseHashIndex{keyDataType}, storageStructureIDAndFName{storageStructureIDAndFName}, bm{bufferManager}, wal{wal} { - fileHandle = bufferManager.getBufferManagedFileHandle(storageStructureIDAndFName.fName, - FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + fileHandle = bufferManager.getBMFileHandle(storageStructureIDAndFName.fName, + FileHandle::O_PERSISTENT_FILE_NO_CREATE, BMFileHandle::FileVersionedType::VERSIONED_FILE); headerArray = std::make_unique>(*fileHandle, storageStructureIDAndFName.storageStructureID, INDEX_HEADER_ARRAY_HEADER_PAGE_IDX, &bm, wal); diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index d54b5f2ba6..4b8ab5fdae 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -2,7 +2,6 @@ #include -#include "spdlog/spdlog.h" #include "storage/buffer_manager/buffer_manager.h" #include "storage/wal_replayer.h" diff --git a/src/storage/storage_structure/column.cpp b/src/storage/storage_structure/column.cpp index e6b94fc24c..f3276f12b6 100644 --- a/src/storage/storage_structure/column.cpp +++ b/src/storage/storage_structure/column.cpp @@ -70,7 +70,7 @@ Value Column::readValue(offset_t offset) { bool Column::isNull(offset_t nodeOffset, Transaction* transaction) { auto cursor = PageUtils::getPageElementCursorForPos(nodeOffset, numElementsPerPage); auto originalPageIdx = cursor.pageIdx; - fileHandle->acquirePageLock(originalPageIdx, true /* block */); + fileHandle->acquirePageLock(originalPageIdx, LockMode::SPIN); auto checkWALVersionOfPage = !transaction->isReadOnly() && fileHandle->hasWALPageVersionNoPageLock(originalPageIdx); uint8_t* frame; @@ -78,10 +78,10 @@ bool Column::isNull(offset_t nodeOffset, Transaction* transaction) { if (checkWALVersionOfPage) { pageIdxInWAL = fileHandle->getWALPageVersionNoPageLock(originalPageIdx); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal->fileHandle, pageIdxInWAL, false /* read from file */); + *wal->fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); } else { frame = bufferManager.pinWithoutAcquiringPageLock( - *fileHandle, originalPageIdx, false /* read from file */); + *fileHandle, originalPageIdx, BufferManager::PageReadPolicy::READ_PAGE); } auto nullEntries = (uint64_t*)(frame + (elementSize * numElementsPerPage)); auto isNull = NullMask::isNull(nullEntries, cursor.elemPosInPage); diff --git a/src/storage/storage_structure/disk_array.cpp b/src/storage/storage_structure/disk_array.cpp index 00cd1c9f63..46933aa9e0 100644 --- a/src/storage/storage_structure/disk_array.cpp +++ b/src/storage/storage_structure/disk_array.cpp @@ -12,7 +12,7 @@ namespace storage { DiskArrayHeader::DiskArrayHeader(uint64_t elementSize) : alignedElementSizeLog2{(uint64_t)ceil(log2(elementSize))}, - numElementsPerPageLog2{BufferPoolConstants::DEFAULT_PAGE_SIZE_LOG_2 - alignedElementSizeLog2}, + 
numElementsPerPageLog2{BufferPoolConstants::PAGE_4KB_SIZE_LOG2 - alignedElementSizeLog2}, elementPageOffsetMask{BitmaskUtils::all1sMaskForLeastSignificantBits(numElementsPerPageLog2)}, firstPIPPageIdx{StorageStructureUtils::NULL_PAGE_IDX}, numElements{0}, numAPs{0} {} @@ -79,19 +79,18 @@ U BaseDiskArray::get(uint64_t idx, TransactionType trxType) { checkOutOfBoundAccess(trxType, idx); auto apCursor = getAPIdxAndOffsetInAP(idx); page_idx_t apPageIdx = getAPPageIdxNoLock(apCursor.pageIdx, trxType); - auto& bufferManagedFileHandle = (BufferManagedFileHandle&)fileHandle; + auto& bmFileHandle = (BMFileHandle&)fileHandle; if (trxType == TransactionType::READ_ONLY || !hasTransactionalUpdates || - !bufferManagedFileHandle.hasWALPageVersionNoPageLock(apPageIdx)) { - auto frame = bufferManager->pin(bufferManagedFileHandle, apPageIdx); + !bmFileHandle.hasWALPageVersionNoPageLock(apPageIdx)) { + auto frame = bufferManager->pin(bmFileHandle, apPageIdx); auto retVal = *(U*)(frame + apCursor.offsetInPage); - bufferManager->unpin(bufferManagedFileHandle, apPageIdx); + bufferManager->unpin(bmFileHandle, apPageIdx); return retVal; } else { U retVal; - StorageStructureUtils::readWALVersionOfPage(bufferManagedFileHandle, apPageIdx, - *bufferManager, *wal, [&retVal, &apCursor](const uint8_t* frame) -> void { - retVal = *(U*)(frame + apCursor.offsetInPage); - }); + StorageStructureUtils::readWALVersionOfPage(bmFileHandle, apPageIdx, *bufferManager, *wal, + [&retVal, &apCursor]( + const uint8_t* frame) -> void { retVal = *(U*)(frame + apCursor.offsetInPage); }); return retVal; } } @@ -112,8 +111,8 @@ void BaseDiskArray::update(uint64_t idx, U val) { // getAPPageIdxNoLock logic needs to change to give the same guarantee (e.g., an apIdx = 0, may // no longer to be guaranteed to be in pips[0].) page_idx_t apPageIdx = getAPPageIdxNoLock(apCursor.pageIdx, TransactionType::WRITE); - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, - apPageIdx, false /* not inserting a new page */, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, apPageIdx, + false /* not inserting a new page */, *bufferManager, *wal, [&apCursor, &val](uint8_t* frame) -> void { *(U*)(frame + apCursor.offsetInPage) = val; }); } @@ -122,7 +121,7 @@ uint64_t BaseDiskArray::pushBack(U val) { std::unique_lock xLck{diskArraySharedMtx}; hasTransactionalUpdates = true; uint64_t elementIdx; - StorageStructureUtils::updatePage((BufferManagedFileHandle&)(fileHandle), storageStructureID, + StorageStructureUtils::updatePage((BMFileHandle&)(fileHandle), storageStructureID, headerPageIdx, false /* not inserting a new page */, *bufferManager, *wal, [this, &val, &elementIdx](uint8_t* frame) -> void { auto updatedDiskArrayHeader = ((DiskArrayHeader*)frame); @@ -131,8 +130,8 @@ uint64_t BaseDiskArray::pushBack(U val) { auto [apPageIdx, isNewlyAdded] = getAPPageIdxAndAddAPToPIPIfNecessaryForWriteTrxNoLock( (DiskArrayHeader*)frame, apCursor.pageIdx); // Now do the push back. 
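// (Both updatePage calls in this lambda write through the WAL: the helper pins the WAL version
// of the target page, creating it and copying the original 4KB page into it if necessary, applies
// the update callback to that frame, and then releases the page lock, so the pushed-back element
// only reaches the original database file at checkpoint time.)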
- StorageStructureUtils::updatePage((BufferManagedFileHandle&)(fileHandle), - storageStructureID, apPageIdx, isNewlyAdded, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)(fileHandle), storageStructureID, + apPageIdx, isNewlyAdded, *bufferManager, *wal, [&apCursor, &val]( uint8_t* frame) -> void { *(U*)(frame + apCursor.offsetInPage) = val; }); updatedDiskArrayHeader->numElements++; @@ -161,7 +160,7 @@ void BaseDiskArray::setNextPIPPageIDxOfPIPNoLock(DiskArrayHeader* updatedDisk * pipPageIdxOfPreviousPIP. 2) if pipPageIdxOfPreviousPIP is an existing PIP, in which * case again this function is not creating pipPageIdxOfPreviousPIP. */ - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, pipPageIdxOfPreviousPIP, false /* not inserting a new page */, *bufferManager, *wal, [&nextPIPPageIdx]( const uint8_t* frame) -> void { ((PIP*)frame)->nextPipPageIdx = nextPIPPageIdx; }); @@ -186,10 +185,10 @@ page_idx_t BaseDiskArray::getAPPageIdxNoLock(page_idx_t apIdx, TransactionTyp } else { page_idx_t retVal; page_idx_t pageIdxOfUpdatedPip = getUpdatedPageIdxOfPipNoLock(pipIdx); - StorageStructureUtils::readWALVersionOfPage((BufferManagedFileHandle&)fileHandle, - pageIdxOfUpdatedPip, *bufferManager, *wal, - [&retVal, &offsetInPIP]( - const uint8_t* frame) -> void { retVal = ((PIP*)frame)->pageIdxs[offsetInPIP]; }); + StorageStructureUtils::readWALVersionOfPage((BMFileHandle&)fileHandle, pageIdxOfUpdatedPip, + *bufferManager, *wal, [&retVal, &offsetInPIP](const uint8_t* frame) -> void { + retVal = ((PIP*)frame)->pageIdxs[offsetInPIP]; + }); return retVal; } } @@ -204,9 +203,8 @@ page_idx_t BaseDiskArray::getUpdatedPageIdxOfPipNoLock(uint64_t pipIdx) { template void BaseDiskArray::clearWALPageVersionAndRemovePageFromFrameIfNecessary(page_idx_t pageIdx) { - ((BufferManagedFileHandle&)this->fileHandle).clearWALPageVersionIfNecessary(pageIdx); - bufferManager->removePageFromFrameIfNecessary( - (BufferManagedFileHandle&)this->fileHandle, pageIdx); + ((BMFileHandle&)this->fileHandle).clearWALPageVersionIfNecessary(pageIdx); + bufferManager->removePageFromFrameIfNecessary((BMFileHandle&)this->fileHandle, pageIdx); } template @@ -236,7 +234,7 @@ void BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChec clearWALPageVersionAndRemovePageFromFrameIfNecessary(pipPageIdxOfNewPIP); if (!isCheckpoint) { // These are newly inserted pages, so we can truncate the file handle. 
- ((BufferManagedFileHandle&)this->fileHandle) + ((BMFileHandle&)this->fileHandle) .removePageIdxAndTruncateIfNecessary(pipPageIdxOfNewPIP); } } @@ -259,14 +257,14 @@ bool BaseDiskArray::hasPIPUpdatesNoLock(uint64_t pipIdx) { template uint64_t BaseDiskArray::readUInt64HeaderFieldNoLock( TransactionType trxType, std::function readOp) { - auto bufferManagedFileHandle = reinterpret_cast(&fileHandle); + auto bmFileHandle = reinterpret_cast(&fileHandle); if ((trxType == TransactionType::READ_ONLY) || - !bufferManagedFileHandle->hasWALPageVersionNoPageLock(headerPageIdx)) { + !bmFileHandle->hasWALPageVersionNoPageLock(headerPageIdx)) { return readOp(&this->header); } else { uint64_t retVal; - StorageStructureUtils::readWALVersionOfPage((BufferManagedFileHandle&)fileHandle, - headerPageIdx, *bufferManager, *wal, [&retVal, &readOp](uint8_t* frame) -> void { + StorageStructureUtils::readWALVersionOfPage((BMFileHandle&)fileHandle, headerPageIdx, + *bufferManager, *wal, [&retVal, &readOp](uint8_t* frame) -> void { retVal = readOp((DiskArrayHeader*)frame); }); return retVal; @@ -314,8 +312,8 @@ std::pair BaseDiskArray::getAPPageIdxAndAddAPToPIPIfNecessa setNextPIPPageIDxOfPIPNoLock(updatedDiskArrayHeader, pipIdxOfPreviousPIP, pipPageIdx); } // Finally we update the PIP page (possibly newly created) and add newAPPageIdx into it. - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, - pipPageIdx, isInsertingANewPIPPage, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, pipPageIdx, + isInsertingANewPIPPage, *bufferManager, *wal, [&isInsertingANewPIPPage, &newAPPageIdx, &offsetOfNewAPInPIP]( const uint8_t* frame) -> void { if (isInsertingANewPIPPage) { @@ -376,7 +374,7 @@ void InMemDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChe uint64_t numOldAPs = this->getNumAPsNoLock(TransactionType::READ_ONLY); for (uint64_t apIdx = 0; apIdx < numOldAPs; ++apIdx) { uint64_t apPageIdx = this->getAPPageIdxNoLock(apIdx, TransactionType::READ_ONLY); - if (reinterpret_cast(this->fileHandle) + if (reinterpret_cast(this->fileHandle) .hasWALPageVersionNoPageLock(apPageIdx)) { // Note we can directly read the new image from disk because the WALReplayer checkpoints // the disk image of the page before calling @@ -423,7 +421,7 @@ void InMemDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChe BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(true /* is checkpoint */); } else { BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(false /* is rollback */); - ((BufferManagedFileHandle&)this->fileHandle) + ((BMFileHandle&)this->fileHandle) .removePageIdxAndTruncateIfNecessary(minNewAPPageIdxToTruncateTo); } } diff --git a/src/storage/storage_structure/disk_overflow_file.cpp b/src/storage/storage_structure/disk_overflow_file.cpp index 28a758c00b..2165a8adcb 100644 --- a/src/storage/storage_structure/disk_overflow_file.cpp +++ b/src/storage/storage_structure/disk_overflow_file.cpp @@ -11,16 +11,16 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -void DiskOverflowFile::pinOverflowPageCache(BufferManagedFileHandle* bufferManagedFileHandleToPin, +void DiskOverflowFile::pinOverflowPageCache(BMFileHandle* bmFileHandleToPin, page_idx_t pageIdxToPin, OverflowPageCache& overflowPageCache) { - overflowPageCache.frame = bufferManager.pin(*bufferManagedFileHandleToPin, pageIdxToPin); - overflowPageCache.bufferManagedFileHandle = bufferManagedFileHandleToPin; + 
overflowPageCache.frame = bufferManager.pin(*bmFileHandleToPin, pageIdxToPin); + overflowPageCache.bmFileHandle = bmFileHandleToPin; overflowPageCache.pageIdx = pageIdxToPin; } void DiskOverflowFile::unpinOverflowPageCache(OverflowPageCache& overflowPageCache) { if (overflowPageCache.pageIdx != UINT32_MAX) { - bufferManager.unpin(*overflowPageCache.bufferManagedFileHandle, overflowPageCache.pageIdx); + bufferManager.unpin(*overflowPageCache.bmFileHandle, overflowPageCache.pageIdx); } } @@ -162,12 +162,12 @@ std::vector> DiskOverflowFile::readList( void DiskOverflowFile::addNewPageIfNecessaryWithoutLock(uint32_t numBytesToAppend) { PageElementCursor byteCursor = PageUtils::getPageElementCursorForPos( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); - if ((byteCursor.elemPosInPage == 0) || ((byteCursor.elemPosInPage + numBytesToAppend - 1) > - BufferPoolConstants::DEFAULT_PAGE_SIZE)) { + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); + if ((byteCursor.elemPosInPage == 0) || + ((byteCursor.elemPosInPage + numBytesToAppend - 1) > BufferPoolConstants::PAGE_4KB_SIZE)) { // Note that if byteCursor.pos is already 0 the next operation keeps the nextBytePos // where it is. - nextBytePosToWriteTo = (fileHandle->getNumPages() * BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo = (fileHandle->getNumPages() * BufferPoolConstants::PAGE_4KB_SIZE); addNewPageToFileHandle(); } } @@ -176,13 +176,13 @@ void DiskOverflowFile::setStringOverflowWithoutLock( const char* srcRawString, uint64_t len, ku_string_t& diskDstString) { if (len <= ku_string_t::SHORT_STR_LENGTH) { return; - } else if (len > BufferPoolConstants::DEFAULT_PAGE_SIZE) { + } else if (len > BufferPoolConstants::PAGE_4KB_SIZE) { throw RuntimeException(StringUtils::getLongStringErrorMessage( - srcRawString, BufferPoolConstants::DEFAULT_PAGE_SIZE)); + srcRawString, BufferPoolConstants::PAGE_4KB_SIZE)); } addNewPageIfNecessaryWithoutLock(len); auto updatedPageInfoAndWALPageFrame = createWALVersionOfPageIfNecessaryForElement( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); memcpy(updatedPageInfoAndWALPageFrame.frame + updatedPageInfoAndWALPageFrame.posInPage, srcRawString, len); TypeUtils::encodeOverflowPtr(diskDstString.overflowPtr, @@ -222,14 +222,14 @@ void DiskOverflowFile::writeStringOverflowAndUpdateOverflowPtr( void DiskOverflowFile::setListRecursiveIfNestedWithoutLock( const ku_list_t& inMemSrcList, ku_list_t& diskDstList, const DataType& dataType) { auto elementSize = Types::getDataTypeSize(*dataType.childType); - if (inMemSrcList.size * elementSize > BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (inMemSrcList.size * elementSize > BufferPoolConstants::PAGE_4KB_SIZE) { throw RuntimeException(StringUtils::string_format( "Maximum num bytes of a LIST is %d. Input list's num bytes is %d.", - BufferPoolConstants::DEFAULT_PAGE_SIZE, inMemSrcList.size * elementSize)); + BufferPoolConstants::PAGE_4KB_SIZE, inMemSrcList.size * elementSize)); } addNewPageIfNecessaryWithoutLock(inMemSrcList.size * elementSize); auto updatedPageInfoAndWALPageFrame = createWALVersionOfPageIfNecessaryForElement( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); diskDstList.size = inMemSrcList.size; // Copy non-overflow part for elements in the list. 
memcpy(updatedPageInfoAndWALPageFrame.frame + updatedPageInfoAndWALPageFrame.posInPage, diff --git a/src/storage/storage_structure/in_mem_file.cpp b/src/storage/storage_structure/in_mem_file.cpp index b9517a51d2..16e3127f43 100644 --- a/src/storage/storage_structure/in_mem_file.cpp +++ b/src/storage/storage_structure/in_mem_file.cpp @@ -30,7 +30,7 @@ uint32_t InMemFile::addANewPage(bool setToZero) { pages.push_back( std::make_unique(numElementsInAPage, numBytesForElement, hasNullMask)); if (setToZero) { - memset(pages[newPageIdx]->data, 0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + memset(pages[newPageIdx]->data, 0, BufferPoolConstants::PAGE_4KB_SIZE); } return newPageIdx; } @@ -43,8 +43,7 @@ void InMemFile::flush() { for (auto pageIdx = 0u; pageIdx < pages.size(); pageIdx++) { pages[pageIdx]->encodeNullBits(); FileUtils::writeToFile(fileInfo.get(), pages[pageIdx]->data, - BufferPoolConstants::DEFAULT_PAGE_SIZE, - pageIdx * BufferPoolConstants::DEFAULT_PAGE_SIZE); + BufferPoolConstants::PAGE_4KB_SIZE, pageIdx * BufferPoolConstants::PAGE_4KB_SIZE); } } @@ -57,7 +56,7 @@ ku_string_t InMemOverflowFile::appendString(const char* rawString) { if (length > ku_string_t::SHORT_STR_LENGTH) { std::unique_lock lck{lock}; // Allocate a new page if necessary. - if (nextOffsetInPageToAppend + length >= BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (nextOffsetInPageToAppend + length >= BufferPoolConstants::PAGE_4KB_SIZE) { addANewPage(); nextOffsetInPageToAppend = 0; nextPageIdxToAppend++; @@ -134,7 +133,7 @@ ku_list_t InMemOverflowFile::copyList(const Value& listValue, PageByteCursor& ov resultKUList.size = listValue.listVal.size(); // Allocate a new page if necessary. if (overflowCursor.offsetInPage + (resultKUList.size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || overflowCursor.pageIdx == UINT32_MAX) { overflowCursor.offsetInPage = 0; overflowCursor.pageIdx = addANewOverflowPage(); @@ -168,7 +167,7 @@ ku_list_t InMemOverflowFile::copyList(const Value& listValue, PageByteCursor& ov void InMemOverflowFile::copyStringOverflow( PageByteCursor& overflowCursor, uint8_t* srcOverflow, ku_string_t* dstKUString) { // Allocate a new page if necessary. - if (overflowCursor.offsetInPage + dstKUString->len >= BufferPoolConstants::DEFAULT_PAGE_SIZE || + if (overflowCursor.offsetInPage + dstKUString->len >= BufferPoolConstants::PAGE_4KB_SIZE || overflowCursor.pageIdx == UINT32_MAX) { overflowCursor.offsetInPage = 0; overflowCursor.pageIdx = addANewOverflowPage(); @@ -187,7 +186,7 @@ void InMemOverflowFile::copyListOverflowFromFile(InMemOverflowFile* srcInMemOver auto numBytesOfListElement = Types::getDataTypeSize(*listChildDataType); // Allocate a new page if necessary. if (dstOverflowCursor.offsetInPage + (dstKUList->size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || dstOverflowCursor.pageIdx == UINT32_MAX) { dstOverflowCursor.offsetInPage = 0; dstOverflowCursor.pageIdx = addANewOverflowPage(); @@ -231,7 +230,7 @@ void InMemOverflowFile::copyListOverflowToFile( auto numBytesOfListElement = Types::getDataTypeSize(*childDataType); // Allocate a new page if necessary. 
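// (Same pattern as appendString/copyStringOverflow above: when the element does not fit in the
// remainder of the current 4KB page, or no overflow page has been claimed yet
// (pageIdx == UINT32_MAX), the cursor is reset to offset 0 of a freshly added overflow page.)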
if (pageByteCursor.offsetInPage + (srcKUList->size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || pageByteCursor.pageIdx == UINT32_MAX) { pageByteCursor.offsetInPage = 0; pageByteCursor.pageIdx = addANewOverflowPage(); diff --git a/src/storage/storage_structure/in_mem_page.cpp b/src/storage/storage_structure/in_mem_page.cpp index 09b0dd03ce..958dba3473 100644 --- a/src/storage/storage_structure/in_mem_page.cpp +++ b/src/storage/storage_structure/in_mem_page.cpp @@ -10,7 +10,7 @@ namespace storage { InMemPage::InMemPage(uint32_t maxNumElements, uint16_t numBytesForElement, bool hasNullEntries) : nullEntriesInPage{nullptr}, maxNumElements{maxNumElements} { - buffer = std::make_unique(BufferPoolConstants::DEFAULT_PAGE_SIZE); + buffer = std::make_unique(BufferPoolConstants::PAGE_4KB_SIZE); data = buffer.get(); if (hasNullEntries) { // In a page, null entries are stored right after the element data. Each null entry contains diff --git a/src/storage/storage_structure/lists/list_headers.cpp b/src/storage/storage_structure/lists/list_headers.cpp index b9bd973d1d..6386539343 100644 --- a/src/storage/storage_structure/lists/list_headers.cpp +++ b/src/storage/storage_structure/lists/list_headers.cpp @@ -33,9 +33,9 @@ ListHeaders::ListHeaders(const StorageStructureIDAndFName& storageStructureIDAnd storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::HEADERS; storageStructureIDAndFName.fName = StorageUtils::getListHeadersFName(storageStructureIDAndFNameForBaseList.fName); - fileHandle = bufferManager->getBufferManagedFileHandle(storageStructureIDAndFName.fName, + fileHandle = bufferManager->getBMFileHandle(storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + BMFileHandle::FileVersionedType::VERSIONED_FILE); storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::HEADERS; storageStructureIDAndFName.fName = fileHandle->getFileInfo()->path; headersDiskArray = std::make_unique>(*fileHandle, diff --git a/src/storage/storage_structure/lists/lists.cpp b/src/storage/storage_structure/lists/lists.cpp index 74ad5d32bf..a7b94807fb 100644 --- a/src/storage/storage_structure/lists/lists.cpp +++ b/src/storage/storage_structure/lists/lists.cpp @@ -397,16 +397,16 @@ std::unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( auto numElementsToReadInCurPage = std::min(numElementsInPersistentStore - numElementsRead, (uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); auto physicalPageIdx = pageMapper(pageCursor.pageIdx); - auto frame = bufferManager.pin(*fileHandle, physicalPageIdx) + - getElemByteOffset(pageCursor.elemPosInPage); + auto buffer = bufferManager.pin(*fileHandle, physicalPageIdx) + + getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { - auto relID = *(int64_t*)frame; + auto relID = *(int64_t*)buffer; if (listsUpdatesStore->isRelDeletedInPersistentStore( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset, relID)) { deletedRelOffsetsInList.emplace(numElementsRead); } numElementsRead++; - frame += elementSize; + buffer += elementSize; } bufferManager.unpin(*fileHandle, physicalPageIdx); pageCursor.nextPage(); @@ -424,16 +424,16 @@ list_offset_t RelIDList::getListOffset(offset_t nodeOffset, offset_t relOffset) auto numElementsToReadInCurPage = std::min(numElementsInPersistentStore - numElementsRead, 
(uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); auto physicalPageIdx = pageMapper(pageCursor.pageIdx); - auto frame = bufferManager.pin(*fileHandle, physicalPageIdx) + - getElemByteOffset(pageCursor.elemPosInPage); + auto buffer = bufferManager.pin(*fileHandle, physicalPageIdx) + + getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { - auto relIDInList = *(int64_t*)frame; + auto relIDInList = *(int64_t*)buffer; if (relIDInList == relOffset) { bufferManager.unpin(*fileHandle, physicalPageIdx); return numElementsRead; } numElementsRead++; - frame += elementSize; + buffer += elementSize; } bufferManager.unpin(*fileHandle, physicalPageIdx); pageCursor.nextPage(); diff --git a/src/storage/storage_structure/lists/lists_metadata.cpp b/src/storage/storage_structure/lists/lists_metadata.cpp index 4250cea752..2b7a9399fa 100644 --- a/src/storage/storage_structure/lists/lists_metadata.cpp +++ b/src/storage/storage_structure/lists/lists_metadata.cpp @@ -17,9 +17,8 @@ ListsMetadata::ListsMetadata( storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::METADATA; storageStructureIDAndFName.fName = StorageUtils::getListMetadataFName(storageStructureIDAndFNameForBaseList.fName); - metadataVersionedFileHandle = bufferManager->getBufferManagedFileHandle( - storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + metadataVersionedFileHandle = bufferManager->getBMFileHandle(storageStructureIDAndFName.fName, + FileHandle::O_PERSISTENT_FILE_NO_CREATE, BMFileHandle::FileVersionedType::VERSIONED_FILE); chunkToPageListHeadIdxMap = std::make_unique>( *metadataVersionedFileHandle, storageStructureIDAndFName.storageStructureID, CHUNK_PAGE_LIST_HEAD_IDX_MAP_HEADER_PAGE_IDX, bufferManager, wal); diff --git a/src/storage/storage_structure/storage_structure.cpp b/src/storage/storage_structure/storage_structure.cpp index 72a912309d..8ce4557a20 100644 --- a/src/storage/storage_structure/storage_structure.cpp +++ b/src/storage/storage_structure/storage_structure.cpp @@ -17,7 +17,7 @@ void StorageStructure::addNewPageToFileHandle() { auto pageIdxInOriginalFile = fileHandle->addNewPage(); auto pageIdxInWAL = wal->logPageInsertRecord(storageStructureID, pageIdxInOriginalFile); bufferManager.pinWithoutAcquiringPageLock( - *wal->fileHandle, pageIdxInWAL, true /* do not read from file */); + *wal->fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::DONT_READ_PAGE); fileHandle->createPageVersionGroupIfNecessary(pageIdxInOriginalFile); fileHandle->setWALPageVersion(pageIdxInOriginalFile, pageIdxInWAL); bufferManager.setPinnedPageDirty(*wal->fileHandle, pageIdxInWAL); diff --git a/src/storage/storage_structure/storage_structure_utils.cpp b/src/storage/storage_structure/storage_structure_utils.cpp index 06b75ebe72..4249f2e6aa 100644 --- a/src/storage/storage_structure/storage_structure_utils.cpp +++ b/src/storage/storage_structure/storage_structure_utils.cpp @@ -5,9 +5,9 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -std::pair -StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHandle& fileHandle, - page_idx_t physicalPageIdx, WAL& wal, transaction::TransactionType trxType) { +std::pair StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin( + BMFileHandle& fileHandle, page_idx_t physicalPageIdx, WAL& wal, + transaction::TransactionType trxType) { if (trxType == transaction::TransactionType::READ_ONLY || 
!fileHandle.hasWALPageVersionNoPageLock(physicalPageIdx)) { return std::make_pair(&fileHandle, physicalPageIdx); @@ -17,7 +17,7 @@ StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHan } } -void StorageStructureUtils::updatePage(BufferManagedFileHandle& fileHandle, +void StorageStructureUtils::updatePage(BMFileHandle& fileHandle, StorageStructureID storageStructureID, page_idx_t originalPageIdx, bool isInsertingNewPage, BufferManager& bufferManager, WAL& wal, const std::function& updateOp) { auto walPageIdxAndFrame = StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( @@ -26,37 +26,38 @@ void StorageStructureUtils::updatePage(BufferManagedFileHandle& fileHandle, unpinWALPageAndReleaseOriginalPageLock(walPageIdxAndFrame, fileHandle, bufferManager, wal); } -void StorageStructureUtils::readWALVersionOfPage(BufferManagedFileHandle& fileHandle, +void StorageStructureUtils::readWALVersionOfPage(BMFileHandle& fileHandle, page_idx_t originalPageIdx, BufferManager& bufferManager, WAL& wal, const std::function& readOp) { page_idx_t pageIdxInWAL = fileHandle.getWALPageVersionNoPageLock(originalPageIdx); auto frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, false /* read from file */); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); readOp(frame); unpinPageIdxInWALAndReleaseOriginalPageLock( pageIdxInWAL, originalPageIdx, fileHandle, bufferManager, wal); } WALPageIdxAndFrame StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( - page_idx_t originalPageIdx, bool insertingNewPage, BufferManagedFileHandle& fileHandle, + page_idx_t originalPageIdx, bool insertingNewPage, BMFileHandle& fileHandle, StorageStructureID storageStructureID, BufferManager& bufferManager, WAL& wal) { fileHandle.createPageVersionGroupIfNecessary(originalPageIdx); - fileHandle.acquirePageLock(originalPageIdx, true /* block */); + fileHandle.acquirePageLock(originalPageIdx, LockMode::SPIN); page_idx_t pageIdxInWAL; uint8_t* frame; if (fileHandle.hasWALPageVersionNoPageLock(originalPageIdx)) { pageIdxInWAL = fileHandle.getWALPageVersionNoPageLock(originalPageIdx); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, false /* read from file */); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); } else { pageIdxInWAL = wal.logPageUpdateRecord( storageStructureID, originalPageIdx /* pageIdxInOriginalFile */); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, true /* do not read from file */); - uint8_t* originalFrame = bufferManager.pinWithoutAcquiringPageLock( - fileHandle, originalPageIdx, insertingNewPage); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::DONT_READ_PAGE); + auto originalFrame = bufferManager.pinWithoutAcquiringPageLock(fileHandle, originalPageIdx, + insertingNewPage ? BufferManager::PageReadPolicy::DONT_READ_PAGE : + BufferManager::PageReadPolicy::READ_PAGE); // Note: This logic only works for db files with DEFAULT_PAGE_SIZEs. 
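// (PageReadPolicy makes the old boolean flags explicit: the page just appended to the WAL is
// pinned with DONT_READ_PAGE because its on-disk contents are not meaningful yet, while the
// original page is pinned with READ_PAGE unless it is itself a newly inserted page; its 4KB
// contents are then copied into the WAL frame below.)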
- memcpy(frame, originalFrame, BufferPoolConstants::DEFAULT_PAGE_SIZE); + memcpy(frame, originalFrame, BufferPoolConstants::PAGE_4KB_SIZE); bufferManager.unpinWithoutAcquiringPageLock(fileHandle, originalPageIdx); fileHandle.setWALPageVersionNoLock( originalPageIdx /* pageIdxInOriginalFile */, pageIdxInWAL); @@ -66,16 +67,15 @@ WALPageIdxAndFrame StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( } void StorageStructureUtils::unpinWALPageAndReleaseOriginalPageLock( - WALPageIdxAndFrame& walPageIdxAndFrame, BufferManagedFileHandle& fileHandle, - BufferManager& bufferManager, WAL& wal) { + WALPageIdxAndFrame& walPageIdxAndFrame, BMFileHandle& fileHandle, BufferManager& bufferManager, + WAL& wal) { StorageStructureUtils::unpinPageIdxInWALAndReleaseOriginalPageLock( walPageIdxAndFrame.pageIdxInWAL, walPageIdxAndFrame.originalPageIdx, fileHandle, bufferManager, wal); } void StorageStructureUtils::unpinPageIdxInWALAndReleaseOriginalPageLock(page_idx_t pageIdxInWAL, - page_idx_t originalPageIdx, BufferManagedFileHandle& fileHandle, BufferManager& bufferManager, - WAL& wal) { + page_idx_t originalPageIdx, BMFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal) { bufferManager.unpinWithoutAcquiringPageLock(*wal.fileHandle, pageIdxInWAL); fileHandle.releasePageLock(originalPageIdx); } diff --git a/src/storage/storage_utils.cpp b/src/storage/storage_utils.cpp index a04128e4ed..6ee590dc69 100644 --- a/src/storage/storage_utils.cpp +++ b/src/storage/storage_utils.cpp @@ -186,11 +186,11 @@ uint32_t PageUtils::getNumElementsInAPage(uint32_t elementSize, bool hasNull) { auto numBytesPerNullEntry = NullMask::NUM_BITS_PER_NULL_ENTRY >> 3; auto numNullEntries = hasNull ? (uint32_t)ceil( - (double)BufferPoolConstants::DEFAULT_PAGE_SIZE / + (double)BufferPoolConstants::PAGE_4KB_SIZE / (double)(((uint64_t)elementSize << NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2) + numBytesPerNullEntry)) : 0; - return (BufferPoolConstants::DEFAULT_PAGE_SIZE - (numNullEntries * numBytesPerNullEntry)) / + return (BufferPoolConstants::PAGE_4KB_SIZE - (numNullEntries * numBytesPerNullEntry)) / elementSize; } diff --git a/src/storage/wal/wal.cpp b/src/storage/wal/wal.cpp index 0e2730d42e..107215dcfe 100644 --- a/src/storage/wal/wal.cpp +++ b/src/storage/wal/wal.cpp @@ -12,11 +12,11 @@ namespace storage { WAL::WAL(const std::string& directory, BufferManager& bufferManager) : logger{LoggerUtils::getLogger(LoggerConstants::LoggerEnum::WAL)}, directory{directory}, bufferManager{bufferManager}, isLastLoggedRecordCommit_{false} { - fileHandle = bufferManager.getBufferManagedFileHandle( - common::FileUtils::joinPath( - directory, std::string(common::StorageConstants::WAL_FILE_SUFFIX)), - FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); + fileHandle = + bufferManager.getBMFileHandle(common::FileUtils::joinPath(directory, + std::string(common::StorageConstants::WAL_FILE_SUFFIX)), + FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, + BMFileHandle::FileVersionedType::NON_VERSIONED_FILE); initCurrentPage(); } @@ -173,7 +173,7 @@ void WAL::setIsLastRecordCommit() { } } -WALIterator::WALIterator(std::shared_ptr fileHandle, std::mutex& mtx) +WALIterator::WALIterator(std::shared_ptr fileHandle, std::mutex& mtx) : BaseWALAndWALIterator{std::move(fileHandle)}, mtx{mtx} { resetCurrentHeaderPagePrefix(); if (this->fileHandle->getNumPages() > 0) { diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index d651a5985b..eaf95944e4 100644 --- 
a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -25,7 +25,7 @@ WALReplayer::WALReplayer(WAL* wal, StorageManager* storageManager, MemoryManager void WALReplayer::init() { logger = LoggerUtils::getLogger(LoggerConstants::LoggerEnum::STORAGE); walFileHandle = wal->fileHandle; - pageBuffer = std::make_unique(BufferPoolConstants::DEFAULT_PAGE_SIZE); + pageBuffer = std::make_unique(BufferPoolConstants::PAGE_4KB_SIZE); } void WALReplayer::replay() { @@ -81,9 +81,9 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { walFileHandle->readPage( pageBuffer.get(), walRecord.pageInsertOrUpdateRecord.pageIdxInWAL); FileUtils::writeToFile(fileInfoOfStorageStructure.get(), pageBuffer.get(), - BufferPoolConstants::DEFAULT_PAGE_SIZE, + BufferPoolConstants::PAGE_4KB_SIZE, walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile * - BufferPoolConstants::DEFAULT_PAGE_SIZE); + BufferPoolConstants::PAGE_4KB_SIZE); } if (!isRecovering) { // 2: If we are not recovering, we do any in-memory checkpointing or rolling back work @@ -420,7 +420,7 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { } void WALReplayer::truncateFileIfInsertion( - BufferManagedFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord) { + BMFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord) { if (pageInsertOrUpdateRecord.isInsert) { // If we are rolling back and this is a page insertion we truncate the fileHandle's // data structures that hold locks for pageIdxs. @@ -440,7 +440,7 @@ void WALReplayer::truncateFileIfInsertion( void WALReplayer::checkpointOrRollbackVersionedFileHandleAndBufferManager( const WALRecord& walRecord, const StorageStructureID& storageStructureID) { - BufferManagedFileHandle* fileHandle = + BMFileHandle* fileHandle = getVersionedFileHandleIfWALVersionAndBMShouldBeCleared(storageStructureID); if (fileHandle) { fileHandle->clearWALPageVersionIfNecessary( @@ -449,15 +449,15 @@ void WALReplayer::checkpointOrRollbackVersionedFileHandleAndBufferManager( // Update the page in buffer manager if it is in a frame. Note that we assume // that the pageBuffer currently contains the contents of the WALVersion, so the // caller needs to make sure that this assumption holds. 
- bufferManager->updateFrameIfPageIsInFrameWithoutPageOrFrameLock(*fileHandle, - pageBuffer.get(), walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile); + bufferManager->updateFrameIfPageIsInFrameWithoutLock(*fileHandle, pageBuffer.get(), + walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile); } else { truncateFileIfInsertion(fileHandle, walRecord.pageInsertOrUpdateRecord); } } } -BufferManagedFileHandle* WALReplayer::getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( +BMFileHandle* WALReplayer::getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( const StorageStructureID& storageStructureID) { switch (storageStructureID.storageStructureType) { case StorageStructureType::COLUMN: { diff --git a/test/include/graph_test/graph_test.h b/test/include/graph_test/graph_test.h index d83ba422ea..882d4fb706 100644 --- a/test/include/graph_test/graph_test.h +++ b/test/include/graph_test/graph_test.h @@ -24,7 +24,7 @@ class BaseGraphTest : public Test { public: void SetUp() override { systemConfig = std::make_unique( - common::StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); + common::BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); if (common::FileUtils::fileOrPathExists(TestHelper::getTmpTestDir())) { common::FileUtils::removeDir(TestHelper::getTmpTestDir()); } @@ -58,11 +58,8 @@ class BaseGraphTest : public Test { static inline transaction::TransactionManager* getTransactionManager(main::Database& database) { return database.transactionManager.get(); } - static inline uint64_t getDefaultBMSize(main::Database& database) { - return database.systemConfig.defaultPageBufferPoolSize; - } - static inline uint64_t getLargeBMSize(main::Database& database) { - return database.systemConfig.largePageBufferPoolSize; + static inline uint64_t getBMSize(main::Database& database) { + return database.systemConfig.bufferPoolSize; } static inline storage::WAL* getWAL(main::Database& database) { return database.wal.get(); } static inline void commitAndCheckpointOrRollback(main::Database& database, diff --git a/test/include/main_test_helper/main_test_helper.h b/test/include/main_test_helper/main_test_helper.h index 30962eea91..3d2ea876bb 100644 --- a/test/include/main_test_helper/main_test_helper.h +++ b/test/include/main_test_helper/main_test_helper.h @@ -10,8 +10,8 @@ class ApiTest : public BaseGraphTest { public: void SetUp() override { BaseGraphTest::SetUp(); - systemConfig->defaultPageBufferPoolSize = (1ull << 26); - systemConfig->largePageBufferPoolSize = (1ull << 26); + systemConfig->bufferPoolSize = + common::BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING; createDBAndConn(); initGraph(); } diff --git a/test/processor/order_by/key_block_merger_test.cpp b/test/processor/order_by/key_block_merger_test.cpp index 859eca31ce..7b2399f845 100644 --- a/test/processor/order_by/key_block_merger_test.cpp +++ b/test/processor/order_by/key_block_merger_test.cpp @@ -20,11 +20,8 @@ class KeyBlockMergerTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ 
-36,7 +33,7 @@ class KeyBlockMergerTest : public Test { public: std::unique_ptr bufferManager; std::unique_ptr memoryManager; - uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; static void checkTupleIdxesAndFactorizedTableIdxes(uint8_t* keyBlockPtr, const uint64_t keyBlockEntrySizeInBytes, diff --git a/test/processor/order_by/order_by_key_encoder_test.cpp b/test/processor/order_by/order_by_key_encoder_test.cpp index 7845abd41a..ea0c0f836c 100644 --- a/test/processor/order_by/order_by_key_encoder_test.cpp +++ b/test/processor/order_by/order_by_key_encoder_test.cpp @@ -16,11 +16,8 @@ class OrderByKeyEncoderTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ -134,7 +131,7 @@ class OrderByKeyEncoderTest : public Test { std::unique_ptr bufferManager; std::unique_ptr memoryManager; const uint32_t ftIdx = 14; - const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; }; TEST_F(OrderByKeyEncoderTest, singleOrderByColInt64UnflatTest) { @@ -576,7 +573,7 @@ TEST_F(OrderByKeyEncoderTest, largeNumBytesPerTupleErrorTest) { // If the numBytesPerTuple is larger than 4096 bytes, the encoder will raise an encoding // exception we need ((LARGE_PAGE_SIZE - 8) / 9 + 1 number of columns(with datatype INT) to // trigger that exception. 
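    // Worked example, assuming PAGE_256KB_SIZE is 256 KiB (262144 bytes): each INT64 order-by
    // column encodes to 9 bytes and each tuple carries an 8-byte suffix (see the
    // 9 * numOfOrderByCols + 8 formula asserted below), so
    //   numOfOrderByCols = (262144 - 8) / 9 + 1 = 29127  =>  tupleSize = 9 * 29127 + 8 = 262151,
    // one column more than the largest tuple (262142 bytes) that still fits in a 256KB block.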
- auto numOfOrderByCols = (BufferPoolConstants::LARGE_PAGE_SIZE - 8) / 9 + 1; + auto numOfOrderByCols = (BufferPoolConstants::PAGE_256KB_SIZE - 8) / 9 + 1; auto [valueVectors, dataChunk] = getInt64TestValueVector(1, numOfOrderByCols, true); auto isAscOrder = std::vector(numOfOrderByCols, true); try { @@ -587,13 +584,13 @@ TEST_F(OrderByKeyEncoderTest, largeNumBytesPerTupleErrorTest) { ASSERT_STREQ(e.what(), StringUtils::string_format("Runtime exception: TupleSize({} bytes) is larger than " "the LARGE_PAGE_SIZE({} bytes)", - 9 * numOfOrderByCols + 8, BufferPoolConstants::LARGE_PAGE_SIZE) + 9 * numOfOrderByCols + 8, BufferPoolConstants::PAGE_256KB_SIZE) .c_str()); } catch (std::exception& e) { FAIL(); } } TEST_F(OrderByKeyEncoderTest, singleTuplePerBlockTest) { - uint32_t numOfOrderByCols = (BufferPoolConstants::LARGE_PAGE_SIZE - 8) / 9; + uint32_t numOfOrderByCols = (BufferPoolConstants::PAGE_256KB_SIZE - 8) / 9; uint32_t numOfElementsPerCol = 10; auto [valueVectors, dataChunk] = getInt64TestValueVector(numOfElementsPerCol, numOfOrderByCols, true); diff --git a/test/processor/order_by/radix_sort_test.cpp b/test/processor/order_by/radix_sort_test.cpp index 2a99e47f31..391adbccaa 100644 --- a/test/processor/order_by/radix_sort_test.cpp +++ b/test/processor/order_by/radix_sort_test.cpp @@ -20,11 +20,8 @@ class RadixSortTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ -37,7 +34,7 @@ class RadixSortTest : public Test { std::unique_ptr bufferManager; std::unique_ptr memoryManager; const uint8_t factorizedTableIdx = 9; - const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; void checkTupleIdxesAndFactorizedTableIdxes(uint8_t* keyBlockPtr, const uint64_t entrySize, const std::vector& expectedFTBlockOffsetOrder) { diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index 7c714f58c5..dc987cb48d 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -107,11 +107,8 @@ class TinySnbDDLTest : public DBTest { DBTest::SetUp(); catalog = getCatalog(*database); profiler = std::make_unique(); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); executionContext = std::make_unique( 1 /* numThreads */, profiler.get(), memoryManager.get(), bufferManager.get()); diff --git a/test/runner/e2e_set_transaction_test.cpp b/test/runner/e2e_set_transaction_test.cpp index 0b7a7d5907..77bc96d041 100644 --- a/test/runner/e2e_set_transaction_test.cpp +++ b/test/runner/e2e_set_transaction_test.cpp @@ -182,7 +182,7 @@ TEST_F(SetNodeStructuredPropTransactionTest, SetNodeLongStringPropRollbackTest) 
TEST_F(SetNodeStructuredPropTransactionTest, SetVeryLongStringErrorsTest) { conn->beginWriteTransaction(); std::string veryLongStr = ""; - for (auto i = 0u; i < BufferPoolConstants::DEFAULT_PAGE_SIZE + 1; ++i) { + for (auto i = 0u; i < BufferPoolConstants::PAGE_4KB_SIZE + 1; ++i) { veryLongStr += "a"; } auto result = conn->query("MATCH (a:person) WHERE a.ID=0 SET a.fName='" + veryLongStr + "'"); diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 77660e47a3..cae622b3e2 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -1,4 +1,3 @@ -add_kuzu_test(buffer_manager_test buffer_manager_test.cpp) #add_kuzu_test(disk_array_update_test disk_array_update_test.cpp) add_kuzu_test(node_insertion_deletion_test node_insertion_deletion_test.cpp) add_kuzu_test(wal_record_test wal_record_test.cpp) diff --git a/test/storage/buffer_manager_test.cpp b/test/storage/buffer_manager_test.cpp deleted file mode 100644 index 0e4467e6df..0000000000 --- a/test/storage/buffer_manager_test.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "graph_test/graph_test.h" -#include "storage/buffer_manager/buffer_manager.h" - -using namespace kuzu::common; -using namespace kuzu::storage; -using namespace kuzu::testing; - -class BufferManagerTests : public Test { - -protected: - void SetUp() override { - FileUtils::createDir(TestHelper::getTmpTestDir()); - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - } - - void TearDown() override { - FileUtils::removeDir(TestHelper::getTmpTestDir()); - LoggerUtils::dropLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); - LoggerUtils::dropLogger(LoggerConstants::LoggerEnum::STORAGE); - } -}; - -TEST_F(BufferManagerTests, RemoveFilePagesFromFramesTest) { - BufferManagedFileHandle fileHandle(std::string(TestHelper::getTmpTestDir()) + "bm_test.bin", - FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); - uint64_t numPagesToAdd = 1000; - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - fileHandle.addNewPage(); - } - auto bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); - // Pin and unpin some pages - bufferManager->pinWithoutReadingFromFile(fileHandle, 10); - bufferManager->pinWithoutReadingFromFile(fileHandle, 999); - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - if (pageIdx == 10 || pageIdx == 999) { - ASSERT_TRUE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } else { - ASSERT_FALSE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } - } - bufferManager->unpin(fileHandle, 10); - bufferManager->unpin(fileHandle, 999); - bufferManager->removeFilePagesFromFrames(fileHandle); - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - ASSERT_FALSE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } -} diff --git a/test/storage/wal_test.cpp b/test/storage/wal_test.cpp index 6d009346c0..2b52e07897 100644 --- a/test/storage/wal_test.cpp +++ b/test/storage/wal_test.cpp @@ -12,11 +12,8 @@ class WALTests : public Test { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::WAL); 
LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); wal = make_unique(TestHelper::getTmpTestDir(), *bufferManager); } diff --git a/test/transaction/transaction_manager_test.cpp b/test/transaction/transaction_manager_test.cpp index 032055494f..26545559d6 100644 --- a/test/transaction/transaction_manager_test.cpp +++ b/test/transaction/transaction_manager_test.cpp @@ -17,11 +17,8 @@ class TransactionManagerTest : public Test { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::WAL); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::TRANSACTION_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); wal = std::make_unique(TestHelper::getTmpTestDir(), *bufferManager); transactionManager = std::make_unique(*wal); } diff --git a/test/transaction/transaction_test.cpp b/test/transaction/transaction_test.cpp index ddb22cfaaa..fa22826c81 100644 --- a/test/transaction/transaction_test.cpp +++ b/test/transaction/transaction_test.cpp @@ -21,7 +21,7 @@ class TransactionTests : public DBTest { } void initWithoutLoadingGraph() { - systemConfig->largePageBufferPoolSize = (1ull << 22); + systemConfig->bufferPoolSize = (1ull << 22); // Note we do not actually use the connection field in these tests. We only need the // database. createDBAndConn(); diff --git a/third_party/concurrentqueue/LICENSE.md b/third_party/concurrentqueue/LICENSE.md new file mode 100644 index 0000000000..519338976f --- /dev/null +++ b/third_party/concurrentqueue/LICENSE.md @@ -0,0 +1,62 @@ +This license file applies to everything in this repository except that which +is explicitly annotated as being written by other authors, i.e. the Boost +queue (included in the benchmarks for comparison), Intel's TBB library (ditto), +dlib::pipe (ditto), +the CDSChecker tool (used for verification), the Relacy model checker (ditto), +and Jeff Preshing's semaphore implementation (used in the blocking queue) which +has a zlib license (embedded in lightweightsempahore.h). + +--- + +Simplified BSD License: + +Copyright (c) 2013-2016, Cameron Desrochers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or other materials +provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +I have also chosen to dual-license under the Boost Software License as an alternative to +the Simplified BSD license above: + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/concurrentqueue/blockingconcurrentqueue.h b/third_party/concurrentqueue/blockingconcurrentqueue.h new file mode 100644 index 0000000000..205a4db707 --- /dev/null +++ b/third_party/concurrentqueue/blockingconcurrentqueue.h @@ -0,0 +1,582 @@ +// Provides an efficient blocking version of moodycamel::ConcurrentQueue. +// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified +// BSD license, available at the top of concurrentqueue.h. +// Also dual-licensed under the Boost Software License (see LICENSE.md) +// Uses Jeff Preshing's semaphore implementation (under the terms of its +// separate zlib license, see lightweightsemaphore.h). + +#pragma once + +#include "concurrentqueue.h" +#include "lightweightsemaphore.h" + +#include +#include +#include +#include +#include + +namespace moodycamel +{ +// This is a blocking version of the queue. It has an almost identical interface to +// the normal non-blocking version, with the addition of various wait_dequeue() methods +// and the removal of producer-specific dequeue methods. 
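// A minimal usage sketch, relying only on methods declared below (queue name hypothetical):
//     moodycamel::BlockingConcurrentQueue<int> q;
//     q.enqueue(42);            // lock-free enqueue; signals the internal semaphore
//     int item;
//     q.wait_dequeue(item);     // blocks on the semaphore until an element is available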
+template +class BlockingConcurrentQueue +{ +private: + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; + +public: + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). 
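// A minimal sketch of the note above (ProducerToken is declared in concurrentqueue.h):
//     moodycamel::BlockingConcurrentQueue<int> a;
//     moodycamel::ProducerToken ptok(a);
//     moodycamel::BlockingConcurrentQueue<int> b(std::move(a)); // not thread-safe, see above
//     b.enqueue(ptok, 7);       // ptok is now tied to b, the destination of the move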
+ BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if ((details::likely)(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if ((details::likely)(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + if ((details::likely)(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if ((details::likely)(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. 
Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
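+	// A hedged sketch of a polling consumer built on try_dequeue below; `q`,
+	// `running` (an std::atomic<bool>) and `handle` are placeholders:
+	//
+	//     int v;
+	//     while (running.load(std::memory_order_relaxed)) {
+	//         if (q.try_dequeue(v)) {
+	//             handle(v);
+	//         } else {
+	//             std::this_thread::yield();   // the queue merely *appeared* empty
+	//         }
+	//     }
+	//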
+ template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + + + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. 
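+	// A sketch of a consumer that blocks with a bounded wait via the
+	// wait_dequeue_timed overloads above; `q` is a placeholder queue of ints:
+	//
+	//     int v;
+	//     if (q.wait_dequeue_timed(v, std::chrono::milliseconds(100))) {
+	//         // an item arrived within ~100ms
+	//     } else {
+	//         // timed out; the queue stayed (apparently) empty
+	//     }
+	//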
+ template + inline void wait_dequeue(consumer_token_t& token, U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. 
+ template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static constexpr bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } + + +private: + template + static inline U* create(A1&& a1, A2&& a2) + { + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + ConcurrentQueue inner; + std::unique_ptr sema; +}; + + +template +inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} // end namespace moodycamel diff --git a/third_party/concurrentqueue/concurrentqueue.h b/third_party/concurrentqueue/concurrentqueue.h new file mode 100644 index 0000000000..4b2ad791d2 --- /dev/null +++ b/third_party/concurrentqueue/concurrentqueue.h @@ -0,0 +1,3747 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. 
+// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2020, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. 
+#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading +#include // used for thread exit synchronization + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. + static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) +namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. 
+ static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} } +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? 
std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
+#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template struct identity { typedef T type; }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} } + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } +#endif +} } + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. 
+ typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. 
+ // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + // Whether to recycle dynamically-allocated blocks into an internal free list or + // not. If false, only pre-allocated blocks (controlled by the constructor + // arguments) will be recycled, and all others will be `free`d back to the heap. + // Note that blocks consumed by explicit producers are only freed on destruction + // of the queue (not following destruction of the token) regardless of this trait. + static const bool RECYCLE_ALLOCATED_BLOCKS = false; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
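+// A brief sketch tying together the two mechanisms described above: customizing
+// the traits by inheriting from ConcurrentQueueDefaultTraits and shadowing the
+// members you need, and preferring token-based bulk operations. `MyTraits`,
+// `producer` and `consumer` are placeholder names:
+//
+//     #include "concurrentqueue.h"
+//
+//     struct MyTraits : public moodycamel::ConcurrentQueueDefaultTraits {
+//         static const size_t BLOCK_SIZE = 256;        // must remain a power of 2
+//     };
+//
+//     moodycamel::ConcurrentQueue<int, MyTraits> q;
+//
+//     void producer() {
+//         moodycamel::ProducerToken ptok(q);
+//         int items[64];
+//         for (int i = 0; i != 64; ++i) items[i] = i;
+//         q.enqueue_bulk(ptok, items, 64);             // bulk enqueue with a token
+//     }
+//
+//     void consumer() {
+//         moodycamel::ConsumerToken ctok(q);
+//         int items[64];
+//         std::size_t n = q.try_dequeue_bulk(ctok, items, 64);
+//         // n items were written into `items`
+//     }
+//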
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. 
+ } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; +#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + class ThreadExitNotifier; + + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier + }; + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + std::lock_guard guard(mutex()); + listener->next = tlsInst.tail; + listener->chain = &tlsInst; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + std::lock_guard guard(mutex()); + if (!listener->chain) { + return; // race with ~ThreadExitNotifier + } + auto& tlsInst = *listener->chain; + listener->chain = nullptr; + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + std::lock_guard guard(mutex()); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->chain = nullptr; + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + static inline std::mutex& mutex() + { + // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called + static std::mutex mutex; + return mutex; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. 
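+	// A small sketch of the first case above (allocation failure during token
+	// construction); `q` and `item` are placeholders:
+	//
+	//     moodycamel::ProducerToken tok(q);
+	//     if (!tok.valid()) {
+	//         q.enqueue(item);   // fall back to token-less (implicit-producer) enqueue
+	//     }
+	//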
+ inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. +// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) 
+#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). 
+ explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. + ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). 
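+	// An illustrative call to the pre-sizing constructor above: room for at least
+	// 1024 elements with at most 2 explicit and 8 implicit producers (the figures
+	// are placeholders, not recommendations):
+	//
+	//     moodycamel::ConcurrentQueue<int> q(1024, 2, 8);
+	//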
+ ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
+ inline bool enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + bool enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. 
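+	// A sketch of using the non-allocating variants for backpressure; `q`, `ptok`
+	// and `item` are placeholders:
+	//
+	//     if (!q.try_enqueue(ptok, item)) {
+	//         // no pre-allocated room left for this producer: drop, retry,
+	//         // or fall back to the allocating enqueue(ptok, item)
+	//     }
+	//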
+    inline bool try_enqueue(producer_token_t const& token, T const& item)
+    {
+        return inner_enqueue<CannotAlloc>(token, item);
+    }
+
+    // Enqueues a single item (by moving it, if possible) using an explicit producer token.
+    // Does not allocate memory. Fails if not enough room to enqueue.
+    // Thread-safe.
+    inline bool try_enqueue(producer_token_t const& token, T&& item)
+    {
+        return inner_enqueue<CannotAlloc>(token, std::move(item));
+    }
+
+    // Enqueues several items.
+    // Does not allocate memory (except for one-time implicit producer).
+    // Fails if not enough room to enqueue (or implicit production is
+    // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+    // Note: Use std::make_move_iterator if the elements should be moved
+    // instead of copied.
+    // Thread-safe.
+    template<typename It>
+    bool try_enqueue_bulk(It itemFirst, size_t count)
+    {
+        MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+        else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+    }
+
+    // Enqueues several items using an explicit producer token.
+    // Does not allocate memory. Fails if not enough room to enqueue.
+    // Note: Use std::make_move_iterator if the elements should be moved
+    // instead of copied.
+    // Thread-safe.
+    template<typename It>
+    bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+    {
+        return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
+    }
+
+
+
+    // Attempts to dequeue from the queue.
+    // Returns false if all producer streams appeared empty at the time they
+    // were checked (so, the queue is likely but not guaranteed to be empty).
+    // Never allocates. Thread-safe.
+    template<typename U>
+    bool try_dequeue(U& item)
+    {
+        // Instead of simply trying each producer in turn (which could cause needless contention on the first
+        // producer), we score them heuristically.
+        size_t nonEmptyCount = 0;
+        ProducerBase* best = nullptr;
+        size_t bestSize = 0;
+        for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
+            auto size = ptr->size_approx();
+            if (size > 0) {
+                if (size > bestSize) {
+                    bestSize = size;
+                    best = ptr;
+                }
+                ++nonEmptyCount;
+            }
+        }
+
+        // If there was at least one non-empty queue but it appears empty at the time
+        // we try to dequeue from it, we need to make sure every queue's been tried
+        if (nonEmptyCount > 0) {
+            if ((details::likely)(best->dequeue(item))) {
+                return true;
+            }
+            for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+                if (ptr != best && ptr->dequeue(item)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    // Attempts to dequeue from the queue.
+    // Returns false if all producer streams appeared empty at the time they
+    // were checked (so, the queue is likely but not guaranteed to be empty).
+    // This differs from the try_dequeue(item) method in that this one does
+    // not attempt to reduce contention by interleaving the order that producer
+    // streams are dequeued from. So, using this method can reduce overall throughput
+    // under contention, but will give more predictable results in single-threaded
+    // consumer scenarios. This is mostly only useful for internal unit tests.
+    // Never allocates. Thread-safe.
+    template<typename U>
+    bool try_dequeue_non_interleaved(U& item)
+    {
+        for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+            if (ptr->dequeue(item)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Attempts to dequeue from the queue using an explicit consumer token.
+ // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
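+    // Illustrative usage (sketch; assumes the ConsumerToken(queue) constructor
+    // defined elsewhere in this header):
+    //     moodycamel::ConsumerToken ctok(q);
+    //     int buf[64];
+    //     size_t n = q.try_dequeue_bulk(ctok, buf, 64);  // n <= 64 items written to buf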
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
+ static constexpr bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true) + { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
+ // Returns true if the block is now empty (does not apply in explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { } + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + this->parent->add_block_to_free_list(block); + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { + return false; + } + else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / static_cast::type>(BLOCK_SIZE)); + size_t idx = (tail + offset) & 
(localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { + destroy(block); + } + else { + freeList.add(block); + } + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { + return create(); + } + else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + return ptr; + } + } + } + + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + // Swap (assumes our implicit producer hash is 
initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1u; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) || + mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { +#else + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + size_t newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + auto producer = static_cast(recycle_or_create_producer(false)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = 
hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot + mainHash->entries[index].value = producer; + break; + } +#endif + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1u; + probedKey = id; + if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) { + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? 
*(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) + return nullptr; + + for (size_t i = 0; i != count; ++i) + new (p + i) U(); + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) + (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U* create() + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) + p->~U(); + aligned_free(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif diff --git a/third_party/concurrentqueue/lightweightsemaphore.h b/third_party/concurrentqueue/lightweightsemaphore.h new file mode 100644 index 
0000000000..41ba094384
--- /dev/null
+++ b/third_party/concurrentqueue/lightweightsemaphore.h
@@ -0,0 +1,425 @@
+// Provides an efficient implementation of a semaphore (LightweightSemaphore).
+// This is an extension of Jeff Preshing's semaphore implementation (licensed
+// under the terms of its separate zlib license) that has been adapted and
+// extended by Cameron Desrochers.
+
+#pragma once
+
+#include <cstddef> // For std::size_t
+#include <atomic>
+#include <type_traits> // For std::make_signed
+
+#if defined(_WIN32)
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+    struct _SECURITY_ATTRIBUTES;
+    __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+    __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+    __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+    __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
+#elif defined(__MACH__)
+#include <mach/mach.h>
+#elif defined(__unix__)
+#include <semaphore.h>
+
+#if defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE)
+#if __GLIBC_PREREQ(2,30)
+#define MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC
+#endif
+#endif
+#endif
+
+namespace moodycamel
+{
+namespace details
+{
+
+// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
+// portable + lightweight semaphore implementations, originally from
+// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
+// LICENSE:
+// Copyright (c) 2015 Jeff Preshing
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgement in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } +}; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + 
clock_gettime(CLOCK_MONOTONIC, &ts); +#else + clock_gettime(CLOCK_REALTIME, &ts); +#endif + ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + rc = sem_clockwait(&m_sema, CLOCK_MONOTONIC, &ts); +#else + rc = sem_timedwait(&m_sema, &ts); +#endif + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +} // end namespace details + + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + return true; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return false; + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) + { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + break; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return 0; + } + } + } + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, timeout_usecs); + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? static_cast(count) : 0; + } +}; + +} // end namespace moodycamel diff --git a/tools/python_api/src_cpp/py_database.cpp b/tools/python_api/src_cpp/py_database.cpp index 8b67c9eb95..b93fa7fbc8 100644 --- a/tools/python_api/src_cpp/py_database.cpp +++ b/tools/python_api/src_cpp/py_database.cpp @@ -12,10 +12,7 @@ void PyDatabase::initialize(py::handle& m) { PyDatabase::PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize) { auto systemConfig = SystemConfig(); if (bufferPoolSize > 0) { - systemConfig.defaultPageBufferPoolSize = - bufferPoolSize * StorageConstants::DEFAULT_PAGES_BUFFER_RATIO; - systemConfig.largePageBufferPoolSize = - bufferPoolSize * StorageConstants::LARGE_PAGES_BUFFER_RATIO; + systemConfig.bufferPoolSize = bufferPoolSize; } database = std::make_unique(databasePath, systemConfig); }
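
For reference, a minimal sketch (not part of the patch) of how the unified pool is configured from C++ after this change, mirroring the py_database.cpp hunk above. Only the single systemConfig.bufferPoolSize field comes from the patch; the header name, namespace, database path, and the 1 GiB size are illustrative assumptions.

// Sketch only: assumes the Database(databasePath, systemConfig) constructor
// used in the hunk above; include path, namespace, and values are hypothetical.
#include "main/kuzu.h" // assumed public header exposing Database and SystemConfig
#include <memory>

using namespace kuzu::main; // namespace assumed

int main() {
    auto systemConfig = SystemConfig();
    systemConfig.bufferPoolSize = 1ull << 30; // one pool shared by the buffer manager and memory manager
    auto database = std::make_unique<Database>("example.kuzu", systemConfig);
    return 0;
}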
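Likewise, a small usage sketch (not part of the patch) for the vendored moodycamel::LightweightSemaphore added above, touching only members shown in that file (signal, the timed wait, availableApprox). The include path, thread count, and timeout are illustrative assumptions.

// Sketch only: one producer releases three permits; the consumer waits with a
// short spin-then-block timeout, as implemented by waitWithPartialSpinning above.
#include "third_party/concurrentqueue/lightweightsemaphore.h" // path as added by this patch
#include <thread>
#include <cstdio>

int main() {
    moodycamel::LightweightSemaphore sema; // initial count 0
    std::thread producer([&] {
        for (int i = 0; i < 3; ++i)
            sema.signal(); // release one waiter per produced item
    });
    int consumed = 0;
    while (consumed < 3) {
        if (sema.wait(1000 /* usecs */)) // returns false on timeout, true when a permit was acquired
            ++consumed;
    }
    producer.join();
    std::printf("consumed %d items, %zu permits left\n", consumed, sema.availableApprox());
    return 0;
}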