From 2c18ab38e138faef6d4c6faee8fb0bb0e8c4dd1e Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Sun, 12 Mar 2023 22:28:48 -0400 Subject: [PATCH] single pool for bm and mm --- CMakeLists.txt | 1 + src/binder/bind/bind_ddl.cpp | 2 +- src/common/in_mem_overflow_buffer.cpp | 6 +- src/include/common/constants.h | 46 +- src/include/common/in_mem_overflow_buffer.h | 22 +- src/include/common/types/types.h | 3 + src/include/main/database.h | 8 +- .../processor/result/factorized_table.h | 14 +- .../storage/buffer_manager/bm_file_handle.h | 121 + .../buffer_managed_file_handle.h | 73 - .../storage/buffer_manager/buffer_manager.h | 196 +- .../storage/buffer_manager/buffer_pool.h | 141 - .../storage/buffer_manager/memory_manager.h | 67 +- .../storage/buffer_manager/vm_region.h | 46 + src/include/storage/file_handle.h | 4 +- src/include/storage/index/hash_index.h | 6 +- .../storage/storage_structure/column.h | 4 +- .../storage/storage_structure/disk_array.h | 8 +- .../storage_structure/disk_overflow_file.h | 8 +- .../storage_structure/lists/list_headers.h | 2 +- .../storage_structure/lists/lists_metadata.h | 2 +- .../lists/lists_update_iterator.h | 2 +- .../storage_structure/storage_structure.h | 8 +- .../storage_structure_utils.h | 31 +- src/include/storage/wal/wal.h | 8 +- src/include/storage/wal_replayer.h | 8 +- src/main/database.cpp | 16 +- .../aggregate/aggregate_hash_table.cpp | 4 +- .../operator/hash_join/join_hash_table.cpp | 2 +- .../operator/order_by/key_block_merger.cpp | 4 +- .../order_by/order_by_key_encoder.cpp | 4 +- src/processor/result/factorized_table.cpp | 14 +- src/storage/buffer_manager/CMakeLists.txt | 4 +- ...ged_file_handle.cpp => bm_file_handle.cpp} | 127 +- src/storage/buffer_manager/buffer_manager.cpp | 271 +- src/storage/buffer_manager/buffer_pool.cpp | 292 -- src/storage/buffer_manager/memory_manager.cpp | 38 +- src/storage/buffer_manager/vm_region.cpp | 52 + src/storage/copy_arrow/copy_node_arrow.cpp | 2 +- src/storage/copy_arrow/copy_rel_arrow.cpp | 4 +- .../copy_arrow/copy_structures_arrow.cpp | 4 +- src/storage/index/hash_index.cpp | 5 +- src/storage/storage_manager.cpp | 1 - src/storage/storage_structure/column.cpp | 6 +- src/storage/storage_structure/disk_array.cpp | 60 +- .../storage_structure/disk_overflow_file.cpp | 28 +- src/storage/storage_structure/in_mem_file.cpp | 15 +- src/storage/storage_structure/in_mem_page.cpp | 2 +- .../storage_structure/lists/list_headers.cpp | 4 +- src/storage/storage_structure/lists/lists.cpp | 16 +- .../lists/lists_metadata.cpp | 5 +- .../storage_structure/storage_structure.cpp | 2 +- .../storage_structure_utils.cpp | 34 +- src/storage/storage_utils.cpp | 4 +- src/storage/wal/wal.cpp | 12 +- src/storage/wal_replayer.cpp | 16 +- test/include/graph_test/graph_test.h | 9 +- .../main_test_helper/main_test_helper.h | 4 +- .../order_by/key_block_merger_test.cpp | 9 +- .../order_by/order_by_key_encoder_test.cpp | 15 +- test/processor/order_by/radix_sort_test.cpp | 9 +- test/runner/e2e_ddl_test.cpp | 7 +- test/runner/e2e_set_transaction_test.cpp | 2 +- test/storage/CMakeLists.txt | 1 - test/storage/buffer_manager_test.cpp | 53 - test/storage/wal_test.cpp | 7 +- test/transaction/transaction_manager_test.cpp | 7 +- test/transaction/transaction_test.cpp | 2 +- third_party/concurrentqueue/LICENSE.md | 62 + .../concurrentqueue/blockingconcurrentqueue.h | 582 +++ third_party/concurrentqueue/concurrentqueue.h | 3747 +++++++++++++++++ .../concurrentqueue/lightweightsemaphore.h | 425 ++ tools/python_api/src_cpp/py_database.cpp | 5 +- 73 files changed, 
5770 insertions(+), 1061 deletions(-) create mode 100644 src/include/storage/buffer_manager/bm_file_handle.h delete mode 100644 src/include/storage/buffer_manager/buffer_managed_file_handle.h delete mode 100644 src/include/storage/buffer_manager/buffer_pool.h create mode 100644 src/include/storage/buffer_manager/vm_region.h rename src/storage/buffer_manager/{buffer_managed_file_handle.cpp => bm_file_handle.cpp} (60%) delete mode 100644 src/storage/buffer_manager/buffer_pool.cpp create mode 100644 src/storage/buffer_manager/vm_region.cpp delete mode 100644 test/storage/buffer_manager_test.cpp create mode 100644 third_party/concurrentqueue/LICENSE.md create mode 100644 third_party/concurrentqueue/blockingconcurrentqueue.h create mode 100644 third_party/concurrentqueue/concurrentqueue.h create mode 100644 third_party/concurrentqueue/lightweightsemaphore.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ae71c3f76..83407202d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,6 +100,7 @@ include_directories(third_party/nlohmann_json) include_directories(third_party/utf8proc/include) include_directories(third_party/pybind11/include) include_directories(third_party/re2/include) +include_directories(third_party/concurrentqueue) add_subdirectory(third_party) add_subdirectory(src) diff --git a/src/binder/bind/bind_ddl.cpp b/src/binder/bind/bind_ddl.cpp index 47dd0973a1..9e026ad4c5 100644 --- a/src/binder/bind/bind_ddl.cpp +++ b/src/binder/bind/bind_ddl.cpp @@ -193,7 +193,7 @@ DataType Binder::bindDataType(const std::string& dataType) { "The number of elements in a fixed list must be greater than 0. Given: " + std::to_string(boundType.fixedNumElementsInList) + "."); } - if (Types::getDataTypeSize(boundType) > common::BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (Types::getDataTypeSize(boundType) > common::BufferPoolConstants::PAGE_4KB_SIZE) { throw common::BinderException("The size of fixed list is larger than a " "DEFAULT_PAGE_SIZE, which is not supported yet."); } diff --git a/src/common/in_mem_overflow_buffer.cpp b/src/common/in_mem_overflow_buffer.cpp index 96ddcdcd4e..61eb4d6407 100644 --- a/src/common/in_mem_overflow_buffer.cpp +++ b/src/common/in_mem_overflow_buffer.cpp @@ -4,18 +4,18 @@ namespace kuzu { namespace common { uint8_t* InMemOverflowBuffer::allocateSpace(uint64_t size) { - assert(size <= BufferPoolConstants::LARGE_PAGE_SIZE); + assert(size <= BufferPoolConstants::PAGE_256KB_SIZE); if (requireNewBlock(size)) { allocateNewBlock(); } - auto data = currentBlock->block->data + currentBlock->currentOffset; + auto data = currentBlock->block->buffer + currentBlock->currentOffset; currentBlock->currentOffset += size; return data; } void InMemOverflowBuffer::allocateNewBlock() { auto newBlock = make_unique( - memoryManager->allocateBlock(false /* do not initialize to zero */)); + memoryManager->allocateBuffer(false /* do not initialize to zero */)); currentBlock = newBlock.get(); blocks.push_back(std::move(newBlock)); } diff --git a/src/include/common/constants.h b/src/include/common/constants.h index f34d4f94de..7de0953419 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -21,31 +21,40 @@ constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_FOR_TRANSACTIONS_TO_LEAVE_IN_ const std::string INTERNAL_ID_SUFFIX = "_id"; +enum PageSizeClass : uint8_t { + PAGE_4KB = 0, + PAGE_256KB = 1, +}; + // Currently the system supports files with 2 different pages size, which we refer to as -// DEFAULT_PAGE_SIZE and LARGE_PAGE_SIZE. 
Default size of the page which is the unit of read/write -// to the database files, such as to store columns or lists. For now, this value cannot be changed. -// But technically it can change from 2^12 to 2^16. 2^12 lower bound is assuming the OS page size is -// 4K. 2^16 is because currently we leave 11 fixed number of bits for relOffInPage and the maximum -// number of bytes needed for an edge is 20 bytes so 11 + log_2(20) = 15.xxx, so certainly over -// 2^16-size pages, we cannot utilize the page for storing adjacency lists. +// PAGE_4KB_SIZE and PAGE_256KB_SIZE. PAGE_4KB_SIZE is the default size of the page which is the +// unit of read/write to the database files, such as to store columns or lists. For now, this value +// cannot be changed. But technically it can change from 2^12 to 2^16. 2^12 lower bound is assuming +// the OS page size is 4K. 2^16 is because currently we leave 11 fixed number of bits for +// relOffInPage and the maximum number of bytes needed for an edge is 20 bytes so 11 + log_2(20) +// = 15.xxx, so certainly over 2^16-size pages, we cannot utilize the page for storing adjacency +// lists. struct BufferPoolConstants { - static constexpr uint64_t DEFAULT_PAGE_SIZE_LOG_2 = 12; - static constexpr uint64_t DEFAULT_PAGE_SIZE = 1 << DEFAULT_PAGE_SIZE_LOG_2; + static constexpr uint64_t PAGE_4KB_SIZE_LOG2 = 12; + static constexpr uint64_t PAGE_4KB_SIZE = (std::uint64_t)1 << PAGE_4KB_SIZE_LOG2; // Page size for files with large pages, e.g., temporary files that are used by operators that // may require large amounts of memory. - static constexpr uint64_t LARGE_PAGE_SIZE_LOG_2 = 18; - static constexpr uint64_t LARGE_PAGE_SIZE = 1 << LARGE_PAGE_SIZE_LOG_2; + static constexpr uint64_t PAGE_256KB_SIZE_LOG2 = 18; + static constexpr uint64_t PAGE_256KB_SIZE = (std::uint64_t)1 << PAGE_256KB_SIZE_LOG2; + // If a user does not specify a max size for BM, we by default set the max size of BM to + // maxPhyMemSize * DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM. + static constexpr double DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM = 0.8; + // For each PURGE_EVICTION_QUEUE_INTERVAL candidates added to the eviction queue, we will call + // `removeNonEvictableCandidates` to remove candidates that are not evictable. See + // `EvictionQueue::removeNonEvictableCandidates()` for more details. + static constexpr uint64_t EVICTION_QUEUE_PURGING_INTERVAL = 1024; + // The default max size for a VMRegion. + static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = (uint64_t)1 << 45; // (32TB) + + static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 26; // (64MB) }; struct StorageConstants { - // The default amount of memory pre-allocated to both the default and large pages buffer pool. - static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE = 1ull << 30; // (1GB) - static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 27; // (128MB) - // The default ratio of system memory allocated to buffer pools (including default and large). - static constexpr double DEFAULT_BUFFER_POOL_RATIO = 0.8; - // The default ratio of buffer allocated to default and large pages. - static constexpr double DEFAULT_PAGES_BUFFER_RATIO = 0.75; - static constexpr double LARGE_PAGES_BUFFER_RATIO = 1.0 - DEFAULT_PAGES_BUFFER_RATIO; static constexpr char OVERFLOW_FILE_SUFFIX[] = ".ovf"; static constexpr char COLUMN_FILE_SUFFIX[] = ".col"; static constexpr char LISTS_FILE_SUFFIX[] = ".lists"; @@ -63,6 +72,7 @@ struct StorageConstants { // The number of pages that we add at one time when we need to grow a file. 
static constexpr uint64_t PAGE_GROUP_SIZE_LOG2 = 10; static constexpr uint64_t PAGE_GROUP_SIZE = (uint64_t)1 << PAGE_GROUP_SIZE_LOG2; + static constexpr uint64_t PAGE_IDX_IN_GROUP_MASK = ((uint64_t)1 << PAGE_GROUP_SIZE_LOG2) - 1; }; struct ListsMetadataConstants { diff --git a/src/include/common/in_mem_overflow_buffer.h b/src/include/common/in_mem_overflow_buffer.h index cdd364737c..a7914685a5 100644 --- a/src/include/common/in_mem_overflow_buffer.h +++ b/src/include/common/in_mem_overflow_buffer.h @@ -10,13 +10,13 @@ namespace common { struct BufferBlock { public: - explicit BufferBlock(std::unique_ptr block) - : size{block->size}, currentOffset{0}, block{std::move(block)} {} + explicit BufferBlock(std::unique_ptr block) + : size{block->allocator->getPageSize()}, currentOffset{0}, block{std::move(block)} {} public: uint64_t size; uint64_t currentOffset; - std::unique_ptr block; + std::unique_ptr block; inline void resetCurrentOffset() { currentOffset = 0; } }; @@ -27,15 +27,6 @@ class InMemOverflowBuffer { explicit InMemOverflowBuffer(storage::MemoryManager* memoryManager) : memoryManager{memoryManager}, currentBlock{nullptr} {}; - // The blocks used are allocated through the MemoryManager but are backed by the - // BufferManager. We need to therefore release them back by calling - // memoryManager->freeBlock. - ~InMemOverflowBuffer() { - for (auto& block : blocks) { - memoryManager->freeBlock(block->block->pageIdx); - } - } - uint8_t* allocateSpace(uint64_t size); inline void merge(InMemOverflowBuffer& other) { @@ -54,9 +45,6 @@ class InMemOverflowBuffer { inline void resetBuffer() { if (!blocks.empty()) { auto firstBlock = std::move(blocks[0]); - for (auto i = 1u; i < blocks.size(); ++i) { - memoryManager->freeBlock(blocks[i]->block->pageIdx); - } blocks.clear(); firstBlock->resetCurrentOffset(); blocks.push_back(std::move(firstBlock)); @@ -68,10 +56,10 @@ class InMemOverflowBuffer { private: inline bool requireNewBlock(uint64_t sizeToAllocate) { - if (sizeToAllocate > BufferPoolConstants::LARGE_PAGE_SIZE) { + if (sizeToAllocate > BufferPoolConstants::PAGE_256KB_SIZE) { throw RuntimeException("Require size " + std::to_string(sizeToAllocate) + " greater than single block size " + - std::to_string(BufferPoolConstants::LARGE_PAGE_SIZE) + "."); + std::to_string(BufferPoolConstants::PAGE_256KB_SIZE) + "."); } return currentBlock == nullptr || (currentBlock->currentOffset + sizeToAllocate) > currentBlock->size; diff --git a/src/include/common/types/types.h b/src/include/common/types/types.h index c439e468a3..7e1f43fd36 100644 --- a/src/include/common/types/types.h +++ b/src/include/common/types/types.h @@ -16,8 +16,11 @@ namespace common { using sel_t = uint16_t; using hash_t = uint64_t; using page_idx_t = uint32_t; +using frame_idx_t = page_idx_t; using page_offset_t = uint32_t; constexpr page_idx_t PAGE_IDX_MAX = UINT32_MAX; +using page_group_idx_t = uint32_t; +using frame_group_idx_t = page_group_idx_t; using list_header_t = uint32_t; using property_id_t = uint32_t; constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX; diff --git a/src/include/main/database.h b/src/include/main/database.h index 0d49545def..97cc8116fe 100644 --- a/src/include/main/database.h +++ b/src/include/main/database.h @@ -20,15 +20,11 @@ KUZU_API struct SystemConfig { explicit SystemConfig(); /** * @brief Creates a SystemConfig object. - * @param bufferPoolSize Buffer pool size in bytes. - * @note Currently, we have two internal buffer pools with different frame size of 4KB and - * 256KB. 
When a user sets a customized buffer pool size, it is divided into two internal pools - * based on the DEFAULT_PAGES_BUFFER_RATIO and LARGE_PAGES_BUFFER_RATIO. + * @param bufferPoolSize Max size of the buffer pool in bytes. */ explicit SystemConfig(uint64_t bufferPoolSize); - uint64_t defaultPageBufferPoolSize; - uint64_t largePageBufferPoolSize; + uint64_t bufferPoolSize; uint64_t maxNumThreads; }; diff --git a/src/include/processor/result/factorized_table.h b/src/include/processor/result/factorized_table.h index 8d842189d1..0af4a74e7c 100644 --- a/src/include/processor/result/factorized_table.h +++ b/src/include/processor/result/factorized_table.h @@ -33,21 +33,19 @@ class DataBlock { public: explicit DataBlock(storage::MemoryManager* memoryManager) : numTuples{0}, memoryManager{memoryManager} { - block = memoryManager->allocateBlock(true); - freeSize = block->size; + block = memoryManager->allocateBuffer(true /* initializeToZero */); + freeSize = block->allocator->getPageSize(); } DataBlock(DataBlock&& other) = default; - ~DataBlock() { memoryManager->freeBlock(block->pageIdx); } - - inline uint8_t* getData() const { return block->data; } + inline uint8_t* getData() const { return block->buffer; } inline void resetNumTuplesAndFreeSize() { - freeSize = common::BufferPoolConstants::LARGE_PAGE_SIZE; + freeSize = common::BufferPoolConstants::PAGE_256KB_SIZE; numTuples = 0; } inline void resetToZero() { - memset(block->data, 0, common::BufferPoolConstants::LARGE_PAGE_SIZE); + memset(block->buffer, 0, common::BufferPoolConstants::PAGE_256KB_SIZE); } static void copyTuples(DataBlock* blockToCopyFrom, ft_tuple_idx_t tupleIdxToCopyFrom, @@ -60,7 +58,7 @@ class DataBlock { storage::MemoryManager* memoryManager; private: - std::unique_ptr block; + std::unique_ptr block; }; class DataBlockCollection { diff --git a/src/include/storage/buffer_manager/bm_file_handle.h b/src/include/storage/buffer_manager/bm_file_handle.h new file mode 100644 index 0000000000..fb6e5f752d --- /dev/null +++ b/src/include/storage/buffer_manager/bm_file_handle.h @@ -0,0 +1,121 @@ +#pragma once + +#include "storage/buffer_manager/vm_region.h" +#include "storage/file_handle.h" + +namespace kuzu { +namespace storage { + +static constexpr uint64_t IS_IN_FRAME_MASK = 0x8000000000000000; +static constexpr uint64_t DIRTY_MASK = 0x4000000000000000; +static constexpr uint64_t PAGE_IDX_MASK = 0x3FFFFFFFFFFFFFFF; + +enum class LockMode : uint8_t { SPIN = 0, NON_BLOCKING = 1 }; + +class BMFileHandle; +class BufferManager; + +// Keeps the state information of a page in a file. 
+class PageState { +public: + inline bool isInFrame() const { return pageIdx & IS_IN_FRAME_MASK; } + inline void setDirty() { pageIdx |= DIRTY_MASK; } + inline void clearDirty() { pageIdx &= ~DIRTY_MASK; } + inline bool isDirty() const { return pageIdx & DIRTY_MASK; } + inline common::page_idx_t getPageIdx() const { + return (common::page_idx_t)(pageIdx & PAGE_IDX_MASK); + } + inline uint64_t incrementPinCount() { return pinCount.fetch_add(1); } + inline uint64_t decrementPinCount() { return pinCount.fetch_sub(1); } + inline void setPinCount(uint64_t newPinCount) { pinCount.store(newPinCount); } + inline uint64_t getPinCount() const { return pinCount.load(); } + inline uint64_t getEvictionTimestamp() const { return evictionTimestamp.load(); } + inline uint64_t incrementEvictionTimestamp() { return evictionTimestamp.fetch_add(1); } + inline void releaseLock() { lock.clear(); } + + bool acquireLock(LockMode lockMode); + void setInFrame(common::page_idx_t pageIdx); + void resetState(); + +private: + std::atomic_flag lock = ATOMIC_FLAG_INIT; + // Highest 1st bit indicates if this page is loaded or not, 2nd bit indicates if this + // page is dirty or not. The rest 62 bits records the page idx inside the file. + uint64_t pageIdx = 0; + std::atomic pinCount = 0; + std::atomic evictionTimestamp = 0; +}; + +// BMFileHandle is a file handle that is backed by BufferManager. It holds the state of +// each page in the file. File Handle is the bridge between a Column/Lists/Index and the Buffer +// Manager that abstracts the file in which that Column/Lists/Index is stored. +// BMFileHandle supports two types of files: versioned and non-versioned. Versioned files +// contains mapping from pages that have updates to the versioned pages in the wal file. +// Currently, only MemoryManager and WAL files are non-versioned. +class BMFileHandle : public FileHandle { +public: + enum class FileVersionedType : uint8_t { + VERSIONED_FILE = 0, // The file is backed by versioned pages in wal file. + NON_VERSIONED_FILE = 1 // The file does not have any versioned pages in wal file. + }; + + BMFileHandle(const std::string& path, uint8_t flags, BufferManager* bm, + common::PageSizeClass pageSizeClass, FileVersionedType fileVersionedType); + + void createPageVersionGroupIfNecessary(common::page_idx_t pageIdx); + + // This function is intended to be used after a fileInfo is created and we want the file + // to have not pages and page locks. Should be called after ensuring that the buffer manager + // does not hold any of the pages of the file. 
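[Editorial note, not part of the patch] The new PageState packs the "in frame" flag, the dirty flag, and a 62-bit page index into a single 64-bit word using the three masks declared above. A minimal, standalone C++ sketch of that bit layout (mask values copied from the patch; the rest is illustrative only):

#include <cassert>
#include <cstdint>

static constexpr uint64_t IS_IN_FRAME_MASK = 0x8000000000000000; // bit 63: page is cached in a frame
static constexpr uint64_t DIRTY_MASK = 0x4000000000000000;       // bit 62: page has unflushed writes
static constexpr uint64_t PAGE_IDX_MASK = 0x3FFFFFFFFFFFFFFF;    // low 62 bits: the recorded page index

int main() {
    uint64_t state = 0;
    state = IS_IN_FRAME_MASK | (42 & PAGE_IDX_MASK); // mark as in-frame, record index 42 (cf. setInFrame)
    state |= DIRTY_MASK;                             // cf. setDirty()
    assert(state & IS_IN_FRAME_MASK);                // cf. isInFrame() -> true
    assert(state & DIRTY_MASK);                      // cf. isDirty() -> true
    assert((uint32_t)(state & PAGE_IDX_MASK) == 42); // cf. getPageIdx() -> 42
    state &= ~DIRTY_MASK;                            // cf. clearDirty()
    return 0;
}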
+ void resetToZeroPagesAndPageCapacity(); + void removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx); + + bool hasWALPageVersionNoPageLock(common::page_idx_t pageIdx); + void clearWALPageVersionIfNecessary(common::page_idx_t pageIdx); + common::page_idx_t getWALPageVersionNoPageLock(common::page_idx_t pageIdx); + void setWALPageVersion(common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL); + void setWALPageVersionNoLock(common::page_idx_t pageIdx, common::page_idx_t pageVersion); + + inline bool acquirePageLock(common::page_idx_t pageIdx, LockMode lockMode) { + return getPageState(pageIdx)->acquireLock(lockMode); + } + inline void releasePageLock(common::page_idx_t pageIdx) { + getPageState(pageIdx)->releaseLock(); + } + inline PageState* getPageState(common::page_idx_t pageIdx) { + assert(pageIdx < numPages && pageStates[pageIdx]); + return pageStates[pageIdx].get(); + } + inline void clearPageState(common::page_idx_t pageIdx) { + assert(pageIdx < numPages && pageStates[pageIdx]); + pageStates[pageIdx]->resetState(); + } + inline common::frame_idx_t getFrameIdx(common::page_idx_t pageIdx) { + assert(pageIdx < pageCapacity); + return (frameGroupIdxes[pageIdx >> common::StorageConstants::PAGE_GROUP_SIZE_LOG2] + << common::StorageConstants::PAGE_GROUP_SIZE_LOG2) | + (pageIdx & common::StorageConstants::PAGE_IDX_IN_GROUP_MASK); + } + inline common::PageSizeClass getPageSizeClass() const { return pageSizeClass; } + +private: + void initPageStatesAndGroups(); + common::page_idx_t addNewPageWithoutLock() override; + void addNewPageGroupWithoutLock(); + void removePageIdxAndTruncateIfNecessaryWithoutLock(common::page_idx_t pageIdxToRemove); + inline common::page_group_idx_t getNumPageGroups() { + return ceil((double)numPages / common::StorageConstants::PAGE_GROUP_SIZE); + } + +private: + FileVersionedType fileVersionedType; + BufferManager* bm; + common::PageSizeClass pageSizeClass; + std::vector> pageStates; + // Each file page group corresponds to a frame group in the VMRegion. + std::vector frameGroupIdxes; + std::vector> pageVersions; + std::vector> pageGroupLocks; +}; +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/buffer_manager/buffer_managed_file_handle.h b/src/include/storage/buffer_manager/buffer_managed_file_handle.h deleted file mode 100644 index 401d1069ed..0000000000 --- a/src/include/storage/buffer_manager/buffer_managed_file_handle.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once - -#include "storage/file_handle.h" - -namespace kuzu { -namespace storage { - -// BufferManagedFileHandle is a file handle that is backed by BufferManager. It holds the state of -// each in the file. File Handle is the bridge between a Column/Lists/Index and the Buffer Manager -// that abstracts the file in which that Column/Lists/Index is stored. -// BufferManagedFileHandle supports two types of files: versioned and non-versioned. Versioned files -// contains mapping from pages that have updates to the versioned pages in the wal file. -// Currently, only MemoryManager and WAL files are non-versioned. -class BufferManagedFileHandle : public FileHandle { -public: - enum class FileVersionedType : uint8_t { - VERSIONED_FILE = 0, // The file is backed by versioned pages in wal file. - NON_VERSIONED_FILE = 1 // The file does not have any versioned pages in wal file. 
- }; - - BufferManagedFileHandle( - const std::string& path, uint8_t flags, FileVersionedType fileVersionedType); - - bool acquirePageLock(common::page_idx_t pageIdx, bool block); - inline void releasePageLock(common::page_idx_t pageIdx) { pageLocks[pageIdx]->clear(); } - - void createPageVersionGroupIfNecessary(common::page_idx_t pageIdx); - - // This function is intended to be used after a fileInfo is created and we want the file - // to have not pages and page locks. Should be called after ensuring that the buffer manager - // does not hold any of the pages of the file. - void resetToZeroPagesAndPageCapacity(); - void removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx); - - bool hasWALPageVersionNoPageLock(common::page_idx_t pageIdx); - void clearWALPageVersionIfNecessary(common::page_idx_t pageIdx); - common::page_idx_t getWALPageVersionNoPageLock(common::page_idx_t pageIdx); - void setWALPageVersion(common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL); - void setWALPageVersionNoLock(common::page_idx_t pageIdx, common::page_idx_t pageVersion); - - inline common::page_idx_t getFrameIdx(common::page_idx_t pageIdx) { - return pageIdxToFrameMap[pageIdx]->load(); - } - inline void swizzle(common::page_idx_t pageIdx, common::page_idx_t swizzledVal) { - pageIdxToFrameMap[pageIdx]->store(swizzledVal); - } - inline void unswizzle(common::page_idx_t pageIdx) { - pageIdxToFrameMap[pageIdx]->store(UINT32_MAX); - } - - static inline bool isAFrame(common::page_idx_t mappedFrameIdx) { - return UINT32_MAX != mappedFrameIdx; - } - -private: - void initPageIdxToFrameMapAndLocks(); - common::page_idx_t addNewPageWithoutLock() override; - bool acquire(common::page_idx_t pageIdx); - void removePageIdxAndTruncateIfNecessaryWithoutLock(common::page_idx_t pageIdxToRemove); - void resizePageGroupLocksAndPageVersionsWithoutLock(); - uint32_t getNumPageGroups() { - return ceil((double)numPages / common::StorageConstants::PAGE_GROUP_SIZE); - } - -private: - FileVersionedType fileVersionedType; - std::vector> pageLocks; - std::vector>> pageIdxToFrameMap; - std::vector> pageVersions; - std::vector> pageGroupLocks; -}; -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/buffer_manager/buffer_manager.h b/src/include/storage/buffer_manager/buffer_manager.h index 7ae12417b9..8b3079dfa1 100644 --- a/src/include/storage/buffer_manager/buffer_manager.h +++ b/src/include/storage/buffer_manager/buffer_manager.h @@ -1,11 +1,9 @@ #pragma once -#include #include -#include "common/metric.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" -#include "storage/buffer_manager/buffer_pool.h" +#include "concurrentqueue.h" +#include "storage/buffer_manager/bm_file_handle.h" namespace spdlog { class logger; @@ -14,97 +12,155 @@ class logger; namespace kuzu { namespace storage { +struct EvictionCandidate { + bool isEvictable() const { + return pageState->getEvictionTimestamp() == evictionTimestamp && + pageState->getPinCount() == 0; + } + + BMFileHandle* fileHandle; + PageState* pageState; + // The eviction timestamp of the corresponding page state at the time the candidate is enqueued. 
+ uint64_t evictionTimestamp = -1u; +}; + +class EvictionQueue { +public: + EvictionQueue() { queue = std::make_unique>(); } + + inline void enqueue( + BMFileHandle* fileHandle, PageState* frameHandle, uint64_t evictionTimestamp) { + queue->enqueue(EvictionCandidate{fileHandle, frameHandle, evictionTimestamp}); + } + inline bool dequeue(EvictionCandidate& candidate) { return queue->try_dequeue(candidate); } + void removeNonEvictableCandidates(); + +private: + std::unique_ptr> queue; +}; + /** + * The Buffer Manager (BM) is a centralized manager of database memory resources. + * It provides two main functionalities: + * 1) it provides the high-level functionality to pin() and unpin() the pages of the database files + * used by storage structures, such as the Column, Lists, or HashIndex in the storage layer, and + * operates via their BMFileHandle to read/write the page data into/out of one of the frames. + * 2) it supports the MemoryManager (MM) to allocate memory buffers that are not backed by + * any disk files. Similar to disk files, MM provides BMFileHandles backed by temp in-mem files to + * the BM to pin/unpin pages. Pin happens when MM tries to allocate a new memory buffer, and unpin + * happens when MM tries to reclaim a memory buffer. * - * The Buffer Manager (BM) is the cache of database file pages. It provides the high-level - * functionality of pin() and unpin() the pages of the database files used by the Column/Lists in - * the storage layer, and operates via their FileHandles to make the page data available in one of - * the frames. BM can also be used by any operator or other components of the system to acquire - * memory blocks and ensure that they do not acquire memory directly from the OS. Depending on how - * the user of the BM pins and unpins pages, operators can ensure either that the memory blocks they - * acquire are safely spilled to disk and read back or always kept in memory (see below.) + * Specifically, in BM's context, page is the basic management unit of data in a file. The file can + * be a disk file, such as a column file, or an in-mem file, such as an temp in-memory file kept by + * the MM. Frame is the basic management unit of data resides in a VMRegion, namely in a virtual + * memory space. Each page is uniquely mapped to a frame, and it can be cached into or evicted from + * the frame. See `VMRegion` for more details. * - * Currently the BM has internal BufferPools to cache pages of 2 size: DEFAULT_PAGE_SIZE and - * LARGE_PAGE_SIZE, both of which are defined in configs.h. We only have a mechanism to control the - * memory size of each BufferPool. So when the BM of the system is constructed, one pool of memory - * is allocated to cache files whose pages are of size DEFAULT_PAGE_SIZE, and a separate pool of - * memory is allocated to cache files whose pages are of size LARGE_PAGE_SIZE. Ideally we should - * move towards allocating a single pool of memory from which different size pages are allocated. - * The Umbra paper (http://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf) describes an - * mmap-based mechanism to do this (where the responsibility to handle memory fragmentation is - * delegated to the OS). + * When users unpin their pages, the BM might spill them to disk. The behavior of what is guaranteed + * to be kept in frame and what can be spilled to disk is directly determined by the pin/unpin + * calls of the users. 
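[Editorial note, not part of the patch] A minimal usage sketch of the pin/unpin contract described in the surrounding comment, written against the interfaces this patch introduces (getBMFileHandle, pin, setPinnedPageDirty, unpin). The file path, page index, and helper function below are hypothetical, and error handling is omitted:

#include <string>
#include "storage/buffer_manager/buffer_manager.h"

using namespace kuzu::common;
using namespace kuzu::storage;

// Hypothetical helper: one pin/modify/unpin round trip on the first page of a column file.
void touchFirstPage(BufferManager& bm, const std::string& colFilePath) {
    auto fileHandle = bm.getBMFileHandle(colFilePath, FileHandle::O_PERSISTENT_FILE_NO_CREATE,
        BMFileHandle::FileVersionedType::VERSIONED_FILE);
    page_idx_t pageIdx = 0;
    // pin() caches the page into its frame (PageReadPolicy::READ_PAGE by default) and returns it.
    uint8_t* frame = bm.pin(*fileHandle, pageIdx);
    frame[0] = 0xFF;                             // mutate the cached page data
    bm.setPinnedPageDirty(*fileHandle, pageIdx); // written back to disk when flushed or evicted
    bm.unpin(*fileHandle, pageIdx);              // the page becomes an eviction candidate again
}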
* - * The BM uses CLOCK replacement policy to evict pages from frames, which is an approximate LRU - * policy that is based of FIFO-like operations. + * Also, BM provides some specialized functionalities: + * 1) it supports the caller to set pinned pages as dirty, which will be safely written back to disk + * when the pages are evicted; + * 2) it supports the caller to flush or remove pages from the BM; + * 3) it supports the caller to directly update the content of a frame. * - * All access to the BM is through a FileHandle. To use the BM to acquire in-memory blocks users can - * pin pages, which will then lead the BM to put these pages in memory, and then never unpin them - * and the BM will never spill those pages to disk. However *make sure to unpin these pages* - * eventually, otherwise this would be a form of internal memory leak. See InMemOverflowBuffer for - * an example, where this is done during the deconstruction of the InMemOverflowBuffer. + * All accesses to the BM are through a FileHandle. This design is to de-centralize the management + * of page states from the BM to each file handle itself. Thus each on-disk file should have a + * unique BMFileHandle, and MM also holds a unique BMFileHandle, which is backed by an temp in-mem + * file, for all memory buffer allocations * - * Users can also unpin their pages and then the BM might spill them to disk. The behavior of what - * is guaranteed to be kept in memory and what can be spilled to disk is directly determined by the - * pin/unpin calls of the users of BM. + * To start a Database, users need to specify the max size of the memory usage (`maxSize`) in BM. + * If users don't specify the value, the system will set maxSize to available physical mem * + * DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM (defined in constants.h). + * The BM relies on virtual memory regions mapped through `mmap` to anonymous address spaces. + * 1) For disk pages, BM allocates a virtual memory region of DEFAULT_VM_REGION_MAX_SIZE (defined in + * constants.h), which is usually much larger than `maxSize`, and is expected to be large enough to + * contain all disk pages. Each disk page in database files is directly mapped to a unique + * PAGE_4KB_SIZE frame in the region. + * 2) For each BMFileHandle backed by a temp in-mem file in MM, BM allocates a virtual memory region + * of `maxSize` for it. Each memory buffer is mapped to a unique PAGE_256KB_SIZE frame in that + * region. Both disk pages and memory buffers are all managed by the BM to make sure that actually + * used physical memory doesn't go beyond max size specified by users. Currently, the BM uses a + * queue based replacement policy and the MADV_DONTNEED hint to explicitly control evictions. See + * comments above `claimAFrame()` for more details. * - * BufferManager supports a special pin function called pinWithoutReadingFromFile. A caller can - * call the common::page_idx_t newPageIdx = fh::addNewPage() function on the FileHandle fh they - * have, and then call bm::pinWithoutReadingFromFile(fh, newPageIdx), and the BM will not try to - * read this page from the file (because the page has not yet been written). + * The design is inspired by vmcache in the paper "Virtual-Memory Assisted Buffer Management" + * (https://www.cs.cit.tum.de/fileadmin/w00cfj/dis/_my_direct_uploads/vmcache.pdf). 
+ * We would also like to thank Fadhil Abubaker for doing the initial research and prototyping of + * Umbra's design in his CS 848 course project: + * https://github.com/fabubaker/kuzu/blob/umbra-bm/final_project_report.pdf. */ -class BufferManager { +class BufferManager { public: - BufferManager(uint64_t maxSizeForDefaultPagePool, uint64_t maxSizeForLargePagePool); + enum class PageReadPolicy : uint8_t { READ_PAGE = 0, DONT_READ_PAGE = 1 }; + + explicit BufferManager(uint64_t bufferPoolSize); ~BufferManager(); - uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + uint8_t* pin(BMFileHandle& fileHandle, common::page_idx_t pageIdx, + PageReadPolicy pageReadPolicy = PageReadPolicy::READ_PAGE); + uint8_t* pinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); - // The caller should ensure that the given pageIdx is indeed a new page, so should not be read - // from disk - uint8_t* pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void setPinnedPageDirty(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - inline uint8_t* pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile) { - return fileHandle.isLargePaged() ? bufferPoolLargePages->pinWithoutAcquiringPageLock( - fileHandle, pageIdx, doNotReadFromFile) : - bufferPoolDefaultPages->pinWithoutAcquiringPageLock( - fileHandle, pageIdx, doNotReadFromFile); - } + // The function assumes that the requested page is already pinned. + void unpin(BMFileHandle& fileHandle, common::page_idx_t pageIdx); + void unpinWithoutAcquiringPageLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void removeFilePagesFromFrames(BMFileHandle& fileHandle); + void flushAllDirtyPagesInFrames(BMFileHandle& fileHandle); + void updateFrameIfPageIsInFrameWithoutLock( + BMFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); + void removePageFromFrameIfNecessary(BMFileHandle& fileHandle, common::page_idx_t pageIdx); - // The function assumes that the requested page is already pinned. - void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - inline void unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? - bufferPoolLargePages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx) : - bufferPoolDefaultPages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx); + // For files that are managed by BM, their FileHandles should be created through this function. + inline std::unique_ptr getBMFileHandle(const std::string& filePath, uint8_t flags, + BMFileHandle::FileVersionedType fileVersionedType, + common::PageSizeClass pageSizeClass = common::PAGE_4KB) { + return std::make_unique( + filePath, flags, this, pageSizeClass, fileVersionedType); + } + inline common::frame_group_idx_t addNewFrameGroup(common::PageSizeClass pageSizeClass) { + return vmRegions[pageSizeClass]->addNewFrameGroup(); } - void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle); +private: + bool claimAFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); + // Return number of bytes freed. 
+ uint64_t tryEvictPage(EvictionCandidate& candidate); - void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle); - void updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); + void cachePageIntoFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy); + void flushIfDirtyWithoutLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx); + void removePageFromFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush); - void removePageFromFrameIfNecessary( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); + void addToEvictionQueue(BMFileHandle* fileHandle, PageState* pageState); - // Note: This function is not thread-safe. - // For files that are managed by BM, their FileHandles should be created through this function. - inline std::unique_ptr getBufferManagedFileHandle( - const std::string& filePath, uint8_t flags, - BufferManagedFileHandle::FileVersionedType fileVersionedType) { - return std::make_unique(filePath, flags, fileVersionedType); + inline uint64_t reserveUsedMemory(uint64_t size) { return usedMemory.fetch_add(size); } + inline uint64_t freeUsedMemory(uint64_t size) { return usedMemory.fetch_sub(size); } + + inline uint8_t* getFrame(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + return vmRegions[fileHandle.getPageSizeClass()]->getFrame(fileHandle.getFrameIdx(pageIdx)); + } + inline void releaseFrameForPage(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + vmRegions[fileHandle.getPageSizeClass()]->releaseFrame(fileHandle.getFrameIdx(pageIdx)); } private: std::shared_ptr logger; - std::unique_ptr bufferPoolDefaultPages; - std::unique_ptr bufferPoolLargePages; + std::atomic usedMemory; + std::atomic bufferPoolSize; + std::atomic numEvictionQueueInsertions; + // Each VMRegion corresponds to a virtual memory region of a specific page size. Currently, we + // hold two sizes of PAGE_4KB and PAGE_256KB. + std::vector> vmRegions; + std::unique_ptr evictionQueue; }; } // namespace storage diff --git a/src/include/storage/buffer_manager/buffer_pool.h b/src/include/storage/buffer_manager/buffer_pool.h deleted file mode 100644 index de03738afe..0000000000 --- a/src/include/storage/buffer_manager/buffer_pool.h +++ /dev/null @@ -1,141 +0,0 @@ -#pragma once - -#include -#include - -#include "common/metric.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" - -namespace spdlog { -class logger; -} - -namespace kuzu { -namespace storage { - -struct BufferManagerMetrics { - uint64_t numPins{0}; - // Number of pinning operations that required eviction from a Frame. - uint64_t numEvicts{0}; - // Number of failed tries to evict the page from a Frame. This is incremented if either the - // eviction routine fails to get the lock on the page that is in the Frame or the pinCount of - // the Frame has increased after taking the locks of Frame and page. - uint64_t numEvictFails{0}; - // Number of failed tried to evict the page frame a Frame because the Frame has been recently - // accessed and hence is given a second chance. - uint64_t numRecentlyAccessedWalkover{0}; - uint64_t numCacheHit{0}; - uint64_t numCacheMiss{0}; - uint64_t numDirtyPageWriteIO{0}; -}; - -// A frame is a unit of buffer space having a fixed size of 4KB, where a single file page is -// read from the disk. Frame also stores other metadata to locate and maintain this buffer in the -// Buffer Manager. 
-class Frame { - friend class BufferPool; - -public: - explicit Frame(common::page_offset_t pageSize, uint8_t* buffer); - ~Frame() noexcept(false); - -private: - void resetFrameWithoutLock(); - bool acquireFrameLock(bool block); - void releaseFrameLock() { frameLock.clear(); } - void setIsDirty(bool _isDirty) { isDirty = _isDirty; } - void releaseBuffer(); - -private: - // fileHandlePtr and pageIdx identify the file and the page in file whose data the buffer is - // maintaining. pageIdx of -1u means that the frame is empty, i.e. it has no data. - std::atomic fileHandlePtr; - std::atomic pageIdx; - std::atomic pinCount; - - bool recentlyAccessed; - bool isDirty; - uint8_t* buffer; - common::page_offset_t pageSize; - std::atomic_flag frameLock; -}; - -// The BufferPool is a cache of file pages of a fixed size. It provides the high-level functionality -// of pin() and unpin() pages of files in memory and operates via their FileHandles -// to make the page data available in one of the frames. It uses CLOCK replacement policy to evict -// pages from frames, which is an approximate LRU policy that is based of FIFO-like operations. -class BufferPool { - friend class BufferManager; - -public: - BufferPool(uint64_t pageSize, uint64_t maxSize); - - uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // Pins a new page that has been added to the file. This means that the BufferManager does not - // need to read the page from the file for now. Ensuring that the given pageIdx is new is the - // responsibility of the caller. - uint8_t* pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - uint8_t* pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // The function assumes that the requested page is already pinned. - void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - void unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - - // Note: These two functions that remove pages from frames is not designed for concurrency and - // therefore not tested under concurrency. If this is called while other threads are accessing - // the BM, it should work safely but this is not tested. - void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle); - - void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle); - void updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx); - - void removePageFromFrameWithoutFlushingIfNecessary( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx); - -private: - uint8_t* pin( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - common::page_idx_t claimAFrame( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile); - - bool fillEmptyFrame(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - bool tryEvict(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - void moveClockHand(uint64_t newClockHand); - // Performs 2 actions: - // 1) Clears the contents of the frame. - // 2) Unswizzles the pageIdx in the frame. 
- void clearFrameAndUnswizzleWithoutLock(const std::unique_ptr& frame, - BufferManagedFileHandle& fileHandleInFrame, common::page_idx_t pageIdxInFrame); - void readNewPageIntoFrame(Frame& frame, BufferManagedFileHandle& fileHandle, - common::page_idx_t pageIdx, bool doNotReadFromFile); - - void flushIfDirty(const std::unique_ptr& frame); - - void removePageFromFrame( - BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush); - -private: - std::shared_ptr logger; - uint64_t pageSize; - std::vector> bufferCache; - std::atomic clockHand; - common::page_idx_t numFrames; - BufferManagerMetrics bmMetrics; -}; - -} // namespace storage -} // namespace kuzu diff --git a/src/include/storage/buffer_manager/memory_manager.h b/src/include/storage/buffer_manager/memory_manager.h index 3d6208e650..bb10a960c9 100644 --- a/src/include/storage/buffer_manager/memory_manager.h +++ b/src/include/storage/buffer_manager/memory_manager.h @@ -11,42 +11,69 @@ namespace kuzu { namespace storage { -struct MemoryBlock { +class MemoryAllocator; +class MemoryBuffer { public: - explicit MemoryBlock(common::page_idx_t pageIdx, uint8_t* data) - : size(common::BufferPoolConstants::LARGE_PAGE_SIZE), pageIdx(pageIdx), data(data) {} + MemoryBuffer(MemoryAllocator* allocator, common::page_idx_t blockIdx, uint8_t* buffer); + ~MemoryBuffer(); public: - uint64_t size; + uint8_t* buffer; common::page_idx_t pageIdx; - uint8_t* data; + MemoryAllocator* allocator; }; -// Memory manager for allocating/reclaiming large intermediate memory blocks. It can allocate a -// memory block with fixed size of LARGE_PAGE_SIZE from the buffer manager. -class MemoryManager { +class MemoryAllocator { + friend class MemoryBuffer; + public: - explicit MemoryManager(BufferManager* bm) : bm(bm) { - // Because the memory manager only manages blocks in memory, this file should never be - // created, opened, or written to. It's a place_holder name. We keep the name for logging - // purposes. - fh = bm->getBufferManagedFileHandle("mm-place-holder-file-name", - FileHandle::O_IN_MEM_TEMP_FILE, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); - } + explicit MemoryAllocator(BufferManager* bm); + ~MemoryAllocator(); - std::unique_ptr allocateBlock(bool initializeToZero = false); + std::unique_ptr allocateBuffer(bool initializeToZero = false); + inline common::page_offset_t getPageSize() const { return pageSize; } +private: void freeBlock(common::page_idx_t pageIdx); +private: + std::unique_ptr fh; + BufferManager* bm; + common::page_offset_t pageSize; + std::stack freePages; + std::mutex allocatorLock; +}; + +/* + * The Memory Manager (MM) is used for allocating/reclaiming intermediate memory blocks. + * It can allocate a memory buffer of size PAGE_256KB from the buffer manager backed by a + * BMFileHandle with temp in-mem file. + * + * Internally, MM uses a MemoryAllocator. The MemoryAllocator is holding the BMFileHandle backed by + * a temp in-mem file, and responsible for allocating/reclaiming memory buffers of its size class + * from the buffer manager. The MemoryAllocator keeps track of free pages in the BMFileHandle, so + * that it can reuse those freed pages without allocating new pages. The MemoryAllocator is + * thread-safe, so that multiple threads can allocate/reclaim memory blocks with the same size class + * at the same time. 
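[Editorial note, not part of the patch] A short usage sketch of the allocation path described here: the caller asks the MemoryManager for a buffer, and, as the comment below notes, the returned MemoryBuffer hands its page back to the MemoryAllocator for reuse when it is destroyed. The helper function is hypothetical:

#include "storage/buffer_manager/memory_manager.h"

using namespace kuzu::storage;

void useScratchBuffer(MemoryManager& mm) {
    // Allocates one PAGE_256KB buffer, pinned through the BufferManager-backed temp in-mem file.
    auto buffer = mm.allocateBuffer(true /* initializeToZero */);
    buffer->buffer[0] = 42; // write into the frame backing this buffer
    // `buffer` going out of scope reclaims the page: ~MemoryBuffer() calls back into its
    // MemoryAllocator, which records the page as free for later allocations.
}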
+ * + * MM will return a MemoryBuffer to the caller, which is a wrapper of the allocated memory block, + * and it will automatically call its allocator to reclaim the memory block when it is destroyed. + */ +class MemoryManager { +public: + explicit MemoryManager(BufferManager* bm) : bm{bm} { + allocator = std::make_unique(bm); + } + + inline std::unique_ptr allocateBuffer(bool initializeToZero = false) { + return allocator->allocateBuffer(initializeToZero); + } inline BufferManager* getBufferManager() const { return bm; } private: - std::unique_ptr fh; BufferManager* bm; - std::stack freePages; - std::mutex memMgrLock; + std::unique_ptr allocator; }; } // namespace storage } // namespace kuzu diff --git a/src/include/storage/buffer_manager/vm_region.h b/src/include/storage/buffer_manager/vm_region.h new file mode 100644 index 0000000000..9a48db392f --- /dev/null +++ b/src/include/storage/buffer_manager/vm_region.h @@ -0,0 +1,46 @@ +#pragma once + +#include + +#include "common/constants.h" +#include "common/types/types.h" + +namespace kuzu { +namespace storage { + +// A VMRegion holds a virtual memory region of a certain size allocated through mmap. +// The region is divided into frame groups, each of which is a group of frames of the same size. +// Each BMFileHandle should grab a frame group each time when they add a new file page group (see +// `BMFileHandle::addNewPageGroupWithoutLock`). In this way, each file page group uniquely +// corresponds to a frame group, thus, a page also uniquely corresponds to a frame in a VMRegion. +class VMRegion { + friend class BufferManager; + +public: + explicit VMRegion(common::PageSizeClass pageSizeClass, uint64_t maxRegionSize); + ~VMRegion(); + + common::frame_group_idx_t addNewFrameGroup(); + + // Use `MADV_DONTNEED` to release physical memory associated with this frame. + void releaseFrame(common::frame_idx_t frameIdx); + + inline uint8_t* getFrame(common::frame_idx_t frameIdx) { + return region + ((std::uint64_t)frameIdx * frameSize); + } + +private: + inline uint64_t getMaxRegionSize() const { + return maxNumFrameGroups * frameSize * common::StorageConstants::PAGE_GROUP_SIZE; + } + +private: + std::mutex mtx; + uint8_t* region; + uint32_t frameSize; + uint64_t numFrameGroups; + uint64_t maxNumFrameGroups; +}; + +} // namespace storage +} // namespace kuzu diff --git a/src/include/storage/file_handle.h b/src/include/storage/file_handle.h index cd63b8a757..35d4981803 100644 --- a/src/include/storage/file_handle.h +++ b/src/include/storage/file_handle.h @@ -48,8 +48,8 @@ class FileHandle { inline common::page_idx_t getNumPages() const { return numPages; } inline common::FileInfo* getFileInfo() const { return fileInfo.get(); } inline uint64_t getPageSize() const { - return isLargePaged() ? common::BufferPoolConstants::LARGE_PAGE_SIZE : - common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + return isLargePaged() ? 
common::BufferPoolConstants::PAGE_256KB_SIZE : + common::BufferPoolConstants::PAGE_4KB_SIZE; } protected: diff --git a/src/include/storage/index/hash_index.h b/src/include/storage/index/hash_index.h index 0658c301cb..d5347c1c5f 100644 --- a/src/include/storage/index/hash_index.h +++ b/src/include/storage/index/hash_index.h @@ -93,7 +93,7 @@ class HashIndex : public BaseHashIndex { void prepareCommitOrRollbackIfNecessary(bool isCommit); void checkpointInMemoryIfNecessary(); void rollbackInMemoryIfNecessary() const; - inline BufferManagedFileHandle* getFileHandle() const { return fileHandle.get(); } + inline BMFileHandle* getFileHandle() const { return fileHandle.get(); } private: template @@ -134,7 +134,7 @@ class HashIndex : public BaseHashIndex { StorageStructureIDAndFName storageStructureIDAndFName; BufferManager& bm; WAL* wal; - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; std::unique_ptr> headerArray; std::unique_ptr>> pSlots; std::unique_ptr>> oSlots; @@ -196,7 +196,7 @@ class PrimaryKeyIndex { hashIndexForInt64->prepareCommitOrRollbackIfNecessary(isCommit) : hashIndexForString->prepareCommitOrRollbackIfNecessary(isCommit); } - inline BufferManagedFileHandle* getFileHandle() { + inline BMFileHandle* getFileHandle() { return keyDataTypeID == common::INT64 ? hashIndexForInt64->getFileHandle() : hashIndexForString->getFileHandle(); } diff --git a/src/include/storage/storage_structure/column.h b/src/include/storage/storage_structure/column.h index 2187297aa1..a3b9d16ae7 100644 --- a/src/include/storage/storage_structure/column.h +++ b/src/include/storage/storage_structure/column.h @@ -89,9 +89,7 @@ class PropertyColumnWithOverflow : public Column { } inline DiskOverflowFile* getDiskOverflowFile() { return &diskOverflowFile; } - inline BufferManagedFileHandle* getDiskOverflowFileHandle() { - return diskOverflowFile.getFileHandle(); - } + inline BMFileHandle* getDiskOverflowFileHandle() { return diskOverflowFile.getFileHandle(); } protected: DiskOverflowFile diskOverflowFile; diff --git a/src/include/storage/storage_structure/disk_array.h b/src/include/storage/storage_structure/disk_array.h index 98be02be8b..935a9e614c 100644 --- a/src/include/storage/storage_structure/disk_array.h +++ b/src/include/storage/storage_structure/disk_array.h @@ -2,7 +2,7 @@ #include "common/constants.h" #include "common/types/types.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" #include "storage/wal/wal.h" #include "storage_structure_utils.h" #include "transaction/transaction.h" @@ -13,7 +13,7 @@ namespace storage { class FileHandle; static constexpr uint64_t NUM_PAGE_IDXS_PER_PIP = - (common::BufferPoolConstants::DEFAULT_PAGE_SIZE - sizeof(common::page_idx_t)) / + (common::BufferPoolConstants::PAGE_4KB_SIZE - sizeof(common::page_idx_t)) / sizeof(common::page_idx_t); /** @@ -199,10 +199,10 @@ class BaseInMemDiskArray : public BaseDiskArray { protected: inline uint64_t addInMemoryArrayPage(bool setToZero) { inMemArrayPages.emplace_back( - std::make_unique(common::BufferPoolConstants::DEFAULT_PAGE_SIZE)); + std::make_unique(common::BufferPoolConstants::PAGE_4KB_SIZE)); if (setToZero) { memset(inMemArrayPages[inMemArrayPages.size() - 1].get(), 0, - common::BufferPoolConstants::DEFAULT_PAGE_SIZE); + common::BufferPoolConstants::PAGE_4KB_SIZE); } return inMemArrayPages.size() - 1; } diff --git a/src/include/storage/storage_structure/disk_overflow_file.h b/src/include/storage/storage_structure/disk_overflow_file.h index 
0b858b2aa0..07aed6d498 100644 --- a/src/include/storage/storage_structure/disk_overflow_file.h +++ b/src/include/storage/storage_structure/disk_overflow_file.h @@ -24,7 +24,7 @@ class DiskOverflowFile : public StorageStructure { bufferManager, wal), loggedNewOverflowFileNextBytePosRecord{false} { nextBytePosToWriteTo = - fileHandle->getNumPages() * common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + fileHandle->getNumPages() * common::BufferPoolConstants::PAGE_4KB_SIZE; } static inline StorageStructureIDAndFName constructOverflowStorageStructureIDAndFName( @@ -73,7 +73,7 @@ class DiskOverflowFile : public StorageStructure { private: struct OverflowPageCache { - BufferManagedFileHandle* bufferManagedFileHandle = nullptr; + BMFileHandle* bmFileHandle = nullptr; common::page_idx_t pageIdx = UINT32_MAX; uint8_t* frame = nullptr; }; @@ -89,8 +89,8 @@ class DiskOverflowFile : public StorageStructure { void setListRecursiveIfNestedWithoutLock(const common::ku_list_t& inMemSrcList, common::ku_list_t& diskDstList, const common::DataType& dataType); void logNewOverflowFileNextBytePosRecordIfNecessaryWithoutLock(); - void pinOverflowPageCache(BufferManagedFileHandle* bufferManagedFileHandleToPin, - common::page_idx_t pageIdxToPin, OverflowPageCache& overflowPageCache); + void pinOverflowPageCache(BMFileHandle* bmFileHandleToPin, common::page_idx_t pageIdxToPin, + OverflowPageCache& overflowPageCache); void unpinOverflowPageCache(OverflowPageCache& overflowPageCache); private: diff --git a/src/include/storage/storage_structure/lists/list_headers.h b/src/include/storage/storage_structure/lists/list_headers.h index adce582ac3..45e66e4833 100644 --- a/src/include/storage/storage_structure/lists/list_headers.h +++ b/src/include/storage/storage_structure/lists/list_headers.h @@ -135,7 +135,7 @@ class ListHeaders : public BaseListHeaders { std::unique_ptr> headersDiskArray; private: - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; StorageStructureIDAndFName storageStructureIDAndFName; }; } // namespace storage diff --git a/src/include/storage/storage_structure/lists/lists_metadata.h b/src/include/storage/storage_structure/lists/lists_metadata.h index aecedc7b43..5a1f146deb 100644 --- a/src/include/storage/storage_structure/lists/lists_metadata.h +++ b/src/include/storage/storage_structure/lists/lists_metadata.h @@ -69,7 +69,7 @@ class ListsMetadata : public BaseListsMetadata { } private: - std::unique_ptr metadataVersionedFileHandle; + std::unique_ptr metadataVersionedFileHandle; StorageStructureIDAndFName storageStructureIDAndFName; // chunkToPageListHeadIdxMapBuilder holds pointers to the head of pageList of each chunk. 
// For instance, chunkToPageListHeadIdxMapBuilder[3] is a pointer in `pageLists` from where diff --git a/src/include/storage/storage_structure/lists/lists_update_iterator.h b/src/include/storage/storage_structure/lists/lists_update_iterator.h index cc77d805ca..704930a6d5 100644 --- a/src/include/storage/storage_structure/lists/lists_update_iterator.h +++ b/src/include/storage/storage_structure/lists/lists_update_iterator.h @@ -112,7 +112,7 @@ class AdjListsUpdateIterator : public ListsUpdateIterator { common::list_header_t oldHeader, uint64_t numElementsAfterInsertion) override { return ListHeaders::isALargeList(oldHeader) || numElementsAfterInsertion * lists->elementSize > - common::BufferPoolConstants::DEFAULT_PAGE_SIZE; + common::BufferPoolConstants::PAGE_4KB_SIZE; } }; diff --git a/src/include/storage/storage_structure/storage_structure.h b/src/include/storage/storage_structure/storage_structure.h index a601565a31..0b5e3e82d9 100644 --- a/src/include/storage/storage_structure/storage_structure.h +++ b/src/include/storage/storage_structure/storage_structure.h @@ -26,14 +26,14 @@ class StorageStructure { : logger{common::LoggerUtils::getLogger(common::LoggerConstants::LoggerEnum::STORAGE)}, storageStructureID{storageStructureIDAndFName.storageStructureID}, bufferManager{bufferManager}, wal{wal} { - fileHandle = bufferManager.getBufferManagedFileHandle(storageStructureIDAndFName.fName, + fileHandle = bufferManager.getBMFileHandle(storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + BMFileHandle::FileVersionedType::VERSIONED_FILE); } virtual ~StorageStructure() = default; - inline BufferManagedFileHandle* getFileHandle() { return fileHandle.get(); } + inline BMFileHandle* getFileHandle() { return fileHandle.get(); } protected: void addNewPageToFileHandle(); @@ -50,7 +50,7 @@ class StorageStructure { protected: std::shared_ptr logger; StorageStructureID storageStructureID; - std::unique_ptr fileHandle; + std::unique_ptr fileHandle; BufferManager& bufferManager; WAL* wal; }; diff --git a/src/include/storage/storage_structure/storage_structure_utils.h b/src/include/storage/storage_structure/storage_structure_utils.h index f82f82cc05..c840e335c0 100644 --- a/src/include/storage/storage_structure/storage_structure_utils.h +++ b/src/include/storage/storage_structure/storage_structure_utils.h @@ -6,7 +6,7 @@ #include #include "common/types/types.h" -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" #include "storage/buffer_manager/buffer_manager.h" #include "storage/wal/wal.h" #include "transaction/transaction.h" @@ -41,36 +41,33 @@ class StorageStructureUtils { constexpr static uint32_t NULL_CHUNK_OR_LARGE_LIST_HEAD_IDX = UINT32_MAX; public: - static std::pair - getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHandle& fileHandle, - common::page_idx_t physicalPageIdx, WAL& wal, transaction::TransactionType trxType); + static std::pair getFileHandleAndPhysicalPageIdxToPin( + BMFileHandle& fileHandle, common::page_idx_t physicalPageIdx, WAL& wal, + transaction::TransactionType trxType); static WALPageIdxAndFrame createWALVersionIfNecessaryAndPinPage( - common::page_idx_t originalPageIdx, bool insertingNewPage, - BufferManagedFileHandle& fileHandle, StorageStructureID storageStructureID, - BufferManager& bufferManager, WAL& wal); + common::page_idx_t originalPageIdx, bool insertingNewPage, BMFileHandle& fileHandle, + StorageStructureID 
storageStructureID, BufferManager& bufferManager, WAL& wal); - static void readWALVersionOfPage(BufferManagedFileHandle& fileHandle, - common::page_idx_t originalPageIdx, BufferManager& bufferManager, WAL& wal, - const std::function& readOp); + static void readWALVersionOfPage(BMFileHandle& fileHandle, common::page_idx_t originalPageIdx, + BufferManager& bufferManager, WAL& wal, const std::function& readOp); // Note: This function updates a page "transactionally", i.e., creates the WAL version of the // page if it doesn't exist. For the original page to be updated, the current WRITE trx needs to // commit and checkpoint. - static void updatePage(BufferManagedFileHandle& fileHandle, - StorageStructureID storageStructureID, common::page_idx_t originalPageIdx, - bool isInsertingNewPage, BufferManager& bufferManager, WAL& wal, - const std::function& updateOp); + static void updatePage(BMFileHandle& fileHandle, StorageStructureID storageStructureID, + common::page_idx_t originalPageIdx, bool isInsertingNewPage, BufferManager& bufferManager, + WAL& wal, const std::function& updateOp); // Unpins the WAL version of a page that was updated and releases the lock of the page (recall // we use the same lock to do operations on both the original and WAL versions of the page). static void unpinWALPageAndReleaseOriginalPageLock(WALPageIdxAndFrame& walPageIdxAndFrame, - BufferManagedFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal); + BMFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal); private: static void unpinPageIdxInWALAndReleaseOriginalPageLock(common::page_idx_t pageIdxInWAL, - common::page_idx_t originalPageIdx, BufferManagedFileHandle& fileHandle, - BufferManager& bufferManager, WAL& wal); + common::page_idx_t originalPageIdx, BMFileHandle& fileHandle, BufferManager& bufferManager, + WAL& wal); }; } // namespace storage } // namespace kuzu diff --git a/src/include/storage/wal/wal.h b/src/include/storage/wal/wal.h index 9b01f2bcee..313e870ef0 100644 --- a/src/include/storage/wal/wal.h +++ b/src/include/storage/wal/wal.h @@ -14,7 +14,7 @@ namespace kuzu { namespace storage { using lock_t = std::unique_lock; -constexpr uint64_t WAL_HEADER_PAGE_SIZE = common::BufferPoolConstants::DEFAULT_PAGE_SIZE; +constexpr uint64_t WAL_HEADER_PAGE_SIZE = common::BufferPoolConstants::PAGE_4KB_SIZE; constexpr uint64_t WAL_HEADER_PAGE_NUM_RECORDS_FIELD_SIZE = sizeof(uint64_t); constexpr uint64_t WAL_HEADER_PAGE_NEXT_HEADER_PAGE_IDX_FIELD_SIZE = sizeof(common::page_idx_t); constexpr uint64_t WAL_HEADER_PAGE_PREFIX_FIELD_SIZES = @@ -27,7 +27,7 @@ class BaseWALAndWALIterator { protected: BaseWALAndWALIterator() : BaseWALAndWALIterator(nullptr) {} - explicit BaseWALAndWALIterator(std::shared_ptr fileHandle) + explicit BaseWALAndWALIterator(std::shared_ptr fileHandle) : fileHandle{std::move(fileHandle)}, offsetInCurrentHeaderPage{INT64_MAX}, currentHeaderPageIdx{INT32_MAX} { currentHeaderPageBuffer = std::make_unique(WAL_HEADER_PAGE_SIZE); @@ -60,7 +60,7 @@ class BaseWALAndWALIterator { } public: - std::shared_ptr fileHandle; + std::shared_ptr fileHandle; // Used by WAL as the next offset to write and by WALIterator as the next offset to read uint64_t offsetInCurrentHeaderPage; // First header page of the WAL, if it exists, is always located at page 0 of the WAL. 
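As a side note on the WAL_HEADER_PAGE_* constants above: each WAL header page spends a small fixed prefix on bookkeeping and uses the rest for serialized records. A standalone sketch of that arithmetic (not part of the patch; it assumes page_idx_t is a 32-bit page index):

#include <cstdint>
#include <cstdio>

int main() {
    // Mirrors the constants above: a 4KB header page starts with a record counter
    // and the index of the next header page; the remainder holds serialized records.
    constexpr uint64_t headerPageSize = 4096;                          // PAGE_4KB_SIZE
    constexpr uint64_t numRecordsFieldSize = sizeof(uint64_t);         // 8 bytes
    constexpr uint64_t nextHeaderPageIdxFieldSize = sizeof(uint32_t);  // assumed page_idx_t width
    constexpr uint64_t prefixFieldSizes = numRecordsFieldSize + nextHeaderPageIdxFieldSize;
    std::printf("bytes per header page available for WAL records: %llu\n",
        (unsigned long long)(headerPageSize - prefixFieldSizes));      // 4084
    return 0;
}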
@@ -172,7 +172,7 @@ class WAL : public BaseWALAndWALIterator { class WALIterator : public BaseWALAndWALIterator { public: - explicit WALIterator(std::shared_ptr fileHandle, std::mutex& mtx); + explicit WALIterator(std::shared_ptr fileHandle, std::mutex& mtx); inline bool hasNextRecord() { lock_t lck{mtx}; diff --git a/src/include/storage/wal_replayer.h b/src/include/storage/wal_replayer.h index 7697dc27a3..99cbc21fae 100644 --- a/src/include/storage/wal_replayer.h +++ b/src/include/storage/wal_replayer.h @@ -32,9 +32,9 @@ class WALReplayer { void replayWALRecord(WALRecord& walRecord); void checkpointOrRollbackVersionedFileHandleAndBufferManager( const WALRecord& walRecord, const StorageStructureID& storageStructureID); - void truncateFileIfInsertion(BufferManagedFileHandle* fileHandle, - const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord); - BufferManagedFileHandle* getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( + void truncateFileIfInsertion( + BMFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord); + BMFileHandle* getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( const StorageStructureID& storageStructureID); std::unique_ptr getCatalogForRecovery(common::DBFileType dbFileType); @@ -46,7 +46,7 @@ class WALReplayer { StorageManager* storageManager; BufferManager* bufferManager; MemoryManager* memoryManager; - std::shared_ptr walFileHandle; + std::shared_ptr walFileHandle; std::unique_ptr pageBuffer; std::shared_ptr logger; WAL* wal; diff --git a/src/main/database.cpp b/src/main/database.cpp index ec042a15a1..2a9836ce59 100644 --- a/src/main/database.cpp +++ b/src/main/database.cpp @@ -19,17 +19,14 @@ namespace main { SystemConfig::SystemConfig() : SystemConfig(-1u) {} -SystemConfig::SystemConfig(uint64_t bufferPoolSize) { - if (bufferPoolSize == -1u) { +SystemConfig::SystemConfig(uint64_t bufferPoolSize_) { + if (bufferPoolSize_ == -1u) { auto systemMemSize = (std::uint64_t)sysconf(_SC_PHYS_PAGES) * (std::uint64_t)sysconf(_SC_PAGESIZE); - bufferPoolSize = (uint64_t)(StorageConstants::DEFAULT_BUFFER_POOL_RATIO * - (double_t)std::min(systemMemSize, (std::uint64_t)UINTPTR_MAX)); + bufferPoolSize_ = (uint64_t)(BufferPoolConstants::DEFAULT_PHY_MEM_SIZE_RATIO_FOR_BM * + (double_t)std::min(systemMemSize, (std::uint64_t)UINTPTR_MAX)); } - defaultPageBufferPoolSize = - (uint64_t)((double_t)bufferPoolSize * StorageConstants::DEFAULT_PAGES_BUFFER_RATIO); - largePageBufferPoolSize = - (uint64_t)((double_t)bufferPoolSize * StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferPoolSize = bufferPoolSize_; maxNumThreads = std::thread::hardware_concurrency(); } @@ -40,8 +37,7 @@ Database::Database(std::string databasePath, SystemConfig systemConfig) initLoggers(); initDBDirAndCoreFilesIfNecessary(); logger = LoggerUtils::getLogger(LoggerConstants::LoggerEnum::DATABASE); - bufferManager = std::make_unique( - this->systemConfig.defaultPageBufferPoolSize, this->systemConfig.largePageBufferPoolSize); + bufferManager = std::make_unique(this->systemConfig.bufferPoolSize); memoryManager = std::make_unique(bufferManager.get()); wal = std::make_unique(this->databasePath, *bufferManager); recoverIfNecessary(); diff --git a/src/processor/operator/aggregate/aggregate_hash_table.cpp b/src/processor/operator/aggregate/aggregate_hash_table.cpp index b9beafb282..dfe85929ad 100644 --- a/src/processor/operator/aggregate/aggregate_hash_table.cpp +++ b/src/processor/operator/aggregate/aggregate_hash_table.cpp @@ -178,9 +178,9 @@ void AggregateHashTable::initializeFT( void 
AggregateHashTable::initializeHashTable(uint64_t numEntriesToAllocate) { maxNumHashSlots = nextPowerOfTwo( - std::max(BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(HashSlot), numEntriesToAllocate)); + std::max(BufferPoolConstants::PAGE_256KB_SIZE / sizeof(HashSlot), numEntriesToAllocate)); bitmask = maxNumHashSlots - 1; - auto numHashSlotsPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(HashSlot); + auto numHashSlotsPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / sizeof(HashSlot); assert(numHashSlotsPerBlock == nextPowerOfTwo(numHashSlotsPerBlock)); numSlotsPerBlockLog2 = log2(numHashSlotsPerBlock); slotIdxInBlockMask = BitmaskUtils::all1sMaskForLeastSignificantBits(numSlotsPerBlockLog2); diff --git a/src/processor/operator/hash_join/join_hash_table.cpp b/src/processor/operator/hash_join/join_hash_table.cpp index 635e131d44..93944c6f8a 100644 --- a/src/processor/operator/hash_join/join_hash_table.cpp +++ b/src/processor/operator/hash_join/join_hash_table.cpp @@ -12,7 +12,7 @@ namespace processor { JoinHashTable::JoinHashTable(MemoryManager& memoryManager, uint64_t numKeyColumns, std::unique_ptr tableSchema) : BaseHashTable{memoryManager}, numKeyColumns{numKeyColumns} { - auto numSlotsPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / sizeof(uint8_t*); + auto numSlotsPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / sizeof(uint8_t*); assert(numSlotsPerBlock == nextPowerOfTwo(numSlotsPerBlock)); numSlotsPerBlockLog2 = std::log2(numSlotsPerBlock); slotIdxInBlockMask = BitmaskUtils::all1sMaskForLeastSignificantBits(numSlotsPerBlockLog2); diff --git a/src/processor/operator/order_by/key_block_merger.cpp b/src/processor/operator/order_by/key_block_merger.cpp index 51ff0e3e81..0079bd7436 100644 --- a/src/processor/operator/order_by/key_block_merger.cpp +++ b/src/processor/operator/order_by/key_block_merger.cpp @@ -13,7 +13,7 @@ namespace processor { MergedKeyBlocks::MergedKeyBlocks( uint32_t numBytesPerTuple, uint64_t numTuples, MemoryManager* memoryManager) : numBytesPerTuple{numBytesPerTuple}, - numTuplesPerBlock{(uint32_t)(BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple)}, + numTuplesPerBlock{(uint32_t)(BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple)}, numTuples{numTuples}, endTupleOffset{numTuplesPerBlock * numBytesPerTuple} { auto numKeyBlocks = numTuples / numTuplesPerBlock + (numTuples % numTuplesPerBlock ? 1 : 0); for (auto i = 0u; i < numKeyBlocks; i++) { @@ -24,7 +24,7 @@ MergedKeyBlocks::MergedKeyBlocks( // This constructor is used to convert a keyBlock to a MergedKeyBlocks. 
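The numSlotsPerBlockLog2/slotIdxInBlockMask fields set up in the two hash-table hunks above follow one pattern: with a power-of-two number of slots per 256KB block, a global slot index splits into a block index (high bits) and a slot offset inside the block (low bits). A minimal standalone sketch of that arithmetic, assuming 8-byte slots as in JoinHashTable:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    constexpr uint64_t blockSize = 256 * 1024;       // BufferPoolConstants::PAGE_256KB_SIZE
    constexpr uint64_t slotSize = sizeof(uint8_t*);  // 8 bytes on a 64-bit build
    constexpr uint64_t numSlotsPerBlock = blockSize / slotSize;
    static_assert((numSlotsPerBlock & (numSlotsPerBlock - 1)) == 0,
        "slots per block must be a power of two");

    uint64_t numSlotsPerBlockLog2 = 0;
    while ((1ull << numSlotsPerBlockLog2) < numSlotsPerBlock) {
        numSlotsPerBlockLog2++;
    }
    const uint64_t slotIdxInBlockMask = numSlotsPerBlock - 1;

    uint64_t globalSlotIdx = 100000;
    uint64_t blockIdx = globalSlotIdx >> numSlotsPerBlockLog2;  // which 256KB block
    uint64_t slotInBlock = globalSlotIdx & slotIdxInBlockMask;  // offset within that block
    std::printf("slot %llu -> block %llu, offset %llu\n", (unsigned long long)globalSlotIdx,
        (unsigned long long)blockIdx, (unsigned long long)slotInBlock);
    assert(blockIdx * numSlotsPerBlock + slotInBlock == globalSlotIdx);
    return 0;
}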
MergedKeyBlocks::MergedKeyBlocks(uint32_t numBytesPerTuple, std::shared_ptr keyBlock) : numBytesPerTuple{numBytesPerTuple}, - numTuplesPerBlock{(uint32_t)(BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple)}, + numTuplesPerBlock{(uint32_t)(BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple)}, numTuples{keyBlock->numTuples}, endTupleOffset{numTuplesPerBlock * numBytesPerTuple} { keyBlocks.emplace_back(keyBlock); } diff --git a/src/processor/operator/order_by/order_by_key_encoder.cpp b/src/processor/operator/order_by/order_by_key_encoder.cpp index a725fd5078..e9f3ed2750 100644 --- a/src/processor/operator/order_by/order_by_key_encoder.cpp +++ b/src/processor/operator/order_by/order_by_key_encoder.cpp @@ -22,11 +22,11 @@ OrderByKeyEncoder::OrderByKeyEncoder(std::vector& orderByVectors, } keyBlocks.emplace_back(std::make_unique(memoryManager)); assert(this->numBytesPerTuple == getNumBytesPerTuple()); - maxNumTuplesPerBlock = BufferPoolConstants::LARGE_PAGE_SIZE / numBytesPerTuple; + maxNumTuplesPerBlock = BufferPoolConstants::PAGE_256KB_SIZE / numBytesPerTuple; if (maxNumTuplesPerBlock <= 0) { throw RuntimeException(StringUtils::string_format( "TupleSize({} bytes) is larger than the LARGE_PAGE_SIZE({} bytes)", numBytesPerTuple, - BufferPoolConstants::LARGE_PAGE_SIZE)); + BufferPoolConstants::PAGE_256KB_SIZE)); } encodeFunctions.resize(orderByVectors.size()); for (auto i = 0u; i < orderByVectors.size(); i++) { diff --git a/src/processor/result/factorized_table.cpp b/src/processor/result/factorized_table.cpp index fbf87dcea3..83aa56eca3 100644 --- a/src/processor/result/factorized_table.cpp +++ b/src/processor/result/factorized_table.cpp @@ -87,11 +87,11 @@ void DataBlockCollection::merge(DataBlockCollection& other) { FactorizedTable::FactorizedTable( MemoryManager* memoryManager, std::unique_ptr tableSchema) : memoryManager{memoryManager}, tableSchema{std::move(tableSchema)}, numTuples{0} { - assert(this->tableSchema->getNumBytesPerTuple() <= BufferPoolConstants::LARGE_PAGE_SIZE); + assert(this->tableSchema->getNumBytesPerTuple() <= BufferPoolConstants::PAGE_256KB_SIZE); if (!this->tableSchema->isEmpty()) { inMemOverflowBuffer = std::make_unique(memoryManager); numTuplesPerBlock = - BufferPoolConstants::LARGE_PAGE_SIZE / this->tableSchema->getNumBytesPerTuple(); + BufferPoolConstants::PAGE_256KB_SIZE / this->tableSchema->getNumBytesPerTuple(); flatTupleBlockCollection = std::make_unique( this->tableSchema->getNumBytesPerTuple(), numTuplesPerBlock); unflatTupleBlockCollection = std::make_unique(); @@ -119,7 +119,7 @@ uint8_t* FactorizedTable::appendEmptyTuple() { flatTupleBlockCollection->append(std::make_unique(memoryManager)); } auto& block = flatTupleBlockCollection->getBlocks().back(); - uint8_t* tuplePtr = block->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - block->freeSize; + uint8_t* tuplePtr = block->getData() + BufferPoolConstants::PAGE_256KB_SIZE - block->freeSize; block->freeSize -= tableSchema->getNumBytesPerTuple(); block->numTuples++; numTuples++; @@ -380,7 +380,7 @@ uint64_t FactorizedTable::computeNumTuplesToAppend( std::vector FactorizedTable::allocateFlatTupleBlocks( uint64_t numTuplesToAppend) { auto numBytesPerTuple = tableSchema->getNumBytesPerTuple(); - assert(numBytesPerTuple < BufferPoolConstants::LARGE_PAGE_SIZE); + assert(numBytesPerTuple < BufferPoolConstants::PAGE_256KB_SIZE); std::vector appendingInfos; while (numTuplesToAppend > 0) { if (flatTupleBlockCollection->isEmpty() || @@ -391,7 +391,7 @@ std::vector FactorizedTable::allocateFlatTupleBlocks( 
auto numTuplesToAppendInCurBlock = std::min(numTuplesToAppend, block->freeSize / numBytesPerTuple); appendingInfos.emplace_back( - block->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - block->freeSize, + block->getData() + BufferPoolConstants::PAGE_256KB_SIZE - block->freeSize, numTuplesToAppendInCurBlock); block->freeSize -= numTuplesToAppendInCurBlock * numBytesPerTuple; block->numTuples += numTuplesToAppendInCurBlock; @@ -401,14 +401,14 @@ std::vector FactorizedTable::allocateFlatTupleBlocks( } uint8_t* FactorizedTable::allocateUnflatTupleBlock(uint32_t numBytes) { - assert(numBytes < BufferPoolConstants::LARGE_PAGE_SIZE); + assert(numBytes < BufferPoolConstants::PAGE_256KB_SIZE); if (unflatTupleBlockCollection->isEmpty()) { unflatTupleBlockCollection->append(std::make_unique(memoryManager)); } auto lastBlock = unflatTupleBlockCollection->getBlocks().back().get(); if (lastBlock->freeSize > numBytes) { lastBlock->freeSize -= numBytes; - return lastBlock->getData() + BufferPoolConstants::LARGE_PAGE_SIZE - lastBlock->freeSize - + return lastBlock->getData() + BufferPoolConstants::PAGE_256KB_SIZE - lastBlock->freeSize - numBytes; } unflatTupleBlockCollection->append(std::make_unique(memoryManager)); diff --git a/src/storage/buffer_manager/CMakeLists.txt b/src/storage/buffer_manager/CMakeLists.txt index b3cbc4e4ea..4c52cda0a5 100644 --- a/src/storage/buffer_manager/CMakeLists.txt +++ b/src/storage/buffer_manager/CMakeLists.txt @@ -1,8 +1,8 @@ add_library(kuzu_storage_buffer_manager OBJECT - buffer_managed_file_handle.cpp + vm_region.cpp + bm_file_handle.cpp buffer_manager.cpp - buffer_pool.cpp memory_manager.cpp) set(ALL_OBJECT_FILES diff --git a/src/storage/buffer_manager/buffer_managed_file_handle.cpp b/src/storage/buffer_manager/bm_file_handle.cpp similarity index 60% rename from src/storage/buffer_manager/buffer_managed_file_handle.cpp rename to src/storage/buffer_manager/bm_file_handle.cpp index 216c7a2647..b63272ae81 100644 --- a/src/storage/buffer_manager/buffer_managed_file_handle.cpp +++ b/src/storage/buffer_manager/bm_file_handle.cpp @@ -1,61 +1,74 @@ -#include "storage/buffer_manager/buffer_managed_file_handle.h" +#include "storage/buffer_manager/bm_file_handle.h" + +#include "storage/buffer_manager/buffer_manager.h" using namespace kuzu::common; namespace kuzu { namespace storage { -BufferManagedFileHandle::BufferManagedFileHandle( - const std::string& path, uint8_t flags, FileVersionedType fileVersionedType) - : FileHandle{path, flags}, fileVersionedType{fileVersionedType} { - initPageIdxToFrameMapAndLocks(); - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); - } +void PageState::setInFrame(common::page_idx_t pageIdx_) { + pageIdx = 0; + pageIdx = pageIdx_; + pageIdx |= IS_IN_FRAME_MASK; } -void BufferManagedFileHandle::initPageIdxToFrameMapAndLocks() { - pageIdxToFrameMap.resize(pageCapacity); - pageLocks.resize(pageCapacity); - for (auto i = 0ull; i < numPages; i++) { - pageIdxToFrameMap[i] = std::make_unique>(UINT32_MAX); - pageLocks[i] = std::make_unique(); +bool PageState::acquireLock(LockMode lockMode) { + if (lockMode == LockMode::SPIN) { + while (lock.test_and_set()) // spinning + ; + return true; } + return !lock.test_and_set(); } -common::page_idx_t BufferManagedFileHandle::addNewPageWithoutLock() { - if (numPages == pageCapacity) { - pageCapacity += StorageConstants::PAGE_GROUP_SIZE; - pageIdxToFrameMap.resize(pageCapacity); - pageLocks.resize(pageCapacity); +void PageState::resetState() { + pageIdx 
= 0; + pinCount = 0; + evictionTimestamp = 0; +} + +BMFileHandle::BMFileHandle(const std::string& path, uint8_t flags, BufferManager* bm, + common::PageSizeClass pageSizeClass, FileVersionedType fileVersionedType) + : FileHandle{path, flags}, bm{bm}, pageSizeClass{pageSizeClass}, fileVersionedType{ + fileVersionedType} { + initPageStatesAndGroups(); +} + +void BMFileHandle::initPageStatesAndGroups() { + pageStates.resize(pageCapacity); + for (auto i = 0ull; i < numPages; i++) { + pageStates[i] = std::make_unique(); } - pageLocks[numPages] = std::make_unique(); - pageIdxToFrameMap[numPages] = std::make_unique>(UINT32_MAX); - auto newPageIdx = numPages++; - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); + auto numPageGroups = getNumPageGroups(); + frameGroupIdxes.resize(numPageGroups); + pageGroupLocks.resize(numPageGroups); + pageVersions.resize(numPageGroups); + for (auto i = 0u; i < numPageGroups; i++) { + frameGroupIdxes[i] = bm->addNewFrameGroup(pageSizeClass); + pageGroupLocks[i] = std::make_unique(); } - return newPageIdx; } -bool BufferManagedFileHandle::acquirePageLock(page_idx_t pageIdx, bool block) { - if (block) { - while (!acquire(pageIdx)) {} // spinning - return true; +common::page_idx_t BMFileHandle::addNewPageWithoutLock() { + if (numPages == pageCapacity) { + addNewPageGroupWithoutLock(); } - return acquire(pageIdx); + pageStates[numPages] = std::make_unique(); + return numPages++; } -bool BufferManagedFileHandle::acquire(common::page_idx_t pageIdx) { - if (pageIdx >= pageLocks.size()) { - throw RuntimeException( - StringUtils::string_format("pageIdx {} is >= pageLocks.size()", pageIdx)); +void BMFileHandle::addNewPageGroupWithoutLock() { + pageCapacity += StorageConstants::PAGE_GROUP_SIZE; + pageStates.resize(pageCapacity); + frameGroupIdxes.push_back(bm->addNewFrameGroup(pageSizeClass)); + if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { + pageGroupLocks.push_back(std::make_unique()); + pageVersions.emplace_back(); } - auto retVal = !pageLocks[pageIdx]->test_and_set(std::memory_order_acquire); - return retVal; } -void BufferManagedFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageIdx) { +void BMFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); // Note that we do not have to acquire an xlock here because this function assumes that prior to // calling this function, pageVersion and pageGroupLocks have been resized correctly. 
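The new PageState above packs the "page is in a frame" flag and the page index into a single word (setInFrame ORs IS_IN_FRAME_MASK into pageIdx, resetState clears it). A toy sketch of that packing, not the patch's code; the actual mask is defined in bm_file_handle.h and the top bit of a 32-bit index is only assumed here:

#include <cassert>
#include <cstdint>

// Assumed flag bit; the real constant lives in bm_file_handle.h.
constexpr uint32_t IS_IN_FRAME_MASK = 0x80000000u;

struct PackedPageState {
    uint32_t pageIdx = 0;  // flag bit + page index share one word

    void setInFrame(uint32_t idx) { pageIdx = idx | IS_IN_FRAME_MASK; }
    bool isInFrame() const { return (pageIdx & IS_IN_FRAME_MASK) != 0; }
    uint32_t getPageIdx() const { return pageIdx & ~IS_IN_FRAME_MASK; }
    void resetState() { pageIdx = 0; }  // clears both the index and the in-frame flag
};

int main() {
    PackedPageState state;
    state.setInFrame(42);
    assert(state.isInFrame() && state.getPageIdx() == 42);
    state.resetState();
    assert(!state.isInFrame());
    return 0;
}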
@@ -79,50 +92,40 @@ void BufferManagedFileHandle::createPageVersionGroupIfNecessary(page_idx_t pageI pageGroupLocks[pageGroupIdxAndPosInGroup.pageIdx]->clear(); } -void BufferManagedFileHandle::resetToZeroPagesAndPageCapacity() { +void BMFileHandle::resetToZeroPagesAndPageCapacity() { std::unique_lock xlock(fhSharedMutex); numPages = 0; pageCapacity = 0; FileUtils::truncateFileToEmpty(fileInfo.get()); - initPageIdxToFrameMapAndLocks(); + initPageStatesAndGroups(); } -void BufferManagedFileHandle::removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx) { +void BMFileHandle::removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx) { std::unique_lock xLck{fhSharedMutex}; removePageIdxAndTruncateIfNecessaryWithoutLock(pageIdx); - if (fileVersionedType == FileVersionedType::VERSIONED_FILE) { - resizePageGroupLocksAndPageVersionsWithoutLock(); - } } -void BufferManagedFileHandle::removePageIdxAndTruncateIfNecessaryWithoutLock( +void BMFileHandle::removePageIdxAndTruncateIfNecessaryWithoutLock( common::page_idx_t pageIdxToRemove) { if (numPages <= pageIdxToRemove) { return; } for (auto pageIdx = pageIdxToRemove; pageIdx < numPages; ++pageIdx) { - pageIdxToFrameMap[pageIdx].reset(); - pageLocks[pageIdx].reset(); + pageStates[pageIdx].reset(); } numPages = pageIdxToRemove; -} - -void BufferManagedFileHandle::resizePageGroupLocksAndPageVersionsWithoutLock() { auto numPageGroups = getNumPageGroups(); - if (pageGroupLocks.size() == numPageGroups) { + if (numPageGroups == frameGroupIdxes.size()) { return; - } else if (pageGroupLocks.size() < numPageGroups) { - for (auto i = pageGroupLocks.size(); i < numPageGroups; ++i) { - pageGroupLocks.push_back(std::make_unique()); - } - } else { - pageGroupLocks.resize(numPageGroups); } + assert(numPageGroups < frameGroupIdxes.size()); + frameGroupIdxes.resize(numPageGroups); + pageGroupLocks.resize(numPageGroups); pageVersions.resize(numPageGroups); } // This function assumes that the caller has already acquired the lock for originalPageIdx. -bool BufferManagedFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { +bool BMFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); auto pageGroupIdxAndPosInGroup = PageUtils::getPageElementCursorForPos(originalPageIdx, StorageConstants::PAGE_GROUP_SIZE); @@ -141,7 +144,7 @@ bool BufferManagedFileHandle::hasWALPageVersionNoPageLock(common::page_idx_t ori return retVal; } -void BufferManagedFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t pageIdx) { +void BMFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t pageIdx) { { std::shared_lock sLck{fhSharedMutex}; if (numPages <= pageIdx) { @@ -150,12 +153,12 @@ void BufferManagedFileHandle::clearWALPageVersionIfNecessary(common::page_idx_t } createPageVersionGroupIfNecessary(pageIdx); setWALPageVersionNoLock(pageIdx, UINT32_MAX); + // TODO(Guodong): Why do we release lock here? Need to understand how the lock was acquired. releasePageLock(pageIdx); } // This function assumes that the caller has already acquired the lock for originalPageIdx. 
-common::page_idx_t BufferManagedFileHandle::getWALPageVersionNoPageLock( - common::page_idx_t originalPageIdx) { +common::page_idx_t BMFileHandle::getWALPageVersionNoPageLock(common::page_idx_t originalPageIdx) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); // See the comment about a shared lock in hasWALPageVersionNoPageLock std::shared_lock sLck{fhSharedMutex}; @@ -164,14 +167,14 @@ common::page_idx_t BufferManagedFileHandle::getWALPageVersionNoPageLock( return pageVersions[pageGroupIdxAndPosInGroup.pageIdx][pageGroupIdxAndPosInGroup.elemPosInPage]; } -void BufferManagedFileHandle::setWALPageVersion( +void BMFileHandle::setWALPageVersion( common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL) { assert(fileVersionedType == FileVersionedType::VERSIONED_FILE); std::shared_lock sLck{fhSharedMutex}; setWALPageVersionNoLock(originalPageIdx, pageIdxInWAL); } -void BufferManagedFileHandle::setWALPageVersionNoLock( +void BMFileHandle::setWALPageVersionNoLock( common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL) { auto pageGroupIdxAndPosInGroup = PageUtils::getPageElementCursorForPos(originalPageIdx, StorageConstants::PAGE_GROUP_SIZE); diff --git a/src/storage/buffer_manager/buffer_manager.cpp b/src/storage/buffer_manager/buffer_manager.cpp index 7d202f28a9..eca67841c1 100644 --- a/src/storage/buffer_manager/buffer_manager.cpp +++ b/src/storage/buffer_manager/buffer_manager.cpp @@ -1,5 +1,8 @@ #include "storage/buffer_manager/buffer_manager.h" +#include + +#include "common/constants.h" #include "common/exception.h" #include "spdlog/spdlog.h" @@ -8,13 +11,50 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -BufferManager::BufferManager(uint64_t maxSizeForDefaultPagePool, uint64_t maxSizeForLargePagePool) +// In this function, we try to remove as many non-evictable candidates as possible from the +// eviction queue until we hit a candidate that is evictable. +// Two kinds of candidates are not evictable: 1) the page is currently pinned; 2) the page has been recently +// visited. +// To identify those recently accessed candidates, we use the eviction timestamp. If the +// eviction timestamp of a candidate is different from the timestamp in its corresponding pageState, +// it means that the candidate has been recently visited and we should not evict it. The idea is +// that the eviction timestamp is a logical per-page counter starting from 0, and is incremented each +// time the page is pushed into the eviction queue. For example, the first time p5 is pushed into +// the eviction queue, it will end up with a timestamp 1. When we push a page into the queue, we +// create an EvictionCandidate object for the page. Let's call this object c0 when p5 is first +// pushed. c0 will consist of (ptr to p5, 1), where the latter is the eviction timestamp at the time +// c0 is put into the queue. Suppose p5 is put into the eviction queue again (e.g., because it was +// pinned and unpinned). At this point we create another EvictionCandidate object c1 (ptr to p5, 2) +// where the latter eviction timestamp is now incremented by 1, which makes c0 stale and thus not evictable. +// This idea is inspired by DuckDB's queue-based eviction implementation.
+void EvictionQueue::removeNonEvictableCandidates() { + EvictionCandidate evictionCandidate; + while (true) { + if (!queue->try_dequeue(evictionCandidate)) { + break; + } + if (evictionCandidate.pageState->getPinCount() != 0 || + evictionCandidate.pageState->getEvictionTimestamp() != + evictionCandidate.evictionTimestamp) { + // Remove the candidate from the eviction queue if it is still pinned or if it has + // been recently visited. + continue; + } else { + queue->enqueue(evictionCandidate); + break; + } + } +} + +BufferManager::BufferManager(uint64_t bufferPoolSize) : logger{LoggerUtils::getLogger(common::LoggerConstants::LoggerEnum::BUFFER_MANAGER)}, - bufferPoolDefaultPages(std::make_unique( - BufferPoolConstants::DEFAULT_PAGE_SIZE, maxSizeForDefaultPagePool)), - bufferPoolLargePages(std::make_unique( - BufferPoolConstants::LARGE_PAGE_SIZE, maxSizeForLargePagePool)) { - logger->info("Done Initializing Buffer Manager."); + usedMemory{0}, bufferPoolSize{bufferPoolSize}, numEvictionQueueInsertions{0} { + logger->info("Done initializing buffer manager."); + vmRegions.resize(2); + vmRegions[0] = std::make_unique( + PageSizeClass::PAGE_4KB, BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE); + vmRegions[1] = std::make_unique(PageSizeClass::PAGE_256KB, bufferPoolSize); + evictionQueue = std::make_unique(); } BufferManager::~BufferManager() = default; @@ -28,61 +68,196 @@ BufferManager::~BufferManager() = default; // should be flushed to disk if it is evicted. // (3) If multiple threads are writing to the page, they should coordinate separately because they // both get access to the same piece of memory. -uint8_t* BufferManager::pin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? bufferPoolLargePages->pin(fileHandle, pageIdx) : - bufferPoolDefaultPages->pin(fileHandle, pageIdx); -} - -// Important Note: This function will pin a page but if the page was not yet in a frame, it will -// not read it from the file. So this can be used if the page is a new page of a file, or a page -// of a temporary file that is being re-used and its contents is not important. -// -// If this is the new page of a file: the caller should call this function immediately after a new -// page is added FileHandle, ensuring that no other thread can try to pin the newly created page -// (with serious side effects). See the detailed explanation in FileHandle::addNewPage() for -// details. -uint8_t* BufferManager::pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? 
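A self-contained sketch of the staleness check described in the eviction-queue comment above. The patch itself uses moodycamel::ConcurrentQueue with the real PageState and EvictionCandidate types; this toy version uses plain atomics and std::queue only to show why a candidate enqueued before a later pin/unpin cycle is skipped:

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <queue>

struct ToyPageState {
    std::atomic<uint32_t> pinCount{0};
    std::atomic<uint64_t> evictionTimestamp{0};
};

struct ToyEvictionCandidate {
    ToyPageState* pageState;
    uint64_t evictionTimestamp;  // value captured when the candidate was enqueued
    bool isEvictable() const {
        // Evictable only if the page is unpinned and has not been re-enqueued since.
        return pageState->pinCount.load() == 0 &&
               pageState->evictionTimestamp.load() == evictionTimestamp;
    }
};

int main() {
    ToyPageState p5;
    std::queue<ToyEvictionCandidate> evictionQueue;

    // First unpin of p5: enqueue candidate c0 carrying timestamp 1.
    evictionQueue.push({&p5, ++p5.evictionTimestamp});
    // p5 is pinned and unpinned again: enqueue candidate c1 carrying timestamp 2.
    evictionQueue.push({&p5, ++p5.evictionTimestamp});

    // c0 is stale: its recorded timestamp (1) no longer matches p5's current one (2).
    std::printf("c0 evictable: %d\n", (int)evictionQueue.front().isEvictable());  // 0
    evictionQueue.pop();
    std::printf("c1 evictable: %d\n", (int)evictionQueue.front().isEvictable());  // 1
    return 0;
}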
- bufferPoolLargePages->pinWithoutReadingFromFile(fileHandle, pageIdx) : - bufferPoolDefaultPages->pinWithoutReadingFromFile(fileHandle, pageIdx); +uint8_t* BufferManager::pin( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto retVal = pinWithoutAcquiringPageLock(fileHandle, pageIdx, pageReadPolicy); + fileHandle.releasePageLock(pageIdx); + return retVal; +} + +uint8_t* BufferManager::pinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState->isInFrame()) { + pageState->incrementPinCount(); + } else { + if (!claimAFrame(fileHandle, pageIdx, pageReadPolicy)) { + pageState->releaseLock(); + throw BufferManagerException("Failed to claim a frame."); + } + } + return getFrame(fileHandle, pageIdx); } // Important Note: The caller should make sure that they have pinned the page before calling this. -void BufferManager::setPinnedPageDirty(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? bufferPoolLargePages->setPinnedPageDirty(fileHandle, pageIdx) : - bufferPoolDefaultPages->setPinnedPageDirty(fileHandle, pageIdx); +void BufferManager::setPinnedPageDirty(BMFileHandle& fileHandle, page_idx_t pageIdx) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState && pageState->getPinCount() >= 1) { + pageState->setDirty(); + fileHandle.releasePageLock(pageIdx); + } else { + fileHandle.releasePageLock(pageIdx); + throw BufferManagerException("If a page is not in memory or is not pinned, cannot set " + "it to isDirty = true. filePath: " + + fileHandle.getFileInfo()->path + + " pageIdx: " + std::to_string(pageIdx) + "."); + } +} + +void BufferManager::unpin(BMFileHandle& fileHandle, page_idx_t pageIdx) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + unpinWithoutAcquiringPageLock(fileHandle, pageIdx); + fileHandle.releasePageLock(pageIdx); +} + +void BufferManager::unpinWithoutAcquiringPageLock( + BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + // `count` is the value of `pinCount` before sub. + auto count = pageState->decrementPinCount(); + assert(count >= 1); + if (count == 1) { + addToEvictionQueue(&fileHandle, pageState); + } +} + +// This function tries to load the given page into a frame. Due to our design of mmap, each page is +// uniquely mapped to a frame. Thus, claiming a frame is equivalent to ensuring enough physical +// memory is available. +// First, we reserve the memory for the page, which increments the atomic counter `usedMemory`. +// Then, we check if there is enough memory available. If not, we evict pages until we have enough +// or we can find no more pages to be evicted. +// Lastly, we double check if the needed memory is available. If not, we free the memory we reserved +// and return false, otherwise, we load the page to its corresponding frame and return true. +bool BufferManager::claimAFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + page_offset_t pageSizeToClaim = fileHandle.getPageSize(); + // Reserve the memory for the page. + auto currentUsedMem = reserveUsedMemory(pageSizeToClaim); + uint64_t claimedMemory = 0; + // Evict pages if necessary until we have enough memory. 
+ while ((currentUsedMem + pageSizeToClaim - claimedMemory) > bufferPoolSize.load()) { + EvictionCandidate evictionCandidate; + if (!evictionQueue->dequeue(evictionCandidate)) { + // Cannot find more pages to be evicted. Free the memory we reserved and return false. + freeUsedMemory(pageSizeToClaim); + return false; + } + if (!evictionCandidate.isEvictable()) { + continue; + } + // We found a page whose pin count can be 0, and potentially haven't been accessed since + // enqueued. We try to evict the page from its frame by calling `tryEvictPage`, which will + // check if the page's pin count is actually 0 and the page has not been accessed recently, + // if so, we evict the page from its frame. + claimedMemory += tryEvictPage(evictionCandidate); + currentUsedMem = usedMemory.load(); + } + if ((currentUsedMem + pageSizeToClaim - claimedMemory) > bufferPoolSize.load()) { + // Cannot claim the memory needed. Free the memory we reserved and return false. + freeUsedMemory(pageSizeToClaim); + return false; + } + // Have enough memory available now, load the page into its corresponding frame. + cachePageIntoFrame(fileHandle, pageIdx, pageReadPolicy); + freeUsedMemory(claimedMemory); + return true; +} + +void BufferManager::addToEvictionQueue(BMFileHandle* fileHandle, PageState* pageState) { + auto timestampBeforeEvict = pageState->incrementEvictionTimestamp(); + if (++numEvictionQueueInsertions == BufferPoolConstants::EVICTION_QUEUE_PURGING_INTERVAL) { + evictionQueue->removeNonEvictableCandidates(); + numEvictionQueueInsertions = 0; + } + evictionQueue->enqueue(fileHandle, pageState, timestampBeforeEvict + 1); +} + +uint64_t BufferManager::tryEvictPage(EvictionCandidate& candidate) { + auto& pageState = *candidate.pageState; + if (!pageState.acquireLock(LockMode::NON_BLOCKING)) { + return 0; + } + // We check pinCount and evictionTimestamp again after acquiring the lock on page currently + // residing in the frame. At this point in time, no other thread can change the pinCount and the + // evictionTimestamp. + if (!candidate.isEvictable()) { + pageState.releaseLock(); + return 0; + } + // Else, flush out the frame into the file page if the frame is dirty. Then remove the page + // from the frame and release the lock on it. + flushIfDirtyWithoutLock(*candidate.fileHandle, pageState.getPageIdx()); + auto numBytesFreed = candidate.fileHandle->getPageSize(); + releaseFrameForPage(*candidate.fileHandle, pageState.getPageIdx()); + pageState.resetState(); + pageState.releaseLock(); + return numBytesFreed; +} + +void BufferManager::cachePageIntoFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy) { + auto pageState = fileHandle.getPageState(pageIdx); + pageState->setPinCount(1); + pageState->clearDirty(); + if (pageReadPolicy == PageReadPolicy::READ_PAGE) { + FileUtils::readFromFile(fileHandle.getFileInfo(), (void*)getFrame(fileHandle, pageIdx), + fileHandle.getPageSize(), pageIdx * fileHandle.getPageSize()); + } + pageState->setInFrame(pageIdx); +} + +void BufferManager::flushIfDirtyWithoutLock(BMFileHandle& fileHandle, common::page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState->isDirty()) { + FileUtils::writeToFile(fileHandle.getFileInfo(), getFrame(fileHandle, pageIdx), + fileHandle.getPageSize(), pageIdx * fileHandle.getPageSize()); + } } -void BufferManager::unpin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return fileHandle.isLargePaged() ? 
bufferPoolLargePages->unpin(fileHandle, pageIdx) : - bufferPoolDefaultPages->unpin(fileHandle, pageIdx); +void BufferManager::removeFilePagesFromFrames(BMFileHandle& fileHandle) { + for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { + removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); + } } -void BufferManager::removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle) { - fileHandle.isLargePaged() ? bufferPoolLargePages->removeFilePagesFromFrames(fileHandle) : - bufferPoolDefaultPages->removeFilePagesFromFrames(fileHandle); +void BufferManager::flushAllDirtyPagesInFrames(BMFileHandle& fileHandle) { + for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { + removePageFromFrame(fileHandle, pageIdx, true /* flush */); + } } -void BufferManager::flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle) { - fileHandle.isLargePaged() ? bufferPoolLargePages->flushAllDirtyPagesInFrames(fileHandle) : - bufferPoolDefaultPages->flushAllDirtyPagesInFrames(fileHandle); +void BufferManager::updateFrameIfPageIsInFrameWithoutLock( + BMFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState) { + memcpy(getFrame(fileHandle, pageIdx), newPage, BufferPoolConstants::PAGE_4KB_SIZE); + } } -void BufferManager::updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? - bufferPoolLargePages->updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - fileHandle, newPage, pageIdx) : - bufferPoolDefaultPages->updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - fileHandle, newPage, pageIdx); +void BufferManager::removePageFromFrameIfNecessary(BMFileHandle& fileHandle, page_idx_t pageIdx) { + if (pageIdx >= fileHandle.getNumPages()) { + return; + } + removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); } -void BufferManager::removePageFromFrameIfNecessary( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.isLargePaged() ? - bufferPoolLargePages->removePageFromFrameWithoutFlushingIfNecessary(fileHandle, pageIdx) : - bufferPoolDefaultPages->removePageFromFrameWithoutFlushingIfNecessary(fileHandle, pageIdx); +// NOTE: We assume the page is not pinned here. 
+void BufferManager::removePageFromFrame( + BMFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush) { + fileHandle.acquirePageLock(pageIdx, LockMode::SPIN); + auto pageState = fileHandle.getPageState(pageIdx); + if (pageState && pageState->isInFrame()) { + if (shouldFlush) { + flushIfDirtyWithoutLock(fileHandle, pageIdx); + } + fileHandle.clearPageState(pageIdx); + releaseFrameForPage(fileHandle, pageIdx); + freeUsedMemory(fileHandle.getPageSize()); + } + fileHandle.releasePageLock(pageIdx); } } // namespace storage diff --git a/src/storage/buffer_manager/buffer_pool.cpp b/src/storage/buffer_manager/buffer_pool.cpp deleted file mode 100644 index 6f8c6d23c6..0000000000 --- a/src/storage/buffer_manager/buffer_pool.cpp +++ /dev/null @@ -1,292 +0,0 @@ -#include "storage/buffer_manager/buffer_pool.h" - -#include - -#include "common/constants.h" -#include "common/exception.h" -#include "common/utils.h" -#include "spdlog/spdlog.h" - -using namespace kuzu::common; - -namespace kuzu { -namespace storage { - -Frame::Frame(page_offset_t pageSize, std::uint8_t* buffer) - : frameLock{ATOMIC_FLAG_INIT}, pageSize{pageSize}, buffer{buffer} { - resetFrameWithoutLock(); -} - -Frame::~Frame() noexcept(false) { - auto count = pinCount.load(); - if (0 != count && -1u != count) { - throw BufferManagerException( - "Deleting buffer that is still pinned. pinCount: " + std::to_string(count) + - " pageIdx: " + std::to_string(pageIdx)); - } -} - -void Frame::resetFrameWithoutLock() { - fileHandlePtr = -1u; - pageIdx = -1u; - pinCount = -1u; - recentlyAccessed = false; - isDirty = false; -} - -bool Frame::acquireFrameLock(bool block) { - if (block) { - while (frameLock.test_and_set()) // spinning - ; - return true; - } - return !frameLock.test_and_set(); -} - -void Frame::releaseBuffer() { - int error = madvise(buffer, pageSize, MADV_DONTNEED); - if (error) { - throw BufferManagerException("Releasing frame buffer failed with error code " + - std::to_string(error) + ": " + - std::string(std::strerror(errno))); - } -} - -BufferPool::BufferPool(uint64_t pageSize, uint64_t maxSize) - : logger{LoggerUtils::getLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER)}, - pageSize{pageSize}, clockHand{0}, - numFrames((page_idx_t)(ceil((double)maxSize / (double)pageSize))) { - assert(pageSize == BufferPoolConstants::DEFAULT_PAGE_SIZE || - pageSize == BufferPoolConstants::LARGE_PAGE_SIZE); - auto mmapRegion = (uint8_t*)mmap( - NULL, (numFrames * pageSize), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - for (auto i = 0u; i < numFrames; ++i) { - auto buffer = mmapRegion + (i * pageSize); - bufferCache.emplace_back(std::make_unique(pageSize, buffer)); - } - logger->info("Initialize buffer pool with the max size {}B, #{}byte-pages {}.", maxSize, - pageSize, numFrames); -} - -uint8_t* BufferPool::pin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return pin(fileHandle, pageIdx, false /* read page from file */); -} - -uint8_t* BufferPool::pinWithoutReadingFromFile( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - return pin(fileHandle, pageIdx, true /* do not read page from file */); -} - -void BufferPool::removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle) { - for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { - removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); - } -} - -void BufferPool::removePageFromFrame( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool shouldFlush) { - fileHandle.acquirePageLock(pageIdx, true 
/*block*/); - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - auto& frame = bufferCache[frameIdx]; - frame->acquireFrameLock(true /* block */); - if (shouldFlush) { - flushIfDirty(frame); - } - clearFrameAndUnswizzleWithoutLock(frame, fileHandle, pageIdx); - frame->releaseBuffer(); - frame->releaseFrameLock(); - } - fileHandle.releasePageLock(pageIdx); -} - -void BufferPool::removePageFromFrameWithoutFlushingIfNecessary( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - if (pageIdx >= fileHandle.getNumPages()) { - return; - } - removePageFromFrame(fileHandle, pageIdx, false /* do not flush */); -} - -void BufferPool::flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle) { - for (auto pageIdx = 0u; pageIdx < fileHandle.getNumPages(); ++pageIdx) { - removePageFromFrame(fileHandle, pageIdx, true /* flush */); - } -} - -void BufferPool::updateFrameIfPageIsInFrameWithoutPageOrFrameLock( - BufferManagedFileHandle& fileHandle, uint8_t* newPage, page_idx_t pageIdx) { - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - memcpy(bufferCache[frameIdx]->buffer, newPage, BufferPoolConstants::DEFAULT_PAGE_SIZE); - } -} - -uint8_t* BufferPool::pin( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - auto retVal = pinWithoutAcquiringPageLock(fileHandle, pageIdx, doNotReadFromFile); - fileHandle.releasePageLock(pageIdx); - return retVal; -} - -uint8_t* BufferPool::pinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (BufferManagedFileHandle::isAFrame(frameIdx)) { - auto& frame = bufferCache[frameIdx]; - frame->pinCount.fetch_add(1); - frame->recentlyAccessed = true; - bmMetrics.numCacheHit += 1; - } else { - frameIdx = claimAFrame(fileHandle, pageIdx, doNotReadFromFile); - fileHandle.swizzle(pageIdx, frameIdx); - if (!doNotReadFromFile) { - bmMetrics.numCacheMiss += 1; - } - } - bmMetrics.numPins += 1; - return bufferCache[fileHandle.getFrameIdx(pageIdx)]->buffer; -} - -void BufferPool::setPinnedPageDirty(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - auto frameIdx = fileHandle.getFrameIdx(pageIdx); - if (!BufferManagedFileHandle::isAFrame((frameIdx)) || - (bufferCache[frameIdx]->pinCount.load() < 1)) { - fileHandle.releasePageLock(pageIdx); - throw BufferManagerException("If a page is not in memory or is not pinned, cannot set " - "it to isDirty = true.filePath: " + - fileHandle.getFileInfo()->path + - " pageIdx: " + std::to_string(pageIdx) + "."); - } - bufferCache[frameIdx]->setIsDirty(true /* isDirty */); - fileHandle.releasePageLock(pageIdx); -} - -page_idx_t BufferPool::claimAFrame( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - auto localClockHand = clockHand.load(); - auto startFrame = localClockHand % numFrames; - for (auto i = 0u; i < 2 * numFrames; ++i) { - auto frameIdx = (startFrame + i) % numFrames; - auto pinCount = bufferCache[frameIdx]->pinCount.load(); - if ((-1u == pinCount && fillEmptyFrame(frameIdx, fileHandle, pageIdx, doNotReadFromFile)) || - (0u == pinCount && tryEvict(frameIdx, fileHandle, pageIdx, doNotReadFromFile))) { - moveClockHand(localClockHand + i + 1); - return frameIdx; - } - } - throw BufferManagerException("Cannot find a frame to 
evict from."); -} - -bool BufferPool::fillEmptyFrame(page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - page_idx_t pageIdx, bool doNotReadFromFile) { - auto& frame = bufferCache[frameIdx]; - if (!frame->acquireFrameLock(false)) { - return false; - } - if (-1u == frame->pinCount.load()) { - readNewPageIntoFrame(*frame, fileHandle, pageIdx, doNotReadFromFile); - frame->releaseFrameLock(); - return true; - } - frame->releaseFrameLock(); - return false; -} - -bool BufferPool::tryEvict(page_idx_t frameIdx, BufferManagedFileHandle& fileHandle, - page_idx_t pageIdx, bool doNotReadFromFile) { - auto& frame = bufferCache[frameIdx]; - if (frame->recentlyAccessed) { - frame->recentlyAccessed = false; - bmMetrics.numRecentlyAccessedWalkover += 1; - return false; - } - if (!frame->acquireFrameLock(false)) { - return false; - } - auto pageIdxInFrame = frame->pageIdx.load(); - auto fileHandleInFrame = - reinterpret_cast(frame->fileHandlePtr.load()); - if (!fileHandleInFrame->acquirePageLock(pageIdxInFrame, false)) { - bmMetrics.numEvictFails += 1; - frame->releaseFrameLock(); - return false; - } - // We check pinCount again after acquiring the lock on page currently residing in the frame. At - // this point in time, no other thread can change the pinCount. - if (0u != frame->pinCount.load()) { - bmMetrics.numEvictFails += 1; - fileHandleInFrame->releasePageLock(pageIdxInFrame); - frame->releaseFrameLock(); - return false; - } - // Else, flush out the frame into the file page if the frame is dirty. Then remove the page from - // the frame and release the lock on it. - flushIfDirty(frame); - clearFrameAndUnswizzleWithoutLock(frame, *fileHandleInFrame, pageIdxInFrame); - fileHandleInFrame->releasePageLock(pageIdxInFrame); - // Update the frame information and release the lock on frame. 
- readNewPageIntoFrame(*frame, fileHandle, pageIdx, doNotReadFromFile); - frame->releaseFrameLock(); - bmMetrics.numEvicts += 1; - return true; -} - -void BufferPool::flushIfDirty(const std::unique_ptr& frame) { - auto fileHandleInFrame = reinterpret_cast(frame->fileHandlePtr.load()); - auto pageIdxInFrame = frame->pageIdx.load(); - if (frame->isDirty) { - bmMetrics.numDirtyPageWriteIO += 1; - fileHandleInFrame->writePage(frame->buffer, pageIdxInFrame); - } -} - -void BufferPool::clearFrameAndUnswizzleWithoutLock(const std::unique_ptr& frame, - BufferManagedFileHandle& fileHandleInFrame, page_idx_t pageIdxInFrame) { - frame->resetFrameWithoutLock(); - fileHandleInFrame.unswizzle(pageIdxInFrame); -} - -void BufferPool::readNewPageIntoFrame( - Frame& frame, BufferManagedFileHandle& fileHandle, page_idx_t pageIdx, bool doNotReadFromFile) { - frame.pinCount.store(1); - frame.recentlyAccessed = true; - frame.isDirty = false; - frame.pageIdx.store(pageIdx); - frame.fileHandlePtr.store(reinterpret_cast(&fileHandle)); - if (!doNotReadFromFile) { - fileHandle.readPage(frame.buffer, pageIdx); - } -} - -void BufferPool::moveClockHand(uint64_t newClockHand) { - do { - auto currClockHand = clockHand.load(); - if (currClockHand > newClockHand) { - return; - } - if (clockHand.compare_exchange_strong( - currClockHand, newClockHand, std::memory_order_seq_cst)) { - return; - } - } while (true); -} - -void BufferPool::unpin(BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - fileHandle.acquirePageLock(pageIdx, true /*block*/); - unpinWithoutAcquiringPageLock(fileHandle, pageIdx); - fileHandle.releasePageLock(pageIdx); -} - -void BufferPool::unpinWithoutAcquiringPageLock( - BufferManagedFileHandle& fileHandle, page_idx_t pageIdx) { - auto& frame = bufferCache[fileHandle.getFrameIdx(pageIdx)]; - // `count` is the value of `pinCount` before sub. 
- auto count = frame->pinCount.fetch_sub(1); - assert(count >= 1); -} - -} // namespace storage -} // namespace kuzu diff --git a/src/storage/buffer_manager/memory_manager.cpp b/src/storage/buffer_manager/memory_manager.cpp index cc70629b1b..0edf34d725 100644 --- a/src/storage/buffer_manager/memory_manager.cpp +++ b/src/storage/buffer_manager/memory_manager.cpp @@ -2,33 +2,49 @@ #include +#include "common/utils.h" + using namespace kuzu::common; namespace kuzu { namespace storage { -std::unique_ptr MemoryManager::allocateBlock(bool initializeToZero) { - std::lock_guard lock(memMgrLock); +MemoryBuffer::MemoryBuffer(MemoryAllocator* allocator, page_idx_t pageIdx, uint8_t* buffer) + : buffer{buffer}, pageIdx{pageIdx}, allocator{allocator} {} + +MemoryBuffer::~MemoryBuffer() { + if (buffer != nullptr) { + allocator->freeBlock(pageIdx); + } +} + +MemoryAllocator::MemoryAllocator(BufferManager* bm) : bm{bm} { + pageSize = BufferPoolConstants::PAGE_256KB_SIZE; + fh = bm->getBMFileHandle("mm-256KB", FileHandle::O_IN_MEM_TEMP_FILE, + BMFileHandle::FileVersionedType::NON_VERSIONED_FILE, PAGE_256KB); +} + +MemoryAllocator::~MemoryAllocator() = default; + +std::unique_ptr MemoryAllocator::allocateBuffer(bool initializeToZero) { + std::unique_lock lock(allocatorLock); page_idx_t pageIdx; - uint8_t* data; if (freePages.empty()) { pageIdx = fh->addNewPage(); } else { pageIdx = freePages.top(); freePages.pop(); } - data = bm->pinWithoutReadingFromFile(*fh, pageIdx); - - auto blockHandle = std::make_unique(pageIdx, data); + auto buffer = bm->pin(*fh, pageIdx, BufferManager::PageReadPolicy::DONT_READ_PAGE); + auto memoryBuffer = std::make_unique(this, pageIdx, buffer); if (initializeToZero) { - memset(blockHandle->data, 0, BufferPoolConstants::LARGE_PAGE_SIZE); + memset(memoryBuffer->buffer, 0, pageSize); } - - return blockHandle; + return memoryBuffer; } -void MemoryManager::freeBlock(page_idx_t pageIdx) { - std::lock_guard lock(memMgrLock); +void MemoryAllocator::freeBlock(page_idx_t pageIdx) { + std::unique_lock lock(allocatorLock); bm->unpin(*fh, pageIdx); freePages.push(pageIdx); } diff --git a/src/storage/buffer_manager/vm_region.cpp b/src/storage/buffer_manager/vm_region.cpp new file mode 100644 index 0000000000..7c3fae1817 --- /dev/null +++ b/src/storage/buffer_manager/vm_region.cpp @@ -0,0 +1,52 @@ +#include "storage/buffer_manager/vm_region.h" + +#include + +#include "common/exception.h" + +using namespace kuzu::common; + +namespace kuzu { +namespace storage { + +VMRegion::VMRegion(PageSizeClass pageSizeClass, uint64_t maxRegionSize) : numFrameGroups{0} { + if (maxRegionSize > (std::size_t)-1) { + throw BufferManagerException("maxRegionSize is beyond the max available mmap region size."); + } + frameSize = pageSizeClass == PageSizeClass::PAGE_4KB ? BufferPoolConstants::PAGE_4KB_SIZE : + BufferPoolConstants::PAGE_256KB_SIZE; + auto numBytesForFrameGroup = frameSize * StorageConstants::PAGE_GROUP_SIZE; + maxNumFrameGroups = (maxRegionSize + numBytesForFrameGroup - 1) / numBytesForFrameGroup; + // Create a private anonymous mapping. The mapping is not shared with other processes and not + // backed by any file, and its content are initialized to zero. 
+ region = (uint8_t*)mmap(NULL, getMaxRegionSize(), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1 /* fd */, 0 /* offset */); + if (region == MAP_FAILED) { + throw BufferManagerException( + "Mmap for size " + std::to_string(getMaxRegionSize()) + " failed."); + } +} + +VMRegion::~VMRegion() { + munmap(region, getMaxRegionSize()); +} + +void VMRegion::releaseFrame(common::frame_idx_t frameIdx) { + int error = madvise(getFrame(frameIdx), frameSize, MADV_DONTNEED); + if (error != 0) { + throw BufferManagerException( + "Releasing physical memory associated with a frame failed with error code " + + std::to_string(error) + ": " + std::string(std::strerror(errno))); + } +} + +frame_group_idx_t VMRegion::addNewFrameGroup() { + std::unique_lock xLck{mtx}; + if (numFrameGroups >= maxNumFrameGroups) { + throw BufferManagerException("No more frame groups can be added to the allocator."); + } + return numFrameGroups++; +} + +} // namespace storage +} // namespace kuzu diff --git a/src/storage/copy_arrow/copy_node_arrow.cpp b/src/storage/copy_arrow/copy_node_arrow.cpp index ae05aa5b2b..5103f29f82 100644 --- a/src/storage/copy_arrow/copy_node_arrow.cpp +++ b/src/storage/copy_arrow/copy_node_arrow.cpp @@ -188,7 +188,7 @@ void CopyNodeArrow::putPropsOfLineIntoColumns( column->setElement(nodeOffset, reinterpret_cast(&val)); } break; case STRING: { - stringToken = stringToken.substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + stringToken = stringToken.substr(0, BufferPoolConstants::PAGE_4KB_SIZE); data = stringToken.c_str(); auto val = column->getInMemOverflowFile()->copyString(data, overflowCursors[columnIdx]); diff --git a/src/storage/copy_arrow/copy_rel_arrow.cpp b/src/storage/copy_arrow/copy_rel_arrow.cpp index d9b60a62d9..5c35b0efed 100644 --- a/src/storage/copy_arrow/copy_rel_arrow.cpp +++ b/src/storage/copy_arrow/copy_rel_arrow.cpp @@ -393,7 +393,7 @@ void CopyRelArrow::putPropsOfLineIntoColumns(CopyRelArrow* copier, continue; } auto stringToken = - currentToken->get()->ToString().substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + currentToken->get()->ToString().substr(0, BufferPoolConstants::PAGE_4KB_SIZE); const char* data = stringToken.c_str(); switch (properties[propertyIdx].dataType.typeID) { case INT64: { @@ -489,7 +489,7 @@ void CopyRelArrow::putPropsOfLineIntoLists(CopyRelArrow* copier, continue; } auto stringToken = - currentToken->get()->ToString().substr(0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + currentToken->get()->ToString().substr(0, BufferPoolConstants::PAGE_4KB_SIZE); const char* data = stringToken.c_str(); switch (properties[propertyIdx].dataType.typeID) { case INT64: { diff --git a/src/storage/copy_arrow/copy_structures_arrow.cpp b/src/storage/copy_arrow/copy_structures_arrow.cpp index 89006ea155..4276ba1bc9 100644 --- a/src/storage/copy_arrow/copy_structures_arrow.cpp +++ b/src/storage/copy_arrow/copy_structures_arrow.cpp @@ -241,10 +241,10 @@ std::unique_ptr CopyStructuresArrow::getArrowVarList(std::string& l, int6 values.push_back(std::move(value)); } auto numBytesOfOverflow = values.size() * Types::getDataTypeSize(childDataType.typeID); - if (numBytesOfOverflow >= BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (numBytesOfOverflow >= BufferPoolConstants::PAGE_4KB_SIZE) { throw ReaderException(StringUtils::string_format( "Maximum num bytes of a LIST is {}. 
Input list's num bytes is {}.", - BufferPoolConstants::DEFAULT_PAGE_SIZE, numBytesOfOverflow)); + BufferPoolConstants::PAGE_4KB_SIZE, numBytesOfOverflow)); } return make_unique( DataType(VAR_LIST, std::make_unique(childDataType)), std::move(values)); diff --git a/src/storage/index/hash_index.cpp b/src/storage/index/hash_index.cpp index e266586e58..c344de6279 100644 --- a/src/storage/index/hash_index.cpp +++ b/src/storage/index/hash_index.cpp @@ -125,9 +125,8 @@ HashIndex::HashIndex(const StorageStructureIDAndFName& storageStructureIDAndF const DataType& keyDataType, BufferManager& bufferManager, WAL* wal) : BaseHashIndex{keyDataType}, storageStructureIDAndFName{storageStructureIDAndFName}, bm{bufferManager}, wal{wal} { - fileHandle = bufferManager.getBufferManagedFileHandle(storageStructureIDAndFName.fName, - FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + fileHandle = bufferManager.getBMFileHandle(storageStructureIDAndFName.fName, + FileHandle::O_PERSISTENT_FILE_NO_CREATE, BMFileHandle::FileVersionedType::VERSIONED_FILE); headerArray = std::make_unique>(*fileHandle, storageStructureIDAndFName.storageStructureID, INDEX_HEADER_ARRAY_HEADER_PAGE_IDX, &bm, wal); diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index d54b5f2ba6..4b8ab5fdae 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -2,7 +2,6 @@ #include -#include "spdlog/spdlog.h" #include "storage/buffer_manager/buffer_manager.h" #include "storage/wal_replayer.h" diff --git a/src/storage/storage_structure/column.cpp b/src/storage/storage_structure/column.cpp index e6b94fc24c..f3276f12b6 100644 --- a/src/storage/storage_structure/column.cpp +++ b/src/storage/storage_structure/column.cpp @@ -70,7 +70,7 @@ Value Column::readValue(offset_t offset) { bool Column::isNull(offset_t nodeOffset, Transaction* transaction) { auto cursor = PageUtils::getPageElementCursorForPos(nodeOffset, numElementsPerPage); auto originalPageIdx = cursor.pageIdx; - fileHandle->acquirePageLock(originalPageIdx, true /* block */); + fileHandle->acquirePageLock(originalPageIdx, LockMode::SPIN); auto checkWALVersionOfPage = !transaction->isReadOnly() && fileHandle->hasWALPageVersionNoPageLock(originalPageIdx); uint8_t* frame; @@ -78,10 +78,10 @@ bool Column::isNull(offset_t nodeOffset, Transaction* transaction) { if (checkWALVersionOfPage) { pageIdxInWAL = fileHandle->getWALPageVersionNoPageLock(originalPageIdx); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal->fileHandle, pageIdxInWAL, false /* read from file */); + *wal->fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); } else { frame = bufferManager.pinWithoutAcquiringPageLock( - *fileHandle, originalPageIdx, false /* read from file */); + *fileHandle, originalPageIdx, BufferManager::PageReadPolicy::READ_PAGE); } auto nullEntries = (uint64_t*)(frame + (elementSize * numElementsPerPage)); auto isNull = NullMask::isNull(nullEntries, cursor.elemPosInPage); diff --git a/src/storage/storage_structure/disk_array.cpp b/src/storage/storage_structure/disk_array.cpp index 00cd1c9f63..46933aa9e0 100644 --- a/src/storage/storage_structure/disk_array.cpp +++ b/src/storage/storage_structure/disk_array.cpp @@ -12,7 +12,7 @@ namespace storage { DiskArrayHeader::DiskArrayHeader(uint64_t elementSize) : alignedElementSizeLog2{(uint64_t)ceil(log2(elementSize))}, - numElementsPerPageLog2{BufferPoolConstants::DEFAULT_PAGE_SIZE_LOG_2 - alignedElementSizeLog2}, + 
numElementsPerPageLog2{BufferPoolConstants::PAGE_4KB_SIZE_LOG2 - alignedElementSizeLog2}, elementPageOffsetMask{BitmaskUtils::all1sMaskForLeastSignificantBits(numElementsPerPageLog2)}, firstPIPPageIdx{StorageStructureUtils::NULL_PAGE_IDX}, numElements{0}, numAPs{0} {} @@ -79,19 +79,18 @@ U BaseDiskArray::get(uint64_t idx, TransactionType trxType) { checkOutOfBoundAccess(trxType, idx); auto apCursor = getAPIdxAndOffsetInAP(idx); page_idx_t apPageIdx = getAPPageIdxNoLock(apCursor.pageIdx, trxType); - auto& bufferManagedFileHandle = (BufferManagedFileHandle&)fileHandle; + auto& bmFileHandle = (BMFileHandle&)fileHandle; if (trxType == TransactionType::READ_ONLY || !hasTransactionalUpdates || - !bufferManagedFileHandle.hasWALPageVersionNoPageLock(apPageIdx)) { - auto frame = bufferManager->pin(bufferManagedFileHandle, apPageIdx); + !bmFileHandle.hasWALPageVersionNoPageLock(apPageIdx)) { + auto frame = bufferManager->pin(bmFileHandle, apPageIdx); auto retVal = *(U*)(frame + apCursor.offsetInPage); - bufferManager->unpin(bufferManagedFileHandle, apPageIdx); + bufferManager->unpin(bmFileHandle, apPageIdx); return retVal; } else { U retVal; - StorageStructureUtils::readWALVersionOfPage(bufferManagedFileHandle, apPageIdx, - *bufferManager, *wal, [&retVal, &apCursor](const uint8_t* frame) -> void { - retVal = *(U*)(frame + apCursor.offsetInPage); - }); + StorageStructureUtils::readWALVersionOfPage(bmFileHandle, apPageIdx, *bufferManager, *wal, + [&retVal, &apCursor]( + const uint8_t* frame) -> void { retVal = *(U*)(frame + apCursor.offsetInPage); }); return retVal; } } @@ -112,8 +111,8 @@ void BaseDiskArray::update(uint64_t idx, U val) { // getAPPageIdxNoLock logic needs to change to give the same guarantee (e.g., an apIdx = 0, may // no longer to be guaranteed to be in pips[0].) page_idx_t apPageIdx = getAPPageIdxNoLock(apCursor.pageIdx, TransactionType::WRITE); - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, - apPageIdx, false /* not inserting a new page */, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, apPageIdx, + false /* not inserting a new page */, *bufferManager, *wal, [&apCursor, &val](uint8_t* frame) -> void { *(U*)(frame + apCursor.offsetInPage) = val; }); } @@ -122,7 +121,7 @@ uint64_t BaseDiskArray::pushBack(U val) { std::unique_lock xLck{diskArraySharedMtx}; hasTransactionalUpdates = true; uint64_t elementIdx; - StorageStructureUtils::updatePage((BufferManagedFileHandle&)(fileHandle), storageStructureID, + StorageStructureUtils::updatePage((BMFileHandle&)(fileHandle), storageStructureID, headerPageIdx, false /* not inserting a new page */, *bufferManager, *wal, [this, &val, &elementIdx](uint8_t* frame) -> void { auto updatedDiskArrayHeader = ((DiskArrayHeader*)frame); @@ -131,8 +130,8 @@ uint64_t BaseDiskArray::pushBack(U val) { auto [apPageIdx, isNewlyAdded] = getAPPageIdxAndAddAPToPIPIfNecessaryForWriteTrxNoLock( (DiskArrayHeader*)frame, apCursor.pageIdx); // Now do the push back. 
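// (Both updatePage calls in this lambda write through the WAL: the helper pins the WAL version
// of the target page, creating it and copying the original 4KB page into it if necessary, applies
// the update callback to that frame, and then releases the page lock, so the pushed-back element
// only reaches the original database file at checkpoint time.)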
- StorageStructureUtils::updatePage((BufferManagedFileHandle&)(fileHandle), - storageStructureID, apPageIdx, isNewlyAdded, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)(fileHandle), storageStructureID, + apPageIdx, isNewlyAdded, *bufferManager, *wal, [&apCursor, &val]( uint8_t* frame) -> void { *(U*)(frame + apCursor.offsetInPage) = val; }); updatedDiskArrayHeader->numElements++; @@ -161,7 +160,7 @@ void BaseDiskArray::setNextPIPPageIDxOfPIPNoLock(DiskArrayHeader* updatedDisk * pipPageIdxOfPreviousPIP. 2) if pipPageIdxOfPreviousPIP is an existing PIP, in which * case again this function is not creating pipPageIdxOfPreviousPIP. */ - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, pipPageIdxOfPreviousPIP, false /* not inserting a new page */, *bufferManager, *wal, [&nextPIPPageIdx]( const uint8_t* frame) -> void { ((PIP*)frame)->nextPipPageIdx = nextPIPPageIdx; }); @@ -186,10 +185,10 @@ page_idx_t BaseDiskArray::getAPPageIdxNoLock(page_idx_t apIdx, TransactionTyp } else { page_idx_t retVal; page_idx_t pageIdxOfUpdatedPip = getUpdatedPageIdxOfPipNoLock(pipIdx); - StorageStructureUtils::readWALVersionOfPage((BufferManagedFileHandle&)fileHandle, - pageIdxOfUpdatedPip, *bufferManager, *wal, - [&retVal, &offsetInPIP]( - const uint8_t* frame) -> void { retVal = ((PIP*)frame)->pageIdxs[offsetInPIP]; }); + StorageStructureUtils::readWALVersionOfPage((BMFileHandle&)fileHandle, pageIdxOfUpdatedPip, + *bufferManager, *wal, [&retVal, &offsetInPIP](const uint8_t* frame) -> void { + retVal = ((PIP*)frame)->pageIdxs[offsetInPIP]; + }); return retVal; } } @@ -204,9 +203,8 @@ page_idx_t BaseDiskArray::getUpdatedPageIdxOfPipNoLock(uint64_t pipIdx) { template void BaseDiskArray::clearWALPageVersionAndRemovePageFromFrameIfNecessary(page_idx_t pageIdx) { - ((BufferManagedFileHandle&)this->fileHandle).clearWALPageVersionIfNecessary(pageIdx); - bufferManager->removePageFromFrameIfNecessary( - (BufferManagedFileHandle&)this->fileHandle, pageIdx); + ((BMFileHandle&)this->fileHandle).clearWALPageVersionIfNecessary(pageIdx); + bufferManager->removePageFromFrameIfNecessary((BMFileHandle&)this->fileHandle, pageIdx); } template @@ -236,7 +234,7 @@ void BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChec clearWALPageVersionAndRemovePageFromFrameIfNecessary(pipPageIdxOfNewPIP); if (!isCheckpoint) { // These are newly inserted pages, so we can truncate the file handle. 
- ((BufferManagedFileHandle&)this->fileHandle) + ((BMFileHandle&)this->fileHandle) .removePageIdxAndTruncateIfNecessary(pipPageIdxOfNewPIP); } } @@ -259,14 +257,14 @@ bool BaseDiskArray::hasPIPUpdatesNoLock(uint64_t pipIdx) { template uint64_t BaseDiskArray::readUInt64HeaderFieldNoLock( TransactionType trxType, std::function readOp) { - auto bufferManagedFileHandle = reinterpret_cast(&fileHandle); + auto bmFileHandle = reinterpret_cast(&fileHandle); if ((trxType == TransactionType::READ_ONLY) || - !bufferManagedFileHandle->hasWALPageVersionNoPageLock(headerPageIdx)) { + !bmFileHandle->hasWALPageVersionNoPageLock(headerPageIdx)) { return readOp(&this->header); } else { uint64_t retVal; - StorageStructureUtils::readWALVersionOfPage((BufferManagedFileHandle&)fileHandle, - headerPageIdx, *bufferManager, *wal, [&retVal, &readOp](uint8_t* frame) -> void { + StorageStructureUtils::readWALVersionOfPage((BMFileHandle&)fileHandle, headerPageIdx, + *bufferManager, *wal, [&retVal, &readOp](uint8_t* frame) -> void { retVal = readOp((DiskArrayHeader*)frame); }); return retVal; @@ -314,8 +312,8 @@ std::pair BaseDiskArray::getAPPageIdxAndAddAPToPIPIfNecessa setNextPIPPageIDxOfPIPNoLock(updatedDiskArrayHeader, pipIdxOfPreviousPIP, pipPageIdx); } // Finally we update the PIP page (possibly newly created) and add newAPPageIdx into it. - StorageStructureUtils::updatePage((BufferManagedFileHandle&)fileHandle, storageStructureID, - pipPageIdx, isInsertingANewPIPPage, *bufferManager, *wal, + StorageStructureUtils::updatePage((BMFileHandle&)fileHandle, storageStructureID, pipPageIdx, + isInsertingANewPIPPage, *bufferManager, *wal, [&isInsertingANewPIPPage, &newAPPageIdx, &offsetOfNewAPInPIP]( const uint8_t* frame) -> void { if (isInsertingANewPIPPage) { @@ -376,7 +374,7 @@ void InMemDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChe uint64_t numOldAPs = this->getNumAPsNoLock(TransactionType::READ_ONLY); for (uint64_t apIdx = 0; apIdx < numOldAPs; ++apIdx) { uint64_t apPageIdx = this->getAPPageIdxNoLock(apIdx, TransactionType::READ_ONLY); - if (reinterpret_cast(this->fileHandle) + if (reinterpret_cast(this->fileHandle) .hasWALPageVersionNoPageLock(apPageIdx)) { // Note we can directly read the new image from disk because the WALReplayer checkpoints // the disk image of the page before calling @@ -423,7 +421,7 @@ void InMemDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(bool isChe BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(true /* is checkpoint */); } else { BaseDiskArray::checkpointOrRollbackInMemoryIfNecessaryNoLock(false /* is rollback */); - ((BufferManagedFileHandle&)this->fileHandle) + ((BMFileHandle&)this->fileHandle) .removePageIdxAndTruncateIfNecessary(minNewAPPageIdxToTruncateTo); } } diff --git a/src/storage/storage_structure/disk_overflow_file.cpp b/src/storage/storage_structure/disk_overflow_file.cpp index 28a758c00b..2165a8adcb 100644 --- a/src/storage/storage_structure/disk_overflow_file.cpp +++ b/src/storage/storage_structure/disk_overflow_file.cpp @@ -11,16 +11,16 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -void DiskOverflowFile::pinOverflowPageCache(BufferManagedFileHandle* bufferManagedFileHandleToPin, +void DiskOverflowFile::pinOverflowPageCache(BMFileHandle* bmFileHandleToPin, page_idx_t pageIdxToPin, OverflowPageCache& overflowPageCache) { - overflowPageCache.frame = bufferManager.pin(*bufferManagedFileHandleToPin, pageIdxToPin); - overflowPageCache.bufferManagedFileHandle = bufferManagedFileHandleToPin; + 
overflowPageCache.frame = bufferManager.pin(*bmFileHandleToPin, pageIdxToPin); + overflowPageCache.bmFileHandle = bmFileHandleToPin; overflowPageCache.pageIdx = pageIdxToPin; } void DiskOverflowFile::unpinOverflowPageCache(OverflowPageCache& overflowPageCache) { if (overflowPageCache.pageIdx != UINT32_MAX) { - bufferManager.unpin(*overflowPageCache.bufferManagedFileHandle, overflowPageCache.pageIdx); + bufferManager.unpin(*overflowPageCache.bmFileHandle, overflowPageCache.pageIdx); } } @@ -162,12 +162,12 @@ std::vector> DiskOverflowFile::readList( void DiskOverflowFile::addNewPageIfNecessaryWithoutLock(uint32_t numBytesToAppend) { PageElementCursor byteCursor = PageUtils::getPageElementCursorForPos( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); - if ((byteCursor.elemPosInPage == 0) || ((byteCursor.elemPosInPage + numBytesToAppend - 1) > - BufferPoolConstants::DEFAULT_PAGE_SIZE)) { + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); + if ((byteCursor.elemPosInPage == 0) || + ((byteCursor.elemPosInPage + numBytesToAppend - 1) > BufferPoolConstants::PAGE_4KB_SIZE)) { // Note that if byteCursor.pos is already 0 the next operation keeps the nextBytePos // where it is. - nextBytePosToWriteTo = (fileHandle->getNumPages() * BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo = (fileHandle->getNumPages() * BufferPoolConstants::PAGE_4KB_SIZE); addNewPageToFileHandle(); } } @@ -176,13 +176,13 @@ void DiskOverflowFile::setStringOverflowWithoutLock( const char* srcRawString, uint64_t len, ku_string_t& diskDstString) { if (len <= ku_string_t::SHORT_STR_LENGTH) { return; - } else if (len > BufferPoolConstants::DEFAULT_PAGE_SIZE) { + } else if (len > BufferPoolConstants::PAGE_4KB_SIZE) { throw RuntimeException(StringUtils::getLongStringErrorMessage( - srcRawString, BufferPoolConstants::DEFAULT_PAGE_SIZE)); + srcRawString, BufferPoolConstants::PAGE_4KB_SIZE)); } addNewPageIfNecessaryWithoutLock(len); auto updatedPageInfoAndWALPageFrame = createWALVersionOfPageIfNecessaryForElement( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); memcpy(updatedPageInfoAndWALPageFrame.frame + updatedPageInfoAndWALPageFrame.posInPage, srcRawString, len); TypeUtils::encodeOverflowPtr(diskDstString.overflowPtr, @@ -222,14 +222,14 @@ void DiskOverflowFile::writeStringOverflowAndUpdateOverflowPtr( void DiskOverflowFile::setListRecursiveIfNestedWithoutLock( const ku_list_t& inMemSrcList, ku_list_t& diskDstList, const DataType& dataType) { auto elementSize = Types::getDataTypeSize(*dataType.childType); - if (inMemSrcList.size * elementSize > BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (inMemSrcList.size * elementSize > BufferPoolConstants::PAGE_4KB_SIZE) { throw RuntimeException(StringUtils::string_format( "Maximum num bytes of a LIST is %d. Input list's num bytes is %d.", - BufferPoolConstants::DEFAULT_PAGE_SIZE, inMemSrcList.size * elementSize)); + BufferPoolConstants::PAGE_4KB_SIZE, inMemSrcList.size * elementSize)); } addNewPageIfNecessaryWithoutLock(inMemSrcList.size * elementSize); auto updatedPageInfoAndWALPageFrame = createWALVersionOfPageIfNecessaryForElement( - nextBytePosToWriteTo, BufferPoolConstants::DEFAULT_PAGE_SIZE); + nextBytePosToWriteTo, BufferPoolConstants::PAGE_4KB_SIZE); diskDstList.size = inMemSrcList.size; // Copy non-overflow part for elements in the list. 
memcpy(updatedPageInfoAndWALPageFrame.frame + updatedPageInfoAndWALPageFrame.posInPage, diff --git a/src/storage/storage_structure/in_mem_file.cpp b/src/storage/storage_structure/in_mem_file.cpp index b9517a51d2..16e3127f43 100644 --- a/src/storage/storage_structure/in_mem_file.cpp +++ b/src/storage/storage_structure/in_mem_file.cpp @@ -30,7 +30,7 @@ uint32_t InMemFile::addANewPage(bool setToZero) { pages.push_back( std::make_unique(numElementsInAPage, numBytesForElement, hasNullMask)); if (setToZero) { - memset(pages[newPageIdx]->data, 0, BufferPoolConstants::DEFAULT_PAGE_SIZE); + memset(pages[newPageIdx]->data, 0, BufferPoolConstants::PAGE_4KB_SIZE); } return newPageIdx; } @@ -43,8 +43,7 @@ void InMemFile::flush() { for (auto pageIdx = 0u; pageIdx < pages.size(); pageIdx++) { pages[pageIdx]->encodeNullBits(); FileUtils::writeToFile(fileInfo.get(), pages[pageIdx]->data, - BufferPoolConstants::DEFAULT_PAGE_SIZE, - pageIdx * BufferPoolConstants::DEFAULT_PAGE_SIZE); + BufferPoolConstants::PAGE_4KB_SIZE, pageIdx * BufferPoolConstants::PAGE_4KB_SIZE); } } @@ -57,7 +56,7 @@ ku_string_t InMemOverflowFile::appendString(const char* rawString) { if (length > ku_string_t::SHORT_STR_LENGTH) { std::unique_lock lck{lock}; // Allocate a new page if necessary. - if (nextOffsetInPageToAppend + length >= BufferPoolConstants::DEFAULT_PAGE_SIZE) { + if (nextOffsetInPageToAppend + length >= BufferPoolConstants::PAGE_4KB_SIZE) { addANewPage(); nextOffsetInPageToAppend = 0; nextPageIdxToAppend++; @@ -134,7 +133,7 @@ ku_list_t InMemOverflowFile::copyList(const Value& listValue, PageByteCursor& ov resultKUList.size = listValue.listVal.size(); // Allocate a new page if necessary. if (overflowCursor.offsetInPage + (resultKUList.size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || overflowCursor.pageIdx == UINT32_MAX) { overflowCursor.offsetInPage = 0; overflowCursor.pageIdx = addANewOverflowPage(); @@ -168,7 +167,7 @@ ku_list_t InMemOverflowFile::copyList(const Value& listValue, PageByteCursor& ov void InMemOverflowFile::copyStringOverflow( PageByteCursor& overflowCursor, uint8_t* srcOverflow, ku_string_t* dstKUString) { // Allocate a new page if necessary. - if (overflowCursor.offsetInPage + dstKUString->len >= BufferPoolConstants::DEFAULT_PAGE_SIZE || + if (overflowCursor.offsetInPage + dstKUString->len >= BufferPoolConstants::PAGE_4KB_SIZE || overflowCursor.pageIdx == UINT32_MAX) { overflowCursor.offsetInPage = 0; overflowCursor.pageIdx = addANewOverflowPage(); @@ -187,7 +186,7 @@ void InMemOverflowFile::copyListOverflowFromFile(InMemOverflowFile* srcInMemOver auto numBytesOfListElement = Types::getDataTypeSize(*listChildDataType); // Allocate a new page if necessary. if (dstOverflowCursor.offsetInPage + (dstKUList->size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || dstOverflowCursor.pageIdx == UINT32_MAX) { dstOverflowCursor.offsetInPage = 0; dstOverflowCursor.pageIdx = addANewOverflowPage(); @@ -231,7 +230,7 @@ void InMemOverflowFile::copyListOverflowToFile( auto numBytesOfListElement = Types::getDataTypeSize(*childDataType); // Allocate a new page if necessary. 
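// (Same pattern as appendString/copyStringOverflow above: when the element does not fit in the
// remainder of the current 4KB page, or no overflow page has been claimed yet
// (pageIdx == UINT32_MAX), the cursor is reset to offset 0 of a freshly added overflow page.)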
if (pageByteCursor.offsetInPage + (srcKUList->size * numBytesOfListElement) >= - BufferPoolConstants::DEFAULT_PAGE_SIZE || + BufferPoolConstants::PAGE_4KB_SIZE || pageByteCursor.pageIdx == UINT32_MAX) { pageByteCursor.offsetInPage = 0; pageByteCursor.pageIdx = addANewOverflowPage(); diff --git a/src/storage/storage_structure/in_mem_page.cpp b/src/storage/storage_structure/in_mem_page.cpp index 09b0dd03ce..958dba3473 100644 --- a/src/storage/storage_structure/in_mem_page.cpp +++ b/src/storage/storage_structure/in_mem_page.cpp @@ -10,7 +10,7 @@ namespace storage { InMemPage::InMemPage(uint32_t maxNumElements, uint16_t numBytesForElement, bool hasNullEntries) : nullEntriesInPage{nullptr}, maxNumElements{maxNumElements} { - buffer = std::make_unique(BufferPoolConstants::DEFAULT_PAGE_SIZE); + buffer = std::make_unique(BufferPoolConstants::PAGE_4KB_SIZE); data = buffer.get(); if (hasNullEntries) { // In a page, null entries are stored right after the element data. Each null entry contains diff --git a/src/storage/storage_structure/lists/list_headers.cpp b/src/storage/storage_structure/lists/list_headers.cpp index b9bd973d1d..6386539343 100644 --- a/src/storage/storage_structure/lists/list_headers.cpp +++ b/src/storage/storage_structure/lists/list_headers.cpp @@ -33,9 +33,9 @@ ListHeaders::ListHeaders(const StorageStructureIDAndFName& storageStructureIDAnd storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::HEADERS; storageStructureIDAndFName.fName = StorageUtils::getListHeadersFName(storageStructureIDAndFNameForBaseList.fName); - fileHandle = bufferManager->getBufferManagedFileHandle(storageStructureIDAndFName.fName, + fileHandle = bufferManager->getBMFileHandle(storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + BMFileHandle::FileVersionedType::VERSIONED_FILE); storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::HEADERS; storageStructureIDAndFName.fName = fileHandle->getFileInfo()->path; headersDiskArray = std::make_unique>(*fileHandle, diff --git a/src/storage/storage_structure/lists/lists.cpp b/src/storage/storage_structure/lists/lists.cpp index 74ad5d32bf..a7b94807fb 100644 --- a/src/storage/storage_structure/lists/lists.cpp +++ b/src/storage/storage_structure/lists/lists.cpp @@ -397,16 +397,16 @@ std::unordered_set RelIDList::getDeletedRelOffsetsInListForNodeOffset( auto numElementsToReadInCurPage = std::min(numElementsInPersistentStore - numElementsRead, (uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); auto physicalPageIdx = pageMapper(pageCursor.pageIdx); - auto frame = bufferManager.pin(*fileHandle, physicalPageIdx) + - getElemByteOffset(pageCursor.elemPosInPage); + auto buffer = bufferManager.pin(*fileHandle, physicalPageIdx) + + getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { - auto relID = *(int64_t*)frame; + auto relID = *(int64_t*)buffer; if (listsUpdatesStore->isRelDeletedInPersistentStore( storageStructureIDAndFName.storageStructureID.listFileID, nodeOffset, relID)) { deletedRelOffsetsInList.emplace(numElementsRead); } numElementsRead++; - frame += elementSize; + buffer += elementSize; } bufferManager.unpin(*fileHandle, physicalPageIdx); pageCursor.nextPage(); @@ -424,16 +424,16 @@ list_offset_t RelIDList::getListOffset(offset_t nodeOffset, offset_t relOffset) auto numElementsToReadInCurPage = std::min(numElementsInPersistentStore - numElementsRead, 
(uint64_t)(numElementsPerPage - pageCursor.elemPosInPage)); auto physicalPageIdx = pageMapper(pageCursor.pageIdx); - auto frame = bufferManager.pin(*fileHandle, physicalPageIdx) + - getElemByteOffset(pageCursor.elemPosInPage); + auto buffer = bufferManager.pin(*fileHandle, physicalPageIdx) + + getElemByteOffset(pageCursor.elemPosInPage); for (auto i = 0u; i < numElementsToReadInCurPage; i++) { - auto relIDInList = *(int64_t*)frame; + auto relIDInList = *(int64_t*)buffer; if (relIDInList == relOffset) { bufferManager.unpin(*fileHandle, physicalPageIdx); return numElementsRead; } numElementsRead++; - frame += elementSize; + buffer += elementSize; } bufferManager.unpin(*fileHandle, physicalPageIdx); pageCursor.nextPage(); diff --git a/src/storage/storage_structure/lists/lists_metadata.cpp b/src/storage/storage_structure/lists/lists_metadata.cpp index 4250cea752..2b7a9399fa 100644 --- a/src/storage/storage_structure/lists/lists_metadata.cpp +++ b/src/storage/storage_structure/lists/lists_metadata.cpp @@ -17,9 +17,8 @@ ListsMetadata::ListsMetadata( storageStructureIDAndFName.storageStructureID.listFileID.listFileType = ListFileType::METADATA; storageStructureIDAndFName.fName = StorageUtils::getListMetadataFName(storageStructureIDAndFNameForBaseList.fName); - metadataVersionedFileHandle = bufferManager->getBufferManagedFileHandle( - storageStructureIDAndFName.fName, FileHandle::O_PERSISTENT_FILE_NO_CREATE, - BufferManagedFileHandle::FileVersionedType::VERSIONED_FILE); + metadataVersionedFileHandle = bufferManager->getBMFileHandle(storageStructureIDAndFName.fName, + FileHandle::O_PERSISTENT_FILE_NO_CREATE, BMFileHandle::FileVersionedType::VERSIONED_FILE); chunkToPageListHeadIdxMap = std::make_unique>( *metadataVersionedFileHandle, storageStructureIDAndFName.storageStructureID, CHUNK_PAGE_LIST_HEAD_IDX_MAP_HEADER_PAGE_IDX, bufferManager, wal); diff --git a/src/storage/storage_structure/storage_structure.cpp b/src/storage/storage_structure/storage_structure.cpp index 72a912309d..8ce4557a20 100644 --- a/src/storage/storage_structure/storage_structure.cpp +++ b/src/storage/storage_structure/storage_structure.cpp @@ -17,7 +17,7 @@ void StorageStructure::addNewPageToFileHandle() { auto pageIdxInOriginalFile = fileHandle->addNewPage(); auto pageIdxInWAL = wal->logPageInsertRecord(storageStructureID, pageIdxInOriginalFile); bufferManager.pinWithoutAcquiringPageLock( - *wal->fileHandle, pageIdxInWAL, true /* do not read from file */); + *wal->fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::DONT_READ_PAGE); fileHandle->createPageVersionGroupIfNecessary(pageIdxInOriginalFile); fileHandle->setWALPageVersion(pageIdxInOriginalFile, pageIdxInWAL); bufferManager.setPinnedPageDirty(*wal->fileHandle, pageIdxInWAL); diff --git a/src/storage/storage_structure/storage_structure_utils.cpp b/src/storage/storage_structure/storage_structure_utils.cpp index 06b75ebe72..4249f2e6aa 100644 --- a/src/storage/storage_structure/storage_structure_utils.cpp +++ b/src/storage/storage_structure/storage_structure_utils.cpp @@ -5,9 +5,9 @@ using namespace kuzu::common; namespace kuzu { namespace storage { -std::pair -StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHandle& fileHandle, - page_idx_t physicalPageIdx, WAL& wal, transaction::TransactionType trxType) { +std::pair StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin( + BMFileHandle& fileHandle, page_idx_t physicalPageIdx, WAL& wal, + transaction::TransactionType trxType) { if (trxType == transaction::TransactionType::READ_ONLY || 
!fileHandle.hasWALPageVersionNoPageLock(physicalPageIdx)) { return std::make_pair(&fileHandle, physicalPageIdx); @@ -17,7 +17,7 @@ StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin(BufferManagedFileHan } } -void StorageStructureUtils::updatePage(BufferManagedFileHandle& fileHandle, +void StorageStructureUtils::updatePage(BMFileHandle& fileHandle, StorageStructureID storageStructureID, page_idx_t originalPageIdx, bool isInsertingNewPage, BufferManager& bufferManager, WAL& wal, const std::function& updateOp) { auto walPageIdxAndFrame = StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( @@ -26,37 +26,38 @@ void StorageStructureUtils::updatePage(BufferManagedFileHandle& fileHandle, unpinWALPageAndReleaseOriginalPageLock(walPageIdxAndFrame, fileHandle, bufferManager, wal); } -void StorageStructureUtils::readWALVersionOfPage(BufferManagedFileHandle& fileHandle, +void StorageStructureUtils::readWALVersionOfPage(BMFileHandle& fileHandle, page_idx_t originalPageIdx, BufferManager& bufferManager, WAL& wal, const std::function& readOp) { page_idx_t pageIdxInWAL = fileHandle.getWALPageVersionNoPageLock(originalPageIdx); auto frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, false /* read from file */); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); readOp(frame); unpinPageIdxInWALAndReleaseOriginalPageLock( pageIdxInWAL, originalPageIdx, fileHandle, bufferManager, wal); } WALPageIdxAndFrame StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( - page_idx_t originalPageIdx, bool insertingNewPage, BufferManagedFileHandle& fileHandle, + page_idx_t originalPageIdx, bool insertingNewPage, BMFileHandle& fileHandle, StorageStructureID storageStructureID, BufferManager& bufferManager, WAL& wal) { fileHandle.createPageVersionGroupIfNecessary(originalPageIdx); - fileHandle.acquirePageLock(originalPageIdx, true /* block */); + fileHandle.acquirePageLock(originalPageIdx, LockMode::SPIN); page_idx_t pageIdxInWAL; uint8_t* frame; if (fileHandle.hasWALPageVersionNoPageLock(originalPageIdx)) { pageIdxInWAL = fileHandle.getWALPageVersionNoPageLock(originalPageIdx); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, false /* read from file */); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::READ_PAGE); } else { pageIdxInWAL = wal.logPageUpdateRecord( storageStructureID, originalPageIdx /* pageIdxInOriginalFile */); frame = bufferManager.pinWithoutAcquiringPageLock( - *wal.fileHandle, pageIdxInWAL, true /* do not read from file */); - uint8_t* originalFrame = bufferManager.pinWithoutAcquiringPageLock( - fileHandle, originalPageIdx, insertingNewPage); + *wal.fileHandle, pageIdxInWAL, BufferManager::PageReadPolicy::DONT_READ_PAGE); + auto originalFrame = bufferManager.pinWithoutAcquiringPageLock(fileHandle, originalPageIdx, + insertingNewPage ? BufferManager::PageReadPolicy::DONT_READ_PAGE : + BufferManager::PageReadPolicy::READ_PAGE); // Note: This logic only works for db files with DEFAULT_PAGE_SIZEs. 
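// (PageReadPolicy makes the old boolean flags explicit: the page just appended to the WAL is
// pinned with DONT_READ_PAGE because its on-disk contents are not meaningful yet, while the
// original page is pinned with READ_PAGE unless it is itself a newly inserted page; its 4KB
// contents are then copied into the WAL frame below.)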
- memcpy(frame, originalFrame, BufferPoolConstants::DEFAULT_PAGE_SIZE); + memcpy(frame, originalFrame, BufferPoolConstants::PAGE_4KB_SIZE); bufferManager.unpinWithoutAcquiringPageLock(fileHandle, originalPageIdx); fileHandle.setWALPageVersionNoLock( originalPageIdx /* pageIdxInOriginalFile */, pageIdxInWAL); @@ -66,16 +67,15 @@ WALPageIdxAndFrame StorageStructureUtils::createWALVersionIfNecessaryAndPinPage( } void StorageStructureUtils::unpinWALPageAndReleaseOriginalPageLock( - WALPageIdxAndFrame& walPageIdxAndFrame, BufferManagedFileHandle& fileHandle, - BufferManager& bufferManager, WAL& wal) { + WALPageIdxAndFrame& walPageIdxAndFrame, BMFileHandle& fileHandle, BufferManager& bufferManager, + WAL& wal) { StorageStructureUtils::unpinPageIdxInWALAndReleaseOriginalPageLock( walPageIdxAndFrame.pageIdxInWAL, walPageIdxAndFrame.originalPageIdx, fileHandle, bufferManager, wal); } void StorageStructureUtils::unpinPageIdxInWALAndReleaseOriginalPageLock(page_idx_t pageIdxInWAL, - page_idx_t originalPageIdx, BufferManagedFileHandle& fileHandle, BufferManager& bufferManager, - WAL& wal) { + page_idx_t originalPageIdx, BMFileHandle& fileHandle, BufferManager& bufferManager, WAL& wal) { bufferManager.unpinWithoutAcquiringPageLock(*wal.fileHandle, pageIdxInWAL); fileHandle.releasePageLock(originalPageIdx); } diff --git a/src/storage/storage_utils.cpp b/src/storage/storage_utils.cpp index a04128e4ed..6ee590dc69 100644 --- a/src/storage/storage_utils.cpp +++ b/src/storage/storage_utils.cpp @@ -186,11 +186,11 @@ uint32_t PageUtils::getNumElementsInAPage(uint32_t elementSize, bool hasNull) { auto numBytesPerNullEntry = NullMask::NUM_BITS_PER_NULL_ENTRY >> 3; auto numNullEntries = hasNull ? (uint32_t)ceil( - (double)BufferPoolConstants::DEFAULT_PAGE_SIZE / + (double)BufferPoolConstants::PAGE_4KB_SIZE / (double)(((uint64_t)elementSize << NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2) + numBytesPerNullEntry)) : 0; - return (BufferPoolConstants::DEFAULT_PAGE_SIZE - (numNullEntries * numBytesPerNullEntry)) / + return (BufferPoolConstants::PAGE_4KB_SIZE - (numNullEntries * numBytesPerNullEntry)) / elementSize; } diff --git a/src/storage/wal/wal.cpp b/src/storage/wal/wal.cpp index 0e2730d42e..107215dcfe 100644 --- a/src/storage/wal/wal.cpp +++ b/src/storage/wal/wal.cpp @@ -12,11 +12,11 @@ namespace storage { WAL::WAL(const std::string& directory, BufferManager& bufferManager) : logger{LoggerUtils::getLogger(LoggerConstants::LoggerEnum::WAL)}, directory{directory}, bufferManager{bufferManager}, isLastLoggedRecordCommit_{false} { - fileHandle = bufferManager.getBufferManagedFileHandle( - common::FileUtils::joinPath( - directory, std::string(common::StorageConstants::WAL_FILE_SUFFIX)), - FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); + fileHandle = + bufferManager.getBMFileHandle(common::FileUtils::joinPath(directory, + std::string(common::StorageConstants::WAL_FILE_SUFFIX)), + FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, + BMFileHandle::FileVersionedType::NON_VERSIONED_FILE); initCurrentPage(); } @@ -173,7 +173,7 @@ void WAL::setIsLastRecordCommit() { } } -WALIterator::WALIterator(std::shared_ptr fileHandle, std::mutex& mtx) +WALIterator::WALIterator(std::shared_ptr fileHandle, std::mutex& mtx) : BaseWALAndWALIterator{std::move(fileHandle)}, mtx{mtx} { resetCurrentHeaderPagePrefix(); if (this->fileHandle->getNumPages() > 0) { diff --git a/src/storage/wal_replayer.cpp b/src/storage/wal_replayer.cpp index d651a5985b..eaf95944e4 100644 --- 
a/src/storage/wal_replayer.cpp +++ b/src/storage/wal_replayer.cpp @@ -25,7 +25,7 @@ WALReplayer::WALReplayer(WAL* wal, StorageManager* storageManager, MemoryManager void WALReplayer::init() { logger = LoggerUtils::getLogger(LoggerConstants::LoggerEnum::STORAGE); walFileHandle = wal->fileHandle; - pageBuffer = std::make_unique(BufferPoolConstants::DEFAULT_PAGE_SIZE); + pageBuffer = std::make_unique(BufferPoolConstants::PAGE_4KB_SIZE); } void WALReplayer::replay() { @@ -81,9 +81,9 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { walFileHandle->readPage( pageBuffer.get(), walRecord.pageInsertOrUpdateRecord.pageIdxInWAL); FileUtils::writeToFile(fileInfoOfStorageStructure.get(), pageBuffer.get(), - BufferPoolConstants::DEFAULT_PAGE_SIZE, + BufferPoolConstants::PAGE_4KB_SIZE, walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile * - BufferPoolConstants::DEFAULT_PAGE_SIZE); + BufferPoolConstants::PAGE_4KB_SIZE); } if (!isRecovering) { // 2: If we are not recovering, we do any in-memory checkpointing or rolling back work @@ -420,7 +420,7 @@ void WALReplayer::replayWALRecord(WALRecord& walRecord) { } void WALReplayer::truncateFileIfInsertion( - BufferManagedFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord) { + BMFileHandle* fileHandle, const PageUpdateOrInsertRecord& pageInsertOrUpdateRecord) { if (pageInsertOrUpdateRecord.isInsert) { // If we are rolling back and this is a page insertion we truncate the fileHandle's // data structures that hold locks for pageIdxs. @@ -440,7 +440,7 @@ void WALReplayer::truncateFileIfInsertion( void WALReplayer::checkpointOrRollbackVersionedFileHandleAndBufferManager( const WALRecord& walRecord, const StorageStructureID& storageStructureID) { - BufferManagedFileHandle* fileHandle = + BMFileHandle* fileHandle = getVersionedFileHandleIfWALVersionAndBMShouldBeCleared(storageStructureID); if (fileHandle) { fileHandle->clearWALPageVersionIfNecessary( @@ -449,15 +449,15 @@ void WALReplayer::checkpointOrRollbackVersionedFileHandleAndBufferManager( // Update the page in buffer manager if it is in a frame. Note that we assume // that the pageBuffer currently contains the contents of the WALVersion, so the // caller needs to make sure that this assumption holds. 
- bufferManager->updateFrameIfPageIsInFrameWithoutPageOrFrameLock(*fileHandle, - pageBuffer.get(), walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile); + bufferManager->updateFrameIfPageIsInFrameWithoutLock(*fileHandle, pageBuffer.get(), + walRecord.pageInsertOrUpdateRecord.pageIdxInOriginalFile); } else { truncateFileIfInsertion(fileHandle, walRecord.pageInsertOrUpdateRecord); } } } -BufferManagedFileHandle* WALReplayer::getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( +BMFileHandle* WALReplayer::getVersionedFileHandleIfWALVersionAndBMShouldBeCleared( const StorageStructureID& storageStructureID) { switch (storageStructureID.storageStructureType) { case StorageStructureType::COLUMN: { diff --git a/test/include/graph_test/graph_test.h b/test/include/graph_test/graph_test.h index d83ba422ea..882d4fb706 100644 --- a/test/include/graph_test/graph_test.h +++ b/test/include/graph_test/graph_test.h @@ -24,7 +24,7 @@ class BaseGraphTest : public Test { public: void SetUp() override { systemConfig = std::make_unique( - common::StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); + common::BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); if (common::FileUtils::fileOrPathExists(TestHelper::getTmpTestDir())) { common::FileUtils::removeDir(TestHelper::getTmpTestDir()); } @@ -58,11 +58,8 @@ class BaseGraphTest : public Test { static inline transaction::TransactionManager* getTransactionManager(main::Database& database) { return database.transactionManager.get(); } - static inline uint64_t getDefaultBMSize(main::Database& database) { - return database.systemConfig.defaultPageBufferPoolSize; - } - static inline uint64_t getLargeBMSize(main::Database& database) { - return database.systemConfig.largePageBufferPoolSize; + static inline uint64_t getBMSize(main::Database& database) { + return database.systemConfig.bufferPoolSize; } static inline storage::WAL* getWAL(main::Database& database) { return database.wal.get(); } static inline void commitAndCheckpointOrRollback(main::Database& database, diff --git a/test/include/main_test_helper/main_test_helper.h b/test/include/main_test_helper/main_test_helper.h index 30962eea91..3d2ea876bb 100644 --- a/test/include/main_test_helper/main_test_helper.h +++ b/test/include/main_test_helper/main_test_helper.h @@ -10,8 +10,8 @@ class ApiTest : public BaseGraphTest { public: void SetUp() override { BaseGraphTest::SetUp(); - systemConfig->defaultPageBufferPoolSize = (1ull << 26); - systemConfig->largePageBufferPoolSize = (1ull << 26); + systemConfig->bufferPoolSize = + common::BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING; createDBAndConn(); initGraph(); } diff --git a/test/processor/order_by/key_block_merger_test.cpp b/test/processor/order_by/key_block_merger_test.cpp index 859eca31ce..7b2399f845 100644 --- a/test/processor/order_by/key_block_merger_test.cpp +++ b/test/processor/order_by/key_block_merger_test.cpp @@ -20,11 +20,8 @@ class KeyBlockMergerTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ 
-36,7 +33,7 @@ class KeyBlockMergerTest : public Test { public: std::unique_ptr bufferManager; std::unique_ptr memoryManager; - uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; static void checkTupleIdxesAndFactorizedTableIdxes(uint8_t* keyBlockPtr, const uint64_t keyBlockEntrySizeInBytes, diff --git a/test/processor/order_by/order_by_key_encoder_test.cpp b/test/processor/order_by/order_by_key_encoder_test.cpp index 7845abd41a..ea0c0f836c 100644 --- a/test/processor/order_by/order_by_key_encoder_test.cpp +++ b/test/processor/order_by/order_by_key_encoder_test.cpp @@ -16,11 +16,8 @@ class OrderByKeyEncoderTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ -134,7 +131,7 @@ class OrderByKeyEncoderTest : public Test { std::unique_ptr bufferManager; std::unique_ptr memoryManager; const uint32_t ftIdx = 14; - const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; }; TEST_F(OrderByKeyEncoderTest, singleOrderByColInt64UnflatTest) { @@ -576,7 +573,7 @@ TEST_F(OrderByKeyEncoderTest, largeNumBytesPerTupleErrorTest) { // If the numBytesPerTuple is larger than 4096 bytes, the encoder will raise an encoding // exception we need ((LARGE_PAGE_SIZE - 8) / 9 + 1 number of columns(with datatype INT) to // trigger that exception. 
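    // Worked example, assuming PAGE_256KB_SIZE is 256 KiB (262144 bytes): each INT64 order-by
    // column encodes to 9 bytes and each tuple carries an 8-byte suffix (see the
    // 9 * numOfOrderByCols + 8 formula asserted below), so
    //   numOfOrderByCols = (262144 - 8) / 9 + 1 = 29127  =>  tupleSize = 9 * 29127 + 8 = 262151,
    // one column more than the largest tuple (262142 bytes) that still fits in a 256KB block.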
- auto numOfOrderByCols = (BufferPoolConstants::LARGE_PAGE_SIZE - 8) / 9 + 1; + auto numOfOrderByCols = (BufferPoolConstants::PAGE_256KB_SIZE - 8) / 9 + 1; auto [valueVectors, dataChunk] = getInt64TestValueVector(1, numOfOrderByCols, true); auto isAscOrder = std::vector(numOfOrderByCols, true); try { @@ -587,13 +584,13 @@ TEST_F(OrderByKeyEncoderTest, largeNumBytesPerTupleErrorTest) { ASSERT_STREQ(e.what(), StringUtils::string_format("Runtime exception: TupleSize({} bytes) is larger than " "the LARGE_PAGE_SIZE({} bytes)", - 9 * numOfOrderByCols + 8, BufferPoolConstants::LARGE_PAGE_SIZE) + 9 * numOfOrderByCols + 8, BufferPoolConstants::PAGE_256KB_SIZE) .c_str()); } catch (std::exception& e) { FAIL(); } } TEST_F(OrderByKeyEncoderTest, singleTuplePerBlockTest) { - uint32_t numOfOrderByCols = (BufferPoolConstants::LARGE_PAGE_SIZE - 8) / 9; + uint32_t numOfOrderByCols = (BufferPoolConstants::PAGE_256KB_SIZE - 8) / 9; uint32_t numOfElementsPerCol = 10; auto [valueVectors, dataChunk] = getInt64TestValueVector(numOfElementsPerCol, numOfOrderByCols, true); diff --git a/test/processor/order_by/radix_sort_test.cpp b/test/processor/order_by/radix_sort_test.cpp index 2a99e47f31..391adbccaa 100644 --- a/test/processor/order_by/radix_sort_test.cpp +++ b/test/processor/order_by/radix_sort_test.cpp @@ -20,11 +20,8 @@ class RadixSortTest : public Test { void SetUp() override { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); } @@ -37,7 +34,7 @@ class RadixSortTest : public Test { std::unique_ptr bufferManager; std::unique_ptr memoryManager; const uint8_t factorizedTableIdx = 9; - const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::LARGE_PAGE_SIZE / 8; + const uint32_t numTuplesPerBlockInFT = BufferPoolConstants::PAGE_256KB_SIZE / 8; void checkTupleIdxesAndFactorizedTableIdxes(uint8_t* keyBlockPtr, const uint64_t entrySize, const std::vector& expectedFTBlockOffsetOrder) { diff --git a/test/runner/e2e_ddl_test.cpp b/test/runner/e2e_ddl_test.cpp index 7c714f58c5..dc987cb48d 100644 --- a/test/runner/e2e_ddl_test.cpp +++ b/test/runner/e2e_ddl_test.cpp @@ -107,11 +107,8 @@ class TinySnbDDLTest : public DBTest { DBTest::SetUp(); catalog = getCatalog(*database); profiler = std::make_unique(); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); memoryManager = std::make_unique(bufferManager.get()); executionContext = std::make_unique( 1 /* numThreads */, profiler.get(), memoryManager.get(), bufferManager.get()); diff --git a/test/runner/e2e_set_transaction_test.cpp b/test/runner/e2e_set_transaction_test.cpp index 0b7a7d5907..77bc96d041 100644 --- a/test/runner/e2e_set_transaction_test.cpp +++ b/test/runner/e2e_set_transaction_test.cpp @@ -182,7 +182,7 @@ TEST_F(SetNodeStructuredPropTransactionTest, SetNodeLongStringPropRollbackTest) 
TEST_F(SetNodeStructuredPropTransactionTest, SetVeryLongStringErrorsTest) { conn->beginWriteTransaction(); std::string veryLongStr = ""; - for (auto i = 0u; i < BufferPoolConstants::DEFAULT_PAGE_SIZE + 1; ++i) { + for (auto i = 0u; i < BufferPoolConstants::PAGE_4KB_SIZE + 1; ++i) { veryLongStr += "a"; } auto result = conn->query("MATCH (a:person) WHERE a.ID=0 SET a.fName='" + veryLongStr + "'"); diff --git a/test/storage/CMakeLists.txt b/test/storage/CMakeLists.txt index 77660e47a3..cae622b3e2 100644 --- a/test/storage/CMakeLists.txt +++ b/test/storage/CMakeLists.txt @@ -1,4 +1,3 @@ -add_kuzu_test(buffer_manager_test buffer_manager_test.cpp) #add_kuzu_test(disk_array_update_test disk_array_update_test.cpp) add_kuzu_test(node_insertion_deletion_test node_insertion_deletion_test.cpp) add_kuzu_test(wal_record_test wal_record_test.cpp) diff --git a/test/storage/buffer_manager_test.cpp b/test/storage/buffer_manager_test.cpp deleted file mode 100644 index 0e4467e6df..0000000000 --- a/test/storage/buffer_manager_test.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "graph_test/graph_test.h" -#include "storage/buffer_manager/buffer_manager.h" - -using namespace kuzu::common; -using namespace kuzu::storage; -using namespace kuzu::testing; - -class BufferManagerTests : public Test { - -protected: - void SetUp() override { - FileUtils::createDir(TestHelper::getTmpTestDir()); - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); - LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - } - - void TearDown() override { - FileUtils::removeDir(TestHelper::getTmpTestDir()); - LoggerUtils::dropLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); - LoggerUtils::dropLogger(LoggerConstants::LoggerEnum::STORAGE); - } -}; - -TEST_F(BufferManagerTests, RemoveFilePagesFromFramesTest) { - BufferManagedFileHandle fileHandle(std::string(TestHelper::getTmpTestDir()) + "bm_test.bin", - FileHandle::O_PERSISTENT_FILE_CREATE_NOT_EXISTS, - BufferManagedFileHandle::FileVersionedType::NON_VERSIONED_FILE); - uint64_t numPagesToAdd = 1000; - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - fileHandle.addNewPage(); - } - auto bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); - // Pin and unpin some pages - bufferManager->pinWithoutReadingFromFile(fileHandle, 10); - bufferManager->pinWithoutReadingFromFile(fileHandle, 999); - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - if (pageIdx == 10 || pageIdx == 999) { - ASSERT_TRUE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } else { - ASSERT_FALSE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } - } - bufferManager->unpin(fileHandle, 10); - bufferManager->unpin(fileHandle, 999); - bufferManager->removeFilePagesFromFrames(fileHandle); - for (int pageIdx = 0; pageIdx < numPagesToAdd; ++pageIdx) { - ASSERT_FALSE(BufferManagedFileHandle::isAFrame(fileHandle.getFrameIdx(pageIdx))); - } -} diff --git a/test/storage/wal_test.cpp b/test/storage/wal_test.cpp index 6d009346c0..2b52e07897 100644 --- a/test/storage/wal_test.cpp +++ b/test/storage/wal_test.cpp @@ -12,11 +12,8 @@ class WALTests : public Test { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::BUFFER_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::WAL); 
LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); wal = make_unique(TestHelper::getTmpTestDir(), *bufferManager); } diff --git a/test/transaction/transaction_manager_test.cpp b/test/transaction/transaction_manager_test.cpp index 032055494f..26545559d6 100644 --- a/test/transaction/transaction_manager_test.cpp +++ b/test/transaction/transaction_manager_test.cpp @@ -17,11 +17,8 @@ class TransactionManagerTest : public Test { LoggerUtils::createLogger(LoggerConstants::LoggerEnum::WAL); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::TRANSACTION_MANAGER); LoggerUtils::createLogger(LoggerConstants::LoggerEnum::STORAGE); - bufferManager = - std::make_unique(StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::DEFAULT_PAGES_BUFFER_RATIO, - StorageConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING * - StorageConstants::LARGE_PAGES_BUFFER_RATIO); + bufferManager = std::make_unique( + BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING); wal = std::make_unique(TestHelper::getTmpTestDir(), *bufferManager); transactionManager = std::make_unique(*wal); } diff --git a/test/transaction/transaction_test.cpp b/test/transaction/transaction_test.cpp index ddb22cfaaa..fa22826c81 100644 --- a/test/transaction/transaction_test.cpp +++ b/test/transaction/transaction_test.cpp @@ -21,7 +21,7 @@ class TransactionTests : public DBTest { } void initWithoutLoadingGraph() { - systemConfig->largePageBufferPoolSize = (1ull << 22); + systemConfig->bufferPoolSize = (1ull << 22); // Note we do not actually use the connection field in these tests. We only need the // database. createDBAndConn(); diff --git a/third_party/concurrentqueue/LICENSE.md b/third_party/concurrentqueue/LICENSE.md new file mode 100644 index 0000000000..519338976f --- /dev/null +++ b/third_party/concurrentqueue/LICENSE.md @@ -0,0 +1,62 @@ +This license file applies to everything in this repository except that which +is explicitly annotated as being written by other authors, i.e. the Boost +queue (included in the benchmarks for comparison), Intel's TBB library (ditto), +dlib::pipe (ditto), +the CDSChecker tool (used for verification), the Relacy model checker (ditto), +and Jeff Preshing's semaphore implementation (used in the blocking queue) which +has a zlib license (embedded in lightweightsempahore.h). + +--- + +Simplified BSD License: + +Copyright (c) 2013-2016, Cameron Desrochers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or other materials +provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL +THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +I have also chosen to dual-license under the Boost Software License as an alternative to +the Simplified BSD license above: + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/concurrentqueue/blockingconcurrentqueue.h b/third_party/concurrentqueue/blockingconcurrentqueue.h new file mode 100644 index 0000000000..205a4db707 --- /dev/null +++ b/third_party/concurrentqueue/blockingconcurrentqueue.h @@ -0,0 +1,582 @@ +// Provides an efficient blocking version of moodycamel::ConcurrentQueue. +// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified +// BSD license, available at the top of concurrentqueue.h. +// Also dual-licensed under the Boost Software License (see LICENSE.md) +// Uses Jeff Preshing's semaphore implementation (under the terms of its +// separate zlib license, see lightweightsemaphore.h). + +#pragma once + +#include "concurrentqueue.h" +#include "lightweightsemaphore.h" + +#include +#include +#include +#include +#include + +namespace moodycamel +{ +// This is a blocking version of the queue. It has an almost identical interface to +// the normal non-blocking version, with the addition of various wait_dequeue() methods +// and the removal of producer-specific dequeue methods. 
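// A minimal usage sketch, relying only on methods declared below (queue name hypothetical):
//     moodycamel::BlockingConcurrentQueue<int> q;
//     q.enqueue(42);            // lock-free enqueue; signals the internal semaphore
//     int item;
//     q.wait_dequeue(item);     // blocks on the semaphore until an element is available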
+template +class BlockingConcurrentQueue +{ +private: + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; + +public: + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) + : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). 
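// A minimal sketch of the note above (ProducerToken is declared in concurrentqueue.h):
//     moodycamel::BlockingConcurrentQueue<int> a;
//     moodycamel::ProducerToken ptok(a);
//     moodycamel::BlockingConcurrentQueue<int> b(std::move(a)); // not thread-safe, see above
//     b.enqueue(ptok, 7);       // ptok is now tied to b, the destination of the move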
+ BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if ((details::likely)(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if ((details::likely)(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + if ((details::likely)(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if ((details::likely)(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. 
Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
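+	// A hedged sketch of a polling consumer built on try_dequeue below; `q`,
+	// `running` (an std::atomic<bool>) and `handle` are placeholders:
+	//
+	//     int v;
+	//     while (running.load(std::memory_order_relaxed)) {
+	//         if (q.try_dequeue(v)) {
+	//             handle(v);
+	//         } else {
+	//             std::this_thread::yield();   // the queue merely *appeared* empty
+	//         }
+	//     }
+	//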
+ template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + + + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. 
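+	// A sketch of a consumer that blocks with a bounded wait via the
+	// wait_dequeue_timed overloads above; `q` is a placeholder queue of ints:
+	//
+	//     int v;
+	//     if (q.wait_dequeue_timed(v, std::chrono::milliseconds(100))) {
+	//         // an item arrived within ~100ms
+	//     } else {
+	//         // timed out; the queue stayed (apparently) empty
+	//     }
+	//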
+ template + inline void wait_dequeue(consumer_token_t& token, U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. 
+ template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static constexpr bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } + + +private: + template + static inline U* create(A1&& a1, A2&& a2) + { + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + ConcurrentQueue inner; + std::unique_ptr sema; +}; + + +template +inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} // end namespace moodycamel diff --git a/third_party/concurrentqueue/concurrentqueue.h b/third_party/concurrentqueue/concurrentqueue.h new file mode 100644 index 0000000000..4b2ad791d2 --- /dev/null +++ b/third_party/concurrentqueue/concurrentqueue.h @@ -0,0 +1,3747 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. 
+// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2020, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. 
+#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading +#include // used for thread exit synchronization + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { namespace details { + template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } + }; +} } +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { namespace details { + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; + static inline thread_id_t thread_id() { return rl::thread_index(); } +} } +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { namespace details { + static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); + typedef std::uint32_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx + static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. + static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} } +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) +namespace moodycamel { namespace details { + static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + + typedef std::thread::id thread_id_t; + static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + + // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's + // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't + // be. 
+ static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + + template struct thread_id_size { }; + template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; + template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; + + template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } + }; +} } +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { namespace details { + typedef std::uintptr_t thread_id_t; + static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr + static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. + inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} } +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? 
std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. +// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users report having problems with it on +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
+#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type + template struct Vs2013Aligned { }; // default, unsupported alignment + template struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; }; + template struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; }; + template struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; }; + template struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; }; + template struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; }; + template struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; }; + template struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; }; + template struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; }; + template struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; }; +#else + template struct identity { typedef T type; }; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} } + + +// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { namespace details { +#if defined(__GNUC__) + static inline bool (likely)(bool x) { return __builtin_expect((x), true); } + static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); } +#else + static inline bool (likely)(bool x) { return x; } + static inline bool (unlikely)(bool x) { return x; } +#endif +} } + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { + template + struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? (static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); + }; + +#if defined(__GLIBCXX__) + typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else + typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + + // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting + // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. 
+ typedef union { + std_max_align_t x; + long long y; + void* z; + } max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-int type depends on the whether + // std::atomic is lock-free, which is platform-specific. + typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. 
+ // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + // Whether to recycle dynamically-allocated blocks into an internal free list or + // not. If false, only pre-allocated blocks (controlled by the constructor + // arguments) will be recycled, and all others will be `free`d back to the heap. + // Note that blocks consumed by explicit producers are only freed on destruction + // of the queue (not following destruction of the token) regardless of this trait. + static const bool RECYCLE_ALLOCATED_BLOCKS = false; + + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! + // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); } + static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
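+// A brief sketch tying together the two mechanisms described above: customizing
+// the traits by inheriting from ConcurrentQueueDefaultTraits and shadowing the
+// members you need, and preferring token-based bulk operations. `MyTraits`,
+// `producer` and `consumer` are placeholder names:
+//
+//     #include "concurrentqueue.h"
+//
+//     struct MyTraits : public moodycamel::ConcurrentQueueDefaultTraits {
+//         static const size_t BLOCK_SIZE = 256;        // must remain a power of 2
+//     };
+//
+//     moodycamel::ConcurrentQueue<int, MyTraits> q;
+//
+//     void producer() {
+//         moodycamel::ProducerToken ptok(q);
+//         int items[64];
+//         for (int i = 0; i != 64; ++i) items[i] = i;
+//         q.enqueue_bulk(ptok, items, 64);             // bulk enqueue with a token
+//     }
+//
+//     void consumer() {
+//         moodycamel::ConsumerToken ctok(q);
+//         int items[64];
+//         std::size_t n = q.try_dequeue_bulk(ctok, items, 64);
+//         // n items were written into `items`
+//     }
+//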
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + + +namespace details +{ + struct ConcurrentQueueProducerTypelessBase + { + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } + }; + + template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } + }; + template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } + }; + template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + + static inline size_t hash_thread_id(thread_id_t id) + { + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); + } + + template + static inline bool circular_less_than(T a, T b) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. 
+ } + + template + static inline char* align_for(char* ptr) + { + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; + } + + template + static inline T ceil_to_pow_2(T x) + { + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; + } + + template + static inline void swap_relaxed(std::atomic& left, std::atomic& right) + { + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); + } + + template + static inline T const& nomove(T const& x) + { + return x; + } + + template + struct nomove_if + { + template + static inline T const& eval(T const& x) + { + return x; + } + }; + + template<> + struct nomove_if + { + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } + }; + + template + static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) + { + return *it; + } + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + template struct is_trivially_destructible : std::is_trivially_destructible { }; +#else + template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY + typedef RelacyThreadExitListener ThreadExitListener; + typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else + class ThreadExitNotifier; + + struct ThreadExitListener + { + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier + }; + + class ThreadExitNotifier + { + public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + std::lock_guard guard(mutex()); + listener->next = tlsInst.tail; + listener->chain = &tlsInst; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + std::lock_guard guard(mutex()); + if (!listener->chain) { + return; // race with ~ThreadExitNotifier + } + auto& tlsInst = *listener->chain; + listener->chain = nullptr; + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + + private: + ThreadExitNotifier() : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + std::lock_guard guard(mutex()); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->chain = nullptr; + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + static inline std::mutex& mutex() + { + // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called + static std::mutex mutex; + return mutex; + } + + private: + ThreadExitListener* tail; + }; +#endif +#endif + + template struct static_is_lock_free_num { enum { value = 0 }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; + template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; + template struct static_is_lock_free : static_is_lock_free_num::type> { }; + template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; + template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; +} + + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. 
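+	// A small sketch of the first case above (allocation failure during token
+	// construction); `q` and `item` are placeholders:
+	//
+	//     moodycamel::ProducerToken tok(q);
+	//     if (!tok.valid()) {
+	//         q.enqueue(item);   // fall back to token-less (implicit-producer) enqueue
+	//     }
+	//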
+ inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. +// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) 
+#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). 
+ explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. + ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. + ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). 
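+	// An illustrative call to the pre-sizing constructor above: room for at least
+	// 1024 elements with at most 2 explicit and 8 implicit producers (the figures
+	// are placeholders, not recommendations):
+	//
+	//     moodycamel::ConcurrentQueue<int> q(1024, 2, 8);
+	//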
+ ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
+ inline bool enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + bool enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue_bulk(itemFirst, count); + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(item); + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else return inner_enqueue(std::move(item)); + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. 
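+	// A sketch of using the non-allocating variants for backpressure; `q`, `ptok`
+	// and `item` are placeholders:
+	//
+	//     if (!q.try_enqueue(ptok, item)) {
+	//         // no pre-allocated room left for this producer: drop, retry,
+	//         // or fall back to the allocating enqueue(ptok, item)
+	//     }
+	//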
+    inline bool try_enqueue(producer_token_t const& token, T const& item)
+    {
+        return inner_enqueue<CannotAlloc>(token, item);
+    }
+
+    // Enqueues a single item (by moving it, if possible) using an explicit producer token.
+    // Does not allocate memory. Fails if not enough room to enqueue.
+    // Thread-safe.
+    inline bool try_enqueue(producer_token_t const& token, T&& item)
+    {
+        return inner_enqueue<CannotAlloc>(token, std::move(item));
+    }
+
+    // Enqueues several items.
+    // Does not allocate memory (except for one-time implicit producer).
+    // Fails if not enough room to enqueue (or implicit production is
+    // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
+    // Note: Use std::make_move_iterator if the elements should be moved
+    // instead of copied.
+    // Thread-safe.
+    template<typename It>
+    bool try_enqueue_bulk(It itemFirst, size_t count)
+    {
+        MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false;
+        else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
+    }
+
+    // Enqueues several items using an explicit producer token.
+    // Does not allocate memory. Fails if not enough room to enqueue.
+    // Note: Use std::make_move_iterator if the elements should be moved
+    // instead of copied.
+    // Thread-safe.
+    template<typename It>
+    bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
+    {
+        return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
+    }
+
+
+
+    // Attempts to dequeue from the queue.
+    // Returns false if all producer streams appeared empty at the time they
+    // were checked (so, the queue is likely but not guaranteed to be empty).
+    // Never allocates. Thread-safe.
+    template<typename U>
+    bool try_dequeue(U& item)
+    {
+        // Instead of simply trying each producer in turn (which could cause needless contention on the first
+        // producer), we score them heuristically.
+        size_t nonEmptyCount = 0;
+        ProducerBase* best = nullptr;
+        size_t bestSize = 0;
+        for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
+            auto size = ptr->size_approx();
+            if (size > 0) {
+                if (size > bestSize) {
+                    bestSize = size;
+                    best = ptr;
+                }
+                ++nonEmptyCount;
+            }
+        }
+
+        // If there was at least one non-empty queue but it appears empty at the time
+        // we try to dequeue from it, we need to make sure every queue's been tried
+        if (nonEmptyCount > 0) {
+            if ((details::likely)(best->dequeue(item))) {
+                return true;
+            }
+            for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+                if (ptr != best && ptr->dequeue(item)) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
+    // Attempts to dequeue from the queue.
+    // Returns false if all producer streams appeared empty at the time they
+    // were checked (so, the queue is likely but not guaranteed to be empty).
+    // This differs from the try_dequeue(item) method in that this one does
+    // not attempt to reduce contention by interleaving the order that producer
+    // streams are dequeued from. So, using this method can reduce overall throughput
+    // under contention, but will give more predictable results in single-threaded
+    // consumer scenarios. This is mostly only useful for internal unit tests.
+    // Never allocates. Thread-safe.
+    template<typename U>
+    bool try_dequeue_non_interleaved(U& item)
+    {
+        for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) {
+            if (ptr->dequeue(item)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Attempts to dequeue from the queue using an explicit consumer token.
+ // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
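+    // Illustrative usage (sketch; assumes the ConsumerToken(queue) constructor
+    // defined elsewhere in this header):
+    //     moodycamel::ConsumerToken ctok(q);
+    //     int buf[64];
+    //     size_t n = q.try_dequeue_bulk(ctok, buf, 64);  // n <= 64 items written to buf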
+ template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. + size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. 
+ static constexpr bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::value == 2 && + details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; + } + + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { CanAlloc, CannotAlloc }; + + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() : freeListHead(nullptr) { } + FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! + add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. 
the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true) + { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). 
+ // Returns true if the block is now empty (does not apply in explicit context). + template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } + else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } + else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } + + private: + static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : + tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { } + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } + else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do { + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do { + auto nextBlock = block->next; + this->parent->add_block_to_free_list(block); + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. 
+ } + else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } + else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. + + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. + + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. 
This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. + // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } + else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. + // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do { + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do { + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do { + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer 
only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent_) : + ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do { + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH (...) 
{ + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); // NOLINT + } + else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } + else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. + + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } + else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH (...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? 
desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do { + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } + else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH (...) { + do { + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } + else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || + idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! + MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { + return false; + } + else if (!new_block_index()) { + return false; + } + else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) / static_cast::type>(BLOCK_SIZE)); + size_t idx = (tail + offset) & 
(localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do { + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { + destroy(block); + } + else { + freeList.add(block); + } + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { + return create(); + } + else { + return nullptr; + } + } + + +#ifdef MCDBGQ_TRACKMEM + public: + struct MemStats { + size_t allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 
0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + ++stats.usedBlocks; + } + } + else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do { + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. + MemStats getMemStats() + { + return MemStats::getFor(this); + } + private: + friend struct MemStats; +#endif + + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + return ptr; + } + } + } + + return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do { + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } + else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do { + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = &initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } + else { + // Swap (assumes our implicit producer hash is 
initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } + else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1u; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) || + mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { +#else + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + size_t newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } + else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + auto producer = static_cast(recycle_or_create_producer(false)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = 
hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot + mainHash->entries[index].value = producer; + break; + } +#endif + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). + mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do { + index &= hash->capacity - 1u; + probedKey = id; + if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) { + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); + if (!raw) + return nullptr; + char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else + (Traits::free)(ptr ? 
*(reinterpret_cast(ptr) - 1) : nullptr); + } + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) + return nullptr; + + for (size_t i = 0; i != count; ++i) + new (p + i) U(); + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0; ) + (p + --i)->~U(); + } + aligned_free(p); + } + + template + static inline U* create() + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) + p->~U(); + aligned_free(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +} + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif diff --git a/third_party/concurrentqueue/lightweightsemaphore.h b/third_party/concurrentqueue/lightweightsemaphore.h new file mode 100644 index 
0000000000..41ba094384
--- /dev/null
+++ b/third_party/concurrentqueue/lightweightsemaphore.h
@@ -0,0 +1,425 @@
+// Provides an efficient implementation of a semaphore (LightweightSemaphore).
+// This is an extension of Jeff Preshing's semaphore implementation (licensed
+// under the terms of its separate zlib license) that has been adapted and
+// extended by Cameron Desrochers.
+
+#pragma once
+
+#include <cstddef> // For std::size_t
+#include <atomic>
+#include <type_traits> // For std::make_signed
+
+#if defined(_WIN32)
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+    struct _SECURITY_ATTRIBUTES;
+    __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+    __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+    __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+    __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
+#elif defined(__MACH__)
+#include <mach/mach.h>
+#elif defined(__unix__)
+#include <semaphore.h>
+
+#if defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE)
+#if __GLIBC_PREREQ(2,30)
+#define MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC
+#endif
+#endif
+#endif
+
+namespace moodycamel
+{
+namespace details
+{
+
+// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
+// portable + lightweight semaphore implementations, originally from
+// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
+// LICENSE:
+// Copyright (c) 2015 Jeff Preshing
+//
+// This software is provided 'as-is', without any express or implied
+// warranty. In no event will the authors be held liable for any damages
+// arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgement in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)); + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS); + } + } +}; +#elif defined(__unix__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + 
clock_gettime(CLOCK_MONOTONIC, &ts); +#else + clock_gettime(CLOCK_REALTIME, &ts); +#endif + ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; + do { +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + rc = sem_clockwait(&m_sema, CLOCK_MONOTONIC, &ts); +#else + rc = sem_timedwait(&m_sema, &ts); +#endif + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1); + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1); + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif + +} // end namespace details + + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) + return true; + if (timeout_usecs < 0) + { + if (m_sema.wait()) + return true; + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) + return true; + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + return true; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return false; + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) + { + ssize_t newCount = oldCount > max ? 
oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) + { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) + { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) + break; + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) + return 0; + } + } + } + if (max > 1) + return 1 + tryWaitMany(max - 1); + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) + return true; + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) + return oldCount - newCount; + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) + result = waitManyWithPartialSpinning(max, timeout_usecs); + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) + { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? static_cast(count) : 0; + } +}; + +} // end namespace moodycamel diff --git a/tools/python_api/src_cpp/py_database.cpp b/tools/python_api/src_cpp/py_database.cpp index 8b67c9eb95..b93fa7fbc8 100644 --- a/tools/python_api/src_cpp/py_database.cpp +++ b/tools/python_api/src_cpp/py_database.cpp @@ -12,10 +12,7 @@ void PyDatabase::initialize(py::handle& m) { PyDatabase::PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize) { auto systemConfig = SystemConfig(); if (bufferPoolSize > 0) { - systemConfig.defaultPageBufferPoolSize = - bufferPoolSize * StorageConstants::DEFAULT_PAGES_BUFFER_RATIO; - systemConfig.largePageBufferPoolSize = - bufferPoolSize * StorageConstants::LARGE_PAGES_BUFFER_RATIO; + systemConfig.bufferPoolSize = bufferPoolSize; } database = std::make_unique(databasePath, systemConfig); }
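
For reference, a minimal sketch (not part of the patch) of how the unified pool is configured from C++ after this change, mirroring the py_database.cpp hunk above. Only the single systemConfig.bufferPoolSize field comes from the patch; the header name, namespace, database path, and the 1 GiB size are illustrative assumptions.

// Sketch only: assumes the Database(databasePath, systemConfig) constructor
// used in the hunk above; include path, namespace, and values are hypothetical.
#include "main/kuzu.h" // assumed public header exposing Database and SystemConfig
#include <memory>

using namespace kuzu::main; // namespace assumed

int main() {
    auto systemConfig = SystemConfig();
    systemConfig.bufferPoolSize = 1ull << 30; // one pool shared by the buffer manager and memory manager
    auto database = std::make_unique<Database>("example.kuzu", systemConfig);
    return 0;
}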
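Likewise, a small usage sketch (not part of the patch) for the vendored moodycamel::LightweightSemaphore added above, touching only members shown in that file (signal, the timed wait, availableApprox). The include path, thread count, and timeout are illustrative assumptions.

// Sketch only: one producer releases three permits; the consumer waits with a
// short spin-then-block timeout, as implemented by waitWithPartialSpinning above.
#include "third_party/concurrentqueue/lightweightsemaphore.h" // path as added by this patch
#include <thread>
#include <cstdio>

int main() {
    moodycamel::LightweightSemaphore sema; // initial count 0
    std::thread producer([&] {
        for (int i = 0; i < 3; ++i)
            sema.signal(); // release one waiter per produced item
    });
    int consumed = 0;
    while (consumed < 3) {
        if (sema.wait(1000 /* usecs */)) // returns false on timeout, true when a permit was acquired
            ++consumed;
    }
    producer.join();
    std::printf("consumed %d items, %zu permits left\n", consumed, sema.availableApprox());
    return 0;
}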