Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate FileHandle and BufferManagedFileHandle #1365

Merged
merged 1 commit into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,14 @@ namespace common {
constexpr uint64_t DEFAULT_VECTOR_CAPACITY_LOG_2 = 11;
constexpr uint64_t DEFAULT_VECTOR_CAPACITY = (uint64_t)1 << DEFAULT_VECTOR_CAPACITY_LOG_2;

constexpr const double DEFAULT_HT_LOAD_FACTOR = 1.5;
constexpr const uint32_t VAR_LENGTH_EXTEND_MAX_DEPTH = 30;
constexpr double DEFAULT_HT_LOAD_FACTOR = 1.5;
constexpr uint32_t VAR_LENGTH_EXTEND_MAX_DEPTH = 30;

// This is the default thread sleep time we use when a thread,
// e.g., a worker thread is in TaskScheduler, needs to block.
constexpr const uint64_t THREAD_SLEEP_TIME_WHEN_WAITING_IN_MICROS = 500;
constexpr uint64_t THREAD_SLEEP_TIME_WHEN_WAITING_IN_MICROS = 500;

// The number of pages for which we maintain a lock and a page version array for. Multi version file
// design is meant to not contain any page version arrays if a group of pages do not contain
// any updates to reduce our memory footprint.
constexpr const uint64_t MULTI_VERSION_FILE_PAGE_GROUP_SIZE = 64;
constexpr const uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_FOR_TRANSACTIONS_TO_LEAVE_IN_MICROS =
5000000;
constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_FOR_TRANSACTIONS_TO_LEAVE_IN_MICROS = 5000000;

const std::string INTERNAL_ID_SUFFIX = "_id";

Expand All @@ -38,7 +33,7 @@ struct BufferPoolConstants {
static constexpr uint64_t DEFAULT_PAGE_SIZE = 1 << DEFAULT_PAGE_SIZE_LOG_2;
// Page size for files with large pages, e.g., temporary files that are used by operators that
// may require large amounts of memory.
static constexpr const uint64_t LARGE_PAGE_SIZE_LOG_2 = 18;
static constexpr uint64_t LARGE_PAGE_SIZE_LOG_2 = 18;
static constexpr uint64_t LARGE_PAGE_SIZE = 1 << LARGE_PAGE_SIZE_LOG_2;
};

Expand All @@ -64,7 +59,10 @@ struct StorageConstants {
static constexpr char RELS_METADATA_FILE_NAME_FOR_WAL[] = "rels.statistics.wal";
static constexpr char CATALOG_FILE_NAME[] = "catalog.bin";
static constexpr char CATALOG_FILE_NAME_FOR_WAL[] = "catalog.bin.wal";
constexpr static double ARRAY_RESIZING_FACTOR = 1.2;

// The number of pages that we add at one time when we need to grow a file.
static constexpr uint64_t PAGE_GROUP_SIZE_LOG2 = 10;
static constexpr uint64_t PAGE_GROUP_SIZE = (uint64_t)1 << PAGE_GROUP_SIZE_LOG2;
};

struct ListsMetadataConstants {
Expand Down
2 changes: 1 addition & 1 deletion src/include/common/in_mem_overflow_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

#include <vector>

#include "storage/buffer_manager/file_handle.h"
#include "storage/buffer_manager/memory_manager.h"
#include "storage/file_handle.h"

namespace kuzu {
namespace common {
Expand Down
13 changes: 6 additions & 7 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
namespace kuzu {
namespace common {

typedef uint16_t sel_t;
typedef uint64_t hash_t;
typedef uint32_t page_idx_t;
typedef uint32_t page_offset_t;
using sel_t = uint16_t;
using hash_t = uint64_t;
using page_idx_t = uint32_t;
using page_offset_t = uint32_t;
constexpr page_idx_t PAGE_IDX_MAX = UINT32_MAX;
typedef uint32_t list_header_t;

typedef uint32_t property_id_t;
using list_header_t = uint32_t;
using property_id_t = uint32_t;
constexpr property_id_t INVALID_PROPERTY_ID = UINT32_MAX;

// System representation for a variable-sized overflow value.
Expand Down
73 changes: 73 additions & 0 deletions src/include/storage/buffer_manager/buffer_managed_file_handle.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#pragma once

#include "storage/file_handle.h"

namespace kuzu {
namespace storage {

// BufferManagedFileHandle is a file handle that is backed by BufferManager. It holds the state of
// each in the file. File Handle is the bridge between a Column/Lists/Index and the Buffer Manager
// that abstracts the file in which that Column/Lists/Index is stored.
// BufferManagedFileHandle supports two types of files: versioned and non-versioned. Versioned files
// contains mapping from pages that have updates to the versioned pages in the wal file.
// Currently, only MemoryManager and WAL files are non-versioned.
class BufferManagedFileHandle : public FileHandle {
public:
enum class FileVersionedType : uint8_t {
VERSIONED_FILE = 0, // The file is backed by versioned pages in wal file.
NON_VERSIONED_FILE = 1 // The file does not have any versioned pages in wal file.
};

BufferManagedFileHandle(
const std::string& path, uint8_t flags, FileVersionedType fileVersionedType);

bool acquirePageLock(common::page_idx_t pageIdx, bool block);
inline void releasePageLock(common::page_idx_t pageIdx) { pageLocks[pageIdx]->clear(); }

void createPageVersionGroupIfNecessary(common::page_idx_t pageIdx);

// This function is intended to be used after a fileInfo is created and we want the file
// to have not pages and page locks. Should be called after ensuring that the buffer manager
// does not hold any of the pages of the file.
void resetToZeroPagesAndPageCapacity();
void removePageIdxAndTruncateIfNecessary(common::page_idx_t pageIdx);

bool hasWALPageVersionNoPageLock(common::page_idx_t pageIdx);
void clearWALPageVersionIfNecessary(common::page_idx_t pageIdx);
common::page_idx_t getWALPageVersionNoPageLock(common::page_idx_t pageIdx);
void setWALPageVersion(common::page_idx_t originalPageIdx, common::page_idx_t pageIdxInWAL);
void setWALPageVersionNoLock(common::page_idx_t pageIdx, common::page_idx_t pageVersion);

inline common::page_idx_t getFrameIdx(common::page_idx_t pageIdx) {
return pageIdxToFrameMap[pageIdx]->load();
}
inline void swizzle(common::page_idx_t pageIdx, common::page_idx_t swizzledVal) {
pageIdxToFrameMap[pageIdx]->store(swizzledVal);
}
inline void unswizzle(common::page_idx_t pageIdx) {
pageIdxToFrameMap[pageIdx]->store(UINT32_MAX);
}

static inline bool isAFrame(common::page_idx_t mappedFrameIdx) {
return UINT32_MAX != mappedFrameIdx;
}

private:
void initPageIdxToFrameMapAndLocks();
common::page_idx_t addNewPageWithoutLock() override;
bool acquire(common::page_idx_t pageIdx);
void removePageIdxAndTruncateIfNecessaryWithoutLock(common::page_idx_t pageIdxToRemove);
void resizePageGroupLocksAndPageVersionsWithoutLock();
uint32_t getNumPageGroups() {
return ceil((double)numPages / common::StorageConstants::PAGE_GROUP_SIZE);
}

private:
FileVersionedType fileVersionedType;
std::vector<std::unique_ptr<std::atomic_flag>> pageLocks;
std::vector<std::unique_ptr<std::atomic<common::page_idx_t>>> pageIdxToFrameMap;
std::vector<std::vector<common::page_idx_t>> pageVersions;
std::vector<std::unique_ptr<std::atomic_flag>> pageGroupLocks;
};
} // namespace storage
} // namespace kuzu
33 changes: 22 additions & 11 deletions src/include/storage/buffer_manager/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#include <vector>

#include "common/metric.h"
#include "storage/buffer_manager/buffer_managed_file_handle.h"
#include "storage/buffer_manager/buffer_pool.h"
#include "storage/buffer_manager/file_handle.h"

namespace spdlog {
class logger;
Expand Down Expand Up @@ -58,39 +58,50 @@ class BufferManager {
BufferManager(uint64_t maxSizeForDefaultPagePool, uint64_t maxSizeForLargePagePool);
~BufferManager();

uint8_t* pin(FileHandle& fileHandle, common::page_idx_t pageIdx);
uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

// The caller should ensure that the given pageIdx is indeed a new page, so should not be read
// from disk
uint8_t* pinWithoutReadingFromFile(FileHandle& fileHandle, common::page_idx_t pageIdx);
uint8_t* pinWithoutReadingFromFile(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

inline uint8_t* pinWithoutAcquiringPageLock(
FileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile) {
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile) {
return fileHandle.isLargePaged() ? bufferPoolLargePages->pinWithoutAcquiringPageLock(
fileHandle, pageIdx, doNotReadFromFile) :
bufferPoolDefaultPages->pinWithoutAcquiringPageLock(
fileHandle, pageIdx, doNotReadFromFile);
}

void setPinnedPageDirty(FileHandle& fileHandle, common::page_idx_t pageIdx);
void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

// The function assumes that the requested page is already pinned.
void unpin(FileHandle& fileHandle, common::page_idx_t pageIdx);
inline void unpinWithoutAcquiringPageLock(FileHandle& fileHandle, common::page_idx_t pageIdx) {
void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);
inline void unpinWithoutAcquiringPageLock(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx) {
return fileHandle.isLargePaged() ?
bufferPoolLargePages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx) :
bufferPoolDefaultPages->unpinWithoutAcquiringPageLock(fileHandle, pageIdx);
}

void resize(uint64_t newSizeForDefaultPagePool, uint64_t newSizeForLargePagePool);

void removeFilePagesFromFrames(FileHandle& fileHandle);
void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle);

void flushAllDirtyPagesInFrames(FileHandle& fileHandle);
void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle);
void updateFrameIfPageIsInFrameWithoutPageOrFrameLock(
FileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx);
BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx);

void removePageFromFrameIfNecessary(FileHandle& fileHandle, common::page_idx_t pageIdx);
void removePageFromFrameIfNecessary(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

// Note: This function is not thread-safe.
// For files that are managed by BM, their FileHandles should be created through this function.
inline std::unique_ptr<BufferManagedFileHandle> getBufferManagedFileHandle(
const std::string& filePath, uint8_t flags,
BufferManagedFileHandle::FileVersionedType fileVersionedType) {
return std::make_unique<BufferManagedFileHandle>(filePath, flags, fileVersionedType);
}

private:
std::shared_ptr<spdlog::logger> logger;
Expand Down
44 changes: 24 additions & 20 deletions src/include/storage/buffer_manager/buffer_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <vector>

#include "common/metric.h"
#include "storage/buffer_manager/file_handle.h"
#include "storage/buffer_manager/buffer_managed_file_handle.h"

namespace spdlog {
class logger;
Expand Down Expand Up @@ -70,61 +70,65 @@ class BufferPool {
public:
BufferPool(uint64_t pageSize, uint64_t maxSize);

uint8_t* pin(FileHandle& fileHandle, common::page_idx_t pageIdx);
uint8_t* pin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

// Pins a new page that has been added to the file. This means that the BufferManager does not
// need to read the page from the file for now. Ensuring that the given pageIdx is new is the
// responsibility of the caller.
uint8_t* pinWithoutReadingFromFile(FileHandle& fileHandle, common::page_idx_t pageIdx);
uint8_t* pinWithoutReadingFromFile(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

uint8_t* pinWithoutAcquiringPageLock(
FileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);

void setPinnedPageDirty(FileHandle& fileHandle, common::page_idx_t pageIdx);
void setPinnedPageDirty(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

// The function assumes that the requested page is already pinned.
void unpin(FileHandle& fileHandle, common::page_idx_t pageIdx);
void unpin(BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

void unpinWithoutAcquiringPageLock(FileHandle& fileHandle, common::page_idx_t pageIdx);
void unpinWithoutAcquiringPageLock(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

void resize(uint64_t newSize);

// Note: These two functions that remove pages from frames is not designed for concurrency and
// therefore not tested under concurrency. If this is called while other threads are accessing
// the BM, it should work safely but this is not tested.
void removeFilePagesFromFrames(FileHandle& fileHandle);
void removeFilePagesFromFrames(BufferManagedFileHandle& fileHandle);

void flushAllDirtyPagesInFrames(FileHandle& fileHandle);
void flushAllDirtyPagesInFrames(BufferManagedFileHandle& fileHandle);
void updateFrameIfPageIsInFrameWithoutPageOrFrameLock(
FileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx);
BufferManagedFileHandle& fileHandle, uint8_t* newPage, common::page_idx_t pageIdx);

void removePageFromFrameWithoutFlushingIfNecessary(
FileHandle& fileHandle, common::page_idx_t pageIdx);
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx);

private:
uint8_t* pin(FileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);
uint8_t* pin(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);

common::page_idx_t claimAFrame(
FileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);

bool fillEmptyFrame(common::page_idx_t frameIdx, FileHandle& fileHandle,
bool fillEmptyFrame(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle,
common::page_idx_t pageIdx, bool doNotReadFromFile);

bool tryEvict(common::page_idx_t frameIdx, FileHandle& fileHandle, common::page_idx_t pageIdx,
bool doNotReadFromFile);
bool tryEvict(common::page_idx_t frameIdx, BufferManagedFileHandle& fileHandle,
common::page_idx_t pageIdx, bool doNotReadFromFile);

void moveClockHand(uint64_t newClockHand);
// Performs 2 actions:
// 1) Clears the contents of the frame.
// 2) Unswizzles the pageIdx in the frame.
void clearFrameAndUnswizzleWithoutLock(const std::unique_ptr<Frame>& frame,
FileHandle& fileHandleInFrame, common::page_idx_t pageIdxInFrame);
void readNewPageIntoFrame(
Frame& frame, FileHandle& fileHandle, common::page_idx_t pageIdx, bool doNotReadFromFile);
BufferManagedFileHandle& fileHandleInFrame, common::page_idx_t pageIdxInFrame);
void readNewPageIntoFrame(Frame& frame, BufferManagedFileHandle& fileHandle,
common::page_idx_t pageIdx, bool doNotReadFromFile);

void flushIfDirty(const std::unique_ptr<Frame>& frame);

void removePageFromFrame(FileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush);
void removePageFromFrame(
BufferManagedFileHandle& fileHandle, common::page_idx_t pageIdx, bool shouldFlush);

private:
std::shared_ptr<spdlog::logger> logger;
Expand Down
Loading