Skip to content

Commit

Permalink
Merge pull request #1540 from kuzudb/column-rework
Browse files Browse the repository at this point in the history
Rework column write function
  • Loading branch information
ray6080 committed May 15, 2023
2 parents 9d7a4d7 + efbc9a6 commit ab47844
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 202 deletions.
58 changes: 21 additions & 37 deletions src/include/storage/storage_structure/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ using lookup_data_func_t = std::function<void(transaction::Transaction* transact
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, common::table_id_t commonTableID,
DiskOverflowFile* diskOverflowFile)>;
// Callback type held in Column::writeDataFunc. Its parameter list mirrors the static writers
// declared in this header (writeValueToPage, writeStringToPage, writeListToPage,
// writeInternalIDToPage), all of which take `uint32_t posInVector` as their fourth parameter.
// Presumably the callee writes the value at posInVector of vector into frame at posInFrame --
// confirm against the writers' definitions.
using write_data_func_t =
    std::function<void(uint8_t* frame, uint16_t posInFrame, common::ValueVector* vector,
        uint32_t posInVector, DiskOverflowFile* diskOverflowFile)>;

class Column : public BaseColumnOrList {
public:
Expand All @@ -42,6 +45,7 @@ class Column : public BaseColumnOrList {
tableID{tableID} {
scanDataFunc = Column::scanValuesFromPage;
lookupDataFunc = Column::lookupValueFromPage;
writeDataFunc = Column::writeValueToPage;
}

// Expose for feature store
Expand All @@ -50,7 +54,7 @@ class Column : public BaseColumnOrList {
virtual void read(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
common::ValueVector* resultVector);

void writeValues(common::ValueVector* nodeIDVector, common::ValueVector* vectorToWriteFrom);
void write(common::ValueVector* nodeIDVector, common::ValueVector* vectorToWriteFrom);

bool isNull(common::offset_t nodeOffset, transaction::Transaction* transaction);
void setNull(common::offset_t nodeOffset);
Expand All @@ -67,32 +71,13 @@ class Column : public BaseColumnOrList {
common::ValueVector* resultVector, uint32_t vectorPos);
virtual void scan(transaction::Transaction* transaction, common::ValueVector* nodeIDVector,
common::ValueVector* resultVector);
virtual void writeValueForSingleNodeIDPosition(common::offset_t nodeOffset,
common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom);
WALPageIdxPosInPageAndFrame beginUpdatingPage(common::offset_t nodeOffset,
common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom);
virtual void write(common::offset_t nodeOffset, common::ValueVector* vectorToWriteFrom,
uint32_t posInVectorToWriteFrom);

void readFromPage(transaction::Transaction* transaction, common::page_idx_t pageIdx,
const std::function<void(uint8_t*)>& func);

private:
// Virtual so that subclasses can change how a value is laid out in the page: a plain memcpy is
// not valid for nodeIDs when the adjColumn has tableIDCompression, because in that case only the
// nodeOffset is kept in the adjColumn's persistent store.
virtual inline void writeToPage(WALPageIdxPosInPageAndFrame& walPageInfo,
    common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom) {
    // Destination: the slot for posInPage inside the frame.
    auto* dstInFrame = walPageInfo.frame + mapElementPosToByteOffset(walPageInfo.posInPage);
    // Source: the element at posInVectorToWriteFrom inside the vector's data buffer.
    const auto* srcInVector =
        vectorToWriteFrom->getData() + getElemByteOffset(posInVectorToWriteFrom);
    memcpy(dstInFrame, srcInVector, elementSize);
}
// If necessary creates a second version (backed by the WAL) of a page that contains the fixed
// length part of the value that will be written to.
// Obtains *and does not release* the lock on the original page. Pins and updates the WAL version of
// the page. Finally updates the page with the new value from vectorToWriteFrom.
// Note that caller must ensure to unpin and release the WAL version of the page by calling
// StorageStructure::unpinWALPageAndReleaseOriginalPageLock.
WALPageIdxPosInPageAndFrame beginUpdatingPageAndWriteOnlyNullBit(
common::offset_t nodeOffset, bool isNull);

static void scanValuesFromPage(transaction::Transaction* transaction, uint8_t* frame,
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, uint32_t numValuesToRead, common::table_id_t commonTableID,
Expand All @@ -101,6 +86,8 @@ class Column : public BaseColumnOrList {
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, common::table_id_t commonTableID,
DiskOverflowFile* diskOverflowFile);
static void writeValueToPage(uint8_t* frame, uint16_t posInFrame, common::ValueVector* vector,
uint32_t posInVector, DiskOverflowFile* diskOverflowFile);

protected:
// no logical-physical page mapping is required for columns
Expand All @@ -110,6 +97,7 @@ class Column : public BaseColumnOrList {

scan_data_func_t scanDataFunc;
lookup_data_func_t lookupDataFunc;
write_data_func_t writeDataFunc;
common::table_id_t tableID;
std::unique_ptr<DiskOverflowFile> diskOverflowFile;
};
Expand All @@ -124,7 +112,6 @@ class PropertyColumnWithOverflow : public Column {
}

// Returns a non-owning raw pointer to the disk overflow file; ownership stays with the
// diskOverflowFile unique_ptr member.
inline DiskOverflowFile* getDiskOverflowFile() { return diskOverflowFile.get(); }

// Returns the file handle reported by the disk overflow file's getFileHandle().
// Dereferences diskOverflowFile without a null check.
inline BMFileHandle* getDiskOverflowFileHandle() { return diskOverflowFile->getFileHandle(); }
};

Expand All @@ -133,10 +120,9 @@ class StringPropertyColumn : public PropertyColumnWithOverflow {
StringPropertyColumn(const StorageStructureIDAndFName& structureIDAndFNameOfMainColumn,
const common::DataType& dataType, BufferManager* bufferManager, WAL* wal)
: PropertyColumnWithOverflow{
structureIDAndFNameOfMainColumn, dataType, bufferManager, wal} {};

void writeValueForSingleNodeIDPosition(common::offset_t nodeOffset,
common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom) override;
structureIDAndFNameOfMainColumn, dataType, bufferManager, wal} {
writeDataFunc = StringPropertyColumn::writeStringToPage;
};

// Currently used only in CopyCSV tests.
common::Value readValueForTestingOnly(common::offset_t offset) override;
Expand All @@ -157,6 +143,8 @@ class StringPropertyColumn : public PropertyColumnWithOverflow {
Column::scan(transaction, nodeIDVector, resultVector);
diskOverflowFile->scanStrings(transaction->getType(), *resultVector);
}
static void writeStringToPage(uint8_t* frame, uint16_t posInFrame, common::ValueVector* vector,
uint32_t posInVector, DiskOverflowFile* diskOverflowFile);
};

class ListPropertyColumn : public PropertyColumnWithOverflow {
Expand All @@ -167,11 +155,9 @@ class ListPropertyColumn : public PropertyColumnWithOverflow {
structureIDAndFNameOfMainColumn, dataType, bufferManager, wal} {
scanDataFunc = ListPropertyColumn::scanListsFromPage;
lookupDataFunc = ListPropertyColumn::lookupListFromPage;
writeDataFunc = ListPropertyColumn::writeListToPage;
};

void writeValueForSingleNodeIDPosition(common::offset_t nodeOffset,
common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom) override;

common::Value readValueForTestingOnly(common::offset_t offset) override;

private:
Expand All @@ -183,6 +169,8 @@ class ListPropertyColumn : public PropertyColumnWithOverflow {
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, common::table_id_t commonTableID,
DiskOverflowFile* diskOverflowFile);
static void writeListToPage(uint8_t* frame, uint16_t posInFrame, common::ValueVector* vector,
uint32_t posInVector, DiskOverflowFile* diskOverflowFile);
};

class StructPropertyColumn : public Column {
Expand All @@ -205,16 +193,10 @@ class InternalIDColumn : public Column {
sizeof(common::offset_t), bufferManager, wal, tableID} {
scanDataFunc = InternalIDColumn::scanInternalIDsFromPage;
lookupDataFunc = InternalIDColumn::lookupInternalIDFromPage;
writeDataFunc = InternalIDColumn::writeInternalIDToPage;
}

private:
// Writes only the offset field of the relID found at posInVectorToWriteFrom into the page slot
// for walPageInfo.posInPage; no other field of the relID is copied.
inline void writeToPage(WALPageIdxPosInPageAndFrame& walPageInfo,
    common::ValueVector* vectorToWriteFrom, uint32_t posInVectorToWriteFrom) override {
    const auto offsetToWrite =
        vectorToWriteFrom->getValue<common::relID_t>(posInVectorToWriteFrom).offset;
    auto* dstInFrame = walPageInfo.frame + mapElementPosToByteOffset(walPageInfo.posInPage);
    memcpy(dstInFrame, &offsetToWrite, sizeof(offsetToWrite));
}

static void scanInternalIDsFromPage(transaction::Transaction* transaction, uint8_t* frame,
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, uint32_t numValuesToRead, common::table_id_t commonTableID,
Expand All @@ -223,6 +205,8 @@ class InternalIDColumn : public Column {
PageElementCursor& pageCursor, common::ValueVector* resultVector, uint32_t posInVector,
uint32_t numElementsPerPage, common::table_id_t commonTableID,
DiskOverflowFile* diskOverflowFile);
static void writeInternalIDToPage(uint8_t* frame, uint16_t posInFrame,
common::ValueVector* vector, uint32_t posInVector, DiskOverflowFile* diskOverflowFile);
};

class AdjColumn : public InternalIDColumn {
Expand Down
15 changes: 2 additions & 13 deletions src/include/storage/storage_structure/storage_structure.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <functional>
#include <utility>

#include "common/constants.h"
#include "common/vector/value_vector.h"
Expand Down Expand Up @@ -70,7 +71,7 @@ class StorageStructure {
class BaseColumnOrList : public StorageStructure {

public:
BaseColumnOrList(const common::DataType& dataType) : dataType{dataType} {}
// Takes the data type by value and moves it into the dataType member. Marked explicit so that
// a common::DataType is never implicitly converted into a BaseColumnOrList.
explicit BaseColumnOrList(common::DataType dataType) : dataType{std::move(dataType)} {}

// Maps the position of element in page to its byte offset in page.
// TODO(Everyone): we should slowly get rid of this function.
Expand Down Expand Up @@ -105,18 +106,6 @@ class BaseColumnOrList : public StorageStructure {
uint16_t pagePosOfFirstElement, uint64_t numValuesToRead, common::table_id_t commonTableID,
bool hasNoNullGuarantee);

void readInternalIDsBySequentialCopyWithSelState(transaction::Transaction* transaction,
common::ValueVector* vector, PageElementCursor& cursor,
const std::function<common::page_idx_t(common::page_idx_t)>& logicalToPhysicalPageMapper,
common::table_id_t commonTableID);

void readBySequentialCopyWithSelState(transaction::Transaction* transaction,
common::ValueVector* vector, PageElementCursor& cursor,
const std::function<common::page_idx_t(common::page_idx_t)>& logicalToPhysicalPageMapper);

void readSingleNullBit(common::ValueVector* valueVector, const uint8_t* frame,
uint64_t elementPos, uint64_t offsetInVector) const;

void setNullBitOfAPosInFrame(const uint8_t* frame, uint16_t elementPos, bool isNull) const;

void readNullBitsFromAPage(common::ValueVector* valueVector, const uint8_t* frame,
Expand Down
2 changes: 1 addition & 1 deletion src/processor/operator/update/set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ bool SetNodeProperty::getNextTuplesInternal(ExecutionContext* context) {
for (auto i = 0u; i < infos.size(); ++i) {
auto info = infos[i].get();
info->evaluator->evaluate();
info->column->writeValues(nodeIDVectors[i], info->evaluator->resultVector.get());
info->column->write(nodeIDVectors[i], info->evaluator->resultVector.get());
}
return true;
}
Expand Down
Loading

0 comments on commit ab47844

Please sign in to comment.