Skip to content

Commit

Permalink
remove currIdx from datachunk state
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Aug 27, 2023
1 parent d133941 commit 6292d5d
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/common/data_chunk/data_chunk_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace common {
std::shared_ptr<DataChunkState> DataChunkState::getSingleValueDataChunkState() {
auto state = std::make_shared<DataChunkState>(1);
state->initOriginalAndSelectedSize(1);
state->flat = true;
state->setToFlat();
return state;
}

Expand Down
12 changes: 5 additions & 7 deletions src/include/common/data_chunk/data_chunk_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ enum class FactorizationStateType : uint8_t {
class DataChunkState {
public:
DataChunkState() : DataChunkState(DEFAULT_VECTOR_CAPACITY) {}
explicit DataChunkState(uint64_t capacity) : flat{false}, originalSize{0} {
explicit DataChunkState(uint64_t capacity) : factorizationState{FactorizationStateType::UNFLAT}, originalSize{0} {
selVector = std::make_shared<SelectionVector>(capacity);
}

Expand All @@ -33,19 +33,17 @@ class DataChunkState {
inline uint64_t getOriginalSize() {
return originalSize;
}
inline bool isFlat() const { return flat; }
inline void setToFlat() { flat = true; }
inline void setToUnflat() { flat = false; }
inline bool isFlat() const { return factorizationState == FactorizationStateType::FLAT; }
inline void setToFlat() { factorizationState = FactorizationStateType::FLAT; }
inline void setToUnflat() { factorizationState = FactorizationStateType::UNFLAT; }

inline uint64_t getNumSelectedValues() const { return selVector->selectedSize; }

public:
std::shared_ptr<SelectionVector> selVector;

private:
// The currIdx is == 0 when vectors are flattened and -1 if the vectors are unflat.
// uint64_t currentIdx = UINT64_MAX;
bool flat = false;
FactorizationStateType factorizationState = FactorizationStateType::UNFLAT;
// We need to keep track of originalSize of DataChunks to perform consistent scans of vectors
// or lists. This is because all the vectors in a data chunk has to be the same length as they
// share the same selectedPositions array.Therefore, if there is a scan after a filter on the
Expand Down
10 changes: 5 additions & 5 deletions src/include/processor/operator/flatten.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace processor {

class Flatten : public PhysicalOperator, SelVectorOverWriter {
public:
Flatten(uint32_t dataChunkToFlattenPos, std::unique_ptr<PhysicalOperator> child, uint32_t id,
Flatten(data_chunk_pos_t dataChunkToFlattenPos, std::unique_ptr<PhysicalOperator> child, uint32_t id,
const std::string& paramsString)
: PhysicalOperator{PhysicalOperatorType::FLATTEN, std::move(child), id, paramsString},
dataChunkToFlattenPos{dataChunkToFlattenPos} {}
Expand All @@ -25,10 +25,10 @@ class Flatten : public PhysicalOperator, SelVectorOverWriter {
void resetToCurrentSelVector(std::shared_ptr<common::SelectionVector>& selVector) override;

private:
uint32_t dataChunkToFlattenPos;
std::shared_ptr<common::DataChunk> dataChunkToFlatten;
uint64_t idx = 0;
uint64_t size = 0;
data_chunk_pos_t dataChunkToFlattenPos;
common::DataChunkState* dataChunkState;
uint64_t currentIdx = 0;
uint64_t sizeToFlatten = 0;
};

} // namespace processor
Expand Down
1 change: 0 additions & 1 deletion src/processor/operator/aggregate/aggregate_hash_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ void AggregateHashTable::initializeHashTable(uint64_t numEntriesToAllocate) {

void AggregateHashTable::initializeTmpVectors() {
hashState = std::make_shared<DataChunkState>();
// hashState->currIdx = 0;
hashState->setToFlat();
hashVector = std::make_unique<ValueVector>(LogicalTypeID::INT64, &memoryManager);
hashVector->state = hashState;
Expand Down
1 change: 0 additions & 1 deletion src/processor/operator/copy_from/read_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ bool ReadFile::getNextTuplesInternal(ExecutionContext* context) {
ArrowColumnVector::setArrowColumn(
resultSet->getValueVector(dataColumnPoses[i]).get(), recordTable->column((int)i));
}
// assert(!resultSet->dataChunks[0]->state->isFlat());
resultSet->dataChunks[0]->state->setToUnflat();
return true;
}
Expand Down
18 changes: 9 additions & 9 deletions src/processor/operator/flatten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@ namespace kuzu {
namespace processor {

void Flatten::initLocalStateInternal(ResultSet* resultSet, ExecutionContext* context) {
dataChunkToFlatten = resultSet->dataChunks[dataChunkToFlattenPos];
dataChunkState = resultSet->dataChunks[dataChunkToFlattenPos]->state.get();
currentSelVector->resetSelectorToValuePosBufferWithSize(1 /* size */);
}

bool Flatten::getNextTuplesInternal(ExecutionContext* context) {
if (idx == size) {
dataChunkToFlatten->state->setToUnflat();
restoreSelVector(dataChunkToFlatten->state->selVector);
if (currentIdx == sizeToFlatten) {
dataChunkState->setToUnflat(); // TODO(Xiyang): this should be part of restore/save
restoreSelVector(dataChunkState->selVector);
if (!children[0]->getNextTuple(context)) {
return false;
}
idx = 0;
size = dataChunkToFlatten->state->selVector->selectedSize;
saveSelVector(dataChunkToFlatten->state->selVector);
dataChunkToFlatten->state->setToFlat();
currentIdx = 0;
sizeToFlatten = dataChunkState->selVector->selectedSize;
saveSelVector(dataChunkState->selVector);
dataChunkState->setToFlat();
}
currentSelVector->selectedPositions[0] = prevSelVector->selectedPositions[idx++];
currentSelVector->selectedPositions[0] = prevSelVector->selectedPositions[currentIdx++];
metrics->numOutputTuple.incrementByOne();
return true;
}
Expand Down
4 changes: 3 additions & 1 deletion src/processor/operator/hash_join/hash_join_probe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ uint64_t HashJoinProbe::getLeftJoinResult() {
for (auto& vector : vectorsToReadInto) {
vector->setAsSingleNullEntry();
}
// The following for loop
// TODO(Xiyang): We have a bug in LEFT JOIN which should not discard NULL keys. To be more
// clear, NULL keys should only be discarded for probe but should not reflect on the vector.
// The following for loop is a temporary hack.
for (auto& vector : keyVectors) {
assert(vector->state->isFlat());
vector->state->selVector->selectedSize = 1;
Expand Down
1 change: 0 additions & 1 deletion src/processor/result/result_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ uint64_t ResultSet::getNumTuplesWithoutMultiplicity(
uint64_t numTuples = 1;
for (auto& dataChunkPos : dataChunksPosInScope) {
auto state = dataChunks[dataChunkPos]->state;
assert(state->getNumSelectedValues() != 0);
numTuples *= state->getNumSelectedValues();
}
return numTuples;
Expand Down
1 change: 0 additions & 1 deletion src/storage/wal/wal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ void WAL::logAddPropertyRecord(table_id_t tableID, property_id_t propertyID) {
}

void WAL::clearWAL() {
// bufferManager.clearEvictionQueue();
bufferManager.removeFilePagesFromFrames(*fileHandle);
fileHandle->resetToZeroPagesAndPageCapacity();
initCurrentPage();
Expand Down
1 change: 0 additions & 1 deletion test/test_files/tinysnb/match/one_hop.test
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

-LOG OneHopKnowsTest
-STATEMENT MATCH (a:person)-[e:knows]->(b:person) RETURN COUNT(*)
-PARALLELISM 1
-ENUMERATE
---- 1
14
Expand Down

0 comments on commit 6292d5d

Please sign in to comment.