diff --git a/CMakeLists.txt b/CMakeLists.txt index 793cdc1ce2..8c3c1a25a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.11) -project(Kuzu VERSION 0.0.6.4 LANGUAGES CXX) +project(Kuzu VERSION 0.0.6.5 LANGUAGES CXX) find_package(Threads REQUIRED) diff --git a/src/common/vector/auxiliary_buffer.cpp b/src/common/vector/auxiliary_buffer.cpp index 963e87ca6f..c01ce298e4 100644 --- a/src/common/vector/auxiliary_buffer.cpp +++ b/src/common/vector/auxiliary_buffer.cpp @@ -24,7 +24,7 @@ list_entry_t ListAuxiliaryBuffer::addList(uint64_t listSize) { auto listEntry = list_entry_t{size, listSize}; bool needResizeDataVector = size + listSize > capacity; while (size + listSize > capacity) { - capacity *= 2; + capacity *= VAR_LIST_RESIZE_RATIO; } if (needResizeDataVector) { resizeDataVector(dataVector.get()); diff --git a/src/include/common/constants.h b/src/include/common/constants.h index 5c76037fcb..1704d3b8b0 100644 --- a/src/include/common/constants.h +++ b/src/include/common/constants.h @@ -21,6 +21,8 @@ constexpr uint64_t THREAD_SLEEP_TIME_WHEN_WAITING_IN_MICROS = 500; constexpr uint64_t DEFAULT_CHECKPOINT_WAIT_TIMEOUT_FOR_TRANSACTIONS_TO_LEAVE_IN_MICROS = 5000000; +constexpr uint64_t VAR_LIST_RESIZE_RATIO = 2; + struct InternalKeyword { static constexpr char ANONYMOUS[] = ""; static constexpr char ID[] = "_ID"; diff --git a/src/include/storage/copier/string_column_chunk.h b/src/include/storage/copier/string_column_chunk.h index 92b7598c95..535b967317 100644 --- a/src/include/storage/copier/string_column_chunk.h +++ b/src/include/storage/copier/string_column_chunk.h @@ -15,7 +15,6 @@ class StringColumnChunk : public ColumnChunk { arrow::Array* array, common::offset_t startPosInChunk, uint32_t numValuesToAppend) final; void append(ColumnChunk* other, common::offset_t startPosInOtherChunk, common::offset_t startPosInChunk, uint32_t numValuesToAppend) final; - common::page_idx_t flushBuffer(BMFileHandle* dataFH, common::page_idx_t startPageIdx) final; template void setValueFromString(const char* value, uint64_t length, uint64_t pos) { @@ -26,13 +25,13 @@ class StringColumnChunk : public ColumnChunk { throw common::NotImplementedException("VarSizedColumnChunk::getValue"); } -protected: - inline common::page_idx_t getNumPagesForBuffer() const final { - auto numPagesForOffsets = ColumnChunk::getNumPagesForBuffer(); - return numPagesForOffsets + overflowFile->getNumPages(); - } + common::page_idx_t flushOverflowBuffer(BMFileHandle* dataFH, common::page_idx_t startPageIdx); private: + inline common::page_idx_t getNumPages() const final { + return ColumnChunk::getNumPages() + overflowFile->getNumPages(); + } + template void templateCopyVarSizedValuesFromString( arrow::Array* array, common::offset_t startPosInChunk, uint32_t numValuesToAppend); diff --git a/src/include/storage/copier/var_list_column_chunk.h b/src/include/storage/copier/var_list_column_chunk.h index 08b2ce91c6..b856d218fe 100644 --- a/src/include/storage/copier/var_list_column_chunk.h +++ b/src/include/storage/copier/var_list_column_chunk.h @@ -1,5 +1,6 @@ #pragma once +#include "arrow/array/array_nested.h" #include "storage/copier/column_chunk.h" using namespace kuzu::common; @@ -12,11 +13,13 @@ struct VarListDataColumnChunk { uint64_t numValuesInDataChunk; uint64_t capacityInDataChunk; - VarListDataColumnChunk(std::unique_ptr dataChunk) + explicit VarListDataColumnChunk(std::unique_ptr dataChunk) : dataChunk{std::move(dataChunk)}, numValuesInDataChunk{0}, capacityInDataChunk{StorageConstants::NODE_GROUP_SIZE} {} void reset(); + + void resize(uint64_t numValues); }; class VarListColumnChunk : public ColumnChunk { @@ -38,18 +41,42 @@ class VarListColumnChunk : public ColumnChunk { void append(arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) override; + void append(ColumnChunk* other, common::offset_t startPosInOtherChunk, + common::offset_t startPosInChunk, uint32_t numValuesToAppend) final; + void copyVarListFromArrowString( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend); + template void copyVarListFromArrowList( - arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend); + arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { + auto listArray = (T*)array; + auto dataChunkOffsetToAppend = varListDataColumnChunk.numValuesInDataChunk; + for (auto i = 0u; i < numValuesToAppend; i++) { + nullChunk->setNull(i + startPosInChunk, listArray->IsNull(i)); + auto length = listArray->value_length(i); + varListDataColumnChunk.numValuesInDataChunk += length; + setValue(varListDataColumnChunk.numValuesInDataChunk, i + startPosInChunk); + } + auto startOffset = listArray->value_offset(startPosInChunk); + auto endOffset = listArray->value_offset(startPosInChunk + numValuesToAppend); + varListDataColumnChunk.resize(varListDataColumnChunk.numValuesInDataChunk); + varListDataColumnChunk.dataChunk->append( + listArray->values().get(), dataChunkOffsetToAppend, endOffset - startOffset); + } void write(const common::Value& listVal, uint64_t posToWrite) override; - void resizeDataChunk(uint64_t numValues); - private: VarListDataColumnChunk varListDataColumnChunk; + + inline uint64_t getListLen(common::offset_t offset) const { + return getListOffset(offset + 1) - getListOffset(offset); + } + + inline offset_t getListOffset(common::offset_t offset) const { + return offset == 0 ? 0 : getValue(offset - 1); + } }; } // namespace storage diff --git a/src/include/storage/storage_info.h b/src/include/storage/storage_info.h index f73bc7e155..bf508ee535 100644 --- a/src/include/storage/storage_info.h +++ b/src/include/storage/storage_info.h @@ -12,9 +12,9 @@ using storage_version_t = uint64_t; struct StorageVersionInfo { static std::unordered_map getStorageVersionInfo() { - return {{"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, - {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, - {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; + return {{"0.0.6.5", 14}, {"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, + {"0.0.6", 9}, {"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, + {"0.0.3.3", 4}, {"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}}; } static storage_version_t getStorageVersion(); diff --git a/src/include/storage/store/string_node_column.h b/src/include/storage/store/string_node_column.h index 4c99348ddd..b9e5cf7114 100644 --- a/src/include/storage/store/string_node_column.h +++ b/src/include/storage/store/string_node_column.h @@ -19,6 +19,12 @@ class StringNodeColumn : public NodeColumn { common::offset_t startOffsetInGroup, common::offset_t endOffsetInGroup, common::ValueVector* resultVector, uint64_t offsetInVector = 0) final; + common::page_idx_t append(ColumnChunk* columnChunk, common::page_idx_t startPageIdx, + common::node_group_idx_t nodeGroupIdx) final; + + void checkpointInMemory() final; + void rollbackInMemory() final; + protected: void scanInternal(transaction::Transaction* transaction, common::ValueVector* nodeIDVector, common::ValueVector* resultVector) final; @@ -27,10 +33,10 @@ class StringNodeColumn : public NodeColumn { private: void readStringValueFromOvf(transaction::Transaction* transaction, common::ku_string_t& kuStr, - common::ValueVector* resultVector, common::page_idx_t chunkStartPageIdx); + common::ValueVector* resultVector, common::node_group_idx_t nodeGroupIdx); private: - common::page_idx_t ovfPageIdxInChunk; + std::unique_ptr> overflowMetadataDA; }; } // namespace storage diff --git a/src/storage/copier/string_column_chunk.cpp b/src/storage/copier/string_column_chunk.cpp index dc1fcf1f90..c2498f2383 100644 --- a/src/storage/copier/string_column_chunk.cpp +++ b/src/storage/copier/string_column_chunk.cpp @@ -51,7 +51,7 @@ void StringColumnChunk::resetToEmpty() { void StringColumnChunk::append( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { - assert(array->type_id() == arrow::Type::STRING || array->type_id() == arrow::Type::LIST); + assert(array->type_id() == arrow::Type::STRING); switch (array->type_id()) { case arrow::Type::STRING: { switch (dataType.getLogicalTypeID()) { @@ -76,7 +76,7 @@ void StringColumnChunk::append( void StringColumnChunk::append(ColumnChunk* other, offset_t startPosInOtherChunk, offset_t startPosInChunk, uint32_t numValuesToAppend) { - auto otherChunk = dynamic_cast(other); + auto otherChunk = reinterpret_cast(other); nullChunk->append( otherChunk->getNullChunk(), startPosInOtherChunk, startPosInChunk, numValuesToAppend); switch (dataType.getLogicalTypeID()) { @@ -91,21 +91,19 @@ void StringColumnChunk::append(ColumnChunk* other, offset_t startPosInOtherChunk } } -page_idx_t StringColumnChunk::flushBuffer(BMFileHandle* dataFH, page_idx_t startPageIdx) { - ColumnChunk::flushBuffer(dataFH, startPageIdx); - startPageIdx += ColumnChunk::getNumPagesForBuffer(); +page_idx_t StringColumnChunk::flushOverflowBuffer(BMFileHandle* dataFH, page_idx_t startPageIdx) { for (auto i = 0u; i < overflowFile->getNumPages(); i++) { FileUtils::writeToFile(dataFH->getFileInfo(), overflowFile->getPage(i)->data, BufferPoolConstants::PAGE_4KB_SIZE, startPageIdx * BufferPoolConstants::PAGE_4KB_SIZE); startPageIdx++; } - return getNumPagesForBuffer(); + return overflowFile->getNumPages(); } void StringColumnChunk::appendStringColumnChunk(StringColumnChunk* other, offset_t startPosInOtherChunk, offset_t startPosInChunk, uint32_t numValuesToAppend) { - auto otherKuVals = (ku_string_t*)(other->buffer.get()); - auto kuVals = (ku_string_t*)(buffer.get()); + auto otherKuVals = reinterpret_cast(other->buffer.get()); + auto kuVals = reinterpret_cast(buffer.get()); for (auto i = 0u; i < numValuesToAppend; i++) { auto posInChunk = i + startPosInChunk; auto posInOtherChunk = i + startPosInOtherChunk; diff --git a/src/storage/copier/var_list_column_chunk.cpp b/src/storage/copier/var_list_column_chunk.cpp index 4ddd0cdbf8..87c31071c6 100644 --- a/src/storage/copier/var_list_column_chunk.cpp +++ b/src/storage/copier/var_list_column_chunk.cpp @@ -11,6 +11,16 @@ void VarListDataColumnChunk::reset() { numValuesInDataChunk = 0; } +void VarListDataColumnChunk::resize(uint64_t numValues) { + if (numValues <= capacityInDataChunk) { + return; + } + while (capacityInDataChunk < numValues) { + capacityInDataChunk *= VAR_LIST_RESIZE_RATIO; + } + dataChunk->resize(capacityInDataChunk); +} + VarListColumnChunk::VarListColumnChunk(LogicalType dataType, CopyDescription* copyDescription) : ColumnChunk{std::move(dataType), copyDescription, true /* hasNullChunk */}, varListDataColumnChunk{ColumnChunkFactory::createColumnChunk( @@ -20,13 +30,17 @@ VarListColumnChunk::VarListColumnChunk(LogicalType dataType, CopyDescription* co void VarListColumnChunk::append( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { - assert(array->type_id() == arrow::Type::STRING || array->type_id() == arrow::Type::LIST); + assert(array->type_id() == arrow::Type::STRING || array->type_id() == arrow::Type::LIST || + array->type_id() == arrow::Type::LARGE_LIST); switch (array->type_id()) { case arrow::Type::STRING: { copyVarListFromArrowString(array, startPosInChunk, numValuesToAppend); } break; case arrow::Type::LIST: { - copyVarListFromArrowList(array, startPosInChunk, numValuesToAppend); + copyVarListFromArrowList(array, startPosInChunk, numValuesToAppend); + } break; + case arrow::Type::LARGE_LIST: { + copyVarListFromArrowList(array, startPosInChunk, numValuesToAppend); } break; default: { throw NotImplementedException("ListColumnChunk::appendArray"); @@ -34,6 +48,24 @@ void VarListColumnChunk::append( } } +void VarListColumnChunk::append(ColumnChunk* other, offset_t startPosInOtherChunk, + offset_t startPosInChunk, uint32_t numValuesToAppend) { + nullChunk->append( + other->getNullChunk(), startPosInOtherChunk, startPosInChunk, numValuesToAppend); + auto otherListChunk = reinterpret_cast(other); + auto offsetInDataChunkToAppend = varListDataColumnChunk.numValuesInDataChunk; + for (auto i = 0u; i < numValuesToAppend; i++) { + varListDataColumnChunk.numValuesInDataChunk += + otherListChunk->getListLen(startPosInOtherChunk + i); + setValue(varListDataColumnChunk.numValuesInDataChunk, startPosInChunk + i); + } + auto startOffset = otherListChunk->getListOffset(startPosInOtherChunk); + auto endOffset = otherListChunk->getListOffset(startPosInOtherChunk + numValuesToAppend); + varListDataColumnChunk.resize(varListDataColumnChunk.numValuesInDataChunk); + varListDataColumnChunk.dataChunk->append(otherListChunk->varListDataColumnChunk.dataChunk.get(), + startOffset, offsetInDataChunkToAppend, endOffset - startOffset); +} + void VarListColumnChunk::copyVarListFromArrowString( arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { auto stringArray = (arrow::StringArray*)array; @@ -62,29 +94,10 @@ void VarListColumnChunk::copyVarListFromArrowString( } } -void VarListColumnChunk::copyVarListFromArrowList( - arrow::Array* array, offset_t startPosInChunk, uint32_t numValuesToAppend) { - // auto listArray = (arrow::ListArray*)array; - // auto offsetArray = listArray->offsets()->Slice(1 /* offset */); - // append(offsetArray.get(), startPosInChunk, numValuesToAppend); - // if (offsetArray->data()->MayHaveNulls()) { - // for (auto i = 0u; i < numValuesToAppend; i++) { - // nullChunk->setNull(i + startPosInChunk, offsetArray->data()->IsNull(i)); - // } - // } else { - // nullChunk->setRangeNoNull(startPosInChunk, numValuesToAppend); - // } - // auto startOffset = listArray->value_offset(startPosInChunk); - // auto endOffset = listArray->value_offset(startPosInChunk + numValuesToAppend); - // varListDataColumnChunk.dataChunk->resize(endOffset - startOffset); - // varListDataColumnChunk.dataChunk->append( - // listArray->offsets().get(), startOffset, endOffset - startOffset); -} - void VarListColumnChunk::write(const Value& listVal, uint64_t posToWrite) { assert(listVal.getDataType()->getPhysicalType() == PhysicalTypeID::VAR_LIST); auto numValuesInList = NestedVal::getChildrenSize(&listVal); - resizeDataChunk(varListDataColumnChunk.numValuesInDataChunk + numValuesInList); + varListDataColumnChunk.resize(varListDataColumnChunk.numValuesInDataChunk + numValuesInList); for (auto i = 0u; i < numValuesInList; i++) { varListDataColumnChunk.dataChunk->write( *NestedVal::getChildVal(&listVal, i), varListDataColumnChunk.numValuesInDataChunk); @@ -104,15 +117,5 @@ void VarListColumnChunk::resetToEmpty() { varListDataColumnChunk.reset(); } -void VarListColumnChunk::resizeDataChunk(uint64_t numValues) { - if (numValues <= varListDataColumnChunk.capacityInDataChunk) { - return; - } - while (varListDataColumnChunk.capacityInDataChunk < numValues) { - varListDataColumnChunk.capacityInDataChunk *= 2; - } - varListDataColumnChunk.dataChunk->resize(varListDataColumnChunk.capacityInDataChunk); -} - } // namespace storage } // namespace kuzu diff --git a/src/storage/storage_manager.cpp b/src/storage/storage_manager.cpp index 23cf65fe6d..fa135d3007 100644 --- a/src/storage/storage_manager.cpp +++ b/src/storage/storage_manager.cpp @@ -46,6 +46,10 @@ std::unique_ptr StorageManager::createMetadataDAHInfo( metadataDAHInfo->childrenInfos.push_back( createMetadataDAHInfo(*VarListType::getChildType(&dataType))); } break; + case PhysicalTypeID::STRING: { + auto dummyChildType = LogicalType{LogicalTypeID::ANY}; + metadataDAHInfo->childrenInfos.push_back(createMetadataDAHInfo(dummyChildType)); + } break; default: { // DO NOTHING. } diff --git a/src/storage/store/string_node_column.cpp b/src/storage/store/string_node_column.cpp index 4b588d862a..30b298b9d7 100644 --- a/src/storage/store/string_node_column.cpp +++ b/src/storage/store/string_node_column.cpp @@ -1,5 +1,7 @@ #include "storage/store/string_node_column.h" +#include "storage/copier/string_column_chunk.h" + using namespace kuzu::catalog; using namespace kuzu::common; using namespace kuzu::transaction; @@ -22,11 +24,12 @@ StringNodeColumn::StringNodeColumn(LogicalType dataType, const MetadataDAHInfo& BMFileHandle* dataFH, BMFileHandle* metadataFH, BufferManager* bufferManager, WAL* wal) : NodeColumn{ std::move(dataType), metaDAHeaderInfo, dataFH, metadataFH, bufferManager, wal, true} { - ovfPageIdxInChunk = ColumnChunk::getNumPagesForBytes( - numBytesPerFixedSizedValue << StorageConstants::NODE_GROUP_SIZE_LOG2); if (this->dataType.getLogicalTypeID() == LogicalTypeID::STRING) { writeNodeColumnFunc = StringNodeColumnFunc::writeStringValuesToPage; } + overflowMetadataDA = std::make_unique>(*metadataFH, + StorageStructureID::newMetadataID(), metaDAHeaderInfo.childrenInfos[0]->dataDAHPageIdx, + bufferManager, wal); } void StringNodeColumn::scan(transaction::Transaction* transaction, node_group_idx_t nodeGroupIdx, @@ -37,24 +40,44 @@ void StringNodeColumn::scan(transaction::Transaction* transaction, node_group_id NodeColumn::scan(transaction, nodeGroupIdx, startOffsetInGroup, endOffsetInGroup, resultVector, offsetInVector); auto numValuesToRead = endOffsetInGroup - startOffsetInGroup; - auto chunkStartPageIdx = metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; for (auto i = 0u; i < numValuesToRead; i++) { auto pos = offsetInVector + i; if (resultVector->isNull(pos)) { continue; } readStringValueFromOvf( - transaction, resultVector->getValue(pos), resultVector, chunkStartPageIdx); + transaction, resultVector->getValue(pos), resultVector, nodeGroupIdx); } } +page_idx_t StringNodeColumn::append( + storage::ColumnChunk* columnChunk, page_idx_t startPageIdx, node_group_idx_t nodeGroupIdx) { + auto numPagesForMainChunk = NodeColumn::append(columnChunk, startPageIdx, nodeGroupIdx); + auto stringColumnChunk = reinterpret_cast(columnChunk); + auto numPagesForOverflow = + stringColumnChunk->flushOverflowBuffer(dataFH, startPageIdx + numPagesForMainChunk); + overflowMetadataDA->resize(nodeGroupIdx + 1); + overflowMetadataDA->update(nodeGroupIdx, + ColumnChunkMetadata{startPageIdx + numPagesForMainChunk, numPagesForOverflow}); + return numPagesForMainChunk + numPagesForOverflow; +} + +void StringNodeColumn::checkpointInMemory() { + NodeColumn::checkpointInMemory(); + overflowMetadataDA->checkpointInMemoryIfNecessary(); +} + +void StringNodeColumn::rollbackInMemory() { + NodeColumn::rollbackInMemory(); + overflowMetadataDA->rollbackInMemoryIfNecessary(); +} + void StringNodeColumn::scanInternal( Transaction* transaction, ValueVector* nodeIDVector, ValueVector* resultVector) { assert(resultVector->dataType.getPhysicalType() == PhysicalTypeID::STRING); auto startNodeOffset = nodeIDVector->readNodeOffset(0); assert(startNodeOffset % DEFAULT_VECTOR_CAPACITY == 0); auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(startNodeOffset); - auto chunkStartPageIdx = metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; NodeColumn::scanInternal(transaction, nodeIDVector, resultVector); for (auto i = 0u; i < nodeIDVector->state->selVector->selectedSize; i++) { auto pos = nodeIDVector->state->selVector->selectedPositions[i]; @@ -62,7 +85,7 @@ void StringNodeColumn::scanInternal( continue; } readStringValueFromOvf( - transaction, resultVector->getValue(pos), resultVector, chunkStartPageIdx); + transaction, resultVector->getValue(pos), resultVector, nodeGroupIdx); } } @@ -71,25 +94,24 @@ void StringNodeColumn::lookupInternal( assert(dataType.getPhysicalType() == PhysicalTypeID::STRING); auto startNodeOffset = nodeIDVector->readNodeOffset(0); auto nodeGroupIdx = getNodeGroupIdxFromNodeOffset(startNodeOffset); - auto chunkStartPageIdx = metadataDA->get(nodeGroupIdx, transaction->getType()).pageIdx; NodeColumn::lookupInternal(transaction, nodeIDVector, resultVector); for (auto i = 0u; i < nodeIDVector->state->selVector->selectedSize; i++) { auto pos = resultVector->state->selVector->selectedPositions[i]; if (!resultVector->isNull(pos)) { - readStringValueFromOvf(transaction, resultVector->getValue(pos), - resultVector, chunkStartPageIdx); + readStringValueFromOvf( + transaction, resultVector->getValue(pos), resultVector, nodeGroupIdx); } } } void StringNodeColumn::readStringValueFromOvf(Transaction* transaction, ku_string_t& kuStr, - ValueVector* resultVector, page_idx_t chunkStartPageIdx) { + ValueVector* resultVector, node_group_idx_t nodeGroupIdx) { if (ku_string_t::isShortString(kuStr.len)) { return; } PageByteCursor cursor; TypeUtils::decodeOverflowPtr(kuStr.overflowPtr, cursor.pageIdx, cursor.offsetInPage); - cursor.pageIdx += (ovfPageIdxInChunk + chunkStartPageIdx); + cursor.pageIdx += overflowMetadataDA->get(nodeGroupIdx, TransactionType::READ_ONLY).pageIdx; auto [fileHandleToPin, pageIdxToPin] = StorageStructureUtils::getFileHandleAndPhysicalPageIdxToPin( *dataFH, cursor.pageIdx, *wal, transaction->getType()); diff --git a/test/test_files/copy/copy_node_parquet.test b/test/test_files/copy/copy_node_parquet.test index af2e3a6785..48c28c8c5a 100644 --- a/test/test_files/copy/copy_node_parquet.test +++ b/test/test_files/copy/copy_node_parquet.test @@ -1,7 +1,5 @@ -GROUP CopyNodeFromParquetTest -DATASET PARQUET copy-test/node/parquet --SKIP -# FIXME: Found a concurrent bug. Fix later. -- diff --git a/test/test_files/shortest_path/bfs_sssp.test b/test/test_files/shortest_path/bfs_sssp.test index fc290c914a..487304b790 100644 --- a/test/test_files/shortest_path/bfs_sssp.test +++ b/test/test_files/shortest_path/bfs_sssp.test @@ -3,7 +3,7 @@ -- --CASE Bfs +-CASE BfsCSV -LOG SingleSourceAllDestinationsSSP -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN a.fName, b.fName, rels(r), properties(nodes(r), 'fName') diff --git a/test/test_files/shortest_path/bfs_sssp_parquet.test b/test/test_files/shortest_path/bfs_sssp_parquet.test index d5474e74bb..a43f6c3177 100644 --- a/test/test_files/shortest_path/bfs_sssp_parquet.test +++ b/test/test_files/shortest_path/bfs_sssp_parquet.test @@ -1,43 +1,41 @@ -# FIXME: this test is segfaulting -GROUP ShortestPathTest -DATASET PARQUET CSV_TO_PARQUET(shortest-path-tests) --SKIP -- -CASE BfsParquet -LOG SingleSourceAllDestinationsSSP --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN a.fName, b.fName, r +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN a.fName, b.fName, rels(r), properties(nodes(r), 'fName') ---- 7 -Alice|Bob|[0:0,1:0,0:1] -Alice|Carol|[0:0,1:1,0:2] -Alice|Dan|[0:0,1:2,0:3] -Alice|Elizabeth|[0:0,1:0,0:1,1:6,0:4] -Alice|Farooq|[0:0,1:0,0:1,1:6,0:4,1:13,0:5] -Alice|Greg|[0:0,1:0,0:1,1:6,0:4,1:14,0:6] -Alice|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[0:0,1:0,0:1,1:6,0:4,1:15,0:7] +Alice|Bob|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1)]|[] +Alice|Carol|[(0:0)-{_LABEL: knows, _ID: 1:1}->(0:2)]|[] +Alice|Dan|[(0:0)-{_LABEL: knows, _ID: 1:2}->(0:3)]|[] +Alice|Elizabeth|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4)]|[Bob] +Alice|Farooq|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:13}->(0:5)]|[Bob,Elizabeth] +Alice|Greg|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:14}->(0:6)]|[Bob,Elizabeth] +Alice|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7)]|[Bob,Elizabeth] -LOG AllSourcesSingleDestinationQuery --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE b.fName = 'Alice' RETURN a.fName, b.fName, r +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE b.fName = 'Alice' RETURN a.fName, b.fName, rels(r), properties(nodes(r), 'usedNames') ---- 6 -Bob|Alice|[0:0,1:3,0:1] -Carol|Alice|[0:0,1:7,0:2] -Dan|Alice|[0:0,1:10,0:3] -Elizabeth|Alice|[0:0,1:20,0:7,1:15,0:4] -Farooq|Alice|[0:0,1:20,0:7,1:17,0:5] -Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[0:0,1:20,0:7] +Bob|Alice|[(0:0)-{_LABEL: knows, _ID: 1:3}->(0:1)]|[] +Carol|Alice|[(0:0)-{_LABEL: knows, _ID: 1:7}->(0:2)]|[] +Dan|Alice|[(0:0)-{_LABEL: knows, _ID: 1:10}->(0:3)]|[] +Elizabeth|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:15}->(0:4)]|[[Ad,De,Hi,Kye,Orlan]] +Farooq|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:17}->(0:5)]|[[Ad,De,Hi,Kye,Orlan]] +Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7)]|[] -LOG SingleSourceWithAllProperties -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN length(r), b, a ---- 7 -1|(label:person, 0:1, {ID:2, fName:Bob, gender:2, isStudent:True, isWorker:False, age:30, eyeSight:5.100000, birthdate:1900-01-01, registerTime:2008-11-03 15:25:30.000526, lastJobDuration:10 years 5 months 13:00:00.000024, workedHours:[12,8], usedNames:[Bobby], courseScoresPerTerm:[[8,9],[9,10]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -1|(label:person, 0:2, {ID:3, fName:Carol, gender:1, isStudent:False, isWorker:True, age:45, eyeSight:5.000000, birthdate:1940-06-22, registerTime:1911-08-20 02:32:21, lastJobDuration:48:24:11, workedHours:[4,5], usedNames:[Carmen,Fred], courseScoresPerTerm:[[8,10]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -1|(label:person, 0:3, {ID:5, fName:Dan, gender:2, isStudent:False, isWorker:True, age:20, eyeSight:4.800000, birthdate:1950-07-23, registerTime:2031-11-30 12:25:30, lastJobDuration:10 years 5 months 13:00:00.000024, workedHours:[1,9], usedNames:[Wolfeschlegelstein,Daniel], courseScoresPerTerm:[[7,4],[8,8],[9]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -2|(label:person, 0:4, {ID:7, fName:Elizabeth, gender:1, isStudent:False, isWorker:True, age:20, eyeSight:4.700000, birthdate:1980-10-26, registerTime:1976-12-23 11:21:42, lastJobDuration:48:24:11, workedHours:[2], usedNames:[Ein], courseScoresPerTerm:[[6],[7],[8]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -3|(label:person, 0:5, {ID:8, fName:Farooq, gender:2, isStudent:True, isWorker:False, age:25, eyeSight:4.500000, birthdate:1980-10-26, registerTime:1972-07-31 13:22:30.678559, lastJobDuration:00:18:00.024, workedHours:[3,4,5,6,7], usedNames:[Fesdwe], courseScoresPerTerm:[[8]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -3|(label:person, 0:6, {ID:9, fName:Greg, gender:2, isStudent:False, isWorker:False, age:40, eyeSight:4.900000, birthdate:1980-10-26, registerTime:1976-12-23 04:41:42, lastJobDuration:10 years 5 months 13:00:00.000024, workedHours:[1], usedNames:[Grad], courseScoresPerTerm:[[10]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) -3|(label:person, 0:7, {ID:10, fName:Hubert Blaine Wolfeschlegelsteinhausenbergerdorff, gender:2, isStudent:False, isWorker:True, age:83, eyeSight:4.900000, birthdate:1990-11-27, registerTime:2023-02-21 13:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,11,12,3,4,5,6,7], usedNames:[Ad,De,Hi,Kye,Orlan], courseScoresPerTerm:[[7],[10],[6,7]]})|(label:person, 0:0, {ID:0, fName:Alice, gender:1, isStudent:True, isWorker:False, age:35, eyeSight:5.000000, birthdate:1900-01-01, registerTime:2011-08-20 11:25:30, lastJobDuration:3 years 2 days 13:02:00, workedHours:[10,5], usedNames:[Aida], courseScoresPerTerm:[[10,8],[6,7,8]]}) +1|{_ID: 0:1, _LABEL: person, ID: 2, fName: Bob, gender: 2, isStudent: True, isWorker: False, age: 30, eyeSight: 5.100000, birthdate: 1900-01-01, registerTime: 2008-11-03 15:25:30.000526, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [12,8], usedNames: [Bobby], courseScoresPerTerm: [[8,9],[9,10]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +1|{_ID: 0:2, _LABEL: person, ID: 3, fName: Carol, gender: 1, isStudent: False, isWorker: True, age: 45, eyeSight: 5.000000, birthdate: 1940-06-22, registerTime: 1911-08-20 02:32:21, lastJobDuration: 48:24:11, workedHours: [4,5], usedNames: [Carmen,Fred], courseScoresPerTerm: [[8,10]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +1|{_ID: 0:3, _LABEL: person, ID: 5, fName: Dan, gender: 2, isStudent: False, isWorker: True, age: 20, eyeSight: 4.800000, birthdate: 1950-07-23, registerTime: 2031-11-30 12:25:30, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1,9], usedNames: [Wolfeschlegelstein,Daniel], courseScoresPerTerm: [[7,4],[8,8],[9]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +2|{_ID: 0:4, _LABEL: person, ID: 7, fName: Elizabeth, gender: 1, isStudent: False, isWorker: True, age: 20, eyeSight: 4.700000, birthdate: 1980-10-26, registerTime: 1976-12-23 11:21:42, lastJobDuration: 48:24:11, workedHours: [2], usedNames: [Ein], courseScoresPerTerm: [[6],[7],[8]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +3|{_ID: 0:5, _LABEL: person, ID: 8, fName: Farooq, gender: 2, isStudent: True, isWorker: False, age: 25, eyeSight: 4.500000, birthdate: 1980-10-26, registerTime: 1972-07-31 13:22:30.678559, lastJobDuration: 00:18:00.024, workedHours: [3,4,5,6,7], usedNames: [Fesdwe], courseScoresPerTerm: [[8]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +3|{_ID: 0:6, _LABEL: person, ID: 9, fName: Greg, gender: 2, isStudent: False, isWorker: False, age: 40, eyeSight: 4.900000, birthdate: 1980-10-26, registerTime: 1976-12-23 04:41:42, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1], usedNames: [Grad], courseScoresPerTerm: [[10]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +3|{_ID: 0:7, _LABEL: person, ID: 10, fName: Hubert Blaine Wolfeschlegelsteinhausenbergerdorff, gender: 2, isStudent: False, isWorker: True, age: 83, eyeSight: 4.900000, birthdate: 1990-11-27, registerTime: 2023-02-21 13:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,11,12,3,4,5,6,7], usedNames: [Ad,De,Hi,Kye,Orlan], courseScoresPerTerm: [[7],[10],[6,7]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -LOG SingleSourceSingleDestination -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' AND b.fName = 'Bob' RETURN a.fName, b.fName, length(r) @@ -45,13 +43,13 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[0:0,1:20,0:7] Alice|Bob|1 -LOG SingleSourceAllDestinations2 --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..2]->(b:person) WHERE a.fName = 'Elizabeth' RETURN a.fName, b.fName, r +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..2]->(b:person) WHERE a.fName = 'Elizabeth' RETURN a.fName, b.fName, rels(r), properties(nodes(r), '_Label') ---- 5 -Elizabeth|Alice|[0:4,1:15,0:7,1:20,0:0] -Elizabeth|Dan|[0:4,1:15,0:7,1:21,0:3] -Elizabeth|Farooq|[0:4,1:13,0:5] -Elizabeth|Greg|[0:4,1:14,0:6] -Elizabeth|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[0:4,1:15,0:7] +Elizabeth|Alice|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:20}->(0:0)]|[person] +Elizabeth|Dan|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:21}->(0:3)]|[person] +Elizabeth|Farooq|[(0:4)-{_LABEL: knows, _ID: 1:13}->(0:5)]|[] +Elizabeth|Greg|[(0:4)-{_LABEL: knows, _ID: 1:14}->(0:6)]|[] +Elizabeth|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7)]|[] -LOG SingleSourceUnreachableDestination -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' AND b.fName = 'Alice11' RETURN a.fName, b.fName, r