From 5e79415fc70c86ff25eea04535a73f85bb826e4b Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Tue, 12 Mar 2024 19:58:36 +0800 Subject: [PATCH] fix sliding out-of-place commit and null strings --- src/storage/store/column.cpp | 21 ++-- src/storage/store/column_chunk.cpp | 12 ++- src/storage/store/dictionary_chunk.cpp | 4 +- src/storage/store/rel_table_data.cpp | 1 + src/storage/store/string_column.cpp | 3 + src/storage/store/string_column_chunk.cpp | 4 + test/test_files/issue/issue2.test | 112 ++++++++++++++++++++++ 7 files changed, 140 insertions(+), 17 deletions(-) create mode 100644 test/test_files/issue/issue2.test diff --git a/src/storage/store/column.cpp b/src/storage/store/column.cpp index e9e78bfa7f6..82f12d5701c 100644 --- a/src/storage/store/column.cpp +++ b/src/storage/store/column.cpp @@ -777,20 +777,15 @@ void Column::commitColumnChunkOutOfPlace(Transaction* transaction, node_group_id } else { auto chunkMeta = getMetadata(nodeGroupIdx, transaction->getType()); auto maxDstOffset = getMaxOffset(dstOffsets); - if (maxDstOffset < chunkMeta.numValues) { - // TODO(Guodong): Should consider caching the scanned column chunk to avoid redundant - // scans in the same transaction. - auto columnChunk = getEmptyChunkForCommit(chunkMeta.numValues + dstOffsets.size()); - scan(transaction, nodeGroupIdx, columnChunk.get()); - for (auto i = 0u; i < dstOffsets.size(); i++) { - columnChunk->write(chunk, srcOffset + i, dstOffsets[i], 1 /* numValues */); - } - columnChunk->finalize(); - append(columnChunk.get(), nodeGroupIdx); - } else { - chunk->finalize(); - append(chunk, nodeGroupIdx); + // TODO(Guodong): Should consider caching the scanned column chunk to avoid redundant + // scans in the same transaction. + auto columnChunk = getEmptyChunkForCommit(chunkMeta.numValues + dstOffsets.size()); + scan(transaction, nodeGroupIdx, columnChunk.get()); + for (auto i = 0u; i < dstOffsets.size(); i++) { + columnChunk->write(chunk, srcOffset + i, dstOffsets[i], 1 /* numValues */); } + columnChunk->finalize(); + append(columnChunk.get(), nodeGroupIdx); } } diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 309dcf9833f..986c08a6e6a 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -267,8 +267,10 @@ void ColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset_t o void ColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { - KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy)); KU_ASSERT(srcChunk->dataType.getPhysicalType() == dataType.getPhysicalType()); + if ((dstOffsetInChunk + numValuesToCopy) >= numValues) { + numValues = dstOffsetInChunk + numValuesToCopy; + } memcpy(buffer.get() + dstOffsetInChunk * numBytesPerValue, srcChunk->buffer.get() + srcOffsetInChunk * numBytesPerValue, numValuesToCopy * numBytesPerValue); @@ -454,7 +456,9 @@ void BoolColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset void BoolColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { - KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy)); + if ((dstOffsetInChunk + numValuesToCopy) >= numValues) { + numValues = dstOffsetInChunk + numValuesToCopy; + } NullMask::copyNullMask((uint64_t*)static_cast(srcChunk)->buffer.get(), srcOffsetInChunk, (uint64_t*)buffer.get(), dstOffsetInChunk, numValuesToCopy); } @@ -478,7 +482,9 @@ void NullColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset void NullColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { - KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy)); + if ((dstOffsetInChunk + numValuesToCopy) >= numValues) { + numValues = dstOffsetInChunk + numValuesToCopy; + } copyFromBuffer((uint64_t*)static_cast(srcChunk)->buffer.get(), srcOffsetInChunk, dstOffsetInChunk, numValuesToCopy); } diff --git a/src/storage/store/dictionary_chunk.cpp b/src/storage/store/dictionary_chunk.cpp index 69dc48ffda7..8f6f853e689 100644 --- a/src/storage/store/dictionary_chunk.cpp +++ b/src/storage/store/dictionary_chunk.cpp @@ -35,7 +35,9 @@ void DictionaryChunk::resetToEmpty() { } uint64_t DictionaryChunk::getStringLength(string_index_t index) const { - if (index + 1 < offsetChunk->getNumValues()) { + if (stringDataChunk->getNumValues() == 0) { + return 0; + } else if (index + 1 < offsetChunk->getNumValues()) { KU_ASSERT(offsetChunk->getValue(index + 1) >= offsetChunk->getValue(index)); return offsetChunk->getValue(index + 1) - diff --git a/src/storage/store/rel_table_data.cpp b/src/storage/store/rel_table_data.cpp index 65e9d999971..a10ea4392b4 100644 --- a/src/storage/store/rel_table_data.cpp +++ b/src/storage/store/rel_table_data.cpp @@ -507,6 +507,7 @@ void RelTableData::distributeAndUpdateColumn(Transaction* transaction, auto newSize = localState.rightCSROffset - localState.leftCSROffset + 1; auto newChunk = ColumnChunkFactory::createColumnChunk( *column->getDataType().copy(), enableCompression, newSize); + newChunk->getNullChunk()->resetToAllNull(); auto maxNumNodesToDistribute = std::min( rightNodeBoundary - leftNodeBoundary + 1, persistentState.header.offset->getNumValues()); // Third, copy the rels to the new chunk. diff --git a/src/storage/store/string_column.cpp b/src/storage/store/string_column.cpp index eb333fbc6e8..42cb4f1ddbc 100644 --- a/src/storage/store/string_column.cpp +++ b/src/storage/store/string_column.cpp @@ -204,6 +204,9 @@ bool StringColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t n auto strChunk = ku_dynamic_cast(chunk); auto length = std::min((uint64_t)dstOffsets.size(), strChunk->getNumValues()); for (auto i = 0u; i < length; i++) { + if (strChunk->getNullChunk()->isNull(i)) { + continue; + } strLenToAdd += strChunk->getStringLength(i + srcOffset); } auto numStrings = dstOffsets.size(); diff --git a/src/storage/store/string_column_chunk.cpp b/src/storage/store/string_column_chunk.cpp index 696b6e36a3f..e0c18523546 100644 --- a/src/storage/store/string_column_chunk.cpp +++ b/src/storage/store/string_column_chunk.cpp @@ -91,6 +91,9 @@ void StringColumnChunk::write(ColumnChunk* chunk, ColumnChunk* dstOffsets, bool void StringColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk, offset_t numValuesToCopy) { KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::STRING); + if ((dstOffsetInChunk + numValuesToCopy) >= numValues) { + numValues = dstOffsetInChunk + numValuesToCopy; + } for (auto i = 0u; i < numValuesToCopy; i++) { auto srcPos = srcOffsetInChunk + i; auto dstPos = dstOffsetInChunk + i; @@ -163,6 +166,7 @@ void StringColumnChunk::finalize() { template<> std::string_view StringColumnChunk::getValue(offset_t pos) const { KU_ASSERT(pos < numValues); + KU_ASSERT(!nullChunk->isNull(pos)); auto index = ColumnChunk::getValue(pos); return dictionaryChunk->getString(index); } diff --git a/test/test_files/issue/issue2.test b/test/test_files/issue/issue2.test new file mode 100644 index 00000000000..fa51eba6ca1 --- /dev/null +++ b/test/test_files/issue/issue2.test @@ -0,0 +1,112 @@ +-GROUP IssueTest +-DATASET CSV empty + +-- + +-CASE 3055 +-STATEMENT CREATE NODE TABLE test ( id STRING, prop1 STRING, prop2 INT64, prop3 STRING, prop4 STRING, PRIMARY KEY(id) ) +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ewnn@l23:" }) SET n.prop1 = "model", n.prop2 = 6, n.prop3 = "trips_2", n.prop4 = "0bbhfwnenwafnw88989382932wwewej332938293jiwojewie923892389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ewnn ewweklnbgeireg@l23:ewe" }) SET n.prop1 = "model", n.prop2 = 16, n.prop3 = "normalize_data", n.prop4 = "1bbhfwnenwafnw88989234342wwewej332938293jiwojewie923892389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ewnn ewweklnbgew888g@lew3:ewe" }) SET n.prop1 = "model", n.prop2 = 74, n.prop3 = "zones", n.prop4 = "2bbhfwnenaafnw88989234342wwewej332938293jiwojewie923192389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ew43lnbgew888g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 27, n.prop3 = "test_trips_1", n.prop4 = "3bbhfwnenaafnw88989234342wwewej332938213jiwojewie923192389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ew43ffbgew888g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 37, n.prop3 = "test_trips_2", n.prop4 = "4bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "ewm8ew43ffrrr88g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 47, n.prop3 = "test_norm_data_1", n.prop4 = "5bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "fwfwfe8g@lew3:ewe" }) SET n.prop1 = "model", n.prop4 = "6bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "fwfwfe8g@lew3:ewe" AND n.prop1 = "model" AND n.prop2 IS NULL AND n.prop3 IS NULL RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn ewweklnbgew888g@lew3:ewe" AND n.prop1 = "model" AND n.prop2 = 74 AND n.prop3 = "zones" RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn@l23:" AND n.prop1 = "model" AND n.prop2 = 6 AND n.prop3 = "trips_2" RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" AND n.prop1 = "model" AND n.prop2 = 16 AND n.prop3 = "normalize_data" RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43lnbgew888g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 27 AND n.prop3 = "test_trips_1" RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43ffbgew888g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 37 AND n.prop3 = "test_trips_2" RETURN n +---- ok +-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43ffrrr88g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 47 AND n.prop3 = "test_norm_data_1" RETURN n +---- ok +-STATEMENT MERGE (n:test { id: "nfewnfwn+wnfw"}) RETURN n.id +---- ok +-STATEMENT MERGE (n:test { id: "nfewn32n+wnfw"}) RETURN n.id +---- ok +-STATEMENT CREATE NODE TABLE res ( name STRING, PRIMARY KEY(name) ) +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT CREATE REL TABLE etable ( FROM res TO test , eprop1 STRING, eprop2 STRING, eprop3 STRING ) +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "nfewnfwn+wnfw" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "nfewj932-ew2b-5f33-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "fwfsiewmk" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "nfewnfwn+wnfw" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "nfewj932-ew2b-5f33-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "nfewn32n+wnfw" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "s3scan+taxi_fhvhv" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "fwfsiewmk" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "nfewn32n+wnfw" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "s3scan+taxi_fhvhv" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn ewweklnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn@l23:" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43ffrrr88g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f) - [r:etable] -> (t) WHERE t.id = 'ewm8ewnn@l23:' and r.eprop3 = 'qqewj932-ew2b-5f88-nfew' RETURN COUNT(*); +---- 1 +1 +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.2.4", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "another_dep" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "another_dep" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "9.9.9", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "numpy" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "numpy" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "2.2.2", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "fwfwfe8g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mmewj932-ew2b-5f00-nfew" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43lnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "piewj932-ew2b-5f00-nfrr" }]->(t) RETURN r +---- ok +-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name +---- ok +-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ew43lnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.2.3", eprop3 : "piewj932-ew2b-5f00-nfrr" }]->(t) RETURN r +---- ok +-STATEMENT MATCH (f) - [r:etable] -> (t) WHERE t.id = 'ewm8ewnn@l23:' and r.eprop3 = 'qqewj932-ew2b-5f88-nfew' RETURN COUNT(*); +---- 1 +1