Skip to content

Commit

Permalink
fix sliding out-of-place commit and null strings
Browse files Browse the repository at this point in the history
  • Loading branch information
ray6080 committed Mar 14, 2024
1 parent ff186a5 commit 5e79415
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 17 deletions.
21 changes: 8 additions & 13 deletions src/storage/store/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,20 +777,15 @@ void Column::commitColumnChunkOutOfPlace(Transaction* transaction, node_group_id
} else {
auto chunkMeta = getMetadata(nodeGroupIdx, transaction->getType());
auto maxDstOffset = getMaxOffset(dstOffsets);
if (maxDstOffset < chunkMeta.numValues) {
// TODO(Guodong): Should consider caching the scanned column chunk to avoid redundant
// scans in the same transaction.
auto columnChunk = getEmptyChunkForCommit(chunkMeta.numValues + dstOffsets.size());
scan(transaction, nodeGroupIdx, columnChunk.get());
for (auto i = 0u; i < dstOffsets.size(); i++) {
columnChunk->write(chunk, srcOffset + i, dstOffsets[i], 1 /* numValues */);
}
columnChunk->finalize();
append(columnChunk.get(), nodeGroupIdx);
} else {
chunk->finalize();
append(chunk, nodeGroupIdx);
// TODO(Guodong): Should consider caching the scanned column chunk to avoid redundant
// scans in the same transaction.
auto columnChunk = getEmptyChunkForCommit(chunkMeta.numValues + dstOffsets.size());
scan(transaction, nodeGroupIdx, columnChunk.get());
for (auto i = 0u; i < dstOffsets.size(); i++) {
columnChunk->write(chunk, srcOffset + i, dstOffsets[i], 1 /* numValues */);
}
columnChunk->finalize();
append(columnChunk.get(), nodeGroupIdx);
}
}

Expand Down
12 changes: 9 additions & 3 deletions src/storage/store/column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,10 @@ void ColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset_t o

void ColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk, offset_t dstOffsetInChunk,
offset_t numValuesToCopy) {
KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy));
KU_ASSERT(srcChunk->dataType.getPhysicalType() == dataType.getPhysicalType());
if ((dstOffsetInChunk + numValuesToCopy) >= numValues) {
numValues = dstOffsetInChunk + numValuesToCopy;
}
memcpy(buffer.get() + dstOffsetInChunk * numBytesPerValue,
srcChunk->buffer.get() + srcOffsetInChunk * numBytesPerValue,
numValuesToCopy * numBytesPerValue);
Expand Down Expand Up @@ -454,7 +456,9 @@ void BoolColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset

void BoolColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk,
offset_t dstOffsetInChunk, offset_t numValuesToCopy) {
KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy));
if ((dstOffsetInChunk + numValuesToCopy) >= numValues) {
numValues = dstOffsetInChunk + numValuesToCopy;
}
NullMask::copyNullMask((uint64_t*)static_cast<BoolColumnChunk*>(srcChunk)->buffer.get(),
srcOffsetInChunk, (uint64_t*)buffer.get(), dstOffsetInChunk, numValuesToCopy);
}
Expand All @@ -478,7 +482,9 @@ void NullColumnChunk::write(ValueVector* vector, offset_t offsetInVector, offset

void NullColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk,
offset_t dstOffsetInChunk, offset_t numValuesToCopy) {
KU_ASSERT(numValues >= (dstOffsetInChunk + numValuesToCopy));
if ((dstOffsetInChunk + numValuesToCopy) >= numValues) {
numValues = dstOffsetInChunk + numValuesToCopy;
}
copyFromBuffer((uint64_t*)static_cast<NullColumnChunk*>(srcChunk)->buffer.get(),
srcOffsetInChunk, dstOffsetInChunk, numValuesToCopy);
}
Expand Down
4 changes: 3 additions & 1 deletion src/storage/store/dictionary_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ void DictionaryChunk::resetToEmpty() {
}

uint64_t DictionaryChunk::getStringLength(string_index_t index) const {
if (index + 1 < offsetChunk->getNumValues()) {
if (stringDataChunk->getNumValues() == 0) {
return 0;
} else if (index + 1 < offsetChunk->getNumValues()) {
KU_ASSERT(offsetChunk->getValue<string_offset_t>(index + 1) >=
offsetChunk->getValue<string_offset_t>(index));
return offsetChunk->getValue<string_offset_t>(index + 1) -
Expand Down
1 change: 1 addition & 0 deletions src/storage/store/rel_table_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ void RelTableData::distributeAndUpdateColumn(Transaction* transaction,
auto newSize = localState.rightCSROffset - localState.leftCSROffset + 1;
auto newChunk = ColumnChunkFactory::createColumnChunk(
*column->getDataType().copy(), enableCompression, newSize);
newChunk->getNullChunk()->resetToAllNull();
auto maxNumNodesToDistribute = std::min(
rightNodeBoundary - leftNodeBoundary + 1, persistentState.header.offset->getNumValues());
// Third, copy the rels to the new chunk.
Expand Down
3 changes: 3 additions & 0 deletions src/storage/store/string_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ bool StringColumn::canCommitInPlace(Transaction* transaction, node_group_idx_t n
auto strChunk = ku_dynamic_cast<ColumnChunk*, StringColumnChunk*>(chunk);
auto length = std::min((uint64_t)dstOffsets.size(), strChunk->getNumValues());
for (auto i = 0u; i < length; i++) {
if (strChunk->getNullChunk()->isNull(i)) {
continue;
}
strLenToAdd += strChunk->getStringLength(i + srcOffset);
}
auto numStrings = dstOffsets.size();
Expand Down
4 changes: 4 additions & 0 deletions src/storage/store/string_column_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ void StringColumnChunk::write(ColumnChunk* chunk, ColumnChunk* dstOffsets, bool
void StringColumnChunk::write(ColumnChunk* srcChunk, offset_t srcOffsetInChunk,
offset_t dstOffsetInChunk, offset_t numValuesToCopy) {
KU_ASSERT(srcChunk->getDataType().getPhysicalType() == PhysicalTypeID::STRING);
if ((dstOffsetInChunk + numValuesToCopy) >= numValues) {
numValues = dstOffsetInChunk + numValuesToCopy;
}
for (auto i = 0u; i < numValuesToCopy; i++) {
auto srcPos = srcOffsetInChunk + i;
auto dstPos = dstOffsetInChunk + i;
Expand Down Expand Up @@ -163,6 +166,7 @@ void StringColumnChunk::finalize() {
template<>
std::string_view StringColumnChunk::getValue<std::string_view>(offset_t pos) const {
KU_ASSERT(pos < numValues);
KU_ASSERT(!nullChunk->isNull(pos));
auto index = ColumnChunk::getValue<DictionaryChunk::string_index_t>(pos);
return dictionaryChunk->getString(index);
}
Expand Down
112 changes: 112 additions & 0 deletions test/test_files/issue/issue2.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
-GROUP IssueTest
-DATASET CSV empty

--

-CASE 3055
-STATEMENT CREATE NODE TABLE test ( id STRING, prop1 STRING, prop2 INT64, prop3 STRING, prop4 STRING, PRIMARY KEY(id) )
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ewnn@l23:" }) SET n.prop1 = "model", n.prop2 = 6, n.prop3 = "trips_2", n.prop4 = "0bbhfwnenwafnw88989382932wwewej332938293jiwojewie923892389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ewnn ewweklnbgeireg@l23:ewe" }) SET n.prop1 = "model", n.prop2 = 16, n.prop3 = "normalize_data", n.prop4 = "1bbhfwnenwafnw88989234342wwewej332938293jiwojewie923892389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ewnn ewweklnbgew888g@lew3:ewe" }) SET n.prop1 = "model", n.prop2 = 74, n.prop3 = "zones", n.prop4 = "2bbhfwnenaafnw88989234342wwewej332938293jiwojewie923192389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ew43lnbgew888g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 27, n.prop3 = "test_trips_1", n.prop4 = "3bbhfwnenaafnw88989234342wwewej332938213jiwojewie923192389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ew43ffbgew888g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 37, n.prop3 = "test_trips_2", n.prop4 = "4bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "ewm8ew43ffrrr88g@lew3:ewe" }) SET n.prop1 = "expectation", n.prop2 = 47, n.prop3 = "test_norm_data_1", n.prop4 = "5bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "fwfwfe8g@lew3:ewe" }) SET n.prop1 = "model", n.prop4 = "6bbhfwnenaafnw88989234312wwewej332938213jiwojewie923192389283923" RETURN n.id
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "fwfwfe8g@lew3:ewe" AND n.prop1 = "model" AND n.prop2 IS NULL AND n.prop3 IS NULL RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn ewweklnbgew888g@lew3:ewe" AND n.prop1 = "model" AND n.prop2 = 74 AND n.prop3 = "zones" RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn@l23:" AND n.prop1 = "model" AND n.prop2 = 6 AND n.prop3 = "trips_2" RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" AND n.prop1 = "model" AND n.prop2 = 16 AND n.prop3 = "normalize_data" RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43lnbgew888g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 27 AND n.prop3 = "test_trips_1" RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43ffbgew888g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 37 AND n.prop3 = "test_trips_2" RETURN n
---- ok
-STATEMENT MATCH (n:test) WHERE n.id = "ewm8ew43ffrrr88g@lew3:ewe" AND n.prop1 = "expectation" AND n.prop2 = 47 AND n.prop3 = "test_norm_data_1" RETURN n
---- ok
-STATEMENT MERGE (n:test { id: "nfewnfwn+wnfw"}) RETURN n.id
---- ok
-STATEMENT MERGE (n:test { id: "nfewn32n+wnfw"}) RETURN n.id
---- ok
-STATEMENT CREATE NODE TABLE res ( name STRING, PRIMARY KEY(name) )
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT CREATE REL TABLE etable ( FROM res TO test , eprop1 STRING, eprop2 STRING, eprop3 STRING )
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "nfewnfwn+wnfw" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "nfewj932-ew2b-5f33-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "fwfsiewmk" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "nfewnfwn+wnfw" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "nfewj932-ew2b-5f33-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "nfewn32n+wnfw" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "s3scan+taxi_fhvhv" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "fwfsiewmk" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "nfewn32n+wnfw" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "fwfsiewmk" AND t.id = "s3scan+taxi_fhvhv" CREATE (f)-[r:etable { eprop1 : "0.0.0", eprop3 : "mdewj932-ew2b-5f44-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn ewweklnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn@l23:" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43ffrrr88g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "qqewj932-ew2b-5f88-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f) - [r:etable] -> (t) WHERE t.id = 'ewm8ewnn@l23:' and r.eprop3 = 'qqewj932-ew2b-5f88-nfew' RETURN COUNT(*);
---- 1
1
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.2.4", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "another_dep" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "another_dep" AND t.id = "ewm8ew43ffbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "9.9.9", eprop3 : "ooewj932-ew2b-5f19-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "numpy" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "numpy" AND t.id = "ewm8ewnn ewweklnbgeireg@l23:ewe" CREATE (f)-[r:etable { eprop1 : "2.2.2", eprop3 : "rrewj932-ew2b-5f92-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "fwfwfe8g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "mmewj932-ew2b-5f00-nfew" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "nfewnnfw" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "nfewnnfw" AND t.id = "ewm8ew43lnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.1.1", eprop3 : "piewj932-ew2b-5f00-nfrr" }]->(t) RETURN r
---- ok
-STATEMENT MERGE (n:res { name: "pdanwewer" }) RETURN n.name
---- ok
-STATEMENT MATCH (f:res), (t:test) WHERE f.name = "pdanwewer" AND t.id = "ewm8ew43lnbgew888g@lew3:ewe" CREATE (f)-[r:etable { eprop1 : "1.2.3", eprop3 : "piewj932-ew2b-5f00-nfrr" }]->(t) RETURN r
---- ok
-STATEMENT MATCH (f) - [r:etable] -> (t) WHERE t.id = 'ewm8ewnn@l23:' and r.eprop3 = 'qqewj932-ew2b-5f88-nfew' RETURN COUNT(*);
---- 1
1

0 comments on commit 5e79415

Please sign in to comment.