Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](segcompaction) fix convert delete bitmap core #38800

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions be/src/olap/rowset/segcompaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,10 @@ void SegcompactionWorker::convert_segment_delete_bitmap(DeleteBitmapPtr src_dele
auto rowset_id = _writer->context().rowset_id;
const auto* seg_map =
src_delete_bitmap->get({rowset_id, src_seg_id, DeleteBitmap::TEMP_VERSION_COMMON});
_converted_delete_bitmap->set({rowset_id, dest_seg_id, DeleteBitmap::TEMP_VERSION_COMMON},
*seg_map);
if (seg_map != nullptr) {
_converted_delete_bitmap->set({rowset_id, dest_seg_id, DeleteBitmap::TEMP_VERSION_COMMON},
*seg_map);
}
}

void SegcompactionWorker::convert_segment_delete_bitmap(DeleteBitmapPtr src_delete_bitmap,
Expand All @@ -402,6 +404,9 @@ void SegcompactionWorker::convert_segment_delete_bitmap(DeleteBitmapPtr src_dele
for (uint32_t seg_id = src_begin; seg_id <= src_end; seg_id++) {
const auto* seg_map =
src_delete_bitmap->get({rowset_id, seg_id, DeleteBitmap::TEMP_VERSION_COMMON});
if (!seg_map) {
continue;
}
src.segment_id = seg_id;
for (unsigned int row_id : *seg_map) {
src.row_id = row_id;
Expand Down
119 changes: 65 additions & 54 deletions be/test/olap/segcompaction_mow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ class SegCompactionMoWTest : public ::testing::TestWithParam<std::string> {

bool check_data_read_with_delete_bitmap(TabletSchemaSPtr tablet_schema,
DeleteBitmapPtr delete_bitmap, RowsetSharedPtr rowset,
int expect_total_rows, int rows_mark_deleted) {
int expect_total_rows, int rows_mark_deleted,
bool skip_value_check = false) {
RowsetReaderContext reader_context;
reader_context.tablet_schema = tablet_schema;
// use this type to avoid cache from other ut
Expand Down Expand Up @@ -261,7 +262,10 @@ class SegCompactionMoWTest : public ::testing::TestWithParam<std::string> {
uint32_t k2 = *reinterpret_cast<uint32_t*>((char*)(&field2));
uint32_t v3 = *reinterpret_cast<uint32_t*>((char*)(&field3));
EXPECT_EQ(100 * v3 + k2, k1);
EXPECT_TRUE(v3 % 3 != 0); // all v3%3==0 is deleted
if (!skip_value_check) {
// rows with v3 % 3 == 0 are deleted only in segments whose id is even (id parity is checked via k2).
EXPECT_TRUE(k2 % 2 != 0 || v3 % 3 != 0);
}
num_rows_read++;
}
output_block->clear();
Expand Down Expand Up @@ -334,8 +338,8 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) {
{rowset_id, i, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
} else {
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (i % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, i, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -353,7 +357,11 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) {
for (auto entry : delete_bitmap->delete_bitmap) {
total_cardinality1 += entry.second.cardinality();
}
EXPECT_EQ(num_segments, delete_bitmap->delete_bitmap.size());
if (delete_ratio == "full") {
EXPECT_EQ(num_segments, delete_bitmap->delete_bitmap.size());
} else {
EXPECT_EQ(num_segments / 2 + num_segments % 2, delete_bitmap->delete_bitmap.size());
}
EXPECT_EQ(Status::OK(), rowset_writer->build(rowset));
std::vector<std::string> ls;
ls.push_back(fmt::format("{}_0.dat", raw_rsid));
Expand All @@ -372,8 +380,12 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) {
}
total_cardinality2 += entry.second.cardinality();
}
// 7 segments + 1 sentinel mark
EXPECT_EQ(8, delete_bitmap->delete_bitmap.size());
if (delete_ratio == "full") {
// 7 segments + 1 sentinel mark
EXPECT_EQ(8, delete_bitmap->delete_bitmap.size());
} else {
EXPECT_EQ(5, delete_bitmap->delete_bitmap.size());
}
EXPECT_EQ(total_cardinality1, total_cardinality2);
}

Expand Down Expand Up @@ -420,16 +432,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -448,16 +460,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -476,16 +488,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -504,16 +516,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand Down Expand Up @@ -572,16 +584,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -607,11 +619,10 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) {
ls.push_back("20048_5.dat"); // oooooooo
ls.push_back("20048_6.dat"); // O
EXPECT_TRUE(check_dir(ls));
// 7 segments + 1 sentinel mark
EXPECT_EQ(8, delete_bitmap->delete_bitmap.size());
EXPECT_EQ(6, delete_bitmap->delete_bitmap.size());
}
EXPECT_TRUE(check_data_read_with_delete_bitmap(tablet_schema, delete_bitmap, rowset,
total_written_rows, rows_mark_deleted));
total_written_rows, rows_mark_deleted, true));
}

TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
Expand Down Expand Up @@ -652,16 +663,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -680,16 +691,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -708,16 +719,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -736,16 +747,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -764,16 +775,16 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) {
vectorized::Block block = tablet_schema->create_block();
auto columns = block.mutate_columns();
for (int rid = 0; rid < rows_per_segment; ++rid) {
uint32_t k1 = rid * 100 + i;
uint32_t k2 = i;
uint32_t k1 = rid * 100 + segid;
uint32_t k2 = segid;
uint32_t k3 = rid;
uint32_t seq = 0;
columns[0]->insert_data((const char*)&k1, sizeof(k1));
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (segid % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, segid, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand Down Expand Up @@ -846,8 +857,8 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) {
columns[1]->insert_data((const char*)&k2, sizeof(k2));
columns[2]->insert_data((const char*)&k3, sizeof(k3));
columns[3]->insert_data((const char*)&seq, sizeof(seq));
// mark delete every 3 rows
if (rid % 3 == 0) {
// mark delete every 3 rows, for segments that seg_id is even number
if (i % 2 == 0 && rid % 3 == 0) {
writer_context.mow_context->delete_bitmap->add(
{rowset_id, i, DeleteBitmap::TEMP_VERSION_COMMON}, rid);
rows_mark_deleted++;
Expand All @@ -860,7 +871,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) {
sleep(1);
}

EXPECT_EQ(num_segments, delete_bitmap->delete_bitmap.size());
EXPECT_EQ(num_segments / 2 + num_segments % 2, delete_bitmap->delete_bitmap.size());
EXPECT_EQ(Status::OK(), rowset_writer->build(rowset));
std::vector<std::string> ls;
ls.push_back("20050_0.dat");
Expand All @@ -872,7 +883,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) {
ls.push_back("20050_6.dat");
ls.push_back("20050_7.dat");
EXPECT_TRUE(check_dir(ls));
EXPECT_EQ(num_segments, delete_bitmap->delete_bitmap.size());
EXPECT_EQ(num_segments / 2 + num_segments % 2, delete_bitmap->delete_bitmap.size());

EXPECT_FALSE(static_cast<BetaRowsetWriter*>(rowset_writer.get())->is_segcompacted());
}
Expand Down
Loading