Skip to content

Commit

Permalink
[enhancement](be-ut)Add more indexed column reader be unit test (apac…
Browse files Browse the repository at this point in the history
…he#25652)

Added more unit tests
1. key exists or does not exist in a single page
2. key exists or does not exist in multiple pages
3. key is between two pages.
  • Loading branch information
Yukang-Lian authored Oct 23, 2023
1 parent 17004c4 commit 9519d7e
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
3 changes: 3 additions & 0 deletions be/src/olap/primary_key_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class PrimaryKeyIndexBuilder {

// Returns the value currently held in _disk_size.
uint64_t disk_size() const {
    return _disk_size;
}

// Exposed for the backend unit tests: forwards to data_page_num() on the wrapped
// _primary_key_index_builder and returns its result unchanged.
uint32_t data_page_num() const {
    return _primary_key_index_builder->data_page_num();
}

// Returns a Slice over the start of _min_key whose length is the stored size minus
// _seq_col_length, i.e. the trailing _seq_col_length bytes are left out of the view.
// NOTE(review): assumes _min_key.size() >= _seq_col_length — confirm for an empty builder.
Slice min_key() {
    const auto trimmed_len = _min_key.size() - _seq_col_length;
    return Slice(_min_key.data(), trimmed_len);
}
// Returns a Slice over the start of _max_key whose length is the stored size minus
// _seq_col_length, i.e. the trailing _seq_col_length bytes are left out of the view.
// NOTE(review): assumes _max_key.size() >= _seq_col_length — confirm for an empty builder.
Slice max_key() {
    const auto trimmed_len = _max_key.size() - _seq_col_length;
    return Slice(_max_key.data(), trimmed_len);
}

Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/rowset/segment_v2/indexed_column_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <stdint.h>

#include <cstddef>
#include <cstdint>
#include <memory>

#include "common/status.h"
Expand Down Expand Up @@ -85,6 +86,8 @@ class IndexedColumnWriter {

// Returns the value currently held in _disk_size.
uint64_t disk_size() const {
    return _disk_size;
}

// Returns the _num_data_pages counter plus one.
// NOTE(review): the extra one presumably accounts for the data page that is still being
// filled and has not been counted yet — confirm against _finish_current_data_page().
uint32_t data_page_num() const {
    return 1 + _num_data_pages;
}

private:
Status _finish_current_data_page(size_t& num_val);

Expand Down
153 changes: 153 additions & 0 deletions be/test/olap/primary_key_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,157 @@ TEST_F(PrimaryKeyIndexTest, builder) {
}
}

// Writes ten 5-byte keys with the data page size capped at 25 bytes so that the index
// needs more than one data page, then reads the index back and checks:
//   * every written key is found with an exact match at its own ordinal,
//   * every ordinal (including the one-past-the-end ordinal 10) can be sought directly,
//   * keys that sort between two written keys land on the next larger key,
//   * a key larger than the maximum yields ENTRY_NOT_FOUND.
TEST_F(PrimaryKeyIndexTest, multiple_pages) {
    std::string filename = kTestDir + "/multiple_pages";
    io::FileWriterPtr file_writer;
    auto fs = io::global_local_filesystem();
    EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());

    // primary_key_data_page_size is process-wide configuration. Remember the incoming
    // value and put it back at the end so the tiny page size does not leak into other
    // tests. Only non-fatal EXPECT_* checks are used below, so the restore always runs.
    const int32_t saved_page_size = config::primary_key_data_page_size;
    config::primary_key_data_page_size = 5 * 5;

    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
    EXPECT_TRUE(builder.init().ok());
    size_t num_rows = 0;
    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
                                   "00010", "00012", "00014", "00016", "00018"};
    for (const std::string& key : keys) {
        // A failed insert would invalidate every later expectation, so surface it here
        // instead of discarding the returned status.
        EXPECT_TRUE(builder.add_item(key).ok());
        num_rows++;
    }
    EXPECT_EQ("00000", builder.min_key().to_string());
    EXPECT_EQ("00018", builder.max_key().to_string());
    EXPECT_EQ(builder.size(), 2 * 5 * 5);
    EXPECT_GT(builder.data_page_num(), 1);
    segment_v2::PrimaryKeyIndexMetaPB index_meta;
    EXPECT_TRUE(builder.finalize(&index_meta).ok());
    EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
    EXPECT_TRUE(file_writer->close().ok());
    EXPECT_EQ(num_rows, builder.num_rows());

    PrimaryKeyIndexReader index_reader;
    io::FileReaderSPtr file_reader;
    EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
    EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
    EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
    EXPECT_EQ(num_rows, index_reader.num_rows());

    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
    EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
    bool exact_match = false;
    uint32_t row_id = 0;

    // Keys that were written: exact match expected, at the ordinal equal to the index.
    for (size_t i = 0; i < keys.size(); i++) {
        // Hand the iterator a Slice, exactly as the non-existent-key loop below does;
        // passing a std::string* only works if its layout happens to match Slice.
        Slice slice(keys[i]);
        bool exists = index_reader.check_present(slice);
        EXPECT_TRUE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_TRUE(status.ok());
        EXPECT_TRUE(exact_match);
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(i, row_id);
    }

    // Direct positioning by ordinal for every written row.
    for (size_t i = 0; i < keys.size(); i++) {
        bool exists = index_reader.check_present(keys[i]);
        EXPECT_TRUE(exists);
        auto status = index_iterator->seek_to_ordinal(i);
        EXPECT_TRUE(status.ok());
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(i, row_id);
    }
    {
        // Ordinal 10 equals the row count; the test expects seeking there to succeed.
        auto status = index_iterator->seek_to_ordinal(10);
        EXPECT_TRUE(status.ok());
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(10, row_id);
    }

    // Keys that sort strictly between two written keys: no exact match, and the
    // iterator must land on the next larger written key (ordinal i + 1).
    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
                                             "00011", "00013", "00015", "00017"};
    for (size_t i = 0; i < non_exist_keys.size(); i++) {
        Slice slice(non_exist_keys[i]);
        bool exists = index_reader.check_present(slice);
        EXPECT_FALSE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_TRUE(status.ok());
        EXPECT_FALSE(exact_match);
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(i + 1, row_id);
    }
    {
        // A key greater than the maximum written key has nothing at or after it.
        std::string key("00019");
        Slice slice(key);
        bool exists = index_reader.check_present(slice);
        EXPECT_FALSE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_FALSE(exact_match);
        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
    }

    config::primary_key_data_page_size = saved_page_size;
}

// Writes ten 5-byte keys with a 32768-byte data page size so that the builder reports a
// single data page, then reads the index back and checks:
//   * every written key is found with an exact match at its own ordinal,
//   * keys that sort between two written keys land on the next larger key,
//   * a key larger than the maximum yields ENTRY_NOT_FOUND.
TEST_F(PrimaryKeyIndexTest, single_page) {
    std::string filename = kTestDir + "/single_page";
    io::FileWriterPtr file_writer;
    auto fs = io::global_local_filesystem();
    EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
    // Set the page size explicitly rather than relying on whatever value an earlier
    // test may have left in this process-wide setting.
    config::primary_key_data_page_size = 32768;

    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
    EXPECT_TRUE(builder.init().ok());
    size_t num_rows = 0;
    std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
                                   "00010", "00012", "00014", "00016", "00018"};
    for (const std::string& key : keys) {
        // A failed insert would invalidate every later expectation, so surface it here
        // instead of discarding the returned status.
        EXPECT_TRUE(builder.add_item(key).ok());
        num_rows++;
    }
    EXPECT_EQ("00000", builder.min_key().to_string());
    EXPECT_EQ("00018", builder.max_key().to_string());
    EXPECT_EQ(builder.size(), 2 * 5 * 5);
    EXPECT_EQ(builder.data_page_num(), 1);
    segment_v2::PrimaryKeyIndexMetaPB index_meta;
    EXPECT_TRUE(builder.finalize(&index_meta).ok());
    EXPECT_EQ(builder.disk_size(), file_writer->bytes_appended());
    EXPECT_TRUE(file_writer->close().ok());
    EXPECT_EQ(num_rows, builder.num_rows());

    PrimaryKeyIndexReader index_reader;
    io::FileReaderSPtr file_reader;
    EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
    EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
    EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
    EXPECT_EQ(num_rows, index_reader.num_rows());

    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
    EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
    bool exact_match = false;
    uint32_t row_id = 0;

    // Keys that were written: exact match expected, at the ordinal equal to the index.
    for (size_t i = 0; i < keys.size(); i++) {
        // Hand the iterator a Slice, exactly as the non-existent-key loop below does;
        // passing a std::string* only works if its layout happens to match Slice.
        Slice slice(keys[i]);
        bool exists = index_reader.check_present(slice);
        EXPECT_TRUE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_TRUE(status.ok());
        EXPECT_TRUE(exact_match);
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(i, row_id);
    }

    // Keys that sort strictly between two written keys: no exact match, and the
    // iterator must land on the next larger written key (ordinal i + 1).
    std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
                                             "00011", "00013", "00015", "00017"};
    for (size_t i = 0; i < non_exist_keys.size(); i++) {
        Slice slice(non_exist_keys[i]);
        bool exists = index_reader.check_present(slice);
        EXPECT_FALSE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_TRUE(status.ok());
        EXPECT_FALSE(exact_match);
        row_id = index_iterator->get_current_ordinal();
        EXPECT_EQ(i + 1, row_id);
    }
    {
        // A key greater than the maximum written key has nothing at or after it.
        std::string key("00019");
        Slice slice(key);
        bool exists = index_reader.check_present(slice);
        EXPECT_FALSE(exists);
        auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
        EXPECT_FALSE(exact_match);
        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
    }
}
} // namespace doris

0 comments on commit 9519d7e

Please sign in to comment.