Skip to content

Commit

Permalink
tests: stabilize test_sharding_split_compaction (#8318)
Browse files Browse the repository at this point in the history
## Problem

This test incorrectly assumed that a post-split compaction would only
drop content. This was easily destabilized by any changes to image
generation rules.

## Summary of changes

- Before split, do a full image layer generation pass, to guarantee that
post-split compaction should only drop data, never create it.
- Fix the force_image_layer_creation mode of compaction that we use from
tests like this: previously it would try and generate image layers even
if one already existed with the same layer key, which caused compaction
to fail.
  • Loading branch information
jcsp committed Jul 10, 2024
1 parent fe13fcc commit e89ec55
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
19 changes: 18 additions & 1 deletion pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ use std::{
ops::{Deref, Range},
};

use crate::pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS;
use crate::{
aux_file::AuxFileSizeEstimator,
tenant::{
layer_map::{LayerMap, SearchResult},
metadata::TimelineMetadata,
storage_layer::PersistentLayerDesc,
},
};
use crate::{
Expand All @@ -98,6 +98,7 @@ use crate::{
metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize,
};
use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
use crate::{pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS, tenant::storage_layer::PersistentLayerKey};
use crate::{
pgdatadir_mapping::{AuxFilesDirectory, DirectoryKind},
virtual_file::{MaybeFatalIo, VirtualFile},
Expand Down Expand Up @@ -4572,6 +4573,22 @@ impl Timeline {
start = img_range.end;
continue;
}
} else if let ImageLayerCreationMode::Force = mode {
// When forced to create image layers, we might try and create them where they already
// exist. This mode is only used in tests/debug.
let layers = self.layers.read().await;
if layers.contains_key(&PersistentLayerKey {
key_range: img_range.clone(),
lsn_range: PersistentLayerDesc::image_layer_lsn_range(lsn),
is_delta: false,
}) {
tracing::info!(
"Skipping image layer at {lsn} {}..{}, already exists",
img_range.start,
img_range.end
);
continue;
}
}

let image_layer_writer = ImageLayerWriter::new(
Expand Down
8 changes: 8 additions & 0 deletions pageserver/src/tenant/timeline/layer_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,10 @@ impl LayerManager {
self.layer_fmgr.contains(layer)
}

pub(crate) fn contains_key(&self, key: &PersistentLayerKey) -> bool {
self.layer_fmgr.contains_key(key)
}

pub(crate) fn all_persistent_layers(&self) -> Vec<PersistentLayerKey> {
self.layer_fmgr.0.keys().cloned().collect_vec()
}
Expand All @@ -363,6 +367,10 @@ impl<T: AsLayerDesc + Clone> LayerFileManager<T> {
.clone()
}

fn contains_key(&self, key: &PersistentLayerKey) -> bool {
self.0.contains_key(key)
}

pub(crate) fn insert(&mut self, layer: T) {
let present = self.0.insert(layer.layer_desc().key(), layer.clone());
if present.is_some() && cfg!(debug_assertions) {
Expand Down
6 changes: 6 additions & 0 deletions test_runner/regress/test_sharding.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,12 @@ def test_sharding_split_compaction(neon_env_builder: NeonEnvBuilder, failpoint:
workload.validate()
workload.stop()

# Do a full image layer generation before splitting, so that when we compact after splitting
# we should only see sizes decrease (from post-split drops/rewrites), not increase (from image layer generation)
env.get_tenant_pageserver(tenant_id).http_client().timeline_compact(
tenant_id, timeline_id, force_image_layer_creation=True, wait_until_uploaded=True
)

# Split one shard into two
shards = env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)

Expand Down

1 comment on commit e89ec55

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3129 tests run: 3002 passed, 0 failed, 127 skipped (full report)


Code coverage* (full report)

  • functions: 32.6% (6938 of 21302 functions)
  • lines: 50.0% (54559 of 109206 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
e89ec55 at 2024-07-10T14:41:40.212Z :recycle:

Please sign in to comment.