Skip to content

Commit

Permalink
Fix updating search backpressure settings crashing the cluster
Browse files Browse the repository at this point in the history
Signed-off-by: Gao Binlong <gbinlong@amazon.com>
  • Loading branch information
gaobinlong committed Aug 29, 2024
1 parent 2b84305 commit adcf328
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375))
- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393))
- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394))
- Fix updating search backpressure settings crashing the cluster

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,116 @@
- match: {error.root_cause.0.type: "illegal_argument_exception"}
- match: { error.root_cause.0.reason: "Invalid SearchBackpressureMode: monitor-only" }
- match: { status: 400 }


---
"Test setting search backpressure cancellation settings":
- skip:
version: "- 2.3.99"
reason: "Introduced in 2.4.0"

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_task.cancellation_burst: 11
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_task.cancellation_burst: "11"}

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_task.cancellation_rate: 0.1
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_task.cancellation_rate: "0.1"}

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_task.cancellation_ratio: 0.2
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_task.cancellation_ratio: "0.2"}

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_shard_task.cancellation_burst: 12
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_shard_task.cancellation_burst: "12"}

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_shard_task.cancellation_rate: 0.3
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_shard_task.cancellation_rate: "0.3"}

- do:
cluster.put_settings:
body:
transient:
search_backpressure.search_shard_task.cancellation_ratio: 0.4
- is_true: acknowledged

- do:
cluster.get_settings:
flat_settings: false
- match: {transient.search_backpressure.search_shard_task.cancellation_ratio: "0.4"}

---
"Test setting invalid search backpressure cancellation_rate and cancellation_ratio":
- skip:
version: "- 2.99.99"
reason: "Fixed in 3.0.0"

- do:
catch: /search_backpressure.search_task.cancellation_rate must be greater than zero/
cluster.put_settings:
body:
transient:
search_backpressure.search_task.cancellation_rate: 0.0

- do:
catch: /search_backpressure.search_task.cancellation_ratio must be greater than zero/
cluster.put_settings:
body:
transient:
search_backpressure.search_task.cancellation_ratio: 0.0

- do:
catch: /search_backpressure.search_shard_task.cancellation_rate must be greater than zero/
cluster.put_settings:
body:
transient:
search_backpressure.search_shard_task.cancellation_rate: 0.0

- do:
catch: /search_backpressure.search_shard_task.cancellation_ratio must be greater than zero/
cluster.put_settings:
body:
transient:
search_backpressure.search_shard_task.cancellation_ratio: 0.0
Original file line number Diff line number Diff line change
Expand Up @@ -160,14 +160,16 @@ public SearchBackpressureService(
timeNanosSupplier,
getSettings().getSearchTaskSettings().getCancellationRateNanos(),
getSettings().getSearchTaskSettings().getCancellationBurst(),
getSettings().getSearchTaskSettings().getCancellationRatio()
getSettings().getSearchTaskSettings().getCancellationRatio(),
getSettings().getSearchTaskSettings().getCancellationRate()
),
SearchShardTask.class,
new SearchBackpressureState(
timeNanosSupplier,
getSettings().getSearchShardTaskSettings().getCancellationRateNanos(),
getSettings().getSearchShardTaskSettings().getCancellationBurst(),
getSettings().getSearchShardTaskSettings().getCancellationRatio()
getSettings().getSearchShardTaskSettings().getCancellationRatio(),
getSettings().getSearchShardTaskSettings().getCancellationRate()
)
);
this.settings.getSearchTaskSettings().addListener(searchBackpressureStates.get(SearchTask.class));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,15 @@ public class SearchBackpressureState implements CancellationSettingsListener {
LongSupplier timeNanosSupplier,
double cancellationRateNanos,
double cancellationBurst,
double cancellationRatio
double cancellationRatio,
double cancellationRate
) {
rateLimiter = new AtomicReference<>(new TokenBucket(timeNanosSupplier, cancellationRateNanos, cancellationBurst));
ratioLimiter = new AtomicReference<>(new TokenBucket(this::getCompletionCount, cancellationRatio, cancellationBurst));
this.timeNanosSupplier = timeNanosSupplier;
this.cancellationBurst = cancellationBurst;
this.cancellationRatio = cancellationRatio;
this.cancellationRate = cancellationRate;
}

public long getCompletionCount() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ private static class Defaults {
Defaults.CANCELLATION_RATIO,
0.0,
1.0,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.cancellation_ratio must be greater than zero");
}
},
Setting.Property.Deprecated,
Setting.Property.Dynamic,
Setting.Property.NodeScope
Expand All @@ -79,6 +84,12 @@ private static class Defaults {
"search_backpressure.cancellation_rate",
Defaults.CANCELLATION_RATE,
0.0,
Double.MAX_VALUE,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.cancellation_rate must be greater than zero");
}
},
Setting.Property.Deprecated,
Setting.Property.Dynamic,
Setting.Property.NodeScope
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ private static class Defaults {
SearchBackpressureSettings.SETTING_CANCELLATION_RATIO,
0.0,
1.0,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.search_shard_task.cancellation_ratio must be greater than zero");
}
},
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
Expand All @@ -63,6 +68,12 @@ private static class Defaults {
"search_backpressure.search_shard_task.cancellation_rate",
SearchBackpressureSettings.SETTING_CANCELLATION_RATE,
0.0,
Double.MAX_VALUE,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.search_shard_task.cancellation_rate must be greater than zero");
}
},
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ private static class Defaults {
Defaults.CANCELLATION_RATIO,
0.0,
1.0,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.search_task.cancellation_ratio must be greater than zero");
}
},
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
Expand All @@ -64,6 +69,12 @@ private static class Defaults {
"search_backpressure.search_task.cancellation_rate",
Defaults.CANCELLATION_RATE,
0.0,
Double.MAX_VALUE,
value -> {
if (value <= 0.0) {
throw new IllegalArgumentException("search_backpressure.search_task.cancellation_rate must be greater than zero");
}
},
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,32 @@ public void testSearchBackpressureSettingValidateInvalidMode() {
() -> new SearchBackpressureSettings(settings, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))
);
}

public void testInvalidCancellationRate() {
Settings settings1 = Settings.builder().put("search_backpressure.search_task.cancellation_rate", 0.0).build();
assertThrows(
IllegalArgumentException.class,
() -> new SearchBackpressureSettings(settings1, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))
);

Settings settings2 = Settings.builder().put("search_backpressure.search_shard_task.cancellation_rate", 0.0).build();
assertThrows(
IllegalArgumentException.class,
() -> new SearchBackpressureSettings(settings2, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))
);
}

public void testInvalidCancellationRatio() {
Settings settings1 = Settings.builder().put("search_backpressure.search_task.cancellation_ratio", 0.0).build();
assertThrows(
IllegalArgumentException.class,
() -> new SearchBackpressureSettings(settings1, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))
);

Settings settings2 = Settings.builder().put("search_backpressure.search_shard_task.cancellation_ratio", 0.0).build();
assertThrows(
IllegalArgumentException.class,
() -> new SearchBackpressureSettings(settings2, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))
);
}
}

0 comments on commit adcf328

Please sign in to comment.