From 2f10b96ca070874e9d37d628d170bc4c1186ba0e Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Fri, 7 Feb 2020 19:41:33 +0000 Subject: [PATCH 1/5] ILM make the set-single-node-allocation retryable --- .../core/ilm/SetSingleNodeAllocateStep.java | 15 +++-- .../ilm/TimeSeriesLifecycleActionsIT.java | 56 +++++++++++++++++++ 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java index 9bd3a70c5894a..6f2f04821cb28 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.elasticsearch.client.Client; +import org.elasticsearch.client.transport.NoNodeAvailableException; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -54,6 +55,11 @@ public SetSingleNodeAllocateStep(StepKey key, StepKey nextStepKey, Client client super(key, nextStepKey, client); } + @Override + public boolean isRetryable() { + return true; + } + @Override public void performAction(IndexMetaData indexMetaData, ClusterState clusterState, ClusterStateObserver observer, Listener listener) { final RoutingNodes routingNodes = clusterState.getRoutingNodes(); @@ -65,7 +71,6 @@ public void performAction(IndexMetaData indexMetaData, ClusterState clusterState .stream() .collect(Collectors.groupingBy(ShardRouting::shardId)); - if (routingsByShardId.isEmpty() == false) { for (RoutingNode node : routingNodes) { boolean canAllocateOneCopyOfEachShard = routingsByShardId.values().stream() // For each shard @@ -79,6 +84,7 @@ public void performAction(IndexMetaData indexMetaData, ClusterState clusterState // Shuffle the list of nodes so the one we pick is random Randomness.shuffle(validNodeIds); Optional nodeId = validNodeIds.stream().findAny(); + if (nodeId.isPresent()) { Settings settings = Settings.builder() .put(IndexMetaData.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id", nodeId.get()).build(); @@ -88,12 +94,13 @@ public void performAction(IndexMetaData indexMetaData, ClusterState clusterState getClient().admin().indices().updateSettings(updateSettingsRequest, ActionListener.wrap(response -> listener.onResponse(true), listener::onFailure)); } else { - // No nodes currently match the allocation rules so just wait until there is one that does + // No nodes currently match the allocation rules, so report this as an error and we'll retry logger.debug("could not find any nodes to allocate index [{}] onto prior to shrink"); - listener.onResponse(false); + listener.onFailure(new NoNodeAvailableException("could not find any nodes to allocate index [{}] onto prior to shrink")); } } else { - // There are no shards for the index, the index might be gone + // There are no shards for the index, the index might be gone. Even though this is a retryable step ILM will not retry in + // this case as we're using the periodic loop to trigger the retries and that is run over *existing* indices. listener.onFailure(new IndexNotFoundException(indexMetaData.getIndex())); } } diff --git a/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java b/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java index 667104bc61bb4..3a9f838e11e33 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java @@ -39,6 +39,7 @@ import org.elasticsearch.xpack.core.ilm.ReadOnlyAction; import org.elasticsearch.xpack.core.ilm.RolloverAction; import org.elasticsearch.xpack.core.ilm.SetPriorityAction; +import org.elasticsearch.xpack.core.ilm.SetSingleNodeAllocateStep; import org.elasticsearch.xpack.core.ilm.ShrinkAction; import org.elasticsearch.xpack.core.ilm.ShrinkStep; import org.elasticsearch.xpack.core.ilm.Step; @@ -585,6 +586,61 @@ public void testShrinkDuringSnapshot() throws Exception { assertOK(client().performRequest(new Request("DELETE", "/_snapshot/repo/snapshot"))); } + public void testSetSingleNodeAllocationRetriesUntilItSucceeds() throws Exception { + int numShards = 2; + int expectedFinalShards = 1; + String shrunkenIndex = ShrinkAction.SHRUNKEN_INDEX_PREFIX + index; + createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)); + + ensureGreen(index); + + // unallocate all index shards + Request setAllocationToMissingAttribute = new Request("PUT", "/" + index + "/_settings"); + setAllocationToMissingAttribute.setJsonEntity("{\n" + + " \"settings\": {\n" + + " \"index.routing.allocation.include.rack\": \"bogus_rack\"" + + " }\n" + + "}"); + client().performRequest(setAllocationToMissingAttribute); + + ensureHealth(index, (request) -> { + request.addParameter("wait_for_status", "red"); + request.addParameter("timeout", "70s"); + request.addParameter("level", "shards"); + }); + + // assign the policy that'll attempt to shrink the index + createNewSingletonPolicy("warm", new ShrinkAction(expectedFinalShards)); + updatePolicy(index, policy); + + assertThat("ILM did not start retrying the set-single-node-allocation step", waitUntil(() -> { + try { + Map explainIndexResponse = explainIndex(index); + if (explainIndexResponse == null) { + return false; + } + String failedStep = (String) explainIndexResponse.get("failed_step"); + Integer retryCount = (Integer) explainIndexResponse.get(FAILED_STEP_RETRY_COUNT_FIELD); + return failedStep != null && failedStep.equals(SetSingleNodeAllocateStep.NAME) && retryCount != null && retryCount >= 1; + } catch (IOException e) { + return false; + } + }, 30, TimeUnit.SECONDS), is(true)); + + Request resetAllocationForIndex = new Request("PUT", "/" + index + "/_settings"); + resetAllocationForIndex.setJsonEntity("{\n" + + " \"settings\": {\n" + + " \"index.routing.allocation.include.rack\": null" + + " }\n" + + "}"); + client().performRequest(resetAllocationForIndex); + + assertBusy(() -> assertTrue(indexExists(shrunkenIndex)), 30, TimeUnit.SECONDS); + assertBusy(() -> assertTrue(aliasExists(shrunkenIndex, index))); + assertBusy(() -> assertThat(getStepKeyForIndex(shrunkenIndex), equalTo(PhaseCompleteStep.finalStep("warm").getKey()))); + } + public void testFreezeAction() throws Exception { createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)); From d95d8a35813968d70fbc49d37a9982ba5b3ff383 Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Fri, 7 Feb 2020 20:06:08 +0000 Subject: [PATCH 2/5] Fix SetSingleNodeAllocateStepTests --- .../core/ilm/SetSingleNodeAllocateStepTests.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java index e988b0bb6622b..26db0d94472fb 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.transport.NoNodeAvailableException; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; @@ -45,7 +46,9 @@ import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.notNullValue; public class SetSingleNodeAllocateStepTests extends AbstractStepTestCase { @@ -563,22 +566,23 @@ private void assertNoValidNode(IndexMetaData indexMetaData, Index index, Discove SetSingleNodeAllocateStep step = createRandomInstance(); - SetOnce actionCompleted = new SetOnce<>(); + SetOnce actionCompleted = new SetOnce<>(); step.performAction(indexMetaData, clusterState, null, new Listener() { @Override public void onResponse(boolean complete) { - actionCompleted.set(complete); + throw new AssertionError("Unexpected method call"); } @Override public void onFailure(Exception e) { - throw new AssertionError("Unexpected method call", e); + actionCompleted.set(e); } }); - assertEquals(false, actionCompleted.get()); + Exception failure = actionCompleted.get(); + assertThat(failure, instanceOf(NoNodeAvailableException.class)); Mockito.verifyZeroInteractions(client); } From bc06d9aead48ce5d97e57b79c0575c4c8502d04e Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Fri, 7 Feb 2020 20:22:02 +0000 Subject: [PATCH 3/5] Remove unused import --- .../xpack/core/ilm/SetSingleNodeAllocateStepTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java index 26db0d94472fb..bb2558d195141 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStepTests.java @@ -48,7 +48,6 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.lessThanOrEqualTo; -import static org.hamcrest.Matchers.notNullValue; public class SetSingleNodeAllocateStepTests extends AbstractStepTestCase { From 0f7a22ec827468455aa4cf8527326da59140f75f Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Fri, 7 Feb 2020 22:17:04 +0000 Subject: [PATCH 4/5] Use correct logger and include index name in msg --- .../xpack/core/ilm/SetSingleNodeAllocateStep.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java index 6f2f04821cb28..e7ad4f800dbfe 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/SetSingleNodeAllocateStep.java @@ -5,8 +5,8 @@ */ package org.elasticsearch.xpack.core.ilm; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.elasticsearch.client.Client; @@ -66,8 +66,9 @@ public void performAction(IndexMetaData indexMetaData, ClusterState clusterState RoutingAllocation allocation = new RoutingAllocation(ALLOCATION_DECIDERS, routingNodes, clusterState, null, System.nanoTime()); List validNodeIds = new ArrayList<>(); + String indexName = indexMetaData.getIndex().getName(); final Map> routingsByShardId = clusterState.getRoutingTable() - .allShards(indexMetaData.getIndex().getName()) + .allShards(indexName) .stream() .collect(Collectors.groupingBy(ShardRouting::shardId)); @@ -88,15 +89,16 @@ public void performAction(IndexMetaData indexMetaData, ClusterState clusterState if (nodeId.isPresent()) { Settings settings = Settings.builder() .put(IndexMetaData.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id", nodeId.get()).build(); - UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(indexMetaData.getIndex().getName()) + UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(indexName) .masterNodeTimeout(getMasterTimeout(clusterState)) .settings(settings); getClient().admin().indices().updateSettings(updateSettingsRequest, ActionListener.wrap(response -> listener.onResponse(true), listener::onFailure)); } else { // No nodes currently match the allocation rules, so report this as an error and we'll retry - logger.debug("could not find any nodes to allocate index [{}] onto prior to shrink"); - listener.onFailure(new NoNodeAvailableException("could not find any nodes to allocate index [{}] onto prior to shrink")); + logger.debug("could not find any nodes to allocate index [{}] onto prior to shrink", indexName); + listener.onFailure(new NoNodeAvailableException("could not find any nodes to allocate index [" + indexName + "] onto" + + " prior to shrink")); } } else { // There are no shards for the index, the index might be gone. Even though this is a retryable step ILM will not retry in From 6e6915856ded5afbdd5abd736416d34eae717b8c Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Mon, 10 Feb 2020 09:54:56 +0000 Subject: [PATCH 5/5] Use assertTrue --- .../elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java b/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java index 3a9f838e11e33..c5629b7f20196 100644 --- a/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java +++ b/x-pack/plugin/ilm/qa/multi-node/src/test/java/org/elasticsearch/xpack/ilm/TimeSeriesLifecycleActionsIT.java @@ -614,7 +614,7 @@ public void testSetSingleNodeAllocationRetriesUntilItSucceeds() throws Exception createNewSingletonPolicy("warm", new ShrinkAction(expectedFinalShards)); updatePolicy(index, policy); - assertThat("ILM did not start retrying the set-single-node-allocation step", waitUntil(() -> { + assertTrue("ILM did not start retrying the set-single-node-allocation step", waitUntil(() -> { try { Map explainIndexResponse = explainIndex(index); if (explainIndexResponse == null) { @@ -626,7 +626,7 @@ public void testSetSingleNodeAllocationRetriesUntilItSucceeds() throws Exception } catch (IOException e) { return false; } - }, 30, TimeUnit.SECONDS), is(true)); + }, 30, TimeUnit.SECONDS)); Request resetAllocationForIndex = new Request("PUT", "/" + index + "/_settings"); resetAllocationForIndex.setJsonEntity("{\n" +