From 0470eef73af8b9e8962c9e8e07cbc43dd86a243d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 16 Dec 2019 12:47:31 +0100 Subject: [PATCH] Fix Index Deletion during Snapshot Finalization (#50202) With #45689 making it so that index metadata is written after all shards have been snapshotted we can't delete indices that are part of the upcoming snapshot finalization any longer and it is not sufficient to check if all shards of an index have been snapshotted before deciding that it is safe to delete it. This change forbids deleting any index that is in the process of being snapshotted to avoid issues during snapshot finalization. Relates #50200 (doesn't fully fix yet because we're not fixing the `partial=true` snapshot case here) --- .../snapshots/SnapshotsService.java | 19 ++------ .../snapshots/SnapshotResiliencyTests.java | 45 +++++++++++++++++++ 2 files changed, 49 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index a48f893dfd408..7c638bc332edc 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -1521,21 +1521,10 @@ public static Set snapshottingIndices(final ClusterState currentState, fi final Set indices = new HashSet<>(); for (final SnapshotsInProgress.Entry entry : snapshots.entries()) { if (entry.partial() == false) { - if (entry.state() == State.INIT) { - for (IndexId index : entry.indices()) { - IndexMetaData indexMetaData = currentState.metaData().index(index.getName()); - if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) { - indices.add(indexMetaData.getIndex()); - } - } - } else { - for (ObjectObjectCursor shard : entry.shards()) { - Index index = shard.key.getIndex(); - if (indicesToCheck.contains(index) - && shard.value.state().completed() == false - && 
currentState.getMetaData().index(index) != null) { - indices.add(index); - } + for (IndexId index : entry.indices()) { + IndexMetaData indexMetaData = currentState.metaData().index(index.getName()); + if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) { + indices.add(indexMetaData.getIndex()); } } } diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index 28d2beb2efcc4..a0e7e51098681 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -80,6 +80,7 @@ import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.AutoCreateIndex; import org.elasticsearch.action.support.DestructiveOperations; +import org.elasticsearch.action.support.GroupedActionListener; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.TransportAction; import org.elasticsearch.action.support.WriteRequest; @@ -502,6 +503,50 @@ public void testConcurrentSnapshotCreateAndDeleteOther() { } } + public void testConcurrentSnapshotDeleteAndDeleteIndex() { + setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10)); + + String repoName = "repo"; + String snapshotName = "snapshot"; + final String index = "test"; + + TestClusterNodes.TestClusterNode masterNode = + testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state()); + + final StepListener> createIndicesListener = new StepListener<>(); + + continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> { + // create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot + // finalization + final int indices = randomIntBetween(5, 20); + final GroupedActionListener listener = new 
GroupedActionListener<>(createIndicesListener, indices); + for (int i = 0; i < indices; ++i) { + client().admin().indices().create(new CreateIndexRequest("index-" + i), listener); + } + }); + + final StepListener createSnapshotResponseStepListener = new StepListener<>(); + + continueOrDie(createIndicesListener, createIndexResponses -> + client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false) + .execute(createSnapshotResponseStepListener)); + + continueOrDie(createSnapshotResponseStepListener, + createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), noopListener())); + + deterministicTaskQueue.runAllRunnableTasks(); + + SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE); + assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false)); + final Repository repository = masterNode.repositoriesService.repository(repoName); + Collection snapshotIds = getRepositoryData(repository).getSnapshotIds(); + assertThat(snapshotIds, hasSize(1)); + + final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next()); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertEquals(0, snapshotInfo.failedShards()); + } + /** * Simulates concurrent restarts of data and master nodes as well as relocating a primary shard, while starting and subsequently * deleting a snapshot.