Skip to content

Commit

Permalink
Fix Index Deletion during Snapshot Finalization (elastic#50202)
Browse files Browse the repository at this point in the history
With elastic#45689 making it so that index metadata is written
after all shards have been snapshotted we can't delete indices
that are part of the upcoming snapshot finalization any longer
and it is not sufficient to check if all shards of an index have been
snapshotted before deciding that it is safe to delete it.
This change forbids deleting any index that is in the process of being
snapshot to avoid issues during snapshot finalization.

Relates elastic#50200 (doesn't fully fix yet because we're not fixing the `partial=true`
snapshot case here
  • Loading branch information
original-brownbear committed Dec 16, 2019
1 parent 4ced237 commit 707a3bb
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1535,21 +1535,10 @@ public static Set<Index> snapshottingIndices(final ClusterState currentState, fi
final Set<Index> indices = new HashSet<>();
for (final SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.partial() == false) {
if (entry.state() == State.INIT) {
for (IndexId index : entry.indices()) {
IndexMetaData indexMetaData = currentState.metaData().index(index.getName());
if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) {
indices.add(indexMetaData.getIndex());
}
}
} else {
for (ObjectObjectCursor<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shard : entry.shards()) {
Index index = shard.key.getIndex();
if (indicesToCheck.contains(index)
&& shard.value.state().completed() == false
&& currentState.getMetaData().index(index) != null) {
indices.add(index);
}
for (IndexId index : entry.indices()) {
IndexMetaData indexMetaData = currentState.metaData().index(index.getName());
if (indexMetaData != null && indicesToCheck.contains(indexMetaData.getIndex())) {
indices.add(indexMetaData.getIndex());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import org.elasticsearch.action.support.ActiveShardCount;
import org.elasticsearch.action.support.AutoCreateIndex;
import org.elasticsearch.action.support.DestructiveOperations;
import org.elasticsearch.action.support.GroupedActionListener;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.action.support.TransportAction;
import org.elasticsearch.action.support.WriteRequest;
Expand Down Expand Up @@ -504,6 +505,50 @@ public void testConcurrentSnapshotCreateAndDeleteOther() {
}
}

public void testConcurrentSnapshotDeleteAndDeleteIndex() {
setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));

String repoName = "repo";
String snapshotName = "snapshot";
final String index = "test";

TestClusterNodes.TestClusterNode masterNode =
testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());

final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();

continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
// create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
// finalization
final int indices = randomIntBetween(5, 20);
final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
for (int i = 0; i < indices; ++i) {
client().admin().indices().create(new CreateIndexRequest("index-" + i), listener);
}
});

final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();

continueOrDie(createIndicesListener, createIndexResponses ->
client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false)
.execute(createSnapshotResponseStepListener));

continueOrDie(createSnapshotResponseStepListener,
createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), noopListener()));

deterministicTaskQueue.runAllRunnableTasks();

SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE);
assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false));
final Repository repository = masterNode.repositoriesService.repository(repoName);
Collection<SnapshotId> snapshotIds = getRepositoryData(repository).getSnapshotIds();
assertThat(snapshotIds, hasSize(1));

final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
assertEquals(SnapshotState.SUCCESS, snapshotInfo.state());
assertEquals(0, snapshotInfo.failedShards());
}

/**
* Simulates concurrent restarts of data and master nodes as well as relocating a primary shard, while starting and subsequently
* deleting a snapshot.
Expand Down

0 comments on commit 707a3bb

Please sign in to comment.