From d4b4225f63c8dc08ae2364e6841a8226c1e03bc2 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 4 Mar 2019 10:45:45 +0100 Subject: [PATCH] Snapshot Stability Fixes (#39550) * Backport of various snapshot stability fixes from `master` to `6.7` making the snapshot logic in `6.7` equivalent to that in `master` functionally * Includes #38368, #38025 and #37612 --- .../cluster/SnapshotsInProgress.java | 36 +- .../snapshots/SnapshotException.java | 4 - .../snapshots/SnapshotShardsService.java | 378 +++++++------- .../snapshots/SnapshotsService.java | 467 +++++++++--------- 4 files changed, 444 insertions(+), 441 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 7308d471afb9d..73be2ea006656 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import org.elasticsearch.Version; import org.elasticsearch.cluster.ClusterState.Custom; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -93,9 +94,11 @@ public static class Entry { private final ImmutableOpenMap> waitingIndices; private final long startTime; private final long repositoryStateId; + @Nullable private final String failure; public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, State state, List indices, - long startTime, long repositoryStateId, ImmutableOpenMap shards) { + long startTime, long repositoryStateId, ImmutableOpenMap shards, + String failure) { this.state = state; this.snapshot = snapshot; this.includeGlobalState = includeGlobalState; @@ -110,15 +113,26 @@ public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, Sta this.waitingIndices = findWaitingIndices(shards); } this.repositoryStateId = repositoryStateId; + this.failure = failure; + } + + public Entry(Snapshot snapshot, boolean includeGlobalState, boolean partial, State state, List indices, + long startTime, long repositoryStateId, ImmutableOpenMap shards) { + this(snapshot, includeGlobalState, partial, state, indices, startTime, repositoryStateId, shards, null); } public Entry(Entry entry, State state, ImmutableOpenMap shards) { this(entry.snapshot, entry.includeGlobalState, entry.partial, state, entry.indices, entry.startTime, - entry.repositoryStateId, shards); + entry.repositoryStateId, shards, entry.failure); + } + + public Entry(Entry entry, State state, ImmutableOpenMap shards, String failure) { + this(entry.snapshot, entry.includeGlobalState, entry.partial, state, entry.indices, entry.startTime, + entry.repositoryStateId, shards, failure); } public Entry(Entry entry, ImmutableOpenMap shards) { - this(entry, entry.state, shards); + this(entry, entry.state, shards, entry.failure); } public Snapshot snapshot() { @@ -157,6 +171,10 @@ public long getRepositoryStateId() { return repositoryStateId; } + public String failure() { + return failure; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -437,6 +455,12 @@ public SnapshotsInProgress(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(REPOSITORY_ID_INTRODUCED_VERSION)) { repositoryStateId = in.readLong(); } + final String failure; + if 
(in.getVersion().onOrAfter(Version.V_6_7_0)) { + failure = in.readOptionalString(); + } else { + failure = null; + } entries[i] = new Entry(snapshot, includeGlobalState, partial, @@ -444,7 +468,8 @@ public SnapshotsInProgress(StreamInput in) throws IOException { Collections.unmodifiableList(indexBuilder), startTime, repositoryStateId, - builder.build()); + builder.build(), + failure); } this.entries = Arrays.asList(entries); } @@ -476,6 +501,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(REPOSITORY_ID_INTRODUCED_VERSION)) { out.writeLong(entry.repositoryStateId); } + if (out.getVersion().onOrAfter(Version.V_6_7_0)) { + out.writeOptionalString(entry.failure); + } } } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotException.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotException.java index d389ed634f3af..05db85d6f7211 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotException.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotException.java @@ -51,10 +51,6 @@ public SnapshotException(final Snapshot snapshot, final String msg, final Throwa } } - public SnapshotException(final String repositoryName, final SnapshotId snapshotId, final String msg) { - this(repositoryName, snapshotId, msg, null); - } - public SnapshotException(final String repositoryName, final SnapshotId snapshotId, final String msg, final Throwable cause) { super("[" + repositoryName + ":" + snapshotId + "] " + msg, cause); this.repositoryName = repositoryName; diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java index 3f1cf1db32807..116a3f45b0087 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java @@ -69,26 +69,26 @@ import org.elasticsearch.repositories.Repository; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportChannel; +import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportRequestDeduplicator; import org.elasticsearch.transport.TransportRequestHandler; import org.elasticsearch.transport.TransportResponse; +import org.elasticsearch.transport.TransportResponseHandler; import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.Executor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; import java.util.stream.Collectors; import static java.util.Collections.emptyMap; -import static java.util.Collections.unmodifiableMap; +import static java.util.Collections.unmodifiableList; import static org.elasticsearch.cluster.SnapshotsInProgress.completed; import static org.elasticsearch.transport.EmptyTransportResponseHandler.INSTANCE_SAME; @@ -114,11 +114,11 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements private final ThreadPool threadPool; - private final Lock shutdownLock = new ReentrantLock(); + private final Map> shardSnapshots = new HashMap<>(); - private final Condition 
shutdownCondition = shutdownLock.newCondition(); - - private volatile Map> shardSnapshots = emptyMap(); + // A map of snapshots to the shardIds that we already reported to the master as failed + private final TransportRequestDeduplicator remoteFailedRequestDeduplicator = + new TransportRequestDeduplicator<>(); private final SnapshotStateExecutor snapshotStateExecutor = new SnapshotStateExecutor(); private final UpdateSnapshotStatusAction updateSnapshotStatusHandler; @@ -139,7 +139,7 @@ public SnapshotShardsService(Settings settings, ClusterService clusterService, S } // The constructor of UpdateSnapshotStatusAction will register itself to the TransportService. - this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction(settings, UPDATE_SNAPSHOT_STATUS_ACTION_NAME, + this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction( transportService, clusterService, threadPool, actionFilters, indexNameExpressionResolver); if (DiscoveryNode.isMasterNode(settings)) { @@ -147,7 +147,6 @@ public SnapshotShardsService(Settings settings, ClusterService clusterService, S transportService.registerRequestHandler(UPDATE_SNAPSHOT_STATUS_ACTION_NAME_V6, UpdateSnapshotStatusRequestV6::new, ThreadPool.Names.SAME, new UpdateSnapshotStateRequestHandlerV6()); } - } @Override @@ -161,16 +160,6 @@ protected void doStart() { @Override protected void doStop() { - shutdownLock.lock(); - try { - while(!shardSnapshots.isEmpty() && shutdownCondition.await(5, TimeUnit.SECONDS)) { - // Wait for at most 5 second for locally running snapshots to finish - } - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } finally { - shutdownLock.unlock(); - } } @Override @@ -185,7 +174,9 @@ public void clusterChanged(ClusterChangedEvent event) { SnapshotsInProgress currentSnapshots = event.state().custom(SnapshotsInProgress.TYPE); if ((previousSnapshots == null && currentSnapshots != null) || (previousSnapshots != null && previousSnapshots.equals(currentSnapshots) == false)) { - processIndexShardSnapshots(event); + synchronized (shardSnapshots) { + processIndexShardSnapshots(currentSnapshots, event.state().nodes().getMasterNode()); + } } String previousMasterNodeId = event.previousState().nodes().getMasterNodeId(); @@ -202,13 +193,14 @@ public void clusterChanged(ClusterChangedEvent event) { @Override public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { // abort any snapshots occurring on the soon-to-be closed shard - Map> snapshotShardsMap = shardSnapshots; - for (Map.Entry> snapshotShards : snapshotShardsMap.entrySet()) { - Map shards = snapshotShards.getValue(); - if (shards.containsKey(shardId)) { - logger.debug("[{}] shard closing, abort snapshotting for snapshot [{}]", - shardId, snapshotShards.getKey().getSnapshotId()); - shards.get(shardId).abortIfNotCompleted("shard is closing, aborting"); + synchronized (shardSnapshots) { + for (Map.Entry> snapshotShards : shardSnapshots.entrySet()) { + Map shards = snapshotShards.getValue(); + if (shards.containsKey(shardId)) { + logger.debug("[{}] shard closing, abort snapshotting for snapshot [{}]", + shardId, snapshotShards.getKey().getSnapshotId()); + shards.get(shardId).abortIfNotCompleted("shard is closing, aborting"); + } } } } @@ -223,163 +215,146 @@ public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexSh * @return map of shard id to snapshot status */ public Map currentSnapshotShards(Snapshot snapshot) { - return shardSnapshots.get(snapshot); + synchronized 
(shardSnapshots) { + final Map current = shardSnapshots.get(snapshot); + return current == null ? null : new HashMap<>(current); + } } /** * Checks if any new shards should be snapshotted on this node * - * @param event cluster state changed event + * @param snapshotsInProgress Current snapshots in progress in cluster state */ - private void processIndexShardSnapshots(ClusterChangedEvent event) { - SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE); - Map> survivors = new HashMap<>(); + private void processIndexShardSnapshots(SnapshotsInProgress snapshotsInProgress, DiscoveryNode masterNode) { + cancelRemoved(snapshotsInProgress); + if (snapshotsInProgress != null) { + startNewSnapshots(snapshotsInProgress, masterNode); + } + } + + private void cancelRemoved(@Nullable SnapshotsInProgress snapshotsInProgress) { // First, remove snapshots that are no longer there - for (Map.Entry> entry : shardSnapshots.entrySet()) { + Iterator>> it = shardSnapshots.entrySet().iterator(); + while (it.hasNext()) { + final Map.Entry> entry = it.next(); final Snapshot snapshot = entry.getKey(); - if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) { - survivors.put(entry.getKey(), entry.getValue()); - } else { + if (snapshotsInProgress == null || snapshotsInProgress.snapshot(snapshot) == null) { // abort any running snapshots of shards for the removed entry; // this could happen if for some reason the cluster state update for aborting // running shards is missed, then the snapshot is removed is a subsequent cluster // state update, which is being processed here + it.remove(); for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().values()) { snapshotStatus.abortIfNotCompleted("snapshot has been removed in cluster state, aborting"); } } } + } + private void startNewSnapshots(SnapshotsInProgress snapshotsInProgress, DiscoveryNode masterNode) { // For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running // snapshots in the future - Map> newSnapshots = new HashMap<>(); // Now go through all snapshots and update existing or create missing - final String localNodeId = event.state().nodes().getLocalNodeId(); - final DiscoveryNode masterNode = event.state().nodes().getMasterNode(); - final Map> snapshotIndices = new HashMap<>(); - if (snapshotsInProgress != null) { - for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { - snapshotIndices.put(entry.snapshot(), - entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity()))); - if (entry.state() == State.STARTED) { - Map startedShards = new HashMap<>(); - Map snapshotShards = shardSnapshots.get(entry.snapshot()); - for (ObjectObjectCursor shard : entry.shards()) { - // Add all new shards to start processing on - if (localNodeId.equals(shard.value.nodeId())) { - if (shard.value.state() == State.INIT && (snapshotShards == null || !snapshotShards.containsKey(shard.key))) { - logger.trace("[{}] - Adding shard to the queue", shard.key); - startedShards.put(shard.key, IndexShardSnapshotStatus.newInitializing()); - } + final String localNodeId = clusterService.localNode().getId(); + for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { + final State entryState = entry.state(); + if (entryState == State.STARTED) { + Map startedShards = null; + final Snapshot snapshot = entry.snapshot(); + Map snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap()); + for 
(ObjectObjectCursor shard : entry.shards()) { + // Add all new shards to start processing on + final ShardId shardId = shard.key; + final ShardSnapshotStatus shardSnapshotStatus = shard.value; + if (localNodeId.equals(shardSnapshotStatus.nodeId()) && shardSnapshotStatus.state() == State.INIT + && snapshotShards.containsKey(shardId) == false) { + logger.trace("[{}] - Adding shard to the queue", shardId); + if (startedShards == null) { + startedShards = new HashMap<>(); } + startedShards.put(shardId, IndexShardSnapshotStatus.newInitializing()); } - if (!startedShards.isEmpty()) { - newSnapshots.put(entry.snapshot(), startedShards); - if (snapshotShards != null) { - // We already saw this snapshot but we need to add more started shards - Map shards = new HashMap<>(); - // Put all shards that were already running on this node - shards.putAll(snapshotShards); - // Put all newly started shards - shards.putAll(startedShards); - survivors.put(entry.snapshot(), unmodifiableMap(shards)); - } else { - // Brand new snapshot that we haven't seen before - survivors.put(entry.snapshot(), unmodifiableMap(startedShards)); + } + if (startedShards != null && startedShards.isEmpty() == false) { + shardSnapshots.computeIfAbsent(snapshot, s -> new HashMap<>()).putAll(startedShards); + startNewShards(entry, startedShards, masterNode); + } + } else if (entryState == State.ABORTED) { + // Abort all running shards for this snapshot + final Snapshot snapshot = entry.snapshot(); + Map snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap()); + for (ObjectObjectCursor shard : entry.shards()) { + final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key); + if (snapshotStatus != null) { + final IndexShardSnapshotStatus.Copy lastSnapshotStatus = + snapshotStatus.abortIfNotCompleted("snapshot has been aborted"); + final Stage stage = lastSnapshotStatus.getStage(); + if (stage == Stage.FINALIZE) { + logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " + + "letting it finish", snapshot, shard.key); + } else if (stage == Stage.DONE) { + logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, " + + "updating status on the master", snapshot, shard.key); + notifySuccessfulSnapshotShard(snapshot, shard.key, masterNode); + } else if (stage == Stage.FAILURE) { + logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, " + + "updating status on the master", snapshot, shard.key); + notifyFailedSnapshotShard(snapshot, shard.key, lastSnapshotStatus.getFailure(), masterNode); } - } - } else if (entry.state() == State.ABORTED) { - // Abort all running shards for this snapshot - Map snapshotShards = shardSnapshots.get(entry.snapshot()); - if (snapshotShards != null) { - final String failure = "snapshot has been aborted"; - for (ObjectObjectCursor shard : entry.shards()) { - final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key); - if (snapshotStatus != null) { - final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.abortIfNotCompleted(failure); - final Stage stage = lastSnapshotStatus.getStage(); - if (stage == Stage.FINALIZE) { - logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " + - "letting it finish", entry.snapshot(), shard.key); - - } else if (stage == Stage.DONE) { - logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, " + - "updating status on the master", entry.snapshot(), shard.key); - 
notifySuccessfulSnapshotShard(entry.snapshot(), shard.key, localNodeId, masterNode); - - } else if (stage == Stage.FAILURE) { - logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, " + - "updating status on the master", entry.snapshot(), shard.key); - final String snapshotFailure = lastSnapshotStatus.getFailure(); - notifyFailedSnapshotShard(entry.snapshot(), shard.key, localNodeId, snapshotFailure, masterNode); - } - } + } else { + // due to CS batching we might have missed the INIT state and straight went into ABORTED + // notify master that abort has completed by moving to FAILED + if (shard.value.state() == State.ABORTED) { + notifyFailedSnapshotShard(snapshot, shard.key, shard.value.reason(), masterNode); } } } } } + } - // Update the list of snapshots that we saw and tried to started - // If startup of these shards fails later, we don't want to try starting these shards again - shutdownLock.lock(); - try { - shardSnapshots = unmodifiableMap(survivors); - if (shardSnapshots.isEmpty()) { - // Notify all waiting threads that no more snapshots - shutdownCondition.signalAll(); - } - } finally { - shutdownLock.unlock(); - } - - // We have new shards to starts - if (newSnapshots.isEmpty() == false) { - Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); - for (final Map.Entry> entry : newSnapshots.entrySet()) { - final Snapshot snapshot = entry.getKey(); - final Map indicesMap = snapshotIndices.get(snapshot); - assert indicesMap != null; - - for (final Map.Entry shardEntry : entry.getValue().entrySet()) { - final ShardId shardId = shardEntry.getKey(); - final IndexId indexId = indicesMap.get(shardId.getIndexName()); - executor.execute(new AbstractRunnable() { + private void startNewShards(SnapshotsInProgress.Entry entry, Map startedShards, + DiscoveryNode masterNode) { + final Snapshot snapshot = entry.snapshot(); + final Map indicesMap = entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + for (final Map.Entry shardEntry : startedShards.entrySet()) { + final ShardId shardId = shardEntry.getKey(); + final IndexId indexId = indicesMap.get(shardId.getIndexName()); + assert indexId != null; + executor.execute(new AbstractRunnable() { - final SetOnce failure = new SetOnce<>(); + private final SetOnce failure = new SetOnce<>(); - @Override - public void doRun() { - final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id()); - assert indexId != null; - snapshot(indexShard, snapshot, indexId, shardEntry.getValue()); - } + @Override + public void doRun() { + final IndexShard indexShard = + indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id()); + snapshot(indexShard, snapshot, indexId, shardEntry.getValue()); + } - @Override - public void onFailure(Exception e) { - logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to snapshot shard", - shardId, snapshot), e); - failure.set(e); - } + @Override + public void onFailure(Exception e) { + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to snapshot shard", shardId, snapshot), e); + failure.set(e); + } - @Override - public void onRejection(Exception e) { - failure.set(e); - } + @Override + public void onRejection(Exception e) { + failure.set(e); + } - @Override - public void onAfter() { - final Exception exception = failure.get(); - if (exception != null) { - final String failure = 
ExceptionsHelper.detailedMessage(exception); - notifyFailedSnapshotShard(snapshot, shardId, localNodeId, failure, masterNode); - } else { - notifySuccessfulSnapshotShard(snapshot, shardId, localNodeId, masterNode); - } - } - }); + @Override + public void onAfter() { + final Exception exception = failure.get(); + if (exception != null) { + notifyFailedSnapshotShard(snapshot, shardId, ExceptionsHelper.detailedMessage(exception), masterNode); + } else { + notifySuccessfulSnapshotShard(snapshot, shardId, masterNode); + } } - } + }); } } @@ -432,8 +407,6 @@ private void syncShardStatsOnNewMaster(ClusterChangedEvent event) { if (snapshotsInProgress == null) { return; } - - final String localNodeId = event.state().nodes().getLocalNodeId(); final DiscoveryNode masterNode = event.state().nodes().getMasterNode(); for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { @@ -442,7 +415,6 @@ private void syncShardStatsOnNewMaster(ClusterChangedEvent event) { ImmutableOpenMap masterShards = snapshot.shards(); for(Map.Entry localShard : localShards.entrySet()) { ShardId shardId = localShard.getKey(); - IndexShardSnapshotStatus localShardStatus = localShard.getValue(); ShardSnapshotStatus masterShard = masterShards.get(shardId); if (masterShard != null && masterShard.state().completed() == false) { final IndexShardSnapshotStatus.Copy indexShardSnapshotStatus = localShard.getValue().asCopy(); @@ -452,14 +424,13 @@ private void syncShardStatsOnNewMaster(ClusterChangedEvent event) { // but we think the shard is done - we need to make new master know that the shard is done logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, " + "updating status on the master", snapshot.snapshot(), shardId); - notifySuccessfulSnapshotShard(snapshot.snapshot(), shardId, localNodeId, masterNode); + notifySuccessfulSnapshotShard(snapshot.snapshot(), shardId, masterNode); } else if (stage == Stage.FAILURE) { // but we think the shard failed - we need to make new master know that the shard failed logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, " + "updating status on master", snapshot.snapshot(), shardId); - final String failure = indexShardSnapshotStatus.getFailure(); - notifyFailedSnapshotShard(snapshot.snapshot(), shardId, localNodeId, failure, masterNode); + notifyFailedSnapshotShard(snapshot.snapshot(), shardId, indexShardSnapshotStatus.getFailure(), masterNode); } } } @@ -528,34 +499,64 @@ public String toString() { } /** Notify the master node that the given shard has been successfully snapshotted **/ - void notifySuccessfulSnapshotShard(final Snapshot snapshot, - final ShardId shardId, - final String localNodeId, - final DiscoveryNode masterNode) { - sendSnapshotShardUpdate(snapshot, shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode); + private void notifySuccessfulSnapshotShard(final Snapshot snapshot, final ShardId shardId, DiscoveryNode masterNode) { + sendSnapshotShardUpdate( + snapshot, shardId, new ShardSnapshotStatus(clusterService.localNode().getId(), State.SUCCESS), masterNode); } /** Notify the master node that the given shard failed to be snapshotted **/ - void notifyFailedSnapshotShard(final Snapshot snapshot, - final ShardId shardId, - final String localNodeId, - final String failure, - final DiscoveryNode masterNode) { - sendSnapshotShardUpdate(snapshot, shardId, new 
ShardSnapshotStatus(localNodeId, State.FAILED, failure), masterNode); + private void notifyFailedSnapshotShard(Snapshot snapshot, ShardId shardId, String failure, DiscoveryNode masterNode) { + sendSnapshotShardUpdate( + snapshot, shardId, new ShardSnapshotStatus(clusterService.localNode().getId(), State.FAILED, failure), masterNode); } /** Updates the shard snapshot status by sending a {@link UpdateIndexShardSnapshotStatusRequest} to the master node */ - void sendSnapshotShardUpdate(final Snapshot snapshot, - final ShardId shardId, - final ShardSnapshotStatus status, - final DiscoveryNode masterNode) { + void sendSnapshotShardUpdate(Snapshot snapshot, ShardId shardId, ShardSnapshotStatus status, DiscoveryNode masterNode) { try { if (masterNode.getVersion().onOrAfter(Version.V_6_1_0)) { UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status); transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, request, INSTANCE_SAME); } else { - UpdateSnapshotStatusRequestV6 requestV6 = new UpdateSnapshotStatusRequestV6(snapshot, shardId, status); - transportService.sendRequest(masterNode, UPDATE_SNAPSHOT_STATUS_ACTION_NAME_V6, requestV6, INSTANCE_SAME); + remoteFailedRequestDeduplicator.executeOnce( + new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status), + new ActionListener() { + @Override + public void onResponse(Void aVoid) { + logger.trace("[{}] [{}] updated snapshot state", snapshot, status); + } + + @Override + public void onFailure(Exception e) { + logger.warn( + () -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e); + } + }, + (req, reqListener) -> transportService.sendRequest( + transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, req, + new TransportResponseHandler() { + @Override + public UpdateIndexShardSnapshotStatusResponse read(StreamInput in) throws IOException { + final UpdateIndexShardSnapshotStatusResponse response = new UpdateIndexShardSnapshotStatusResponse(); + response.readFrom(in); + return response; + } + + @Override + public void handleResponse(UpdateIndexShardSnapshotStatusResponse response) { + reqListener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + reqListener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + }) + ); } } catch (Exception e) { logger.warn(() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e); @@ -588,11 +589,11 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS }); } - class SnapshotStateExecutor implements ClusterStateTaskExecutor { + private class SnapshotStateExecutor implements ClusterStateTaskExecutor { @Override public ClusterTasksResult - execute(ClusterState currentState, List tasks) throws Exception { + execute(ClusterState currentState, List tasks) { final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); if (snapshots != null) { int changedCount = 0; @@ -622,8 +623,6 @@ class SnapshotStateExecutor implements ClusterStateTaskExecutor 0) { logger.trace("changed cluster state triggered by {} snapshot state updates", changedCount); - - final SnapshotsInProgress updatedSnapshots = - new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()])); - return ClusterTasksResult.builder().successes(tasks).build( - 
ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, updatedSnapshots).build()); + return ClusterTasksResult.builder().successes(tasks) + .build(ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, + new SnapshotsInProgress(unmodifiableList(entries))).build()); } } return ClusterTasksResult.builder().successes(tasks).build(currentState); @@ -646,13 +643,14 @@ static class UpdateIndexShardSnapshotStatusResponse extends ActionResponse { } - class UpdateSnapshotStatusAction extends - TransportMasterNodeAction { - UpdateSnapshotStatusAction(Settings settings, String actionName, TransportService transportService, ClusterService clusterService, - ThreadPool threadPool, ActionFilters actionFilters, - IndexNameExpressionResolver indexNameExpressionResolver) { - super(settings, actionName, transportService, clusterService, threadPool, - actionFilters, indexNameExpressionResolver, UpdateIndexShardSnapshotStatusRequest::new); + private class UpdateSnapshotStatusAction + extends TransportMasterNodeAction { + UpdateSnapshotStatusAction(TransportService transportService, ClusterService clusterService, + ThreadPool threadPool, ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) { + super( + settings, SnapshotShardsService.UPDATE_SNAPSHOT_STATUS_ACTION_NAME, transportService, clusterService, threadPool, + actionFilters, indexNameExpressionResolver, UpdateIndexShardSnapshotStatusRequest::new + ); } @Override @@ -667,7 +665,7 @@ protected UpdateIndexShardSnapshotStatusResponse newResponse() { @Override protected void masterOperation(UpdateIndexShardSnapshotStatusRequest request, ClusterState state, - ActionListener listener) throws Exception { + ActionListener listener) { innerUpdateSnapshotState(request, listener); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index c7bf91b476c5b..998ab2a38639b 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -83,7 +83,9 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import static java.util.Collections.unmodifiableList; import static java.util.Collections.unmodifiableMap; import static org.elasticsearch.cluster.SnapshotsInProgress.completed; @@ -98,9 +100,9 @@ * the {@link #beginSnapshot(ClusterState, SnapshotsInProgress.Entry, boolean, ActionListener)} method kicks in and initializes * the snapshot in the repository and then populates list of shards that needs to be snapshotted in cluster state *
  • Each data node is watching for these shards and when new shards scheduled for snapshotting appear in the cluster state, data nodes
- * start processing them through {@link SnapshotShardsService#processIndexShardSnapshots(ClusterChangedEvent)} method
+ * start processing them through {@link SnapshotShardsService#processIndexShardSnapshots} method
  • Once shard snapshot is created data node updates state of the shard in the cluster state using
- * the {@link SnapshotShardsService#sendSnapshotShardUpdate(Snapshot, ShardId, ShardSnapshotStatus, DiscoveryNode)} method
+ * the {@link SnapshotShardsService#sendSnapshotShardUpdate} method
  • When last shard is completed master node in {@link SnapshotShardsService#innerUpdateSnapshotState} method marks the snapshot
 * as completed
  • *
  • After cluster state is updated, the {@link #endSnapshot(SnapshotsInProgress.Entry)} finalizes snapshot in the repository, @@ -121,6 +123,12 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus private final Map>> snapshotCompletionListeners = new ConcurrentHashMap<>(); + // Set of snapshots that are currently being initialized by this node + private final Set initializingSnapshots = Collections.synchronizedSet(new HashSet<>()); + + // Set of snapshots that are currently being ended by this node + private final Set endingSnapshots = Collections.synchronizedSet(new HashSet<>()); + @Inject public SnapshotsService(Settings settings, ClusterService clusterService, IndexNameExpressionResolver indexNameExpressionResolver, RepositoriesService repositoriesService, ThreadPool threadPool) { @@ -207,7 +215,7 @@ public List snapshots(final String repositoryName, } final ArrayList snapshotList = new ArrayList<>(snapshotSet); CollectionUtil.timSort(snapshotList); - return Collections.unmodifiableList(snapshotList); + return unmodifiableList(snapshotList); } /** @@ -223,7 +231,7 @@ public List currentSnapshots(final String repositoryName) { snapshotList.add(inProgressSnapshot(entry)); } CollectionUtil.timSort(snapshotList); - return Collections.unmodifiableList(snapshotList); + return unmodifiableList(snapshotList); } /** @@ -269,7 +277,7 @@ public ClusterState execute(ClusterState currentState) { if (snapshots == null || snapshots.entries().isEmpty()) { // Store newSnapshot here to be processed in clusterStateProcessed List indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, - request.indicesOptions(), request.indices())); + request.indicesOptions(), request.indices())); logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices); List snapshotIndices = repositoryData.resolveNewIndices(indices); newSnapshot = new SnapshotsInProgress.Entry(new Snapshot(repositoryName, snapshotId), @@ -280,6 +288,7 @@ public ClusterState execute(ClusterState currentState) { System.currentTimeMillis(), repositoryData.getGenId(), null); + initializingSnapshots.add(newSnapshot.snapshot()); snapshots = new SnapshotsInProgress(newSnapshot); } else { throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running"); @@ -290,6 +299,9 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e); + if (newSnapshot != null) { + initializingSnapshots.remove(newSnapshot.snapshot()); + } newSnapshot = null; listener.onFailure(e); } @@ -297,7 +309,21 @@ public void onFailure(String source, Exception e) { @Override public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { if (newSnapshot != null) { - beginSnapshot(newState, newSnapshot, request.partial(), listener); + final Snapshot current = newSnapshot.snapshot(); + assert initializingSnapshots.contains(current); + beginSnapshot(newState, newSnapshot, request.partial(), new ActionListener() { + @Override + public void onResponse(final Snapshot snapshot) { + initializingSnapshots.remove(snapshot); + listener.onResponse(snapshot); + } + + @Override + public void onFailure(final Exception e) { + initializingSnapshots.remove(current); + listener.onFailure(e); + } + }); } } @@ -305,7 +331,6 @@ public void 
clusterStateProcessed(String source, ClusterState oldState, final Cl public TimeValue timeout() { return request.masterNodeTimeout(); } - }); } @@ -368,8 +393,11 @@ private void beginSnapshot(final ClusterState clusterState, boolean snapshotCreated; + boolean hadAbortedInitializations; + @Override protected void doRun() { + assert initializingSnapshots.contains(snapshot.snapshot()); Repository repository = repositoriesService.repository(snapshot.snapshot().getRepository()); MetaData metaData = clusterState.metaData(); @@ -394,9 +422,6 @@ protected void doRun() { } clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshot() + "]", new ClusterStateUpdateTask() { - SnapshotsInProgress.Entry endSnapshot; - String failure; - @Override public ClusterState execute(ClusterState currentState) { SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); @@ -407,9 +432,13 @@ public ClusterState execute(ClusterState currentState) { continue; } - if (entry.state() != State.ABORTED) { - // Replace the snapshot that was just intialized - ImmutableOpenMap shards = + if (entry.state() == State.ABORTED) { + entries.add(entry); + assert entry.shards().isEmpty(); + hadAbortedInitializations = true; + } else { + // Replace the snapshot that was just initialized + ImmutableOpenMap shards = shards(currentState, entry.indices()); if (!partial) { Tuple, Set> indicesWithMissingShards = indicesWithMissingShards(shards, @@ -417,9 +446,6 @@ public ClusterState execute(ClusterState currentState) { Set missing = indicesWithMissingShards.v1(); Set closed = indicesWithMissingShards.v2(); if (missing.isEmpty() == false || closed.isEmpty() == false) { - endSnapshot = new SnapshotsInProgress.Entry(entry, State.FAILED, shards); - entries.add(endSnapshot); - final StringBuilder failureMessage = new StringBuilder(); if (missing.isEmpty() == false) { failureMessage.append("Indices don't have primary shards "); @@ -432,24 +458,15 @@ public ClusterState execute(ClusterState currentState) { failureMessage.append("Indices are closed "); failureMessage.append(closed); } - failure = failureMessage.toString(); + entries.add(new SnapshotsInProgress.Entry(entry, State.FAILED, shards, failureMessage.toString())); continue; } } - SnapshotsInProgress.Entry updatedSnapshot = new SnapshotsInProgress.Entry(entry, State.STARTED, shards); - entries.add(updatedSnapshot); - if (completed(shards.values())) { - endSnapshot = updatedSnapshot; - } - } else { - assert entry.state() == State.ABORTED : "expecting snapshot to be aborted during initialization"; - failure = "snapshot was aborted during initialization"; - endSnapshot = entry; - entries.add(endSnapshot); + entries.add(new SnapshotsInProgress.Entry(entry, State.STARTED, shards)); } } return ClusterState.builder(currentState) - .putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(Collections.unmodifiableList(entries))) + .putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))) .build(); } @@ -478,12 +495,12 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS // should still exist when listener is registered. userCreateSnapshotListener.onResponse(snapshot.snapshot()); - // Now that snapshot completion listener is registered we can end the snapshot if needed - // We should end snapshot only if 1) we didn't accept it for processing (which happens when there - // is nothing to do) and 2) there was a snapshot in metadata that we should end. 
Otherwise we should - // go ahead and continue working on this snapshot rather then end here. - if (endSnapshot != null) { - endSnapshot(endSnapshot, failure); + if (hadAbortedInitializations) { + final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE); + assert snapshotsInProgress != null; + final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot()); + assert entry != null; + endSnapshot(entry); } } }); @@ -525,7 +542,7 @@ public void onFailure(Exception e) { cleanupAfterError(e); } - public void onNoLongerMaster(String source) { + public void onNoLongerMaster() { userCreateSnapshotListener.onFailure(e); } @@ -552,7 +569,7 @@ private void cleanupAfterError(Exception exception) { } - private SnapshotInfo inProgressSnapshot(SnapshotsInProgress.Entry entry) { + private static SnapshotInfo inProgressSnapshot(SnapshotsInProgress.Entry entry) { return new SnapshotInfo(entry.snapshot().getSnapshotId(), entry.indices().stream().map(IndexId::getName).collect(Collectors.toList()), entry.startTime(), entry.includeGlobalState()); @@ -610,7 +627,7 @@ public List currentSnapshots(final String repository, builder.add(entry); } } - return Collections.unmodifiableList(builder); + return unmodifiableList(builder); } /** @@ -666,7 +683,7 @@ public Map snapshotShards(final String reposi return unmodifiableMap(shardStatus); } - private SnapshotShardFailure findShardFailure(List shardFailures, ShardId shardId) { + private static SnapshotShardFailure findShardFailure(List shardFailures, ShardId shardId) { for (SnapshotShardFailure shardFailure : shardFailures) { if (shardId.getIndexName().equals(shardFailure.index()) && shardId.getId() == shardFailure.shardId()) { return shardFailure; @@ -680,14 +697,28 @@ public void applyClusterState(ClusterChangedEvent event) { try { if (event.localNodeMaster()) { // We don't remove old master when master flips anymore. So, we need to check for change in master - if (event.nodesRemoved() || event.previousState().nodes().isLocalNodeElectedMaster() == false) { - processSnapshotsOnRemovedNodes(event); + final SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE); + final boolean newMaster = event.previousState().nodes().isLocalNodeElectedMaster() == false; + if (snapshotsInProgress != null) { + if (newMaster || removedNodesCleanupNeeded(snapshotsInProgress, event.nodesDelta().removedNodes())) { + processSnapshotsOnRemovedNodes(); + } + if (event.routingTableChanged() && waitingShardsStartedOrUnassigned(snapshotsInProgress, event)) { + processStartedShards(); + } + // Cleanup all snapshots that have no more work left: + // 1. Completed snapshots + // 2. Snapshots in state INIT that the previous master failed to start + // 3. 
Snapshots in any other state that have all their shard tasks completed + snapshotsInProgress.entries().stream().filter( + entry -> entry.state().completed() + || initializingSnapshots.contains(entry.snapshot()) == false + && (entry.state() == State.INIT || completed(entry.shards().values())) + ).forEach(this::endSnapshot); } - if (event.routingTableChanged()) { - processStartedShards(event); + if (newMaster) { + finalizeSnapshotDeletionFromPreviousMaster(event); } - removeFinishedSnapshotFromClusterState(event); - finalizeSnapshotDeletionFromPreviousMaster(event); } } catch (Exception e) { logger.warn("Failed to update snapshot state ", e); @@ -706,166 +737,134 @@ public void applyClusterState(ClusterChangedEvent event) { * snapshot was deleted and a call to GET snapshots would reveal that the snapshot no longer exists. */ private void finalizeSnapshotDeletionFromPreviousMaster(ClusterChangedEvent event) { - if (event.localNodeMaster() && event.previousState().nodes().isLocalNodeElectedMaster() == false) { - SnapshotDeletionsInProgress deletionsInProgress = event.state().custom(SnapshotDeletionsInProgress.TYPE); - if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) { - assert deletionsInProgress.getEntries().size() == 1 : "only one in-progress deletion allowed per cluster"; - SnapshotDeletionsInProgress.Entry entry = deletionsInProgress.getEntries().get(0); - deleteSnapshotFromRepository(entry.getSnapshot(), null, entry.getRepositoryStateId()); - } + SnapshotDeletionsInProgress deletionsInProgress = event.state().custom(SnapshotDeletionsInProgress.TYPE); + if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) { + assert deletionsInProgress.getEntries().size() == 1 : "only one in-progress deletion allowed per cluster"; + SnapshotDeletionsInProgress.Entry entry = deletionsInProgress.getEntries().get(0); + deleteSnapshotFromRepository(entry.getSnapshot(), null, entry.getRepositoryStateId()); } } /** - * Removes a finished snapshot from the cluster state. This can happen if the previous - * master node processed a cluster state update that marked the snapshot as finished, - * but the previous master node died before removing the snapshot in progress from the - * cluster state. It is then the responsibility of the new master node to end the - * snapshot and remove it from the cluster state. 
+ * Cleans up shard snapshots that were running on removed nodes */ - private void removeFinishedSnapshotFromClusterState(ClusterChangedEvent event) { - if (event.localNodeMaster() && !event.previousState().nodes().isLocalNodeElectedMaster()) { - SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE); - if (snapshotsInProgress != null && !snapshotsInProgress.entries().isEmpty()) { - for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { - if (entry.state().completed()) { - endSnapshot(entry); + private void processSnapshotsOnRemovedNodes() { + clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + DiscoveryNodes nodes = currentState.nodes(); + SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); + if (snapshots == null) { + return currentState; + } + boolean changed = false; + ArrayList entries = new ArrayList<>(); + for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) { + SnapshotsInProgress.Entry updatedSnapshot = snapshot; + if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { + ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); + boolean snapshotChanged = false; + for (ObjectObjectCursor shardEntry : snapshot.shards()) { + ShardSnapshotStatus shardStatus = shardEntry.value; + if (!shardStatus.state().completed() && shardStatus.nodeId() != null) { + if (nodes.nodeExists(shardStatus.nodeId())) { + shards.put(shardEntry.key, shardEntry.value); + } else { + // TODO: Restart snapshot on another node? + snapshotChanged = true; + logger.warn("failing snapshot of shard [{}] on closed node [{}]", + shardEntry.key, shardStatus.nodeId()); + shards.put(shardEntry.key, + new ShardSnapshotStatus(shardStatus.nodeId(), State.FAILED, "node shutdown")); + } + } + } + if (snapshotChanged) { + changed = true; + ImmutableOpenMap shardsMap = shards.build(); + if (!snapshot.state().completed() && completed(shardsMap.values())) { + updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap); + } else { + updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap); + } + } + entries.add(updatedSnapshot); + } else if (snapshot.state() == State.INIT && initializingSnapshots.contains(snapshot.snapshot()) == false) { + changed = true; + // Mark the snapshot as aborted as it failed to start from the previous master + updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.ABORTED, snapshot.shards()); + entries.add(updatedSnapshot); + + // Clean up the snapshot that failed to start from the old master + deleteSnapshot(snapshot.snapshot(), new ActionListener() { + @Override + public void onResponse(Void aVoid) { + logger.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot()); + } + + @Override + public void onFailure(Exception e) { + logger.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot()); + } + }, updatedSnapshot.getRepositoryStateId(), false); } } + if (changed) { + return ClusterState.builder(currentState) + .putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build(); + } + return currentState; } - } + + @Override + public void onFailure(String source, Exception e) { + logger.warn("failed to update snapshot state after node removal"); + } + }); } - /** - * Cleans up shard snapshots that were running on removed 
nodes - * - * @param event cluster changed event - */ - private void processSnapshotsOnRemovedNodes(ClusterChangedEvent event) { - if (removedNodesCleanupNeeded(event)) { - // Check if we just became the master - final boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster(); - clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - DiscoveryNodes nodes = currentState.nodes(); - SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); - if (snapshots == null) { - return currentState; - } + private void processStartedShards() { + clusterService.submitStateUpdateTask("update snapshot state after shards started", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + RoutingTable routingTable = currentState.routingTable(); + SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); + if (snapshots != null) { boolean changed = false; ArrayList entries = new ArrayList<>(); for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) { SnapshotsInProgress.Entry updatedSnapshot = snapshot; - boolean snapshotChanged = false; - if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { - ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); - for (ObjectObjectCursor shardEntry : snapshot.shards()) { - ShardSnapshotStatus shardStatus = shardEntry.value; - if (!shardStatus.state().completed() && shardStatus.nodeId() != null) { - if (nodes.nodeExists(shardStatus.nodeId())) { - shards.put(shardEntry.key, shardEntry.value); - } else { - // TODO: Restart snapshot on another node? 
- snapshotChanged = true; - logger.warn("failing snapshot of shard [{}] on closed node [{}]", - shardEntry.key, shardStatus.nodeId()); - shards.put(shardEntry.key, new ShardSnapshotStatus(shardStatus.nodeId(), - State.FAILED, "node shutdown")); - } - } - } - if (snapshotChanged) { + if (snapshot.state() == State.STARTED) { + ImmutableOpenMap shards = processWaitingShards(snapshot.shards(), + routingTable); + if (shards != null) { changed = true; - ImmutableOpenMap shardsMap = shards.build(); - if (!snapshot.state().completed() && completed(shardsMap.values())) { - updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap); - endSnapshot(updatedSnapshot); + if (!snapshot.state().completed() && completed(shards.values())) { + updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shards); } else { - updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap); + updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, shards); } } entries.add(updatedSnapshot); - } else if (snapshot.state() == State.INIT && newMaster) { - changed = true; - // Mark the snapshot as aborted as it failed to start from the previous master - updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.ABORTED, snapshot.shards()); - entries.add(updatedSnapshot); - - // Clean up the snapshot that failed to start from the old master - deleteSnapshot(snapshot.snapshot(), new ActionListener() { - @Override - public void onResponse(Void aVoid) { - logger.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot()); - } - - @Override - public void onFailure(Exception e) { - logger.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot()); - } - }, updatedSnapshot.getRepositoryStateId(), false); } } if (changed) { - snapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()])); - return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build(); - } - return currentState; - } - - @Override - public void onFailure(String source, Exception e) { - logger.warn("failed to update snapshot state after node removal"); - } - }); - } - } - - private void processStartedShards(ClusterChangedEvent event) { - if (waitingShardsStartedOrUnassigned(event)) { - clusterService.submitStateUpdateTask("update snapshot state after shards started", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - RoutingTable routingTable = currentState.routingTable(); - SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE); - if (snapshots != null) { - boolean changed = false; - ArrayList entries = new ArrayList<>(); - for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) { - SnapshotsInProgress.Entry updatedSnapshot = snapshot; - if (snapshot.state() == State.STARTED) { - ImmutableOpenMap shards = processWaitingShards(snapshot.shards(), - routingTable); - if (shards != null) { - changed = true; - if (!snapshot.state().completed() && completed(shards.values())) { - updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shards); - endSnapshot(updatedSnapshot); - } else { - updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, shards); - } - } - entries.add(updatedSnapshot); - } - } - if (changed) { - snapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()])); - return 
ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build(); - } + return ClusterState.builder(currentState) + .putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build(); } - return currentState; } + return currentState; + } - @Override - public void onFailure(String source, Exception e) { - logger.warn(() -> - new ParameterizedMessage("failed to update snapshot state after shards started from [{}] ", source), e); - } - }); - } + @Override + public void onFailure(String source, Exception e) { + logger.warn(() -> + new ParameterizedMessage("failed to update snapshot state after shards started from [{}] ", source), e); + } + }); } - private ImmutableOpenMap processWaitingShards( + private static ImmutableOpenMap processWaitingShards( ImmutableOpenMap snapshotShards, RoutingTable routingTable) { boolean snapshotChanged = false; ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder(); @@ -905,19 +904,16 @@ private ImmutableOpenMap processWaitingShards( } } - private boolean waitingShardsStartedOrUnassigned(ClusterChangedEvent event) { - SnapshotsInProgress curr = event.state().custom(SnapshotsInProgress.TYPE); - if (curr != null) { - for (SnapshotsInProgress.Entry entry : curr.entries()) { - if (entry.state() == State.STARTED && !entry.waitingIndices().isEmpty()) { - for (ObjectCursor index : entry.waitingIndices().keys()) { - if (event.indexRoutingTableChanged(index.value)) { - IndexRoutingTable indexShardRoutingTable = event.state().getRoutingTable().index(index.value); - for (ShardId shardId : entry.waitingIndices().get(index.value)) { - ShardRouting shardRouting = indexShardRoutingTable.shard(shardId.id()).primaryShard(); - if (shardRouting != null && (shardRouting.started() || shardRouting.unassigned())) { - return true; - } + private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) { + for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) { + if (entry.state() == State.STARTED) { + for (ObjectCursor index : entry.waitingIndices().keys()) { + if (event.indexRoutingTableChanged(index.value)) { + IndexRoutingTable indexShardRoutingTable = event.state().getRoutingTable().index(index.value); + for (ShardId shardId : entry.waitingIndices().get(index.value)) { + ShardRouting shardRouting = indexShardRoutingTable.shard(shardId.id()).primaryShard(); + if (shardRouting != null && (shardRouting.started() || shardRouting.unassigned())) { + return true; } } } @@ -927,28 +923,12 @@ private boolean waitingShardsStartedOrUnassigned(ClusterChangedEvent event) { return false; } - private boolean removedNodesCleanupNeeded(ClusterChangedEvent event) { - SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE); - if (snapshotsInProgress == null) { - return false; - } - // Check if we just became the master - boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster(); - for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { - if (newMaster && (snapshot.state() == State.SUCCESS || snapshot.state() == State.INIT)) { - // We just replaced old master and snapshots in intermediate states needs to be cleaned - return true; - } - for (DiscoveryNode node : event.nodesDelta().removedNodes()) { - for (ObjectCursor shardStatus : snapshot.shards().values()) { - if (!shardStatus.value.state().completed() && node.getId().equals(shardStatus.value.nodeId())) { - // At least one shard was running on the 
removed node - we need to fail it - return true; - } - } - } - } - return false; + private static boolean removedNodesCleanupNeeded(SnapshotsInProgress snapshotsInProgress, List removedNodes) { + // If at least one shard was running on a removed node - we need to fail it + return removedNodes.isEmpty() == false && snapshotsInProgress.entries().stream().flatMap(snapshot -> + StreamSupport.stream(((Iterable) () -> snapshot.shards().valuesIt()).spliterator(), false) + .filter(s -> s.state().completed() == false).map(ShardSnapshotStatus::nodeId)) + .anyMatch(removedNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet())::contains); } /** @@ -981,25 +961,16 @@ private Tuple, Set> indicesWithMissingShards( * * @param entry snapshot */ - void endSnapshot(final SnapshotsInProgress.Entry entry) { - endSnapshot(entry, null); - } - - - /** - * Finalizes the shard in repository and then removes it from cluster state - *
    - * This is non-blocking method that runs on a thread from SNAPSHOT thread pool - * - * @param entry snapshot - * @param failure failure reason or null if snapshot was successful - */ - private void endSnapshot(final SnapshotsInProgress.Entry entry, final String failure) { + private void endSnapshot(final SnapshotsInProgress.Entry entry) { + if (endingSnapshots.add(entry.snapshot()) == false) { + return; + } threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() { @Override protected void doRun() { final Snapshot snapshot = entry.snapshot(); final Repository repository = repositoriesService.repository(snapshot.getRepository()); + final String failure = entry.failure(); logger.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure); ArrayList shardFailures = new ArrayList<>(); for (ObjectObjectCursor shardStatus : entry.shards()) { @@ -1015,7 +986,7 @@ protected void doRun() { entry.startTime(), failure, entry.shards().size(), - Collections.unmodifiableList(shardFailures), + unmodifiableList(shardFailures), entry.getRepositoryStateId(), entry.includeGlobalState()); removeSnapshotFromClusterState(snapshot, snapshotInfo, null); @@ -1033,7 +1004,7 @@ public void onFailure(final Exception e) { /** * Removes record of running snapshot from cluster state - * @param snapshot snapshot + * @param snapshot snapshot * @param snapshotInfo snapshot info if snapshot was successful * @param e exception if snapshot failed */ @@ -1043,11 +1014,11 @@ private void removeSnapshotFromClusterState(final Snapshot snapshot, final Snaps /** * Removes record of running snapshot from cluster state and notifies the listener when this action is complete - * @param snapshot snapshot + * @param snapshot snapshot * @param failure exception if snapshot failed * @param listener listener to notify when snapshot information is removed from the cluster state */ - private void removeSnapshotFromClusterState(final Snapshot snapshot, final SnapshotInfo snapshotInfo, final Exception failure, + private void removeSnapshotFromClusterState(final Snapshot snapshot, @Nullable SnapshotInfo snapshotInfo, final Exception failure, @Nullable CleanupAfterErrorListener listener) { clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() { @@ -1065,8 +1036,8 @@ public ClusterState execute(ClusterState currentState) { } } if (changed) { - snapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()])); - return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build(); + return ClusterState.builder(currentState) + .putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build(); } } return currentState; @@ -1075,6 +1046,7 @@ public ClusterState execute(ClusterState currentState) { @Override public void onFailure(String source, Exception e) { logger.warn(() -> new ParameterizedMessage("[{}] failed to remove snapshot metadata", snapshot), e); + endingSnapshots.remove(snapshot); if (listener != null) { listener.onFailure(e); } @@ -1082,8 +1054,9 @@ public void onFailure(String source, Exception e) { @Override public void onNoLongerMaster(String source) { + endingSnapshots.remove(snapshot); if (listener != null) { - listener.onNoLongerMaster(source); + listener.onNoLongerMaster(); } } @@ -1101,6 +1074,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS logger.warn("Failed to notify listeners", e); } } + 
endingSnapshots.remove(snapshot); if (listener != null) { listener.onResponse(snapshotInfo); } @@ -1131,14 +1105,20 @@ public void deleteSnapshot(final String repositoryName, final String snapshotNam .filter(s -> s.getName().equals(snapshotName)) .findFirst(); // if nothing found by the same name, then look in the cluster state for current in progress snapshots + long repoGenId = repositoryData.getGenId(); if (matchedEntry.isPresent() == false) { - matchedEntry = currentSnapshots(repositoryName, Collections.emptyList()).stream() - .map(e -> e.snapshot().getSnapshotId()).filter(s -> s.getName().equals(snapshotName)).findFirst(); + Optional matchedInProgress = currentSnapshots(repositoryName, Collections.emptyList()).stream() + .filter(s -> s.snapshot().getSnapshotId().getName().equals(snapshotName)).findFirst(); + if (matchedInProgress.isPresent()) { + matchedEntry = matchedInProgress.map(s -> s.snapshot().getSnapshotId()); + // Derive repository generation if a snapshot is in progress because it will increment the generation when it finishes + repoGenId = matchedInProgress.get().getRepositoryStateId() + 1L; + } } if (matchedEntry.isPresent() == false) { throw new SnapshotMissingException(repositoryName, snapshotName); } - deleteSnapshot(new Snapshot(repositoryName, matchedEntry.get()), listener, repositoryData.getGenId(), immediatePriority); + deleteSnapshot(new Snapshot(repositoryName, matchedEntry.get()), listener, repoGenId, immediatePriority); } /** @@ -1201,10 +1181,12 @@ public ClusterState execute(ClusterState currentState) throws Exception { final ImmutableOpenMap shards; final State state = snapshotEntry.state(); + final String failure; if (state == State.INIT) { // snapshot is still initializing, mark it as aborted shards = snapshotEntry.shards(); - + assert shards.isEmpty(); + failure = "Snapshot was aborted during initialization"; } else if (state == State.STARTED) { // snapshot is started - mark every non completed shard as aborted final ImmutableOpenMap.Builder shardsBuilder = ImmutableOpenMap.builder(); @@ -1216,7 +1198,7 @@ public ClusterState execute(ClusterState currentState) throws Exception { shardsBuilder.put(shardEntry.key, status); } shards = shardsBuilder.build(); - + failure = "Snapshot was aborted by deletion"; } else { boolean hasUncompletedShards = false; // Cleanup in case a node gone missing and snapshot wasn't updated for some reason @@ -1237,10 +1219,10 @@ public ClusterState execute(ClusterState currentState) throws Exception { // where we force to finish the snapshot logger.debug("trying to delete completed snapshot with no finalizing shards - can delete immediately"); shards = snapshotEntry.shards(); - endSnapshot(snapshotEntry); } + failure = snapshotEntry.failure(); } - SnapshotsInProgress.Entry newSnapshot = new SnapshotsInProgress.Entry(snapshotEntry, State.ABORTED, shards); + SnapshotsInProgress.Entry newSnapshot = new SnapshotsInProgress.Entry(snapshotEntry, State.ABORTED, shards, failure); clusterStateBuilder.putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(newSnapshot)); } return clusterStateBuilder.build(); @@ -1391,7 +1373,8 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS * @param indices list of indices to be snapshotted * @return list of shard to be included into current snapshot */ - private ImmutableOpenMap shards(ClusterState clusterState, List indices) { + private static ImmutableOpenMap shards(ClusterState clusterState, + List indices) { ImmutableOpenMap.Builder builder = 
ImmutableOpenMap.builder();
         MetaData metaData = clusterState.metaData();
         for (IndexId index : indices) {
@@ -1416,8 +1399,6 @@ private ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shard
                     builder.put(shardId,
                         new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
                 } else if (primary.relocating() || primary.initializing()) {
-                    // The WAITING state was introduced in V1.2.0 -
-                    // don't use it if there are nodes with older version in the cluster
                     builder.put(shardId,
                         new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
                 } else if (!primary.started()) {
                     builder.put(shardId,