Skip to content

Commit

Permalink
[Segment Replication] Refactor RemoteStoreReplicationSource (opensear…
Browse files Browse the repository at this point in the history
…ch-project#8767)

* [Segment Replication] Refactor remote replication source

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Unit test updates

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Self review

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Self review

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Segregate shard level tests for node to node and remote store segment replication

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix failing unit tests

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix failing UT

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix failing UT

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Address review comments

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix more unit tests

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Improve RemoteStoreReplicationSourceTests, remove unnecessary mocks and use actual failures for failure/exception use cases

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Spotless check fix

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Address review comments

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Ignore files already in store while computing segment file diff with primary

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Spotless fix

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix failing UT

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Spotless fix

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Move read/writes from IndexInput/Output to RemoteSegmentMetadata

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Address review comment

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Update recovery flow to perform commits during recovery

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Remove unnecessary char

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Address review comments

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Update comment nit-pick

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Remove deletion logic causing read issues due to deleted segments_N

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Spotless fix

Signed-off-by: Suraj Singh <surajrider@gmail.com>

* Fix unit tests

Signed-off-by: Suraj Singh <surajrider@gmail.com>

---------

Signed-off-by: Suraj Singh <surajrider@gmail.com>
  • Loading branch information
dreamer-89 committed Aug 2, 2023
1 parent d711f66 commit e2112e0
Show file tree
Hide file tree
Showing 23 changed files with 1,496 additions and 1,125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.opensearch.indices.replication;

import org.opensearch.action.admin.indices.replication.SegmentReplicationStatsResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.metadata.IndexMetadata;
Expand All @@ -24,7 +23,6 @@
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.index.IndexModule;
import org.opensearch.index.IndexService;
import org.opensearch.index.SegmentReplicationPerGroupStats;
import org.opensearch.index.SegmentReplicationShardStats;
import org.opensearch.index.engine.Engine;
import org.opensearch.index.shard.IndexShard;
Expand Down Expand Up @@ -134,24 +132,6 @@ protected void waitForSearchableDocs(long docCount, String... nodes) throws Exce
waitForSearchableDocs(docCount, Arrays.stream(nodes).collect(Collectors.toList()));
}

/**
 * Polls the segment replication stats API until the first replica entry of the first per-group stats
 * element for {@code INDEX_NAME} reports {@link SegmentReplicationState.Stage#DONE}, for up to one minute.
 *
 * @param node name of the node whose client is used to issue the stats request
 * @throws Exception if the expected stage is not observed before the timeout, or the stats call fails
 */
protected void waitForSegmentReplication(String node) throws Exception {
    assertBusy(() -> {
        final SegmentReplicationStatsResponse statsResponse = client(node).admin()
            .indices()
            .prepareSegmentReplicationStats(INDEX_NAME)
            .setDetailed(true)
            .execute()
            .actionGet();
        final SegmentReplicationPerGroupStats perGroupStats = statsResponse.getReplicationStats().get(INDEX_NAME).get(0);
        // Resolve the stage through the Optional instead of calling get() on it: when no replica stats (or no
        // replication state) are reported yet, the stage is null and the assertion below fails with an
        // AssertionError, so the surrounding assertBusy polls again rather than aborting on an unchecked
        // NoSuchElementException / NullPointerException.
        final SegmentReplicationState.Stage stage = perGroupStats.getReplicaStats()
            .stream()
            .findFirst()
            .map(replicaStats -> replicaStats.getCurrentReplicationState())
            .map(replicationState -> replicationState.getStage())
            .orElse(null);
        // Expected value goes first so a failure message reads "expected DONE but was <stage>".
        assertEquals(SegmentReplicationState.Stage.DONE, stage);
    }, 1, TimeUnit.MINUTES);
}

protected void verifyStoreContent() throws Exception {
assertBusy(() -> {
final ClusterState clusterState = getClusterState();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.snapshots;

import org.opensearch.action.ActionFuture;
import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStage;
import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus;
import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus;
import org.opensearch.cluster.SnapshotsInProgress;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.threadpool.ThreadPool;

import java.nio.file.Path;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.is;
import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings;

public class RemoteIndexSnapshotStatusApiIT extends AbstractSnapshotIntegTestCase {

/**
 * Assembles the settings applied to every node of the test cluster: the parent class settings first,
 * followed by the timestamp-interval, feature-flag and remote-store cluster overrides used by this suite.
 *
 * @param nodeOrdinal ordinal of the node being configured, forwarded to the parent implementation
 * @return the combined node settings
 */
@Override
protected Settings nodeSettings(int nodeOrdinal) {
    final Settings.Builder nodeSettingsBuilder = Settings.builder();
    nodeSettingsBuilder.put(super.nodeSettings(nodeOrdinal));
    // We have tests that check by-timestamp order
    nodeSettingsBuilder.put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0);
    nodeSettingsBuilder.put(FeatureFlags.REMOTE_STORE, "true");
    nodeSettingsBuilder.put(FeatureFlags.SEGMENT_REPLICATION_EXPERIMENTAL, "true");
    nodeSettingsBuilder.put(remoteStoreClusterSettings("remote-store-repo-name"));
    return nodeSettingsBuilder.build();
}

/**
 * Takes one full snapshot of a remote-store backed index into a shallow-copy snapshot repository and
 * checks the snapshot status API output: the snapshot is SUCCESS, the first shard is DONE, total file
 * count and size are positive, and the incremental file count and size are zero.
 *
 * @throws Exception if cluster setup, indexing or snapshot creation fails
 */
public void testStatusAPICallForShallowCopySnapshot() throws Exception {
    disableRepoConsistencyCheck("Remote store repository is being used for the test");
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();

    final String snapshotRepoName = "snapshot-repo-name";
    createRepository(snapshotRepoName, "fs", snapshotRepoSettingsForShallowCopy());

    final Path remoteStoreRepoPath = randomRepoPath();
    final String remoteStoreRepoName = "remote-store-repo-name";
    createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath);

    final String remoteStoreEnabledIndexName = "remote-index-1";
    final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings();
    createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings);
    ensureGreen();

    logger.info("--> indexing some data");
    for (int i = 0; i < 100; i++) {
        index(remoteStoreEnabledIndexName, "_doc", Integer.toString(i), "foo", "bar" + i);
    }
    refresh();

    final String snapshot = "snapshot";
    createFullSnapshot(snapshotRepoName, snapshot);
    // Use a test-framework assertion instead of the `assert` keyword: it is evaluated regardless of the
    // JVM's -ea flag and reports the expected and actual counts on failure.
    assertEquals(
        "unexpected number of lock files in remote store",
        1,
        getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length
    );

    final SnapshotStatus snapshotStatus = getSnapshotStatus(snapshotRepoName, snapshot);
    assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS));

    // Validating that the incremental file count and incremental file size is zero for shallow copy
    final SnapshotIndexShardStatus shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName);
    assertThat(shallowSnapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE));
    assertThat(shallowSnapshotShardState.getStats().getTotalFileCount(), greaterThan(0));
    assertThat(shallowSnapshotShardState.getStats().getTotalSize(), greaterThan(0L));
    assertThat(shallowSnapshotShardState.getStats().getIncrementalFileCount(), is(0));
    assertThat(shallowSnapshotShardState.getStats().getIncrementalSize(), is(0L));
}

/**
 * Takes two consecutive full snapshots of the same remote-store backed index, with no indexing in
 * between, and checks that the status API reports identical total and incremental file counts and
 * sizes for the first shard in both snapshots.
 *
 * @throws Exception if cluster setup, indexing or snapshot creation fails
 */
public void testStatusAPIStatsForBackToBackShallowSnapshot() throws Exception {
    disableRepoConsistencyCheck("Remote store repository is being used for the test");
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();

    final String snapshotRepoName = "snapshot-repo-name";
    createRepository(snapshotRepoName, "fs", snapshotRepoSettingsForShallowCopy());

    final Path remoteStoreRepoPath = randomRepoPath();
    final String remoteStoreRepoName = "remote-store-repo-name";
    createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath);

    final String remoteStoreEnabledIndexName = "remote-index-1";
    final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings();
    createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings);
    ensureGreen();

    logger.info("--> indexing some data");
    for (int i = 0; i < 100; i++) {
        index(remoteStoreEnabledIndexName, "_doc", Integer.toString(i), "foo", "bar" + i);
    }
    refresh();

    createFullSnapshot(snapshotRepoName, "test-snap-1");
    // Test-framework assertions (not the `assert` keyword) so the lock-file checks always run and report
    // expected vs. actual counts on failure.
    assertEquals(
        "unexpected number of lock files after first snapshot",
        1,
        getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length
    );

    SnapshotStatus snapshotStatus = getSnapshotStatus(snapshotRepoName, "test-snap-1");
    assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS));

    // Record the first snapshot's shard stats as the baseline for the second snapshot.
    SnapshotIndexShardStatus shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName);
    assertThat(shallowSnapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE));
    final int totalFileCount = shallowSnapshotShardState.getStats().getTotalFileCount();
    final long totalSize = shallowSnapshotShardState.getStats().getTotalSize();
    final int incrementalFileCount = shallowSnapshotShardState.getStats().getIncrementalFileCount();
    final long incrementalSize = shallowSnapshotShardState.getStats().getIncrementalSize();

    createFullSnapshot(snapshotRepoName, "test-snap-2");
    assertEquals(
        "unexpected number of lock files after second snapshot",
        2,
        getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length
    );

    snapshotStatus = getSnapshotStatus(snapshotRepoName, "test-snap-2");
    assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS));
    shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName);
    assertThat(shallowSnapshotShardState.getStats().getTotalFileCount(), equalTo(totalFileCount));
    assertThat(shallowSnapshotShardState.getStats().getTotalSize(), equalTo(totalSize));
    assertThat(shallowSnapshotShardState.getStats().getIncrementalFileCount(), equalTo(incrementalFileCount));
    assertThat(shallowSnapshotShardState.getStats().getIncrementalSize(), equalTo(incrementalSize));
}

/**
 * Starts a full snapshot against a "mock" repository configured with {@code block_on_data}, checks
 * that the status API reports the snapshot as STARTED while one snapshot is in progress, then unblocks
 * the data nodes and waits for the snapshot request to complete.
 *
 * @throws Exception if cluster setup, indexing or the snapshot request fails
 */
public void testStatusAPICallInProgressShallowSnapshot() throws Exception {
    disableRepoConsistencyCheck("Remote store repository is being used for the test");
    internalCluster().startClusterManagerOnlyNode();
    internalCluster().startDataOnlyNode();

    final String blockingSnapshotRepo = "snapshot-repo-name";
    createRepository(blockingSnapshotRepo, "mock", snapshotRepoSettingsForShallowCopy().put("block_on_data", true));

    final Path remoteRepoLocation = randomRepoPath();
    final String remoteRepo = "remote-store-repo-name";
    createRepository(remoteRepo, "mock", remoteRepoLocation);

    final String indexName = "remote-index-1";
    createIndex(indexName, getRemoteStoreBackedIndexSettings());
    ensureGreen();

    logger.info("--> indexing some data");
    int docId = 0;
    while (docId < 100) {
        index(indexName, "_doc", Integer.toString(docId), "foo", "bar" + docId);
        docId++;
    }
    refresh();

    logger.info("--> snapshot");
    final ActionFuture<CreateSnapshotResponse> pendingSnapshot = startFullSnapshot(blockingSnapshotRepo, "test-snap");

    logger.info("--> wait for data nodes to get blocked");
    awaitNumberOfSnapshotsInProgress(1);
    // Read the state reported by the status API while the snapshot is still held up by the mock repository.
    final SnapshotsInProgress.State reportedState = client().admin()
        .cluster()
        .prepareSnapshotStatus(blockingSnapshotRepo)
        .setSnapshots("test-snap")
        .get()
        .getSnapshots()
        .get(0)
        .getState();
    assertEquals(SnapshotsInProgress.State.STARTED, reportedState);

    logger.info("--> unblock all data nodes");
    unblockAllDataNodes(blockingSnapshotRepo);

    logger.info("--> wait for snapshot to finish");
    pendingSnapshot.actionGet();
}

/**
 * Extracts a shard-level status for one index from a snapshot status.
 *
 * @param snapshotStatus snapshot status to read from
 * @param indexName name used to look up the index entry in {@code snapshotStatus.getIndices()}
 * @return the entry returned by {@code getShards().get(0)} for that index
 *         (presumably the status of shard id 0 — confirm against the return type of getShards())
 */
private static SnapshotIndexShardStatus stateFirstShard(SnapshotStatus snapshotStatus, String indexName) {
// NOTE(review): no guard for an index that is absent from the status; callers here always pass an index they just snapshotted.
return snapshotStatus.getIndices().get(indexName).getShards().get(0);
}

/**
 * Requests the status of one named snapshot through the cluster admin client and returns the first
 * entry of the response.
 *
 * @param repoName repository the snapshot status request is issued against
 * @param snapshotName name of the snapshot to look up
 * @return the first {@link SnapshotStatus} in the response
 * @throws AssertionError wrapping the {@link SnapshotMissingException} if the request raises one
 */
private static SnapshotStatus getSnapshotStatus(String repoName, String snapshotName) {
    try {
        return client().admin()
            .cluster()
            .prepareSnapshotStatus(repoName)
            .setSnapshots(snapshotName)
            .get()
            .getSnapshots()
            .get(0);
    } catch (SnapshotMissingException missingSnapshot) {
        // Report a missing snapshot as a test failure while keeping the original exception as the cause.
        throw new AssertionError(missingSnapshot);
    }
}
}
Loading

0 comments on commit e2112e0

Please sign in to comment.