Skip to content

Commit

Permalink
Strengthen validateClusterFormed check (#49248)
Browse files Browse the repository at this point in the history
Strengthens the validateClusterFormed check that is used by the test infrastructure to make
sure that nodes are properly connected and know about each other. Is used in situations where
the cluster is scaled up and down, and where there previously was a network disruption that has
been healed.

Closes #49243
  • Loading branch information
ywelsch committed Nov 21, 2019
1 parent d9835f7 commit 420825c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.junit.Assert.assertEquals;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -1239,29 +1239,35 @@ private synchronized void reset(boolean wipeData) throws IOException {

/** ensure a cluster is formed with all published nodes. */
public synchronized void validateClusterFormed() {
String name = randomFrom(random, getNodeNames());
validateClusterFormed(name);
}

/** ensure a cluster is formed with all published nodes, but do so by using the client of the specified node */
private synchronized void validateClusterFormed(String viaNode) {
Set<DiscoveryNode> expectedNodes = new HashSet<>();
final Set<DiscoveryNode> expectedNodes = new HashSet<>();
for (NodeAndClient nodeAndClient : nodes.values()) {
expectedNodes.add(getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode());
}
logger.trace("validating cluster formed via [{}], expecting {}", viaNode, expectedNodes);
final Client client = client(viaNode);
logger.trace("validating cluster formed, expecting {}", expectedNodes);

try {
assertBusy(() -> {
DiscoveryNodes discoveryNodes = client.admin().cluster().prepareState().get().getState().nodes();
assertEquals(expectedNodes.size(), discoveryNodes.getSize());
for (DiscoveryNode expectedNode : expectedNodes) {
assertTrue("Expected node to exist: " + expectedNode, discoveryNodes.nodeExists(expectedNode));
}
final List<ClusterState> states = nodes.values().stream()
.map(node -> getInstanceFromNode(ClusterService.class, node.node()))
.map(ClusterService::state)
.collect(Collectors.toList());
final String debugString = ", expected nodes: " + expectedNodes + " and actual cluster states " + states;
// all nodes have a master
assertTrue("Missing master" + debugString, states.stream().allMatch(cs -> cs.nodes().getMasterNodeId() != null));
// all nodes have the same master (in same term)
assertEquals("Not all masters in same term" + debugString, 1,
states.stream().mapToLong(ClusterState::term).distinct().count());
// all nodes know about all other nodes
states.forEach(cs -> {
DiscoveryNodes discoveryNodes = cs.nodes();
assertEquals("Node size mismatch" + debugString, expectedNodes.size(), discoveryNodes.getSize());
for (DiscoveryNode expectedNode : expectedNodes) {
assertTrue("Expected node to exist: " + expectedNode + debugString, discoveryNodes.nodeExists(expectedNode));
}
});
}, 30, TimeUnit.SECONDS);
} catch (AssertionError ae) {
throw new IllegalStateException("cluster failed to form with expected nodes " + expectedNodes + " and actual nodes " +
client.admin().cluster().prepareState().get().getState().nodes());
throw new IllegalStateException("cluster failed to form", ae);
} catch (Exception e) {
throw new IllegalStateException(e);
}
Expand Down Expand Up @@ -1821,8 +1827,7 @@ private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback)

if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) {
// we have to validate cluster size to ensure that the restarted node has rejoined the cluster if it was master-eligible;
// we have to do this via the node that was just restarted as it may be that the master didn't yet process the fact that it left
validateClusterFormed(nodeAndClient.name);
validateClusterFormed();
}

if (excludedNodeIds.isEmpty() == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.test.disruption;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState;
Expand All @@ -41,8 +40,6 @@
import java.util.concurrent.CountDownLatch;
import java.util.function.BiConsumer;

import static org.junit.Assert.assertFalse;

/**
* Network disruptions are modeled using two components:
* 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
Expand Down Expand Up @@ -119,10 +116,7 @@ public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
}

protected void ensureNodeCount(InternalTestCluster cluster) {
assertFalse("cluster failed to form after disruption was healed", cluster.client().admin().cluster().prepareHealth()
.setWaitForNodes(String.valueOf(cluster.size()))
.setWaitForNoRelocatingShards(true)
.get().isTimedOut());
cluster.validateClusterFormed();
}

@Override
Expand Down

0 comments on commit 420825c

Please sign in to comment.