Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strengthen validateClusterFormed check #49248

Merged
merged 3 commits into from
Nov 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.junit.Assert.assertEquals;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -1092,29 +1092,35 @@ private synchronized void reset(boolean wipeData) throws IOException {

/** ensure a cluster is formed with all published nodes. */
public synchronized void validateClusterFormed() {
String name = randomFrom(random, getNodeNames());
validateClusterFormed(name);
}

/** ensure a cluster is formed with all published nodes, but do so by using the client of the specified node */
private synchronized void validateClusterFormed(String viaNode) {
Set<DiscoveryNode> expectedNodes = new HashSet<>();
final Set<DiscoveryNode> expectedNodes = new HashSet<>();
for (NodeAndClient nodeAndClient : nodes.values()) {
expectedNodes.add(getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode());
}
logger.trace("validating cluster formed via [{}], expecting {}", viaNode, expectedNodes);
final Client client = client(viaNode);
logger.trace("validating cluster formed, expecting {}", expectedNodes);

try {
assertBusy(() -> {
DiscoveryNodes discoveryNodes = client.admin().cluster().prepareState().get().getState().nodes();
assertEquals(expectedNodes.size(), discoveryNodes.getSize());
for (DiscoveryNode expectedNode : expectedNodes) {
assertTrue("Expected node to exist: " + expectedNode, discoveryNodes.nodeExists(expectedNode));
}
final List<ClusterState> states = nodes.values().stream()
.map(node -> getInstanceFromNode(ClusterService.class, node.node()))
.map(ClusterService::state)
.collect(Collectors.toList());
final String debugString = ", expected nodes: " + expectedNodes + " and actual cluster states " + states;
// all nodes have a master
assertTrue("Missing master" + debugString, states.stream().allMatch(cs -> cs.nodes().getMasterNodeId() != null));
// all nodes have the same master (in same term)
assertEquals("Not all masters in same term" + debugString, 1,
states.stream().mapToLong(ClusterState::term).distinct().count());
// all nodes know about all other nodes
states.forEach(cs -> {
DiscoveryNodes discoveryNodes = cs.nodes();
assertEquals("Node size mismatch" + debugString, expectedNodes.size(), discoveryNodes.getSize());
for (DiscoveryNode expectedNode : expectedNodes) {
assertTrue("Expected node to exist: " + expectedNode + debugString, discoveryNodes.nodeExists(expectedNode));
}
});
}, 30, TimeUnit.SECONDS);
} catch (AssertionError ae) {
throw new IllegalStateException("cluster failed to form with expected nodes " + expectedNodes + " and actual nodes " +
client.admin().cluster().prepareState().get().getState().nodes());
throw new IllegalStateException("cluster failed to form", ae);
} catch (Exception e) {
throw new IllegalStateException(e);
}
Expand Down Expand Up @@ -1663,8 +1669,7 @@ private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback)

if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) {
// we have to validate cluster size to ensure that the restarted node has rejoined the cluster if it was master-eligible;
// we have to do this via the node that was just restarted as it may be that the master didn't yet process the fact that it left
validateClusterFormed(nodeAndClient.name);
validateClusterFormed();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.test.disruption;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState;
Expand All @@ -41,8 +40,6 @@
import java.util.concurrent.CountDownLatch;
import java.util.function.BiConsumer;

import static org.junit.Assert.assertFalse;

/**
* Network disruptions are modeled using two components:
* 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
Expand Down Expand Up @@ -119,10 +116,7 @@ public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
}

protected void ensureNodeCount(InternalTestCluster cluster) {
assertFalse("cluster failed to form after disruption was healed", cluster.client().admin().cluster().prepareHealth()
.setWaitForNodes(String.valueOf(cluster.size()))
.setWaitForNoRelocatingShards(true)
.get().isTimedOut());
cluster.validateClusterFormed();
}

@Override
Expand Down