mirror of https://github.com/apache/lucene.git
SOLR-10001: Fix overseer-roles test bug
This commit is contained in:
parent
a14d79366f
commit
eba9390965
|
@ -21,7 +21,6 @@ import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -254,39 +253,6 @@ public class CollectionsAPISolrJTest extends SolrCloudTestCase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testAddAndRemoveRole() throws InterruptedException, IOException, SolrServerException {
|
|
||||||
|
|
||||||
String node = cluster.getRandomJetty(random()).getNodeName();
|
|
||||||
|
|
||||||
CollectionAdminRequest.addRole(node, "overseer").process(cluster.getSolrClient());
|
|
||||||
|
|
||||||
CollectionAdminResponse response = CollectionAdminRequest.getClusterStatus().process(cluster.getSolrClient());
|
|
||||||
|
|
||||||
NamedList<Object> rsp = response.getResponse();
|
|
||||||
NamedList<Object> cs = (NamedList<Object>) rsp.get("cluster");
|
|
||||||
assertNotNull("Cluster state should not be null", cs);
|
|
||||||
Map<String, Object> roles = (Map<String, Object>) cs.get("roles");
|
|
||||||
assertNotNull("Role information should not be null", roles);
|
|
||||||
List<String> overseer = (List<String>) roles.get("overseer");
|
|
||||||
assertNotNull(overseer);
|
|
||||||
assertEquals(1, overseer.size());
|
|
||||||
assertTrue(overseer.contains(node));
|
|
||||||
|
|
||||||
// Remove role
|
|
||||||
CollectionAdminRequest.removeRole(node, "overseer").process(cluster.getSolrClient());
|
|
||||||
|
|
||||||
response = CollectionAdminRequest.getClusterStatus().process(cluster.getSolrClient());
|
|
||||||
rsp = response.getResponse();
|
|
||||||
cs = (NamedList<Object>) rsp.get("cluster");
|
|
||||||
assertNotNull("Cluster state should not be null", cs);
|
|
||||||
roles = (Map<String, Object>) cs.get("roles");
|
|
||||||
assertNotNull("Role information should not be null", roles);
|
|
||||||
overseer = (List<String>) roles.get("overseer");
|
|
||||||
assertFalse(overseer.contains(node));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOverseerStatus() throws IOException, SolrServerException {
|
public void testOverseerStatus() throws IOException, SolrServerException {
|
||||||
CollectionAdminResponse response = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient());
|
CollectionAdminResponse response = new CollectionAdminRequest.OverseerStatus().process(cluster.getSolrClient());
|
||||||
|
|
|
@ -17,28 +17,27 @@
|
||||||
package org.apache.solr.cloud;
|
package org.apache.solr.cloud;
|
||||||
|
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.net.URL;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Objects;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
|
||||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||||
import org.apache.solr.cloud.overseer.OverseerAction;
|
import org.apache.solr.cloud.overseer.OverseerAction;
|
||||||
import org.apache.solr.common.cloud.SolrZkClient;
|
|
||||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||||
import org.apache.solr.common.util.Utils;
|
import org.apache.solr.common.util.Utils;
|
||||||
import org.apache.solr.util.TimeOut;
|
import org.apache.solr.util.TimeOut;
|
||||||
import org.apache.zookeeper.data.Stat;
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.junit.Before;
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getLeaderNode;
|
import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getLeaderNode;
|
||||||
import static org.apache.solr.cloud.OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames;
|
import static org.apache.solr.cloud.OverseerTaskProcessor.getSortedElectionNodes;
|
||||||
import static org.hamcrest.CoreMatchers.not;
|
|
||||||
|
|
||||||
public class OverseerRolesTest extends SolrCloudTestCase {
|
public class OverseerRolesTest extends SolrCloudTestCase {
|
||||||
|
|
||||||
|
@ -51,117 +50,99 @@ public class OverseerRolesTest extends SolrCloudTestCase {
|
||||||
.configure();
|
.configure();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Before
|
private void waitForNewOverseer(int seconds, Predicate<String> state) throws Exception {
|
||||||
public void clearAllOverseerRoles() throws Exception {
|
TimeOut timeout = new TimeOut(seconds, TimeUnit.SECONDS);
|
||||||
for (String node : OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient())) {
|
String current = null;
|
||||||
CollectionAdminRequest.removeRole(node, "overseer").process(cluster.getSolrClient());
|
while (timeout.hasTimedOut() == false) {
|
||||||
}
|
current = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
|
||||||
}
|
if (state.test(current))
|
||||||
|
return;
|
||||||
@Test
|
|
||||||
public void testQuitCommand() throws Exception {
|
|
||||||
|
|
||||||
SolrZkClient zk = zkClient();
|
|
||||||
byte[] data = zk.getData("/overseer_elect/leader", null, new Stat(), true);
|
|
||||||
Map m = (Map) Utils.fromJSON(data);
|
|
||||||
String s = (String) m.get("id");
|
|
||||||
String leader = LeaderElector.getNodeName(s);
|
|
||||||
log.info("Current overseer: {}", leader);
|
|
||||||
Overseer.getStateUpdateQueue(zk)
|
|
||||||
.offer(Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
|
|
||||||
"id", s)));
|
|
||||||
final TimeOut timeout = new TimeOut(10, TimeUnit.SECONDS);
|
|
||||||
String newLeader = null;
|
|
||||||
for(;! timeout.hasTimedOut();){
|
|
||||||
newLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(zk);
|
|
||||||
if (newLeader != null && !newLeader.equals(leader))
|
|
||||||
break;
|
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
assertThat("Leader not changed yet", newLeader, not(leader));
|
fail("Timed out waiting for overseer state change");
|
||||||
|
}
|
||||||
|
|
||||||
assertTrue("The old leader should have rejoined election",
|
private void waitForNewOverseer(int seconds, String expected) throws Exception {
|
||||||
OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zk).contains(leader));
|
waitForNewOverseer(seconds, s -> Objects.equals(s, expected));
|
||||||
|
}
|
||||||
|
|
||||||
|
private JettySolrRunner getOverseerJetty() throws Exception {
|
||||||
|
String overseer = getLeaderNode(zkClient());
|
||||||
|
URL overseerUrl = new URL("http://" + overseer.substring(0, overseer.indexOf('_')));
|
||||||
|
int hostPort = overseerUrl.getPort();
|
||||||
|
for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
|
||||||
|
if (jetty.getBaseUrl().getPort() == hostPort)
|
||||||
|
return jetty;
|
||||||
|
}
|
||||||
|
fail("Couldn't find overseer node " + overseer);
|
||||||
|
return null; // to keep the compiler happy
|
||||||
|
}
|
||||||
|
|
||||||
|
private void logOverseerState() throws KeeperException, InterruptedException {
|
||||||
|
log.info("Overseer: {}", getLeaderNode(zkClient()));
|
||||||
|
// SLF4J only substitutes arguments at "{}" markers; without one the election queue was never printed.
log.info("Election queue: {}", getSortedElectionNodes(zkClient(), "/overseer_elect/election"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOverseerRole() throws Exception {
|
public void testOverseerRole() throws Exception {
|
||||||
|
|
||||||
List<String> l = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient()) ;
|
logOverseerState();
|
||||||
|
List<String> nodes = OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient());
|
||||||
|
String overseer1 = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
|
||||||
|
nodes.remove(overseer1);
|
||||||
|
|
||||||
log.info("All nodes {}", l);
|
Collections.shuffle(nodes, random());
|
||||||
String currentLeader = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
|
String overseer2 = nodes.get(0);
|
||||||
log.info("Current leader {} ", currentLeader);
|
log.info("### Setting overseer designate {}", overseer2);
|
||||||
l.remove(currentLeader);
|
|
||||||
|
|
||||||
Collections.shuffle(l, random());
|
CollectionAdminRequest.addRole(overseer2, "overseer").process(cluster.getSolrClient());
|
||||||
String overseerDesignate = l.get(0);
|
|
||||||
log.info("overseerDesignate {}", overseerDesignate);
|
|
||||||
|
|
||||||
CollectionAdminRequest.addRole(overseerDesignate, "overseer").process(cluster.getSolrClient());
|
waitForNewOverseer(15, overseer2);
|
||||||
|
|
||||||
TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS);
|
|
||||||
|
|
||||||
boolean leaderchanged = false;
|
|
||||||
for (;!timeout.hasTimedOut();) {
|
|
||||||
if (overseerDesignate.equals(OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient()))) {
|
|
||||||
log.info("overseer designate is the new overseer");
|
|
||||||
leaderchanged =true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Thread.sleep(100);
|
|
||||||
}
|
|
||||||
assertTrue("could not set the new overseer . expected "+
|
|
||||||
overseerDesignate + " current order : " +
|
|
||||||
getSortedOverseerNodeNames(zkClient()) +
|
|
||||||
" ldr :"+ OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient()) ,leaderchanged);
|
|
||||||
|
|
||||||
//add another node as overseer
|
//add another node as overseer
|
||||||
l.remove(overseerDesignate);
|
nodes.remove(overseer2);
|
||||||
Collections.shuffle(l, random());
|
Collections.shuffle(nodes, random());
|
||||||
|
|
||||||
String anotherOverseer = l.get(0);
|
String overseer3 = nodes.get(0);
|
||||||
log.info("Adding another overseer designate {}", anotherOverseer);
|
log.info("### Adding another overseer designate {}", overseer3);
|
||||||
CollectionAdminRequest.addRole(anotherOverseer, "overseer").process(cluster.getSolrClient());
|
CollectionAdminRequest.addRole(overseer3, "overseer").process(cluster.getSolrClient());
|
||||||
|
|
||||||
String currentOverseer = getLeaderNode(zkClient());
|
// kill the current overseer, and check that the new designate becomes the new overseer
|
||||||
|
JettySolrRunner leaderJetty = getOverseerJetty();
|
||||||
|
logOverseerState();
|
||||||
|
|
||||||
log.info("Current Overseer {}", currentOverseer);
|
|
||||||
|
|
||||||
String hostPort = currentOverseer.substring(0, currentOverseer.indexOf('_'));
|
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
log.info("hostPort : {}", hostPort);
|
|
||||||
|
|
||||||
JettySolrRunner leaderJetty = null;
|
|
||||||
|
|
||||||
for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
|
|
||||||
String s = jetty.getBaseUrl().toString();
|
|
||||||
log.info("jetTy {}",s);
|
|
||||||
sb.append(s).append(" , ");
|
|
||||||
if (s.contains(hostPort)) {
|
|
||||||
leaderJetty = jetty;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assertNotNull("Could not find a jetty2 kill", leaderJetty);
|
|
||||||
|
|
||||||
log.info("leader node {}", leaderJetty.getBaseUrl());
|
|
||||||
log.info("current election Queue",
|
|
||||||
OverseerCollectionConfigSetProcessor.getSortedElectionNodes(zkClient(), "/overseer_elect/election"));
|
|
||||||
ChaosMonkey.stop(leaderJetty);
|
ChaosMonkey.stop(leaderJetty);
|
||||||
timeout = new TimeOut(10, TimeUnit.SECONDS);
|
waitForNewOverseer(10, overseer3);
|
||||||
leaderchanged = false;
|
|
||||||
for (; !timeout.hasTimedOut(); ) {
|
// add another node as overseer
|
||||||
currentOverseer = getLeaderNode(zkClient());
|
nodes.remove(overseer3);
|
||||||
if (anotherOverseer.equals(currentOverseer)) {
|
Collections.shuffle(nodes, random());
|
||||||
leaderchanged = true;
|
String overseer4 = nodes.get(0);
|
||||||
break;
|
log.info("### Adding last overseer designate {}", overseer4);
|
||||||
}
|
CollectionAdminRequest.addRole(overseer4, "overseer").process(cluster.getSolrClient());
|
||||||
Thread.sleep(100);
|
logOverseerState();
|
||||||
}
|
|
||||||
assertTrue("New overseer designate has not become the overseer, expected : " + anotherOverseer + "actual : " + getLeaderNode(zkClient()), leaderchanged);
|
// remove the overseer role from the current overseer
|
||||||
|
CollectionAdminRequest.removeRole(overseer3, "overseer").process(cluster.getSolrClient());
|
||||||
|
waitForNewOverseer(15, overseer4);
|
||||||
|
|
||||||
|
// Add it back again - we now have two delegates, 4 and 3
|
||||||
|
CollectionAdminRequest.addRole(overseer3, "overseer").process(cluster.getSolrClient());
|
||||||
|
|
||||||
|
// explicitly tell the overseer to quit
|
||||||
|
String leaderId = OverseerCollectionConfigSetProcessor.getLeaderId(zkClient());
|
||||||
|
String leader = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
|
||||||
|
log.info("### Sending QUIT to overseer {}", leader);
|
||||||
|
Overseer.getStateUpdateQueue(zkClient())
|
||||||
|
.offer(Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
|
||||||
|
"id", leaderId)));
|
||||||
|
|
||||||
|
waitForNewOverseer(10, s -> Objects.equals(leader, s) == false);
|
||||||
|
|
||||||
|
logOverseerState();
|
||||||
|
assertTrue("The old leader should have rejoined election",
|
||||||
|
OverseerCollectionConfigSetProcessor.getSortedOverseerNodeNames(zkClient()).contains(leader));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue