diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index 59be2db8454..e8dd2cc1f34 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -55,7 +55,6 @@ import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; -import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardResponse; @@ -72,15 +71,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import static org.apache.solr.cloud.Assign.Node; @@ -281,7 +276,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { if(overseerDesignates.size() == 1 && overseerDesignates.contains(getLeaderNode(zk))) return; log.info("overseer designates {}", overseerDesignates); - List nodeNames = getSortedNodeNames(zk); + List nodeNames = getSortedOverseerNodeNames(zk); if(nodeNames.size()<2) return; // @@ -318,7 +313,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(2500, TimeUnit.MILLISECONDS); while (System.nanoTime() < timeout) { - List currentNodeNames = getSortedNodeNames(zk); + List currentNodeNames = getSortedOverseerNodeNames(zk); int totalLeaders = 0; @@ -339,7 +334,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } if(!prioritizationComplete) { - log.warn("available designates and current state {} {} ", availableDesignates, getSortedNodeNames(zk)); + log.warn("available designates and current state {} {} ", availableDesignates, getSortedOverseerNodeNames(zk)); } } else { @@ -358,7 +353,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { } - public static List getSortedNodeNames(SolrZkClient zk) throws KeeperException, InterruptedException { + public static List getSortedOverseerNodeNames(SolrZkClient zk) throws KeeperException, InterruptedException { List children = null; try { children = zk.getChildren(OverseerElectionContext.PATH + LeaderElector.ELECTION_NODE, null, true); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 3a4f3fd06cb..29fdc2d3254 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -58,6 +58,7 @@ import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZooKeeperException; +import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.URLUtil; import org.apache.solr.core.CoreContainer; @@ -1662,4 +1663,24 @@ public final class ZkController { } + public void checkOverseerDesignate() { + try { + byte[] data = zkClient.getData(ZkStateReader.ROLES, null, new Stat(), true); + if(data ==null) return; + Map roles = (Map) ZkStateReader.fromJSON(data); + if(roles ==null) return; + List nodeList= (List) roles.get("overseer"); + if(nodeList == null) return; + if(nodeList.contains(getNodeName())){ + ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.ADDROLE.toString().toLowerCase(Locale.ROOT), + "node", getNodeName(), + "role", "overseer"); + log.info("Going to add role {} ",props); + getOverseerCollectionQueue().offer(ZkStateReader.toJSON(props)); + } + } catch (Exception e) { + log.warn("could not readd the overseer designate ",e); + } + } + } diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index e1fbaf0cf0d..c4cbf52a42d 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -347,6 +347,7 @@ public class CoreContainer { } } } + zkSys.getZkController().checkOverseerDesignate(); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java index 67ac49b8718..777a17886d0 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java @@ -20,7 +20,7 @@ package org.apache.solr.cloud; import static org.apache.solr.cloud.OverseerCollectionProcessor.MAX_SHARDS_PER_NODE; import static org.apache.solr.cloud.OverseerCollectionProcessor.NUM_SLICES; import static org.apache.solr.cloud.OverseerCollectionProcessor.REPLICATION_FACTOR; -import static org.apache.solr.cloud.OverseerCollectionProcessor.getSortedNodeNames; +import static org.apache.solr.cloud.OverseerCollectionProcessor.getSortedOverseerNodeNames; import static org.apache.solr.cloud.OverseerCollectionProcessor.getLeaderNode; import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; @@ -95,7 +95,7 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ createCollection(collectionName, client); waitForRecoveriesToFinish(collectionName, false); - List l = OverseerCollectionProcessor.getSortedNodeNames(client.getZkStateReader().getZkClient()) ; + List l = OverseerCollectionProcessor.getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) ; log.info("All nodes {}", l); String currentLeader = OverseerCollectionProcessor.getLeaderNode(client.getZkStateReader().getZkClient()); @@ -118,15 +118,9 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ } Thread.sleep(100); } - /*if(!leaderchanged){ - - log.warn("expected {}, current order {}", - overseerDesignate, - getSortedNodeNames(client.getZkStateReader().getZkClient())+ " ldr :"+ OverseerCollectionProcessor.getLeaderNode(client.getZkStateReader().getZkClient()) ); - }*/ assertTrue("could not set the new overseer . expected "+ overseerDesignate + " current order : " + - getSortedNodeNames(client.getZkStateReader().getZkClient()) + + getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + " ldr :"+ OverseerCollectionProcessor.getLeaderNode(client.getZkStateReader().getZkClient()) ,leaderchanged); @@ -145,7 +139,7 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ timeout = System.currentTimeMillis()+10000; leaderchanged = false; for(;System.currentTimeMillis() < timeout;){ - List sortedNodeNames = getSortedNodeNames(client.getZkStateReader().getZkClient()); + List sortedNodeNames = getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()); if(sortedNodeNames.get(1) .equals(anotherOverseer) || sortedNodeNames.get(0).equals(anotherOverseer)){ leaderchanged =true; break; @@ -153,14 +147,18 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ Thread.sleep(100); } - assertTrue("New overseer not the frontrunner : "+ getSortedNodeNames(client.getZkStateReader().getZkClient()) + " expected : "+ anotherOverseer, leaderchanged); + assertTrue("New overseer not the frontrunner : "+ getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + " expected : "+ anotherOverseer, leaderchanged); String currentOverseer = getLeaderNode(client.getZkStateReader().getZkClient()); + String killedOverseer = currentOverseer; + log.info("Current Overseer {}", currentOverseer); Pattern pattern = Pattern.compile("(.*):(\\d*)(.*)"); Matcher m = pattern.matcher(currentOverseer); + JettySolrRunner stoppedJetty =null; + if(m.matches()){ String hostPort = m.group(1)+":"+m.group(2); @@ -171,7 +169,7 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ if(s.contains(hostPort)){ log.info("leader node {}",s); ChaosMonkey.stop(jetty); - + stoppedJetty = jetty; timeout = System.currentTimeMillis()+10000; leaderchanged = false; for(;System.currentTimeMillis() < timeout;){ @@ -189,6 +187,25 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ } + ChaosMonkey.start(stoppedJetty); + + timeout = System.currentTimeMillis()+10000; + leaderchanged = false; + for(;System.currentTimeMillis() < timeout;){ + List sortedNodeNames = getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()); + if(sortedNodeNames.get(1).equals(killedOverseer) || sortedNodeNames.get(0).equals(killedOverseer)){ + leaderchanged =true; + break; + } + Thread.sleep(100); + } + + assertTrue("New overseer not the frontrunner : "+ getSortedOverseerNodeNames(client.getZkStateReader().getZkClient()) + " expected : "+ killedOverseer, leaderchanged); + + + + + client.shutdown();