diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java index 85bc009d72b..0948a8edc2c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java +++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java @@ -43,7 +43,7 @@ import org.slf4j.LoggerFactory; */ public abstract class ElectionContext { - private static Logger log = LoggerFactory.getLogger(ElectionContext.class); + static Logger log = LoggerFactory.getLogger(ElectionContext.class); final String electionPath; final ZkNodeProps leaderProps; final String id; @@ -451,11 +451,11 @@ final class OverseerElectionContext extends ElectionContext { @Override void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException { - + log.info("I am going to be the leader {}", id); final String id = leaderSeqPath .substring(leaderSeqPath.lastIndexOf("/") + 1); ZkNodeProps myProps = new ZkNodeProps("id", id); - + zkClient.makePath(leaderPath, ZkStateReader.toJSON(myProps), CreateMode.EPHEMERAL, true); diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index c98f98e0848..b5e08c54b38 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -19,7 +19,6 @@ package org.apache.solr.cloud; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrServer; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.CoreAdminRequest; @@ -223,18 +222,18 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { List overseerDesignates = (List) m.get("overseer"); if(overseerDesignates==null || overseerDesignates.isEmpty()) return; - + if(overseerDesignates.size() == 1 && overseerDesignates.contains(getLeaderNode(zk))) return; log.info("overseer designates {}", overseerDesignates); List nodeNames = getSortedNodeNames(zk); if(nodeNames.size()<2) return; // - Set nodesTobePushedBack = new HashSet(); + ArrayList nodesTobePushedBack = new ArrayList<>(); //ensure that the node right behind the leader , i.r at position 1 is a Overseer - Set availableDesignates = new HashSet(); + List availableDesignates = new ArrayList(); - log.debug("sorted nodes {}", nodeNames);//TODO to be removed + log.info("sorted nodes {}", nodeNames);//TODO to be removed for (int i = 0; i < nodeNames.size(); i++) { String s = nodeNames.get(i); @@ -243,7 +242,7 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { for(int j=0;j= 0; i--) { + String s = nodesTobePushedBack.get(i); log.info("pushing back {} ", s); - invokeRejoinOverseer(s); + invokeOverseerOp(s, "rejoin"); } //wait for a while to ensure the designate has indeed come in front @@ -295,14 +295,21 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { if(leaderNode ==null) return; if(!overseerDesignates.contains(leaderNode) && !availableDesignates.isEmpty()){ //this means there are designated Overseer nodes and I am not one of them , kill myself - log.info("I am not an overseerdesignate , rejoining election {} ", leaderNode); - invokeRejoinOverseer(leaderNode); + String newLeader = availableDesignates.get(0); + log.info("I am not an overseerdesignate , forcing a new leader {} ", newLeader); + invokeOverseerOp(newLeader, "leader"); } } public static List getSortedNodeNames(SolrZkClient zk) throws KeeperException, InterruptedException { - List children = zk.getChildren(OverseerElectionContext.PATH + LeaderElector.ELECTION_NODE, null, true); + List children = null; + try { + children = zk.getChildren(OverseerElectionContext.PATH + LeaderElector.ELECTION_NODE, null, true); + } catch (Exception e) { + log.warn("error ", e); + return new ArrayList(); + } LeaderElector.sortSeqs(children); ArrayList nodeNames = new ArrayList<>(children.size()); for (String c : children) nodeNames.add(LeaderElector.getNodeName(c)); @@ -324,9 +331,10 @@ public class OverseerCollectionProcessor implements Runnable, ClosableThread { return nodeName; } - private void invokeRejoinOverseer(String nodeName) { + private void invokeOverseerOp(String nodeName, String op) { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(CoreAdminParams.ACTION, CoreAdminAction.REJOINOVERSEERELECTION.toString()); + params.set(CoreAdminParams.ACTION, CoreAdminAction.OVERSEEROP.toString()); + params.set("op", op); params.set("qt", adminPath); ShardRequest sreq = new ShardRequest(); sreq.purpose = 1; diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index c42656276e6..7b8d60478f1 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -292,6 +292,19 @@ public final class ZkController { return leaderVoteWait; } + public void forceOverSeer(){ + try { + zkClient.delete("/overseer_elect/leader",-1, true); + log.info("Forcing me to be leader {} ",getBaseUrl()); + overseerElector.getContext().runLeaderProcess(true); + rejoinOverseerElection(); + } catch (Exception e) { + throw new SolrException(ErrorCode.SERVER_ERROR, " Error becoming overseer ",e); + + } + + } + private void registerAllCoresAsDown( final CurrentCoreDescriptorProvider registerOnReconnect, boolean updateLastPublished) { List descriptors = registerOnReconnect diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java index d59314f9ae1..75d0ce5e833 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java @@ -218,10 +218,13 @@ public class CoreAdminHandler extends RequestHandlerBase { this.handleRequestBufferUpdatesAction(req, rsp); break; } - case REJOINOVERSEERELECTION:{ + case OVERSEEROP:{ ZkController zkController = coreContainer.getZkController(); if(zkController != null){ - zkController.rejoinOverseerElection(); + String op = req.getParams().get("op"); + if("leader".equals(op)){ + zkController.forceOverSeer(); + } else if ("rejoin".equals(op)) zkController.rejoinOverseerElection(); } break; } diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java index be665467725..530d405d2a8 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerRolesTest.java @@ -17,6 +17,8 @@ package org.apache.solr.cloud; * limitations under the License. */ +import com.google.protobuf.TextFormat; +import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrServer; @@ -47,7 +49,7 @@ import static org.apache.solr.cloud.OverseerCollectionProcessor.REPLICATION_FACT import static org.apache.solr.cloud.OverseerCollectionProcessor.getSortedNodeNames; import static org.apache.solr.common.cloud.ZkNodeProps.makeMap; import static org.apache.solr.common.params.CollectionParams.CollectionAction; -@Ignore("needs to restart the OverSeer") +@LuceneTestCase.Slow public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ private CloudSolrServer client; @@ -119,12 +121,16 @@ public class OverseerRolesTest extends AbstractFullDistribZkTestBase{ } Thread.sleep(100); } - if(!leaderchanged){ + /*if(!leaderchanged){ + log.warn("expected {}, current order {}", overseerDesignate, getSortedNodeNames(client.getZkStateReader().getZkClient())+ " ldr :"+ OverseerCollectionProcessor.getLeaderNode(client.getZkStateReader().getZkClient()) ); - } - assertTrue("could not set the new overseer",leaderchanged); + }*/ + assertTrue("could not set the new overseer . expected "+ + overseerDesignate + " current order : " + + getSortedNodeNames(client.getZkStateReader().getZkClient()) + + " ldr :"+ OverseerCollectionProcessor.getLeaderNode(client.getZkStateReader().getZkClient()) ,leaderchanged); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java index 08d59b34891..9b8a6cd3df3 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java @@ -128,7 +128,7 @@ public abstract class CoreAdminParams REQUESTAPPLYUPDATES, LOAD_ON_STARTUP, TRANSIENT, - REJOINOVERSEERELECTION; + OVERSEEROP; public static CoreAdminAction get( String p ) {