mirror of https://github.com/apache/lucene.git
SOLR-5650: When a replica becomes a leader, only peer sync with other replicas that last published an ACTIVE state.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1560221 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6851eab30c
commit
1e5f5795dc
|
@ -213,6 +213,9 @@ Bug Fixes
|
|||
* SOLR-5643: ConcurrentUpdateSolrServer will sometimes not spawn a new Runner
|
||||
thread even though there are updates in the queue. (Mark Miller)
|
||||
|
||||
* SOLR-5650: When a replica becomes a leader, only peer sync with other replicas
|
||||
that last published an ACTIVE state. (Mark Miller)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
|
|||
|
||||
boolean success = false;
|
||||
try {
|
||||
success = syncStrategy.sync(zkController, core, leaderProps);
|
||||
success = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, "Exception while trying to sync", e);
|
||||
success = false;
|
||||
|
|
|
@ -74,8 +74,11 @@ public class SyncStrategy {
|
|||
public String baseUrl;
|
||||
}
|
||||
|
||||
public boolean sync(ZkController zkController, SolrCore core,
|
||||
ZkNodeProps leaderProps) {
|
||||
public boolean sync(ZkController zkController, SolrCore core, ZkNodeProps leaderProps) {
|
||||
return sync(zkController, core, leaderProps, false);
|
||||
}
|
||||
|
||||
public boolean sync(ZkController zkController, SolrCore core, ZkNodeProps leaderProps, boolean peerSyncOnlyWithActive) {
|
||||
if (SKIP_AUTO_RECOVERY) {
|
||||
return true;
|
||||
}
|
||||
|
@ -95,7 +98,7 @@ public class SyncStrategy {
|
|||
return false;
|
||||
}
|
||||
|
||||
success = syncReplicas(zkController, core, leaderProps);
|
||||
success = syncReplicas(zkController, core, leaderProps, peerSyncOnlyWithActive);
|
||||
} finally {
|
||||
SolrRequestInfo.clearRequestInfo();
|
||||
}
|
||||
|
@ -103,7 +106,7 @@ public class SyncStrategy {
|
|||
}
|
||||
|
||||
private boolean syncReplicas(ZkController zkController, SolrCore core,
|
||||
ZkNodeProps leaderProps) {
|
||||
ZkNodeProps leaderProps, boolean peerSyncOnlyWithActive) {
|
||||
boolean success = false;
|
||||
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
|
||||
String collection = cloudDesc.getCollectionName();
|
||||
|
@ -117,7 +120,7 @@ public class SyncStrategy {
|
|||
// first sync ourselves - we are the potential leader after all
|
||||
try {
|
||||
success = syncWithReplicas(zkController, core, leaderProps, collection,
|
||||
shardId);
|
||||
shardId, peerSyncOnlyWithActive);
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, "Sync Failed", e);
|
||||
}
|
||||
|
@ -145,7 +148,7 @@ public class SyncStrategy {
|
|||
}
|
||||
|
||||
private boolean syncWithReplicas(ZkController zkController, SolrCore core,
|
||||
ZkNodeProps props, String collection, String shardId) {
|
||||
ZkNodeProps props, String collection, String shardId, boolean peerSyncOnlyWithActive) {
|
||||
List<ZkCoreNodeProps> nodes = zkController.getZkStateReader()
|
||||
.getReplicaProps(collection, shardId,core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName(),
|
||||
props.getStr(ZkStateReader.CORE_NAME_PROP));
|
||||
|
@ -163,7 +166,7 @@ public class SyncStrategy {
|
|||
// if we can't reach a replica for sync, we still consider the overall sync a success
|
||||
// TODO: as an assurance, we should still try and tell the sync nodes that we couldn't reach
|
||||
// to recover once more?
|
||||
PeerSync peerSync = new PeerSync(core, syncWith, core.getUpdateHandler().getUpdateLog().numRecordsToKeep, true, true);
|
||||
PeerSync peerSync = new PeerSync(core, syncWith, core.getUpdateHandler().getUpdateLog().numRecordsToKeep, true, true, peerSyncOnlyWithActive);
|
||||
return peerSync.sync();
|
||||
}
|
||||
|
||||
|
|
|
@ -827,7 +827,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
props.put(ZkStateReader.CORE_NAME_PROP, cname);
|
||||
props.put(ZkStateReader.NODE_NAME_PROP, zkController.getNodeName());
|
||||
|
||||
boolean success = syncStrategy.sync(zkController, core, new ZkNodeProps(props));
|
||||
boolean success = syncStrategy.sync(zkController, core, new ZkNodeProps(props), true);
|
||||
// solrcloud_debug
|
||||
if (log.isDebugEnabled()) {
|
||||
try {
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -539,6 +540,17 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
|
||||
|
||||
public void processSync(ResponseBuilder rb, int nVersions, String sync) {
|
||||
|
||||
boolean onlyIfActive = rb.req.getParams().getBool("onlyIfActive", false);
|
||||
|
||||
if (onlyIfActive) {
|
||||
if (!rb.req.getCore().getCoreDescriptor().getCloudDescriptor().getLastPublished().equals(ZkStateReader.ACTIVE)) {
|
||||
log.info("Last published state was not ACTIVE, cannot sync.");
|
||||
rb.rsp.add("sync", "false");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
List<String> replicas = StrUtils.splitSmart(sync, ",", true);
|
||||
|
||||
boolean cantReachIsSuccess = rb.req.getParams().getBool("cantReachIsSuccess", false);
|
||||
|
|
|
@ -35,7 +35,6 @@ import org.apache.http.client.HttpClient;
|
|||
import org.apache.http.conn.ConnectTimeoutException;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
@ -77,9 +76,10 @@ public class PeerSync {
|
|||
private Set<Long> requestedUpdateSet;
|
||||
private long ourLowThreshold; // 20th percentile
|
||||
private long ourHighThreshold; // 80th percentile
|
||||
private boolean cantReachIsSuccess;
|
||||
private boolean getNoVersionsIsSuccess;
|
||||
private final boolean cantReachIsSuccess;
|
||||
private final boolean getNoVersionsIsSuccess;
|
||||
private final HttpClient client;
|
||||
private final boolean onlyIfActive;
|
||||
|
||||
// comparator that sorts by absolute value, putting highest first
|
||||
private static Comparator<Long> absComparator = new Comparator<Long>() {
|
||||
|
@ -124,12 +124,17 @@ public class PeerSync {
|
|||
}
|
||||
|
||||
public PeerSync(SolrCore core, List<String> replicas, int nUpdates, boolean cantReachIsSuccess, boolean getNoVersionsIsSuccess) {
|
||||
this(core, replicas, nUpdates, cantReachIsSuccess, getNoVersionsIsSuccess, false);
|
||||
}
|
||||
|
||||
public PeerSync(SolrCore core, List<String> replicas, int nUpdates, boolean cantReachIsSuccess, boolean getNoVersionsIsSuccess, boolean onlyIfActive) {
|
||||
this.replicas = replicas;
|
||||
this.nUpdates = nUpdates;
|
||||
this.maxUpdates = nUpdates;
|
||||
this.cantReachIsSuccess = cantReachIsSuccess;
|
||||
this.getNoVersionsIsSuccess = getNoVersionsIsSuccess;
|
||||
this.client = core.getCoreDescriptor().getCoreContainer().getUpdateShardHandler().getHttpClient();
|
||||
this.onlyIfActive = onlyIfActive;
|
||||
|
||||
uhandler = core.getUpdateHandler();
|
||||
ulog = uhandler.getUpdateLog();
|
||||
|
@ -431,9 +436,10 @@ public class PeerSync {
|
|||
|
||||
sreq.purpose = 0;
|
||||
sreq.params = new ModifiableSolrParams();
|
||||
sreq.params.set("qt","/get");
|
||||
sreq.params.set("distrib",false);
|
||||
sreq.params.set("qt", "/get");
|
||||
sreq.params.set("distrib", false);
|
||||
sreq.params.set("getUpdates", StrUtils.join(toRequest, ','));
|
||||
sreq.params.set("onlyIfActive", onlyIfActive);
|
||||
sreq.responses.clear(); // needs to be zeroed for correct correlation to occur
|
||||
|
||||
shardHandler.submit(sreq, sreq.shards[0], sreq.params);
|
||||
|
|
Loading…
Reference in New Issue