mirror of https://github.com/apache/lucene.git

SOLR-12011: Consistence problem when in-sync replicas are DOWN

This commit is contained in:
parent ad7e94afb2
commit 9de4225e9a
@@ -78,6 +78,9 @@ Upgrade Notes
 * LUCENE-8161: If you are using the spatial JTS library with Solr, you must upgrade to 1.15.0. This new version
   of JTS is now dual-licensed to include a BSD style license.
 
+* SOLR-12011: Replicas which are not up-to-date are not allowed to become leader. Use FORCELEADER API to
+  allow these replicas become leader.
+
 New Features
 ----------------------
 * SOLR-11285: Simulation framework for autoscaling. (ab)
@@ -236,6 +239,8 @@ Bug Fixes
   Also changed the display label in the Admin UI from routerField to router.field to match the actual API.
   (Shawn Heisey via Cassandra Targett)
 
+* SOLR-12011: Consistence problem when in-sync replicas are DOWN. (Cao Manh Dat)
+
 Optimizations
 ----------------------
 
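The upgrade note above points operators at the FORCELEADER collections API for the rare case where every up-to-date replica is gone and a lagging replica must be allowed to lead anyway. A minimal sketch of that call via SolrJ, assuming the forceLeaderElection helper is available in this version; the base URL, collection and shard names are placeholders, and the request maps to /admin/collections?action=FORCELEADER&collection=...&shard=...

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class ForceLeaderSketch {
  public static void main(String[] args) throws Exception {
    // "http://localhost:8983/solr", "myCollection" and "shard1" are placeholders for this sketch.
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      // Ask Solr to force a leader for the shard even though no replica currently
      // passes the new "highest term" eligibility check introduced by this commit.
      CollectionAdminRequest.forceLeaderElection("myCollection", "shard1").process(client);
    }
  }
}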
@@ -42,7 +42,7 @@ public class CloudDescriptor {
   // set to true once a core has registered in zk
   // set to false on detecting a session expiration
   private volatile boolean hasRegistered = false;
-  volatile Replica.State lastPublished = Replica.State.ACTIVE;
+  private volatile Replica.State lastPublished = Replica.State.ACTIVE;
 
   public static final String NUM_SHARDS = "numShards";
 
@@ -20,9 +20,10 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 
@@ -359,13 +360,19 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
       }
 
       replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
+      String coreNodeName = core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName();
       // should I be leader?
-      if (weAreReplacement && !shouldIBeLeader(leaderProps, core, weAreReplacement)) {
+      if (zkController.getShardTerms(collection, shardId).registered(coreNodeName)
+          && !zkController.getShardTerms(collection, shardId).canBecomeLeader(coreNodeName)) {
+        log.info("Can't become leader, term of replica {} less than leader", coreNodeName);
         rejoinLeaderElection(core);
         return;
       }
 
+      if (isClosed) {
+        return;
+      }
 
       log.info("I may be the new leader - try and sync");
 
       // we are going to attempt to be the leader
@@ -516,8 +523,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
       zkStateReader.forceUpdateCollection(collection);
       ClusterState clusterState = zkStateReader.getClusterState();
       Replica rep = getReplica(clusterState, collection, leaderProps.getStr(ZkStateReader.CORE_NODE_NAME_PROP));
-      if (rep != null && rep.getState() != Replica.State.ACTIVE
-          && rep.getState() != Replica.State.RECOVERING) {
+      if (rep != null && rep.getState() != Replica.State.ACTIVE) {
         log.debug("We have become the leader after core registration but are not in an ACTIVE state - publishing ACTIVE");
         zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
       }
@@ -593,34 +599,33 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
         }
 
         if (replicas != null && replicas.size() > 0) {
+          // set of replicas which is running in new LIR but lirState=DOWN
+          Set<String> replicasMustBeInLowerTerm = new HashSet<>();
           for (String replicaCoreNodeName : replicas) {
 
             if (coreNodeName.equals(replicaCoreNodeName))
               continue; // added safe-guard so we don't mark this core as down
 
-            if (zkController.getShardTerms(collection, shardId).registered(replicaCoreNodeName)) {
-              // the replica registered its term so it is running with the new LIR implementation
-              // we can put this replica into recovery by increase our terms
-              zkController.getShardTerms(collection, shardId).ensureTermsIsHigher(coreNodeName, Collections.singleton(replicaCoreNodeName));
-              continue;
-            }
-
             final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(coll, shardId, replicaCoreNodeName);
             if (lirState == Replica.State.DOWN || lirState == Replica.State.RECOVERY_FAILED) {
               log.info("After core={} coreNodeName={} was elected leader, a replica coreNodeName={} was found in state: "
                   + lirState.toString() + " and needing recovery.", coreName, coreNodeName, replicaCoreNodeName);
-              List<ZkCoreNodeProps> replicaProps =
-                  zkController.getZkStateReader().getReplicaProps(collection, shardId, coreNodeName);
+              List<Replica> replicasProps =
+                  zkController.getZkStateReader().getClusterState().getCollection(collection)
+                      .getSlice(shardId).getReplicas(EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
 
-              if (replicaProps != null && replicaProps.size() > 0) {
+              if (replicasProps != null && replicasProps.size() > 0) {
                 ZkCoreNodeProps coreNodeProps = null;
-                for (ZkCoreNodeProps p : replicaProps) {
-                  if (((Replica)p.getNodeProps()).getName().equals(replicaCoreNodeName)) {
-                    coreNodeProps = p;
+                for (Replica p : replicasProps) {
+                  if (p.getName().equals(replicaCoreNodeName)) {
+                    coreNodeProps = new ZkCoreNodeProps(p);
                     break;
                   }
                 }
 
+                if (zkController.getShardTerms(collection, shardId).registered(replicaCoreNodeName)) {
+                  replicasMustBeInLowerTerm.add(replicaCoreNodeName);
+                } else {
                   zkController.ensureReplicaInLeaderInitiatedRecovery(cc,
                       collection, shardId, coreNodeProps, core.getCoreDescriptor(),
                       false /* forcePublishState */);
@@ -628,6 +633,10 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
                 }
               }
             }
+          // these replicas registered their terms so it is running with the new LIR implementation
+          // we can put this replica into recovery by increase our terms
+          zkController.getShardTerms(collection, shardId).ensureTermsIsHigher(coreNodeName, replicasMustBeInLowerTerm);
+        }
       } // core gets closed automagically
     }
 
@@ -741,39 +750,6 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
     leaderElector.joinElection(this, true);
   }
 
-  private boolean shouldIBeLeader(ZkNodeProps leaderProps, SolrCore core, boolean weAreReplacement) {
-    log.debug("Checking if I should try and be the leader.");
-
-    if (isClosed) {
-      log.debug("Bailing on leader process because we have been closed");
-      return false;
-    }
-
-    if (!weAreReplacement) {
-      // we are the first node starting in the shard - there is a configurable wait
-      // to make sure others participate in sync and leader election, we can be leader
-      return true;
-    }
-
-    String coreNodeName = core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName();
-    if (zkController.getShardTerms(collection, shardId).registered(coreNodeName)
-        && !zkController.getShardTerms(collection, shardId).canBecomeLeader(coreNodeName)) {
-      log.info("Can't become leader, term of replica {} less than leader", coreNodeName);
-      return false;
-    }
-
-    if (core.getCoreDescriptor().getCloudDescriptor().getLastPublished() == Replica.State.ACTIVE) {
-      log.debug("My last published State was Active, it's okay to be the leader.");
-      return true;
-    }
-    log.debug("My last published State was "
-        + core.getCoreDescriptor().getCloudDescriptor().getLastPublished()
-        + ", I won't be the leader.");
-    // TODO: and if no one is a good candidate?
-
-    return false;
-  }
-
 }
 
 final class OverseerElectionContext extends ElectionContext {
@@ -48,7 +48,7 @@ public class RecoveringCoreTermWatcher implements ZkShardTerms.CoreTermWatcher {
       if (solrCore.getCoreDescriptor() == null || solrCore.getCoreDescriptor().getCloudDescriptor() == null) return true;
 
       String coreNodeName = solrCore.getCoreDescriptor().getCloudDescriptor().getCoreNodeName();
-      if (terms.canBecomeLeader(coreNodeName)) return true;
+      if (terms.haveHighestTermValue(coreNodeName)) return true;
       if (lastTermDoRecovery.get() < terms.getTerm(coreNodeName)) {
         log.info("Start recovery on {} because core's term is less than leader's term", coreNodeName);
         lastTermDoRecovery.set(terms.getTerm(coreNodeName));
@@ -1045,7 +1045,7 @@ public class ZkController {
 
       // This flag is used for testing rolling updates and should be removed in SOLR-11812
       boolean isRunningInNewLIR = "new".equals(desc.getCoreProperty("lirVersion", "new"));
-      if (isRunningInNewLIR) {
+      if (isRunningInNewLIR && cloudDesc.getReplicaType() != Type.PULL) {
         shardTerms.registerTerm(coreZkNodeName);
       }
       String shardId = cloudDesc.getShardId();
@@ -1455,13 +1455,20 @@ public class ZkController {
 
       // This flag is used for testing rolling updates and should be removed in SOLR-11812
       boolean isRunningInNewLIR = "new".equals(cd.getCoreProperty("lirVersion", "new"));
-      if (state == Replica.State.RECOVERING && isRunningInNewLIR) {
-        getShardTerms(collection, shardId).setEqualsToMax(coreNodeName);
+      // pull replicas are excluded because their terms are not considered
+      if (state == Replica.State.RECOVERING && isRunningInNewLIR && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
+        // state is used by client, state of replica can change from RECOVERING to DOWN without needed to finish recovery
+        // by calling this we will know that a replica actually finished recovery or not
+        getShardTerms(collection, shardId).startRecovering(coreNodeName);
       }
+      if (state == Replica.State.ACTIVE && isRunningInNewLIR && cd.getCloudDescriptor().getReplicaType() != Type.PULL) {
+        getShardTerms(collection, shardId).doneRecovering(coreNodeName);
+      }
+
       ZkNodeProps m = new ZkNodeProps(props);
 
       if (updateLastState) {
-        cd.getCloudDescriptor().lastPublished = state;
+        cd.getCloudDescriptor().setLastPublished(state);
       }
       overseerJobQueue.offer(Utils.toJSON(m));
     } finally {
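The second ZkController hunk ties replica state publication to term bookkeeping: publishing RECOVERING marks the replica as recovering, publishing ACTIVE clears the mark, and PULL replicas are left out entirely. A minimal restatement of that mapping, using only methods that appear in this patch; the wrapper class and method below are hypothetical, an illustration rather than the real publish() body.

import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.cloud.Replica;

// Hypothetical helper restating what ZkController.publish(...) now does for a
// non-PULL replica running the new LIR code path.
class PublishTermHook {
  static void onPublish(ZkController zkController, Replica.State state,
                        String collection, String shardId, String coreNodeName) {
    if (state == Replica.State.RECOVERING) {
      // records "<coreNodeName>_recovering" so the leader can tell recovery is still in flight
      zkController.getShardTerms(collection, shardId).startRecovering(coreNodeName);
    } else if (state == Replica.State.ACTIVE) {
      // recovery finished; the replica's term value can be trusted again
      zkController.getShardTerms(collection, shardId).doneRecovering(coreNodeName);
    }
  }
}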
@@ -22,7 +22,6 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.NoSuchElementException;
 import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.TimeoutException;
@@ -100,6 +99,8 @@ public class ZkShardTerms implements AutoCloseable{
    * @param replicasNeedingRecovery set of replicas in which their terms should be lower than leader's term
    */
   public void ensureTermsIsHigher(String leader, Set<String> replicasNeedingRecovery) {
+    if (replicasNeedingRecovery.isEmpty()) return;
+
     Terms newTerms;
     while( (newTerms = terms.increaseTerms(leader, replicasNeedingRecovery)) != null) {
       if (forceSaveTerms(newTerms)) return;
@@ -107,7 +108,7 @@ public class ZkShardTerms implements AutoCloseable{
   }
 
   /**
-   * Can this replica become leader or is this replica's term equals to leader's term?
+   * Can this replica become leader?
    * @param coreNodeName of the replica
    * @return true if this replica can become leader, false if otherwise
    */
@@ -115,6 +116,15 @@ public class ZkShardTerms implements AutoCloseable{
     return terms.canBecomeLeader(coreNodeName);
   }
 
+  /**
+   * Should leader skip sending updates to this replica?
+   * @param coreNodeName of the replica
+   * @return true if this replica has term equals to leader's term, false if otherwise
+   */
+  public boolean skipSendingUpdatesTo(String coreNodeName) {
+    return !terms.haveHighestTermValue(coreNodeName);
+  }
+
   /**
    * Did this replica registered its term? This is a sign to check f
    * @param coreNodeName of the replica
@@ -184,16 +194,59 @@ public class ZkShardTerms implements AutoCloseable{
   }
 
   /**
-   * Set a replica's term equals to leader's term
+   * Set a replica's term equals to leader's term.
+   * This call should only be used by {@link org.apache.solr.common.params.CollectionParams.CollectionAction#FORCELEADER}
    * @param coreNodeName of the replica
    */
-  public void setEqualsToMax(String coreNodeName) {
+  public void setTermEqualsToLeader(String coreNodeName) {
     Terms newTerms;
-    while ( (newTerms = terms.setEqualsToMax(coreNodeName)) != null) {
+    while ( (newTerms = terms.setTermEqualsToLeader(coreNodeName)) != null) {
       if (forceSaveTerms(newTerms)) break;
     }
   }
 
+  public void setTermToZero(String coreNodeName) {
+    Terms newTerms;
+    while ( (newTerms = terms.setTermToZero(coreNodeName)) != null) {
+      if (forceSaveTerms(newTerms)) break;
+    }
+  }
+
+  /**
+   * Mark {@code coreNodeName} as recovering
+   */
+  public void startRecovering(String coreNodeName) {
+    Terms newTerms;
+    while ( (newTerms = terms.startRecovering(coreNodeName)) != null) {
+      if (forceSaveTerms(newTerms)) break;
+    }
+  }
+
+  /**
+   * Mark {@code coreNodeName} as finished recovering
+   */
+  public void doneRecovering(String coreNodeName) {
+    Terms newTerms;
+    while ( (newTerms = terms.doneRecovering(coreNodeName)) != null) {
+      if (forceSaveTerms(newTerms)) break;
+    }
+  }
+
+  /**
+   * When first updates come in, all replicas have some data now,
+   * so we must switch from term 0 (registered) to 1 (have some data)
+   */
+  public void ensureHighestTermsAreNotZero() {
+    Terms newTerms;
+    while ( (newTerms = terms.ensureHighestTermsAreNotZero()) != null) {
+      if (forceSaveTerms(newTerms)) break;
+    }
+  }
+
+  public long getHighestTerm() {
+    return terms.getMaxTerm();
+  }
+
   public long getTerm(String coreNodeName) {
     Long term = terms.getTerm(coreNodeName);
     return term == null? -1 : term;
@@ -232,6 +285,7 @@ public class ZkShardTerms implements AutoCloseable{
     try {
       Stat stat = zkClient.setData(znodePath, znodeData, newTerms.version, true);
       setNewTerms(new Terms(newTerms.values, stat.getVersion()));
+      log.info("Successful update terms at {} to {}", znodePath, newTerms);
       return true;
     } catch (KeeperException.BadVersionException e) {
       log.info("Failed to save terms, version is not match, retrying");
@@ -367,6 +421,7 @@ public class ZkShardTerms implements AutoCloseable{
    */
   static class Terms {
     private final Map<String, Long> values;
+    private final long maxTerm;
     // ZK node version
     private final int version;
 
@@ -377,14 +432,25 @@ public class ZkShardTerms implements AutoCloseable{
     public Terms(Map<String, Long> values, int version) {
       this.values = values;
       this.version = version;
+      if (values.isEmpty()) this.maxTerm = 0;
+      else this.maxTerm = Collections.max(values.values());
     }
 
     /**
-     * Can this replica become leader or is this replica's term equals to leader's term?
+     * Can {@code coreNodeName} become leader?
      * @param coreNodeName of the replica
-     * @return true if this replica can become leader, false if otherwise
+     * @return true if {@code coreNodeName} can become leader, false if otherwise
      */
     boolean canBecomeLeader(String coreNodeName) {
+      return haveHighestTermValue(coreNodeName) && !values.containsKey(coreNodeName + "_recovering");
+    }
+
+    /**
+     * Is {@code coreNodeName}'s term highest?
+     * @param coreNodeName of the replica
+     * @return true if term of {@code coreNodeName} is highest
+     */
+    boolean haveHighestTermValue(String coreNodeName) {
       if (values.isEmpty()) return true;
       long maxTerm = Collections.max(values.values());
       return values.getOrDefault(coreNodeName, 0L) == maxTerm;
@@ -427,6 +493,21 @@ public class ZkShardTerms implements AutoCloseable{
       return new Terms(newValues, version);
     }
 
+    /**
+     * Return a new {@link Terms} in which highest terms are not zero
+     * @return null if highest terms are already larger than zero
+     */
+    Terms ensureHighestTermsAreNotZero() {
+      if (maxTerm > 0) return null;
+      else {
+        HashMap<String, Long> newValues = new HashMap<>(values);
+        for (String replica : values.keySet()) {
+          newValues.put(replica, 1L);
+        }
+        return new Terms(newValues, version);
+      }
+    }
+
     /**
      * Return a new {@link Terms} in which term of {@code coreNodeName} is removed
      * @param coreNodeName of the replica
@@ -453,23 +534,70 @@ public class ZkShardTerms implements AutoCloseable{
       return new Terms(newValues, version);
     }
 
+    Terms setTermToZero(String coreNodeName) {
+      if (values.getOrDefault(coreNodeName, -1L) == 0) {
+        return null;
+      }
+      HashMap<String, Long> newValues = new HashMap<>(values);
+      newValues.put(coreNodeName, 0L);
+      return new Terms(newValues, version);
+    }
+
     /**
      * Return a new {@link Terms} in which the term of {@code coreNodeName} is max
      * @param coreNodeName of the replica
     * @return null if term of {@code coreNodeName} is already maximum
     */
-    Terms setEqualsToMax(String coreNodeName) {
-      long maxTerm;
-      try {
-        maxTerm = Collections.max(values.values());
-      } catch (NoSuchElementException e){
-        maxTerm = 0;
-      }
+    Terms setTermEqualsToLeader(String coreNodeName) {
+      long maxTerm = getMaxTerm();
       if (values.get(coreNodeName) == maxTerm) return null;
 
       HashMap<String, Long> newValues = new HashMap<>(values);
       newValues.put(coreNodeName, maxTerm);
       return new Terms(newValues, version);
     }
+
+    long getMaxTerm() {
+      return maxTerm;
+    }
+
+    /**
+     * Mark {@code coreNodeName} as recovering
+     * @param coreNodeName of the replica
+     * @return null if {@code coreNodeName} is already marked as doing recovering
+     */
+    Terms startRecovering(String coreNodeName) {
+      long maxTerm = getMaxTerm();
+      if (values.get(coreNodeName) == maxTerm && values.getOrDefault(coreNodeName+"_recovering", -1L) == maxTerm)
+        return null;
+
+      HashMap<String, Long> newValues = new HashMap<>(values);
+      newValues.put(coreNodeName, maxTerm);
+      newValues.put(coreNodeName+"_recovering", maxTerm);
+      return new Terms(newValues, version);
+    }
+
+    /**
+     * Mark {@code coreNodeName} as finished recovering
+     * @param coreNodeName of the replica
+     * @return null if term of {@code coreNodeName} is already finished doing recovering
+     */
+    Terms doneRecovering(String coreNodeName) {
+      if (!values.containsKey(coreNodeName+"_recovering")) {
+        return null;
+      }
+
+      HashMap<String, Long> newValues = new HashMap<>(values);
+      newValues.remove(coreNodeName+"_recovering");
+      return new Terms(newValues, version);
+    }
+
+    @Override
+    public String toString() {
+      return "Terms{" +
+          "values=" + values +
+          ", version=" + version +
+          '}';
+    }
   }
 }
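Read together, the ZkShardTerms changes treat the per-shard terms znode as a map from core node name to a long term, plus a "<name>_recovering" entry while a replica is catching up; every mutation builds a new Terms object, and forceSaveTerms retries the ZooKeeper setData until the znode version matches, which is what the while loops above are doing. The following toy class is not Solr code, only a compact model of the rules those methods implement (the bodies are simplified: no ZooKeeper, no compare-and-set retry).

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

// Toy model of the per-shard terms map; method names mirror the patch, bodies are simplified.
class ShardTermsModel {
  final Map<String, Long> values = new HashMap<>();

  long maxTerm() {
    return values.isEmpty() ? 0L : Collections.max(values.values());
  }

  // Highest term means the replica saw every update the leader acknowledged.
  boolean haveHighestTermValue(String replica) {
    return values.isEmpty() || values.getOrDefault(replica, 0L) == maxTerm();
  }

  // Leader eligibility: highest term AND not currently recovering.
  boolean canBecomeLeader(String replica) {
    return haveHighestTermValue(replica) && !values.containsKey(replica + "_recovering");
  }

  // Leader moves its own term above replicas it failed to reach, forcing them to recover.
  void ensureTermsIsHigher(String leader, Set<String> replicasNeedingRecovery) {
    if (replicasNeedingRecovery.isEmpty()) return;
    values.put(leader, maxTerm() + 1);
  }

  // RECOVERING: catch up to the leader's term but keep a marker until recovery finishes.
  void startRecovering(String replica) {
    long max = maxTerm();
    values.put(replica, max);
    values.put(replica + "_recovering", max);
  }

  // ACTIVE: recovery finished, drop the marker.
  void doneRecovering(String replica) {
    values.remove(replica + "_recovering");
  }
}

With this picture, RecoveringCoreTermWatcher's check reads naturally: a core whose term is below the maximum starts recovery, and only a core that both holds the maximum term and carries no recovering marker may win the shard leader election.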
@@ -1132,6 +1132,8 @@ public class CoreContainer {
         if (leader != null && leader.getState() == State.ACTIVE) {
           log.info("Found active leader, will attempt to create fresh core and recover.");
           resetIndexDirectory(dcore, coreConfig);
+          // the index of this core is emptied, its term should be set to 0
+          getZkController().getShardTerms(desc.getCollectionName(), desc.getShardId()).setTermToZero(desc.getCoreNodeName());
           return new SolrCore(this, dcore, coreConfig);
         }
       } catch (SolrException se) {
@@ -1149,7 +1149,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
       if (optionalMaxTerm.isPresent()) {
         liveReplicas.stream()
             .filter(rep -> zkShardTerms.getTerm(rep.getName()) == optionalMaxTerm.getAsLong())
-            .forEach(rep -> zkShardTerms.setEqualsToMax(rep.getName()));
+            .forEach(rep -> zkShardTerms.setTermEqualsToLeader(rep.getName()));
       }
     }
 
@@ -28,7 +28,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -234,16 +233,6 @@ enum CoreAdminOperation implements CoreAdminOp {
     if (cname == null) {
       throw new IllegalArgumentException(CoreAdminParams.CORE + " is required");
     }
-    try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
-
-      // Setting the last published state for this core to be ACTIVE
-      if (core != null) {
-        core.getCoreDescriptor().getCloudDescriptor().setLastPublished(Replica.State.ACTIVE);
-        log().info("Setting the last published state for this core, {}, to {}", core.getName(), Replica.State.ACTIVE);
-      } else {
-        SolrException.log(log(), "Could not find core: " + cname);
-      }
-    }
   }),
 
   BACKUPCORE_OP(BACKUPCORE, new BackupCoreOp()),
@@ -127,7 +127,10 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
 
           ZkShardTerms shardTerms = coreContainer.getZkController().getShardTerms(collectionName, slice.getName());
           // if the replica is waiting for leader to see recovery state, the leader should refresh its terms
-          if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName) && !shardTerms.canBecomeLeader(coreNodeName)) {
+          if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName) && shardTerms.skipSendingUpdatesTo(coreNodeName)) {
+            // The replica changed it term, then published itself as RECOVERING.
+            // This core already see replica as RECOVERING
+            // so it is guarantees that a live-fetch will be enough for this core to see max term published
             shardTerms.refreshTerms();
           }
 
@@ -20,8 +20,10 @@ package org.apache.solr.handler.admin;
 import java.net.URI;
 import java.util.Optional;
 
+import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.core.SolrCore;
@@ -61,11 +63,22 @@ class RestoreCoreOp implements CoreAdminHandler.CoreAdminOp {
 
       URI locationUri = repository.createURI(location);
       try (SolrCore core = it.handler.coreContainer.getCore(cname)) {
+        CloudDescriptor cd = core.getCoreDescriptor().getCloudDescriptor();
+        // this core must be the only replica in its shard otherwise
+        // we cannot guarantee consistency between replicas because when we add data (or restore index) to this replica
+        Slice slice = zkController.getClusterState().getCollection(cd.getCollectionName()).getSlice(cd.getShardId());
+        if (slice.getReplicas().size() != 1) {
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+              "Failed to restore core=" + core.getName() + ", the core must be the only replica in its shard");
+        }
         RestoreCore restoreCore = new RestoreCore(repository, core, locationUri, name);
         boolean success = restoreCore.doRestore();
         if (!success) {
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Failed to restore core=" + core.getName());
         }
+        // other replicas to-be-created will know that they are out of date by
+        // looking at their term : 0 compare to term of this core : 1
+        zkController.getShardTerms(cd.getCollectionName(), cd.getShardId()).ensureHighestTermsAreNotZero();
       }
     }
 }
@@ -23,6 +23,8 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.solr.cloud.CloudDescriptor;
+import org.apache.solr.cloud.ZkShardTerms;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -111,6 +113,16 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
         SolrCore newcore = it.handler.coreContainer.getCore(newCoreName);
         if (newcore != null) {
           newCores.add(newcore);
+          if (it.handler.coreContainer.isZooKeeperAware()) {
+            // this core must be the only replica in its shard otherwise
+            // we cannot guarantee consistency between replicas because when we add data to this replica
+            CloudDescriptor cd = newcore.getCoreDescriptor().getCloudDescriptor();
+            ClusterState clusterState = it.handler.coreContainer.getZkController().getClusterState();
+            if (clusterState.getCollection(cd.getCollectionName()).getSlice(cd.getShardId()).getReplicas().size() != 1) {
+              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                  "Core with core name " + newCoreName + " must be the only replica in shard " + cd.getShardId());
+            }
+          }
         } else {
           throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Core with core name " + newCoreName + " expected but doesn't exist.");
         }
@@ -123,6 +135,15 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router, routeFieldName, splitKey);
       core.getUpdateHandler().split(cmd);
 
+      if (it.handler.coreContainer.isZooKeeperAware()) {
+        for (SolrCore newcore : newCores) {
+          // the index of the core changed from empty to have some data, its term must be not zero
+          CloudDescriptor cd = newcore.getCoreDescriptor().getCloudDescriptor();
+          ZkShardTerms zkShardTerms = it.handler.coreContainer.getZkController().getShardTerms(cd.getCollectionName(), cd.getShardId());
+          zkShardTerms.ensureHighestTermsAreNotZero();
+        }
+      }
+
       // After the split has completed, someone (here?) should start the process of replaying the buffered updates.
 
     } catch (Exception e) {
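Both RestoreCoreOp and SplitOp above lean on the convention that a replica which has merely registered sits at term 0, while any replica whose index has received data is bumped to at least 1 by ensureHighestTermsAreNotZero(). A small standalone illustration of why that bump matters; the names are placeholders and the plain map stands in for the real terms znode.

import java.util.HashMap;
import java.util.Map;

public class TermZeroSketch {
  public static void main(String[] args) {
    Map<String, Long> terms = new HashMap<>();
    terms.put("core_node1", 0L);            // the restored/split core registers at term 0
    // ensureHighestTermsAreNotZero(): the highest term is 0, so every registered term becomes 1
    terms.replaceAll((coreNodeName, term) -> 1L);
    terms.put("core_node2", 0L);            // a replica created afterwards registers at term 0
    long max = terms.values().stream().mapToLong(Long::longValue).max().orElse(0L);
    // core_node2 is below the highest term, so it cannot lead and must recover from core_node1
    System.out.println("core_node2 up to date? " + (terms.get("core_node2") == max));
  }
}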
@@ -23,7 +23,6 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -173,6 +172,8 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
   private boolean forwardToLeader = false;
   private boolean isSubShardLeader = false;
   private List<Node> nodes;
+  private Set<String> skippedCoreNodeNames;
+  private boolean isIndexChanged = false;
 
   private UpdateCommand updateCommand; // the current command this processor is working on.
 
@@ -334,9 +335,13 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
       // that means I want to forward onto my replicas...
       // so get the replicas...
       forwardToLeader = false;
-      List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader()
-          .getReplicaProps(collection, shardId, leaderReplica.getName(), null, Replica.State.DOWN);
-      if (replicaProps == null) {
+      ClusterState clusterState = zkController.getZkStateReader().getClusterState();
+      String leaderCoreNodeName = leaderReplica.getName();
+      List<Replica> replicas = clusterState.getCollection(collection)
+          .getSlice(shardId)
+          .getReplicas(EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
+      replicas.removeIf((replica) -> replica.getName().equals(leaderCoreNodeName));
+      if (replicas.isEmpty()) {
         return null;
       }
 
@@ -349,16 +354,20 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         log.info("test.distrib.skip.servers was found and contains:" + skipListSet);
       }
 
-      List<Node> nodes = new ArrayList<>(replicaProps.size());
+      List<Node> nodes = new ArrayList<>(replicas.size());
+      skippedCoreNodeNames = new HashSet<>();
       ZkShardTerms zkShardTerms = zkController.getShardTerms(collection, shardId);
-      for (ZkCoreNodeProps props : replicaProps) {
-        String coreNodeName = ((Replica) props.getNodeProps()).getName();
-        if (skipList != null && skipListSet.contains(props.getCoreUrl())) {
-          log.info("check url:" + props.getCoreUrl() + " against:" + skipListSet + " result:true");
-        } else if(!isOldLIRMode && zkShardTerms.registered(coreNodeName) && !zkShardTerms.canBecomeLeader(coreNodeName)) {
-          log.info("skip url:{} cause its term is less than leader", props.getCoreUrl());
+      for (Replica replica: replicas) {
+        String coreNodeName = replica.getName();
+        if (skipList != null && skipListSet.contains(replica.getCoreUrl())) {
+          log.info("check url:" + replica.getCoreUrl() + " against:" + skipListSet + " result:true");
+        } else if(!isOldLIRMode && zkShardTerms.registered(coreNodeName) && zkShardTerms.skipSendingUpdatesTo(coreNodeName)) {
+          log.debug("skip url:{} cause its term is less than leader", replica.getCoreUrl());
+          skippedCoreNodeNames.add(replica.getName());
+        } else if (!clusterState.getLiveNodes().contains(replica.getNodeName()) || replica.getState() == Replica.State.DOWN) {
+          skippedCoreNodeNames.add(replica.getName());
         } else {
-          nodes.add(new StdNode(props, collection, shardId));
+          nodes.add(new StdNode(new ZkCoreNodeProps(replica), collection, shardId));
         }
       }
       return nodes;
@@ -750,6 +759,14 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
 
   // TODO: optionally fail if n replicas are not reached...
   private void doFinish() {
+    boolean shouldUpdateTerms = isLeader && !isOldLIRMode && isIndexChanged;
+    if (shouldUpdateTerms) {
+      ZkShardTerms zkShardTerms = zkController.getShardTerms(cloudDesc.getCollectionName(), cloudDesc.getShardId());
+      if (skippedCoreNodeNames != null) {
+        zkShardTerms.ensureTermsIsHigher(cloudDesc.getCoreNodeName(), skippedCoreNodeNames);
+      }
+      zkController.getShardTerms(collection, cloudDesc.getShardId()).ensureHighestTermsAreNotZero();
+    }
     // TODO: if not a forward and replication req is not specified, we could
     // send in a background thread
 
@@ -758,7 +775,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     // TODO - we may need to tell about more than one error...
 
     List<Error> errorsForClient = new ArrayList<>(errors.size());
-    Map<ShardInfo, Set<String>> failedReplicas = new HashMap<>();
+    Set<String> replicasShouldBeInLowerTerms = new HashSet<>();
     for (final SolrCmdDistributor.Error error : errors) {
 
       if (error.req.node instanceof RetryNode) {
@@ -856,9 +873,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         Throwable rootCause = SolrException.getRootCause(error.e);
         if (!isOldLIRMode && zkController.getShardTerms(collection, shardId).registered(coreNodeName)) {
           log.error("Setting up to try to start recovery on replica {} with url {} by increasing leader term", coreNodeName, replicaUrl, rootCause);
-          ShardInfo shardInfo = new ShardInfo(collection, shardId, leaderCoreNodeName);
-          failedReplicas.putIfAbsent(shardInfo, new HashSet<>());
-          failedReplicas.get(shardInfo).add(coreNodeName);
+          replicasShouldBeInLowerTerms.add(coreNodeName);
         } else {
           // The replica did not registered its term, so it must run with old LIR implementation
           log.error("Setting up to try to start recovery on replica {}", replicaUrl, rootCause);
@@ -891,11 +906,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
         }
       }
     }
-    if (!isOldLIRMode) {
-      for (Map.Entry<ShardInfo, Set<String>> entry : failedReplicas.entrySet()) {
-        ShardInfo shardInfo = entry.getKey();
-        zkController.getShardTerms(shardInfo.collection, shardInfo.shard).ensureTermsIsHigher(shardInfo.leader, entry.getValue());
-      }
+    if (!isOldLIRMode && !replicasShouldBeInLowerTerms.isEmpty()) {
+      zkController.getShardTerms(cloudDesc.getCollectionName(), cloudDesc.getShardId())
+          .ensureTermsIsHigher(cloudDesc.getCoreNodeName(), replicasShouldBeInLowerTerms);
     }
     // in either case, we need to attach the achieved and min rf to the response.
     if (leaderReplicationTracker != null || rollupReplicationTracker != null) {
@@ -929,47 +942,16 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
       }
     }
 
-  private class ShardInfo {
-    private String collection;
-    private String shard;
-    private String leader;
-
-    public ShardInfo(String collection, String shard, String leader) {
-      this.collection = collection;
-      this.shard = shard;
-      this.leader = leader;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (this == o) return true;
-      if (o == null || getClass() != o.getClass()) return false;
-
-      ShardInfo shardInfo = (ShardInfo) o;
-
-      if (!collection.equals(shardInfo.collection)) return false;
-      if (!shard.equals(shardInfo.shard)) return false;
-      return leader.equals(shardInfo.leader);
-    }
-
-    @Override
-    public int hashCode() {
-      int result = collection.hashCode();
-      result = 31 * result + shard.hashCode();
-      result = 31 * result + leader.hashCode();
-      return result;
-    }
-  }
-
-
   // must be synchronized by bucket
   private void doLocalAdd(AddUpdateCommand cmd) throws IOException {
     super.processAdd(cmd);
+    isIndexChanged = true;
   }
 
   // must be synchronized by bucket
   private void doLocalDelete(DeleteUpdateCommand cmd) throws IOException {
     super.processDelete(cmd);
+    isIndexChanged = true;
   }
 
   /**
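The DistributedUpdateProcessor hunks above make the leader the only writer of term bumps: replicas that are DOWN, not live, or already behind are skipped and remembered in skippedCoreNodeNames; doLocalAdd and doLocalDelete flip isIndexChanged; and doFinish() then raises the leader's term above the skipped replicas and above zero. A condensed restatement of that flow, assuming the fields and methods named in the hunks; this is a sketch, not the real method body.

import java.util.Set;

import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.ZkShardTerms;

// Sketch of the term updates a leader performs at the end of a request after this patch.
class LeaderTermUpdateSketch {
  static void finishRequest(ZkController zkController, CloudDescriptor cloudDesc,
                            Set<String> skippedCoreNodeNames,
                            boolean isLeader, boolean isOldLIRMode, boolean isIndexChanged) {
    // Only a leader on the new LIR path that actually changed its index touches terms.
    if (!isLeader || isOldLIRMode || !isIndexChanged) return;

    ZkShardTerms terms = zkController.getShardTerms(cloudDesc.getCollectionName(), cloudDesc.getShardId());
    if (skippedCoreNodeNames != null) {
      // Replicas that never saw this update fall behind the leader's term and must recover.
      terms.ensureTermsIsHigher(cloudDesc.getCoreNodeName(), skippedCoreNodeNames);
    }
    // First data in the shard moves terms off zero so later, empty replicas read as stale.
    terms.ensureHighestTermsAreNotZero();
  }
}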
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.net.URI;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.JSONTestUtil;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.ZkCoreNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.util.TimeOut;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestCloudConsistency extends SolrCloudTestCase {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static Map<JettySolrRunner, SocketProxy> proxies;
+  private static Map<URI, JettySolrRunner> jettys;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
+    System.setProperty("solr.ulog.numRecordsToKeep", "1000");
+
+    configureCluster(4)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+    // Add proxies
+    proxies = new HashMap<>(cluster.getJettySolrRunners().size());
+    jettys = new HashMap<>();
+    for (JettySolrRunner jetty:cluster.getJettySolrRunners()) {
+      SocketProxy proxy = new SocketProxy();
+      jetty.setProxyPort(proxy.getListenPort());
+      cluster.stopJettySolrRunner(jetty);//TODO: Can we avoid this restart
+      cluster.startJettySolrRunner(jetty);
+      proxy.open(jetty.getBaseUrl().toURI());
+      LOG.info("Adding proxy for URL: " + jetty.getBaseUrl() + ". Proxy: " + proxy.getUrl());
+      proxies.put(jetty, proxy);
+      jettys.put(proxy.getUrl(), jetty);
+    }
+  }
+
+  @AfterClass
+  public static void tearDownCluster() throws Exception {
+    for (SocketProxy proxy:proxies.values()) {
+      proxy.close();
+    }
+    proxies = null;
+    jettys = null;
+  }
+
+  @Test
+  public void testOutOfSyncReplicasCannotBecomeLeader() throws Exception {
+    testOutOfSyncReplicasCannotBecomeLeader(false);
+  }
+
+  @Test
+  public void testOutOfSyncReplicasCannotBecomeLeaderAfterRestart() throws Exception {
+    testOutOfSyncReplicasCannotBecomeLeader(true);
+  }
+
+  public void testOutOfSyncReplicasCannotBecomeLeader(boolean onRestart) throws Exception {
+    final String collectionName = "outOfSyncReplicasCannotBecomeLeader-"+onRestart;
+    CollectionAdminRequest.createCollection(collectionName, 1, 3)
+        .setCreateNodeSet("")
+        .process(cluster.getSolrClient());
+    CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
+        .setNode(cluster.getJettySolrRunner(0).getNodeName())
+        .process(cluster.getSolrClient());
+    waitForState("Timeout waiting for shard leader", collectionName, clusterShape(1, 1));
+
+    CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
+        .setNode(cluster.getJettySolrRunner(1).getNodeName())
+        .process(cluster.getSolrClient());
+    CollectionAdminRequest.addReplicaToShard(collectionName, "shard1")
+        .setNode(cluster.getJettySolrRunner(2).getNodeName())
+        .process(cluster.getSolrClient());
+    waitForState("Timeout waiting for 1x3 collection", collectionName, clusterShape(1, 3));
+
+    addDocs(collectionName, 3, 1);
+
+    final Replica oldLeader = getCollectionState(collectionName).getSlice("shard1").getLeader();
+    assertEquals(cluster.getJettySolrRunner(0).getNodeName(), oldLeader.getNodeName());
+
+    if (onRestart) {
+      addDocToWhenOtherReplicasAreDown(collectionName, oldLeader, 4);
+    } else {
+      addDocWhenOtherReplicasAreNetworkPartitioned(collectionName, oldLeader, 4);
+    }
+
+    assertDocsExistInAllReplicas(getCollectionState(collectionName).getReplicas(), collectionName, 1, 4);
+
+    CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
+  }
+
+
+  /**
+   * Adding doc when replicas (not leader) are down,
+   * These replicas are out-of-sync hence they should not become leader even when current leader is DOWN.
+   * Leader should be on node - 0
+   */
+  private void addDocToWhenOtherReplicasAreDown(String collection, Replica leader, int docId) throws Exception {
+    ChaosMonkey.stop(cluster.getJettySolrRunner(1));
+    ChaosMonkey.stop(cluster.getJettySolrRunner(2));
+    waitForState("", collection, (liveNodes, collectionState) ->
+        collectionState.getSlice("shard1").getReplicas().stream()
+            .filter(replica -> replica.getState() == Replica.State.DOWN).count() == 2);
+
+    addDocs(collection, 1, docId);
+    ChaosMonkey.stop(cluster.getJettySolrRunner(0));
+    waitForState("", collection, (liveNodes, collectionState) -> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
+
+    ChaosMonkey.start(cluster.getJettySolrRunner(1));
+    ChaosMonkey.start(cluster.getJettySolrRunner(2));
+    TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.CURRENT_TIME);
+    while (!timeOut.hasTimedOut()) {
+      Replica newLeader = getCollectionState(collection).getSlice("shard1").getLeader();
+      if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
+        fail("Out of sync replica became leader " + newLeader);
+      }
+    }
+
+    ChaosMonkey.start(cluster.getJettySolrRunner(0));
+    waitForState("Timeout waiting for leader", collection, (liveNodes, collectionState) -> {
+      Replica newLeader = collectionState.getLeader("shard1");
+      return newLeader != null && newLeader.getName().equals(leader.getName());
+    });
+    waitForState("Timeout waiting for active collection", collection, clusterShape(1, 3));
+  }
+
+
+  /**
+   * Adding doc when replicas (not leader) are network partitioned with leader,
+   * These replicas are out-of-sync hence they should not become leader even when current leader is DOWN.
+   * Leader should be on node - 0
+   */
+  private void addDocWhenOtherReplicasAreNetworkPartitioned(String collection, Replica leader, int docId) throws Exception {
+    for (int i = 0; i < 3; i++) {
+      proxies.get(cluster.getJettySolrRunner(i)).close();
+    }
+    addDoc(collection, docId, cluster.getJettySolrRunner(0));
+    ChaosMonkey.stop(cluster.getJettySolrRunner(0));
+    for (int i = 1; i < 3; i++) {
+      proxies.get(cluster.getJettySolrRunner(i)).reopen();
+    }
+    waitForState("Timeout waiting for leader goes DOWN", collection, (liveNodes, collectionState)
+        -> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
+
+    TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.CURRENT_TIME);
+    while (!timeOut.hasTimedOut()) {
+      Replica newLeader = getCollectionState(collection).getLeader("shard1");
+      if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
+        fail("Out of sync replica became leader " + newLeader);
+      }
+    }
+
+    proxies.get(cluster.getJettySolrRunner(0)).reopen();
+    ChaosMonkey.start(cluster.getJettySolrRunner(0));
+    waitForState("Timeout waiting for leader", collection, (liveNodes, collectionState) -> {
+      Replica newLeader = collectionState.getLeader("shard1");
+      return newLeader != null && newLeader.getName().equals(leader.getName());
+    });
+    waitForState("Timeout waiting for active collection", collection, clusterShape(1, 3));
+  }
+
+  private void addDocs(String collection, int numDocs, int startId) throws SolrServerException, IOException {
+    List<SolrInputDocument> docs = new ArrayList<>(numDocs);
+    for (int i = 0; i < numDocs; i++) {
+      int id = startId + i;
+      docs.add(new SolrInputDocument("id", String.valueOf(id), "fieldName_s", String.valueOf(id)));
+    }
+    cluster.getSolrClient().add(collection, docs);
+    cluster.getSolrClient().commit(collection);
+  }
+
+  private void addDoc(String collection, int docId, JettySolrRunner solrRunner) throws IOException, SolrServerException {
+    try (HttpSolrClient solrClient = new HttpSolrClient.Builder(solrRunner.getBaseUrl().toString()).build()) {
+      solrClient.add(collection, new SolrInputDocument("id", String.valueOf(docId), "fieldName_s", String.valueOf(docId)));
+    }
+  }
+
+  private void assertDocsExistInAllReplicas(List<Replica> notLeaders,
+      String testCollectionName, int firstDocId, int lastDocId) throws Exception {
+    Replica leader =
+        cluster.getSolrClient().getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
+    HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName);
+    List<HttpSolrClient> replicas =
+        new ArrayList<HttpSolrClient>(notLeaders.size());
+
+    for (Replica r : notLeaders) {
+      replicas.add(getHttpSolrClient(r, testCollectionName));
+    }
+    try {
+      for (int d = firstDocId; d <= lastDocId; d++) {
+        String docId = String.valueOf(d);
+        assertDocExists(leaderSolr, testCollectionName, docId);
+        for (HttpSolrClient replicaSolr : replicas) {
+          assertDocExists(replicaSolr, testCollectionName, docId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (leaderSolr != null) {
|
||||||
|
leaderSolr.close();
|
||||||
|
}
|
||||||
|
for (HttpSolrClient replicaSolr : replicas) {
|
||||||
|
replicaSolr.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertDocExists(HttpSolrClient solr, String coll, String docId) throws Exception {
|
||||||
|
NamedList rsp = realTimeGetDocId(solr, docId);
|
||||||
|
String match = JSONTestUtil.matchObj("/id", rsp.get("doc"), docId);
|
||||||
|
assertTrue("Doc with id=" + docId + " not found in " + solr.getBaseURL()
|
||||||
|
+ " due to: " + match + "; rsp="+rsp, match == null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private NamedList realTimeGetDocId(HttpSolrClient solr, String docId) throws SolrServerException, IOException {
|
||||||
|
QueryRequest qr = new QueryRequest(params("qt", "/get", "id", docId, "distrib", "false"));
|
||||||
|
return solr.request(qr);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HttpSolrClient getHttpSolrClient(Replica replica, String coll) throws Exception {
|
||||||
|
ZkCoreNodeProps zkProps = new ZkCoreNodeProps(replica);
|
||||||
|
String url = zkProps.getBaseUrl() + "/" + coll;
|
||||||
|
return getHttpSolrClient(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected JettySolrRunner getJettyForReplica(Replica replica) throws Exception {
|
||||||
|
String replicaBaseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
|
||||||
|
assertNotNull(replicaBaseUrl);
|
||||||
|
URL baseUrl = new URL(replicaBaseUrl);
|
||||||
|
|
||||||
|
JettySolrRunner proxy = jettys.get(baseUrl.toURI());
|
||||||
|
assertNotNull("No proxy found for " + baseUrl + "!", proxy);
|
||||||
|
return proxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
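A minimal sketch of the rule the two scenarios above exercise: only a replica whose shard term is up to date may become leader. This is not part of the patch. It assumes the ZkShardTerms constructor and canBecomeLeader(...) shown in the hunks below, the SolrCloudTestCase helpers (zkClient(), getCollectionState(...)) used in these tests, and java.util.Set/HashSet imports; it would sit inside addDocToWhenOtherReplicasAreDown right after the leader-only update, before the stopped replicas are restarted and recover.

    // Hypothetical check (sketch only): at this point only the leader, which received the
    // extra document, should still be eligible to become leader.
    Set<String> eligible = new HashSet<>();
    try (ZkShardTerms shardTerms = new ZkShardTerms(collection, "shard1", zkClient())) {
      for (Replica replica : getCollectionState(collection).getSlice("shard1").getReplicas()) {
        if (shardTerms.canBecomeLeader(replica.getName())) { // true only for up-to-date terms
          eligible.add(replica.getName());
        }
      }
    }
    assertEquals(1, eligible.size());
    assertTrue(eligible.contains(leader.getName()));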
@@ -425,10 +425,19 @@ public class TestPullReplica extends SolrCloudTestCase {
     Replica pullReplica = docCollection.getSlice("shard1").getReplicas(EnumSet.of(Replica.Type.PULL)).get(0);
     assertTrue(pullReplica.isActive(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes()));

+    long highestTerm = 0L;
+    try (ZkShardTerms zkShardTerms = new ZkShardTerms(collectionName, "shard1", zkClient())) {
+      highestTerm = zkShardTerms.getHighestTerm();
+    }
     // add document, this should fail since there is no leader. Pull replica should not accept the update
     expectThrows(SolrException.class, () ->
         cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2", "foo", "zoo"))
     );
+    if (removeReplica) {
+      try (ZkShardTerms zkShardTerms = new ZkShardTerms(collectionName, "shard1", zkClient())) {
+        assertEquals(highestTerm, zkShardTerms.getHighestTerm());
+      }
+    }

     // Also fails if I send the update to the pull replica explicitly
     try (HttpSolrClient pullReplicaClient = getHttpSolrClient(docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)).get(0).getCoreUrl())) {
@@ -436,6 +445,11 @@ public class TestPullReplica extends SolrCloudTestCase {
         cluster.getSolrClient().add(collectionName, new SolrInputDocument("id", "2", "foo", "zoo"))
       );
     }
+    if (removeReplica) {
+      try (ZkShardTerms zkShardTerms = new ZkShardTerms(collectionName, "shard1", zkClient())) {
+        assertEquals(highestTerm, zkShardTerms.getHighestTerm());
+      }
+    }

     // Queries should still work
     waitForNumDocsInAllReplicas(1, docCollection.getReplicas(EnumSet.of(Replica.Type.PULL)));
@@ -94,7 +94,7 @@ public class ZkShardTermsTest extends SolrCloudTestCase {
     assertEquals(1L, rep1Terms.getTerm("rep1"));

     waitFor(1L, () -> rep2Terms.getTerm("rep1"));
-    rep2Terms.setEqualsToMax("rep2");
+    rep2Terms.setTermEqualsToLeader("rep2");
     assertEquals(1L, rep2Terms.getTerm("rep2"));
     rep2Terms.registerTerm("rep2");
     assertEquals(1L, rep2Terms.getTerm("rep2"));
@@ -138,7 +138,7 @@ public class ZkShardTermsTest extends SolrCloudTestCase {
       while (!stop.get()) {
         try {
           Thread.sleep(random().nextInt(200));
-          zkShardTerms.setEqualsToMax(replica);
+          zkShardTerms.setTermEqualsToLeader(replica);
         } catch (InterruptedException e) {
           e.printStackTrace();
         }
@@ -178,7 +178,7 @@ public class ZkShardTermsTest extends SolrCloudTestCase {
     waitFor(1, count::get);
     leaderTerms.ensureTermsIsHigher("leader", Collections.singleton("replica"));
     waitFor(2, count::get);
-    replicaTerms.setEqualsToMax("replica");
+    replicaTerms.setTermEqualsToLeader("replica");
     waitFor(3, count::get);
     assertEquals(0, replicaTerms.getNumListeners());

@@ -194,6 +194,41 @@ public class ZkShardTermsTest extends SolrCloudTestCase {
     assertEquals(1L, terms.getTerm("leader").longValue());
   }

+  public void testSetTermToZero() {
+    String collection = "setTermToZero";
+    ZkShardTerms terms = new ZkShardTerms(collection, "shard1", cluster.getZkClient());
+    terms.registerTerm("leader");
+    terms.registerTerm("replica");
+    terms.ensureTermsIsHigher("leader", Collections.singleton("replica"));
+    assertEquals(1L, terms.getTerm("leader"));
+    terms.setTermToZero("leader");
+    assertEquals(0L, terms.getTerm("leader"));
+    terms.close();
+  }
+
+  public void testReplicaCanBecomeLeader() throws InterruptedException {
+    String collection = "replicaCanBecomeLeader";
+    ZkShardTerms leaderTerms = new ZkShardTerms(collection, "shard1", cluster.getZkClient());
+    ZkShardTerms replicaTerms = new ZkShardTerms(collection, "shard1", cluster.getZkClient());
+    leaderTerms.registerTerm("leader");
+    replicaTerms.registerTerm("replica");
+
+    leaderTerms.ensureTermsIsHigher("leader", Collections.singleton("replica"));
+    waitFor(false, () -> replicaTerms.canBecomeLeader("replica"));
+    waitFor(true, () -> leaderTerms.skipSendingUpdatesTo("replica"));
+
+    replicaTerms.startRecovering("replica");
+    waitFor(false, () -> replicaTerms.canBecomeLeader("replica"));
+    waitFor(false, () -> leaderTerms.skipSendingUpdatesTo("replica"));
+
+    replicaTerms.doneRecovering("replica");
+    waitFor(true, () -> replicaTerms.canBecomeLeader("replica"));
+    waitFor(false, () -> leaderTerms.skipSendingUpdatesTo("replica"));
+
+    leaderTerms.close();
+    replicaTerms.close();
+  }
+
   private <T> void waitFor(T expected, Supplier<T> supplier) throws InterruptedException {
     TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, new TimeSource.CurrentTimeSource());
     while (!timeOut.hasTimedOut()) {
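A usage note on the rename in the hunks above (setEqualsToMax -> setTermEqualsToLeader): a sketch only, assuming the semantics implied by testReplicaCanBecomeLeader, of how a caught-up replica is re-enabled for election. The variable names reuse the test's replicaTerms and waitFor helper; this is not part of the patch.

    // Hypothetical sketch: once a replica has fully caught up, setting its term equal to the
    // leader's term should make it eligible for leadership again.
    replicaTerms.setTermEqualsToLeader("replica");
    waitFor(true, () -> replicaTerms.canBecomeLeader("replica"));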