mirror of https://github.com/apache/lucene.git
SOLR-11484: CloudSolrClient does not invalidate cache or retry for RouteException
This commit is contained in:
parent
161be0a4ae
commit
0d29f7a1a2
|
@ -75,6 +75,9 @@ Bug Fixes
|
|||
more than the given term's frequency in overridden FloatDocValues.floatVal().
|
||||
(Michael Kosten, Erik Hatcher, Steve Rowe)
|
||||
|
||||
* SOLR-11484: CloudSolrClient does not invalidate cache or retry for RouteException (noble, hossman)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
||||
|
|
|
@ -607,6 +607,14 @@ public class CloudSolrClient extends SolrClient {
|
|||
String name = slice.getName();
|
||||
List<String> urls = new ArrayList<>();
|
||||
Replica leader = slice.getLeader();
|
||||
if (directUpdatesToLeadersOnly && leader == null) {
|
||||
for (Replica replica : slice.getReplicas(
|
||||
replica -> replica.isActive(getClusterStateProvider().getLiveNodes())
|
||||
&& replica.getType() == Replica.Type.NRT)) {
|
||||
leader = replica;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (leader == null) {
|
||||
if (directUpdatesToLeadersOnly) {
|
||||
continue;
|
||||
|
@ -908,7 +916,7 @@ public class CloudSolrClient extends SolrClient {
|
|||
rootCause instanceof NoHttpResponseException ||
|
||||
rootCause instanceof SocketException);
|
||||
|
||||
if (wasCommError) {
|
||||
if (wasCommError || (exc instanceof RouteException)) {
|
||||
// it was a communication error. it is likely that
|
||||
// the node to which the request to be sent is down . So , expire the state
|
||||
// so that the next attempt would fetch the fresh state
|
||||
|
|
|
@ -787,6 +787,65 @@ public class CloudSolrClientTest extends SolrCloudTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testRetryUpdatesWhenClusterStateIsStale() throws Exception {
|
||||
final String COL = "stale_state_test_col";
|
||||
assert cluster.getJettySolrRunners().size() >= 2;
|
||||
|
||||
final JettySolrRunner old_leader_node = cluster.getJettySolrRunners().get(0);
|
||||
final JettySolrRunner new_leader_node = cluster.getJettySolrRunners().get(1);
|
||||
|
||||
// start with exactly 1 shard/replica...
|
||||
assertEquals("Couldn't create collection", 0,
|
||||
CollectionAdminRequest.createCollection(COL, "conf", 1, 1)
|
||||
.setCreateNodeSet(old_leader_node.getNodeName())
|
||||
.process(cluster.getSolrClient()).getStatus());
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||
|
||||
// determine the coreNodeName of only current replica
|
||||
Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COL).getSlices();
|
||||
assertEquals(1, slices.size()); // sanity check
|
||||
Slice slice = slices.iterator().next();
|
||||
assertEquals(1, slice.getReplicas().size()); // sanity check
|
||||
final String old_leader_core_node_name = slice.getLeader().getName();
|
||||
|
||||
// NOTE: creating our own CloudSolrClient whose settings we can muck with...
|
||||
try (CloudSolrClient stale_client = getCloudSolrClient(cluster.getZkServer().getZkAddress())) {
|
||||
// don't let collection cache entries get expired, even on a slow machine...
|
||||
stale_client.setCollectionCacheTTl(Integer.MAX_VALUE);
|
||||
stale_client.setDefaultCollection(COL);
|
||||
|
||||
// do a query to populate stale_client's cache...
|
||||
assertEquals(0, stale_client.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
// add 1 replica on a diff node...
|
||||
assertEquals("Couldn't create collection", 0,
|
||||
CollectionAdminRequest.addReplicaToShard(COL, "shard1")
|
||||
.setNode(new_leader_node.getNodeName())
|
||||
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
|
||||
.process(cluster.getSolrClient()).getStatus());
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||
|
||||
// ...and delete our original leader.
|
||||
assertEquals("Couldn't create collection", 0,
|
||||
CollectionAdminRequest.deleteReplica(COL, "shard1", old_leader_core_node_name)
|
||||
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
|
||||
.process(cluster.getSolrClient()).getStatus());
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||
|
||||
// stale_client's collection state cache should now only point at a leader that no longer exists.
|
||||
|
||||
// attempt a (direct) update that should succeed in spite of cached cluster state
|
||||
// pointing solely to a node that's no longer part of our collection...
|
||||
assertEquals(0, (new UpdateRequest().add("id", "1").commit(stale_client, COL)).getStatus());
|
||||
assertEquals(1, stale_client.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void checkSingleServer(NamedList<Object> response) {
|
||||
final CloudSolrClient.RouteResponse rr = (CloudSolrClient.RouteResponse) response;
|
||||
final Map<String,LBHttpSolrClient.Req> routes = rr.getRoutes();
|
||||
|
|
Loading…
Reference in New Issue