mirror of https://github.com/apache/lucene.git
SOLR-11484: CloudSolrClient does not invalidate cache or retry for RouteException
This commit is contained in:
parent
161be0a4ae
commit
0d29f7a1a2
|
@ -75,6 +75,9 @@ Bug Fixes
|
||||||
more than the given term's frequency in overridden FloatDocValues.floatVal().
|
more than the given term's frequency in overridden FloatDocValues.floatVal().
|
||||||
(Michael Kosten, Erik Hatcher, Steve Rowe)
|
(Michael Kosten, Erik Hatcher, Steve Rowe)
|
||||||
|
|
||||||
|
* SOLR-11484: CloudSolrClient does not invalidate cache or retry for RouteException (noble, hossman)
|
||||||
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
||||||
|
|
|
@ -607,6 +607,14 @@ public class CloudSolrClient extends SolrClient {
|
||||||
String name = slice.getName();
|
String name = slice.getName();
|
||||||
List<String> urls = new ArrayList<>();
|
List<String> urls = new ArrayList<>();
|
||||||
Replica leader = slice.getLeader();
|
Replica leader = slice.getLeader();
|
||||||
|
if (directUpdatesToLeadersOnly && leader == null) {
|
||||||
|
for (Replica replica : slice.getReplicas(
|
||||||
|
replica -> replica.isActive(getClusterStateProvider().getLiveNodes())
|
||||||
|
&& replica.getType() == Replica.Type.NRT)) {
|
||||||
|
leader = replica;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (leader == null) {
|
if (leader == null) {
|
||||||
if (directUpdatesToLeadersOnly) {
|
if (directUpdatesToLeadersOnly) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -908,7 +916,7 @@ public class CloudSolrClient extends SolrClient {
|
||||||
rootCause instanceof NoHttpResponseException ||
|
rootCause instanceof NoHttpResponseException ||
|
||||||
rootCause instanceof SocketException);
|
rootCause instanceof SocketException);
|
||||||
|
|
||||||
if (wasCommError) {
|
if (wasCommError || (exc instanceof RouteException)) {
|
||||||
// it was a communication error. it is likely that
|
// it was a communication error. it is likely that
|
||||||
// the node to which the request to be sent is down . So , expire the state
|
// the node to which the request to be sent is down . So , expire the state
|
||||||
// so that the next attempt would fetch the fresh state
|
// so that the next attempt would fetch the fresh state
|
||||||
|
|
|
@ -787,6 +787,65 @@ public class CloudSolrClientTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRetryUpdatesWhenClusterStateIsStale() throws Exception {
|
||||||
|
final String COL = "stale_state_test_col";
|
||||||
|
assert cluster.getJettySolrRunners().size() >= 2;
|
||||||
|
|
||||||
|
final JettySolrRunner old_leader_node = cluster.getJettySolrRunners().get(0);
|
||||||
|
final JettySolrRunner new_leader_node = cluster.getJettySolrRunners().get(1);
|
||||||
|
|
||||||
|
// start with exactly 1 shard/replica...
|
||||||
|
assertEquals("Couldn't create collection", 0,
|
||||||
|
CollectionAdminRequest.createCollection(COL, "conf", 1, 1)
|
||||||
|
.setCreateNodeSet(old_leader_node.getNodeName())
|
||||||
|
.process(cluster.getSolrClient()).getStatus());
|
||||||
|
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||||
|
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||||
|
|
||||||
|
// determine the coreNodeName of only current replica
|
||||||
|
Collection<Slice> slices = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COL).getSlices();
|
||||||
|
assertEquals(1, slices.size()); // sanity check
|
||||||
|
Slice slice = slices.iterator().next();
|
||||||
|
assertEquals(1, slice.getReplicas().size()); // sanity check
|
||||||
|
final String old_leader_core_node_name = slice.getLeader().getName();
|
||||||
|
|
||||||
|
// NOTE: creating our own CloudSolrClient whose settings we can muck with...
|
||||||
|
try (CloudSolrClient stale_client = getCloudSolrClient(cluster.getZkServer().getZkAddress())) {
|
||||||
|
// don't let collection cache entries get expired, even on a slow machine...
|
||||||
|
stale_client.setCollectionCacheTTl(Integer.MAX_VALUE);
|
||||||
|
stale_client.setDefaultCollection(COL);
|
||||||
|
|
||||||
|
// do a query to populate stale_client's cache...
|
||||||
|
assertEquals(0, stale_client.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||||
|
|
||||||
|
// add 1 replica on a diff node...
|
||||||
|
assertEquals("Couldn't create collection", 0,
|
||||||
|
CollectionAdminRequest.addReplicaToShard(COL, "shard1")
|
||||||
|
.setNode(new_leader_node.getNodeName())
|
||||||
|
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
|
||||||
|
.process(cluster.getSolrClient()).getStatus());
|
||||||
|
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||||
|
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||||
|
|
||||||
|
// ...and delete our original leader.
|
||||||
|
assertEquals("Couldn't create collection", 0,
|
||||||
|
CollectionAdminRequest.deleteReplica(COL, "shard1", old_leader_core_node_name)
|
||||||
|
// NOTE: don't use our stale_client for this -- don't tip it off of a collection change
|
||||||
|
.process(cluster.getSolrClient()).getStatus());
|
||||||
|
AbstractDistribZkTestBase.waitForRecoveriesToFinish
|
||||||
|
(COL, cluster.getSolrClient().getZkStateReader(), true, true, 330);
|
||||||
|
|
||||||
|
// stale_client's collection state cache should now only point at a leader that no longer exists.
|
||||||
|
|
||||||
|
// attempt a (direct) update that should succeed in spite of cached cluster state
|
||||||
|
// pointing solely to a node that's no longer part of our collection...
|
||||||
|
assertEquals(0, (new UpdateRequest().add("id", "1").commit(stale_client, COL)).getStatus());
|
||||||
|
assertEquals(1, stale_client.query(new SolrQuery("*:*")).getResults().getNumFound());
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private static void checkSingleServer(NamedList<Object> response) {
|
private static void checkSingleServer(NamedList<Object> response) {
|
||||||
final CloudSolrClient.RouteResponse rr = (CloudSolrClient.RouteResponse) response;
|
final CloudSolrClient.RouteResponse rr = (CloudSolrClient.RouteResponse) response;
|
||||||
final Map<String,LBHttpSolrClient.Req> routes = rr.getRoutes();
|
final Map<String,LBHttpSolrClient.Req> routes = rr.getRoutes();
|
||||||
|
|
Loading…
Reference in New Issue