mirror of https://github.com/apache/lucene.git
SOLR-6496: LBHttpSolrClient stops retrying after the timeAllowed threshold is met
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1651236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c36180cdf8
commit
6d7fdf9f3f
|
@ -681,6 +681,9 @@ Other Changes
|
|||
or collection depending on whether Solr is running in standalone or cloud mode
|
||||
(Timothy Potter)
|
||||
|
||||
* SOLR-6496: LBHttpSolrClient stops server retries after the timeAllowed threshold is met.
|
||||
(Steve Davids, Anshum Gupta)
|
||||
|
||||
================== 4.10.3 ==================
|
||||
|
||||
Bug Fixes
|
||||
|
|
|
@ -77,17 +77,18 @@ public class CloudExitableDirectoryReaderTest extends AbstractFullDistribZkTestB
|
|||
time than this. Keeping it at 5 because the delaying search component delays all requests
|
||||
by at 1 second.
|
||||
*/
|
||||
long fiveSeconds = 5000L;
|
||||
int fiveSeconds = 5000;
|
||||
|
||||
Long timeAllowed = TestUtil.nextLong(random(), fiveSeconds, Long.MAX_VALUE);
|
||||
Integer timeAllowed = TestUtil.nextInt(random(), fiveSeconds, Integer.MAX_VALUE);
|
||||
assertSuccess(params("q", "name:a*", "timeAllowed",timeAllowed.toString()));
|
||||
|
||||
assertPartialResults(params("q", "name:a*", "timeAllowed", "1"));
|
||||
|
||||
timeAllowed = TestUtil.nextLong(random(), fiveSeconds, Long.MAX_VALUE);
|
||||
timeAllowed = TestUtil.nextInt(random(), fiveSeconds, Integer.MAX_VALUE);
|
||||
assertSuccess(params("q", "name:b*", "timeAllowed",timeAllowed.toString()));
|
||||
|
||||
timeAllowed = TestUtil.nextLong(random(), Long.MIN_VALUE, -1L); // negative timeAllowed should disable timeouts
|
||||
// negative timeAllowed should disable timeouts
|
||||
timeAllowed = TestUtil.nextInt(random(), Integer.MIN_VALUE, -1);
|
||||
assertSuccess(params("q", "name:b*", "timeAllowed",timeAllowed.toString()));
|
||||
|
||||
assertSuccess(params("q","name:b*")); // no time limitation
|
||||
|
|
|
@ -21,7 +21,9 @@ import org.apache.solr.client.solrj.*;
|
|||
import org.apache.solr.client.solrj.request.IsUpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.RequestWriter;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SolrjNamedThreadFactory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -288,7 +290,13 @@ public class LBHttpSolrClient extends SolrClient {
|
|||
boolean isUpdate = req.request instanceof IsUpdateRequest;
|
||||
List<ServerWrapper> skipped = null;
|
||||
|
||||
long timeAllowedNano = getTimeAllowedInNanos(req.getRequest());
|
||||
long timeOutTime = System.nanoTime() + timeAllowedNano;
|
||||
for (String serverStr : req.getServers()) {
|
||||
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
|
||||
break;
|
||||
}
|
||||
|
||||
serverStr = normalize(serverStr);
|
||||
// if the server is currently a zombie, just skip to the next one
|
||||
ServerWrapper wrapper = zombieServers.get(serverStr);
|
||||
|
@ -318,6 +326,10 @@ public class LBHttpSolrClient extends SolrClient {
|
|||
// try the servers we previously skipped
|
||||
if (skipped != null) {
|
||||
for (ServerWrapper wrapper : skipped) {
|
||||
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
|
||||
break;
|
||||
}
|
||||
|
||||
ex = doRequest(wrapper.client, req, rsp, isUpdate, true, wrapper.getKey());
|
||||
if (ex == null) {
|
||||
return rsp; // SUCCESS
|
||||
|
@ -482,7 +494,13 @@ public class LBHttpSolrClient extends SolrClient {
|
|||
int maxTries = serverList.length;
|
||||
Map<String,ServerWrapper> justFailed = null;
|
||||
|
||||
long timeAllowedNano = getTimeAllowedInNanos(request);
|
||||
long timeOutTime = System.nanoTime() + timeAllowedNano;
|
||||
for (int attempts=0; attempts<maxTries; attempts++) {
|
||||
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
|
||||
break;
|
||||
}
|
||||
|
||||
int count = counter.incrementAndGet() & Integer.MAX_VALUE;
|
||||
ServerWrapper wrapper = serverList[count % serverList.length];
|
||||
wrapper.lastUsed = System.currentTimeMillis();
|
||||
|
@ -506,9 +524,12 @@ public class LBHttpSolrClient extends SolrClient {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// try other standard servers that we didn't try just now
|
||||
for (ServerWrapper wrapper : zombieServers.values()) {
|
||||
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (wrapper.standard==false || justFailed!=null && justFailed.containsKey(wrapper.getKey())) continue;
|
||||
try {
|
||||
NamedList<Object> rsp = wrapper.client.request(request);
|
||||
|
@ -539,6 +560,19 @@ public class LBHttpSolrClient extends SolrClient {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return time allowed in nanos, returns -1 if no time_allowed is specified.
|
||||
*/
|
||||
private long getTimeAllowedInNanos(final SolrRequest req) {
|
||||
SolrParams reqParams = req.getParams();
|
||||
return reqParams == null ? -1 :
|
||||
TimeUnit.NANOSECONDS.convert(reqParams.getInt(CommonParams.TIME_ALLOWED, -1), TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
private boolean isTimeExceeded(long timeAllowedNano, long timeOutTime) {
|
||||
return timeAllowedNano > 0 && System.nanoTime() > timeOutTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes up one dead server and check for aliveness. The check is done in a roundrobin. Each server is checked for
|
||||
* aliveness once in 'x' millis where x is decided by the setAliveCheckinterval() or it is defaulted to 1 minute
|
||||
|
|
Loading…
Reference in New Issue