SOLR-6496: LBHttpSolrClient stops retrying after the timeAllowed threshold is met

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1651236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Anshum Gupta 2015-01-12 23:52:10 +00:00
parent c36180cdf8
commit 6d7fdf9f3f
3 changed files with 43 additions and 5 deletions

View File

@ -681,6 +681,9 @@ Other Changes
or collection depending on whether Solr is running in standalone or cloud mode
(Timothy Potter)
* SOLR-6496: LBHttpSolrClient stops server retries after the timeAllowed threshold is met.
(Steve Davids, Anshum Gupta)
================== 4.10.3 ==================
Bug Fixes

View File

@ -77,17 +77,18 @@ public class CloudExitableDirectoryReaderTest extends AbstractFullDistribZkTestB
time than this. Keeping it at 5 because the delaying search component delays all requests
by at 1 second.
*/
long fiveSeconds = 5000L;
int fiveSeconds = 5000;
Long timeAllowed = TestUtil.nextLong(random(), fiveSeconds, Long.MAX_VALUE);
Integer timeAllowed = TestUtil.nextInt(random(), fiveSeconds, Integer.MAX_VALUE);
assertSuccess(params("q", "name:a*", "timeAllowed",timeAllowed.toString()));
assertPartialResults(params("q", "name:a*", "timeAllowed", "1"));
timeAllowed = TestUtil.nextLong(random(), fiveSeconds, Long.MAX_VALUE);
timeAllowed = TestUtil.nextInt(random(), fiveSeconds, Integer.MAX_VALUE);
assertSuccess(params("q", "name:b*", "timeAllowed",timeAllowed.toString()));
timeAllowed = TestUtil.nextLong(random(), Long.MIN_VALUE, -1L); // negative timeAllowed should disable timeouts
// negative timeAllowed should disable timeouts
timeAllowed = TestUtil.nextInt(random(), Integer.MIN_VALUE, -1);
assertSuccess(params("q", "name:b*", "timeAllowed",timeAllowed.toString()));
assertSuccess(params("q","name:b*")); // no time limitation

View File

@ -21,7 +21,9 @@ import org.apache.solr.client.solrj.*;
import org.apache.solr.client.solrj.request.IsUpdateRequest;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SolrjNamedThreadFactory;
import org.apache.solr.common.SolrException;
@ -288,7 +290,13 @@ public class LBHttpSolrClient extends SolrClient {
boolean isUpdate = req.request instanceof IsUpdateRequest;
List<ServerWrapper> skipped = null;
long timeAllowedNano = getTimeAllowedInNanos(req.getRequest());
long timeOutTime = System.nanoTime() + timeAllowedNano;
for (String serverStr : req.getServers()) {
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
break;
}
serverStr = normalize(serverStr);
// if the server is currently a zombie, just skip to the next one
ServerWrapper wrapper = zombieServers.get(serverStr);
@ -318,6 +326,10 @@ public class LBHttpSolrClient extends SolrClient {
// try the servers we previously skipped
if (skipped != null) {
for (ServerWrapper wrapper : skipped) {
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
break;
}
ex = doRequest(wrapper.client, req, rsp, isUpdate, true, wrapper.getKey());
if (ex == null) {
return rsp; // SUCCESS
@ -482,7 +494,13 @@ public class LBHttpSolrClient extends SolrClient {
int maxTries = serverList.length;
Map<String,ServerWrapper> justFailed = null;
long timeAllowedNano = getTimeAllowedInNanos(request);
long timeOutTime = System.nanoTime() + timeAllowedNano;
for (int attempts=0; attempts<maxTries; attempts++) {
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
break;
}
int count = counter.incrementAndGet() & Integer.MAX_VALUE;
ServerWrapper wrapper = serverList[count % serverList.length];
wrapper.lastUsed = System.currentTimeMillis();
@ -506,9 +524,12 @@ public class LBHttpSolrClient extends SolrClient {
}
}
// try other standard servers that we didn't try just now
for (ServerWrapper wrapper : zombieServers.values()) {
if(isTimeExceeded(timeAllowedNano, timeOutTime)) {
break;
}
if (wrapper.standard==false || justFailed!=null && justFailed.containsKey(wrapper.getKey())) continue;
try {
NamedList<Object> rsp = wrapper.client.request(request);
@ -539,6 +560,19 @@ public class LBHttpSolrClient extends SolrClient {
}
}
/**
* @return time allowed in nanos, returns -1 if no time_allowed is specified.
*/
private long getTimeAllowedInNanos(final SolrRequest req) {
SolrParams reqParams = req.getParams();
return reqParams == null ? -1 :
TimeUnit.NANOSECONDS.convert(reqParams.getInt(CommonParams.TIME_ALLOWED, -1), TimeUnit.MILLISECONDS);
}
private boolean isTimeExceeded(long timeAllowedNano, long timeOutTime) {
return timeAllowedNano > 0 && System.nanoTime() > timeOutTime;
}
/**
* Takes up one dead server and check for aliveness. The check is done in a roundrobin. Each server is checked for
* aliveness once in 'x' millis where x is decided by the setAliveCheckinterval() or it is defaulted to 1 minute