mirror of https://github.com/apache/lucene.git
SOLR-5588: PeerSync doesn't count all connect failures as success.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1554129 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f3812e8410
commit
15fbdbd3a6
|
@ -325,6 +325,9 @@ Bug Fixes
|
|||
* SOLR-5503: Retry 'forward to leader' requests less aggressively - rather
|
||||
than on IOException and status 500, ConnectException. (Mark Miller)
|
||||
|
||||
* SOLR-5588: PeerSync doesn't count all connect failures as success.
|
||||
(Mark Miller)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -291,7 +291,8 @@ public class PeerSync {
|
|||
if (cantReachIsSuccess && sreq.purpose == 1 && srsp.getException() instanceof SolrServerException) {
|
||||
Throwable solrException = ((SolrServerException) srsp.getException())
|
||||
.getRootCause();
|
||||
if (solrException instanceof ConnectException || solrException instanceof ConnectTimeoutException
|
||||
boolean connectTimeoutExceptionInChain = connectTimeoutExceptionInChain(srsp.getException());
|
||||
if (connectTimeoutExceptionInChain || solrException instanceof ConnectException || solrException instanceof ConnectTimeoutException
|
||||
|| solrException instanceof NoHttpResponseException || solrException instanceof SocketException) {
|
||||
log.warn(msg() + " couldn't connect to " + srsp.getShardAddress() + ", counting as success");
|
||||
|
||||
|
@ -309,6 +310,10 @@ public class PeerSync {
|
|||
"Perhaps /get is not registered?");
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: we should return the above information so that when we can request a recovery through zookeeper, we do
|
||||
// that for these nodes
|
||||
|
||||
// TODO: at least log???
|
||||
// srsp.getException().printStackTrace(System.out);
|
||||
|
||||
|
@ -324,6 +329,23 @@ public class PeerSync {
|
|||
}
|
||||
}
|
||||
|
||||
// sometimes the root exception is a SocketTimeoutException, but ConnectTimeoutException
|
||||
// is in the chain
|
||||
private boolean connectTimeoutExceptionInChain(Throwable exception) {
|
||||
Throwable t = exception;
|
||||
while (true) {
|
||||
if (t instanceof ConnectTimeoutException) {
|
||||
return true;
|
||||
}
|
||||
Throwable cause = t.getCause();
|
||||
if (cause != null) {
|
||||
t = cause;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean handleVersions(ShardResponse srsp) {
|
||||
// we retrieved the last N updates from the replica
|
||||
List<Long> otherVersions = (List<Long>)srsp.getSolrResponse().getResponse().get("versions");
|
||||
|
|
|
@ -283,14 +283,6 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
|
|||
// kill a shard
|
||||
CloudJettyRunner deadShard = chaosMonkey.stopShard(SHARD1, 0);
|
||||
|
||||
|
||||
// we are careful to make sure the downed node is no longer in the state,
|
||||
// because on some systems (especially freebsd w/ blackhole enabled), trying
|
||||
// to talk to a downed node causes grief
|
||||
Set<CloudJettyRunner> jetties = new HashSet<CloudJettyRunner>();
|
||||
jetties.addAll(shardToJetty.get(SHARD1));
|
||||
jetties.remove(deadShard);
|
||||
|
||||
// ensure shard is dead
|
||||
try {
|
||||
index_specific(deadShard.client.solrClient, id, 999, i1, 107, t1,
|
||||
|
|
Loading…
Reference in New Issue