Failed search on a shard tries a local replica on a network thread

When a search on a shard on a remote node fails, and a replica of that shard exists on the local node, the retry of the search is executed on the network thread. This is problematic, since the search needs to be executed on the actual search thread pool. It can also explain #4519, where the get happens on the network thread and then waits to send the get request until the very network thread it is occupying is freed (a deadlock).
fixes #4526

Note: this commit also re-enables the geo shape fetch test, since this fix should resolve the hang seen there as well.
This commit is contained in:
Shay Banon 2013-12-19 22:19:20 +01:00
parent 0c1c2dc671
commit 5bf4e74647
2 changed files with 11 additions and 5 deletions

View File

@ -278,14 +278,23 @@ public abstract class TransportSearchTypeAction extends TransportAction<SearchRe
} }
} }
} else { } else {
ShardRouting nextShard = shardIt.nextOrNull(); final ShardRouting nextShard = shardIt.nextOrNull();
final boolean lastShard = nextShard == null; final boolean lastShard = nextShard == null;
// trace log this exception // trace log this exception
if (logger.isTraceEnabled() && t != null) { if (logger.isTraceEnabled() && t != null) {
logger.trace(executionFailureMsg(shard, shardIt, request, lastShard), t); logger.trace(executionFailureMsg(shard, shardIt, request, lastShard), t);
} }
if (!lastShard) { if (!lastShard) {
try {
threadPool.executor(ThreadPool.Names.SEARCH).execute(new Runnable() {
@Override
public void run() {
performFirstPhase(shardIndex, shardIt, nextShard); performFirstPhase(shardIndex, shardIt, nextShard);
}
});
} catch (Throwable t1) {
onFirstPhaseResult(shardIndex, shard, shard.currentNodeId(), shardIt, t1);
}
} else { } else {
// no more shards active, add a failure // no more shards active, add a failure
if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception

View File

@ -19,7 +19,6 @@
package org.elasticsearch.search.geo; package org.elasticsearch.search.geo;
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.geo.builders.ShapeBuilder; import org.elasticsearch.common.geo.builders.ShapeBuilder;
@ -147,9 +146,7 @@ public class GeoShapeIntegrationTests extends ElasticsearchIntegrationTest {
assertThat(searchResponse.getHits().getAt(0).id(), equalTo("blakely")); assertThat(searchResponse.getHits().getAt(0).id(), equalTo("blakely"));
} }
// TODO this test causes hangs, blocking on the action get when fetching the shape for some reason
@Test @Test
@AwaitsFix(bugUrl = "this test causes hangs, blocking on the action get when fetching the shape for some reason")
public void testIndexedShapeReference() throws Exception { public void testIndexedShapeReference() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location") .startObject("properties").startObject("location")