From 6c3e7d36c07a5418cfb6d96f60fb0b7c292789a2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 16 Sep 2013 12:23:16 +0200 Subject: [PATCH] If cluster health times out don't index lots of documents The SearchWithRandomExceptionTests aim to catch problems when resources are not closed due to exceptions etc. Yet in some cases the random seeds cause the index to never be fully allocated so we basically go into a ping-pong state where we try to allocate shards back and forth on nodes. This causes all docs to time out which in turn causes the tests to run for a very long time (hours or days). If we cannot allocate the index and get to a yellow state we simply index only one doc and expect all searches to fail. This commit also beefs up the assertions in this test to check if documents are actually present if they are indexed and refresh was successful. Closes #3694 --- .../SearchWithRandomExceptionsTests.java | 108 +++++++++++++++--- 1 file changed, 92 insertions(+), 16 deletions(-) diff --git a/src/test/java/org/elasticsearch/search/basic/SearchWithRandomExceptionsTests.java b/src/test/java/org/elasticsearch/search/basic/SearchWithRandomExceptionsTests.java index a79e759f044..a87e79054db 100644 --- a/src/test/java/org/elasticsearch/search/basic/SearchWithRandomExceptionsTests.java +++ b/src/test/java/org/elasticsearch/search/basic/SearchWithRandomExceptionsTests.java @@ -20,14 +20,20 @@ package org.elasticsearch.search.basic; import org.apache.lucene.util.English; -import org.apache.lucene.util.LuceneTestCase.AwaitsFix; import org.elasticsearch.AbstractSharedClusterTest; import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.admin.indices.refresh.RefreshResponse; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.search.SearchPhaseExecutionException; +import org.elasticsearch.action.search.SearchResponse; import 
org.elasticsearch.client.Requests; +import org.elasticsearch.common.settings.ImmutableSettings.Builder; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.store.mock.MockDirectoryHelper; +import org.hamcrest.Matchers; import org.junit.Test; import java.io.IOException; @@ -35,42 +41,112 @@ import java.util.concurrent.ExecutionException; import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; + public class SearchWithRandomExceptionsTests extends AbstractSharedClusterTest { @Test - @AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/3694") public void testRandomExceptions() throws IOException, InterruptedException, ExecutionException { final int numShards = between(1, 5); String mapping = XContentFactory.jsonBuilder(). startObject(). startObject("type"). startObject("properties"). - startObject("test").field("type", "string").endObject(). + startObject("test") + .field("type", "string") + .field("index", "not_analyzed") + .endObject(). endObject(). 
endObject() .endObject().string(); + final double exceptionRate; + final double exceptionOnOpenRate; + if (frequently()) { + if (randomBoolean()) { + if (randomBoolean()) { + exceptionOnOpenRate = 1.0/between(5, 100); + exceptionRate = 0.0d; + } else { + exceptionRate = 1.0/between(5, 100); + exceptionOnOpenRate = 0.0d; + } + } else { + exceptionOnOpenRate = 1.0/between(5, 100); + exceptionRate = 1.0/between(5, 100); + } + } else { + // rarely no exception + exceptionRate = 0d; + exceptionOnOpenRate = 0d; + } + + Builder settings = settingsBuilder() + .put("index.number_of_shards", numShards) + .put("index.number_of_replicas", randomIntBetween(0, 1)) + .put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE, exceptionRate) + .put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE_ON_OPEN, exceptionOnOpenRate) + .put(MockDirectoryHelper.CHECK_INDEX_ON_CLOSE, true); + logger.info("creating index: [test] using settings: [{}]", settings.build().getAsMap()); client().admin().indices().prepareCreate("test") - .setSettings(settingsBuilder() - .put("index.number_of_shards", numShards) - .put("index.number_of_replicas", randomIntBetween(0, 1)) - .put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE, randomBoolean() ? 1.0/between(10, 100) : 0.0) - .put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE_ON_OPEN, randomBoolean() ? 
1.0/between(10, 100) : 0.0) - .put(MockDirectoryHelper.CHECK_INDEX_ON_CLOSE, true)) + .setSettings(settings) .addMapping("type", mapping).execute().actionGet(); - client().admin().cluster() - .health(Requests.clusterHealthRequest().waitForGreenStatus().timeout(TimeValue.timeValueMillis(100))).get(); // it's ok to timeout here - int numDocs = between(10, 100); + ClusterHealthResponse clusterHealthResponse = client().admin().cluster() + .health(Requests.clusterHealthRequest().waitForYellowStatus().timeout(TimeValue.timeValueSeconds(5))).get(); // it's OK to timeout here + final int numDocs; + final boolean expectAllShardsFailed; + if (clusterHealthResponse.isTimedOut()) { + /* some seeds just won't let you create the index at all and we enter a ping-pong mode + * trying one node after another etc. that is ok but we need to make sure we don't wait + * forever when indexing documents so we set numDocs = 1 and expect all shards to fail + * when we search below.*/ + logger.info("ClusterHealth timed out - only index one doc and expect searches to fail"); + numDocs = 1; + expectAllShardsFailed = true; + } else { + numDocs = between(10, 100); + expectAllShardsFailed = false; + } + long numCreated = 0; + boolean[] added = new boolean[numDocs]; for (int i = 0; i < numDocs ; i++) { try { - client().prepareIndex("test", "type", "" + i).setSource("test", English.intToEnglish(i)).get(); + IndexResponse indexResponse = client().prepareIndex("test", "type", "" + i).setTimeout(TimeValue.timeValueSeconds(1)).setSource("test", English.intToEnglish(i)).get(); + if (indexResponse.isCreated()) { + numCreated++; + added[i] = true; + } } catch (ElasticSearchException ex) { } } - client().admin().indices().prepareRefresh("test").execute().get(); // don't assert on failures here - int numSearches = atLeast(10); + RefreshResponse refreshResponse = client().admin().indices().prepareRefresh("test").execute().get(); // don't assert on failures here + final boolean refreshFailed = 
refreshResponse.getShardFailures().length != 0 || refreshResponse.getFailedShards() != 0; + logger.info("Refresh failed [{}]", refreshFailed); + + final int numSearches = atLeast(10); // we don't check anything here really just making sure we don't leave any open files or a broken index behind. for (int i = 0; i < numSearches; i++) { - client().prepareSearch().setQuery(QueryBuilders.matchQuery("test", English.intToEnglish(between(0, numDocs)))).get(); + try { + int docToQuery = between(0, numDocs-1); + long expectedResults = added[docToQuery] ? 1 : 0; + logger.info("Searching for [test:{}]", English.intToEnglish(docToQuery)); + SearchResponse searchResponse = client().prepareSearch().setQuery(QueryBuilders.matchQuery("test", English.intToEnglish(docToQuery))).get(); + logger.info("Successful shards: [{}] numShards: [{}]", searchResponse.getSuccessfulShards(), numShards); + if (searchResponse.getSuccessfulShards() == numShards && !refreshFailed) { + assertThat(searchResponse.getHits().getTotalHits(), Matchers.equalTo(expectedResults)); + } + // check match all + searchResponse = client().prepareSearch().setQuery(QueryBuilders.matchAllQuery()).get(); + if (searchResponse.getSuccessfulShards() == numShards && !refreshFailed) { + assertThat(searchResponse.getHits().getTotalHits(), Matchers.equalTo(numCreated)); + } + + } catch (SearchPhaseExecutionException ex) { + if (!expectAllShardsFailed) { + throw ex; + } else { + logger.info("expected SearchPhaseException: [{}]", ex.getMessage()); + } + } + } } }