diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java index d3e766a46fd..8d966e55c0b 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java @@ -133,17 +133,26 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider * buid the FST and write it to disk. */ FST> build = builder.build(); - fieldOffsets.put(field, output.getFilePointer()); - build.save(output); - /* write some more meta-info */ - output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); - output.writeVInt(maxSurfaceFormsPerAnalyzedForm); - output.writeInt(maxGraphExpansions); // can be negative - int options = 0; - options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; - options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; - options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; - output.writeVInt(options); + assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]"; + /* + * it's possible that the FST is null if we have 2 segments that get merged + * and all docs that have a value in this field are deleted. This will cause + * a consumer to be created but it doesn't consume any values causing the FSTBuilder + * to return null. + */ + if (build != null) { + fieldOffsets.put(field, output.getFilePointer()); + build.save(output); + /* write some more meta-info */ + output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput()); + output.writeVInt(maxSurfaceFormsPerAnalyzedForm); + output.writeInt(maxGraphExpansions); // can be negative + int options = 0; + options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0; + options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0; + options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0; + output.writeVInt(options); + } } }; } @@ -251,9 +260,9 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider for (Map.Entry entry: lookupMap.entrySet()) { sizeInBytes += entry.getValue().fst.sizeInBytes(); - - if (fields == null || fields.length == 0) continue; - + if (fields == null || fields.length == 0) { + continue; + } for (String field : fields) { // support for getting fields by regex as in fielddata if (Regex.simpleMatch(field, entry.getKey())) { diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java index 4502dd26cd8..e9fc40a9ee3 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java @@ -31,7 +31,9 @@ import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.text.StringText; import org.elasticsearch.index.mapper.core.CompletionFieldMapper; -import org.elasticsearch.search.suggest.*; +import org.elasticsearch.search.suggest.Suggest; +import org.elasticsearch.search.suggest.SuggestContextParser; +import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option; import java.io.IOException; @@ -64,8 +66,13 @@ public class CompletionSuggester extends Suggester AtomicReader atomicReader = atomicReaderContext.reader(); Terms terms = atomicReader.fields().terms(fieldName); if (terms instanceof Completion090PostingsFormat.CompletionTerms) { - Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms; - Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext); + final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms; + final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext); + if (lookup == null) { + // we don't have a lookup for this segment.. this might be possible if a merge dropped all + // docs from the segment that had a value in this segment. + continue; + } List lookupResults = lookup.lookup(spare, false, suggestionContext.getSize()); for (Lookup.LookupResult res : lookupResults) { diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java index be089468542..d6f4771f2da 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionPostingsFormatTest.java @@ -247,6 +247,27 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { dir.close(); return lookup; } + + @Test + public void testNoDocs() throws IOException { + AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true); + RAMDirectory dir = new RAMDirectory(); + IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT); + FieldsConsumer consumer = provider.consumer(output); + FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, + DocValuesType.SORTED, DocValuesType.BINARY, new HashMap()); + TermsConsumer addField = consumer.addField(fieldInfo); + addField.finish(0, 0, 0); + consumer.close(); + output.close(); + + IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); + LookupFactory load = provider.load(input); + PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat()); + NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); + assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), new CompletionSuggestionContext(null))); + dir.close(); + } // TODO ADD more unittests } diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java index d0281ad7a97..282e0143da9 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java @@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomStrings; import com.google.common.collect.Lists; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; +import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse; +import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; +import org.elasticsearch.action.admin.indices.segments.ShardSegments; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.suggest.SuggestResponse; @@ -679,4 +682,39 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest { client().admin().indices().prepareOptimize(INDEX).execute().actionGet(); } } + + @Test // see #3555 + public void testPrunedSegments() throws IOException { + createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false); + + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() + .startObject().startObject(FIELD) + .startArray("input").value("The Beatles").endArray() + .endObject().endObject() + ).get(); + client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder() + .startObject() + .field("somefield", "somevalue") + .endObject() + ).get(); // we have 2 docs in a segment... + OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); + assertNoFailures(actionGet); + // update the first one and then merge.. the target segment will have no value in FIELD + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() + .startObject() + .field("somefield", "somevalue") + .endObject() + ).get(); + actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet(); + assertNoFailures(actionGet); + + assertSuggestions("b"); + assertThat(2l, equalTo(client().prepareCount(INDEX).get().getCount())); + for(IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) { + ShardSegments[] shards = seg.getShards(); + for (ShardSegments shardSegments : shards) { + assertThat(1, equalTo(shardSegments.getSegments().size())); + } + } + } }