Prevent NPE if all docs for the field are pruned during merge

During segment merges, FieldsConsumers are created for all fields from
the source segments. Yet, if all documents that have a value in a
certain field are deleted / pruned during the merge, the FieldsConsumer
is never called at all, causing the internally used FST Builder to
return `null` from its `build()` method. This means we can't store the
FST and can potentially run into an NPE. This commit adds handling for
this corner case both during the merge and during the suggest phase,
since we then also don't have a Lookup instance for that segment's field.

Closes #3555
Simon Willnauer 2013-08-22 11:54:36 +02:00
parent 7fda12316a
commit fc3133d087
4 changed files with 92 additions and 17 deletions
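
For context, a minimal, self-contained sketch of the root cause, assuming the Lucene 4.x-era FST API (class names and exact signatures vary slightly between versions): a Builder that is never fed an input/output pair returns null from build(), which is exactly the state a fully pruned field leaves behind after a merge.

import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

public class EmptyFstSketch {
    public static void main(String[] args) throws Exception {
        // Simulates a FieldsConsumer that is created for a field but never
        // consumes a value because every doc carrying the field was pruned.
        Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());
        FST<Long> fst = builder.build(); // no inputs were added -> build() returns null
        if (fst == null) {
            System.out.println("nothing to serialize for this field; skip it");
        }
    }
}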

View File: AnalyzingCompletionLookupProvider.java

@@ -133,6 +133,14 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
                      * buid the FST and write it to disk.
                      */
                     FST<Pair<Long, BytesRef>> build = builder.build();
+                    assert build != null || docCount == 0 : "the FST is null but docCount is != 0 actual value: [" + docCount + "]";
+                    /*
+                     * it's possible that the FST is null if we have 2 segments that get merged
+                     * and all docs that have a value in this field are deleted. This will cause
+                     * a consumer to be created but it doesn't consume any values causing the FSTBuilder
+                     * to return null.
+                     */
+                    if (build != null) {
                     fieldOffsets.put(field, output.getFilePointer());
                     build.save(output);
                     /* write some more meta-info */
@@ -145,6 +153,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
                     options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
                     output.writeVInt(options);
                 }
+                }
             };
         }
     };
@@ -251,9 +260,9 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
         for (Map.Entry<String, AnalyzingSuggestHolder> entry: lookupMap.entrySet()) {
             sizeInBytes += entry.getValue().fst.sizeInBytes();
-            if (fields == null || fields.length == 0) continue;
+            if (fields == null || fields.length == 0) {
+                continue;
+            }
             for (String field : fields) {
                 // support for getting fields by regex as in fielddata
                 if (Regex.simpleMatch(field, entry.getKey())) {
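
Distilled from the hunks above into a self-contained toy (these class and method names are illustrative stand-ins, not the real provider code): serialize and record an offset only when something was actually built, and let the reader treat a missing entry as "no lookup".

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PrunedFieldToy {
    // Plays the role of fieldOffsets in the provider: fields absent from
    // this map were skipped at write time and must yield a null lookup.
    static final Map<String, Long> fieldOffsets = new HashMap<String, Long>();

    // Stands in for the FST Builder's build(): null when nothing was consumed.
    static String build(List<String> values) {
        return values.isEmpty() ? null : values.toString();
    }

    static void writeField(String field, List<String> values, long filePointer) {
        String built = build(values);
        if (built != null) { // the guard this commit adds
            fieldOffsets.put(field, filePointer);
            // save(output) and the meta-info block would follow here
        }
    }

    static String getLookup(String field) {
        Long offset = fieldOffsets.get(field);
        return offset == null ? null : "lookup@" + offset; // null instead of a later NPE
    }

    public static void main(String[] args) {
        writeField("suggest", Collections.<String>emptyList(), 42L);
        System.out.println(getLookup("suggest")); // prints: null
    }
}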

View File: CompletionSuggester.java

@@ -31,7 +31,9 @@ import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
-import org.elasticsearch.search.suggest.*;
+import org.elasticsearch.search.suggest.Suggest;
+import org.elasticsearch.search.suggest.SuggestContextParser;
+import org.elasticsearch.search.suggest.Suggester;
 import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;

 import java.io.IOException;
@@ -64,8 +66,13 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
             AtomicReader atomicReader = atomicReaderContext.reader();
             Terms terms = atomicReader.fields().terms(fieldName);
             if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
-                Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
-                Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
+                final Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
+                final Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), suggestionContext);
+                if (lookup == null) {
+                    // we don't have a lookup for this segment.. this might be possible if a merge dropped all
+                    // docs from the segment that had a value in this segment.
+                    continue;
+                }
                 List<Lookup.LookupResult> lookupResults = lookup.lookup(spare, false, suggestionContext.getSize());
                 for (Lookup.LookupResult res : lookupResults) {
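
The suggest-phase counterpart in isolation, as a hedged sketch (LeafState and the loop below are illustrative, not the actual suggester types): per-segment state can legitimately be absent after a merge, so the per-segment loop skips instead of dereferencing.

import java.util.Arrays;
import java.util.List;

public class PerSegmentGuardSketch {
    // Illustrative stand-in for a segment whose lookup may be missing.
    static final class LeafState {
        final String lookup; // null == merge dropped every doc with the field
        LeafState(String lookup) { this.lookup = lookup; }
    }

    public static void main(String[] args) {
        List<LeafState> leaves = Arrays.asList(new LeafState("fst-backed lookup"), new LeafState(null));
        for (LeafState leaf : leaves) {
            if (leaf.lookup == null) {
                continue; // same pattern as the CompletionSuggester fix above
            }
            System.out.println("suggesting against: " + leaf.lookup);
        }
    }
}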

View File: CompletionPostingsFormatTest.java

@@ -248,5 +248,26 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
         return lookup;
     }

+    @Test
+    public void testNoDocs() throws IOException {
+        AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
+        RAMDirectory dir = new RAMDirectory();
+        IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
+        FieldsConsumer consumer = provider.consumer(output);
+        FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
+                DocValuesType.SORTED, DocValuesType.BINARY, new HashMap<String, String>());
+        TermsConsumer addField = consumer.addField(fieldInfo);
+        addField.finish(0, 0, 0);
+        consumer.close();
+        output.close();
+        IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
+        LookupFactory load = provider.load(input);
+        PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
+        NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
+        assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), new CompletionSuggestionContext(null)));
+        dir.close();
+    }
+
     // TODO ADD more unittests
 }

View File: CompletionSuggestSearchTests.java

@@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomStrings;
 import com.google.common.collect.Lists;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
+import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
+import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
+import org.elasticsearch.action.admin.indices.segments.ShardSegments;
 import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.suggest.SuggestResponse;
@@ -679,4 +682,39 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
             client().admin().indices().prepareOptimize(INDEX).execute().actionGet();
         }
     }

+    @Test // see #3555
+    public void testPrunedSegments() throws IOException {
+        createIndexAndMappingAndSettings(settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0), "standard", "standard", false, false, false);
+        client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
+                .startObject().startObject(FIELD)
+                .startArray("input").value("The Beatles").endArray()
+                .endObject().endObject()
+        ).get();
+        client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder()
+                .startObject()
+                .field("somefield", "somevalue")
+                .endObject()
+        ).get(); // we have 2 docs in a segment...
+        OptimizeResponse actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
+        assertNoFailures(actionGet);
+        // update the first one and then merge.. the target segment will have no value in FIELD
+        client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
+                .startObject()
+                .field("somefield", "somevalue")
+                .endObject()
+        ).get();
+        actionGet = client().admin().indices().prepareOptimize().setFlush(true).setMaxNumSegments(1).setRefresh(true).execute().actionGet();
+        assertNoFailures(actionGet);
+        assertSuggestions("b");
+        assertThat(2l, equalTo(client().prepareCount(INDEX).get().getCount()));
+        for (IndexShardSegments seg : client().admin().indices().prepareSegments().get().getIndices().get(INDEX)) {
+            ShardSegments[] shards = seg.getShards();
+            for (ShardSegments shardSegments : shards) {
+                assertThat(1, equalTo(shardSegments.getSegments().size()));
+            }
+        }
+    }
 }