Recheck cutoffScore during phrase_suggest merge.
The goal is to throw out suggestions that only meet the cutoff in some shards. This will happen if your input phrase is only contained in a few shards. If your shards are unbalanced, this rechecking can throw out good suggestions. Closes #3547.
This commit is contained in:
parent
76939b82d3
commit
10e55bd3ef
|
@ -31,6 +31,7 @@ import org.elasticsearch.common.xcontent.XContentBuilderString;
|
|||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggestion;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggestion;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -119,6 +120,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
case CompletionSuggestion.TYPE:
|
||||
suggestion = new CompletionSuggestion();
|
||||
break;
|
||||
case PhraseSuggestion.TYPE:
|
||||
suggestion = new PhraseSuggestion();
|
||||
break;
|
||||
default:
|
||||
suggestion = new Suggestion<Entry<Option>>();
|
||||
break;
|
||||
|
@ -357,7 +361,7 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
CollectionUtil.timSort(options, comparator);
|
||||
}
|
||||
|
||||
protected Entry<O> reduce(List<Entry<O>> toReduce) {
|
||||
protected Entry<O> reduce(List<? extends Entry<O>> toReduce) {
|
||||
if (toReduce.size() == 1) {
|
||||
return toReduce.get(0);
|
||||
}
|
||||
|
@ -367,20 +371,29 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
assert leader.text.equals(entry.text);
|
||||
assert leader.offset == entry.offset;
|
||||
assert leader.length == entry.length;
|
||||
leader.merge(entry);
|
||||
for (O option : entry) {
|
||||
O merger = entries.get(option);
|
||||
if (merger == null) {
|
||||
entries.put(option, option);
|
||||
entries.put(option, option);
|
||||
} else {
|
||||
merger.mergeInto(option);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
leader.options.clear();
|
||||
leader.options.addAll(entries.keySet());
|
||||
for (O option: entries.keySet()) {
|
||||
leader.addOption(option);
|
||||
}
|
||||
return leader;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Merge any extra fields for this subtype.
|
||||
*/
|
||||
protected void merge(Entry<O> other) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the text (analyzed by suggest analyzer) originating from the suggest text. Usually this is a
|
||||
* single term.
|
||||
|
|
|
@ -18,10 +18,6 @@
|
|||
*/
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.shingle.ShingleFilter;
|
||||
|
@ -36,6 +32,10 @@ import org.elasticsearch.search.suggest.SuggestUtils;
|
|||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
//TODO public for tests
|
||||
public final class NoisyChannelSpellChecker {
|
||||
public static final double REAL_WORD_LIKELYHOOD = 0.95d;
|
||||
|
@ -59,7 +59,7 @@ public final class NoisyChannelSpellChecker {
|
|||
|
||||
}
|
||||
|
||||
public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator,
|
||||
public Result getCorrections(TokenStream stream, final CandidateGenerator generator,
|
||||
float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException {
|
||||
|
||||
final List<CandidateSet> candidateSetsList = new ArrayList<DirectCandidateGenerator.CandidateSet>();
|
||||
|
@ -109,7 +109,7 @@ public final class NoisyChannelSpellChecker {
|
|||
});
|
||||
|
||||
if (candidateSetsList.isEmpty() || candidateSetsList.size() >= tokenLimit) {
|
||||
return Correction.EMPTY;
|
||||
return Result.EMPTY;
|
||||
}
|
||||
|
||||
for (CandidateSet candidateSet : candidateSetsList) {
|
||||
|
@ -123,14 +123,15 @@ public final class NoisyChannelSpellChecker {
|
|||
for (int i = 0; i < candidates.length; i++) {
|
||||
candidates[i] = candidateSets[i].originalTerm;
|
||||
}
|
||||
cutoffScore = scorer.score(candidates, candidateSets);
|
||||
double inputPhraseScore = scorer.score(candidates, candidateSets);
|
||||
cutoffScore = inputPhraseScore * confidence;
|
||||
}
|
||||
Correction[] findBestCandiates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore * confidence);
|
||||
Correction[] findBestCandiates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore);
|
||||
|
||||
return findBestCandiates;
|
||||
return new Result(findBestCandiates, cutoffScore);
|
||||
}
|
||||
|
||||
public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator,
|
||||
public Result getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator,
|
||||
float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException {
|
||||
|
||||
return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize);
|
||||
|
@ -141,6 +142,15 @@ public final class NoisyChannelSpellChecker {
|
|||
UnicodeUtil.UTF8toUTF16(query, spare);
|
||||
return analyzer.tokenStream(field, new FastCharArrayReader(spare.chars, spare.offset, spare.length));
|
||||
}
|
||||
|
||||
|
||||
public static class Result {
|
||||
public static final Result EMPTY = new Result(Correction.EMPTY, Double.MIN_VALUE);
|
||||
public final Correction[] corrections;
|
||||
public final double cutoffScore;
|
||||
|
||||
public Result(Correction[] corrections, double cutoffScore) {
|
||||
this.corrections = corrections;
|
||||
this.cutoffScore = cutoffScore;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,14 +33,12 @@ import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
|||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.*;
|
||||
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
||||
private final BytesRef SEPARATOR = new BytesRef(" ");
|
||||
|
||||
|
@ -56,11 +54,8 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
public Suggestion<? extends Entry<? extends Option>> innerExecute(String name, PhraseSuggestionContext suggestion,
|
||||
IndexReader indexReader, CharsRef spare) throws IOException {
|
||||
double realWordErrorLikelihood = suggestion.realworldErrorLikelyhood();
|
||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||
Suggestion.Entry<Option> resultEntry = new Suggestion.Entry<Option>(new StringText(spare.toString()), 0, spare.length);
|
||||
final Suggestion<Entry<Option>> response = new Suggestion<Entry<Option>>(name, suggestion.getSize());
|
||||
response.addTerm(resultEntry);
|
||||
|
||||
final PhraseSuggestion response = new PhraseSuggestion(name, suggestion.getSize());
|
||||
|
||||
List<PhraseSuggestionContext.DirectCandidateGenerator> generators = suggestion.generators();
|
||||
final int numGenerators = generators.size();
|
||||
final List<CandidateGenerator> gens = new ArrayList<CandidateGenerator>(generators.size());
|
||||
|
@ -81,12 +76,15 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
||||
|
||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
|
||||
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||
Result checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
|
||||
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
||||
|
||||
|
||||
PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
|
||||
response.addTerm(resultEntry);
|
||||
|
||||
BytesRef byteSpare = new BytesRef();
|
||||
for (Correction correction : corrections) {
|
||||
for (Correction correction : checkerResult.corrections) {
|
||||
UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare);
|
||||
Text phrase = new StringText(spare.toString());
|
||||
Text highlighted = null;
|
||||
|
@ -96,9 +94,16 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
}
|
||||
resultEntry.addOption(new Suggestion.Entry.Option(phrase, highlighted, (float) (correction.score)));
|
||||
}
|
||||
} else {
|
||||
response.addTerm(buildResultEntry(suggestion, spare, Double.MIN_VALUE));
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
private PhraseSuggestion.Entry buildResultEntry(PhraseSuggestionContext suggestion, CharsRef spare, double cutoffScore) {
|
||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||
return new PhraseSuggestion.Entry(new StringText(spare.toString()), 0, spare.length, cutoffScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] names() {
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilderString;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Suggestion entry returned from the {@link PhraseSuggester}.
|
||||
*/
|
||||
public class PhraseSuggestion extends Suggest.Suggestion<PhraseSuggestion.Entry> {
|
||||
public static final int TYPE = 3;
|
||||
|
||||
public PhraseSuggestion() {
|
||||
}
|
||||
|
||||
public PhraseSuggestion(String name, int size) {
|
||||
super(name, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Entry newEntry() {
|
||||
return new Entry();
|
||||
}
|
||||
|
||||
public static class Entry extends Suggestion.Entry<Suggestion.Entry.Option> {
|
||||
static class Fields {
|
||||
static final XContentBuilderString CUTOFF_SCORE = new XContentBuilderString("cutoff_score");
|
||||
}
|
||||
|
||||
protected double cutoffScore = Double.MIN_VALUE;
|
||||
|
||||
public Entry(Text text, int offset, int length, double cutoffScore) {
|
||||
super(text, offset, length);
|
||||
this.cutoffScore = cutoffScore;
|
||||
}
|
||||
|
||||
public Entry() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return cutoff score for suggestions. input term score * confidence for phrase suggest, 0 otherwise
|
||||
*/
|
||||
public double getCutoffScore() {
|
||||
return cutoffScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void merge(Suggestion.Entry<Suggestion.Entry.Option> other) {
|
||||
super.merge(other);
|
||||
// If the cluster contains both pre 0.90.4 and post 0.90.4 nodes then we'll see Suggestion.Entry
|
||||
// objects being merged with PhraseSuggestion.Entry objects. We merge Suggestion.Entry objects
|
||||
// by assuming they had a low cutoff score rather than a high one as that is the more common scenario
|
||||
// and the simplest one for us to implement.
|
||||
if (!(other instanceof PhraseSuggestion.Entry)) {
|
||||
return;
|
||||
}
|
||||
PhraseSuggestion.Entry otherSuggestionEntry = (PhraseSuggestion.Entry) other;
|
||||
this.cutoffScore = Math.max(this.cutoffScore, otherSuggestionEntry.cutoffScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addOption(Suggestion.Entry.Option option) {
|
||||
if (option.getScore() > this.cutoffScore) {
|
||||
this.options.add(option);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
super.readFrom(in);
|
||||
// If the other side is older than 0.90.4 then it shouldn't be sending suggestions of this type but just in case
|
||||
// we're going to assume that they are regular suggestions so we won't read anything.
|
||||
if (in.getVersion().before(Version.V_0_90_4)) {
|
||||
return;
|
||||
}
|
||||
cutoffScore = in.readDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
// If the other side of the message is older than 0.90.4 it'll interpret these suggestions as regular suggestions
|
||||
// so we have to pretend to be one which we can do by just calling the superclass writeTo and doing nothing else
|
||||
if (out.getVersion().before(Version.V_0_90_4)) {
|
||||
return;
|
||||
}
|
||||
out.writeDouble(cutoffScore);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -40,9 +40,7 @@ import org.junit.Test;
|
|||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||
|
@ -165,7 +163,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
|
||||
int numShards = between(1,5);
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, 2));
|
||||
builder.put("index.number_of_shards", numShards).put("index.number_of_replicas", between(0, numberOfNodes() - 1));
|
||||
builder.put("index.analysis.analyzer.biword.tokenizer", "standard");
|
||||
builder.putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase");
|
||||
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||
|
@ -1149,7 +1147,7 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
@Test // see #3469
|
||||
public void testShardFailures() throws IOException, InterruptedException {
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
builder.put("index.number_of_shards", between(1, 5)).put("index.number_of_replicas", between(0, 2));
|
||||
builder.put("index.number_of_shards", between(1, 5)).put("index.number_of_replicas", between(0, numberOfNodes() - 1));
|
||||
builder.put("index.analysis.analyzer.suggest.tokenizer", "standard");
|
||||
builder.putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler");
|
||||
builder.put("index.analysis.filter.shingler.type", "shingle");
|
||||
|
@ -1187,7 +1185,8 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
client().prepareIndex("test", "type1", "1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type1", "2")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title about equal length").endObject()).execute().actionGet();
|
||||
// Note that the last document has to have about the same length as the other or cutoff rechecking will remove the useful suggestion.
|
||||
client().admin().indices().prepareRefresh().execute().actionGet();
|
||||
|
||||
// When searching on a shard with a non existing mapping, we should fail
|
||||
|
@ -1240,7 +1239,8 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
client().prepareIndex("test", "type1", "1")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "Just testing the suggestions api").endObject()).execute().actionGet();
|
||||
client().prepareIndex("test", "type1", "2")
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title").endObject()).execute().actionGet();
|
||||
.setSource(XContentFactory.jsonBuilder().startObject().field("name", "An other title about equal length").endObject()).execute().actionGet();
|
||||
// Note that the last document has to have about the same length as the other or cutoff rechecking will remove the useful suggestion.
|
||||
client().admin().indices().prepareRefresh().execute().actionGet();
|
||||
|
||||
SearchRequestBuilder suggestBuilder = client().prepareSearch().setSearchType(SearchType.COUNT);
|
||||
|
@ -1251,4 +1251,72 @@ public class SuggestSearchTests extends AbstractSharedClusterTest {
|
|||
ElasticsearchAssertions.assertNoFailures(searchResponse);
|
||||
ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
|
||||
}
|
||||
|
||||
/**
|
||||
* Searching for a rare phrase shouldn't provide any suggestions if confidence > 1. This was possible before we rechecked the cutoff
|
||||
* score during the reduce phase. Failures don't occur every time - maybe two out of five tries but we don't repeat it to save time.
|
||||
*/
|
||||
@Test
|
||||
public void testSearchForRarePhrase() throws ElasticSearchException, IOException {
|
||||
// If there isn't enough chaf per shard then shards can become unbalanced, making the cutoff recheck this is testing do more harm then good.
|
||||
int chafPerShard = 100;
|
||||
Builder builder = ImmutableSettings.builder();
|
||||
int numberOfShards = between(2, 5);
|
||||
builder.put("index.number_of_shards", numberOfShards).put("index.number_of_replicas", between(0, numberOfNodes() - 1));
|
||||
builder.put("index.analysis.analyzer.body.tokenizer", "standard");
|
||||
builder.putArray("index.analysis.analyzer.body.filter", "lowercase", "my_shingle");
|
||||
builder.put("index.analysis.filter.my_shingle.type", "shingle");
|
||||
builder.put("index.analysis.filter.my_shingle.output_unigrams", true);
|
||||
builder.put("index.analysis.filter.my_shingle.min_shingle_size", 2);
|
||||
builder.put("index.analysis.filter.my_shingle.max_shingle_size", 2);
|
||||
|
||||
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("_all").field("store", "yes").field("termVector", "with_positions_offsets").endObject()
|
||||
.startObject("properties")
|
||||
.startObject("body").field("type", "string").field("analyzer", "body").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
client().admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||
ensureGreen();
|
||||
List<String> phrases = new ArrayList<String>();
|
||||
Collections.addAll(phrases, "nobel prize", "noble gases", "somethingelse prize", "pride and joy", "notes are fun");
|
||||
for (int i = 0; i < 8; i++) {
|
||||
phrases.add("noble somethingelse" + i);
|
||||
}
|
||||
for (int i = 0; i < numberOfShards * chafPerShard; i++) {
|
||||
phrases.add("chaff" + i);
|
||||
}
|
||||
for (String phrase: phrases) {
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource(XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("body", phrase)
|
||||
.endObject()
|
||||
)
|
||||
.execute().actionGet();
|
||||
}
|
||||
refresh();
|
||||
|
||||
Suggest searchSuggest = searchSuggest(client(), "nobel prize", phraseSuggestion("simple_phrase")
|
||||
.field("body")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
|
||||
.confidence(2f)
|
||||
.maxErrors(5f)
|
||||
.size(1));
|
||||
ElasticsearchAssertions.assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase");
|
||||
|
||||
searchSuggest = searchSuggest(client(), "noble prize", phraseSuggestion("simple_phrase")
|
||||
.field("body")
|
||||
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
|
||||
.confidence(2f)
|
||||
.maxErrors(5f)
|
||||
.size(1));
|
||||
ElasticsearchAssertions.assertSuggestion(searchSuggest, 0, 0, "simple_phrase", "nobel prize");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int numberOfNodes() {
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.lucene.store.RAMDirectory;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.search.suggest.phrase.*;
|
||||
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
|
||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -50,6 +51,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.greaterThan;
|
||||
public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
||||
private final BytesRef space = new BytesRef(" ");
|
||||
private final BytesRef preTag = new BytesRef("<em>");
|
||||
|
@ -100,19 +102,23 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||
spellchecker.setMinQueryLength(1);
|
||||
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
Result result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
Correction[] corrections = result.corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ace"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american <em>ace</em>"));
|
||||
assertThat(result.cutoffScore, greaterThan(0d));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
|
||||
result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
|
||||
corrections = result.corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ame"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ame"));
|
||||
assertThat(result.cutoffScore, equalTo(Double.MIN_VALUE));
|
||||
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
@ -123,7 +129,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorn</em> the <em>god</em> jewel"));
|
||||
assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("<em>xorr</em> the got jewel"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
@ -133,7 +139,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
// Test some of the highlighting corner cases
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
@ -168,18 +174,18 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
spellchecker.setMinQueryLength(1);
|
||||
suggester = new NoisyChannelSpellChecker(0.85);
|
||||
wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("<em>captain america</em>"));
|
||||
|
||||
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain <em>america</em>"));
|
||||
}
|
||||
|
@ -241,23 +247,23 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
|
||||
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
|
||||
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||
|
||||
generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
|
||||
|
@ -266,11 +272,11 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
|
||||
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
||||
|
@ -323,16 +329,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||
spellchecker.setMinQueryLength(1);
|
||||
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3);
|
||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1).corrections;
|
||||
assertThat(corrections.length, equalTo(0));
|
||||
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
||||
|
||||
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
@ -342,7 +348,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
|
||||
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3).corrections;
|
||||
assertThat(corrections.length, equalTo(4));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
@ -350,7 +356,7 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
||||
|
||||
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3).corrections;
|
||||
assertThat(corrections.length, equalTo(1));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
|
||||
|
@ -379,16 +385,16 @@ public class NoisyChannelSpellCheckerTests extends ElasticsearchTestCase{
|
|||
spellchecker.setMinQueryLength(1);
|
||||
suggester = new NoisyChannelSpellChecker(0.95);
|
||||
wordScorer = new LinearInterpoatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3).corrections;
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
|
||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3).corrections;
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||
|
||||
|
||||
wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3).corrections;
|
||||
assertThat(corrections.length, equalTo(2));
|
||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||
|
|
Loading…
Reference in New Issue