From dc9a05228714b6a951301f1f185a1970d3320de6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 8 Mar 2013 19:37:39 +0100 Subject: [PATCH] Respect CandidateGenerator#size if set in the request and reduce the total #of candidates to the shard size. Closes #2752 --- .../search/suggest/SuggestUtils.java | 1 + .../suggest/phrase/CandidateGenerator.java | 6 +- .../phrase/DirectCandidateGenerator.java | 10 ++- .../MultiCandidateGeneratorWrapper.java | 8 +- .../phrase/NoisyChannelSpellChecker.java | 8 +- .../suggest/phrase/PhraseSuggester.java | 4 +- .../search/suggest/SuggestSearchTests.java | 86 ++++++++++++++++++- .../phrase/NoisyChannelSpellCheckerTests.java | 57 ++++++------ 8 files changed, 135 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java index 32336123df9..a29bb2f8acb 100644 --- a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java @@ -82,6 +82,7 @@ public final class SuggestUtils { directSpellChecker.setMinPrefix(suggestion.prefixLength()); directSpellChecker.setMinQueryLength(suggestion.minWordLength()); directSpellChecker.setThresholdFrequency(suggestion.minDocFreq()); + directSpellChecker.setLowerCaseTerms(false); return directSpellChecker; } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java b/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java index bd361215145..38b85a464dc 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java @@ -31,9 +31,9 @@ public abstract class CandidateGenerator { public abstract long frequency(BytesRef term) throws IOException; - public CandidateSet drawCandidates(BytesRef term, int numCandidates) throws IOException { + public CandidateSet drawCandidates(BytesRef term) throws IOException { CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term)); - return drawCandidates(set, numCandidates); + return drawCandidates(set); } public Candidate createCandidate(BytesRef term) throws IOException { @@ -41,6 +41,6 @@ public abstract class CandidateGenerator { } public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException; - public abstract CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException; + public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException; } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java b/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java index 0cddfdbe08f..ec8027d1ee8 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java @@ -55,15 +55,17 @@ public final class DirectCandidateGenerator extends CandidateGenerator { private final boolean useTotalTermFrequency; private final CharsRef spare = new CharsRef(); private final BytesRef byteSpare = new BytesRef(); + private final int numCandidates; - public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood) throws IOException { - this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, null, null); + public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { + this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null); } - public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, Analyzer preFilter, Analyzer postFilter) throws IOException { + public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException { this.spellchecker = spellchecker; this.field = field; + this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; Terms terms = MultiFields.getTerms(reader, field); @@ -114,7 +116,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator { * @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#drawCandidates(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet, int) */ @Override - public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException { + public CandidateSet drawCandidates(CandidateSet set) throws IOException { Candidate original = set.originalTerm; BytesRef term = preFilter(original.term, spare, byteSpare); final long frequency = original.frequency; diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java b/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java index f830cd3a81c..9aa985de103 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java @@ -30,9 +30,11 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator { private final CandidateGenerator[] candidateGenerator; + private int numCandidates ; - public MultiCandidateGeneratorWrapper(CandidateGenerator...candidateGenerators) { + public MultiCandidateGeneratorWrapper(int numCandidates, CandidateGenerator...candidateGenerators) { this.candidateGenerator = candidateGenerators; + this.numCandidates = numCandidates; } @Override public boolean isKnownWord(BytesRef term) throws IOException { @@ -45,9 +47,9 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator { } @Override - public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException { + public CandidateSet drawCandidates(CandidateSet set) throws IOException { for (CandidateGenerator generator : candidateGenerator) { - generator.drawCandidates(set, numCandidates); + generator.drawCandidates(set); } return reduce(set, numCandidates); } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java b/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java index 1527d69f27c..dc0419285fa 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java @@ -55,7 +55,7 @@ public final class NoisyChannelSpellChecker { this.requireUnigram = requireUnigram; } - public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator, final int numCandidates, + public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException { final List candidateSetsList = new ArrayList(); @@ -105,7 +105,7 @@ public final class NoisyChannelSpellChecker { }); for (CandidateSet candidateSet : candidateSetsList) { - generator.drawCandidates(candidateSet, numCandidates); + generator.drawCandidates(candidateSet); } double cutoffScore = Double.MIN_VALUE; CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize); @@ -122,10 +122,10 @@ public final class NoisyChannelSpellChecker { return findBestCandiates; } - public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, int numCandidates, + public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException { - return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, numCandidates, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize); + return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize); } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java index 0fa507946c5..dc9f2d9b19d 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java @@ -58,7 +58,7 @@ final class PhraseSuggester implements Suggester { for (int i = 0; i < gens.length; i++) { PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i); DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator); - gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.preFilter(), generator.postFilter()); + gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter()); } @@ -66,7 +66,7 @@ final class PhraseSuggester implements Suggester { final BytesRef separator = suggestion.separator(); TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField()); WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator); - Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(gens), suggestion.getShardSize(), suggestion.maxErrors(), + Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(), suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize()); UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare); diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java index 2942ce2e527..268ff37bdfd 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java @@ -725,6 +725,91 @@ public class SuggestSearchTests extends AbstractNodesTests { assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel")); + + } + + @Test + public void testSizePararm() throws IOException { + client.admin().indices().prepareDelete().execute().actionGet(); + Builder builder = ImmutableSettings.builder(); + builder.put("index.number_of_shards", 1); + builder.put("index.number_of_replicas", 1); + builder.put("index.analysis.analyzer.reverse.tokenizer", "standard"); + builder.putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse"); + builder.put("index.analysis.analyzer.body.tokenizer", "standard"); + builder.putArray("index.analysis.analyzer.body.filter", "lowercase"); + builder.put("index.analysis.analyzer.bigram.tokenizer", "standard"); + builder.putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase"); + builder.put("index.analysis.filter.my_shingle.type", "shingle"); + builder.put("index.analysis.filter.my_shingle.output_unigrams", false); + builder.put("index.analysis.filter.my_shingle.min_shingle_size", 2); + builder.put("index.analysis.filter.my_shingle.max_shingle_size", 2); + + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("_all") + .field("store", "yes").field("termVector", "with_positions_offsets").endObject().startObject("properties") + .startObject("body").field("type", "string").field("analyzer", "body").endObject().startObject("body_reverse") + .field("type", "string").field("analyzer", "reverse").endObject().startObject("bigram").field("type", "string") + .field("analyzer", "bigram").endObject().endObject().endObject().endObject(); + + client.admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet(); + client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet(); + String line = "xorr the god jewel"; + client.prepareIndex("test", "type1") + .setSource( + XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line) + .endObject()).execute().actionGet(); + line = "I got it this time"; + client.prepareIndex("test", "type1") + .setSource( + XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line) + .endObject()).execute().actionGet(); + client.admin().indices().prepareRefresh().execute().actionGet(); + SearchResponse search = client // initially draw candidates with a size 1 so "got" will be the only candidate since it's LD1 + .prepareSearch() + .setSearchType(SearchType.COUNT) + .setSuggestText("Xorr the Gut-Jewel") + .addSuggestion( + phraseSuggestion("simple_phrase") + .realWordErrorLikelihood(0.95f) + .field("bigram") + .gramSize(2) + .analyzer("body") + .addCandidateGenerator( + PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1) + .suggestMode("always").size(1).accuracy(0.1f)) + .smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute() + .actionGet(); + assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0)); + assertThat(search.getSuggest(), notNullValue()); + assertThat(search.getSuggest().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase")); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(0)); + + search = client // we allow a size of 2 now on the shard generator level so "god" will be found since it's LD2 + .prepareSearch() + .setSearchType(SearchType.COUNT) + .setSuggestText("Xorr the Gut-Jewel") + .addSuggestion( + phraseSuggestion("simple_phrase") + .realWordErrorLikelihood(0.95f) + .field("bigram") + .gramSize(2) + .analyzer("body") + .addCandidateGenerator( + PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1) + .suggestMode("always").size(2).accuracy(0.1f)) + .smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute() + .actionGet(); + assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0)); + assertThat(search.getSuggest(), notNullValue()); + assertThat(search.getSuggest().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase")); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xorr the Gut-Jewel")); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), + equalTo("xorr the god jewel")); } @@ -831,7 +916,6 @@ public class SuggestSearchTests extends AbstractNodesTests { phraseSuggestion("simple_phrase").realWordErrorLikelihood(0.95f).field("bigram").analyzer("ngram").maxErrors(0.5f) .size(1)).execute().actionGet(); - SearchResponse search = client.prepareSearch() .setSearchType(SearchType.COUNT) .setSuggestText("Xor the Got-Jewel") diff --git a/src/test/java/org/elasticsearch/test/unit/search/suggest/phrase/NoisyChannelSpellCheckerTests.java b/src/test/java/org/elasticsearch/test/unit/search/suggest/phrase/NoisyChannelSpellCheckerTests.java index 1c9a4f996fc..21bddf9b271 100644 --- a/src/test/java/org/elasticsearch/test/unit/search/suggest/phrase/NoisyChannelSpellCheckerTests.java +++ b/src/test/java/org/elasticsearch/test/unit/search/suggest/phrase/NoisyChannelSpellCheckerTests.java @@ -108,25 +108,25 @@ public class NoisyChannelSpellCheckerTests { NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker(); spellchecker.setMinQueryLength(1); - DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95); - Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2); + DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5); + Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); - corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 0, 1); + corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame")); suggester = new NoisyChannelSpellChecker(0.85); wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2); assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel")); assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel")); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); @@ -158,11 +158,11 @@ public class NoisyChannelSpellCheckerTests { spellchecker.setMinQueryLength(1); suggester = new NoisyChannelSpellChecker(0.85); wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); - corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2); + corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); - generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, null, analyzer); - corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2); + generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer); + corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); } @@ -219,26 +219,27 @@ public class NoisyChannelSpellCheckerTests { NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker(); spellchecker.setMinQueryLength(1); - DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95); - DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, wrapper, wrapper); - CandidateGenerator generator = new MultiCandidateGeneratorWrapper(forward, reverse); + DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10); + DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper); + CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse); - Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 10, 1, 1, ir, "body", wordScorer, 1, 2); + Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); - corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2); + generator = new MultiCandidateGeneratorWrapper(5, forward, reverse); + corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); - corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 5, 1, 1, ir, "body", wordScorer, 1, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix - corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 5, 2, 1, ir, "body", wordScorer, 1, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); - corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2); assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel")); @@ -247,11 +248,11 @@ public class NoisyChannelSpellCheckerTests { - corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); @@ -303,17 +304,17 @@ public class NoisyChannelSpellCheckerTests { NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker(); spellchecker.setMinQueryLength(1); - DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95); - Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 3); + DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5); + Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); - corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 1); + corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1); assertThat(corrections.length, equalTo(0)); // assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape")); wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 3); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3); assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); @@ -323,7 +324,7 @@ public class NoisyChannelSpellCheckerTests { - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 3); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3); assertThat(corrections.length, equalTo(4)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); @@ -331,7 +332,7 @@ public class NoisyChannelSpellCheckerTests { assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel")); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 100, 3); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3); assertThat(corrections.length, equalTo(1)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); @@ -360,16 +361,16 @@ public class NoisyChannelSpellCheckerTests { spellchecker.setMinQueryLength(1); suggester = new NoisyChannelSpellChecker(0.95); wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1); - corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3); + corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); - generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, null, analyzer); - corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3); + generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer); + corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4); - corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 2, ir, "body", wordScorer, 0, 3); + corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3); assertThat(corrections.length, equalTo(2)); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));