Respect CandidateGenerator#size if set in the request and reduce the total number of candidates to the shard size.

Closes #2752
This commit is contained in:
Simon Willnauer 2013-03-08 19:37:39 +01:00
parent cc6c07365c
commit dc9a052287
8 changed files with 135 additions and 45 deletions

View File

@ -82,6 +82,7 @@ public final class SuggestUtils {
directSpellChecker.setMinPrefix(suggestion.prefixLength()); directSpellChecker.setMinPrefix(suggestion.prefixLength());
directSpellChecker.setMinQueryLength(suggestion.minWordLength()); directSpellChecker.setMinQueryLength(suggestion.minWordLength());
directSpellChecker.setThresholdFrequency(suggestion.minDocFreq()); directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
directSpellChecker.setLowerCaseTerms(false);
return directSpellChecker; return directSpellChecker;
} }

View File

@ -31,9 +31,9 @@ public abstract class CandidateGenerator {
public abstract long frequency(BytesRef term) throws IOException; public abstract long frequency(BytesRef term) throws IOException;
public CandidateSet drawCandidates(BytesRef term, int numCandidates) throws IOException { public CandidateSet drawCandidates(BytesRef term) throws IOException {
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term)); CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term));
return drawCandidates(set, numCandidates); return drawCandidates(set);
} }
public Candidate createCandidate(BytesRef term) throws IOException { public Candidate createCandidate(BytesRef term) throws IOException {
@ -41,6 +41,6 @@ public abstract class CandidateGenerator {
} }
public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException; public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException;
public abstract CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException; public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException;
} }

View File

@ -55,15 +55,17 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final boolean useTotalTermFrequency; private final boolean useTotalTermFrequency;
private final CharsRef spare = new CharsRef(); private final CharsRef spare = new CharsRef();
private final BytesRef byteSpare = new BytesRef(); private final BytesRef byteSpare = new BytesRef();
private final int numCandidates;
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood) throws IOException { public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, null, null); this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
} }
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, Analyzer preFilter, Analyzer postFilter) throws IOException { public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
this.spellchecker = spellchecker; this.spellchecker = spellchecker;
this.field = field; this.field = field;
this.numCandidates = numCandidates;
this.suggestMode = suggestMode; this.suggestMode = suggestMode;
this.reader = reader; this.reader = reader;
Terms terms = MultiFields.getTerms(reader, field); Terms terms = MultiFields.getTerms(reader, field);
@ -114,7 +116,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
* @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#drawCandidates(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet, int) * @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#drawCandidates(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet, int)
*/ */
@Override @Override
public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException { public CandidateSet drawCandidates(CandidateSet set) throws IOException {
Candidate original = set.originalTerm; Candidate original = set.originalTerm;
BytesRef term = preFilter(original.term, spare, byteSpare); BytesRef term = preFilter(original.term, spare, byteSpare);
final long frequency = original.frequency; final long frequency = original.frequency;

View File

@ -30,9 +30,11 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator {
private final CandidateGenerator[] candidateGenerator; private final CandidateGenerator[] candidateGenerator;
private int numCandidates ;
public MultiCandidateGeneratorWrapper(CandidateGenerator...candidateGenerators) { public MultiCandidateGeneratorWrapper(int numCandidates, CandidateGenerator...candidateGenerators) {
this.candidateGenerator = candidateGenerators; this.candidateGenerator = candidateGenerators;
this.numCandidates = numCandidates;
} }
@Override @Override
public boolean isKnownWord(BytesRef term) throws IOException { public boolean isKnownWord(BytesRef term) throws IOException {
@ -45,9 +47,9 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator {
} }
@Override @Override
public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException { public CandidateSet drawCandidates(CandidateSet set) throws IOException {
for (CandidateGenerator generator : candidateGenerator) { for (CandidateGenerator generator : candidateGenerator) {
generator.drawCandidates(set, numCandidates); generator.drawCandidates(set);
} }
return reduce(set, numCandidates); return reduce(set, numCandidates);
} }

View File

@ -55,7 +55,7 @@ public final class NoisyChannelSpellChecker {
this.requireUnigram = requireUnigram; this.requireUnigram = requireUnigram;
} }
public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator, final int numCandidates, public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator,
float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException { float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException {
final List<CandidateSet> candidateSetsList = new ArrayList<DirectCandidateGenerator.CandidateSet>(); final List<CandidateSet> candidateSetsList = new ArrayList<DirectCandidateGenerator.CandidateSet>();
@ -105,7 +105,7 @@ public final class NoisyChannelSpellChecker {
}); });
for (CandidateSet candidateSet : candidateSetsList) { for (CandidateSet candidateSet : candidateSetsList) {
generator.drawCandidates(candidateSet, numCandidates); generator.drawCandidates(candidateSet);
} }
double cutoffScore = Double.MIN_VALUE; double cutoffScore = Double.MIN_VALUE;
CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize); CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
@ -122,10 +122,10 @@ public final class NoisyChannelSpellChecker {
return findBestCandiates; return findBestCandiates;
} }
public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, int numCandidates, public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator,
float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException { float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException {
return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, numCandidates, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize); return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize);
} }

View File

@ -58,7 +58,7 @@ final class PhraseSuggester implements Suggester<PhraseSuggestionContext> {
for (int i = 0; i < gens.length; i++) { for (int i = 0; i < gens.length; i++) {
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i); PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator); DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.preFilter(), generator.postFilter()); gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
} }
@ -66,7 +66,7 @@ final class PhraseSuggester implements Suggester<PhraseSuggestionContext> {
final BytesRef separator = suggestion.separator(); final BytesRef separator = suggestion.separator();
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField()); TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator); WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(gens), suggestion.getShardSize(), suggestion.maxErrors(), Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize()); suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare); UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);

View File

@ -725,6 +725,91 @@ public class SuggestSearchTests extends AbstractNodesTests {
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
}
/**
 * Exercises the {@code size} setting of a phrase-suggester candidate generator.
 *
 * The same phrase suggestion for "Xorr the Gut-Jewel" is issued twice against a
 * single-shard index holding two documents; only the generator's {@code size(...)}
 * differs between the two requests. With {@code size(1)} the test expects no
 * options, with {@code size(2)} it expects exactly one option,
 * "xorr the god jewel".
 *
 * @throws IOException declared because the test builds JSON content via XContentFactory
 */
@Test
public void testSizePararm() throws IOException {
// No index name is passed here — presumably this removes every existing index so the test starts clean.
client.admin().indices().prepareDelete().execute().actionGet();
// Index settings: one shard, plus three custom analyzers ("reverse", "body", "bigram").
Builder builder = ImmutableSettings.builder();
builder.put("index.number_of_shards", 1);
builder.put("index.number_of_replicas", 1);
builder.put("index.analysis.analyzer.reverse.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse");
builder.put("index.analysis.analyzer.body.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.body.filter", "lowercase");
builder.put("index.analysis.analyzer.bigram.tokenizer", "standard");
builder.putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase");
// "my_shingle" emits only 2-word shingles (min = max = 2, unigrams disabled).
builder.put("index.analysis.filter.my_shingle.type", "shingle");
builder.put("index.analysis.filter.my_shingle.output_unigrams", false);
builder.put("index.analysis.filter.my_shingle.min_shingle_size", 2);
builder.put("index.analysis.filter.my_shingle.max_shingle_size", 2);
// Mapping for "type1": fields "body", "body_reverse" and "bigram", each bound to the analyzer of matching purpose.
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("_all")
.field("store", "yes").field("termVector", "with_positions_offsets").endObject().startObject("properties")
.startObject("body").field("type", "string").field("analyzer", "body").endObject().startObject("body_reverse")
.field("type", "string").field("analyzer", "reverse").endObject().startObject("bigram").field("type", "string")
.field("analyzer", "bigram").endObject().endObject().endObject().endObject();
client.admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
// Index two documents; each line is written to all three fields.
String line = "xorr the god jewel";
client.prepareIndex("test", "type1")
.setSource(
XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line)
.endObject()).execute().actionGet();
line = "I got it this time";
client.prepareIndex("test", "type1")
.setSource(
XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line)
.endObject()).execute().actionGet();
// Refresh so both documents are visible to the searches below.
client.admin().indices().prepareRefresh().execute().actionGet();
// First request: candidate generator limited to size(1).
SearchResponse search = client // initially draw candidates with a size 1 so "got" will be the only candidate since it's LD1
.prepareSearch()
.setSearchType(SearchType.COUNT)
.setSuggestText("Xorr the Gut-Jewel")
.addSuggestion(
phraseSuggestion("simple_phrase")
.realWordErrorLikelihood(0.95f)
.field("bigram")
.gramSize(2)
.analyzer("body")
.addCandidateGenerator(
PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1)
.suggestMode("always").size(1).accuracy(0.1f))
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute()
.actionGet();
// Shard failures are rendered into the assertion message to ease debugging.
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
assertThat(search.getSuggest(), notNullValue());
assertThat(search.getSuggest().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
// With size(1) no phrase correction is expected.
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(0));
// Second request: identical except for size(2) on the candidate generator.
search = client // we allow a size of 2 now on the shard generator level so "god" will be found since it's LD2
.prepareSearch()
.setSearchType(SearchType.COUNT)
.setSuggestText("Xorr the Gut-Jewel")
.addSuggestion(
phraseSuggestion("simple_phrase")
.realWordErrorLikelihood(0.95f)
.field("bigram")
.gramSize(2)
.analyzer("body")
.addCandidateGenerator(
PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1)
.suggestMode("always").size(2).accuracy(0.1f))
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute()
.actionGet();
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
assertThat(search.getSuggest(), notNullValue());
assertThat(search.getSuggest().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
// With size(2) exactly one correction is expected, and the entry text echoes the original input.
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xorr the Gut-Jewel"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(),
equalTo("xorr the god jewel"));
} }
@ -831,7 +916,6 @@ public class SuggestSearchTests extends AbstractNodesTests {
phraseSuggestion("simple_phrase").realWordErrorLikelihood(0.95f).field("bigram").analyzer("ngram").maxErrors(0.5f) phraseSuggestion("simple_phrase").realWordErrorLikelihood(0.95f).field("bigram").analyzer("ngram").maxErrors(0.5f)
.size(1)).execute().actionGet(); .size(1)).execute().actionGet();
SearchResponse search = client.prepareSearch() SearchResponse search = client.prepareSearch()
.setSearchType(SearchType.COUNT) .setSearchType(SearchType.COUNT)
.setSuggestText("Xor the Got-Jewel") .setSuggestText("Xor the Got-Jewel")

View File

@ -108,25 +108,25 @@ public class NoisyChannelSpellCheckerTests {
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95); DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2); Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 0, 1); corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame"));
suggester = new NoisyChannelSpellChecker(0.85); suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel")); assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel"));
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel")); assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@ -158,11 +158,11 @@ public class NoisyChannelSpellCheckerTests {
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.85); suggester = new NoisyChannelSpellChecker(0.85);
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f); wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, null, analyzer); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
} }
@ -219,26 +219,27 @@ public class NoisyChannelSpellCheckerTests {
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95); DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, wrapper, wrapper); DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(forward, reverse); CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 10, 1, 1, ir, "body", wordScorer, 1, 2); Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2); generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 5, 1, 1, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 5, 2, 1, ir, "body", wordScorer, 1, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
@ -247,11 +248,11 @@ public class NoisyChannelSpellCheckerTests {
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
@ -303,17 +304,17 @@ public class NoisyChannelSpellCheckerTests {
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker(); NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker(); DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95); DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 3); Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 1); corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1);
assertThat(corrections.length, equalTo(0)); assertThat(corrections.length, equalTo(0));
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape")); // assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1); wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@ -323,7 +324,7 @@ public class NoisyChannelSpellCheckerTests {
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections.length, equalTo(4)); assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@ -331,7 +332,7 @@ public class NoisyChannelSpellCheckerTests {
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel")); assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 100, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3);
assertThat(corrections.length, equalTo(1)); assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
@ -360,16 +361,16 @@ public class NoisyChannelSpellCheckerTests {
spellchecker.setMinQueryLength(1); spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.95); suggester = new NoisyChannelSpellChecker(0.95);
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1); wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, null, analyzer); generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer);
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3); corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4); wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 2, ir, "body", wordScorer, 0, 3); corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
assertThat(corrections.length, equalTo(2)); assertThat(corrections.length, equalTo(2));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel")); assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel")); assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));