Respect CandidateGenerator#size if set in the request, and reduce the total number of candidates to the shard size.
Closes #2752
This commit is contained in:
parent
cc6c07365c
commit
dc9a052287
|
@ -82,6 +82,7 @@ public final class SuggestUtils {
|
||||||
directSpellChecker.setMinPrefix(suggestion.prefixLength());
|
directSpellChecker.setMinPrefix(suggestion.prefixLength());
|
||||||
directSpellChecker.setMinQueryLength(suggestion.minWordLength());
|
directSpellChecker.setMinQueryLength(suggestion.minWordLength());
|
||||||
directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
|
directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
|
||||||
|
directSpellChecker.setLowerCaseTerms(false);
|
||||||
return directSpellChecker;
|
return directSpellChecker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,9 +31,9 @@ public abstract class CandidateGenerator {
|
||||||
|
|
||||||
public abstract long frequency(BytesRef term) throws IOException;
|
public abstract long frequency(BytesRef term) throws IOException;
|
||||||
|
|
||||||
public CandidateSet drawCandidates(BytesRef term, int numCandidates) throws IOException {
|
public CandidateSet drawCandidates(BytesRef term) throws IOException {
|
||||||
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term));
|
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term));
|
||||||
return drawCandidates(set, numCandidates);
|
return drawCandidates(set);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Candidate createCandidate(BytesRef term) throws IOException {
|
public Candidate createCandidate(BytesRef term) throws IOException {
|
||||||
|
@ -41,6 +41,6 @@ public abstract class CandidateGenerator {
|
||||||
}
|
}
|
||||||
public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException;
|
public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException;
|
||||||
|
|
||||||
public abstract CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException;
|
public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException;
|
||||||
|
|
||||||
}
|
}
|
|
@ -55,15 +55,17 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||||
private final boolean useTotalTermFrequency;
|
private final boolean useTotalTermFrequency;
|
||||||
private final CharsRef spare = new CharsRef();
|
private final CharsRef spare = new CharsRef();
|
||||||
private final BytesRef byteSpare = new BytesRef();
|
private final BytesRef byteSpare = new BytesRef();
|
||||||
|
private final int numCandidates;
|
||||||
|
|
||||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood) throws IOException {
|
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
|
||||||
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, null, null);
|
this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, Analyzer preFilter, Analyzer postFilter) throws IOException {
|
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter) throws IOException {
|
||||||
this.spellchecker = spellchecker;
|
this.spellchecker = spellchecker;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
|
this.numCandidates = numCandidates;
|
||||||
this.suggestMode = suggestMode;
|
this.suggestMode = suggestMode;
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
Terms terms = MultiFields.getTerms(reader, field);
|
Terms terms = MultiFields.getTerms(reader, field);
|
||||||
|
@ -114,7 +116,7 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||||
* @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#drawCandidates(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet, int)
|
* @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#drawCandidates(org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet, int)
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException {
|
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
|
||||||
Candidate original = set.originalTerm;
|
Candidate original = set.originalTerm;
|
||||||
BytesRef term = preFilter(original.term, spare, byteSpare);
|
BytesRef term = preFilter(original.term, spare, byteSpare);
|
||||||
final long frequency = original.frequency;
|
final long frequency = original.frequency;
|
||||||
|
|
|
@ -30,9 +30,11 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator {
|
||||||
|
|
||||||
|
|
||||||
private final CandidateGenerator[] candidateGenerator;
|
private final CandidateGenerator[] candidateGenerator;
|
||||||
|
private int numCandidates ;
|
||||||
|
|
||||||
public MultiCandidateGeneratorWrapper(CandidateGenerator...candidateGenerators) {
|
public MultiCandidateGeneratorWrapper(int numCandidates, CandidateGenerator...candidateGenerators) {
|
||||||
this.candidateGenerator = candidateGenerators;
|
this.candidateGenerator = candidateGenerators;
|
||||||
|
this.numCandidates = numCandidates;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public boolean isKnownWord(BytesRef term) throws IOException {
|
public boolean isKnownWord(BytesRef term) throws IOException {
|
||||||
|
@ -45,9 +47,9 @@ public final class MultiCandidateGeneratorWrapper extends CandidateGenerator {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public CandidateSet drawCandidates(CandidateSet set, int numCandidates) throws IOException {
|
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
|
||||||
for (CandidateGenerator generator : candidateGenerator) {
|
for (CandidateGenerator generator : candidateGenerator) {
|
||||||
generator.drawCandidates(set, numCandidates);
|
generator.drawCandidates(set);
|
||||||
}
|
}
|
||||||
return reduce(set, numCandidates);
|
return reduce(set, numCandidates);
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,7 @@ public final class NoisyChannelSpellChecker {
|
||||||
this.requireUnigram = requireUnigram;
|
this.requireUnigram = requireUnigram;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator, final int numCandidates,
|
public Correction[] getCorrections(TokenStream stream, final CandidateGenerator generator,
|
||||||
float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException {
|
float maxErrors, int numCorrections, IndexReader reader, WordScorer wordScorer, BytesRef separator, float confidence, int gramSize) throws IOException {
|
||||||
|
|
||||||
final List<CandidateSet> candidateSetsList = new ArrayList<DirectCandidateGenerator.CandidateSet>();
|
final List<CandidateSet> candidateSetsList = new ArrayList<DirectCandidateGenerator.CandidateSet>();
|
||||||
|
@ -105,7 +105,7 @@ public final class NoisyChannelSpellChecker {
|
||||||
});
|
});
|
||||||
|
|
||||||
for (CandidateSet candidateSet : candidateSetsList) {
|
for (CandidateSet candidateSet : candidateSetsList) {
|
||||||
generator.drawCandidates(candidateSet, numCandidates);
|
generator.drawCandidates(candidateSet);
|
||||||
}
|
}
|
||||||
double cutoffScore = Double.MIN_VALUE;
|
double cutoffScore = Double.MIN_VALUE;
|
||||||
CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
|
CandidateScorer scorer = new CandidateScorer(wordScorer, numCorrections, gramSize);
|
||||||
|
@ -122,10 +122,10 @@ public final class NoisyChannelSpellChecker {
|
||||||
return findBestCandiates;
|
return findBestCandiates;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, int numCandidates,
|
public Correction[] getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator,
|
||||||
float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException {
|
float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException {
|
||||||
|
|
||||||
return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, numCandidates, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize);
|
return getCorrections(tokenStream(analyzer, query, new CharsRef(), analysisField), generator, maxErrors, numCorrections, reader, scorer, new BytesRef(" "), confidence, gramSize);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ final class PhraseSuggester implements Suggester<PhraseSuggestionContext> {
|
||||||
for (int i = 0; i < gens.length; i++) {
|
for (int i = 0; i < gens.length; i++) {
|
||||||
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i);
|
||||||
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(generator);
|
||||||
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.preFilter(), generator.postFilter());
|
gens[i] = new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), indexReader, realWordErrorLikelihood, generator.size(), generator.preFilter(), generator.postFilter());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ final class PhraseSuggester implements Suggester<PhraseSuggestionContext> {
|
||||||
final BytesRef separator = suggestion.separator();
|
final BytesRef separator = suggestion.separator();
|
||||||
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
||||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
|
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestion.getField(), realWordErrorLikelihood, separator);
|
||||||
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(gens), suggestion.getShardSize(), suggestion.maxErrors(),
|
Correction[] corrections = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(), gens), suggestion.maxErrors(),
|
||||||
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
suggestion.getShardSize(), indexReader,wordScorer , separator, suggestion.confidence(), suggestion.gramSize());
|
||||||
|
|
||||||
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare);
|
||||||
|
|
|
@ -725,6 +725,91 @@ public class SuggestSearchTests extends AbstractNodesTests {
|
||||||
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
|
||||||
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel"));
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel"));
|
||||||
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSizePararm() throws IOException {
|
||||||
|
client.admin().indices().prepareDelete().execute().actionGet();
|
||||||
|
Builder builder = ImmutableSettings.builder();
|
||||||
|
builder.put("index.number_of_shards", 1);
|
||||||
|
builder.put("index.number_of_replicas", 1);
|
||||||
|
builder.put("index.analysis.analyzer.reverse.tokenizer", "standard");
|
||||||
|
builder.putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse");
|
||||||
|
builder.put("index.analysis.analyzer.body.tokenizer", "standard");
|
||||||
|
builder.putArray("index.analysis.analyzer.body.filter", "lowercase");
|
||||||
|
builder.put("index.analysis.analyzer.bigram.tokenizer", "standard");
|
||||||
|
builder.putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase");
|
||||||
|
builder.put("index.analysis.filter.my_shingle.type", "shingle");
|
||||||
|
builder.put("index.analysis.filter.my_shingle.output_unigrams", false);
|
||||||
|
builder.put("index.analysis.filter.my_shingle.min_shingle_size", 2);
|
||||||
|
builder.put("index.analysis.filter.my_shingle.max_shingle_size", 2);
|
||||||
|
|
||||||
|
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("_all")
|
||||||
|
.field("store", "yes").field("termVector", "with_positions_offsets").endObject().startObject("properties")
|
||||||
|
.startObject("body").field("type", "string").field("analyzer", "body").endObject().startObject("body_reverse")
|
||||||
|
.field("type", "string").field("analyzer", "reverse").endObject().startObject("bigram").field("type", "string")
|
||||||
|
.field("analyzer", "bigram").endObject().endObject().endObject().endObject();
|
||||||
|
|
||||||
|
client.admin().indices().prepareCreate("test").setSettings(builder.build()).addMapping("type1", mapping).execute().actionGet();
|
||||||
|
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
|
||||||
|
String line = "xorr the god jewel";
|
||||||
|
client.prepareIndex("test", "type1")
|
||||||
|
.setSource(
|
||||||
|
XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line)
|
||||||
|
.endObject()).execute().actionGet();
|
||||||
|
line = "I got it this time";
|
||||||
|
client.prepareIndex("test", "type1")
|
||||||
|
.setSource(
|
||||||
|
XContentFactory.jsonBuilder().startObject().field("body", line).field("body_reverse", line).field("bigram", line)
|
||||||
|
.endObject()).execute().actionGet();
|
||||||
|
client.admin().indices().prepareRefresh().execute().actionGet();
|
||||||
|
SearchResponse search = client // initially draw candidates with a size 1 so "got" will be the only candidate since it's LD1
|
||||||
|
.prepareSearch()
|
||||||
|
.setSearchType(SearchType.COUNT)
|
||||||
|
.setSuggestText("Xorr the Gut-Jewel")
|
||||||
|
.addSuggestion(
|
||||||
|
phraseSuggestion("simple_phrase")
|
||||||
|
.realWordErrorLikelihood(0.95f)
|
||||||
|
.field("bigram")
|
||||||
|
.gramSize(2)
|
||||||
|
.analyzer("body")
|
||||||
|
.addCandidateGenerator(
|
||||||
|
PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1)
|
||||||
|
.suggestMode("always").size(1).accuracy(0.1f))
|
||||||
|
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute()
|
||||||
|
.actionGet();
|
||||||
|
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
|
||||||
|
assertThat(search.getSuggest(), notNullValue());
|
||||||
|
assertThat(search.getSuggest().size(), equalTo(1));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(0));
|
||||||
|
|
||||||
|
search = client // we allow a size of 2 now on the shard generator level so "god" will be found since it's LD2
|
||||||
|
.prepareSearch()
|
||||||
|
.setSearchType(SearchType.COUNT)
|
||||||
|
.setSuggestText("Xorr the Gut-Jewel")
|
||||||
|
.addSuggestion(
|
||||||
|
phraseSuggestion("simple_phrase")
|
||||||
|
.realWordErrorLikelihood(0.95f)
|
||||||
|
.field("bigram")
|
||||||
|
.gramSize(2)
|
||||||
|
.analyzer("body")
|
||||||
|
.addCandidateGenerator(
|
||||||
|
PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).prefixLength(1)
|
||||||
|
.suggestMode("always").size(2).accuracy(0.1f))
|
||||||
|
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)).maxErrors(1.0f).size(5)).execute()
|
||||||
|
.actionGet();
|
||||||
|
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
|
||||||
|
assertThat(search.getSuggest(), notNullValue());
|
||||||
|
assertThat(search.getSuggest().size(), equalTo(1));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xorr the Gut-Jewel"));
|
||||||
|
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(),
|
||||||
|
equalTo("xorr the god jewel"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -831,7 +916,6 @@ public class SuggestSearchTests extends AbstractNodesTests {
|
||||||
phraseSuggestion("simple_phrase").realWordErrorLikelihood(0.95f).field("bigram").analyzer("ngram").maxErrors(0.5f)
|
phraseSuggestion("simple_phrase").realWordErrorLikelihood(0.95f).field("bigram").analyzer("ngram").maxErrors(0.5f)
|
||||||
.size(1)).execute().actionGet();
|
.size(1)).execute().actionGet();
|
||||||
|
|
||||||
|
|
||||||
SearchResponse search = client.prepareSearch()
|
SearchResponse search = client.prepareSearch()
|
||||||
.setSearchType(SearchType.COUNT)
|
.setSearchType(SearchType.COUNT)
|
||||||
.setSuggestText("Xor the Got-Jewel")
|
.setSuggestText("Xor the Got-Jewel")
|
||||||
|
|
|
@ -108,25 +108,25 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95);
|
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
|
||||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2);
|
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 0, 1);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ame"));
|
||||||
|
|
||||||
suggester = new NoisyChannelSpellChecker(0.85);
|
suggester = new NoisyChannelSpellChecker(0.85);
|
||||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||||
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel"));
|
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("xorn the god jewel"));
|
||||||
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||||
|
@ -158,11 +158,11 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
suggester = new NoisyChannelSpellChecker(0.85);
|
suggester = new NoisyChannelSpellChecker(0.85);
|
||||||
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
wordScorer = new LaplaceScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
|
|
||||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, null, analyzer);
|
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,26 +219,27 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95);
|
DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
|
||||||
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, wrapper, wrapper);
|
DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper);
|
||||||
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(forward, reverse);
|
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
|
||||||
|
|
||||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 10, 1, 1, ir, "body", wordScorer, 1, 2);
|
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 2);
|
generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
|
||||||
|
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 5, 1, 1, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix
|
assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 5, 2, 1, ir, "body", wordScorer, 1, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
|
||||||
|
@ -247,11 +248,11 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
|
||||||
|
@ -303,17 +304,17 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
|
||||||
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
DirectSpellChecker spellchecker = new DirectSpellChecker();
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95);
|
DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
|
||||||
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 3);
|
Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 5, 1, 1, ir, "body", wordScorer, 1, 1);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1);
|
||||||
assertThat(corrections.length, equalTo(0));
|
assertThat(corrections.length, equalTo(0));
|
||||||
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
|
||||||
|
|
||||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||||
|
@ -323,7 +324,7 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 4, ir, "body", wordScorer, 1, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections.length, equalTo(4));
|
assertThat(corrections.length, equalTo(4));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||||
|
@ -331,7 +332,7 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
|
||||||
|
|
||||||
|
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 1, ir, "body", wordScorer, 100, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3);
|
||||||
assertThat(corrections.length, equalTo(1));
|
assertThat(corrections.length, equalTo(1));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
|
|
||||||
|
@ -360,16 +361,16 @@ public class NoisyChannelSpellCheckerTests {
|
||||||
spellchecker.setMinQueryLength(1);
|
spellchecker.setMinQueryLength(1);
|
||||||
suggester = new NoisyChannelSpellChecker(0.95);
|
suggester = new NoisyChannelSpellChecker(0.95);
|
||||||
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
wordScorer = new LinearInterpoatingScorer(ir, "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
|
|
||||||
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, null, analyzer);
|
generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer);
|
||||||
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 10, 2, 4, ir, "body", wordScorer, 1, 3);
|
corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3);
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
|
||||||
|
|
||||||
|
|
||||||
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
wordScorer = new StupidBackoffScorer(ir, "body_ngram", 0.85d, new BytesRef(" "), 0.4);
|
||||||
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 5, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3);
|
||||||
assertThat(corrections.length, equalTo(2));
|
assertThat(corrections.length, equalTo(2));
|
||||||
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
|
||||||
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
|
||||||
|
|
Loading…
Reference in New Issue