fix bug in StupidBackoffScorer where the previous word and current word were flipped, creating a non-existent bigram

This commit is contained in:
Simon Willnauer 2013-02-28 21:19:24 +01:00
parent b4b3e350a6
commit c90c5cbf85
2 changed files with 10 additions and 6 deletions

View File

@ -43,7 +43,7 @@ public class StupidBackoffScorer extends WordScorer {
@Override
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
SuggestUtils.join(separator, spare, word.term, w_1.term);
SuggestUtils.join(separator, spare, w_1.term, word.term);
final int count = frequency(spare);
if (count < 1) {
return discount * scoreUnigram(word);
@ -55,9 +55,13 @@ public class StupidBackoffScorer extends WordScorer {
protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException {
SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term);
final int trigramCount = frequency(spare);
if (trigramCount < 1) {
return discount * scoreBigram(w, w_1);
SuggestUtils.join(separator, spare, w_1.term, w.term);
final int count = frequency(spare);
if (count < 1) {
return discount * scoreUnigram(w);
}
return discount * (count / (w_1.frequency + 0.00000000001d));
}
SuggestUtils.join(separator, spare, w_1.term, w.term);
final int bigramCount = frequency(spare);

View File

@ -456,14 +456,14 @@ public class SuggestSearchTests extends AbstractNodesTests {
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always"))
.maxErrors(0.5f)
.confidence(0.f)
.size(2))
.size(1))
.execute().actionGet();
assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0));
assertThat(search.getSuggest(), notNullValue());
assertThat(search.getSuggest().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(2));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xorr the God-Jewel"));
assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel"));
@ -648,7 +648,7 @@ public class SuggestSearchTests extends AbstractNodesTests {
.addSuggestion(phraseSuggestion("simple_phrase").
realWordErrorLikelihood(0.95f).field("bigram").gramSize(2).analyzer("body")
.addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always"))
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(1.0))
.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1))
.maxErrors(0.5f)
.size(1))
.execute().actionGet();