From c90c5cbf85ba94342885159e7dd2e411b3a7005a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 28 Feb 2013 21:19:24 +0100 Subject: [PATCH] fix bug in StupidBackoffScorer where the previous word and current word were flipped, creating a non-existent bigram --- .../search/suggest/phrase/StupidBackoffScorer.java | 10 +++++++--- .../integration/search/suggest/SuggestSearchTests.java | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java b/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java index 0657762b791..4754334bfa8 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java @@ -43,7 +43,7 @@ public class StupidBackoffScorer extends WordScorer { @Override protected double scoreBigram(Candidate word, Candidate w_1) throws IOException { - SuggestUtils.join(separator, spare, word.term, w_1.term); + SuggestUtils.join(separator, spare, w_1.term, word.term); final int count = frequency(spare); if (count < 1) { return discount * scoreUnigram(word); @@ -55,9 +55,13 @@ public class StupidBackoffScorer extends WordScorer { protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws IOException { SuggestUtils.join(separator, spare, w_2.term, w_1.term, w.term); final int trigramCount = frequency(spare); - if (trigramCount < 1) { - return discount * scoreBigram(w, w_1); + SuggestUtils.join(separator, spare, w_1.term, w.term); + final int count = frequency(spare); + if (count < 1) { + return discount * scoreUnigram(w); + } + return discount * (count / (w_1.frequency + 0.00000000001d)); } SuggestUtils.join(separator, spare, w_1.term, w.term); final int bigramCount = frequency(spare); diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java 
b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java index 58d1528d443..4fba878597d 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java @@ -456,14 +456,14 @@ public class SuggestSearchTests extends AbstractNodesTests { .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always")) .maxErrors(0.5f) .confidence(0.f) - .size(2)) + .size(1)) .execute().actionGet(); assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0)); assertThat(search.getSuggest(), notNullValue()); assertThat(search.getSuggest().size(), equalTo(1)); assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1)); - assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(2)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xorr the God-Jewel")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel")); @@ -648,7 +648,7 @@ public class SuggestSearchTests extends AbstractNodesTests { .addSuggestion(phraseSuggestion("simple_phrase"). 
realWordErrorLikelihood(0.95f).field("bigram").gramSize(2).analyzer("body") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always")) - .smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(1.0)) + .smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)) .maxErrors(0.5f) .size(1)) .execute().actionGet();