From 1f217f6a7b6a37d777e2801d2d7cedc1d32292fa Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 6 Mar 2013 14:31:21 +0100 Subject: [PATCH] Move smoothing model into its own sub-object in the PhraseSuggest request Closes #2735 --- .../suggest/phrase/PhraseSuggestParser.java | 191 +++++++++--------- .../phrase/PhraseSuggestionBuilder.java | 18 +- 2 files changed, 109 insertions(+), 100 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index 10373bed595..8be4326bbbc 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -89,8 +89,9 @@ public final class PhraseSuggestParser implements SuggestContextParser { gramSizeSet = true; } else if ("force_unigrams".equals(fieldName)) { suggestion.setRequireUnigram(parser.booleanValue()); + } else { + throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support field [" + fieldName + "]"); } - } } else if (token == Token.START_ARRAY) { if ("direct_generator".equals(fieldName)) { @@ -111,97 +112,8 @@ public final class PhraseSuggestParser implements SuggestContextParser { } else { throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]"); } - } else if (token == Token.START_OBJECT) { - if ("linear".equals(fieldName)) { - ensureNoSmoothing(suggestion); - final double[] lambdas = new double[3]; - while ((token = parser.nextToken()) != Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } - if (token.isValue()) { - if ("trigram_lambda".equals(fieldName)) { - lambdas[0] = parser.doubleValue(); - if (lambdas[0] < 0) { - throw new ElasticSearchIllegalArgumentException("trigram_lambda must be positive"); - } - } - if ("bigram_lambda".equals(fieldName)) { - lambdas[1] = parser.doubleValue(); - if (lambdas[1] < 0) { - throw new ElasticSearchIllegalArgumentException("bigram_lambda must be positive"); - } - } - if ("unigram_lambda".equals(fieldName)) { - lambdas[2] = parser.doubleValue(); - if (lambdas[2] < 0) { - throw new ElasticSearchIllegalArgumentException("unigram_lambda must be positive"); - } - } - } - } - double sum = 0.0d; - for (int i = 0; i < lambdas.length; i++) { - sum += lambdas[i]; - } - if (Math.abs(sum - 1.0) > 0.001) { - throw new ElasticSearchIllegalArgumentException("linear smoothing lambdas must sum to 1"); - } - suggestion.setModel(new WordScorer.WordScorerFactory() { - @Override - public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) - throws IOException { - return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1], - lambdas[2]); - } - }); - } else if ("laplace".equals(fieldName)) { - ensureNoSmoothing(suggestion); - double theAlpha = 0.5; - - while ((token = parser.nextToken()) != Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } - if (token.isValue()) { - if ("alpha".equals(fieldName)) { - theAlpha = parser.doubleValue(); - } - } - } - final double alpha = theAlpha; - suggestion.setModel( new WordScorer.WordScorerFactory() { - @Override - public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { - return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha); - } - }); - - } else if ("stupid_backoff".equals(fieldName)) { - ensureNoSmoothing(suggestion); - double theDiscount = 0.4; - while ((token = parser.nextToken()) != Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } - if (token.isValue()) { - if ("discount".equals(fieldName)) { - theDiscount = parser.doubleValue(); - } - } - } - final double discount = theDiscount; - suggestion.setModel( new WordScorer.WordScorerFactory() { - @Override - public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { - return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount); - } - }); - - } else { - throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support object field [" + fieldName + "]"); - } - + } else if (token == Token.START_OBJECT && "smoothing".equals(fieldName)) { + parseSmoothingModel(parser, suggestion, fieldName); } else { throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support field [" + fieldName + "]"); } @@ -242,6 +154,101 @@ public final class PhraseSuggestParser implements SuggestContextParser { return suggestion; } + public void parseSmoothingModel(XContentParser parser, PhraseSuggestionContext suggestion, String fieldName) throws IOException { + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + break; + } + } + if ("linear".equals(fieldName)) { + ensureNoSmoothing(suggestion); + final double[] lambdas = new double[3]; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } + if (token.isValue()) { + if ("trigram_lambda".equals(fieldName)) { + lambdas[0] = parser.doubleValue(); + if (lambdas[0] < 0) { + throw new ElasticSearchIllegalArgumentException("trigram_lambda must be positive"); + } + } else if ("bigram_lambda".equals(fieldName)) { + lambdas[1] = parser.doubleValue(); + if (lambdas[1] < 0) { + throw new ElasticSearchIllegalArgumentException("bigram_lambda must be positive"); + } + } else if ("unigram_lambda".equals(fieldName)) { + lambdas[2] = parser.doubleValue(); + if (lambdas[2] < 0) { + throw new ElasticSearchIllegalArgumentException("unigram_lambda must be positive"); + } + } else { + throw new ElasticSearchIllegalArgumentException("suggester[phrase][smoothing][linear] doesn't support field [" + fieldName + "]"); + } + } + } + double sum = 0.0d; + for (int i = 0; i < lambdas.length; i++) { + sum += lambdas[i]; + } + if (Math.abs(sum - 1.0) > 0.001) { + throw new ElasticSearchIllegalArgumentException("linear smoothing lambdas must sum to 1"); + } + suggestion.setModel(new WordScorer.WordScorerFactory() { + @Override + public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) + throws IOException { + return new LinearInterpoatingScorer(reader, field, realWordLikelyhood, separator, lambdas[0], lambdas[1], + lambdas[2]); + } + }); + } else if ("laplace".equals(fieldName)) { + ensureNoSmoothing(suggestion); + double theAlpha = 0.5; + + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } + if (token.isValue() && "alpha".equals(fieldName)) { + theAlpha = parser.doubleValue(); + } + } + final double alpha = theAlpha; + suggestion.setModel( new WordScorer.WordScorerFactory() { + @Override + public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { + return new LaplaceScorer(reader, field, realWordLikelyhood, separator, alpha); + } + }); + + } else if ("stupid_backoff".equals(fieldName)) { + ensureNoSmoothing(suggestion); + double theDiscount = 0.4; + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } + if (token.isValue() && "discount".equals(fieldName)) { + theDiscount = parser.doubleValue(); + } + } + final double discount = theDiscount; + suggestion.setModel( new WordScorer.WordScorerFactory() { + @Override + public WordScorer newScorer(IndexReader reader, String field, double realWordLikelyhood, BytesRef separator) throws IOException { + return new StupidBackoffScorer(reader, field, realWordLikelyhood, separator, discount); + } + }); + + } else { + throw new ElasticSearchIllegalArgumentException("suggester[phrase] doesn't support object field [" + fieldName + "]"); + } + } + private void ensureNoSmoothing(PhraseSuggestionContext suggestion) { if (suggestion.model() != null) { throw new ElasticSearchIllegalArgumentException("only one smoothing model supported"); diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java index 50445995b58..e3497f090b5 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java @@ -172,7 +172,7 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder