From 2795f4e55d0cbda397ef29887d3c9dee35c467b4 Mon Sep 17 00:00:00 2001 From: markharwood Date: Thu, 2 Jan 2014 16:11:20 +0000 Subject: [PATCH] =?UTF-8?q?Standardized=20use=20of=20=E2=80=9C*=5Flength?= =?UTF-8?q?=E2=80=9D=20for=20parameter=20names=20rather=20than=20=E2=80=9C?= =?UTF-8?q?*=5Flen=E2=80=9D.=20Java=20Builder=20apis=20drop=20old=20?= =?UTF-8?q?=E2=80=9Clen=E2=80=9D=20methods=20in=20favour=20of=20new=20?= =?UTF-8?q?=E2=80=9Clength=E2=80=9D=20Rest=20APIs=20support=20both=20old?= =?UTF-8?q?=20=E2=80=9Clen:=20and=20new=20=E2=80=9Clength=E2=80=9D=20forms?= =?UTF-8?q?=20using=20new=20ParseField=20class=20to=20a)=20provide=20compi?= =?UTF-8?q?ler-checked=20consistency=20between=20Builder=20and=20Parser=20?= =?UTF-8?q?classes=20and=20b)=20a=20common=20means=20of=20handling=20depre?= =?UTF-8?q?cated=20syntax=20in=20the=20DSL.=20Documentation=20and=20rest?= =?UTF-8?q?=20specs=20only=20document=20the=20new=20=E2=80=9C*length?= =?UTF-8?q?=E2=80=9D=20forms=20Closes=20#4083?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../queries/mlt-field-query.asciidoc | 8 +-- .../query-dsl/queries/mlt-query.asciidoc | 8 +-- .../suggesters/completion-suggest.asciidoc | 4 +- .../search/suggesters/phrase-suggest.asciidoc | 14 ++--- .../search/suggesters/term-suggest.asciidoc | 8 +-- rest-api-spec/api/mlt.json | 4 +- .../action/mlt/MoreLikeThisRequest.java | 28 +++++----- .../mlt/MoreLikeThisRequestBuilder.java | 4 +- .../mlt/TransportMoreLikeThisAction.java | 4 +- .../org/elasticsearch/common/ParseField.java | 10 +++- .../mapper/core/CompletionFieldMapper.java | 33 ++++++------ .../query/MoreLikeThisFieldQueryBuilder.java | 39 +++++++------- .../query/MoreLikeThisFieldQueryParser.java | 24 +++++---- .../index/query/MoreLikeThisQueryBuilder.java | 41 +++++++------- .../index/query/MoreLikeThisQueryParser.java | 40 +++++++++----- .../action/mlt/RestMoreLikeThisAction.java | 7 ++- .../search/suggest/SuggestUtils.java | 54 
+++++++++++-------- .../phrase/PhraseSuggestionBuilder.java | 4 +- .../suggest/term/TermSuggestionBuilder.java | 8 +-- .../CompletionFieldMapperTests.java | 5 +- .../suggest/CompletionSuggestSearchTests.java | 6 +-- 21 files changed, 196 insertions(+), 157 deletions(-) diff --git a/docs/reference/query-dsl/queries/mlt-field-query.asciidoc b/docs/reference/query-dsl/queries/mlt-field-query.asciidoc index 58d4070b4d3..e4f28bbda5b 100644 --- a/docs/reference/query-dsl/queries/mlt-field-query.asciidoc +++ b/docs/reference/query-dsl/queries/mlt-field-query.asciidoc @@ -51,11 +51,11 @@ not occur in at least this many docs. Defaults to `5`. Words that appear in more than this many docs will be ignored. Defaults to unbounded. -|`min_word_len` |The minimum word length below which words will be -ignored. Defaults to `0`. +|`min_word_length` |The minimum word length below which words will be +ignored. Defaults to `0`. (Old name "min_word_len" is deprecated) -|`max_word_len` |The maximum word length above which words will be -ignored. Defaults to unbounded (`0`). +|`max_word_length` |The maximum word length above which words will be +ignored. Defaults to unbounded (`0`). (Old name "max_word_len" is deprecated) |`boost_terms` |Sets the boost factor to use when boosting terms. Defaults to `1`. diff --git a/docs/reference/query-dsl/queries/mlt-query.asciidoc b/docs/reference/query-dsl/queries/mlt-query.asciidoc index 979854632e8..bea704a620d 100644 --- a/docs/reference/query-dsl/queries/mlt-query.asciidoc +++ b/docs/reference/query-dsl/queries/mlt-query.asciidoc @@ -50,11 +50,11 @@ not occur in at least this many docs. Defaults to `5`. Words that appear in more than this many docs will be ignored. Defaults to unbounded. -|`min_word_len` |The minimum word length below which words will be -ignored. Defaults to `0`. +|`min_word_length` |The minimum word length below which words will be +ignored. 
Defaults to `0`. (Old name "min_word_len" is deprecated) -|`max_word_len` |The maximum word length above which words will be -ignored. Defaults to unbounded (`0`). +|`max_word_length` |The maximum word length above which words will be +ignored. Defaults to unbounded (`0`). (Old name "max_word_len" is deprecated) |`boost_terms` |Sets the boost factor to use when boosting terms. Defaults to `1`. diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index a1c79399396..f4cd755e8f4 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -79,13 +79,13 @@ Mapping supports the following parameters: `The Beatles`, no need to change a simple analyzer, if you are able to enrich your data. -`max_input_len`:: +`max_input_length`:: Limits the length of a single input, defaults to `50` UTF-16 code points. This limit is only used at index time to reduce the total number of characters per input string in order to prevent massive inputs from bloating the underlying datastructure. The most usecases won't be influenced by the default value since prefix completions hardly grow beyond prefixes longer - than a handful of characters. + than a handful of characters. (Old name "max_input_len" is deprecated) [[indexing]] ==== Indexing diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc index fb75fb76ad0..8b763fb9578 100644 --- a/docs/reference/search/suggesters/phrase-suggest.asciidoc +++ b/docs/reference/search/suggesters/phrase-suggest.asciidoc @@ -36,7 +36,7 @@ curl -XPOST 'localhost:9200/_search' -d { "direct_generator" : [ { "field" : "body", "suggest_mode" : "always", - "min_word_len" : 1 + "min_word_length" : 1 } ], "highlight": { "pre_tag": "", @@ -229,15 +229,15 @@ The direct generators support the following parameters: and 2. 
Any other value result in an bad request error being thrown. Defaults to 2. -`prefix_len`:: +`prefix_length`:: The number of minimal prefix characters that must match in order be a candidate suggestions. Defaults to 1. Increasing this number improves spellcheck performance. Usually misspellings don't - occur in the beginning of terms. + occur in the beginning of terms. (Old name "prefix_len" is deprecated) -`min_word_len`:: +`min_word_length`:: The minimum length a suggest text term must have in - order to be included. Defaults to 4. + order to be included. Defaults to 4. (Old name "min_word_len" is deprecated) `max_inspections`:: A factor that is used to multiply with the @@ -298,11 +298,11 @@ curl -s -XPOST 'localhost:9200/_search' -d { "direct_generator" : [ { "field" : "body", "suggest_mode" : "always", - "min_word_len" : 1 + "min_word_length" : 1 }, { "field" : "reverse", "suggest_mode" : "always", - "min_word_len" : 1, + "min_word_length" : 1, "pre_filter" : "reverse", "post_filter" : "reverse" } ] diff --git a/docs/reference/search/suggesters/term-suggest.asciidoc b/docs/reference/search/suggesters/term-suggest.asciidoc index e958e14a844..f6331c1ffc9 100644 --- a/docs/reference/search/suggesters/term-suggest.asciidoc +++ b/docs/reference/search/suggesters/term-suggest.asciidoc @@ -62,15 +62,15 @@ doesn't take the query into account that is part of request. between 1 and 2. Any other value result in an bad request error being thrown. Defaults to 2. -`prefix_len`:: +`prefix_length`:: The number of minimal prefix characters that must match in order be a candidate suggestions. Defaults to 1. Increasing this number improves spellcheck performance. Usually misspellings don't - occur in the beginning of terms. + occur in the beginning of terms. (Old name "prefix_len" is deprecated) -`min_word_len`:: +`min_word_length`:: The minimum length a suggest text term must have in - order to be included. Defaults to 4. + order to be included. Defaults to 4. 
(Old name "min_word_len" is deprecated) `shard_size`:: Sets the maximum number of suggestions to be retrieved diff --git a/rest-api-spec/api/mlt.json b/rest-api-spec/api/mlt.json index 6c7d3ee5a35..3c4dfe033d6 100644 --- a/rest-api-spec/api/mlt.json +++ b/rest-api-spec/api/mlt.json @@ -35,7 +35,7 @@ "type" : "number", "description" : "The maximum query terms to be included in the generated query" }, - "max_word_len": { + "max_word_length": { "type" : "number", "description" : "The minimum length of the word: longer words will be ignored" }, @@ -47,7 +47,7 @@ "type" : "number", "description" : "The term frequency as percent: terms with lower occurence in the source document will be ignored" }, - "min_word_len": { + "min_word_length": { "type" : "number", "description" : "The minimum length of the word: shorter words will be ignored" }, diff --git a/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequest.java b/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequest.java index 480153b2225..bbd109bbd88 100644 --- a/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequest.java +++ b/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequest.java @@ -73,8 +73,8 @@ public class MoreLikeThisRequest extends ActionRequest { private String[] stopWords = null; private int minDocFreq = -1; private int maxDocFreq = -1; - private int minWordLen = -1; - private int maxWordLen = -1; + private int minWordLength = -1; + private int maxWordLength = -1; private float boostTerms = -1; private SearchType searchType = SearchType.DEFAULT; @@ -275,31 +275,31 @@ public class MoreLikeThisRequest extends ActionRequest { /** * The minimum word length below which words will be ignored. Defaults to 0. */ - public MoreLikeThisRequest minWordLen(int minWordLen) { - this.minWordLen = minWordLen; + public MoreLikeThisRequest minWordLength(int minWordLength) { + this.minWordLength = minWordLength; return this; } /** * The minimum word length below which words will be ignored. 
Defaults to 0. */ - public int minWordLen() { - return this.minWordLen; + public int minWordLength() { + return this.minWordLength; } /** * The maximum word length above which words will be ignored. Defaults to unbounded. */ - public MoreLikeThisRequest maxWordLen(int maxWordLen) { - this.maxWordLen = maxWordLen; + public MoreLikeThisRequest maxWordLength(int maxWordLength) { + this.maxWordLength = maxWordLength; return this; } /** * The maximum word length above which words will be ignored. Defaults to unbounded. */ - public int maxWordLen() { - return this.maxWordLen; + public int maxWordLength() { + return this.maxWordLength; } /** @@ -554,8 +554,8 @@ public class MoreLikeThisRequest extends ActionRequest { } minDocFreq = in.readVInt(); maxDocFreq = in.readVInt(); - minWordLen = in.readVInt(); - maxWordLen = in.readVInt(); + minWordLength = in.readVInt(); + maxWordLength = in.readVInt(); boostTerms = in.readFloat(); searchType = SearchType.fromId(in.readByte()); if (in.readBoolean()) { @@ -625,8 +625,8 @@ public class MoreLikeThisRequest extends ActionRequest { } out.writeVInt(minDocFreq); out.writeVInt(maxDocFreq); - out.writeVInt(minWordLen); - out.writeVInt(maxWordLen); + out.writeVInt(minWordLength); + out.writeVInt(maxWordLength); out.writeFloat(boostTerms); out.writeByte(searchType.id()); diff --git a/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequestBuilder.java b/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequestBuilder.java index 5c719324a69..cbeeb386d97 100644 --- a/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/mlt/MoreLikeThisRequestBuilder.java @@ -120,7 +120,7 @@ public class MoreLikeThisRequestBuilder extends ActionRequestBuilder0. 
*/ public MoreLikeThisRequestBuilder setMinWordLen(int minWordLen) { - request.minWordLen(minWordLen); + request.minWordLength(minWordLen); return this; } @@ -128,7 +128,7 @@ public class MoreLikeThisRequestBuilder extends ActionRequestBuilder flags) { if (currentFieldName.equals(camelCaseName) || currentFieldName.equals(underscoreName)) { return true; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index 3ad5b1e7d78..41630ec1f31 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; @@ -73,13 +74,13 @@ public class CompletionFieldMapper extends AbstractFieldMapper { public static class Fields { // Mapping field names public static final String ANALYZER = "analyzer"; - public static final String INDEX_ANALYZER = "index_analyzer"; - public static final String SEARCH_ANALYZER = "search_analyzer"; - public static final String PRESERVE_SEPARATORS = "preserve_separators"; - public static final String PRESERVE_POSITION_INCREMENTS = "preserve_position_increments"; + public static final ParseField INDEX_ANALYZER = new ParseField("index_analyzer"); + public static final ParseField SEARCH_ANALYZER = new ParseField("search_analyzer"); + public static final ParseField PRESERVE_SEPARATORS = new ParseField("preserve_separators"); + public static final ParseField PRESERVE_POSITION_INCREMENTS = new 
ParseField("preserve_position_increments"); public static final String PAYLOADS = "payloads"; public static final String TYPE = "type"; - public static final String MAX_INPUT_LENGTH = "max_input_len"; + public static final ParseField MAX_INPUT_LENGTH = new ParseField("max_input_length", "max_input_len"); // Content field names public static final String CONTENT_FIELD_NAME_INPUT = "input"; public static final String CONTENT_FIELD_NAME_OUTPUT = "output"; @@ -119,7 +120,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper { public Builder maxInputLength(int maxInputLength) { if (maxInputLength <= 0) { - throw new ElasticsearchIllegalArgumentException(Fields.MAX_INPUT_LENGTH + " must be > 0 but was [" + maxInputLength + "]"); + throw new ElasticsearchIllegalArgumentException(Fields.MAX_INPUT_LENGTH.getPreferredName() + " must be > 0 but was [" + maxInputLength + "]"); } this.maxInputLength = maxInputLength; return this; @@ -147,17 +148,17 @@ public class CompletionFieldMapper extends AbstractFieldMapper { NamedAnalyzer analyzer = getNamedAnalyzer(parserContext, fieldNode.toString()); builder.indexAnalyzer(analyzer); builder.searchAnalyzer(analyzer); - } else if (fieldName.equals(Fields.INDEX_ANALYZER) || fieldName.equals("indexAnalyzer")) { + } else if (Fields.INDEX_ANALYZER.match(fieldName)) { builder.indexAnalyzer(getNamedAnalyzer(parserContext, fieldNode.toString())); - } else if (fieldName.equals(Fields.SEARCH_ANALYZER) || fieldName.equals("searchAnalyzer")) { + } else if (Fields.SEARCH_ANALYZER.match(fieldName)) { builder.searchAnalyzer(getNamedAnalyzer(parserContext, fieldNode.toString())); } else if (fieldName.equals(Fields.PAYLOADS)) { builder.payloads(Boolean.parseBoolean(fieldNode.toString())); - } else if (fieldName.equals(Fields.PRESERVE_SEPARATORS) || fieldName.equals("preserveSeparators")) { + } else if (Fields.PRESERVE_SEPARATORS.match(fieldName)) { builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString())); - } else if 
(fieldName.equals(Fields.PRESERVE_POSITION_INCREMENTS) || fieldName.equals("preservePositionIncrements")) { + } else if (Fields.PRESERVE_POSITION_INCREMENTS.match(fieldName)) { builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString())); - } else if (fieldName.equals(Fields.MAX_INPUT_LENGTH) || fieldName.equals("maxInputLen")) { + } else if (Fields.MAX_INPUT_LENGTH.match(fieldName)) { builder.maxInputLength(Integer.parseInt(fieldNode.toString())); } else { throw new MapperParsingException("Unknown field [" + fieldName + "]"); @@ -347,13 +348,13 @@ public class CompletionFieldMapper extends AbstractFieldMapper { if (indexAnalyzer.name().equals(searchAnalyzer.name())) { builder.field(Fields.ANALYZER, indexAnalyzer.name()); } else { - builder.field(Fields.INDEX_ANALYZER, indexAnalyzer.name()) - .field(Fields.SEARCH_ANALYZER, searchAnalyzer.name()); + builder.field(Fields.INDEX_ANALYZER.getPreferredName(), indexAnalyzer.name()) + .field(Fields.SEARCH_ANALYZER.getPreferredName(), searchAnalyzer.name()); } builder.field(Fields.PAYLOADS, this.payloads); - builder.field(Fields.PRESERVE_SEPARATORS, this.preserveSeparators); - builder.field(Fields.PRESERVE_POSITION_INCREMENTS, this.preservePositionIncrements); - builder.field(Fields.MAX_INPUT_LENGTH, this.maxInputLength); + builder.field(Fields.PRESERVE_SEPARATORS.getPreferredName(), this.preserveSeparators); + builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), this.preservePositionIncrements); + builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength); return builder.endObject(); } diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryBuilder.java index d9b2c21527e..72bed1698ae 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryBuilder.java @@ -38,8 
+38,8 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B private String[] stopWords = null; private int minDocFreq = -1; private int maxDocFreq = -1; - private int minWordLen = -1; - private int maxWordLen = -1; + private int minWordLength = -1; + private int maxWordLength = -1; private float boostTerms = -1; private float boost = -1; private String analyzer; @@ -123,8 +123,8 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B * Sets the minimum word length below which words will be ignored. Defaults * to 0. */ - public MoreLikeThisFieldQueryBuilder minWordLen(int minWordLen) { - this.minWordLen = minWordLen; + public MoreLikeThisFieldQueryBuilder minWordLength(int minWordLength) { + this.minWordLength = minWordLength; return this; } @@ -133,7 +133,7 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B * unbounded (0). */ public MoreLikeThisFieldQueryBuilder maxWordLen(int maxWordLen) { - this.maxWordLen = maxWordLen; + this.maxWordLength = maxWordLen; return this; } @@ -179,39 +179,40 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B builder.startObject(MoreLikeThisFieldQueryParser.NAME); builder.startObject(name); if (likeText == null) { - throw new ElasticsearchIllegalArgumentException("moreLikeThisField requires 'like_text' to be provided"); + throw new ElasticsearchIllegalArgumentException("moreLikeThisField requires '"+ + MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided"); } - builder.field("like_text", likeText); + builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText); if (percentTermsToMatch != -1) { - builder.field("percent_terms_to_match", percentTermsToMatch); + builder.field(MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.getPreferredName(), percentTermsToMatch); } if (minTermFreq != -1) { - builder.field("min_term_freq", minTermFreq); + 
builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq); } if (maxQueryTerms != -1) { - builder.field("max_query_terms", maxQueryTerms); + builder.field(MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms); } if (stopWords != null && stopWords.length > 0) { - builder.startArray("stop_words"); + builder.startArray(MoreLikeThisQueryParser.Fields.STOP_WORDS.getPreferredName()); for (String stopWord : stopWords) { builder.value(stopWord); } builder.endArray(); } if (minDocFreq != -1) { - builder.field("min_doc_freq", minDocFreq); + builder.field(MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.getPreferredName(), minDocFreq); } if (maxDocFreq != -1) { - builder.field("max_doc_freq", maxDocFreq); + builder.field(MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.getPreferredName(), maxDocFreq); } - if (minWordLen != -1) { - builder.field("min_word_len", minWordLen); + if (minWordLength != -1) { + builder.field(MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.getPreferredName(), minWordLength); } - if (maxWordLen != -1) { - builder.field("max_word_len", maxWordLen); + if (maxWordLength != -1) { + builder.field(MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.getPreferredName(), maxWordLength); } if (boostTerms != -1) { - builder.field("boost_terms", boostTerms); + builder.field(MoreLikeThisQueryParser.Fields.BOOST_TERMS.getPreferredName(), boostTerms); } if (boost != -1) { builder.field("boost", boost); @@ -220,7 +221,7 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B builder.field("analyzer", analyzer); } if (failOnUnsupportedField != null) { - builder.field("fail_on_unsupported_field", failOnUnsupportedField); + builder.field(MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField); } if (queryName != null) { builder.field("_name", queryName); diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryParser.java 
b/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryParser.java index f2152051a51..7e465a590b4 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisFieldQueryParser.java @@ -51,6 +51,7 @@ public class MoreLikeThisFieldQueryParser implements QueryParser { return new String[]{NAME, "more_like_this_field", Strings.toCamelCase(NAME), "moreLikeThisField"}; } + @Override public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { XContentParser parser = parseContext.parser(); @@ -75,30 +76,30 @@ public class MoreLikeThisFieldQueryParser implements QueryParser { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { - if ("like_text".equals(currentFieldName)) { + if (MoreLikeThisQueryParser.Fields.LIKE_TEXT.match(currentFieldName,parseContext.parseFlags()) ) { mltQuery.setLikeText(parser.text()); - } else if ("min_term_freq".equals(currentFieldName) || "minTermFreq".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.match(currentFieldName,parseContext.parseFlags()) ) { mltQuery.setMinTermFrequency(parser.intValue()); - } else if ("max_query_terms".equals(currentFieldName) || "maxQueryTerms".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setMaxQueryTerms(parser.intValue()); - } else if ("min_doc_freq".equals(currentFieldName) || "minDocFreq".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setMinDocFreq(parser.intValue()); - } else if ("max_doc_freq".equals(currentFieldName) || "maxDocFreq".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.match(currentFieldName,parseContext.parseFlags())) { 
mltQuery.setMaxDocFreq(parser.intValue()); - } else if ("min_word_len".equals(currentFieldName) || "minWordLen".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setMinWordLen(parser.intValue()); - } else if ("max_word_len".equals(currentFieldName) || "maxWordLen".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setMaxWordLen(parser.intValue()); - } else if ("boost_terms".equals(currentFieldName) || "boostTerms".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.BOOST_TERMS.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setBoostTerms(true); mltQuery.setBoostTermsFactor(parser.floatValue()); - } else if ("percent_terms_to_match".equals(currentFieldName) || "percentTermsToMatch".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName,parseContext.parseFlags())) { mltQuery.setPercentTermsToMatch(parser.floatValue()); } else if ("analyzer".equals(currentFieldName)) { analyzer = parseContext.analysisService().analyzer(parser.text()); } else if ("boost".equals(currentFieldName)) { mltQuery.setBoost(parser.floatValue()); - } else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) { + } else if (MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName,parseContext.parseFlags())) { failOnUnsupportedField = parser.booleanValue(); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); @@ -106,7 +107,8 @@ public class MoreLikeThisFieldQueryParser implements QueryParser { throw new QueryParsingException(parseContext.index(), "[mlt_field] query does not support [" + currentFieldName + "]"); } } else if (token == XContentParser.Token.START_ARRAY) { - if 
("stop_words".equals(currentFieldName) || "stopWords".equals(currentFieldName)) { + if (MoreLikeThisQueryParser.Fields.STOP_WORDS.match(currentFieldName,parseContext.parseFlags())) { + Set stopWords = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { stopWords.add(parser.text()); diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index f06cfb22289..dc205cffef0 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -39,8 +39,8 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta private String[] stopWords = null; private int minDocFreq = -1; private int maxDocFreq = -1; - private int minWordLen = -1; - private int maxWordLen = -1; + private int minWordLength = -1; + private int maxWordLength = -1; private float boostTerms = -1; private float boost = -1; private String analyzer; @@ -131,8 +131,8 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta * Sets the minimum word length below which words will be ignored. Defaults * to 0. */ - public MoreLikeThisQueryBuilder minWordLen(int minWordLen) { - this.minWordLen = minWordLen; + public MoreLikeThisQueryBuilder minWordLength(int minWordLength) { + this.minWordLength = minWordLength; return this; } @@ -140,8 +140,8 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta * Sets the maximum word length above which words will be ignored. Defaults to * unbounded (0). 
*/ - public MoreLikeThisQueryBuilder maxWordLen(int maxWordLen) { - this.maxWordLen = maxWordLen; + public MoreLikeThisQueryBuilder maxWordLength(int maxWordLength) { + this.maxWordLength = maxWordLength; return this; } @@ -193,39 +193,40 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta builder.endArray(); } if (likeText == null) { - throw new ElasticsearchIllegalArgumentException("moreLikeThis requires 'likeText' to be provided"); + throw new ElasticsearchIllegalArgumentException("moreLikeThis requires '"+ + MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided"); } - builder.field("like_text", likeText); + builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText); if (percentTermsToMatch != -1) { - builder.field("percent_terms_to_match", percentTermsToMatch); + builder.field(MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.getPreferredName(), percentTermsToMatch); } if (minTermFreq != -1) { - builder.field("min_term_freq", minTermFreq); + builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq); } if (maxQueryTerms != -1) { - builder.field("max_query_terms", maxQueryTerms); + builder.field(MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms); } if (stopWords != null && stopWords.length > 0) { - builder.startArray("stop_words"); + builder.startArray(MoreLikeThisQueryParser.Fields.STOP_WORDS.getPreferredName()); for (String stopWord : stopWords) { builder.value(stopWord); } builder.endArray(); } if (minDocFreq != -1) { - builder.field("min_doc_freq", minDocFreq); + builder.field(MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.getPreferredName(), minDocFreq); } if (maxDocFreq != -1) { - builder.field("max_doc_freq", maxDocFreq); + builder.field(MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.getPreferredName(), maxDocFreq); } - if (minWordLen != -1) { - builder.field("min_word_len", minWordLen); + if (minWordLength != 
-1) { + builder.field(MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.getPreferredName(), minWordLength); } - if (maxWordLen != -1) { - builder.field("max_word_len", maxWordLen); + if (maxWordLength != -1) { + builder.field(MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.getPreferredName(), maxWordLength); } if (boostTerms != -1) { - builder.field("boost_terms", boostTerms); + builder.field(MoreLikeThisQueryParser.Fields.BOOST_TERMS.getPreferredName(), boostTerms); } if (boost != -1) { builder.field("boost", boost); @@ -234,7 +235,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta builder.field("analyzer", analyzer); } if (failOnUnsupportedField != null) { - builder.field("fail_on_unsupported_field", failOnUnsupportedField); + builder.field(MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField); } if (queryName != null) { builder.field("_name", queryName); diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index c2948907ecd..207652926cc 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -24,6 +24,7 @@ import com.google.common.collect.Sets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; @@ -41,6 +42,21 @@ import java.util.Set; public class MoreLikeThisQueryParser implements QueryParser { public static final String NAME = "mlt"; + + + public static class Fields { + public static final ParseField LIKE_TEXT = new ParseField("like_text"); + public static final ParseField 
MIN_TERM_FREQ = new ParseField("min_term_freq"); + public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms"); + public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); + public static final ParseField MAX_WORD_LENGTH = new ParseField("max_word_length", "max_word_len"); + public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq"); + public static final ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq"); + public static final ParseField BOOST_TERMS = new ParseField("boost_terms"); + public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match"); + public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field"); + public static final ParseField STOP_WORDS = new ParseField("stop_words"); + } @Inject public MoreLikeThisQueryParser() { @@ -68,38 +84,38 @@ public class MoreLikeThisQueryParser implements QueryParser { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { - if ("like_text".equals(currentFieldName) || "likeText".equals(currentFieldName)) { + if (Fields.LIKE_TEXT.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setLikeText(parser.text()); - } else if ("min_term_freq".equals(currentFieldName) || "minTermFreq".equals(currentFieldName)) { + } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMinTermFrequency(parser.intValue()); - } else if ("max_query_terms".equals(currentFieldName) || "maxQueryTerms".equals(currentFieldName)) { + } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMaxQueryTerms(parser.intValue()); - } else if ("min_doc_freq".equals(currentFieldName) || "minDocFreq".equals(currentFieldName)) { + } else if (Fields.MIN_DOC_FREQ.match(currentFieldName, parseContext.parseFlags())) { 
mltQuery.setMinDocFreq(parser.intValue()); - } else if ("max_doc_freq".equals(currentFieldName) || "maxDocFreq".equals(currentFieldName)) { + } else if (Fields.MAX_DOC_FREQ.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMaxDocFreq(parser.intValue()); - } else if ("min_word_len".equals(currentFieldName) || "minWordLen".equals(currentFieldName)) { + } else if (Fields.MIN_WORD_LENGTH.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMinWordLen(parser.intValue()); - } else if ("max_word_len".equals(currentFieldName) || "maxWordLen".equals(currentFieldName)) { + } else if (Fields.MAX_WORD_LENGTH.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMaxWordLen(parser.intValue()); - } else if ("boost_terms".equals(currentFieldName) || "boostTerms".equals(currentFieldName)) { + } else if (Fields.BOOST_TERMS.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setBoostTerms(true); mltQuery.setBoostTermsFactor(parser.floatValue()); - } else if ("percent_terms_to_match".equals(currentFieldName) || "percentTermsToMatch".equals(currentFieldName)) { + } else if (Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setPercentTermsToMatch(parser.floatValue()); } else if ("analyzer".equals(currentFieldName)) { analyzer = parseContext.analysisService().analyzer(parser.text()); } else if ("boost".equals(currentFieldName)) { mltQuery.setBoost(parser.floatValue()); - } else if ("fail_on_unsupported_field".equals(currentFieldName) || "failOnUnsupportedField".equals(currentFieldName)) { + } else if (Fields.FAIL_ON_UNSUPPORTED_FIELD.match(currentFieldName, parseContext.parseFlags())) { failOnUnsupportedField = parser.booleanValue(); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); } else { throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); } - } else if (token == XContentParser.Token.START_ARRAY) { - if 
("stop_words".equals(currentFieldName) || "stopWords".equals(currentFieldName)) { + } else if (token == XContentParser.Token.START_ARRAY) { + if (Fields.STOP_WORDS.match(currentFieldName, parseContext.parseFlags())) { Set stopWords = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { stopWords.add(parser.text()); diff --git a/src/main/java/org/elasticsearch/rest/action/mlt/RestMoreLikeThisAction.java b/src/main/java/org/elasticsearch/rest/action/mlt/RestMoreLikeThisAction.java index ce418a80a8e..4dacd0e8283 100644 --- a/src/main/java/org/elasticsearch/rest/action/mlt/RestMoreLikeThisAction.java +++ b/src/main/java/org/elasticsearch/rest/action/mlt/RestMoreLikeThisAction.java @@ -59,6 +59,9 @@ public class RestMoreLikeThisAction extends BaseRestHandler { mltRequest.listenerThreaded(false); try { + //TODO the ParseField class that encapsulates the supported names used for an attribute + //needs some work if it is to be used in a REST context like this too + // See the MoreLikeThisQueryParser constants that hold the valid syntax mltRequest.fields(request.paramAsStringArray("mlt_fields", null)); mltRequest.percentTermsToMatch(request.paramAsFloat("percent_terms_to_match", -1)); mltRequest.minTermFreq(request.paramAsInt("min_term_freq", -1)); @@ -66,8 +69,8 @@ public class RestMoreLikeThisAction extends BaseRestHandler { mltRequest.stopWords(request.paramAsStringArray("stop_words", null)); mltRequest.minDocFreq(request.paramAsInt("min_doc_freq", -1)); mltRequest.maxDocFreq(request.paramAsInt("max_doc_freq", -1)); - mltRequest.minWordLen(request.paramAsInt("min_word_len", -1)); - mltRequest.maxWordLen(request.paramAsInt("max_word_len", -1)); + mltRequest.minWordLength(request.paramAsInt("min_word_len", request.paramAsInt("min_word_length",-1))); + mltRequest.maxWordLength(request.paramAsInt("max_word_len", request.paramAsInt("max_word_length",-1))); mltRequest.boostTerms(request.paramAsFloat("boost_terms", -1)); 
mltRequest.searchType(SearchType.fromString(request.param("search_type"))); diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java index a5e2f8ac904..c61fa04ef4b 100644 --- a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java @@ -18,30 +18,18 @@ */ package org.elasticsearch.search.suggest; -import java.io.IOException; -import java.util.Comparator; -import java.util.Locale; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.search.spell.DirectSpellChecker; -import org.apache.lucene.search.spell.JaroWinklerDistance; -import org.apache.lucene.search.spell.LevensteinDistance; -import org.apache.lucene.search.spell.LuceneLevenshteinDistance; -import org.apache.lucene.search.spell.NGramDistance; -import org.apache.lucene.search.spell.StringDistance; -import org.apache.lucene.search.spell.SuggestMode; -import org.apache.lucene.search.spell.SuggestWord; -import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; -import org.apache.lucene.search.spell.SuggestWordQueue; +import org.apache.lucene.search.spell.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.FastCharArrayReader; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.CustomAnalyzer; @@ -51,6 +39,10 @@ import 
org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; +import java.io.IOException; +import java.util.Comparator; +import java.util.Locale; + public final class SuggestUtils { public static Comparator LUCENE_FREQUENCY = new SuggestWordFrequencyComparator(); public static Comparator SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR; @@ -193,6 +185,7 @@ public final class SuggestUtils { return new LuceneLevenshteinDistance(); } else if ("levenstein".equals(distanceVal)) { return new LevensteinDistance(); + //TODO Jaro and Winkler are 2 people - so apply same naming logic as damerau_levenshtein } else if ("jarowinkler".equals(distanceVal)) { return new JaroWinklerDistance(); } else if ("ngram".equals(distanceVal)) { @@ -202,30 +195,45 @@ public final class SuggestUtils { } } + public static class Fields { + public static final ParseField STRING_DISTANCE = new ParseField("string_distance"); + public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode"); + public static final ParseField MAX_EDITS = new ParseField("max_edits"); + public static final ParseField MAX_INSPECTIONS = new ParseField("max_inspections"); + // TODO some of these constants are the same as MLT constants and + // could be moved to a shared class for maintaining consistency across + // the platform + public static final ParseField MAX_TERM_FREQ = new ParseField("max_term_freq"); + public static final ParseField PREFIX_LENGTH = new ParseField("prefix_length", "prefix_len"); + public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); + public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq"); + public static final ParseField SHARD_SIZE = new ParseField("shard_size"); + } + public static boolean parseDirectSpellcheckerSettings(XContentParser parser, String fieldName, 
DirectSpellcheckerSettings suggestion) throws IOException { if ("accuracy".equals(fieldName)) { suggestion.accuracy(parser.floatValue()); - } else if ("suggest_mode".equals(fieldName) || "suggestMode".equals(fieldName)) { + } else if (Fields.SUGGEST_MODE.match(fieldName)) { suggestion.suggestMode(SuggestUtils.resolveSuggestMode(parser.text())); } else if ("sort".equals(fieldName)) { suggestion.sort(SuggestUtils.resolveSort(parser.text())); - } else if ("string_distance".equals(fieldName) || "stringDistance".equals(fieldName)) { + } else if (Fields.STRING_DISTANCE.match(fieldName)) { suggestion.stringDistance(SuggestUtils.resolveDistance(parser.text())); - } else if ("max_edits".equals(fieldName) || "maxEdits".equals(fieldName)) { + } else if (Fields.MAX_EDITS.match(fieldName)) { suggestion.maxEdits(parser.intValue()); if (suggestion.maxEdits() < 1 || suggestion.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new ElasticsearchIllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits()); } - } else if ("max_inspections".equals(fieldName) || "maxInspections".equals(fieldName)) { + } else if (Fields.MAX_INSPECTIONS.match(fieldName)) { suggestion.maxInspections(parser.intValue()); - } else if ("max_term_freq".equals(fieldName) || "maxTermFreq".equals(fieldName)) { + } else if (Fields.MAX_TERM_FREQ.match(fieldName)) { suggestion.maxTermFreq(parser.floatValue()); - } else if ("prefix_len".equals(fieldName) || "prefixLen".equals(fieldName)) { + } else if (Fields.PREFIX_LENGTH.match(fieldName)) { suggestion.prefixLength(parser.intValue()); - } else if ("min_word_len".equals(fieldName) || "minWordLen".equals(fieldName)) { + } else if (Fields.MIN_WORD_LENGTH.match(fieldName)) { suggestion.minQueryLength(parser.intValue()); - } else if ("min_doc_freq".equals(fieldName) || "minDocFreq".equals(fieldName)) { + } else if (Fields.MIN_DOC_FREQ.match(fieldName)) { suggestion.minDocFreq(parser.floatValue()); } else { return false; @@ -247,7 +255,7 
@@ public final class SuggestUtils { suggestion.setField(parser.text()); } else if ("size".equals(fieldName)) { suggestion.setSize(parser.intValue()); - } else if ("shard_size".equals(fieldName) || "shardSize".equals(fieldName)) { + } else if (Fields.SHARD_SIZE.match(fieldName)) { suggestion.setShardSize(parser.intValue()); } else { return false; diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java index 761cd4989aa..9e5dfb59ba6 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java @@ -590,10 +590,10 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder