From 8e3bcb5e2f6ce4238060a170602b505b7e1c2f50 Mon Sep 17 00:00:00 2001 From: Britta Weber Date: Mon, 12 May 2014 23:39:24 +0200 Subject: [PATCH] refactor: unify terms and significant_terms parsing Both need the requiredSize, shardSize, minDocCount and shardMinDocCount. Parsing should not be duplicated. --- .../significant/SignificantTermsBuilder.java | 19 +-- .../SignificantTermsParametersParser.java | 62 +++++++++ .../significant/SignificantTermsParser.java | 83 ++---------- .../terms/AbstractTermsParametersParser.java | 121 ++++++++++++++++++ .../bucket/terms/TermsParametersParser.java | 75 +++++++++++ .../bucket/terms/TermsParser.java | 75 ++--------- 6 files changed, 287 insertions(+), 148 deletions(-) create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsParametersParser.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java create mode 100644 src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java index d6aa778a451..2e636078191 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsBuilder.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.aggregations.bucket.significant; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.FilterBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.AbstractTermsParametersParser; import java.io.IOException; @@ -34,10 +35,10 @@ import java.io.IOException; public class 
SignificantTermsBuilder extends AggregationBuilder { private String field; - private int requiredSize = SignificantTermsParser.DEFAULT_REQUIRED_SIZE; - private int shardSize = SignificantTermsParser.DEFAULT_SHARD_SIZE; - private int minDocCount = SignificantTermsParser.DEFAULT_MIN_DOC_COUNT; - private int shardMinDocCount = SignificantTermsParser.DEFAULT_SHARD_MIN_DOC_COUNT; + private int requiredSize = AbstractTermsParametersParser.DEFAULT_REQUIRED_SIZE; + private int shardSize = AbstractTermsParametersParser.DEFAULT_SHARD_SIZE; + private int minDocCount = AbstractTermsParametersParser.DEFAULT_MIN_DOC_COUNT; + private int shardMinDocCount = AbstractTermsParametersParser.DEFAULT_SHARD_MIN_DOC_COUNT; private String executionHint; private String includePattern; private int includeFlags; @@ -136,16 +137,16 @@ public class SignificantTermsBuilder extends AggregationBuilder entries from every shards in order to return - if (shardSize < requiredSize) { - shardSize = requiredSize; + if (shardSize < aggParser.getRequiredSize()) { + shardSize = aggParser.getRequiredSize(); } + long shardMinDocCount = aggParser.getShardMinDocCount(); // shard_min_doc_count should not be larger than min_doc_count because this can cause buckets to be removed that would match the min_doc_count criteria - if (shardMinDocCount > minDocCount) { - shardMinDocCount = minDocCount; + if (shardMinDocCount > aggParser.getMinDocCount()) { + shardMinDocCount = aggParser.getMinDocCount(); } - - IncludeExclude includeExclude = incExcParser.includeExclude(); - return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), requiredSize, shardSize, minDocCount, shardMinDocCount, includeExclude, executionHint, filter); + return new SignificantTermsAggregatorFactory(aggregationName, vsParser.config(), aggParser.getRequiredSize(), shardSize, aggParser.getMinDocCount(), shardMinDocCount, aggParser.getIncludeExclude(), aggParser.getExecutionHint(), aggParser.getFilter()); } - } diff --git 
a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java new file mode 100644 index 00000000000..71adcab3c40 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractTermsParametersParser.java @@ -0,0 +1,121 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +package org.elasticsearch.search.aggregations.bucket.terms; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; +import org.elasticsearch.search.aggregations.support.ValuesSourceParser; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +/** Base parser for the request options shared by terms-style aggregations: size, shard_size, min_doc_count, shard_min_doc_count and execution_hint, plus the tokens claimed by the supplied values-source and include/exclude parsers. Every other token is handed to parseSpecial(). */ +public abstract class AbstractTermsParametersParser { + + public static final int DEFAULT_REQUIRED_SIZE = 10; + public static final int DEFAULT_SHARD_SIZE = -1; + + //NOTE(review): the default below is 1, so the original remark (typically need more than one occurrence of something for it to be statistically significant) presumably describes significant_terms, which would have to raise the value through setDefaults() or setMinDocCount(long) - confirm + public static final int DEFAULT_MIN_DOC_COUNT = 1; + public static final int DEFAULT_SHARD_MIN_DOC_COUNT = 1; + /** Request keys; parse() compares the current field name against them with ParseField.match(). */ + static final ParseField EXECUTION_HINT_FIELD_NAME = new ParseField("execution_hint"); + static final ParseField SHARD_SIZE_FIELD_NAME = new ParseField("shard_size"); + static final ParseField MIN_DOC_COUNT_FIELD_NAME = new ParseField("min_doc_count"); + static final ParseField SHARD_MIN_DOC_COUNT_FIELD_NAME = new ParseField("shard_min_doc_count"); + /** @return the value read from the size key; DEFAULT_REQUIRED_SIZE until one is read. */ + public int getRequiredSize() { + return requiredSize; + } + /** @return the value read from shard_size; DEFAULT_SHARD_SIZE until one is read. */ + public int getShardSize() { + return shardSize; + } + /** Replaces the current min_doc_count value. NOTE(review): presumably meant for subclasses that want a different default in setDefaults() - confirm. */ + public void setMinDocCount(long minDocCount) { + this.minDocCount = minDocCount; + } + /** @return the current min_doc_count value. */ + public long getMinDocCount() { + return minDocCount; + } + /** @return the value read from shard_min_doc_count; DEFAULT_SHARD_MIN_DOC_COUNT until one is read. */ + public long getShardMinDocCount() { + return shardMinDocCount; + } + + //These are the results of the parsing.
+ /** @return the execution_hint string; null until one is read. */ + public String getExecutionHint() { + return executionHint; + } + /** @return the result of incExcParser.includeExclude(), which parse() stores as its last step. */ + public IncludeExclude getIncludeExclude() { + return includeExclude; + } + + private int requiredSize = DEFAULT_REQUIRED_SIZE; + private int shardSize = DEFAULT_SHARD_SIZE; + private long minDocCount = DEFAULT_MIN_DOC_COUNT; + private long shardMinDocCount = DEFAULT_SHARD_MIN_DOC_COUNT; + private String executionHint = null; + IncludeExclude includeExclude; + /** Calls setDefaults(), then reads tokens from parser until END_OBJECT. Each token that is not a FIELD_NAME is offered first to vsParser and then to incExcParser; remaining string and number values are matched against the keys handled here, and every unmatched token goes to parseSpecial(). Finally stores incExcParser.includeExclude(). */ + public void parse(String aggregationName, XContentParser parser, SearchContext context, ValuesSourceParser vsParser, IncludeExclude.Parser incExcParser) throws IOException { + XContentParser.Token token; + String currentFieldName = null; + setDefaults(); + /* A FIELD_NAME token only records the key; the value token that follows is then dispatched on its type. */ + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (vsParser.token(currentFieldName, token, parser)) { + continue; + } else if (incExcParser.token(currentFieldName, token, parser)) { + continue; + } else if (token == XContentParser.Token.VALUE_STRING) { + if (EXECUTION_HINT_FIELD_NAME.match(currentFieldName)) { + executionHint = parser.text(); + } else { + parseSpecial(aggregationName, parser, context, token, currentFieldName); + } + } else if (token == XContentParser.Token.VALUE_NUMBER) { + if ("size".equals(currentFieldName)) { + requiredSize = parser.intValue(); + } else if (SHARD_SIZE_FIELD_NAME.match(currentFieldName)) { + shardSize = parser.intValue(); + } else if (MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) { + minDocCount = parser.intValue(); /* NOTE(review): the field is a long and shard_min_doc_count is read with longValue() - confirm that intValue() is intended here */ + } else if (SHARD_MIN_DOC_COUNT_FIELD_NAME.match(currentFieldName)) { + shardMinDocCount = parser.longValue(); + } else { + parseSpecial(aggregationName, parser, context, token, currentFieldName); + } + } else { + parseSpecial(aggregationName, parser, context, token, currentFieldName); + } + } + includeExclude = incExcParser.includeExclude(); + } + /** Receives every token that parse() does not handle itself, together with the field name recorded for it. */ + public abstract void parseSpecial(String
aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException; + /** Called by parse() before the first token is read. */ + public abstract void setDefaults(); +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java new file mode 100644 index 00000000000..fd2fc105e0b --- /dev/null +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParametersParser.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.SearchParseException; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +/** Parameter parser for the terms aggregation: on top of what AbstractTermsParametersParser reads, it understands an {@code order} object holding the sort key and an asc or desc direction. */ +public class TermsParametersParser extends AbstractTermsParametersParser { + /** @return the key read inside {@code order}; _count until one is read. */ + public String getOrderKey() { + return orderKey; + } + /** @return true when the last direction read inside {@code order} was asc; false initially and after desc. */ + public boolean isOrderAsc() { + return orderAsc; + } + + String orderKey = "_count"; + boolean orderAsc = false; + /** No-op: this parser changes none of the inherited initial values. */ + @Override + public void setDefaults() { + } + /** Accepts a single extra key: a START_OBJECT token named {@code order}. Inside it every FIELD_NAME becomes the order key and every string value must be asc or desc, compared case-insensitively. Any other token, key or direction raises SearchParseException. */ + @Override + public void parseSpecial(String aggregationName, XContentParser parser, SearchContext context, XContentParser.Token token, String currentFieldName) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + if ("order".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + orderKey = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_STRING) { + String dir = parser.text(); + if ("asc".equalsIgnoreCase(dir)) { + orderAsc = true; + } else if ("desc".equalsIgnoreCase(dir)) { + orderAsc = false; + } else { + throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]"); + } + } else { + throw new SearchParseException(context, "Unexpected token " + token + " for [order] in [" + aggregationName + "]."); + } + } + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); + } + } else { + throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); + } + } + +} diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java
b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index be379ada8a2..e1a4d4af854 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -19,7 +19,6 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude; @@ -40,78 +39,22 @@ public class TermsParser implements Aggregator.Parser { @Override public AggregatorFactory parse(String aggregationName, XContentParser parser, SearchContext context) throws IOException { - - int requiredSize = 10; - int shardSize = -1; - String orderKey = "_count"; - boolean orderAsc = false; - - String executionHint = null; - long minDocCount = 1; - + TermsParametersParser aggParser = new TermsParametersParser(); ValuesSourceParser vsParser = ValuesSourceParser.any(aggregationName, StringTerms.TYPE, context) + .scriptable(true) + .formattable(true) .requiresSortedValues(true) .requiresUniqueValues(true) - .formattable(true) .build(); - IncludeExclude.Parser incExcParser = new IncludeExclude.Parser(aggregationName, StringTerms.TYPE, context); + aggParser.parse(aggregationName, parser, context, vsParser, incExcParser); - XContentParser.Token token; - String currentFieldName = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (vsParser.token(currentFieldName, token, parser)) { - continue; - } else if (incExcParser.token(currentFieldName, token, parser)) { - continue; - } else if (token == XContentParser.Token.VALUE_STRING) { - if 
("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) { - executionHint = parser.text(); - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); - } - } else if (token == XContentParser.Token.VALUE_NUMBER) { - if ("size".equals(currentFieldName)) { - requiredSize = parser.intValue(); - } else if ("shard_size".equals(currentFieldName) || "shardSize".equals(currentFieldName)) { - shardSize = parser.intValue(); - } else if ("min_doc_count".equals(currentFieldName) || "minDocCount".equals(currentFieldName)) { - minDocCount = parser.intValue(); - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); - } - } else if (token == XContentParser.Token.START_OBJECT) { - if ("order".equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - orderKey = parser.currentName(); - } else if (token == XContentParser.Token.VALUE_STRING) { - String dir = parser.text(); - if ("asc".equalsIgnoreCase(dir)) { - orderAsc = true; - } else if ("desc".equalsIgnoreCase(dir)) { - orderAsc = false; - } else { - throw new SearchParseException(context, "Unknown terms order direction [" + dir + "] in terms aggregation [" + aggregationName + "]"); - } - } else { - throw new SearchParseException(context, "Unexpected token " + token + " for [order] in [" + aggregationName + "]."); - } - } - } else { - throw new SearchParseException(context, "Unknown key for a " + token + " in [" + aggregationName + "]: [" + currentFieldName + "]."); - } - } else { - throw new SearchParseException(context, "Unexpected token " + token + " in [" + aggregationName + "]."); - } - } - + int shardSize = aggParser.getShardSize(); if (shardSize == 0) { shardSize = Integer.MAX_VALUE; } + int requiredSize = 
aggParser.getRequiredSize(); if (requiredSize == 0) { requiredSize = Integer.MAX_VALUE; } @@ -120,10 +63,8 @@ public class TermsParser implements Aggregator.Parser { if (shardSize < requiredSize) { shardSize = requiredSize; } - - IncludeExclude includeExclude = incExcParser.includeExclude(); - InternalOrder order = resolveOrder(orderKey, orderAsc); - return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, requiredSize, shardSize, minDocCount, includeExclude, executionHint); + InternalOrder order = resolveOrder(aggParser.getOrderKey(), aggParser.isOrderAsc()); + return new TermsAggregatorFactory(aggregationName, vsParser.config(), order, requiredSize, shardSize, aggParser.getMinDocCount(), aggParser.getIncludeExclude(), aggParser.getExecutionHint()); } static InternalOrder resolveOrder(String key, boolean asc) {