diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java index f4dd81614d8..c7124b3cf28 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java @@ -205,7 +205,12 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } } - if (!config.unmapped()) { + if (config.unmapped() == false) { + if (config.fieldContext().fieldType().isSearchable() == false) { + throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but [" + + config.fieldContext().fieldType().name() + "] is not"); + } + this.fieldType = config.fieldContext().fieldType(); this.indexedFieldName = fieldType.name(); } @@ -248,6 +253,10 @@ public class SignificantTermsAggregatorFactory extends ValuesSourceAggregatorFac } private long getBackgroundFrequency(String value) throws IOException { + // fieldType can be null if the field is unmapped, but theoretically this method should only be called + // when constructing buckets. Assert to ensure this is the case + // TODO this is a bad setup and it should be refactored + assert fieldType != null; Query query = fieldType.termQuery(value, queryShardContext); if (query instanceof TermQuery) { // for types that use the inverted index, we prefer using a caching terms diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java index 25a8991c107..65972b4890b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java @@ -79,6 +79,10 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory // Note that if the field is unmapped (its field type is null), we don't fail, // and just use the given field name as a placeholder. this.fieldType = queryShardContext.fieldMapper(fieldName); + if (fieldType != null && fieldType.indexAnalyzer() == null) { + throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " + + "requires an analyzed field"); + } this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName; this.sourceFieldNames = sourceFieldNames == null ? new String[] { indexedFieldName } @@ -120,6 +124,10 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory } private long getBackgroundFrequency(String value) throws IOException { + // fieldType can be null if the field is unmapped, but theoretically this method should only be called + // when constructing buckets. Assert to ensure this is the case + // TODO this is a bad setup and it should be refactored + assert fieldType != null; Query query = fieldType.termQuery(value, queryShardContext); if (query instanceof TermQuery) { // for types that use the inverted index, we prefer using a caching terms diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java index 25b8b148e50..8c3d5da1500 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregatorTests.java @@ -39,6 +39,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BinaryFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -52,6 +53,8 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregatorFactory.ExecutionMode; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import org.junit.Before; import java.io.IOException; @@ -83,16 +86,18 @@ public class SignificantTermsAggregatorTests extends AggregatorTestCase { return new SignificantTermsAggregationBuilder("foo").field(fieldName); } - /* NOTE - commented out instead of deleted, we need to backport https://github.com/elastic/elasticsearch/pull/52851 to support this. @Override protected List getSupportedValuesSourceTypes() { return Arrays.asList(CoreValuesSourceType.NUMERIC, - CoreValuesSourceType.BYTES); + CoreValuesSourceType.BYTES, + CoreValuesSourceType.BOOLEAN, + CoreValuesSourceType.DATE, + CoreValuesSourceType.IP); } @Override protected List unsupportedMappedFieldTypes() { - return List.of( + return Arrays.asList( NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all NumberFieldMapper.NumberType.FLOAT.typeName(), NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), @@ -100,8 +105,6 @@ public class SignificantTermsAggregatorTests extends AggregatorTestCase { ); } - */ - /** * For each provided field type, we also register an alias with name {@code -alias}. */ diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java index 0a04c508f2d..35acae1d28b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java @@ -34,14 +34,17 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BinaryFieldMapper; +import org.elasticsearch.index.mapper.GeoPointFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler; import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms; -import org.elasticsearch.search.aggregations.bucket.terms.SignificantTextAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.Arrays; @@ -65,6 +68,31 @@ public class SignificantTextAggregatorTests extends AggregatorTestCase { Function.identity())); } + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new SignificantTextAggregationBuilder("foo", fieldName); + } + + @Override + protected List getSupportedValuesSourceTypes() { + // TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields + return Arrays.asList(CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BYTES, + CoreValuesSourceType.RANGE, + CoreValuesSourceType.GEOPOINT, + CoreValuesSourceType.BOOLEAN, + CoreValuesSourceType.DATE, + CoreValuesSourceType.IP); + } + + @Override + protected List unsupportedMappedFieldTypes() { + return Arrays.asList( + BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers + GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries + ); + } + /** * Uses the significant text aggregation to find the keywords in text fields */