Terms aggregations: make size=0 return all terms.
Terms aggregations return up to `size` terms, so until now the only way to get all matching terms back was to set `size` to an arbitrarily high number that was larger than the number of unique terms. Terms aggregators already take care not to allocate memory based on the `size` parameter, so this commit mostly consists of making `0` an alias for the maximum integer value in the TermsParser. Closes #4837
Parent: 75778d082b
Commit: 9282ae4ffd
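
For orientation, the sketch below shows what the change enables from the Java API, modeled directly on the integration tests added in this commit. The index name "idx", the aggregation name "categories", and the field "category" are illustrative assumptions, not part of the commit; the point is only that `size(0)` now means "return every term".

    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.Client;
    import org.elasticsearch.search.aggregations.bucket.terms.Terms;

    import static org.elasticsearch.search.aggregations.AggregationBuilders.terms;

    public class AllTermsSketch {

        // Returns the number of distinct terms of the "category" field.
        // Before this commit, callers had to guess an upper bound and pass it as `size`.
        static int countDistinctTerms(Client client) {
            SearchResponse response = client.prepareSearch("idx")
                    .addAggregation(terms("categories")
                            .field("category")
                            .size(0)) // 0 is now an alias for Integer.MAX_VALUE, i.e. "no limit"
                    .execute().actionGet();
            Terms categories = response.getAggregations().get("categories");
            return categories.buckets().size();
        }
    }

As the documentation hunk below warns, this should only be used on fields of modest cardinality, since every term is sorted and shipped back over the network.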
@@ -65,6 +65,8 @@ the client.
 NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, elasticsearch will
 override it and reset it to be equal to `size`.
 
+added[1.1.0] It is possible to not limit the number of terms that are returned by setting `size` to `0`. Don't use this
+on high-cardinality fields, as this will kill both your CPU, since terms need to be returned sorted, and your network.
 
 ==== Order
 
@@ -167,7 +167,7 @@ public class DoubleTerms extends InternalTerms {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
         this.valueFormatter = ValueFormatterStreams.readOptional(in);
-        this.requiredSize = in.readVInt();
+        this.requiredSize = readSize(in);
         this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
@@ -183,7 +183,7 @@ public class DoubleTerms extends InternalTerms {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
         ValueFormatterStreams.writeOptional(valueFormatter, out);
-        out.writeVInt(requiredSize);
+        writeSize(requiredSize, out);
         out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
@@ -21,6 +21,8 @@ package org.elasticsearch.search.aggregations.bucket.terms;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.elasticsearch.cache.recycler.CacheRecycler;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Streamable;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.common.xcontent.ToXContent;
@@ -29,6 +31,7 @@ import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
 
+import java.io.IOException;
 import java.util.*;
 
 /**
@@ -185,4 +188,17 @@ public abstract class InternalTerms extends InternalAggregation implements Terms
         buckets = newBuckets;
     }
 
+    // 0 actually means unlimited
+    protected static int readSize(StreamInput in) throws IOException {
+        final int size = in.readVInt();
+        return size == 0 ? Integer.MAX_VALUE : size;
+    }
+
+    protected static void writeSize(int size, StreamOutput out) throws IOException {
+        if (size == Integer.MAX_VALUE) {
+            size = 0;
+        }
+        out.writeVInt(size);
+    }
+
 }
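
The two helpers added above fix the wire-format convention: in memory, "unlimited" is `Integer.MAX_VALUE`, but on the wire it is written as the vInt `0` (a single byte) and expanded back on read. Below is a standalone sketch of that round-trip, deliberately free of the Elasticsearch stream classes so it can be read in isolation; the class and method names are illustrative only.

    final class SizeCodecSketch {

        // Mirrors writeSize(): the "unlimited" sentinel is stored as 0 on the wire.
        static int encode(int size) {
            return size == Integer.MAX_VALUE ? 0 : size;
        }

        // Mirrors readSize(): 0 on the wire is expanded back to "unlimited" in memory.
        static int decode(int wireValue) {
            return wireValue == 0 ? Integer.MAX_VALUE : wireValue;
        }

        public static void main(String[] args) {
            assert decode(encode(Integer.MAX_VALUE)) == Integer.MAX_VALUE; // sentinel round-trips
            assert decode(encode(10)) == 10;                              // ordinary sizes are untouched
        }
    }

Reusing `0` as the sentinel works because the TermsParser rewrites a requested `size` of `0` to `Integer.MAX_VALUE` before any aggregator is built, as the TermsParser hunk further down shows.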
@@ -164,7 +164,7 @@ public class LongTerms extends InternalTerms {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
         this.valueFormatter = ValueFormatterStreams.readOptional(in);
-        this.requiredSize = in.readVInt();
+        this.requiredSize = readSize(in);
         this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
@@ -180,7 +180,7 @@ public class LongTerms extends InternalTerms {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
         ValueFormatterStreams.writeOptional(valueFormatter, out);
-        out.writeVInt(requiredSize);
+        writeSize(requiredSize, out);
         out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
@@ -96,7 +96,7 @@ public class StringTerms extends InternalTerms {
     public void readFrom(StreamInput in) throws IOException {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
-        this.requiredSize = in.readVInt();
+        this.requiredSize = readSize(in);
         this.minDocCount = in.readVLong();
         int size = in.readVInt();
         List<InternalTerms.Bucket> buckets = new ArrayList<InternalTerms.Bucket>(size);
@@ -111,7 +111,7 @@ public class StringTerms extends InternalTerms {
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
-        out.writeVInt(requiredSize);
+        writeSize(requiredSize, out);
         out.writeVLong(minDocCount);
         out.writeVInt(buckets.size());
         for (InternalTerms.Bucket bucket : buckets) {
@@ -176,6 +176,14 @@ public class TermsParser implements Aggregator.Parser {
             }
         }
 
+        if (shardSize == 0) {
+            shardSize = Integer.MAX_VALUE;
+        }
+
+        if (requiredSize == 0) {
+            requiredSize = Integer.MAX_VALUE;
+        }
+
         // shard_size cannot be smaller than size as we need to at least fetch <size> entries from every shard in order to return <size>
         if (shardSize < requiredSize) {
             shardSize = requiredSize;
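
Order matters in the block above: the `0` sentinels are expanded to `Integer.MAX_VALUE` before the pre-existing `shard_size < size` check runs, so `size: 0` also lifts an explicitly set `shard_size` to effectively unlimited. A hedged standalone sketch of that normalization follows; the helper name and the array return type are illustrative, not part of the commit.

    // Reproduces the normalization order now applied in TermsParser:
    // expand the 0 sentinels first, then make sure each shard fetches at least `size` terms.
    static int[] normalizeSizes(int requiredSize, int shardSize) {
        if (shardSize == 0) {
            shardSize = Integer.MAX_VALUE;
        }
        if (requiredSize == 0) {
            requiredSize = Integer.MAX_VALUE;
        }
        if (shardSize < requiredSize) {
            shardSize = requiredSize;
        }
        return new int[] { requiredSize, shardSize };
    }

    // normalizeSizes(0, 25)  -> { Integer.MAX_VALUE, Integer.MAX_VALUE }
    // normalizeSizes(10, 5)  -> { 10, 10 }
    // normalizeSizes(10, 50) -> { 10, 50 }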
@@ -66,7 +66,7 @@ public class UnmappedTerms extends InternalTerms {
     public void readFrom(StreamInput in) throws IOException {
         this.name = in.readString();
         this.order = InternalOrder.Streams.readOrder(in);
-        this.requiredSize = in.readVInt();
+        this.requiredSize = readSize(in);
         this.minDocCount = in.readVLong();
         this.buckets = BUCKETS;
         this.bucketMap = BUCKETS_MAP;
@@ -76,7 +76,7 @@ public class UnmappedTerms extends InternalTerms {
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(name);
         InternalOrder.Streams.writeOrder(order, out);
-        out.writeVInt(requiredSize);
+        writeSize(requiredSize, out);
         out.writeVLong(minDocCount);
     }
 
@@ -64,7 +64,7 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
     @Before
     public void init() throws Exception {
        createIndex("idx");
 
        IndexRequestBuilder[] lowcardBuilders = new IndexRequestBuilder[NUM_DOCS];
        for (int i = 0; i < lowcardBuilders.length; i++) {
            lowcardBuilders[i] = client().prepareIndex("idx", "type").setSource(jsonBuilder()
@@ -72,7 +72,7 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
                    .field("value", (double) i)
                    .startArray("values").value((double)i).value(i + 1d).endArray()
                    .endObject());
 
        }
        indexRandom(randomBoolean(), lowcardBuilders);
        IndexRequestBuilder[] highCardBuilders = new IndexRequestBuilder[100]; // TODO: randomize the size?
@@ -89,6 +89,24 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
         ensureSearchable();
     }
 
+    @Test
+    // the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
+    public void sizeIsZero() {
+        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .field("value")
+                        .minDocCount(randomInt(1))
+                        .size(0))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        Terms terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.buckets().size(), equalTo(100));
+    }
+
     @Test
     public void singleValueField() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
@@ -550,7 +568,8 @@ public class DoubleTermsTests extends ElasticsearchIntegrationTest {
     public void unmapped() throws Exception {
         SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
                 .addAggregation(terms("terms")
-                        .field("value"))
+                        .field("value")
+                        .size(randomInt(5)))
                 .execute().actionGet();
 
         assertSearchResponse(response);
@@ -87,6 +87,24 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {
         ensureSearchable();
     }
 
+    @Test
+    // the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
+    public void sizeIsZero() {
+        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .field("value")
+                        .minDocCount(randomInt(1))
+                        .size(0))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        Terms terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.buckets().size(), equalTo(100));
+    }
+
     @Test
     public void singleValueField() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
@@ -544,7 +562,8 @@ public class LongTermsTests extends ElasticsearchIntegrationTest {
     public void unmapped() throws Exception {
         SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
                 .addAggregation(terms("terms")
-                        .field("value"))
+                        .field("value")
+                        .size(randomInt(5)))
                 .execute().actionGet();
 
         assertSearchResponse(response);
@@ -97,6 +97,26 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         ensureSearchable();
     }
 
+    @Test
+    // the main purpose of this test is to make sure we're not allocating 2GB of memory per shard
+    public void sizeIsZero() {
+        final int minDocCount = randomInt(1);
+        SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
+                .addAggregation(terms("terms")
+                        .executionHint(randomExecutionHint())
+                        .field("value")
+                        .minDocCount(minDocCount)
+                        .size(0))
+                .execute().actionGet();
+
+        assertSearchResponse(response); System.out.println(response);
+
+        Terms terms = response.getAggregations().get("terms");
+        assertThat(terms, notNullValue());
+        assertThat(terms.getName(), equalTo("terms"));
+        assertThat(terms.buckets().size(), equalTo(minDocCount == 0 ? 105 : 100)); // 105 because of the other type
+    }
+
     @Test
     public void singleValueField() throws Exception {
         SearchResponse response = client().prepareSearch("idx").setTypes("type")
@@ -686,6 +706,7 @@ public class StringTermsTests extends ElasticsearchIntegrationTest {
         SearchResponse response = client().prepareSearch("idx_unmapped").setTypes("type")
                 .addAggregation(terms("terms")
                         .executionHint(randomExecutionHint())
+                        .size(randomInt(5))
                         .field("value"))
                 .execute().actionGet();
 