mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-22 21:05:23 +00:00
When the `terms` agg runs against strings and uses global ordinals it has an optimization when it collects segments that only ever have a single value for the particular string. This is *very* common. But I broke it in #57241. This fixes that optimization and adds `debug` information that you can use to see how often we collect segments of each type. And adds a test to make sure that I don't break the optimization again. We also had a specialiation for when there isn't a filter on the terms to aggregate. I had removed that specialization in #57241 which resulted in some slow down as well. This adds it back but in a more clear way. And, hopefully, a way that is marginally faster when there *is* a filter. Closes #57407
This commit is contained in:
parent
e50f514092
commit
97c06816a4
@ -779,7 +779,7 @@ setup:
|
||||
body: { "size" : 0, "aggs" : { "no_field_terms" : { "terms" : { "size": 1 } } } }
|
||||
|
||||
---
|
||||
"profiler":
|
||||
"global ords profiler":
|
||||
- skip:
|
||||
version: " - 7.8.99"
|
||||
reason: debug information added in 7.9.0
|
||||
@ -808,6 +808,7 @@ setup:
|
||||
terms:
|
||||
field: str
|
||||
collect_mode: breadth_first
|
||||
execution_hint: global_ordinals
|
||||
aggs:
|
||||
max_number:
|
||||
max:
|
||||
@ -823,9 +824,83 @@ setup:
|
||||
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
|
||||
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
|
||||
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
|
||||
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
|
||||
- gt: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
|
||||
- match: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
|
||||
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
|
||||
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
|
||||
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test_3
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
properties:
|
||||
str:
|
||||
type: keyword
|
||||
- do:
|
||||
bulk:
|
||||
index: test_3
|
||||
refresh: true
|
||||
body: |
|
||||
{ "index": {} }
|
||||
{ "str": ["pig", "sheep"], "number": 100 }
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_3
|
||||
body:
|
||||
profile: true
|
||||
size: 0
|
||||
aggs:
|
||||
str_terms:
|
||||
terms:
|
||||
field: str
|
||||
collect_mode: breadth_first
|
||||
execution_hint: global_ordinals
|
||||
aggs:
|
||||
max_number:
|
||||
max:
|
||||
field: number
|
||||
- match: { aggregations.str_terms.buckets.0.key: pig }
|
||||
- match: { aggregations.str_terms.buckets.0.max_number.value: 100 }
|
||||
- match: { aggregations.str_terms.buckets.1.key: sheep }
|
||||
- match: { aggregations.str_terms.buckets.1.max_number.value: 100 }
|
||||
- match: { profile.shards.0.aggregations.0.type: GlobalOrdinalsStringTermsAggregator }
|
||||
- match: { profile.shards.0.aggregations.0.description: str_terms }
|
||||
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 1 }
|
||||
- match: { profile.shards.0.aggregations.0.debug.deferred_aggregators: [ max_number ] }
|
||||
- match: { profile.shards.0.aggregations.0.debug.collection_strategy: dense }
|
||||
- match: { profile.shards.0.aggregations.0.debug.result_strategy: terms }
|
||||
- match: { profile.shards.0.aggregations.0.debug.segments_with_single_valued_ords: 0 }
|
||||
- gt: { profile.shards.0.aggregations.0.debug.segments_with_multi_valued_ords: 0 }
|
||||
- match: { profile.shards.0.aggregations.0.debug.has_filter: false }
|
||||
- match: { profile.shards.0.aggregations.0.children.0.type: MaxAggregator }
|
||||
- match: { profile.shards.0.aggregations.0.children.0.description: max_number }
|
||||
|
||||
---
|
||||
"numeric profiler":
|
||||
- skip:
|
||||
version: " - 7.8.99"
|
||||
reason: debug information added in 7.9.0
|
||||
- do:
|
||||
bulk:
|
||||
index: test_1
|
||||
refresh: true
|
||||
body: |
|
||||
{ "index": {} }
|
||||
{ "number": 1 }
|
||||
{ "index": {} }
|
||||
{ "number": 3 }
|
||||
{ "index": {} }
|
||||
{ "number": 1 }
|
||||
{ "index": {} }
|
||||
{ "number": 1 }
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_1
|
||||
|
@ -33,7 +33,6 @@ import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.common.util.IntArray;
|
||||
import org.elasticsearch.common.util.LongHash;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
@ -73,6 +72,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
private final long valueCount;
|
||||
private final GlobalOrdLookupFunction lookupGlobalOrd;
|
||||
protected final CollectionStrategy collectionStrategy;
|
||||
protected int segmentsWithSingleValuedOrds = 0;
|
||||
protected int segmentsWithMultiValuedOrds = 0;
|
||||
|
||||
public interface GlobalOrdLookupFunction {
|
||||
BytesRef apply(long ord) throws IOException;
|
||||
@ -102,7 +103,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
valuesSource.globalOrdinalsValues(context.searcher().getIndexReader().leaves().get(0)) : DocValues.emptySortedSet();
|
||||
this.valueCount = values.getValueCount();
|
||||
this.lookupGlobalOrd = values::lookupOrd;
|
||||
this.acceptedGlobalOrdinals = includeExclude == null ? l -> true : includeExclude.acceptedGlobalOrdinals(values)::get;
|
||||
this.acceptedGlobalOrdinals = includeExclude == null ? ALWAYS_TRUE : includeExclude.acceptedGlobalOrdinals(values)::get;
|
||||
this.collectionStrategy = remapGlobalOrds ? new RemapGlobalOrds() : new DenseGlobalOrds();
|
||||
}
|
||||
|
||||
@ -110,24 +111,60 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
return collectionStrategy.describe();
|
||||
}
|
||||
|
||||
private SortedSetDocValues getGlobalOrds(LeafReaderContext ctx) throws IOException {
|
||||
return acceptedGlobalOrdinals == null ?
|
||||
valuesSource.globalOrdinalsValues(ctx) : new FilteredOrdinals(valuesSource.globalOrdinalsValues(ctx), acceptedGlobalOrdinals);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
|
||||
SortedSetDocValues globalOrds = getGlobalOrds(ctx);
|
||||
SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx);
|
||||
collectionStrategy.globalOrdsReady(globalOrds);
|
||||
SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
|
||||
if (singleValues != null) {
|
||||
segmentsWithSingleValuedOrds++;
|
||||
if (acceptedGlobalOrdinals == ALWAYS_TRUE) {
|
||||
/*
|
||||
* Optimize when there isn't a filter because that is very
|
||||
* common and marginally faster.
|
||||
*/
|
||||
return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (false == singleValues.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
int globalOrd = singleValues.ordValue();
|
||||
collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
|
||||
}
|
||||
});
|
||||
}
|
||||
return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (singleValues.advanceExact(doc)) {
|
||||
int ord = singleValues.ordValue();
|
||||
collectionStrategy.collectGlobalOrd(doc, ord, sub);
|
||||
if (false == singleValues.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
int globalOrd = singleValues.ordValue();
|
||||
if (false == acceptedGlobalOrdinals.test(globalOrd)) {
|
||||
return;
|
||||
}
|
||||
collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
|
||||
}
|
||||
});
|
||||
}
|
||||
segmentsWithMultiValuedOrds++;
|
||||
if (acceptedGlobalOrdinals == ALWAYS_TRUE) {
|
||||
/*
|
||||
* Optimize when there isn't a filter because that is very
|
||||
* common and marginally faster.
|
||||
*/
|
||||
return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, globalOrds) {
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (false == globalOrds.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
|
||||
collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -136,10 +173,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (globalOrds.advanceExact(doc)) {
|
||||
for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
|
||||
collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
|
||||
if (false == globalOrds.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
for (long globalOrd = globalOrds.nextOrd(); globalOrd != NO_MORE_ORDS; globalOrd = globalOrds.nextOrd()) {
|
||||
if (false == acceptedGlobalOrdinals.test(globalOrd)) {
|
||||
continue;
|
||||
}
|
||||
collectionStrategy.collectGlobalOrd(doc, globalOrd, sub);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -160,6 +201,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
super.collectDebugInfo(add);
|
||||
add.accept("collection_strategy", collectionStrategy.describe());
|
||||
add.accept("result_strategy", resultStrategy.describe());
|
||||
add.accept("segments_with_single_valued_ords", segmentsWithSingleValuedOrds);
|
||||
add.accept("segments_with_multi_valued_ords", segmentsWithMultiValuedOrds);
|
||||
add.accept("has_filter", acceptedGlobalOrdinals != ALWAYS_TRUE);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -253,26 +297,31 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
|
||||
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
|
||||
mapping = valuesSource.globalOrdinalsMapping(ctx);
|
||||
// Dense mode doesn't support include/exclude so we don't have to check it here.
|
||||
if (singleValues != null) {
|
||||
segmentsWithSingleValuedOrds++;
|
||||
return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) {
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (singleValues.advanceExact(doc)) {
|
||||
final int ord = singleValues.ordValue();
|
||||
segmentDocCounts.increment(ord + 1, 1);
|
||||
if (false == singleValues.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
int ord = singleValues.ordValue();
|
||||
segmentDocCounts.increment(ord + 1, 1);
|
||||
}
|
||||
});
|
||||
}
|
||||
segmentsWithMultiValuedOrds++;
|
||||
return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) {
|
||||
@Override
|
||||
public void collect(int doc, long owningBucketOrd) throws IOException {
|
||||
assert owningBucketOrd == 0;
|
||||
if (segmentOrds.advanceExact(doc)) {
|
||||
for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
|
||||
segmentDocCounts.increment(segmentOrd + 1, 1);
|
||||
}
|
||||
if (false == segmentOrds.advanceExact(doc)) {
|
||||
return;
|
||||
}
|
||||
for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
|
||||
segmentDocCounts.increment(segmentOrd + 1, 1);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -306,52 +355,6 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
}
|
||||
}
|
||||
|
||||
private static final class FilteredOrdinals extends AbstractSortedSetDocValues {
|
||||
|
||||
private final SortedSetDocValues inner;
|
||||
private final LongPredicate accepted;
|
||||
|
||||
private FilteredOrdinals(SortedSetDocValues inner, LongPredicate accepted) {
|
||||
this.inner = inner;
|
||||
this.accepted = accepted;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValueCount() {
|
||||
return inner.getValueCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef lookupOrd(long ord) throws IOException {
|
||||
return inner.lookupOrd(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextOrd() throws IOException {
|
||||
for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) {
|
||||
if (accepted.test(ord)) {
|
||||
return ord;
|
||||
}
|
||||
}
|
||||
return NO_MORE_ORDS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
if (inner.advanceExact(target)) {
|
||||
for (long ord = inner.nextOrd(); ord != NO_MORE_ORDS; ord = inner.nextOrd()) {
|
||||
if (accepted.test(ord)) {
|
||||
// reset the iterator
|
||||
boolean advanced = inner.advanceExact(target);
|
||||
assert advanced;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Strategy for collecting global ordinals.
|
||||
* <p>
|
||||
@ -800,4 +803,9 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||
System.arraycopy(from.bytes, from.offset, to.bytes, 0, from.length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Predicate used for {@link #acceptedGlobalOrdinals} if there is no filter.
|
||||
*/
|
||||
private static final LongPredicate ALWAYS_TRUE = l -> true;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user