I've always been confused by the strange behavior that I saw when working on #57304. Specifically, I saw that switching from a bimorphic invocation to a monomorphic invocation gave us a 7%-15% performance bump. This felt *bonkers* to me. And, it also made me wonder whether it'd be worth looking into doing it everywhere.

It turns out that, no, it isn't needed everywhere. This benchmark shows that a bimorphic invocation like:
```
LongKeyedBucketOrds ords = new LongKeyedBucketOrds.FromSingle();
ords.add(0, 0); <------ this line
```
is 19% slower than a monomorphic invocation like:
```
LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle();
ords.add(0, 0); <------ this line
```
But *only* when the reference is mutable. In the example above, if `ords` is never changed then both perform the same. But if the `ords` reference is assigned twice then we start to see the difference:
```
immutable bimorphic    avgt 10 6.468 ± 0.045 ns/op
immutable monomorphic  avgt 10 6.756 ± 0.026 ns/op
mutable   bimorphic    avgt 10 9.741 ± 0.073 ns/op
mutable   monomorphic  avgt 10 8.190 ± 0.016 ns/op
```
So the conclusion from all this is that we've done the right thing: `auto_date_histogram` is the only aggregation in which `ords` isn't final and it is the only aggregation that forces monomorphic invocations. All other aggregations use an immutable bimorphic invocation. Which is fine.

Relates to #56487
This commit is contained in:
parent
db89764539
commit
81cba796e6
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.benchmark.search.aggregations.bucket.terms;
|
||||
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.PageCacheRecycler;
|
||||
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
|
||||
import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OperationsPerInvocation;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Fork(2)
|
||||
@Warmup(iterations = 10)
|
||||
@Measurement(iterations = 5)
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@OperationsPerInvocation(1_000_000)
|
||||
@State(Scope.Benchmark)
|
||||
public class LongKeyedBucketOrdsBenchmark {
|
||||
private static final long LIMIT = 1_000_000;
|
||||
/**
|
||||
* The number of distinct values to add to the buckets.
|
||||
*/
|
||||
private static final long DISTINCT_VALUES = 10;
|
||||
/**
|
||||
* The number of buckets to create in the {@link #multiBucket} case.
|
||||
* <p>
|
||||
* If this is not relatively prime to {@link #DISTINCT_VALUES} then the
|
||||
* values won't be scattered evenly across the buckets.
|
||||
*/
|
||||
private static final long DISTINCT_BUCKETS = 21;
|
||||
|
||||
private final PageCacheRecycler recycler = new PageCacheRecycler(Settings.EMPTY);
|
||||
private final BigArrays bigArrays = new BigArrays(recycler, null, "REQUEST");
|
||||
|
||||
/**
|
||||
* Force loading all of the implementations just for extra paranoia's sake.
|
||||
* We really don't want the JVM to be able to eliminate one of them just
|
||||
* because we don't use it in the particular benchmark. That is totally a
|
||||
* thing it'd do. It is sneaky.
|
||||
*/
|
||||
@Setup
|
||||
public void forceLoadClasses(Blackhole bh) {
|
||||
bh.consume(LongKeyedBucketOrds.FromSingle.class);
|
||||
bh.consume(LongKeyedBucketOrds.FromMany.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
|
||||
* because it is not needed.
|
||||
*/
|
||||
@Benchmark
|
||||
public void singleBucketIntoSingleImmutableMonmorphicInvocation(Blackhole bh) {
|
||||
try (LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays)) {
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
ords.add(0, i % DISTINCT_VALUES);
|
||||
}
|
||||
bh.consume(ords);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates the way that most aggregations use {@link LongKeyedBucketOrds}.
|
||||
*/
|
||||
@Benchmark
|
||||
public void singleBucketIntoSingleImmutableBimorphicInvocation(Blackhole bh) {
|
||||
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE)) {
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
ords.add(0, i % DISTINCT_VALUES);
|
||||
}
|
||||
bh.consume(ords);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates the way that {@link AutoDateHistogramAggregationBuilder} uses {@link LongKeyedBucketOrds}.
|
||||
*/
|
||||
@Benchmark
|
||||
public void singleBucketIntoSingleMutableMonmorphicInvocation(Blackhole bh) {
|
||||
LongKeyedBucketOrds.FromSingle ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
if (i % 100_000 == 0) {
|
||||
ords.close();
|
||||
bh.consume(ords);
|
||||
ords = new LongKeyedBucketOrds.FromSingle(bigArrays);
|
||||
}
|
||||
ords.add(0, i % DISTINCT_VALUES);
|
||||
}
|
||||
bh.consume(ords);
|
||||
ords.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates a way that we do <strong>not</strong> use {@link LongKeyedBucketOrds}
|
||||
* because it is significantly slower than the
|
||||
* {@link #singleBucketIntoSingleMutableMonmorphicInvocation monomorphic invocation}.
|
||||
*/
|
||||
@Benchmark
|
||||
public void singleBucketIntoSingleMutableBimorphicInvocation(Blackhole bh) {
|
||||
LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
if (i % 100_000 == 0) {
|
||||
ords.close();
|
||||
bh.consume(ords);
|
||||
ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.ONE);
|
||||
}
|
||||
ords.add(0, i % DISTINCT_VALUES);
|
||||
|
||||
}
|
||||
bh.consume(ords);
|
||||
ords.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates an aggregation that collects from a single bucket "by accident".
|
||||
* This can happen if an aggregation is under, say, a {@code terms}
|
||||
* aggregation and there is only a single value for that term in the index.
|
||||
*/
|
||||
@Benchmark
|
||||
public void singleBucketIntoMulti(Blackhole bh) {
|
||||
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
ords.add(0, i % DISTINCT_VALUES);
|
||||
}
|
||||
bh.consume(ords);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emulates an aggregation that collects from many buckets.
|
||||
*/
|
||||
@Benchmark
|
||||
public void multiBucket(Blackhole bh) {
|
||||
try (LongKeyedBucketOrds ords = LongKeyedBucketOrds.build(bigArrays, CardinalityUpperBound.MANY)) {
|
||||
for (long i = 0; i < LIMIT; i++) {
|
||||
ords.add(i % DISTINCT_BUCKETS, i % DISTINCT_VALUES);
|
||||
}
|
||||
bh.consume(ords);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue