Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)

* Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize

* Remove print

* Move benchmark, add header
This commit is contained in:
Jonathan Wei 2019-09-04 20:26:04 -07:00 committed by Fangjin Yang
parent de18840412
commit f36fd73f60
2 changed files with 86 additions and 4 deletions

View File

@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.benchmark.indexing;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.segment.StringDimensionIndexer;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark for {@link StringDimensionIndexer#estimateEncodedKeyComponentSize}.
 *
 * <p>Before measurement, a fresh indexer is fed {@link #cardinality} distinct string values and an
 * {@code int[]} of length {@link #rowSize} is prepared as the benchmark input. The single benchmark
 * method then repeatedly estimates the size of that array.
 */
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
public class StringDimensionIndexerBenchmark
{
  /** Indexer under test; rebuilt by {@link #setup()}. */
  StringDimensionIndexer indexer;

  /** Input handed to the estimator on every benchmark invocation; rebuilt by {@link #setup()}. */
  int[] exampleArray;

  /** Number of distinct string values ("abcd-0", "abcd-1", ...) fed to the indexer during setup. */
  @Param({"10000"})
  public int cardinality;

  /** Length of {@link #exampleArray}. */
  @Param({"8"})
  public int rowSize;

  /**
   * Builds the indexer and the example input array.
   *
   * <p>The array entries are {@code 0, stride, 2 * stride, ...} where
   * {@code stride = cardinality / rowSize} (integer division).
   * NOTE(review): these entries are presumably meant to be valid dictionary ids produced by the
   * indexer for the values added here -- confirm against StringDimensionIndexer.
   */
  @Setup
  public void setup()
  {
    indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
    for (int valueIdx = 0; valueIdx < cardinality; valueIdx++) {
      indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + valueIdx, true);
    }

    exampleArray = new int[rowSize];
    final int stride = cardinality / rowSize;
    // Running offset yields the same values as (position * stride).
    int offset = 0;
    for (int position = 0; position < rowSize; position++) {
      exampleArray[position] = offset;
      offset += stride;
    }
  }

  /**
   * Measures the average time, reported in microseconds, of one size estimate for
   * {@link #exampleArray}.
   *
   * @param blackhole JMH sink that receives the estimate
   */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void estimateEncodedKeyComponentSize(Blackhole blackhole)
  {
    blackhole.consume(indexer.estimateEncodedKeyComponentSize(exampleArray));
  }
}

View File

@ -308,10 +308,14 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
// even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave // even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave
// more buffer to be safe // more buffer to be safe
long estimatedSize = key.length * Integer.BYTES; long estimatedSize = key.length * Integer.BYTES;
estimatedSize += Arrays.stream(key) long totalChars = 0;
.filter(element -> dimLookup.getValue(element) != null) for (int element : key) {
.mapToLong(element -> dimLookup.getValue(element).length() * Character.BYTES) String val = dimLookup.getValue(element);
.sum(); if (val != null) {
totalChars += val.length();
}
}
estimatedSize += totalChars * Character.BYTES;
return estimatedSize; return estimatedSize;
} }