mirror of https://github.com/apache/druid.git
Optimization for expressions that hit a single long column. (#6599)
* Optimization for expressions that hit a single long column.

  There was previously a single-long-input optimization that applied only to the time column. These have been combined together. Also adds type-specific value caching to ExprEval, which allowed simplifying the SingleLongInputCachingExpressionColumnValueSelector code.
* Add more benchmarks.
* Don't use LRU cache for __time.
* Simplify a bit.
* Let the cache grow.
This commit is contained in:
parent
e326086604
commit
52f6bdc1eb
|
@ -80,7 +80,16 @@ public class ExpressionSelectorBenchmark
|
|||
{
|
||||
final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo(
|
||||
ImmutableList.of(
|
||||
BenchmarkColumnSchema.makeNormal("n", ValueType.LONG, false, 1, 0d, 0d, 10000d, false),
|
||||
BenchmarkColumnSchema.makeZipf(
|
||||
"n",
|
||||
ValueType.LONG,
|
||||
false,
|
||||
1,
|
||||
0d,
|
||||
1000,
|
||||
10000,
|
||||
3d
|
||||
),
|
||||
BenchmarkColumnSchema.makeZipf(
|
||||
"s",
|
||||
ValueType.STRING,
|
||||
|
@ -146,10 +155,7 @@ public class ExpressionSelectorBenchmark
|
|||
final List<?> results = cursors
|
||||
.map(cursor -> {
|
||||
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
|
||||
while (!cursor.isDone()) {
|
||||
blackhole.consume(selector.getLong());
|
||||
cursor.advance();
|
||||
}
|
||||
consumeLong(cursor, selector, blackhole);
|
||||
return null;
|
||||
})
|
||||
.toList();
|
||||
|
@ -219,6 +225,71 @@ public class ExpressionSelectorBenchmark
|
|||
blackhole.consume(count);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void timeFormatUsingExpression(Blackhole blackhole)
|
||||
{
|
||||
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
|
||||
null,
|
||||
index.getDataInterval(),
|
||||
VirtualColumns.create(
|
||||
ImmutableList.of(
|
||||
new ExpressionVirtualColumn(
|
||||
"v",
|
||||
"timestamp_format(__time, 'yyyy-MM-dd')",
|
||||
ValueType.STRING,
|
||||
TestExprMacroTable.INSTANCE
|
||||
)
|
||||
)
|
||||
),
|
||||
Granularities.ALL,
|
||||
false,
|
||||
null
|
||||
);
|
||||
|
||||
final List<?> results = cursors
|
||||
.map(cursor -> {
|
||||
final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(
|
||||
DefaultDimensionSpec.of("v")
|
||||
);
|
||||
consumeDimension(cursor, selector, blackhole);
|
||||
return null;
|
||||
})
|
||||
.toList();
|
||||
|
||||
blackhole.consume(results);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void timeFormatUsingExtractionFn(Blackhole blackhole)
|
||||
{
|
||||
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
|
||||
null,
|
||||
index.getDataInterval(),
|
||||
VirtualColumns.EMPTY,
|
||||
Granularities.ALL,
|
||||
false,
|
||||
null
|
||||
);
|
||||
|
||||
final List<?> results = cursors
|
||||
.map(cursor -> {
|
||||
final DimensionSelector selector = cursor
|
||||
.getColumnSelectorFactory()
|
||||
.makeDimensionSelector(
|
||||
new ExtractionDimensionSpec(
|
||||
ColumnHolder.TIME_COLUMN_NAME,
|
||||
"v",
|
||||
new TimeFormatExtractionFn("yyyy-MM-dd", null, null, null, false)
|
||||
)
|
||||
);
|
||||
consumeDimension(cursor, selector, blackhole);
|
||||
return null;
|
||||
})
|
||||
.toList();
|
||||
|
||||
blackhole.consume(results);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void strlenUsingExpressionAsLong(Blackhole blackhole)
|
||||
{
|
||||
|
@ -312,6 +383,70 @@ public class ExpressionSelectorBenchmark
|
|||
blackhole.consume(results);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void arithmeticOnLong(Blackhole blackhole)
|
||||
{
|
||||
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
|
||||
null,
|
||||
index.getDataInterval(),
|
||||
VirtualColumns.create(
|
||||
ImmutableList.of(
|
||||
new ExpressionVirtualColumn(
|
||||
"v",
|
||||
"n + 1",
|
||||
ValueType.LONG,
|
||||
TestExprMacroTable.INSTANCE
|
||||
)
|
||||
)
|
||||
),
|
||||
Granularities.ALL,
|
||||
false,
|
||||
null
|
||||
);
|
||||
|
||||
final List<?> results = cursors
|
||||
.map(cursor -> {
|
||||
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
|
||||
consumeLong(cursor, selector, blackhole);
|
||||
return null;
|
||||
})
|
||||
.toList();
|
||||
|
||||
blackhole.consume(results);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void stringConcatAndCompareOnLong(Blackhole blackhole)
|
||||
{
|
||||
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
|
||||
null,
|
||||
index.getDataInterval(),
|
||||
VirtualColumns.create(
|
||||
ImmutableList.of(
|
||||
new ExpressionVirtualColumn(
|
||||
"v",
|
||||
"concat(n, ' is my favorite number') == '3 is my favorite number'",
|
||||
ValueType.LONG,
|
||||
TestExprMacroTable.INSTANCE
|
||||
)
|
||||
)
|
||||
),
|
||||
Granularities.ALL,
|
||||
false,
|
||||
null
|
||||
);
|
||||
|
||||
final List<?> results = cursors
|
||||
.map(cursor -> {
|
||||
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
|
||||
consumeLong(cursor, selector, blackhole);
|
||||
return null;
|
||||
})
|
||||
.toList();
|
||||
|
||||
blackhole.consume(results);
|
||||
}
|
||||
|
||||
private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole)
|
||||
{
|
||||
if (selector.getValueCardinality() >= 0) {
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.druid.data.input.InputRow;
|
||||
import org.apache.druid.data.input.impl.DimensionSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.FloatDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.LongDimensionSchema;
|
||||
import org.apache.druid.data.input.impl.StringDimensionSchema;
|
||||
|
@ -99,6 +100,9 @@ public class SegmentGenerator implements Closeable
|
|||
case LONG:
|
||||
dimensions.add(new LongDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
case DOUBLE:
|
||||
dimensions.add(new DoubleDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
case FLOAT:
|
||||
dimensions.add(new FloatDimensionSchema(columnSchema.getName()));
|
||||
break;
|
||||
|
|
|
@ -30,6 +30,10 @@ import javax.annotation.Nullable;
|
|||
*/
|
||||
public abstract class ExprEval<T>
|
||||
{
|
||||
// Cached result of asString(); stringValueValid records whether stringValue has been computed yet.
|
||||
private boolean stringValueValid = false;
|
||||
private String stringValue;
|
||||
|
||||
public static ExprEval ofLong(@Nullable Number longValue)
|
||||
{
|
||||
return new LongExprEval(longValue);
|
||||
|
@ -89,7 +93,7 @@ public abstract class ExprEval<T>
|
|||
@Nullable
|
||||
final T value;
|
||||
|
||||
private ExprEval(T value)
|
||||
private ExprEval(@Nullable T value)
|
||||
{
|
||||
this.value = value;
|
||||
}
|
||||
|
@ -115,7 +119,17 @@ public abstract class ExprEval<T>
|
|||
@Nullable
|
||||
public String asString()
|
||||
{
|
||||
return value == null ? null : String.valueOf(value);
|
||||
if (!stringValueValid) {
|
||||
if (value == null) {
|
||||
stringValue = null;
|
||||
} else {
|
||||
stringValue = String.valueOf(value);
|
||||
}
|
||||
|
||||
stringValueValid = true;
|
||||
}
|
||||
|
||||
return stringValue;
|
||||
}
|
||||
|
||||
public abstract boolean asBoolean();
|
||||
|
@ -126,7 +140,6 @@ public abstract class ExprEval<T>
|
|||
|
||||
private abstract static class NumericExprEval extends ExprEval<Number>
|
||||
{
|
||||
|
||||
private NumericExprEval(@Nullable Number value)
|
||||
{
|
||||
super(value);
|
||||
|
@ -247,6 +260,16 @@ public abstract class ExprEval<T>
|
|||
|
||||
private static class StringExprEval extends ExprEval<String>
|
||||
{
|
||||
// Cached primitive values.
|
||||
private boolean intValueValid = false;
|
||||
private boolean longValueValid = false;
|
||||
private boolean doubleValueValid = false;
|
||||
private boolean booleanValueValid = false;
|
||||
private int intValue;
|
||||
private long longValue;
|
||||
private double doubleValue;
|
||||
private boolean booleanValue;
|
||||
|
||||
private static final StringExprEval OF_NULL = new StringExprEval(null);
|
||||
|
||||
private Number numericVal;
|
||||
|
@ -263,9 +286,48 @@ public abstract class ExprEval<T>
|
|||
}
|
||||
|
||||
@Override
|
||||
public final int asInt()
|
||||
public int asInt()
|
||||
{
|
||||
Number number = asNumber();
|
||||
if (!intValueValid) {
|
||||
intValue = computeInt();
|
||||
intValueValid = true;
|
||||
}
|
||||
|
||||
return intValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long asLong()
|
||||
{
|
||||
if (!longValueValid) {
|
||||
longValue = computeLong();
|
||||
longValueValid = true;
|
||||
}
|
||||
|
||||
return longValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double asDouble()
|
||||
{
|
||||
if (!doubleValueValid) {
|
||||
doubleValue = computeDouble();
|
||||
doubleValueValid = true;
|
||||
}
|
||||
|
||||
return doubleValue;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String asString()
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
private int computeInt()
|
||||
{
|
||||
Number number = computeNumber();
|
||||
if (number == null) {
|
||||
assert NullHandling.replaceWithDefault();
|
||||
return 0;
|
||||
|
@ -273,10 +335,9 @@ public abstract class ExprEval<T>
|
|||
return number.intValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final long asLong()
|
||||
private long computeLong()
|
||||
{
|
||||
Number number = asNumber();
|
||||
Number number = computeNumber();
|
||||
if (number == null) {
|
||||
assert NullHandling.replaceWithDefault();
|
||||
return 0L;
|
||||
|
@ -284,10 +345,9 @@ public abstract class ExprEval<T>
|
|||
return number.longValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final double asDouble()
|
||||
private double computeDouble()
|
||||
{
|
||||
Number number = asNumber();
|
||||
Number number = computeNumber();
|
||||
if (number == null) {
|
||||
assert NullHandling.replaceWithDefault();
|
||||
return 0.0d;
|
||||
|
@ -296,7 +356,7 @@ public abstract class ExprEval<T>
|
|||
}
|
||||
|
||||
@Nullable
|
||||
private Number asNumber()
|
||||
private Number computeNumber()
|
||||
{
|
||||
if (value == null) {
|
||||
return null;
|
||||
|
@ -321,13 +381,18 @@ public abstract class ExprEval<T>
|
|||
@Override
|
||||
public boolean isNumericNull()
|
||||
{
|
||||
return asNumber() == null;
|
||||
return computeNumber() == null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean asBoolean()
|
||||
{
|
||||
return Evals.asBoolean(value);
|
||||
if (!booleanValueValid) {
|
||||
booleanValue = Evals.asBoolean(value);
|
||||
booleanValueValid = true;
|
||||
}
|
||||
|
||||
return booleanValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -335,9 +400,9 @@ public abstract class ExprEval<T>
|
|||
{
|
||||
switch (castTo) {
|
||||
case DOUBLE:
|
||||
return ExprEval.ofDouble(asNumber());
|
||||
return ExprEval.ofDouble(computeNumber());
|
||||
case LONG:
|
||||
return ExprEval.ofLong(asNumber());
|
||||
return ExprEval.ofLong(computeNumber());
|
||||
case STRING:
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -137,12 +137,12 @@ public class ExpressionSelectors
|
|||
final String column = Iterables.getOnlyElement(columns);
|
||||
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column);
|
||||
|
||||
if (column.equals(ColumnHolder.TIME_COLUMN_NAME)) {
|
||||
// Optimization for expressions that hit the __time column and nothing else.
|
||||
// May be worth applying this optimization to all long columns?
|
||||
if (capabilities != null && capabilities.getType() == ValueType.LONG) {
|
||||
// Optimization for expressions that hit one long column and nothing else.
|
||||
return new SingleLongInputCachingExpressionColumnValueSelector(
|
||||
columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
|
||||
expression
|
||||
columnSelectorFactory.makeColumnValueSelector(column),
|
||||
expression,
|
||||
!ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted.
|
||||
);
|
||||
} else if (capabilities != null
|
||||
&& capabilities.getType() == ValueType.STRING
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.apache.druid.segment.virtual;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.math.expr.Expr;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
|
@ -28,6 +29,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
|||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/**
|
||||
* Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case
|
||||
|
@ -35,30 +37,26 @@ import javax.annotation.Nonnull;
|
|||
*/
|
||||
public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector<ExprEval>
|
||||
{
|
||||
enum Validity
|
||||
{
|
||||
NONE,
|
||||
DOUBLE,
|
||||
LONG,
|
||||
EVAL
|
||||
}
|
||||
private static final int CACHE_SIZE = 1000;
|
||||
|
||||
private final ColumnValueSelector selector;
|
||||
private final Expr expression;
|
||||
private final SingleInputBindings bindings = new SingleInputBindings();
|
||||
|
||||
// Last read input value
|
||||
@Nullable
|
||||
private final LruEvalCache lruEvalCache;
|
||||
|
||||
// Last read input value.
|
||||
private long lastInput;
|
||||
|
||||
// Last computed output values (validity determined by "validity" field)
|
||||
private Validity validity = Validity.NONE;
|
||||
private double lastDoubleOutput;
|
||||
private long lastLongOutput;
|
||||
private ExprEval lastEvalOutput;
|
||||
// Last computed output value, or null if there is none.
|
||||
@Nullable
|
||||
private ExprEval lastOutput;
|
||||
|
||||
public SingleLongInputCachingExpressionColumnValueSelector(
|
||||
final ColumnValueSelector selector,
|
||||
final Expr expression
|
||||
final Expr expression,
|
||||
final boolean useLruCache
|
||||
)
|
||||
{
|
||||
// Verify expression has just one binding.
|
||||
|
@ -68,6 +66,7 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
|
|||
|
||||
this.selector = Preconditions.checkNotNull(selector, "selector");
|
||||
this.expression = Preconditions.checkNotNull(expression, "expression");
|
||||
this.lruEvalCache = useLruCache ? new LruEvalCache() : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -80,59 +79,41 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
|
|||
@Override
|
||||
public double getDouble()
|
||||
{
|
||||
// No Assert for null handling as delegate selector already have it.
|
||||
final long currentInput = selector.getLong();
|
||||
|
||||
if (lastInput == currentInput && validity == Validity.DOUBLE) {
|
||||
return lastDoubleOutput;
|
||||
} else {
|
||||
final double output = eval(currentInput).asDouble();
|
||||
lastInput = currentInput;
|
||||
lastDoubleOutput = output;
|
||||
validity = Validity.DOUBLE;
|
||||
return output;
|
||||
}
|
||||
return getObject().asDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat()
|
||||
{
|
||||
// No Assert for null handling as delegate selector already have it.
|
||||
return (float) getDouble();
|
||||
return (float) getObject().asDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLong()
|
||||
{
|
||||
// No Assert for null handling as delegate selector already have it.
|
||||
final long currentInput = selector.getLong();
|
||||
|
||||
if (lastInput == currentInput && validity == Validity.LONG) {
|
||||
return lastLongOutput;
|
||||
} else {
|
||||
final long output = eval(currentInput).asLong();
|
||||
lastInput = currentInput;
|
||||
lastLongOutput = output;
|
||||
validity = Validity.LONG;
|
||||
return output;
|
||||
}
|
||||
return getObject().asLong();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public ExprEval getObject()
|
||||
{
|
||||
final long currentInput = selector.getLong();
|
||||
// No assert for null handling, as the delegate selector already has it.
|
||||
final long input = selector.getLong();
|
||||
final boolean cached = input == lastInput && lastOutput != null;
|
||||
|
||||
if (lastInput == currentInput && validity == Validity.EVAL) {
|
||||
return lastEvalOutput;
|
||||
if (!cached) {
|
||||
if (lruEvalCache == null) {
|
||||
bindings.set(input);
|
||||
lastOutput = expression.eval(bindings);
|
||||
} else {
|
||||
final ExprEval output = eval(currentInput);
|
||||
lastInput = currentInput;
|
||||
lastEvalOutput = output;
|
||||
validity = Validity.EVAL;
|
||||
return output;
|
||||
lastOutput = lruEvalCache.compute(input);
|
||||
}
|
||||
|
||||
lastInput = input;
|
||||
}
|
||||
|
||||
return lastOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -141,12 +122,6 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
|
|||
return ExprEval.class;
|
||||
}
|
||||
|
||||
private ExprEval eval(final long value)
|
||||
{
|
||||
bindings.set(value);
|
||||
return expression.eval(bindings);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isNull()
|
||||
{
|
||||
|
@ -155,4 +130,26 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
|
|||
// ExprEval.isNumericNull checks whether the parsed primitive value is null or not.
|
||||
return getObject().isNumericNull();
|
||||
}
|
||||
|
||||
public class LruEvalCache
|
||||
{
|
||||
private final Long2ObjectLinkedOpenHashMap<ExprEval> m = new Long2ObjectLinkedOpenHashMap<>();
|
||||
|
||||
public ExprEval compute(final long n)
|
||||
{
|
||||
ExprEval value = m.getAndMoveToFirst(n);
|
||||
|
||||
if (value == null) {
|
||||
bindings.set(n);
|
||||
value = expression.eval(bindings);
|
||||
m.putAndMoveToFirst(n, value);
|
||||
|
||||
if (m.size() > CACHE_SIZE) {
|
||||
m.removeLast();
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue