Optimization for expressions that hit a single long column. (#6599)

* Optimization for expressions that hit a single long column.

There was previously a single-long-input optimization that applied only
to the time column. It now applies to any expression that reads a single
long column, with __time handled as a special case of the same code path.
Also adds type-specific value caching to ExprEval, which allowed simplifying
the SingleLongInputCachingExpressionColumnValueSelector code.

* Add more benchmarks.

* Don't use LRU cache for __time.

* Simplify a bit.

* Let the cache grow.
This commit is contained in:
Gian Merlino 2018-11-13 09:36:32 -08:00 committed by GitHub
parent e326086604
commit 52f6bdc1eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 282 additions and 81 deletions

View File

@ -80,7 +80,16 @@ public class ExpressionSelectorBenchmark
{
final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo(
ImmutableList.of(
BenchmarkColumnSchema.makeNormal("n", ValueType.LONG, false, 1, 0d, 0d, 10000d, false),
BenchmarkColumnSchema.makeZipf(
"n",
ValueType.LONG,
false,
1,
0d,
1000,
10000,
3d
),
BenchmarkColumnSchema.makeZipf(
"s",
ValueType.STRING,
@ -146,10 +155,7 @@ public class ExpressionSelectorBenchmark
final List<?> results = cursors
.map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
while (!cursor.isDone()) {
blackhole.consume(selector.getLong());
cursor.advance();
}
consumeLong(cursor, selector, blackhole);
return null;
})
.toList();
@ -219,6 +225,71 @@ public class ExpressionSelectorBenchmark
blackhole.consume(count);
}
/**
 * Benchmark: formats {@code __time} as {@code yyyy-MM-dd} using an expression virtual column named "v",
 * reading the result through a {@link DimensionSelector}.
 */
@Benchmark
public void timeFormatUsingExpression(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final ExpressionVirtualColumn formattedTime = new ExpressionVirtualColumn(
      "v",
      "timestamp_format(__time, 'yyyy-MM-dd')",
      ValueType.STRING,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursors = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(formattedTime)),
      Granularities.ALL,
      false,
      null
  );

  // Each cursor maps to null; the useful work is the consumeDimension call made while mapping.
  final List<?> results = cursors
      .map(
          cursor -> {
            final DimensionSelector selector =
                cursor.getColumnSelectorFactory().makeDimensionSelector(DefaultDimensionSpec.of("v"));
            consumeDimension(cursor, selector, blackhole);
            return null;
          }
      )
      .toList();

  blackhole.consume(results);
}
/**
 * Benchmark: formats the time column as {@code yyyy-MM-dd} using a {@link TimeFormatExtractionFn} wrapped in an
 * {@link ExtractionDimensionSpec}, with no virtual columns involved.
 */
@Benchmark
public void timeFormatUsingExtractionFn(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final Sequence<Cursor> cursors = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  // Each cursor maps to null; the useful work is the consumeDimension call made while mapping.
  final List<?> results = cursors
      .map(
          cursor -> {
            // Built per cursor, exactly as many times as the selector is created.
            final ExtractionDimensionSpec timeFormatSpec = new ExtractionDimensionSpec(
                ColumnHolder.TIME_COLUMN_NAME,
                "v",
                new TimeFormatExtractionFn("yyyy-MM-dd", null, null, null, false)
            );
            final DimensionSelector selector =
                cursor.getColumnSelectorFactory().makeDimensionSelector(timeFormatSpec);
            consumeDimension(cursor, selector, blackhole);
            return null;
          }
      )
      .toList();

  blackhole.consume(results);
}
@Benchmark
public void strlenUsingExpressionAsLong(Blackhole blackhole)
{
@ -312,6 +383,70 @@ public class ExpressionSelectorBenchmark
blackhole.consume(results);
}
/**
 * Benchmark: evaluates the expression {@code n + 1} as a LONG virtual column named "v" and drains it with
 * {@code consumeLong}.
 */
@Benchmark
public void arithmeticOnLong(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final ExpressionVirtualColumn plusOne = new ExpressionVirtualColumn(
      "v",
      "n + 1",
      ValueType.LONG,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursors = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(plusOne)),
      Granularities.ALL,
      false,
      null
  );

  // Each cursor maps to null; the useful work is the consumeLong call made while mapping.
  final List<?> results = cursors
      .map(
          cursor -> {
            final ColumnValueSelector selector =
                cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
            consumeLong(cursor, selector, blackhole);
            return null;
          }
      )
      .toList();

  blackhole.consume(results);
}
/**
 * Benchmark: evaluates an expression that concatenates column {@code n} with a string suffix and compares the
 * result to a string literal, exposed as a LONG virtual column named "v" and drained with {@code consumeLong}.
 */
@Benchmark
public void stringConcatAndCompareOnLong(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final ExpressionVirtualColumn favoriteNumberCheck = new ExpressionVirtualColumn(
      "v",
      "concat(n, ' is my favorite number') == '3 is my favorite number'",
      ValueType.LONG,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursors = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(favoriteNumberCheck)),
      Granularities.ALL,
      false,
      null
  );

  // Each cursor maps to null; the useful work is the consumeLong call made while mapping.
  final List<?> results = cursors
      .map(
          cursor -> {
            final ColumnValueSelector selector =
                cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
            consumeLong(cursor, selector, blackhole);
            return null;
          }
      )
      .toList();

  blackhole.consume(results);
}
private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole)
{
if (selector.getValueCardinality() >= 0) {

View File

@ -27,6 +27,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
import org.apache.druid.data.input.impl.FloatDimensionSchema;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema;
@ -99,6 +100,9 @@ public class SegmentGenerator implements Closeable
case LONG:
dimensions.add(new LongDimensionSchema(columnSchema.getName()));
break;
case DOUBLE:
dimensions.add(new DoubleDimensionSchema(columnSchema.getName()));
break;
case FLOAT:
dimensions.add(new FloatDimensionSchema(columnSchema.getName()));
break;

View File

@ -30,6 +30,10 @@ import javax.annotation.Nullable;
*/
public abstract class ExprEval<T>
{
// Cached String value; filled in lazily by asString().
private boolean stringValueValid = false;
private String stringValue;
public static ExprEval ofLong(@Nullable Number longValue)
{
return new LongExprEval(longValue);
@ -89,7 +93,7 @@ public abstract class ExprEval<T>
@Nullable
final T value;
private ExprEval(T value)
private ExprEval(@Nullable T value)
{
this.value = value;
}
@ -115,7 +119,17 @@ public abstract class ExprEval<T>
@Nullable
public String asString()
{
return value == null ? null : String.valueOf(value);
if (!stringValueValid) {
if (value == null) {
stringValue = null;
} else {
stringValue = String.valueOf(value);
}
stringValueValid = true;
}
return stringValue;
}
public abstract boolean asBoolean();
@ -126,7 +140,6 @@ public abstract class ExprEval<T>
private abstract static class NumericExprEval extends ExprEval<Number>
{
private NumericExprEval(@Nullable Number value)
{
super(value);
@ -247,6 +260,16 @@ public abstract class ExprEval<T>
private static class StringExprEval extends ExprEval<String>
{
// Cached primitive values.
private boolean intValueValid = false;
private boolean longValueValid = false;
private boolean doubleValueValid = false;
private boolean booleanValueValid = false;
private int intValue;
private long longValue;
private double doubleValue;
private boolean booleanValue;
private static final StringExprEval OF_NULL = new StringExprEval(null);
private Number numericVal;
@ -263,9 +286,48 @@ public abstract class ExprEval<T>
}
@Override
public final int asInt()
public int asInt()
{
Number number = asNumber();
if (!intValueValid) {
intValue = computeInt();
intValueValid = true;
}
return intValue;
}
/**
 * Returns the long form of this string value, computing it with {@code computeLong()} on the first call and
 * returning the remembered result on later calls.
 */
@Override
public long asLong()
{
  if (longValueValid) {
    return longValue;
  }

  // First call: compute once, then mark the cached value as usable.
  longValue = computeLong();
  longValueValid = true;
  return longValue;
}
/**
 * Returns the double form of this string value, computing it with {@code computeDouble()} on the first call and
 * returning the remembered result on later calls.
 */
@Override
public double asDouble()
{
  if (doubleValueValid) {
    return doubleValue;
  }

  // First call: compute once, then mark the cached value as usable.
  doubleValue = computeDouble();
  doubleValueValid = true;
  return doubleValue;
}
/**
 * Returns the wrapped string as-is (possibly null). The value is already a String, so no conversion is done here.
 */
@Nullable
@Override
public String asString()
{
return value;
}
private int computeInt()
{
Number number = computeNumber();
if (number == null) {
assert NullHandling.replaceWithDefault();
return 0;
@ -273,10 +335,9 @@ public abstract class ExprEval<T>
return number.intValue();
}
@Override
public final long asLong()
private long computeLong()
{
Number number = asNumber();
Number number = computeNumber();
if (number == null) {
assert NullHandling.replaceWithDefault();
return 0L;
@ -284,10 +345,9 @@ public abstract class ExprEval<T>
return number.longValue();
}
@Override
public final double asDouble()
private double computeDouble()
{
Number number = asNumber();
Number number = computeNumber();
if (number == null) {
assert NullHandling.replaceWithDefault();
return 0.0d;
@ -296,7 +356,7 @@ public abstract class ExprEval<T>
}
@Nullable
private Number asNumber()
private Number computeNumber()
{
if (value == null) {
return null;
@ -321,13 +381,18 @@ public abstract class ExprEval<T>
@Override
public boolean isNumericNull()
{
return asNumber() == null;
return computeNumber() == null;
}
@Override
public final boolean asBoolean()
{
return Evals.asBoolean(value);
if (!booleanValueValid) {
booleanValue = Evals.asBoolean(value);
booleanValueValid = true;
}
return booleanValue;
}
@Override
@ -335,9 +400,9 @@ public abstract class ExprEval<T>
{
switch (castTo) {
case DOUBLE:
return ExprEval.ofDouble(asNumber());
return ExprEval.ofDouble(computeNumber());
case LONG:
return ExprEval.ofLong(asNumber());
return ExprEval.ofLong(computeNumber());
case STRING:
return this;
}

View File

@ -137,12 +137,12 @@ public class ExpressionSelectors
final String column = Iterables.getOnlyElement(columns);
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column);
if (column.equals(ColumnHolder.TIME_COLUMN_NAME)) {
// Optimization for expressions that hit the __time column and nothing else.
// May be worth applying this optimization to all long columns?
if (capabilities != null && capabilities.getType() == ValueType.LONG) {
// Optimization for expressions that hit one long column and nothing else.
return new SingleLongInputCachingExpressionColumnValueSelector(
columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME),
expression
columnSelectorFactory.makeColumnValueSelector(column),
expression,
!ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted.
);
} else if (capabilities != null
&& capabilities.getType() == ValueType.STRING

View File

@ -20,6 +20,7 @@
package org.apache.druid.segment.virtual;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
@ -28,6 +29,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.ColumnValueSelector;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
* Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case
@ -35,30 +37,26 @@ import javax.annotation.Nonnull;
*/
public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector<ExprEval>
{
enum Validity
{
NONE,
DOUBLE,
LONG,
EVAL
}
private static final int CACHE_SIZE = 1000;
private final ColumnValueSelector selector;
private final Expr expression;
private final SingleInputBindings bindings = new SingleInputBindings();
// Last read input value
@Nullable
private final LruEvalCache lruEvalCache;
// Last read input value.
private long lastInput;
// Last computed output values (validity determined by "validity" field)
private Validity validity = Validity.NONE;
private double lastDoubleOutput;
private long lastLongOutput;
private ExprEval lastEvalOutput;
// Last computed output value, or null if there is none.
@Nullable
private ExprEval lastOutput;
public SingleLongInputCachingExpressionColumnValueSelector(
final ColumnValueSelector selector,
final Expr expression
final Expr expression,
final boolean useLruCache
)
{
// Verify expression has just one binding.
@ -68,6 +66,7 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
this.selector = Preconditions.checkNotNull(selector, "selector");
this.expression = Preconditions.checkNotNull(expression, "expression");
this.lruEvalCache = useLruCache ? new LruEvalCache() : null;
}
@Override
@ -80,59 +79,41 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
@Override
public double getDouble()
{
// No Assert for null handling as delegate selector already have it.
final long currentInput = selector.getLong();
if (lastInput == currentInput && validity == Validity.DOUBLE) {
return lastDoubleOutput;
} else {
final double output = eval(currentInput).asDouble();
lastInput = currentInput;
lastDoubleOutput = output;
validity = Validity.DOUBLE;
return output;
}
return getObject().asDouble();
}
@Override
public float getFloat()
{
// No Assert for null handling as delegate selector already have it.
return (float) getDouble();
return (float) getObject().asDouble();
}
@Override
public long getLong()
{
// No Assert for null handling as delegate selector already have it.
final long currentInput = selector.getLong();
if (lastInput == currentInput && validity == Validity.LONG) {
return lastLongOutput;
} else {
final long output = eval(currentInput).asLong();
lastInput = currentInput;
lastLongOutput = output;
validity = Validity.LONG;
return output;
}
return getObject().asLong();
}
@Nonnull
@Override
public ExprEval getObject()
{
final long currentInput = selector.getLong();
// No assert for null handling, as the delegate selector already has it.
final long input = selector.getLong();
final boolean cached = input == lastInput && lastOutput != null;
if (lastInput == currentInput && validity == Validity.EVAL) {
return lastEvalOutput;
} else {
final ExprEval output = eval(currentInput);
lastInput = currentInput;
lastEvalOutput = output;
validity = Validity.EVAL;
return output;
if (!cached) {
if (lruEvalCache == null) {
bindings.set(input);
lastOutput = expression.eval(bindings);
} else {
lastOutput = lruEvalCache.compute(input);
}
lastInput = input;
}
return lastOutput;
}
@Override
@ -141,12 +122,6 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
return ExprEval.class;
}
private ExprEval eval(final long value)
{
bindings.set(value);
return expression.eval(bindings);
}
@Override
public boolean isNull()
{
@ -155,4 +130,26 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
// ExprEval.isNumericNull checks whether the parsed primitive value is null or not.
return getObject().isNumericNull();
}
/**
 * Cache of expression results keyed by the long input value. Looked-up and newly inserted entries are moved to the
 * front of the linked map; once the map holds more than {@code CACHE_SIZE} entries the last entry is dropped.
 *
 * Not a static class: it evaluates through the enclosing selector's {@code bindings} and {@code expression}.
 */
public class LruEvalCache
{
  private final Long2ObjectLinkedOpenHashMap<ExprEval> m = new Long2ObjectLinkedOpenHashMap<>();

  /**
   * Returns the expression result for input {@code n}, evaluating the expression only when no entry for {@code n}
   * is currently held.
   */
  public ExprEval compute(final long n)
  {
    final ExprEval hit = m.getAndMoveToFirst(n);
    if (hit != null) {
      return hit;
    }

    // Miss: evaluate against the shared bindings, then remember the result at the front of the map.
    bindings.set(n);
    final ExprEval computed = expression.eval(bindings);
    m.putAndMoveToFirst(n, computed);

    // Insert first, trim afterwards, so the map briefly holds CACHE_SIZE + 1 entries.
    if (m.size() > CACHE_SIZE) {
      m.removeLast();
    }

    return computed;
  }
}
}