Optimization for expressions that hit a single long column. (#6599)

* Optimization for expressions that hit a single long column.

There was previously a single-long-input optimization that applied only
to the time column. It has been generalized so that the same code path
handles any expression that reads a single long column. Also adds
type-specific value caching to ExprEval, which allowed simplifying
the SingleLongInputCachingExpressionColumnValueSelector code.

* Add more benchmarks.

* Don't use LRU cache for __time.

* Simplify a bit.

* Let the cache grow.
This commit is contained in:
Gian Merlino 2018-11-13 09:36:32 -08:00 committed by GitHub
parent e326086604
commit 52f6bdc1eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 282 additions and 81 deletions

View File

@ -80,7 +80,16 @@ public class ExpressionSelectorBenchmark
{ {
final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo( final BenchmarkSchemaInfo schemaInfo = new BenchmarkSchemaInfo(
ImmutableList.of( ImmutableList.of(
BenchmarkColumnSchema.makeNormal("n", ValueType.LONG, false, 1, 0d, 0d, 10000d, false), BenchmarkColumnSchema.makeZipf(
"n",
ValueType.LONG,
false,
1,
0d,
1000,
10000,
3d
),
BenchmarkColumnSchema.makeZipf( BenchmarkColumnSchema.makeZipf(
"s", "s",
ValueType.STRING, ValueType.STRING,
@ -146,10 +155,7 @@ public class ExpressionSelectorBenchmark
final List<?> results = cursors final List<?> results = cursors
.map(cursor -> { .map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v"); final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
while (!cursor.isDone()) { consumeLong(cursor, selector, blackhole);
blackhole.consume(selector.getLong());
cursor.advance();
}
return null; return null;
}) })
.toList(); .toList();
@ -219,6 +225,71 @@ public class ExpressionSelectorBenchmark
blackhole.consume(count); blackhole.consume(count);
} }
/**
 * Benchmarks formatting {@code __time} with the expression {@code timestamp_format(__time, 'yyyy-MM-dd')},
 * exposed as the STRING-typed virtual column "v" and read through a dimension selector.
 */
@Benchmark
public void timeFormatUsingExpression(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);

  // The expression under test, registered as virtual column "v".
  final ExpressionVirtualColumn formattedTime = new ExpressionVirtualColumn(
      "v",
      "timestamp_format(__time, 'yyyy-MM-dd')",
      ValueType.STRING,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursorSequence = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(formattedTime)),
      Granularities.ALL,
      false,
      null
  );

  // Walk every cursor to the end, feeding each value to the blackhole.
  final List<?> drained = cursorSequence
      .map(cursor -> {
        final DimensionSelector dimSelector = cursor
            .getColumnSelectorFactory()
            .makeDimensionSelector(DefaultDimensionSpec.of("v"));
        consumeDimension(cursor, dimSelector, blackhole);
        return null;
      })
      .toList();

  blackhole.consume(drained);
}
/**
 * Benchmarks formatting the time column as {@code yyyy-MM-dd} with a {@link TimeFormatExtractionFn}
 * applied through an {@link ExtractionDimensionSpec}, with no virtual columns involved. Counterpart to
 * the expression-based time-format benchmark.
 */
@Benchmark
public void timeFormatUsingExtractionFn(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  final Sequence<Cursor> cursorSequence = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );

  // Walk every cursor to the end, feeding each value to the blackhole.
  final List<?> drained = cursorSequence
      .map(cursor -> {
        // A fresh spec (and extraction fn) is built per cursor, exactly as the selector is.
        final ExtractionDimensionSpec timeFormatSpec = new ExtractionDimensionSpec(
            ColumnHolder.TIME_COLUMN_NAME,
            "v",
            new TimeFormatExtractionFn("yyyy-MM-dd", null, null, null, false)
        );
        final DimensionSelector dimSelector =
            cursor.getColumnSelectorFactory().makeDimensionSelector(timeFormatSpec);
        consumeDimension(cursor, dimSelector, blackhole);
        return null;
      })
      .toList();

  blackhole.consume(drained);
}
@Benchmark @Benchmark
public void strlenUsingExpressionAsLong(Blackhole blackhole) public void strlenUsingExpressionAsLong(Blackhole blackhole)
{ {
@ -312,6 +383,70 @@ public class ExpressionSelectorBenchmark
blackhole.consume(results); blackhole.consume(results);
} }
/**
 * Benchmarks the arithmetic expression {@code n + 1}, exposed as the LONG-typed virtual column "v"
 * and read as longs through a column value selector.
 */
@Benchmark
public void arithmeticOnLong(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);

  // The expression under test, registered as virtual column "v".
  final ExpressionVirtualColumn plusOne = new ExpressionVirtualColumn(
      "v",
      "n + 1",
      ValueType.LONG,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursorSequence = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(plusOne)),
      Granularities.ALL,
      false,
      null
  );

  // Walk every cursor to the end, feeding each value to the blackhole.
  final List<?> drained = cursorSequence
      .map(cursor -> {
        final ColumnValueSelector valueSelector =
            cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
        consumeLong(cursor, valueSelector, blackhole);
        return null;
      })
      .toList();

  blackhole.consume(drained);
}
/**
 * Benchmarks an expression that concatenates the column {@code n} with a string suffix and compares
 * the result against a fixed string. The expression is exposed as the LONG-typed virtual column "v"
 * and read as longs through a column value selector.
 */
@Benchmark
public void stringConcatAndCompareOnLong(Blackhole blackhole)
{
  final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);

  // The expression under test, registered as virtual column "v".
  final ExpressionVirtualColumn concatAndCompare = new ExpressionVirtualColumn(
      "v",
      "concat(n, ' is my favorite number') == '3 is my favorite number'",
      ValueType.LONG,
      TestExprMacroTable.INSTANCE
  );

  final Sequence<Cursor> cursorSequence = adapter.makeCursors(
      null,
      index.getDataInterval(),
      VirtualColumns.create(ImmutableList.of(concatAndCompare)),
      Granularities.ALL,
      false,
      null
  );

  // Walk every cursor to the end, feeding each value to the blackhole.
  final List<?> drained = cursorSequence
      .map(cursor -> {
        final ColumnValueSelector valueSelector =
            cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
        consumeLong(cursor, valueSelector, blackhole);
        return null;
      })
      .toList();

  blackhole.consume(drained);
}
private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole) private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole)
{ {
if (selector.getValueCardinality() >= 0) { if (selector.getValueCardinality() >= 0) {

View File

@ -27,6 +27,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
import org.apache.druid.data.input.impl.FloatDimensionSchema; import org.apache.druid.data.input.impl.FloatDimensionSchema;
import org.apache.druid.data.input.impl.LongDimensionSchema; import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.StringDimensionSchema;
@ -99,6 +100,9 @@ public class SegmentGenerator implements Closeable
case LONG: case LONG:
dimensions.add(new LongDimensionSchema(columnSchema.getName())); dimensions.add(new LongDimensionSchema(columnSchema.getName()));
break; break;
case DOUBLE:
dimensions.add(new DoubleDimensionSchema(columnSchema.getName()));
break;
case FLOAT: case FLOAT:
dimensions.add(new FloatDimensionSchema(columnSchema.getName())); dimensions.add(new FloatDimensionSchema(columnSchema.getName()));
break; break;

View File

@ -30,6 +30,10 @@ import javax.annotation.Nullable;
*/ */
public abstract class ExprEval<T> public abstract class ExprEval<T>
{ {
// Lazily cached result of asString(); stringValueValid marks whether stringValue has been computed.
private boolean stringValueValid = false;
private String stringValue;
public static ExprEval ofLong(@Nullable Number longValue) public static ExprEval ofLong(@Nullable Number longValue)
{ {
return new LongExprEval(longValue); return new LongExprEval(longValue);
@ -89,7 +93,7 @@ public abstract class ExprEval<T>
@Nullable @Nullable
final T value; final T value;
private ExprEval(T value) private ExprEval(@Nullable T value)
{ {
this.value = value; this.value = value;
} }
@ -115,7 +119,17 @@ public abstract class ExprEval<T>
@Nullable @Nullable
public String asString() public String asString()
{ {
return value == null ? null : String.valueOf(value); if (!stringValueValid) {
if (value == null) {
stringValue = null;
} else {
stringValue = String.valueOf(value);
}
stringValueValid = true;
}
return stringValue;
} }
public abstract boolean asBoolean(); public abstract boolean asBoolean();
@ -126,7 +140,6 @@ public abstract class ExprEval<T>
private abstract static class NumericExprEval extends ExprEval<Number> private abstract static class NumericExprEval extends ExprEval<Number>
{ {
private NumericExprEval(@Nullable Number value) private NumericExprEval(@Nullable Number value)
{ {
super(value); super(value);
@ -247,6 +260,16 @@ public abstract class ExprEval<T>
private static class StringExprEval extends ExprEval<String> private static class StringExprEval extends ExprEval<String>
{ {
// Cached primitive values.
private boolean intValueValid = false;
private boolean longValueValid = false;
private boolean doubleValueValid = false;
private boolean booleanValueValid = false;
private int intValue;
private long longValue;
private double doubleValue;
private boolean booleanValue;
private static final StringExprEval OF_NULL = new StringExprEval(null); private static final StringExprEval OF_NULL = new StringExprEval(null);
private Number numericVal; private Number numericVal;
@ -263,9 +286,48 @@ public abstract class ExprEval<T>
} }
@Override @Override
public final int asInt() public int asInt()
{ {
Number number = asNumber(); if (!intValueValid) {
intValue = computeInt();
intValueValid = true;
}
return intValue;
}
/**
 * Returns this value as a long. The result of {@code computeLong()} is stored on first use and
 * served from the stored field on later calls.
 */
@Override
public long asLong()
{
  if (longValueValid) {
    return longValue;
  }

  longValue = computeLong();
  // Only mark the cached value valid once computeLong() has completed normally.
  longValueValid = true;
  return longValue;
}
/**
 * Returns this value as a double. The result of {@code computeDouble()} is stored on first use and
 * served from the stored field on later calls.
 */
@Override
public double asDouble()
{
  if (doubleValueValid) {
    return doubleValue;
  }

  doubleValue = computeDouble();
  // Only mark the cached value valid once computeDouble() has completed normally.
  doubleValueValid = true;
  return doubleValue;
}
/**
 * Returns the wrapped String exactly as stored, which may be null. This override hands back
 * {@code value} directly instead of going through the base class's convert-and-cache path.
 */
@Nullable
@Override
public String asString()
{
return value;
}
private int computeInt()
{
Number number = computeNumber();
if (number == null) { if (number == null) {
assert NullHandling.replaceWithDefault(); assert NullHandling.replaceWithDefault();
return 0; return 0;
@ -273,10 +335,9 @@ public abstract class ExprEval<T>
return number.intValue(); return number.intValue();
} }
@Override private long computeLong()
public final long asLong()
{ {
Number number = asNumber(); Number number = computeNumber();
if (number == null) { if (number == null) {
assert NullHandling.replaceWithDefault(); assert NullHandling.replaceWithDefault();
return 0L; return 0L;
@ -284,10 +345,9 @@ public abstract class ExprEval<T>
return number.longValue(); return number.longValue();
} }
@Override private double computeDouble()
public final double asDouble()
{ {
Number number = asNumber(); Number number = computeNumber();
if (number == null) { if (number == null) {
assert NullHandling.replaceWithDefault(); assert NullHandling.replaceWithDefault();
return 0.0d; return 0.0d;
@ -296,7 +356,7 @@ public abstract class ExprEval<T>
} }
@Nullable @Nullable
private Number asNumber() private Number computeNumber()
{ {
if (value == null) { if (value == null) {
return null; return null;
@ -321,13 +381,18 @@ public abstract class ExprEval<T>
@Override @Override
public boolean isNumericNull() public boolean isNumericNull()
{ {
return asNumber() == null; return computeNumber() == null;
} }
@Override @Override
public final boolean asBoolean() public final boolean asBoolean()
{ {
return Evals.asBoolean(value); if (!booleanValueValid) {
booleanValue = Evals.asBoolean(value);
booleanValueValid = true;
}
return booleanValue;
} }
@Override @Override
@ -335,9 +400,9 @@ public abstract class ExprEval<T>
{ {
switch (castTo) { switch (castTo) {
case DOUBLE: case DOUBLE:
return ExprEval.ofDouble(asNumber()); return ExprEval.ofDouble(computeNumber());
case LONG: case LONG:
return ExprEval.ofLong(asNumber()); return ExprEval.ofLong(computeNumber());
case STRING: case STRING:
return this; return this;
} }

View File

@ -137,12 +137,12 @@ public class ExpressionSelectors
final String column = Iterables.getOnlyElement(columns); final String column = Iterables.getOnlyElement(columns);
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column); final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(column);
if (column.equals(ColumnHolder.TIME_COLUMN_NAME)) { if (capabilities != null && capabilities.getType() == ValueType.LONG) {
// Optimization for expressions that hit the __time column and nothing else. // Optimization for expressions that hit one long column and nothing else.
// May be worth applying this optimization to all long columns?
return new SingleLongInputCachingExpressionColumnValueSelector( return new SingleLongInputCachingExpressionColumnValueSelector(
columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME), columnSelectorFactory.makeColumnValueSelector(column),
expression expression,
!ColumnHolder.TIME_COLUMN_NAME.equals(column) // __time doesn't need an LRU cache since it is sorted.
); );
} else if (capabilities != null } else if (capabilities != null
&& capabilities.getType() == ValueType.STRING && capabilities.getType() == ValueType.STRING

View File

@ -20,6 +20,7 @@
package org.apache.druid.segment.virtual; package org.apache.druid.segment.virtual;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.ISE;
import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprEval;
@ -28,6 +29,7 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.ColumnValueSelector;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/** /**
* Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case * Like {@link ExpressionColumnValueSelector}, but caches the most recently computed value and re-uses it in the case
@ -35,30 +37,26 @@ import javax.annotation.Nonnull;
*/ */
public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector<ExprEval> public class SingleLongInputCachingExpressionColumnValueSelector implements ColumnValueSelector<ExprEval>
{ {
enum Validity private static final int CACHE_SIZE = 1000;
{
NONE,
DOUBLE,
LONG,
EVAL
}
private final ColumnValueSelector selector; private final ColumnValueSelector selector;
private final Expr expression; private final Expr expression;
private final SingleInputBindings bindings = new SingleInputBindings(); private final SingleInputBindings bindings = new SingleInputBindings();
// Last read input value @Nullable
private final LruEvalCache lruEvalCache;
// Last read input value.
private long lastInput; private long lastInput;
// Last computed output values (validity determined by "validity" field) // Last computed output value, or null if there is none.
private Validity validity = Validity.NONE; @Nullable
private double lastDoubleOutput; private ExprEval lastOutput;
private long lastLongOutput;
private ExprEval lastEvalOutput;
public SingleLongInputCachingExpressionColumnValueSelector( public SingleLongInputCachingExpressionColumnValueSelector(
final ColumnValueSelector selector, final ColumnValueSelector selector,
final Expr expression final Expr expression,
final boolean useLruCache
) )
{ {
// Verify expression has just one binding. // Verify expression has just one binding.
@ -68,6 +66,7 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
this.selector = Preconditions.checkNotNull(selector, "selector"); this.selector = Preconditions.checkNotNull(selector, "selector");
this.expression = Preconditions.checkNotNull(expression, "expression"); this.expression = Preconditions.checkNotNull(expression, "expression");
this.lruEvalCache = useLruCache ? new LruEvalCache() : null;
} }
@Override @Override
@ -80,59 +79,41 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
@Override @Override
public double getDouble() public double getDouble()
{ {
// No Assert for null handling as delegate selector already have it. return getObject().asDouble();
final long currentInput = selector.getLong();
if (lastInput == currentInput && validity == Validity.DOUBLE) {
return lastDoubleOutput;
} else {
final double output = eval(currentInput).asDouble();
lastInput = currentInput;
lastDoubleOutput = output;
validity = Validity.DOUBLE;
return output;
}
} }
@Override @Override
public float getFloat() public float getFloat()
{ {
// No Assert for null handling as delegate selector already have it. return (float) getObject().asDouble();
return (float) getDouble();
} }
@Override @Override
public long getLong() public long getLong()
{ {
// No Assert for null handling as delegate selector already have it. return getObject().asLong();
final long currentInput = selector.getLong();
if (lastInput == currentInput && validity == Validity.LONG) {
return lastLongOutput;
} else {
final long output = eval(currentInput).asLong();
lastInput = currentInput;
lastLongOutput = output;
validity = Validity.LONG;
return output;
}
} }
@Nonnull @Nonnull
@Override @Override
public ExprEval getObject() public ExprEval getObject()
{ {
final long currentInput = selector.getLong(); // No assert for null handling, as the delegate selector already has it.
final long input = selector.getLong();
final boolean cached = input == lastInput && lastOutput != null;
if (lastInput == currentInput && validity == Validity.EVAL) { if (!cached) {
return lastEvalOutput; if (lruEvalCache == null) {
bindings.set(input);
lastOutput = expression.eval(bindings);
} else { } else {
final ExprEval output = eval(currentInput); lastOutput = lruEvalCache.compute(input);
lastInput = currentInput;
lastEvalOutput = output;
validity = Validity.EVAL;
return output;
} }
lastInput = input;
}
return lastOutput;
} }
@Override @Override
@ -141,12 +122,6 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
return ExprEval.class; return ExprEval.class;
} }
private ExprEval eval(final long value)
{
bindings.set(value);
return expression.eval(bindings);
}
@Override @Override
public boolean isNull() public boolean isNull()
{ {
@ -155,4 +130,26 @@ public class SingleLongInputCachingExpressionColumnValueSelector implements Colu
// ExprEval.isNumericNull checks whether the parsed primitive value is null or not. // ExprEval.isNumericNull checks whether the parsed primitive value is null or not.
return getObject().isNumericNull(); return getObject().isNumericNull();
} }
/**
 * Cache of expression results keyed by the long input value. Entries are moved to the front of the
 * linked map whenever they are read or inserted; once the map holds more than {@code CACHE_SIZE}
 * entries, the entry at the back is dropped.
 *
 * Uses the enclosing selector's {@code bindings} and {@code expression} to evaluate on a miss.
 */
public class LruEvalCache
{
  private final Long2ObjectLinkedOpenHashMap<ExprEval> evalsByInput = new Long2ObjectLinkedOpenHashMap<>();

  /**
   * Returns the expression result for input {@code n}, evaluating and storing it if it is not
   * already present.
   */
  public ExprEval compute(final long n)
  {
    final ExprEval hit = evalsByInput.getAndMoveToFirst(n);
    if (hit != null) {
      return hit;
    }

    // Miss: evaluate the expression against this input and remember the result.
    bindings.set(n);
    final ExprEval computed = expression.eval(bindings);
    evalsByInput.putAndMoveToFirst(n, computed);

    // Keep the map bounded by evicting from the back.
    if (evalsByInput.size() > CACHE_SIZE) {
      evalsByInput.removeLast();
    }

    return computed;
  }
}
} }