LUCENE-8574: the DoubleValues for dependent bindings for an expression are now cached and reused and no longer inefficiently recomputed per hit

This commit is contained in:
Mike McCandless 2020-07-09 15:04:31 -04:00
parent 5c6314a970
commit 60e0d8ac6e
5 changed files with 125 additions and 4 deletions

View File

@ -60,6 +60,10 @@ API Changes
Improvements
* LUCENE-8574: Add a new ExpressionValueSource which will enforce only one value per name
per hit in dependencies, ExpressionFunctionValues will no longer
recompute already computed values (Haoyu Zhai)
* LUCENE-9370: RegExp query is no longer lenient about inappropriate backslashes and
follows the Java Pattern policy for rejecting illegal syntax. (Mark Harwood)

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.expressions;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
/**
* This expression value source shares one value cache when generating {@link ExpressionFunctionValues}
* such that only one value along the whole generation tree is corresponding to one name
*/
final class CachingExpressionValueSource extends ExpressionValueSource {
CachingExpressionValueSource(Bindings bindings, Expression expression) {
super(bindings, expression);
}
CachingExpressionValueSource(DoubleValuesSource[] variables, Expression expression, boolean needsScores) {
super(variables, expression, needsScores);
}
public CachingExpressionValueSource(ExpressionValueSource expressionValueSource) {
super(expressionValueSource.variables, expressionValueSource.expression, expressionValueSource.needsScores);
}
@Override
public DoubleValues getValues(LeafReaderContext readerContext, DoubleValues scores) throws IOException {
return getValuesWithCache(readerContext, scores, new HashMap<>());
}
private DoubleValues getValuesWithCache(LeafReaderContext readerContext, DoubleValues scores,
Map<String, DoubleValues> valuesCache) throws IOException {
DoubleValues[] externalValues = new DoubleValues[expression.variables.length];
for (int i = 0; i < variables.length; ++i) {
String externalName = expression.variables[i];
DoubleValues values = valuesCache.get(externalName);
if (values == null) {
if (variables[i] instanceof CachingExpressionValueSource) {
values = ((CachingExpressionValueSource) variables[i]).getValuesWithCache(readerContext, scores, valuesCache);
} else {
values = variables[i].getValues(readerContext, scores);
}
if (values == null) {
throw new RuntimeException("Unrecognized variable (" + externalName + ") referenced in expression (" +
expression.sourceText + ").");
}
valuesCache.put(externalName, values);
}
externalValues[i] = zeroWhenUnpositioned(values);
}
return new ExpressionFunctionValues(expression, externalValues);
}
}

View File

@ -24,6 +24,9 @@ import org.apache.lucene.search.DoubleValues;
class ExpressionFunctionValues extends DoubleValues {
final Expression expression;
final DoubleValues[] functionValues;
double currentValue;
int currentDoc = -1;
boolean computed;
ExpressionFunctionValues(Expression expression, DoubleValues[] functionValues) {
if (expression == null) {
@ -38,14 +41,23 @@ class ExpressionFunctionValues extends DoubleValues {
@Override
public boolean advanceExact(int doc) throws IOException {
if (currentDoc == doc) {
return true;
}
for (DoubleValues v : functionValues) {
v.advanceExact(doc);
}
currentDoc = doc;
computed = false;
return true;
}
@Override
public double doubleValue() {
return expression.evaluate(functionValues);
if (computed == false) {
currentValue = expression.evaluate(functionValues);
computed = true;
}
return currentValue;
}
}

View File

@ -31,7 +31,7 @@ import org.apache.lucene.search.IndexSearcher;
/**
* A {@link DoubleValuesSource} which evaluates a {@link Expression} given the context of an {@link Bindings}.
*/
final class ExpressionValueSource extends DoubleValuesSource {
class ExpressionValueSource extends DoubleValuesSource {
final DoubleValuesSource[] variables;
final Expression expression;
final boolean needsScores;
@ -69,7 +69,8 @@ final class ExpressionValueSource extends DoubleValuesSource {
if (values == null) {
values = variables[i].getValues(readerContext, scores);
if (values == null) {
throw new RuntimeException("Internal error. External (" + externalName + ") does not exist.");
throw new RuntimeException("Unrecognized variable (" + externalName + ") referenced in expression (" +
expression.sourceText + ").");
}
valuesCache.put(externalName, values);
}
@ -79,7 +80,7 @@ final class ExpressionValueSource extends DoubleValuesSource {
return new ExpressionFunctionValues(expression, externalValues);
}
private static DoubleValues zeroWhenUnpositioned(DoubleValues in) {
static DoubleValues zeroWhenUnpositioned(DoubleValues in) {
return new DoubleValues() {
boolean positioned = false;

View File

@ -128,6 +128,35 @@ public class TestExpressionValueSource extends LuceneTestCase {
assertFalse(vs1.equals(vs4));
}
public void testFibonacciExpr() throws Exception {
int n = 40;
SimpleBindings bindings = new SimpleBindings();
bindings.add("f0", DoubleValuesSource.constant(0));
bindings.add("f1", DoubleValuesSource.constant(1));
for (int i = 2; i < n + 1; i++) {
// Without using CachingExpressionValueSource this test will fail after 1 min around because of out of heap space when n=40
bindings.add("f" + Integer.toString(i), new CachingExpressionValueSource(
(ExpressionValueSource) JavascriptCompiler.compile("f" + Integer.toString(i - 1)+" + f" + Integer.toString(i - 2)).getDoubleValuesSource(bindings)));
}
DoubleValues values = bindings.getDoubleValuesSource("f" + Integer.toString(n)).getValues(null, null);
assertTrue(values.advanceExact(0));
assertEquals(fib(n), (int)values.doubleValue());
}
private int fib(int n) {
if (n == 0) {
return 0;
}
int prev = 0, curr = 1, tmp;
for (int i = 1; i < n; i++) {
tmp = curr;
curr += prev;
prev = tmp;
}
return curr;
}
public void testRewrite() throws Exception {
Expression expr = JavascriptCompiler.compile("a");