LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to behave properly when wrapping other ValueSources which do not exist for the specified document

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1632414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-10-16 19:05:20 +00:00
parent 6697abbb28
commit d8146460f0
15 changed files with 490 additions and 130 deletions

View File

@ -184,6 +184,10 @@ Bug Fixes
* LUCENE-5980: Don't let document length overflow. (Robert Muir)
* LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to
behave properly when wrapping other ValueSources which do not exist for the specified document
(hossman)
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect

View File

@ -6,3 +6,11 @@ The API of oal.document was restructured to differentiate between stored
documents and indexed documents. IndexReader.document(int) now returns
StoredDocument instead of Document. In most cases a simple replacement
of the return type is enough to upgrade.
## FunctionValues.exist() Behavior Changes due to ValueSource bug fixes (LUCENE-5961)
Bugs fixed in several ValueSource functions may result in different behavior in
situations where some documents do not have values for fields wrapped in other
ValueSources. Users who want to preserve the previous behavior may need to wrap
their ValueSources in a "DefFunction" along with a ConstValueSource of "0.0".

View File

@ -60,7 +60,14 @@ public abstract class DualFloatFunction extends ValueSource {
public float floatVal(int doc) {
return func(doc, aVals, bVals);
}
/**
* True if and only if <em>all</em> of the wrapped {@link FunctionValues}
* <code>exists</code> for the specified doc
*/
@Override
public boolean exists(int doc) {
return MultiFunction.allExists(doc, aVals, bVals);
}
@Override
public String toString(int doc) {
return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')';

View File

@ -102,7 +102,7 @@ public class IfFunction extends BoolFunction {
@Override
public boolean exists(int doc) {
return true; // TODO: flow through to any sub-sources?
return ifVals.boolVal(doc) ? trueVals.exists(doc) : falseVals.exists(doc);
}
@Override
@ -148,4 +148,4 @@ public class IfFunction extends BoolFunction {
trueSource.createWeight(context, searcher);
falseSource.createWeight(context, searcher);
}
}
}

View File

@ -59,6 +59,10 @@ public class LinearFloatFunction extends ValueSource {
return vals.floatVal(doc) * slope + intercept;
}
@Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override
public String toString(int doc) {
return slope + "*float(" + vals.toString(doc) + ")+" + intercept;
}

View File

@ -35,11 +35,25 @@ public class MaxFloatFunction extends MultiFloatFunction {
@Override
protected float func(int doc, FunctionValues[] valsArr) {
if (valsArr.length == 0) return 0.0f;
if ( ! exists(doc, valsArr) ) return 0.0f;
float val = Float.NEGATIVE_INFINITY;
for (FunctionValues vals : valsArr) {
val = Math.max(vals.floatVal(doc), val);
if (vals.exists(doc)) {
val = Math.max(vals.floatVal(doc), val);
}
}
return val;
}
/**
* True if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see MultiFunction#anyExists
*/
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.anyExists(doc, valsArr);
}
}

View File

@ -35,11 +35,26 @@ public class MinFloatFunction extends MultiFloatFunction {
@Override
protected float func(int doc, FunctionValues[] valsArr) {
if (valsArr.length == 0) return 0.0f;
if ( ! exists(doc, valsArr) ) return 0.0f;
float val = Float.POSITIVE_INFINITY;
for (FunctionValues vals : valsArr) {
val = Math.min(vals.floatVal(doc), val);
if (vals.exists(doc)) {
val = Math.min(vals.floatVal(doc), val);
}
}
return val;
}
/**
* True if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see MultiFunction#anyExists
*/
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.anyExists(doc, valsArr);
}
}

View File

@ -40,6 +40,18 @@ public abstract class MultiFloatFunction extends ValueSource {
abstract protected String name();
abstract protected float func(int doc, FunctionValues[] valsArr);
/**
* Called by {@link FunctionValues#exists} for each document.
*
* Default impl returns true if <em>all</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see FunctionValues#exists
* @see MultiFunction#allExists
*/
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.allExists(doc, valsArr);
}
@Override
public String description() {
@ -70,21 +82,12 @@ public abstract class MultiFloatFunction extends ValueSource {
public float floatVal(int doc) {
return func(doc, valsArr);
}
@Override
public boolean exists(int doc) {
return MultiFloatFunction.this.exists(doc, valsArr);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
boolean firstTime=true;
for (FunctionValues vals : valsArr) {
if (firstTime) {
firstTime=false;
} else {
sb.append(',');
}
sb.append(vals.toString(doc));
}
sb.append(')');
return sb.toString();
return MultiFunction.toString(name(), valsArr, doc);
}
};
}

View File

@ -43,6 +43,35 @@ public abstract class MultiFunction extends ValueSource {
return description(name(), sources);
}
/**
* Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues}
*
* @return true if <em>all</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*/
public static boolean allExists(int doc, FunctionValues... values) {
for (FunctionValues v : values) {
if ( ! v.exists(doc) ) {
return false;
}
}
return true;
}
/**
* Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues}
*
* @return true if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*/
public static boolean anyExists(int doc, FunctionValues... values) {
for (FunctionValues v : values) {
if ( v.exists(doc) ) {
return true;
}
}
return false;
}
public static String description(String name, List<ValueSource> sources) {
StringBuilder sb = new StringBuilder();
sb.append(name).append('(');

View File

@ -69,6 +69,10 @@ public class ReciprocalFloatFunction extends ValueSource {
return a/(m*vals.floatVal(doc) + b);
}
@Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override
public String toString(int doc) {
return Float.toString(a) + "/("
+ m + "*float(" + vals.toString(doc) + ')'

View File

@ -70,20 +70,22 @@ public class ScaleFloatFunction extends ValueSource {
int maxDoc = leaf.reader().maxDoc();
FunctionValues vals = source.getValues(context, leaf);
for (int i=0; i<maxDoc; i++) {
float val = vals.floatVal(i);
if ((Float.floatToRawIntBits(val) & (0xff<<23)) == 0xff<<23) {
// if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
// which don't make sense to factor into the scale function
continue;
if ( ! vals.exists(i) ) {
continue;
}
float val = vals.floatVal(i);
if ((Float.floatToRawIntBits(val) & (0xff<<23)) == 0xff<<23) {
// if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
// which don't make sense to factor into the scale function
continue;
}
if (val < minVal) {
minVal = val;
}
if (val > maxVal) {
maxVal = val;
}
}
if (val < minVal) {
minVal = val;
}
if (val > maxVal) {
maxVal = val;
}
}
}
if (minVal == Float.POSITIVE_INFINITY) {
@ -113,6 +115,10 @@ public class ScaleFloatFunction extends ValueSource {
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override
public float floatVal(int doc) {
return (vals.floatVal(doc) - minSource) * scale + min;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.queries.function;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -31,9 +33,12 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.queries.function.valuesource.DivFloatFunction;
@ -51,6 +56,7 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queries.function.valuesource.MaxDocValueSource;
import org.apache.lucene.queries.function.valuesource.MaxFloatFunction;
import org.apache.lucene.queries.function.valuesource.MinFloatFunction;
import org.apache.lucene.queries.function.valuesource.MultiFloatFunction;
import org.apache.lucene.queries.function.valuesource.NormValueSource;
import org.apache.lucene.queries.function.valuesource.NumDocsValueSource;
import org.apache.lucene.queries.function.valuesource.PowFloatFunction;
@ -71,6 +77,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
@ -88,6 +95,11 @@ public class TestValueSources extends LuceneTestCase {
static IndexReader reader;
static IndexSearcher searcher;
static final ValueSource BOGUS_FLOAT_VS = new FloatFieldSource("bogus_field");
static final ValueSource BOGUS_DOUBLE_VS = new DoubleFieldSource("bogus_field");
static final ValueSource BOGUS_INT_VS = new IntFieldSource("bogus_field");
static final ValueSource BOGUS_LONG_VS = new LongFieldSource("bogus_field");
static final List<String[]> documents = Arrays.asList(new String[][] {
/* id, double, float, int, long, string, text */
new String[] { "0", "3.63", "5.2", "35", "4343", "test", "this is a test test test" },
@ -160,100 +172,165 @@ public class TestValueSources extends LuceneTestCase {
}
public void testConst() throws Exception {
assertHits(new FunctionQuery(new ConstValueSource(0.3f)),
new float[] { 0.3f, 0.3f });
ValueSource vs = new ConstValueSource(0.3f);
assertHits(new FunctionQuery(vs),
new float[] { 0.3f, 0.3f });
assertAllExist(vs);
}
public void testDiv() throws Exception {
assertHits(new FunctionQuery(new DivFloatFunction(
new ConstValueSource(10f), new ConstValueSource(5f))),
new float[] { 2f, 2f });
ValueSource vs = new DivFloatFunction(new ConstValueSource(10f), new ConstValueSource(5f));
assertHits(new FunctionQuery(vs),
new float[] { 2f, 2f });
assertAllExist(vs);
vs = new DivFloatFunction(new ConstValueSource(10f), BOGUS_FLOAT_VS);
assertNoneExist(vs);
vs = new DivFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(10f));
assertNoneExist(vs);
}
public void testDocFreq() throws Exception {
assertHits(new FunctionQuery(
new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))),
new float[] { 2f, 2f });
ValueSource vs = new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
}
public void testDoubleConst() throws Exception {
assertHits(new FunctionQuery(new DoubleConstValueSource(0.3d)),
new float[] { 0.3f, 0.3f });
ValueSource vs = new DoubleConstValueSource(0.3d);
assertHits(new FunctionQuery(vs), new float[] { 0.3f, 0.3f });
assertAllExist(vs);
}
public void testDouble() throws Exception {
assertHits(new FunctionQuery(new DoubleFieldSource("double")),
new float[] { 3.63f, 5.65f });
ValueSource vs = new DoubleFieldSource("double");
assertHits(new FunctionQuery(vs), new float[] { 3.63f, 5.65f });
assertAllExist(vs);
assertNoneExist(BOGUS_DOUBLE_VS);
}
public void testFloat() throws Exception {
assertHits(new FunctionQuery(new FloatFieldSource("float")),
new float[] { 5.2f, 9.3f });
ValueSource vs = new FloatFieldSource("float");
assertHits(new FunctionQuery(vs), new float[] { 5.2f, 9.3f });
assertAllExist(vs);
assertNoneExist(BOGUS_FLOAT_VS);
}
public void testIDF() throws Exception {
Similarity saved = searcher.getSimilarity();
try {
searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery(
new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"))),
new float[] { 0.5945349f, 0.5945349f });
ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs), new float[] { 0.5945349f, 0.5945349f });
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testIf() throws Exception {
assertHits(new FunctionQuery(new IfFunction(
new BytesRefFieldSource("id"),
new ConstValueSource(1.0f),
new ConstValueSource(2.0f)
)),
new float[] { 1f, 1f });
// true just if a value exists...
assertHits(new FunctionQuery(new IfFunction(
new LiteralValueSource("false"),
new ConstValueSource(1.0f),
new ConstValueSource(2.0f)
)),
new float[] { 1f, 1f });
ValueSource vs = new IfFunction(new BytesRefFieldSource("id"),
new ConstValueSource(1.0f), // match
new ConstValueSource(2.0f));
assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
assertAllExist(vs);
// true just if a test value exists...
vs = new IfFunction(new LiteralValueSource("false"),
new ConstValueSource(1.0f), // match
new ConstValueSource(2.0f));
assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
assertAllExist(vs);
// false value if tests value does not exist
vs = new IfFunction(BOGUS_FLOAT_VS,
new ConstValueSource(1.0f),
new ConstValueSource(2.0f)); // match
assertHits(new FunctionQuery(vs), new float[] { 2F, 2F });
assertAllExist(vs);
// final value may still not exist
vs = new IfFunction(new BytesRefFieldSource("id"),
BOGUS_FLOAT_VS, // match
new ConstValueSource(1.0f));
assertNoneExist(vs);
}
public void testInt() throws Exception {
assertHits(new FunctionQuery(new IntFieldSource("int")),
new float[] { 35f, 54f });
ValueSource vs = new IntFieldSource("int");
assertHits(new FunctionQuery(vs), new float[] { 35f, 54f });
assertAllExist(vs);
assertNoneExist(BOGUS_INT_VS);
}
public void testJoinDocFreq() throws Exception {
assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")),
new float[] { 2f, 0f });
assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")),
new float[] { 2f, 0f });
// TODO: what *should* the rules be for exist() ?
}
public void testLinearFloat() throws Exception {
assertHits(new FunctionQuery(new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1)),
new float[] { 7f, 7f });
ValueSource vs = new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1);
assertHits(new FunctionQuery(vs), new float[] { 7f, 7f });
assertAllExist(vs);
vs = new LinearFloatFunction(BOGUS_FLOAT_VS, 3, 1);
assertNoneExist(vs);
}
public void testLong() throws Exception {
assertHits(new FunctionQuery(new LongFieldSource("long")),
new float[] { 4343f, 1954f });
ValueSource vs = new LongFieldSource("long");
assertHits(new FunctionQuery(vs), new float[] { 4343f, 1954f });
assertAllExist(vs);
assertNoneExist(BOGUS_LONG_VS);
}
public void testMaxDoc() throws Exception {
assertHits(new FunctionQuery(new MaxDocValueSource()),
new float[] { 2f, 2f });
ValueSource vs = new MaxDocValueSource();
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
}
public void testMaxFloat() throws Exception {
assertHits(new FunctionQuery(new MaxFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})),
new float[] { 2f, 2f });
ValueSource vs = new MaxFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)});
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
// as long as one value exists, then max exists
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F)});
assertAllExist(vs);
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS});
assertAllExist(vs);
// if none exist, then max doesn't exist
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS});
assertNoneExist(vs);
}
public void testMinFloat() throws Exception {
assertHits(new FunctionQuery(new MinFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})),
new float[] { 1f, 1f });
ValueSource vs = new MinFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)});
assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
assertAllExist(vs);
// as long as one value exists, then min exists
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F)});
assertHits(new FunctionQuery(vs), new float[] { 2F, 2F });
assertAllExist(vs);
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS});
assertAllExist(vs);
// if none exist, then min doesn't exist
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS});
assertNoneExist(vs);
}
public void testNorm() throws Exception {
@ -261,35 +338,95 @@ public class TestValueSources extends LuceneTestCase {
try {
// no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery(
new NormValueSource("byte")),
new float[] { 0f, 0f });
ValueSource vs = new NormValueSource("byte");
assertHits(new FunctionQuery(vs), new float[] { 0f, 0f });
// regardless of wether norms exist, value source exists == 0
assertAllExist(vs);
vs = new NormValueSource("text");
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testNumDocs() throws Exception {
assertHits(new FunctionQuery(new NumDocsValueSource()),
new float[] { 2f, 2f });
ValueSource vs = new NumDocsValueSource();
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
}
public void testPow() throws Exception {
assertHits(new FunctionQuery(new PowFloatFunction(
new ConstValueSource(2f), new ConstValueSource(3f))),
new float[] { 8f, 8f });
ValueSource vs = new PowFloatFunction(new ConstValueSource(2f), new ConstValueSource(3f));
assertHits(new FunctionQuery(vs), new float[] { 8f, 8f });
assertAllExist(vs);
vs = new PowFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(3f));
assertNoneExist(vs);
vs = new PowFloatFunction(new ConstValueSource(3f), BOGUS_FLOAT_VS);
assertNoneExist(vs);
}
public void testProduct() throws Exception {
assertHits(new FunctionQuery(new ProductFloatFunction(new ValueSource[] {
new ConstValueSource(2f), new ConstValueSource(3f)})),
new float[] { 6f, 6f });
ValueSource vs = new ProductFloatFunction(new ValueSource[] {
new ConstValueSource(2f), new ConstValueSource(3f)});
assertHits(new FunctionQuery(vs), new float[] { 6f, 6f });
assertAllExist(vs);
vs = new ProductFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(3f)});
assertNoneExist(vs);
}
public void testQueryWrapedFuncWrapedQuery() throws Exception {
ValueSource vs = new QueryValueSource(new FunctionQuery(new ConstValueSource(2f)), 0f);
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
}
public void testQuery() throws Exception {
assertHits(new FunctionQuery(new QueryValueSource(
new FunctionQuery(new ConstValueSource(2f)), 0f)),
new float[] { 2f, 2f });
Similarity saved = searcher.getSimilarity();
try {
searcher.setSimilarity(new DefaultSimilarity());
ValueSource vs = new QueryValueSource(new TermQuery(new Term("string","bar")), 42F);
assertHits(new FunctionQuery(vs), new float[] { 42F, 1F });
// valuesource should exist only for things matching the term query
// sanity check via quick & dirty wrapper arround tf
ValueSource expected = new MultiFloatFunction(new ValueSource[] {
new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))}) {
@Override
protected String name() { return "tf_based_exists"; }
@Override
protected float func(int doc, FunctionValues[] valsArr) {
return valsArr[0].floatVal(doc);
}
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
// if tf > 0, then it should exist
return 0 < func(doc, valsArr);
}
};
assertExists(expected, vs);
// Query matches all docs, func exists for all docs
vs = new QueryValueSource(new TermQuery(new Term("text","test")), 0F);
assertAllExist(vs);
// Query matches no docs, func exists for no docs
vs = new QueryValueSource(new TermQuery(new Term("bogus","does not exist")), 0F);
assertNoneExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testRangeMap() throws Exception {
@ -300,37 +437,59 @@ public class TestValueSources extends LuceneTestCase {
5, 6, new SumFloatFunction(new ValueSource[] {new ConstValueSource(1f), new ConstValueSource(2f)}),
new ConstValueSource(11f))),
new float[] { 3f, 11f });
// TODO: what *should* the rules be for exist() ?
// ((source exists && source in range && target exists) OR (source not in range && default exists)) ?
}
public void testReciprocal() throws Exception {
assertHits(new FunctionQuery(new ReciprocalFloatFunction(new ConstValueSource(2f),
3, 1, 4)),
new float[] { 0.1f, 0.1f });
ValueSource vs = new ReciprocalFloatFunction(new ConstValueSource(2f), 3, 1, 4);
assertHits(new FunctionQuery(vs), new float[] { 0.1f, 0.1f });
assertAllExist(vs);
vs = new ReciprocalFloatFunction(BOGUS_FLOAT_VS, 3, 1, 4);
assertNoneExist(vs);
}
public void testScale() throws Exception {
assertHits(new FunctionQuery(new ScaleFloatFunction(new IntFieldSource("int"), 0, 1)),
new float[] { 0.0f, 1.0f });
ValueSource vs = new ScaleFloatFunction(new IntFieldSource("int"), 0, 1);
assertHits(new FunctionQuery(vs), new float[] { 0.0f, 1.0f });
assertAllExist(vs);
vs = new ScaleFloatFunction(BOGUS_INT_VS, 0, 1);
assertNoneExist(vs);
}
public void testSumFloat() throws Exception {
assertHits(new FunctionQuery(new SumFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})),
new float[] { 3f, 3f });
ValueSource vs = new SumFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)});
assertHits(new FunctionQuery(vs), new float[] { 3f, 3f });
assertAllExist(vs);
vs = new SumFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2f)});
assertNoneExist(vs);
}
public void testSumTotalTermFreq() throws Exception {
assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")),
new float[] { 8f, 8f });
ValueSource vs = new SumTotalTermFreqValueSource("text");
assertHits(new FunctionQuery(vs), new float[] { 8f, 8f });
assertAllExist(vs);
}
public void testTermFreq() throws Exception {
assertHits(new FunctionQuery(
new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))),
new float[] { 3f, 1f });
assertHits(new FunctionQuery(
new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar"))),
new float[] { 0f, 1f });
ValueSource vs = new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs), new float[] { 3f, 1f });
assertAllExist(vs);
vs = new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar"));
assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
assertAllExist(vs);
// regardless of wether norms exist, value source exists == 0
vs = new TermFreqValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
assertAllExist(vs);
}
public void testTF() throws Exception {
@ -338,23 +497,71 @@ public class TestValueSources extends LuceneTestCase {
try {
// no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery(
new TFValueSource("bogus", "bogus", "text", new BytesRef("test"))),
new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) });
assertHits(new FunctionQuery(
new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))),
new float[] { 0f, 1f });
ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs),
new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) });
assertAllExist(vs);
vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
assertAllExist(vs);
// regardless of wether norms exist, value source exists == 0
vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
assertAllExist(vs);
} finally {
searcher.setSimilarity(saved);
}
}
public void testTotalTermFreq() throws Exception {
assertHits(new FunctionQuery(
new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))),
new float[] { 4f, 4f });
ValueSource vs = new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
assertHits(new FunctionQuery(vs), new float[] { 4f, 4f });
assertAllExist(vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value
* from the {@link ValueSource} is <b>true</b>.
*/
void assertAllExist(ValueSource vs) {
assertExists(ALL_EXIST_VS, vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value
* from the {@link ValueSource} is <b>false</b>.
*/
void assertNoneExist(ValueSource vs) {
assertExists(NONE_EXIST_VS, vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value from the
* <code>actual</code> {@link ValueSource} matches the {@link FunctionValues#exists}
* value from the <code>expected</code> {@link ValueSource}
*/
void assertExists(ValueSource expected, ValueSource actual) {
Map context = ValueSource.newContext(searcher);
try {
expected.createWeight(context, searcher);
actual.createWeight(context, searcher);
for (org.apache.lucene.index.LeafReaderContext leaf : reader.leaves()) {
final FunctionValues expectedVals = expected.getValues(context, leaf);
final FunctionValues actualVals = actual.getValues(context, leaf);
String msg = expected.toString() + " ?= " + actual.toString() + " -> ";
for (int i = 0; i < leaf.reader().maxDoc(); ++i) {
assertEquals(msg + i, expectedVals.exists(i), actualVals.exists(i));
}
}
} catch (IOException e) {
throw new RuntimeException(actual.toString(), e);
}
}
void assertHits(Query q, float scores[]) throws Exception {
ScoreDoc expected[] = new ScoreDoc[scores.length];
int expectedDocs[] = new int[scores.length];
@ -368,4 +575,54 @@ public class TestValueSources extends LuceneTestCase {
CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs);
CheckHits.checkExplanations(q, "", searcher);
}
/**
* Simple test value source that implements {@link FunctionValues#exists} as a constant
* @see #ALL_EXIST_VS
* @see #NONE_EXIST_VS
*/
private static final class ExistsValueSource extends ValueSource {
final boolean expected;
final int value;
public ExistsValueSource(boolean expected) {
this.expected = expected;
this.value = expected ? 1 : 0;
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return value;
}
@Override
public String description() {
return expected ? "ALL_EXIST" : "NONE_EXIST";
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) {
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return (float)value;
}
@Override
public boolean exists(int doc) {
return expected;
}
};
}
};
/** @see ExistsValueSource */
private static final ValueSource ALL_EXIST_VS = new ExistsValueSource(true);
/** @see ExistsValueSource */
private static final ValueSource NONE_EXIST_VS = new ExistsValueSource(false);
}

View File

@ -94,6 +94,12 @@ Upgrading from Solr 4.x
If you need the old functionality back , please add an extra parameter f=/**
example: /update/json/docs?f=/**
* Bugs fixed in several ValueSource functions may result in different behavior in
situations where some documents do not have values for fields wrapped in other value
sources. Users who want to preserve the previous behavior may need to wrap fields
in the "def()" function. Example: changing "fl=sum(fieldA,fieldB)" to
"fl=sum(def(fieldA,0.0),def(fieldB,0.0))". See LUCENE-5961 for more details.
Detailed Change List
----------------------

View File

@ -469,10 +469,9 @@ public class StatsComponentTest extends AbstractSolrTestCase {
"{!type=func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions don't work as expected due to LUCENE-5961
// "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}",
// "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
// identity math functions
"{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}",
"{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
}) {
assertQ("test statistics over field specified as a function: " + param,
@ -555,10 +554,9 @@ public class StatsComponentTest extends AbstractSolrTestCase {
"{!type=func key="+f+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+f+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+f+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions don't work as expected due to LUCENE-5961
// "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}",
// "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}",
// identity math functions
"{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}",
"{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}",
}) {
assertQ("test statis & stats.facet over field specified as a function: " + param,
req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",

View File

@ -731,12 +731,17 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
@Test
public void testPseudoFieldFunctions() throws Exception {
assertU(adoc("id", "1", "text", "hello", "foo_s","A"));
assertU(adoc("id", "1", "text", "hello", "foo_s","A", "yak_i", "32"));
assertU(adoc("id", "2"));
assertU(commit());
assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s") // if exists() is false, no pseudo-field should be added
, "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}");
// if exists() is false, no pseudo-field should be added
assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s")
, "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}");
assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,bog_i),b:mul(yak_i,bog_i),c:min(yak_i,bog_i)")
, "/response/docs/[0]=={ 'c':32.0 }");
assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,def(bog_i,42)), b:max(yak_i,bog_i)")
, "/response/docs/[0]=={ 'a': 74.0, 'b':32.0 }");
}
public void testMissingFieldFunctionBehavior() throws Exception {