LUCENE-7736: IndexReaderValues

This commit is contained in:
Alan Woodward 2017-03-28 19:44:02 +01:00
parent af2b903f65
commit 2a4dd499bb
25 changed files with 945 additions and 25 deletions

View File

@ -56,4 +56,19 @@ public abstract class DoubleValues {
};
}
/**
* An empty DoubleValues instance that always returns {@code false} from {@link #advanceExact(int)}
*
* Because no document is ever reported as having a value, {@link #doubleValue()}
* throws {@link UnsupportedOperationException} if it is called.
*/
public static final DoubleValues EMPTY = new DoubleValues() {
@Override
public double doubleValue() throws IOException {
// advanceExact never succeeds on this instance, so there is no value to return
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int doc) throws IOException {
return false;
}
};
}

View File

@ -23,14 +23,17 @@ import java.util.function.DoubleToLongFunction;
import java.util.function.LongToDoubleFunction;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
/**
* Base class for producing {@link DoubleValues}
*
* To obtain a {@link DoubleValues} object for a leaf reader, clients should
* call {@link #getValues(LeafReaderContext, DoubleValues)}.
* To obtain a {@link DoubleValues} object for a leaf reader, clients should call
* {@link #rewrite(IndexSearcher)} against the top-level searcher, and then
* call {@link #getValues(LeafReaderContext, DoubleValues)} on the resulting
* DoubleValuesSource.
*
* DoubleValuesSource objects for NumericDocValues fields can be obtained by calling
* {@link #fromDoubleField(String)}, {@link #fromFloatField(String)}, {@link #fromIntField(String)}
@ -71,6 +74,18 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
return Explanation.noMatch(this.toString());
}
/**
 * Return a DoubleValuesSource specialised for the given IndexSearcher
 *
 * Implementations should assume that this will only be called once.
 * IndexReader-independent implementations can just return {@code this}
 *
 * Queries that use DoubleValuesSource objects should call rewrite() during
 * {@link Query#createWeight(IndexSearcher, boolean, float)} rather than during
 * {@link Query#rewrite(IndexReader)} to avoid IndexReader reference leakage
 *
 * @param searcher the top-level searcher to specialise this source for
 * @return the specialised source, which may be {@code this}
 * @throws IOException declared so that implementations may read from the index
 */
public abstract DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException;
/**
* Create a sort field based on the value of this producer
* @param reverse true if the sort should be decreasing
@ -125,9 +140,9 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
}
@Override
public boolean needsScores() {
return inner.needsScores();
}
public boolean needsScores() {
return inner.needsScores();
}
@Override
public boolean equals(Object o) {
@ -147,6 +162,11 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
return "long(" + inner.toString() + ")";
}
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
  // Specialise the wrapped source first, then convert the result back to a long-valued view.
  final DoubleValuesSource rewrittenInner = inner.rewrite(searcher);
  return rewrittenInner.toLongValuesSource();
}
}
/**
@ -229,6 +249,11 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
public String toString() {
return "scores";
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) {
return this;
}
};
/**
@ -246,6 +271,12 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
this.value = value;
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) {
return this;
}
@Override
public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
return new DoubleValues() {
@ -266,6 +297,7 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
return false;
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
@ -293,6 +325,7 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
public String toString() {
return "constant(" + value + ")";
}
}
/**
@ -372,17 +405,22 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
DoubleValues values = getValues(ctx, null);
if (values.advanceExact(docId))
return Explanation.match((float)values.doubleValue(), this.toString());
return Explanation.match((float) values.doubleValue(), this.toString());
else
return Explanation.noMatch(this.toString());
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  return this;
}
}
private static class DoubleValuesSortField extends SortField {
final DoubleValuesSource producer;
public DoubleValuesSortField(DoubleValuesSource producer, boolean reverse) {
DoubleValuesSortField(DoubleValuesSource producer, boolean reverse) {
super(producer.toString(), new DoubleValuesComparatorSource(producer), reverse);
this.producer = producer;
}
@ -401,6 +439,10 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
return buffer.toString();
}
@Override
public SortField rewrite(IndexSearcher searcher) throws IOException {
  // Rewrite the underlying producer, then wrap it in a new sort field with the same direction.
  final DoubleValuesSource rewrittenProducer = producer.rewrite(searcher);
  return new DoubleValuesSortField(rewrittenProducer, reverse);
}
}
private static class DoubleValuesHolder {
@ -410,7 +452,7 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
private static class DoubleValuesComparatorSource extends FieldComparatorSource {
private final DoubleValuesSource producer;
public DoubleValuesComparatorSource(DoubleValuesSource producer) {
DoubleValuesComparatorSource(DoubleValuesSource producer) {
this.producer = producer;
}

View File

@ -528,13 +528,14 @@ public class IndexSearcher {
+ after.doc + " limit=" + limit);
}
final int cappedNumHits = Math.min(numHits, limit);
final Sort rewrittenSort = sort.rewrite(this);
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {
@Override
public TopFieldCollector newCollector() throws IOException {
final boolean fillFields = true;
return TopFieldCollector.create(sort, cappedNumHits, after, fillFields, doDocScores, doMaxScore);
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, fillFields, doDocScores, doMaxScore);
}
@Override
@ -544,7 +545,7 @@ public class IndexSearcher {
for (TopFieldCollector collector : collectors) {
topDocs[i++] = collector.topDocs();
}
return TopDocs.merge(sort, 0, cappedNumHits, topDocs, true);
return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs, true);
}
};

View File

@ -28,7 +28,8 @@ import org.apache.lucene.index.NumericDocValues;
* Base class for producing {@link LongValues}
*
* To obtain a {@link LongValues} object for a leaf reader, clients should
* call {@link #getValues(LeafReaderContext, DoubleValues)}.
* call {@link #rewrite(IndexSearcher)} against the top-level searcher, and
* then {@link #getValues(LeafReaderContext, DoubleValues)}.
*
* LongValuesSource objects for long and int-valued NumericDocValues fields can
* be obtained by calling {@link #fromLongField(String)} and {@link #fromIntField(String)}.
@ -61,6 +62,14 @@ public abstract class LongValuesSource implements SegmentCacheable {
@Override
public abstract String toString();
/**
* Return a LongValuesSource specialised for the given IndexSearcher
*
* Implementations should assume that this will only be called once.
* IndexSearcher-independent implementations can just return {@code this}
*/
public abstract LongValuesSource rewrite(IndexSearcher searcher) throws IOException;
/**
* Create a sort field based on the value of this producer
* @param reverse true if the sort should be decreasing
@ -69,6 +78,71 @@ public abstract class LongValuesSource implements SegmentCacheable {
return new LongValuesSortField(this, reverse);
}
/**
* Convert to a DoubleValuesSource by casting long values to doubles
*
* @return a new DoubleValuesSource that wraps this source
*/
public DoubleValuesSource toDoubleValuesSource() {
return new DoubleLongValuesSource(this);
}
// Adapts a LongValuesSource to the DoubleValuesSource API by widening each long to a double.
private static class DoubleLongValuesSource extends DoubleValuesSource {

  private final LongValuesSource inner;

  private DoubleLongValuesSource(LongValuesSource inner) {
    this.inner = inner;
  }

  @Override
  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    final LongValues longs = inner.getValues(ctx, scores);
    return new DoubleValues() {
      @Override
      public double doubleValue() throws IOException {
        final long raw = longs.longValue();
        return (double) raw;
      }

      @Override
      public boolean advanceExact(int doc) throws IOException {
        // Positioning is delegated entirely to the wrapped long values.
        return longs.advanceExact(doc);
      }
    };
  }

  @Override
  public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
    // Rewrite the wrapped source, then re-wrap the result as a double-valued view.
    final LongValuesSource rewrittenInner = inner.rewrite(searcher);
    return rewrittenInner.toDoubleValuesSource();
  }

  @Override
  public boolean needsScores() {
    return inner.needsScores();
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return inner.isCacheable(ctx);
  }

  @Override
  public String toString() {
    return "double(" + inner.toString() + ")";
  }

  @Override
  public int hashCode() {
    return Objects.hash(inner);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final DoubleLongValuesSource other = (DoubleLongValuesSource) o;
    return Objects.equals(inner, other.inner);
  }
}
/**
* Creates a LongValuesSource that wraps a long-valued field
*/
@ -141,6 +215,11 @@ public abstract class LongValuesSource implements SegmentCacheable {
return "constant(" + value + ")";
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
}
private static class FieldValuesSource extends LongValuesSource {
@ -184,6 +263,11 @@ public abstract class LongValuesSource implements SegmentCacheable {
public boolean needsScores() {
return false;
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
}
private static class LongValuesSortField extends SortField {
@ -209,6 +293,10 @@ public abstract class LongValuesSource implements SegmentCacheable {
return buffer.toString();
}
@Override
public SortField rewrite(IndexSearcher searcher) throws IOException {
  // Rewrite the underlying producer, then wrap it in a new sort field with the same direction.
  final LongValuesSource rewrittenProducer = producer.rewrite(searcher);
  return new LongValuesSortField(rewrittenProducer, reverse);
}
}
private static class LongValuesHolder {

View File

@ -196,6 +196,7 @@ public class TestDoubleValuesSource extends LuceneTestCase {
}
private void testExplanations(Query q, DoubleValuesSource vs) throws IOException {
DoubleValuesSource rewritten = vs.rewrite(searcher);
searcher.search(q, new SimpleCollector() {
DoubleValues v;
@ -208,23 +209,23 @@ public class TestDoubleValuesSource extends LuceneTestCase {
@Override
public void setScorer(Scorer scorer) throws IOException {
this.v = vs.getValues(this.ctx, DoubleValuesSource.fromScorer(scorer));
this.v = rewritten.getValues(this.ctx, DoubleValuesSource.fromScorer(scorer));
}
@Override
public void collect(int doc) throws IOException {
Explanation scoreExpl = searcher.explain(q, ctx.docBase + doc);
if (this.v.advanceExact(doc)) {
CheckHits.verifyExplanation("", doc, (float) v.doubleValue(), true, vs.explain(ctx, doc, scoreExpl));
CheckHits.verifyExplanation("", doc, (float) v.doubleValue(), true, rewritten.explain(ctx, doc, scoreExpl));
}
else {
assertFalse(vs.explain(ctx, doc, scoreExpl).isMatch());
assertFalse(rewritten.explain(ctx, doc, scoreExpl).isMatch());
}
}
@Override
public boolean needsScores() {
return vs.needsScores();
return rewritten.needsScores();
}
});
}

View File

@ -106,6 +106,43 @@ public class TestSortRescorer extends LuceneTestCase {
// sort fields:
assertTrue(expl, expl.contains("= sort field <int: \"popularity\">! value=20"));
// Confirm the explanation includes first pass details:
assertTrue(expl.contains("= first pass score"));
assertTrue(expl.contains("body:contents in"));
}
public void testDoubleValuesSourceSort() throws Exception {
// create a sort field and sort by it (reverse order)
Query query = new TermQuery(new Term("body", "contents"));
IndexReader r = searcher.getIndexReader();
// Just first pass query
TopDocs hits = searcher.search(query, 10);
assertEquals(3, hits.totalHits);
assertEquals("3", r.document(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", r.document(hits.scoreDocs[1].doc).get("id"));
assertEquals("2", r.document(hits.scoreDocs[2].doc).get("id"));
DoubleValuesSource source = DoubleValuesSource.fromLongField("popularity");
// Now, rescore:
Sort sort = new Sort(source.getSortField(true));
Rescorer rescorer = new SortRescorer(sort);
hits = rescorer.rescore(searcher, hits, 10);
assertEquals(3, hits.totalHits);
assertEquals("2", r.document(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", r.document(hits.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(hits.scoreDocs[2].doc).get("id"));
String expl = rescorer.explain(searcher,
searcher.explain(query, hits.scoreDocs[0].doc),
hits.scoreDocs[0].doc).toString();
// Confirm the explanation breaks out the individual
// sort fields:
assertTrue(expl, expl.contains("= sort field <double(popularity)>! value=20.0"));
// Confirm the explanation includes first pass details:
assertTrue(expl.contains("= first pass score"));
assertTrue(expl.contains("body:contents in"));

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
/**
* A {@link DoubleValuesSource} which evaluates a {@link Expression} given the context of an {@link Bindings}.
@ -52,6 +53,12 @@ final class ExpressionValueSource extends DoubleValuesSource {
this.needsScores = needsScores;
}
// Builds a source directly from already-resolved variable sources.
// The variables array is stored as given (no defensive copy).
ExpressionValueSource(DoubleValuesSource[] variables, Expression expression, boolean needsScores) {
this.variables = variables;
this.expression = expression;
this.needsScores = needsScores;
}
@Override
public DoubleValues getValues(LeafReaderContext readerContext, DoubleValues scores) throws IOException {
Map<String, DoubleValues> valuesCache = new HashMap<>();
@ -159,6 +166,20 @@ final class ExpressionValueSource extends DoubleValuesSource {
for (DoubleValuesSource var : variables) {
explanations[i++] = var.explain(ctx, docId, scoreExplanation);
}
return Explanation.match((float)dv.doubleValue(), expression.sourceText + ", computed from:", explanations);
return Explanation.match((float) dv.doubleValue(), expression.sourceText + ", computed from:", explanations);
}
/**
 * Rewrites every variable source against the given searcher.
 *
 * @param searcher the searcher to specialise the variable sources for
 * @return {@code this} if no variable changed when rewritten, otherwise a new
 *         ExpressionValueSource over the rewritten variables
 * @throws IOException if rewriting a variable source fails
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  boolean changed = false;
  DoubleValuesSource[] rewritten = new DoubleValuesSource[variables.length];
  for (int i = 0; i < variables.length; i++) {
    rewritten[i] = variables[i].rewrite(searcher);
    // A variable has changed only when rewrite() returned a different instance.
    changed |= (rewritten[i] != variables[i]);
  }
  if (changed) {
    // Use the rewritten sources; passing the originals would discard the rewrite.
    return new ExpressionValueSource(rewritten, expression, needsScores);
  }
  return this;
}
}

View File

@ -766,6 +766,11 @@ public class TestRangeFacetCounts extends FacetTestCase {
return Explanation.match(docId + 1, "");
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public int hashCode() {
return 0;

View File

@ -62,10 +62,11 @@ public final class FunctionMatchQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
DoubleValuesSource vs = source.rewrite(searcher);
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DoubleValues values = source.getValues(context, null);
DoubleValues values = vs.getValues(context, null);
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override

View File

@ -60,7 +60,7 @@ public final class FunctionScoreQuery extends Query {
Weight inner = in.createWeight(searcher, needsScores && source.needsScores(), 1f);
if (needsScores == false)
return inner;
return new FunctionScoreWeight(this, inner, source, boost);
return new FunctionScoreWeight(this, inner, source.rewrite(searcher), boost);
}
@Override
@ -115,8 +115,6 @@ public final class FunctionScoreQuery extends Query {
return Explanation.noMatch("No match");
Explanation scoreExplanation = inner.explain(context, doc);
Explanation expl = valueSource.explain(context, doc, scoreExplanation);
if (boost == 1f)
return expl;
return Explanation.match(expl.getValue() * boost, "product of:",
Explanation.match(boost, "boost"), expl);
}
@ -140,7 +138,7 @@ public final class FunctionScoreQuery extends Query {
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return valueSource.isCacheable(ctx);
return inner.isCacheable(ctx) && valueSource.isCacheable(ctx);
}
}

View File

@ -0,0 +1,415 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.function;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
/**
* Class exposing static helper methods for generating DoubleValuesSource instances
* over some IndexReader statistics
*/
public final class IndexReaderFunctions {
// non-instantiable class
private IndexReaderFunctions() {}
/**
 * Creates a constant value source returning the docFreq of a given term
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#docFreq(Term)
 */
public static DoubleValuesSource docFreq(Term term) {
  final String description = "docFreq(" + term.toString() + ")";
  return new IndexReaderDoubleValuesSource(reader -> (double) reader.docFreq(term), description);
}
/**
 * Creates a constant value source returning the index's maxDoc
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#maxDoc()
 */
public static DoubleValuesSource maxDoc() {
  final ReaderFunction maxDocOf = IndexReader::maxDoc;
  return new IndexReaderDoubleValuesSource(maxDocOf, "maxDoc()");
}
/**
 * Creates a constant value source returning the index's numDocs
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#numDocs()
 */
public static DoubleValuesSource numDocs() {
  final ReaderFunction numDocsOf = IndexReader::numDocs;
  return new IndexReaderDoubleValuesSource(numDocsOf, "numDocs()");
}
/**
 * Creates a constant value source returning the number of deleted docs in the index
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#numDeletedDocs()
 */
public static DoubleValuesSource numDeletedDocs() {
  final ReaderFunction numDeletedDocsOf = IndexReader::numDeletedDocs;
  return new IndexReaderDoubleValuesSource(numDeletedDocsOf, "numDeletedDocs()");
}
/**
 * Creates a constant value source returning the sumTotalTermFreq for a field
 *
 * The returned source must be rewritten against a searcher before values are requested.
 *
 * @see IndexReader#getSumTotalTermFreq(String)
 */
public static LongValuesSource sumTotalTermFreq(String field) {
  final LongValuesSource source = new SumTotalTermFreqValuesSource(field);
  return source;
}
// Unrewritten placeholder for a field's sumTotalTermFreq; rewrite() replaces it with a constant source.
private static class SumTotalTermFreqValuesSource extends LongValuesSource {

  private final String field;

  private SumTotalTermFreqValuesSource(String field) {
    this.field = field;
  }

  @Override
  public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
    // Read the statistic from the searcher's reader and freeze it into a constant source.
    final long sumTotalTermFreq = searcher.getIndexReader().getSumTotalTermFreq(field);
    return new NoCacheConstantLongValuesSource(sumTotalTermFreq, this);
  }

  @Override
  public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    // No reader is known until rewrite() has been called, so values cannot be produced here.
    throw new UnsupportedOperationException("IndexReaderFunction must be rewritten before use");
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public String toString() {
    return "sumTotalTermFreq(" + field + ")";
  }

  @Override
  public int hashCode() {
    return Objects.hash(field);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final SumTotalTermFreqValuesSource other = (SumTotalTermFreqValuesSource) o;
    return Objects.equals(field, other.field);
  }
}
// Constant long source produced by rewriting; reports itself as non-cacheable and
// uses the parent source's string form as its own description.
private static class NoCacheConstantLongValuesSource extends LongValuesSource {

  final long value;
  final LongValuesSource parent;

  private NoCacheConstantLongValuesSource(long value, LongValuesSource parent) {
    this.value = value;
    this.parent = parent;
  }

  @Override
  public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    final long constant = value;
    return new LongValues() {
      @Override
      public boolean advanceExact(int doc) throws IOException {
        // Every document has the constant value.
        return true;
      }

      @Override
      public long longValue() throws IOException {
        return constant;
      }
    };
  }

  @Override
  public LongValuesSource rewrite(IndexSearcher reader) throws IOException {
    return this;
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public String toString() {
    return parent.toString();
  }

  @Override
  public int hashCode() {
    return Objects.hash(value, parent);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o instanceof NoCacheConstantLongValuesSource == false) {
      return false;
    }
    final NoCacheConstantLongValuesSource other = (NoCacheConstantLongValuesSource) o;
    if (value != other.value) {
      return false;
    }
    return Objects.equals(parent, other.parent);
  }
}
/**
 * Creates a value source that returns the term freq of a given term for each document
 *
 * Documents that do not contain the term have no value.
 *
 * @see PostingsEnum#freq()
 */
public static DoubleValuesSource termFreq(Term term) {
  final DoubleValuesSource source = new TermFreqDoubleValuesSource(term);
  return source;
}
// Per-document source exposing the frequency of one term, read from the segment's postings.
private static class TermFreqDoubleValuesSource extends DoubleValuesSource {

  private final Term term;

  private TermFreqDoubleValuesSource(Term term) {
    this.term = term;
  }

  @Override
  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    final Terms fieldTerms = ctx.reader().terms(term.field());
    if (fieldTerms == null) {
      // The field has no terms in this segment.
      return DoubleValues.EMPTY;
    }
    final TermsEnum termsEnum = fieldTerms.iterator();
    if (termsEnum == null || termsEnum.seekExact(term.bytes()) == false) {
      // The term does not occur in this segment.
      return DoubleValues.EMPTY;
    }
    final PostingsEnum postings = termsEnum.postings(null);
    assert postings != null;
    return new DoubleValues() {
      @Override
      public double doubleValue() throws IOException {
        return postings.freq();
      }

      @Override
      public boolean advanceExact(int doc) throws IOException {
        final int current = postings.docID();
        if (current > doc) {
          // The postings are already positioned past the target document.
          return false;
        }
        if (current == doc) {
          return true;
        }
        return postings.advance(doc) == doc;
      }
    };
  }

  @Override
  public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
    return this;
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return true;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public String toString() {
    return "termFreq(" + term.toString() + ")";
  }

  @Override
  public int hashCode() {
    return Objects.hash(term);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final TermFreqDoubleValuesSource other = (TermFreqDoubleValuesSource) o;
    return Objects.equals(term, other.term);
  }
}
/**
 * Creates a constant value source returning the totalTermFreq for a given term
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#totalTermFreq(Term)
 */
public static DoubleValuesSource totalTermFreq(Term term) {
  final String description = "totalTermFreq(" + term.toString() + ")";
  return new IndexReaderDoubleValuesSource(reader -> reader.totalTermFreq(term), description);
}
/**
 * Creates a constant value source returning the sumDocFreq for a given field
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#getSumDocFreq(String)
 */
public static DoubleValuesSource sumDocFreq(String field) {
  final String description = "sumDocFreq(" + field + ")";
  return new IndexReaderDoubleValuesSource(reader -> reader.getSumDocFreq(field), description);
}
/**
 * Creates a constant value source returning the docCount for a given field
 *
 * The value is read from the searcher's IndexReader when the source is rewritten.
 *
 * @see IndexReader#getDocCount(String)
 */
public static DoubleValuesSource docCount(String field) {
  final String description = "docCount(" + field + ")";
  return new IndexReaderDoubleValuesSource(reader -> reader.getDocCount(field), description);
}
// Computes a double from an IndexReader; unlike the java.util.function
// interfaces, apply() is allowed to throw IOException.
@FunctionalInterface
private interface ReaderFunction {
double apply(IndexReader reader) throws IOException;
}
// Unrewritten placeholder for an IndexReader statistic; rewrite() evaluates the function
// against the searcher's reader and returns a constant source holding the result.
private static class IndexReaderDoubleValuesSource extends DoubleValuesSource {

  private final ReaderFunction func;
  private final String description;

  private IndexReaderDoubleValuesSource(ReaderFunction func, String description) {
    this.func = func;
    this.description = description;
  }

  @Override
  public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
    final double statistic = func.apply(searcher.getIndexReader());
    return new NoCacheConstantDoubleValuesSource(statistic, this);
  }

  @Override
  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    // No reader is known until rewrite() has been called, so values cannot be produced here.
    throw new UnsupportedOperationException("IndexReaderFunction must be rewritten before use");
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public String toString() {
    return description;
  }

  @Override
  public int hashCode() {
    return Objects.hash(description, func);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final IndexReaderDoubleValuesSource other = (IndexReaderDoubleValuesSource) o;
    if (Objects.equals(description, other.description) == false) {
      return false;
    }
    return Objects.equals(func, other.func);
  }
}
// Constant double source produced by rewriting; reports itself as non-cacheable and
// uses the parent source's string form as its own description.
private static class NoCacheConstantDoubleValuesSource extends DoubleValuesSource {

  final double value;
  final DoubleValuesSource parent;

  private NoCacheConstantDoubleValuesSource(double value, DoubleValuesSource parent) {
    this.value = value;
    this.parent = parent;
  }

  @Override
  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    final double constant = value;
    return new DoubleValues() {
      @Override
      public boolean advanceExact(int doc) throws IOException {
        // Every document has the constant value.
        return true;
      }

      @Override
      public double doubleValue() throws IOException {
        return constant;
      }
    };
  }

  @Override
  public DoubleValuesSource rewrite(IndexSearcher reader) throws IOException {
    return this;
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public String toString() {
    return parent.toString();
  }

  @Override
  public int hashCode() {
    return Objects.hash(value, parent);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o instanceof NoCacheConstantDoubleValuesSource == false) {
      return false;
    }
    final NoCacheConstantDoubleValuesSource other = (NoCacheConstantDoubleValuesSource) o;
    if (Double.compare(other.value, value) != 0) {
      return false;
    }
    return Objects.equals(parent, other.parent);
  }
}
}

View File

@ -180,6 +180,11 @@ public abstract class ValueSource {
return in.toString();
}
// Returns this instance unchanged; the searcher argument is not used.
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
}
/**
@ -192,6 +197,7 @@ public abstract class ValueSource {
private static class WrappedDoubleValuesSource extends DoubleValuesSource {
private final ValueSource in;
private IndexSearcher searcher;
private WrappedDoubleValuesSource(ValueSource in) {
this.in = in;
@ -202,6 +208,7 @@ public abstract class ValueSource {
Map context = new HashMap<>();
FakeScorer scorer = new FakeScorer();
context.put("scorer", scorer);
context.put("searcher", searcher);
FunctionValues fv = in.getValues(context, ctx);
return new DoubleValues() {
@ -239,10 +246,17 @@ public abstract class ValueSource {
FakeScorer scorer = new FakeScorer();
scorer.score = scoreExplanation.getValue();
context.put("scorer", scorer);
context.put("searcher", searcher);
FunctionValues fv = in.getValues(context, ctx);
return fv.explain(docId);
}
/**
 * Returns a copy of this source bound to the given searcher.
 *
 * A new instance is returned rather than storing the searcher on {@code this}, so that
 * a source shared between queries, searchers or threads is never mutated by a rewrite.
 *
 * @param searcher the searcher to expose to the wrapped ValueSource via its context map
 * @return a new WrappedDoubleValuesSource over the same ValueSource, bound to {@code searcher}
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  // NOTE(review): assumes the one-argument constructor only assigns `in` - confirm.
  WrappedDoubleValuesSource rewritten = new WrappedDoubleValuesSource(in);
  rewritten.searcher = searcher;
  return rewritten;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
@ -279,7 +293,14 @@ public abstract class ValueSource {
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
Scorer scorer = (Scorer) context.get("scorer");
DoubleValues scores = scorer == null ? null : DoubleValuesSource.fromScorer(scorer);
DoubleValues inner = in.getValues(readerContext, scores);
IndexSearcher searcher = (IndexSearcher) context.get("searcher");
DoubleValues inner;
if (searcher != null)
inner = in.rewrite(searcher).getValues(readerContext, scores);
else
inner = in.getValues(readerContext, scores);
return new FunctionValues() {
@Override
public String toString(int doc) throws IOException {
@ -324,6 +345,7 @@ public abstract class ValueSource {
public String description() {
return in.toString();
}
}
//

View File

@ -67,7 +67,9 @@ public class TestFunctionScoreExplanations extends BaseExplanationTestCase {
Explanation e1 = searcher.explain(q, 0);
Explanation e = searcher.explain(csq, 0);
assertEquals(e, e1);
assertEquals(e.getValue(), e1.getValue(), 0.00001);
assertEquals(e.getDetails()[1], e1);
}
public void testSubExplanations() throws IOException {
@ -76,8 +78,9 @@ public class TestFunctionScoreExplanations extends BaseExplanationTestCase {
searcher.setSimilarity(new BM25Similarity());
Explanation expl = searcher.explain(query, 0);
assertEquals("constant(5.0)", expl.getDescription());
assertEquals(0, expl.getDetails().length);
Explanation subExpl = expl.getDetails()[1];
assertEquals("constant(5.0)", subExpl.getDescription());
assertEquals(0, subExpl.getDetails().length);
query = new BoostQuery(query, 2);
expl = searcher.explain(query, 0);

View File

@ -144,6 +144,11 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
return in.isCacheable(ctx);
}
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  // Rewrite the wrapped input, then rebuild the same function wrapper around it.
  final DoubleValuesSource rewrittenInput = in.rewrite(searcher);
  return function(rewrittenInput, function);
}
@Override
public int hashCode() {
return 0;
@ -189,6 +194,11 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
return in.isCacheable(ctx);
}
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  // Rewrite the wrapped input, then rebuild the same scoring-function wrapper around it.
  final DoubleValuesSource rewrittenInput = in.rewrite(searcher);
  return scoringFunction(rewrittenInput, function);
}
@Override
public int hashCode() {
return 0;

View File

@ -0,0 +1,201 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.function;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
 * Tests the value sources produced by {@link IndexReaderFunctions} against a small
 * two-document index. Each test checks the per-document values (both as query scores
 * and as a sort key), the {@code toString()} form, and whether the weight built from
 * the source reports itself as cacheable for the first leaf.
 */
public class TestIndexReaderFunctions extends LuceneTestCase {

  static Directory dir;
  static Analyzer analyzer;
  static IndexReader reader;
  static IndexSearcher searcher;

  // Only columns 0 (id), 5 (string) and 6 (text) are indexed by beforeClass().
  static final List<String[]> documents = Arrays.asList(
      /*             id,  double, float, int,  long,   string, text,                       double MV (x3),             int MV (x3)*/
      new String[] { "0", "3.63", "5.2", "35", "4343", "test", "this is a test test test", "2.13", "3.69", "-0.11", "1", "7", "5"},
      new String[] { "1", "5.65", "9.3", "54", "1954", "bar", "second test", "12.79", "123.456", "0.01", "12", "900", "-1" });

  /** Builds the shared index, reader and searcher used by every test in this class. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    dir = newDirectory();
    analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer);
    iwConfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
    for (String[] doc : documents) {
      Document document = new Document();
      document.add(new StringField("id", doc[0], Field.Store.NO));
      document.add(new SortedDocValuesField("id", new BytesRef(doc[0])));
      document.add(new StringField("string", doc[5], Field.Store.NO));
      document.add(new SortedDocValuesField("string", new BytesRef(doc[5])));
      document.add(new TextField("text", doc[6], Field.Store.NO));
      iw.addDocument(document);
    }

    reader = iw.getReader();
    searcher = newSearcher(reader);
    iw.close();
  }

  /** Closes the shared resources and clears the static references. */
  @AfterClass
  public static void afterClass() throws Exception {
    IOUtils.close(reader, dir, analyzer);
    searcher = null;
    reader = null;
    dir = null;
    analyzer = null;
  }

  public void testDocFreq() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.docFreq(new Term("text", "test"));
    assertHits(vs, new float[] { 2f, 2f });
    assertEquals("docFreq(text:test)", vs.toString());
    assertCacheable(vs, false);
  }

  public void testMaxDoc() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.maxDoc();
    assertHits(vs, new float[] { 2f, 2f });
    assertEquals("maxDoc()", vs.toString());
    assertCacheable(vs, false);
  }

  public void testNumDocs() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.numDocs();
    assertHits(vs, new float[] { 2f, 2f });
    assertEquals("numDocs()", vs.toString());
    assertCacheable(vs, false);
  }

  public void testSumTotalTermFreq() throws Exception {
    LongValuesSource vs = IndexReaderFunctions.sumTotalTermFreq("text");
    assertHits(vs.toDoubleValuesSource(), new float[] { 8f, 8f });
    assertEquals("sumTotalTermFreq(text)", vs.toString());
    assertCacheable(vs, false);
  }

  public void testTermFreq() throws Exception {
    assertHits(IndexReaderFunctions.termFreq(new Term("string", "bar")), new float[] { 0f, 1f });
    assertHits(IndexReaderFunctions.termFreq(new Term("text", "test")), new float[] { 3f, 1f });
    assertHits(IndexReaderFunctions.termFreq(new Term("bogus", "bogus")), new float[] { 0F, 0F });
    assertEquals("termFreq(string:bar)", IndexReaderFunctions.termFreq(new Term("string", "bar")).toString());
    assertCacheable(IndexReaderFunctions.termFreq(new Term("text", "test")), true);
  }

  public void testTotalTermFreq() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.totalTermFreq(new Term("text", "test"));
    assertHits(vs, new float[] { 4f, 4f });
    assertEquals("totalTermFreq(text:test)", vs.toString());
    assertCacheable(vs, false);
  }

  public void testNumDeletedDocs() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.numDeletedDocs();
    assertHits(vs, new float[] { 0, 0 });
    assertEquals("numDeletedDocs()", vs.toString());
    assertCacheable(vs, false);
  }

  public void testSumDocFreq() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.sumDocFreq("text");
    assertHits(vs, new float[] { 6, 6 });
    assertEquals("sumDocFreq(text)", vs.toString());
    assertCacheable(vs, false);
  }

  public void testDocCount() throws Exception {
    DoubleValuesSource vs = IndexReaderFunctions.docCount("text");
    assertHits(vs, new float[] { 2, 2 });
    assertEquals("docCount(text)", vs.toString());
    assertCacheable(vs, false);
  }

  /**
   * Wraps {@code vs} in a FunctionScoreQuery over all documents and asserts that the
   * resulting weight's {@code isCacheable} answer for the first leaf equals {@code expected}.
   */
  void assertCacheable(DoubleValuesSource vs, boolean expected) throws Exception {
    Query q = new FunctionScoreQuery(new MatchAllDocsQuery(), vs);
    Weight w = searcher.createNormalizedWeight(q, true);
    LeafReaderContext ctx = reader.leaves().get(0);
    assertEquals(expected, w.isCacheable(ctx));
  }

  /** Same check as the DoubleValuesSource overload, applied to {@code vs.toDoubleValuesSource()}. */
  void assertCacheable(LongValuesSource vs, boolean expected) throws Exception {
    assertCacheable(vs.toDoubleValuesSource(), expected);
  }

  /**
   * Asserts that scoring every document with {@code vs} yields {@code scores[i]} for
   * document {@code i}, that explanations check out, and that sorting by {@code vs}
   * produces the matching order.
   */
  void assertHits(DoubleValuesSource vs, float[] scores) throws Exception {
    Query q = new FunctionScoreQuery(new MatchAllDocsQuery(), vs);
    ScoreDoc[] expected = new ScoreDoc[scores.length];
    int[] expectedDocs = new int[scores.length];
    for (int i = 0; i < expected.length; i++) {
      expectedDocs[i] = i;
      expected[i] = new ScoreDoc(i, scores[i]);
    }
    TopDocs docs = searcher.search(q, documents.size(),
        new Sort(new SortField("id", SortField.Type.STRING)), true, false);
    CheckHits.checkHits(random(), q, "", searcher, expectedDocs);
    CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs);
    CheckHits.checkExplanations(q, "", searcher);

    assertSort(vs, expected);
  }

  /**
   * Sorts {@code expected} in place by score (direction chosen at random) and asserts
   * that a search sorted by {@code vs.getSortField(reversed)} returns the same order.
   */
  void assertSort(DoubleValuesSource vs, ScoreDoc[] expected) throws Exception {
    boolean reversed = random().nextBoolean();
    // Float.compare instead of casting the difference to int: the cast truncates toward
    // zero, so scores less than 1 apart would wrongly compare as equal.
    Arrays.sort(expected, (a, b) -> reversed
        ? Float.compare(b.score, a.score)
        : Float.compare(a.score, b.score));
    int[] expectedDocs = new int[expected.length];
    for (int i = 0; i < expected.length; i++) {
      expectedDocs[i] = expected[i].doc;
    }
    TopDocs docs = searcher.search(new MatchAllDocsQuery(), expected.length,
        new Sort(vs.getSortField(reversed)));
    CheckHits.checkHitsQuery(new MatchAllDocsQuery(), expected, docs.scoreDocs, expectedDocs);
  }

}

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.spatial.ShapeValues;
import org.apache.lucene.spatial.ShapeValuesSource;
import org.locationtech.spatial4j.shape.Rectangle;
@ -45,6 +46,11 @@ public abstract class BBoxSimilarityValueSource extends DoubleValuesSource {
this.bboxValueSource = bboxValueSource;
}
/**
 * Returns this source unchanged; the supplied searcher is not consulted.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public String toString() {
return getClass().getSimpleName()+"(" + bboxValueSource.toString() + "," + similarityDescription() + ")";

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
/**
* Caches the doubleVal of another value source in a HashMap
@ -88,6 +89,11 @@ public class CachingDoubleValueSource extends DoubleValuesSource {
return source.explain(ctx, docId, scoreExplanation);
}
/**
 * Rewrites the wrapped source against {@code searcher} and returns a new
 * CachingDoubleValueSource around the rewritten source.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  final DoubleValuesSource rewrittenSource = source.rewrite(searcher);
  return new CachingDoubleValueSource(rewrittenSource);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.spatial.ShapeValues;
import org.apache.lucene.spatial.ShapeValuesSource;
import org.locationtech.spatial4j.context.SpatialContext;
@ -85,6 +86,11 @@ public class DistanceToShapeValueSource extends DoubleValuesSource {
return shapeValueSource.isCacheable(ctx);
}
/**
 * Returns this instance as-is; this implementation does not use the searcher.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
/**
* Transforms a DoubleValuesSource using the formula v = k / (v + k)
@ -80,6 +81,11 @@ public class ReciprocalDoubleValuesSource extends DoubleValuesSource {
distToEdge + " / (v + " + distToEdge + "), computed from:", expl);
}
/**
 * Rewrites the input source against {@code searcher} and builds a new
 * ReciprocalDoubleValuesSource over it with the same {@code distToEdge}.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
  final DoubleValuesSource rewrittenInput = input.rewrite(searcher);
  return new ReciprocalDoubleValuesSource(distToEdge, rewrittenInput);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.spatial.ShapeValues;
import org.apache.lucene.spatial.ShapeValuesSource;
import org.locationtech.spatial4j.context.SpatialContext;
@ -77,6 +78,11 @@ public class ShapeAreaValueSource extends DoubleValuesSource {
return shapeValueSource.isCacheable(ctx);
}
/**
 * Returns this source unchanged; the supplied searcher is not consulted.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceCalculator;
import org.locationtech.spatial4j.shape.Point;
@ -94,6 +95,11 @@ public class ShapeFieldCacheDistanceValueSource extends DoubleValuesSource {
return true;
}
/**
 * Returns this instance as-is; this implementation does not use the searcher.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.locationtech.spatial4j.distance.DistanceCalculator;
import org.locationtech.spatial4j.shape.Point;
@ -97,6 +98,11 @@ public class DistanceValueSource extends DoubleValuesSource {
return DocValues.isCacheable(ctx, strategy.getFieldNameX(), strategy.getFieldNameY());
}
/**
 * Returns this source unchanged; the supplied searcher is not consulted.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -40,6 +40,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.spell.Dictionary;
@ -191,6 +192,11 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase {
// Always returns null; no description is provided by this implementation.
public String toString() {
return null;
}
/**
 * Returns this instance as-is; the searcher argument is ignored.
 */
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
};
}

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.locationtech.spatial4j.distance.DistanceCalculator;
import org.locationtech.spatial4j.shape.Point;
@ -93,6 +94,11 @@ public class DistanceValueSource extends DoubleValuesSource {
return DocValues.isCacheable(ctx, strategy.getFieldNameX(), strategy.getFieldNameY());
}
/**
 * Returns this source unchanged; the supplied searcher is not consulted.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
@ -253,6 +254,11 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
return DocValues.isCacheable(ctx, fieldName);
}
/**
 * Returns this instance as-is; this implementation does not use the searcher.
 */
@Override
public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
return this;
}
@Override
public String toString() {
return "distSort(" + fieldName + ", " + queryPoint + ", mult:" + multiplier + ")";