LUCENE-8099: Deprecate CustomScoreQuery, BoostedQuery, BoostingQuery

This commit is contained in:
Alan Woodward 2017-12-14 12:09:51 +00:00
parent e441a99009
commit b01e6023e1
17 changed files with 194 additions and 125 deletions

View File

@ -65,6 +65,10 @@ API Changes
* LUCENE-8051: LevensteinDistance renamed to LevenshteinDistance.
(Pulak Ghosh via Adrien Grand)
* LUCENE-8099: Deprecate CustomScoreQuery, BoostedQuery and BoostingQuery.
Users should instead use FunctionScoreQuery, possibly combined with
a Lucene expression from the lucene-expressions module. (Alan Woodward)
New Features
* LUCENE-2899: Add new module analysis/opennlp, with analysis components

View File

@ -27,6 +27,9 @@ import org.apache.lucene.index.IndexReader;
* Boost values that are less than one will give less importance to this
* query compared to other ones while values that are greater than one will
* give more importance to the scores returned by this query.
*
* More complex boosts can be applied by using FunctionScoreQuery in the
* lucene-queries module.
*/
public final class BoostQuery extends Query {

View File

@ -107,7 +107,7 @@ final class ExpressionValueSource extends DoubleValuesSource {
final int prime = 31;
int result = 1;
result = prime * result
+ ((expression == null) ? 0 : expression.hashCode());
+ ((expression == null) ? 0 : expression.sourceText.hashCode());
result = prime * result + (needsScores ? 1231 : 1237);
result = prime * result + Arrays.hashCode(variables);
return result;
@ -129,7 +129,7 @@ final class ExpressionValueSource extends DoubleValuesSource {
if (other.expression != null) {
return false;
}
} else if (!expression.equals(other.expression)) {
} else if (!expression.sourceText.equals(other.expression.sourceText)) {
return false;
}
if (needsScores != other.needsScores) {
@ -178,7 +178,7 @@ final class ExpressionValueSource extends DoubleValuesSource {
changed |= (rewritten[i] == variables[i]);
}
if (changed) {
return new ExpressionValueSource(variables, expression, needsScores);
return new ExpressionValueSource(rewritten, expression, needsScores);
}
return this;
}

View File

@ -40,6 +40,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@ -211,8 +212,10 @@ public class WeightedSpanTermExtractor {
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing
} else if (query instanceof CustomScoreQuery){
} else if (query instanceof CustomScoreQuery) {
extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
} else if (query instanceof FunctionScoreQuery) {
extract(((FunctionScoreQuery) query).getWrappedQuery(), boost, terms);
} else if (isQueryUnsupported(query.getClass())) {
// nothing
} else {

View File

@ -23,6 +23,7 @@ import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -80,8 +81,11 @@ class MultiTermHighlighting {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (query instanceof BoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((BoostQuery)query).getQuery(), fieldMatcher, lookInSpan,
list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (query instanceof FunctionScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((FunctionScoreQuery) query).getWrappedQuery(), fieldMatcher,
lookInSpan, preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));

View File

@ -28,6 +28,7 @@ import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@ -143,7 +144,12 @@ public class FieldQuery {
} else if (sourceQuery instanceof CustomScoreQuery) {
final Query q = ((CustomScoreQuery) sourceQuery).getSubQuery();
if (q != null) {
flatten( q, reader, flatQueries, boost);
flatten(q, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof FunctionScoreQuery) {
final Query q = ((FunctionScoreQuery)sourceQuery).getWrappedQuery();
if (q != null) {
flatten(q, reader, flatQueries, boost);
}
} else if (sourceQuery instanceof ToParentBlockJoinQuery) {
Query childQuery = ((ToParentBlockJoinQuery) sourceQuery).getChildQuery();

View File

@ -59,10 +59,12 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
@ -166,6 +168,28 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
/**
 * Checks that the highlighter looks inside a {@link FunctionScoreQuery} and highlights
 * the term matched by the query it wraps.
 */
public void testFunctionScoreQuery() throws Exception {
  // A plain term query for "very", wrapped with a constant-valued score source.
  TermQuery wrapped = new TermQuery(new Term(FIELD_NAME, "very"));
  FunctionScoreQuery functionQuery = new FunctionScoreQuery(wrapped, DoubleValuesSource.constant(1));

  searcher = newSearcher(reader);
  Sort docThenScore = new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE);
  TopDocs topDocs = searcher.search(functionQuery, 10, docThenScore);
  assertEquals(2, topDocs.totalHits);

  QueryScorer queryScorer = new QueryScorer(functionQuery, FIELD_NAME);
  Highlighter hl = new Highlighter(queryScorer);

  // Highlight the stored text of the first hit.
  final int firstDoc = topDocs.scoreDocs[0].doc;
  String text = searcher.doc(firstDoc).get(FIELD_NAME);
  TokenStream tokens = getAnyTokenStream(FIELD_NAME, firstDoc);
  hl.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

  assertEquals("Hello this is a piece of text that is <B>very</B> long and contains too much preamble and the meat is really here which says kennedy has been shot", hl.getBestFragment(tokens, text));
}
public void testQueryScorerHits() throws Exception {
PhraseQuery phraseQuery = new PhraseQuery(FIELD_NAME, "very", "long");

View File

@ -39,10 +39,12 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
@ -119,6 +121,36 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
writer.close();
dir.close();
}
/**
 * Checks that {@link FastVectorHighlighter} highlights the term matched by the query
 * wrapped inside a {@link FunctionScoreQuery}, using three different fragment sizes.
 */
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

  // Stored text field with term vectors, positions and offsets switched on.
  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectors(true);
  vectorType.freeze();

  Document document = new Document();
  document.add(new Field("field", "This is a test where foo is highlighed and should be highlighted", vectorType));
  indexWriter.addDocument(document);

  FastVectorHighlighter fvh = new FastVectorHighlighter();
  IndexReader indexReader = DirectoryReader.open(indexWriter);
  final int onlyDoc = 0;

  FunctionScoreQuery functionQuery =
      new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1));
  FieldQuery fieldQuery = fvh.getFieldQuery(functionQuery, indexReader);

  // highlighted results are centered
  String[] fragments = fvh.getBestFragments(fieldQuery, indexReader, onlyDoc, "field", 54, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", fragments[0]);

  fragments = fvh.getBestFragments(fieldQuery, indexReader, onlyDoc, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", fragments[0]);

  fragments = fvh.getBestFragments(fieldQuery, indexReader, onlyDoc, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", fragments[0]);

  indexReader.close();
  indexWriter.close();
  directory.close();
}
public void testPhraseHighlightLongTextTest() throws IOException {
Directory dir = newDirectory();

View File

@ -21,4 +21,12 @@
</description>
<import file="../module-build.xml"/>
<path id="test.classpath">
<pathelement path="${expressions.jar}"/>
<fileset dir="../expressions/lib"/>
<path refid="test.base.classpath"/>
</path>
<target name="compile-core" depends="jar-expressions,common.compile-core" />
</project>

View File

@ -48,7 +48,19 @@ import org.apache.lucene.search.Weight;
* This code was originally made available here:
* <a href="http://marc.theaimsgroup.com/?l=lucene-user&amp;m=108058407130459&amp;w=2">http://marc.theaimsgroup.com/?l=lucene-user&amp;m=108058407130459&amp;w=2</a>
* and is documented here: http://wiki.apache.org/lucene-java/CommunityContributions
*
* Clients should instead use FunctionScoreQuery and the lucene-expressions library:
* <pre>
* SimpleBindings bindings = new SimpleBindings();
* bindings.add("score", DoubleValuesSource.SCORES);
* bindings.add("context", DoubleValuesSource.fromQuery(new BoostQuery(new ConstantScoreQuery(myContextQuery), boost)));
* Expression expr = JavascriptCompiler.compile("score * context");
* FunctionScoreQuery q = new FunctionScoreQuery(inputQuery, expr.getDoubleValuesSource(bindings));
* </pre>
*
* @deprecated Use {@link org.apache.lucene.queries.function.FunctionScoreQuery}
*/
@Deprecated
public class BoostingQuery extends Query {
private final float boost; // the amount to boost by
private final Query match; // query to match

View File

@ -43,7 +43,19 @@ import org.apache.lucene.search.Weight;
* Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}.
*
* Clients should instead use FunctionScoreQuery and the lucene-expressions library:
* <pre>
* SimpleBindings bindings = new SimpleBindings();
* bindings.add("score", DoubleValuesSource.SCORES);
* bindings.add("boost", DoubleValuesSource.fromIntField("myboostfield"));
* Expression expr = JavascriptCompiler.compile("score * boost");
* FunctionScoreQuery q = new FunctionScoreQuery(inputQuery, expr.getDoubleValuesSource(bindings));
* </pre>
*
* @lucene.experimental
* @deprecated use {@link org.apache.lucene.queries.function.FunctionScoreQuery}
*/
@Deprecated
public class CustomScoreQuery extends Query implements Cloneable {
private Query subQuery;

View File

@ -35,9 +35,20 @@ import org.apache.lucene.search.Weight;
/**
* Query that is boosted by a ValueSource
*
* Instead of using this query, clients can use a {@link FunctionScoreQuery} and the
* lucene-expressions library:
* <pre>
* SimpleBindings bindings = new SimpleBindings();
* bindings.add("score", DoubleValuesSource.SCORES);
* bindings.add("boost", DoubleValuesSource.fromIntField("myboostfield"));
* Expression expr = JavascriptCompiler.compile("score * boost");
* FunctionScoreQuery q = new FunctionScoreQuery(inputQuery, expr.getDoubleValuesSource(bindings));
* </pre>
*
* @deprecated Use {@link FunctionScoreQuery}
*/
// TODO: BoostedQuery and BoostingQuery in the same module?
// something has to give
@Deprecated
public final class BoostedQuery extends Query {
private final Query q;
private final ValueSource boostVal; // optional, can be null

View File

@ -56,6 +56,13 @@ public final class FunctionScoreQuery extends Query {
this.source = source;
}
/**
 * Accessor for the query that this FunctionScoreQuery wraps.
 *
 * @return the wrapped Query
 */
public Query getWrappedQuery() {
  return this.in;
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
Weight inner = in.createWeight(searcher, scoreMode.needsScores() && source.needsScores() ? scoreMode : ScoreMode.COMPLETE_NO_SCORES, 1f);

View File

@ -18,20 +18,19 @@
package org.apache.lucene.queries.function;
import java.io.IOException;
import java.util.function.DoubleUnaryOperator;
import java.util.function.ToDoubleBiFunction;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
@ -81,8 +80,10 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
// CustomScoreQuery and BoostedQuery equivalent
public void testScoreModifyingSource() throws Exception {
DoubleValuesSource iii = DoubleValuesSource.fromIntField("iii");
DoubleValuesSource score = scoringFunction(iii, (v, s) -> v * s);
SimpleBindings bindings = new SimpleBindings();
bindings.add("score", DoubleValuesSource.SCORES);
bindings.add("iii", DoubleValuesSource.fromIntField("iii"));
Expression expr = JavascriptCompiler.compile("score * iii");
BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term(TEXT_FIELD, "first")), BooleanClause.Occur.SHOULD)
@ -90,7 +91,7 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
.build();
TopDocs plain = searcher.search(bq, 1);
FunctionScoreQuery fq = new FunctionScoreQuery(bq, score);
FunctionScoreQuery fq = new FunctionScoreQuery(bq, expr.getDoubleValuesSource(bindings));
QueryUtils.check(random(), fq, searcher, rarely());
@ -104,12 +105,38 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
}
// BoostingQuery equivalent: the score of the main query is combined with the
// score of a second query through a compiled expression.
public void testCombiningMultipleQueryScores() throws Exception {
  SimpleBindings vars = new SimpleBindings();
  vars.add("score", DoubleValuesSource.SCORES);
  TermQuery recheckingQuery = new TermQuery(new Term(TEXT_FIELD, "rechecking"));
  vars.add("testquery", DoubleValuesSource.fromQuery(recheckingQuery));
  Expression combined = JavascriptCompiler.compile("score + (testquery * 100)");

  TermQuery textQuery = new TermQuery(new Term(TEXT_FIELD, "text"));
  TopDocs baseline = searcher.search(textQuery, 1);

  FunctionScoreQuery functionQuery = new FunctionScoreQuery(textQuery, combined.getDoubleValuesSource(vars));
  QueryUtils.check(random(), functionQuery, searcher, rarely());

  TopDocs top = searcher.search(functionQuery, 5);

  // Wrapping must not change which documents match, only their order.
  assertEquals(baseline.totalHits, top.totalHits);

  int[] expectedDocs = new int[]{ 6, 1, 0, 2, 8 };
  int rank = 0;
  for (int expectedDoc : expectedDocs) {
    assertEquals(expectedDoc, top.scoreDocs[rank].doc);
    rank++;
  }
}
// check boosts with non-distributive score source
public void testBoostsAreAppliedLast() throws Exception {
DoubleValuesSource scores = function(DoubleValuesSource.SCORES, v -> Math.log(v + 4));
SimpleBindings bindings = new SimpleBindings();
bindings.add("score", DoubleValuesSource.SCORES);
Expression expr = JavascriptCompiler.compile("ln(score + 4)");
Query q1 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "text")), scores);
Query q1 = new FunctionScoreQuery(new TermQuery(new Term(TEXT_FIELD, "text")), expr.getDoubleValuesSource(bindings));
TopDocs plain = searcher.search(q1, 5);
Query boosted = new BoostQuery(q1, 2);
@ -122,106 +149,6 @@ public class TestFunctionScoreQuery extends FunctionTestSetup {
}
/**
 * Wraps a {@link DoubleValuesSource} so that every value it produces is passed through
 * {@code function} before being returned.
 *
 * <p>Score requirements and cacheability are delegated to {@code in}. Rewriting rewrites
 * {@code in} and wraps the result again with the same function.
 *
 * @param in       the source whose values are transformed
 * @param function the transformation applied to each value of {@code in}
 * @return a source yielding {@code function.applyAsDouble(value)} for each value of {@code in}
 */
public static DoubleValuesSource function(DoubleValuesSource in, DoubleUnaryOperator function) {
  return new DoubleValuesSource() {
    @Override
    public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      DoubleValues v = in.getValues(ctx, scores);
      return new DoubleValues() {
        @Override
        public double doubleValue() throws IOException {
          return function.applyAsDouble(v.doubleValue());
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
          // A document has a value exactly when the wrapped source has one.
          return v.advanceExact(doc);
        }
      };
    }

    @Override
    public boolean needsScores() {
      return in.needsScores();
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return in.isCacheable(ctx);
    }

    @Override
    public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return function(in.rewrite(searcher), function);
    }

    @Override
    public int hashCode() {
      // Consistent with the identity-based equals below.
      return System.identityHashCode(this);
    }

    @Override
    public boolean equals(Object obj) {
      // The wrapped lambda cannot be compared structurally, so fall back to identity.
      // Returning false unconditionally would break the reflexivity that Object.equals requires.
      return obj == this;
    }

    @Override
    public String toString() {
      return "fn";
    }
  };
}
/**
 * Wraps a {@link DoubleValuesSource} so that each value is combined with the current
 * document score through {@code function}.
 *
 * <p>The function receives the value of {@code in} as its first argument and the score as
 * its second. The returned source always reports that it needs scores. Cacheability is
 * delegated to {@code in}, and rewriting rewrites {@code in} and wraps the result again.
 *
 * @param in       the source providing the first argument of {@code function}
 * @param function combines a value of {@code in} with the document score
 * @return a source yielding {@code function.applyAsDouble(value, score)}
 */
private static DoubleValuesSource scoringFunction(DoubleValuesSource in, ToDoubleBiFunction<Double, Double> function) {
  return new DoubleValuesSource() {
    @Override
    public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
      DoubleValues v = in.getValues(ctx, scores);
      return new DoubleValues() {
        @Override
        public double doubleValue() throws IOException {
          return function.applyAsDouble(v.doubleValue(), scores.doubleValue());
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
          // Only the wrapped source is advanced here; the scores are read as-is.
          return v.advanceExact(doc);
        }
      };
    }

    @Override
    public boolean needsScores() {
      return true;
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return in.isCacheable(ctx);
    }

    @Override
    public DoubleValuesSource rewrite(IndexSearcher searcher) throws IOException {
      return scoringFunction(in.rewrite(searcher), function);
    }

    @Override
    public int hashCode() {
      // Consistent with the identity-based equals below.
      return System.identityHashCode(this);
    }

    @Override
    public boolean equals(Object obj) {
      // The wrapped lambda cannot be compared structurally, so fall back to identity.
      // Returning false unconditionally would break the reflexivity that Object.equals requires.
      return obj == this;
    }

    @Override
    public String toString() {
      return "fn";
    }
  };
}
public void testTruncateNegativeScores() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());

View File

@ -16,11 +16,18 @@
*/
package org.apache.solr.search;
import org.apache.lucene.queries.function.BoostedQuery;
import java.text.ParseException;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
@ -60,7 +67,7 @@ public class BoostQParserPlugin extends QParserPlugin {
} else {
vs = new QueryValueSource(bq, 0.0f);
}
return new BoostedQuery(q, vs);
return boostQuery(q, vs);
}
@ -84,4 +91,16 @@ public class BoostQParserPlugin extends QParserPlugin {
};
}
/**
 * Builds a query whose score is the score of {@code input} multiplied by the value of
 * {@code vs}, using a {@link FunctionScoreQuery} driven by the compiled expression
 * {@code "score * vs"}.
 *
 * @param input the query to boost
 * @param vs    the value source supplying the multiplier
 * @return a FunctionScoreQuery wrapping {@code input}
 * @throws SolrException with code SERVER_ERROR if the expression fails to compile
 */
public static Query boostQuery(Query input, ValueSource vs) {
  try {
    SimpleBindings scoreAndBoost = new SimpleBindings();
    scoreAndBoost.add("score", DoubleValuesSource.SCORES);
    scoreAndBoost.add("vs", vs.asDoubleValuesSource());
    DoubleValuesSource product =
        JavascriptCompiler.compile("score * vs").getDoubleValuesSource(scoreAndBoost);
    return new FunctionScoreQuery(input, product);
  } catch (ParseException e) {
    // The expression text is a fixed literal, so this should never happen.
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
  }
}
}

View File

@ -33,7 +33,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.BoostedQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
@ -197,9 +196,9 @@ public class ExtendedDismaxQParser extends QParser {
List<ValueSource> boosts = getMultiplicativeBoosts();
if (boosts.size()>1) {
ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
topQuery = new BoostedQuery(topQuery, prod);
topQuery = BoostQParserPlugin.boostQuery(topQuery, prod);
} else if (boosts.size() == 1) {
topQuery = new BoostedQuery(topQuery, boosts.get(0));
topQuery = BoostQParserPlugin.boostQuery(topQuery, boosts.get(0));
}
return topQuery;

View File

@ -27,7 +27,6 @@ import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.BoostedQuery;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
@ -326,8 +325,7 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
// Parses a nested query followed by a value source and exposes the boosted query's
// score as a ValueSource. The boost is built via BoostQParserPlugin.boostQuery
// instead of the deprecated BoostedQuery.
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
  Query q = fp.parseNestedQuery();
  ValueSource vs = fp.parseValueSource();
  // NOTE(review): 0.0f is presumably the value used for non-matching documents; confirm against QueryValueSource.
  return new QueryValueSource(BoostQParserPlugin.boostQuery(q, vs), 0.0f);
}
});
addParser("joindf", new ValueSourceParser() {