SOLR-14364: LTR SolrFeature fq improvements

Mostly general code improvements, though it now also supports PostFilters.
Add QueryUtils.combineQueryAndFilter
David Smiley 2020-03-26 00:29:46 -04:00
parent d7494699ed
commit 7b3980c080
7 changed files with 95 additions and 173 deletions
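
For orientation, here is a condensed sketch of the new filter handling in SolrFeatureWeight (the wrapper class and method name below are purely illustrative; the body is adapted from the diff further down): the feature's parsed "fq" queries are resolved once to a DocSet via SolrIndexSearcher.getDocSet, which also executes PostFilters such as collapse, and the resulting non-scoring filter is combined with the scoring query through the new QueryUtils.combineQueryAndFilter helper.

    import java.io.IOException;
    import java.util.List;

    import org.apache.lucene.search.Query;
    import org.apache.solr.search.DocSet;
    import org.apache.solr.search.QueryUtils;
    import org.apache.solr.search.SolrIndexSearcher;

    class FilterHandlingSketch {
      /** Resolve the "fq" queries to a DocSet once, then attach it as a non-scoring filter. */
      static Query combine(SolrIndexSearcher searcher, Query scoreQuery, List<Query> filterQueries)
          throws IOException {
        Query filterDocSetQuery = null;
        if (!filterQueries.isEmpty()) {
          DocSet filterDocSet = searcher.getDocSet(filterQueries); // runs PostFilters as well
          if (filterDocSet != searcher.getLiveDocSet()) {          // every live doc matches: no filter needed
            filterDocSetQuery = filterDocSet.getTopFilter();
          }
        }
        return QueryUtils.combineQueryAndFilter(scoreQuery, filterDocSetQuery);
      }
    }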


@@ -77,6 +77,8 @@ Improvements
* SOLR-14307: User defined "<cache/>" entries in solrconfig.xml now support enabled="true|false" just like
core searcher caches. (hossman)
* SOLR-14364: LTR's SolrFeature "fq" now supports PostFilters (e.g. collapse). (David Smiley)
Optimizations
---------------------
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)


@@ -23,22 +23,23 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
/**
* This feature allows you to reuse any Solr query as a feature. The value
* of the feature will be the score of the given query for the current document.
@@ -62,6 +63,8 @@ public class SolrFeature extends Feature {
private String q;
private List<String> fq;
// The setters will be invoked via reflection from the passed in params
public String getDf() {
return df;
}
@@ -109,7 +112,7 @@ public class SolrFeature extends Feature {
public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi)
throws IOException {
return new SolrFeatureWeight(searcher, request, originalQuery, efi);
return new SolrFeatureWeight((SolrIndexSearcher) searcher, request, originalQuery, efi);
}
@Override
@@ -120,67 +123,68 @@ public class SolrFeature extends Feature {
": Q or FQ must be provided");
}
}
/**
* Weight for a SolrFeature
**/
public class SolrFeatureWeight extends FeatureWeight {
final private Weight solrQueryWeight;
final private Query query;
final private List<Query> queryAndFilters;
private final Weight solrQueryWeight;
public SolrFeatureWeight(IndexSearcher searcher,
public SolrFeatureWeight(SolrIndexSearcher searcher,
SolrQueryRequest request, Query originalQuery, Map<String, String[]> efi) throws IOException {
super(SolrFeature.this, searcher, request, originalQuery, efi);
try {
String solrQuery = q;
final List<String> fqs = fq;
if ((solrQuery == null) || solrQuery.isEmpty()) {
solrQuery = "*:*";
}
solrQuery = macroExpander.expand(solrQuery);
if (solrQuery == null) {
throw new FeatureException(this.getClass().getSimpleName()+" requires efi parameter that was not passed in request.");
}
final SolrQueryRequest req = makeRequest(request.getCore(), solrQuery,
fqs, df);
final SolrQueryRequest req = makeRequest(request.getCore(), q, fq, df);
if (req == null) {
throw new IOException("ERROR: No parameters provided");
}
// Build the filter queries
queryAndFilters = new ArrayList<Query>(); // If there are no fqs we just want an empty list
if (fqs != null) {
for (String fq : fqs) {
if ((fq != null) && (fq.trim().length() != 0)) {
fq = macroExpander.expand(fq);
if (fq == null) {
// Build the scoring query
Query scoreQuery;
String qStr = q;
if (qStr == null || qStr.isEmpty()) {
scoreQuery = null; // ultimately behaves like MatchAllDocsQuery
} else {
qStr = macroExpander.expand(qStr);
if (qStr == null) {
throw new FeatureException(this.getClass().getSimpleName() + " requires efi parameter that was not passed in request.");
}
final QParser fqp = QParser.getParser(fq, req);
final Query filterQuery = fqp.getQuery();
scoreQuery = QParser.getParser(qStr, req).getQuery();
// note: QParser can return a null Query sometimes, such as if the query is a stopword or just symbols
if (scoreQuery == null) {
scoreQuery = new MatchNoDocsQuery(); // debatable; all or none?
}
}
// Build the filter queries
Query filterDocSetQuery = null;
if (fq != null) {
List<Query> filterQueries = new ArrayList<>(); // If there are no fqs we just want an empty list
for (String fqStr : fq) {
if (fqStr != null) {
fqStr = macroExpander.expand(fqStr);
if (fqStr == null) {
throw new FeatureException(this.getClass().getSimpleName() + " requires efi parameter that was not passed in request.");
}
final Query filterQuery = QParser.getParser(fqStr, req).getQuery();
if (filterQuery != null) {
queryAndFilters.add(filterQuery);
}
filterQueries.add(filterQuery);
}
}
}
final QParser parser = QParser.getParser(solrQuery, req);
query = parser.parse();
if (filterQueries.isEmpty() == false) { // TODO optimize getDocSet to make this check unnecessary SOLR-14376
DocSet filtersDocSet = searcher.getDocSet(filterQueries); // execute
if (filtersDocSet != searcher.getLiveDocSet()) {
filterDocSetQuery = filtersDocSet.getTopFilter();
}
}
}
Query query = QueryUtils.combineQueryAndFilter(scoreQuery, filterDocSetQuery);
// Query can be null if there was no input to parse, for instance if you
// make a phrase query with "to be", and the analyzer removes all the
// words
// leaving nothing for the phrase query to parse.
if (query != null) {
queryAndFilters.add(query);
solrQueryWeight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE, 1);
} else {
solrQueryWeight = null;
}
} catch (final SyntaxError e) {
throw new FeatureException("Failed to parse feature query.", e);
}
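
For a user-facing view of what the PostFilter support means, here is a hypothetical LTR feature definition (the feature name, field name, and the use of a Java string to hold the JSON are all invented for illustration and are not part of this commit); with this change, an entry like the collapse query below becomes usable in a SolrFeature "fq":

    class SolrFeaturePostFilterExample {
      // Hypothetical feature definition, held in a string purely for illustration.
      // The "fq" uses the collapse PostFilter, which SolrFeature previously could not run.
      static final String FEATURE_JSON = "{\n"
          + "  \"name\"  : \"matchesCollapsedGroupHead\",\n"
          + "  \"class\" : \"org.apache.solr.ltr.feature.SolrFeature\",\n"
          + "  \"params\": { \"fq\": [\"{!collapse field=group_s}\"] }\n"
          + "}";
    }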
@@ -209,67 +213,26 @@ public class SolrFeature extends Feature {
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
Scorer solrScorer = null;
if (solrQueryWeight != null) {
solrScorer = solrQueryWeight.scorer(context);
}
final DocIdSetIterator idItr = getDocIdSetIteratorFromQueries(
queryAndFilters, context);
if (idItr != null) {
return solrScorer == null ? new ValueFeatureScorer(this, 1f, idItr)
: new SolrFeatureScorer(this, solrScorer,
new SolrFeatureScorerIterator(idItr, solrScorer.iterator()));
} else {
Scorer solrScorer = solrQueryWeight.scorer(context);
if (solrScorer == null) {
return null;
}
}
/**
* Given a list of Solr filters/queries, return a doc iterator that
* traverses over the documents that matched all the criteria of the
* queries.
*
* @param queries
* Filtering criteria to match documents against
* @param context
* Index reader
* @return DocIdSetIterator to traverse documents that matched all filter
* criteria
*/
private DocIdSetIterator getDocIdSetIteratorFromQueries(List<Query> queries,
LeafReaderContext context) throws IOException {
final SolrIndexSearcher.ProcessedFilter pf = ((SolrIndexSearcher) searcher)
.getProcessedFilter(null, queries);
final Bits liveDocs = context.reader().getLiveDocs();
DocIdSetIterator idIter = null;
if (pf.filter != null) {
final DocIdSet idSet = pf.filter.getDocIdSet(context, liveDocs);
if (idSet != null) {
idIter = idSet.iterator();
}
}
return idIter;
return new SolrFeatureScorer(this, solrScorer);
}
/**
* Scorer for a SolrFeature
**/
public class SolrFeatureScorer extends FeatureScorer {
final private Scorer solrScorer;
*/
public class SolrFeatureScorer extends FilterFeatureScorer {
public SolrFeatureScorer(FeatureWeight weight, Scorer solrScorer,
SolrFeatureScorerIterator itr) {
super(weight, itr);
this.solrScorer = solrScorer;
public SolrFeatureScorer(FeatureWeight weight, Scorer solrScorer) {
super(weight, solrScorer);
}
@Override
public float score() throws IOException {
try {
return solrScorer.score();
return in.score();
} catch (UnsupportedOperationException e) {
throw new FeatureException(
e.toString() + ": " +
@@ -278,54 +241,6 @@ public class SolrFeature extends Feature {
}
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
/**
* An iterator that allows to iterate only on the documents for which a feature has
* a value.
**/
public class SolrFeatureScorerIterator extends DocIdSetIterator {
final private DocIdSetIterator filterIterator;
final private DocIdSetIterator scorerFilter;
SolrFeatureScorerIterator(DocIdSetIterator filterIterator,
DocIdSetIterator scorerFilter) {
this.filterIterator = filterIterator;
this.scorerFilter = scorerFilter;
}
@Override
public int docID() {
return filterIterator.docID();
}
@Override
public int nextDoc() throws IOException {
int docID = filterIterator.nextDoc();
scorerFilter.advance(docID);
return docID;
}
@Override
public int advance(int target) throws IOException {
// We use iterator to catch the scorer up since
// that checks if the target id is in the query + all the filters
int docID = filterIterator.advance(target);
scorerFilter.advance(docID);
return docID;
}
@Override
public long cost() {
return filterIterator.cost() + scorerFilter.cost();
}
}
}
}


@@ -39,8 +39,6 @@ import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
@@ -81,6 +79,7 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing;
@@ -416,12 +415,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
collector = groupExpandCollector;
}
if (pfilter.filter != null) {
query = new BooleanQuery.Builder()
.add(query, Occur.MUST)
.add(pfilter.filter, Occur.FILTER)
.build();
}
query = QueryUtils.combineQueryAndFilter(query, pfilter.filter);
searcher.search(query, collector);
ReturnFields returnFields = rb.rsp.getReturnFields();


@@ -33,8 +33,6 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.MultiCollector;
@@ -440,13 +438,7 @@ public class Grouping {
collector = timeLimitingCollector;
}
try {
Query q = query;
if (luceneFilter != null) {
q = new BooleanQuery.Builder()
.add(q, Occur.MUST)
.add(luceneFilter, Occur.FILTER)
.build();
}
Query q = QueryUtils.combineQueryAndFilter(query, luceneFilter);
searcher.search(q, collector);
} catch (TimeLimitingCollector.TimeExceededException | ExitableDirectoryReader.ExitingReaderException x) {
log.warn( "Query: " + query + "; " + x.getMessage() );


@@ -16,17 +16,18 @@
*/
package org.apache.solr.search;
import java.util.Collection;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import java.util.Collection;
/**
*
*/
@@ -141,4 +142,30 @@ public class QueryUtils {
}
return bq;
}
/**
* Combines a scoring query with a non-scoring (filter) query.
* If both parameters are null then return a {@link MatchAllDocsQuery}.
* If only {@code scoreQuery} is present then return it.
* If only {@code filterQuery} is present then return it wrapped with constant scoring.
* If neither is null then we combine with a BooleanQuery.
*/
public static Query combineQueryAndFilter(Query scoreQuery, Query filterQuery) {
if (scoreQuery == null) {
if (filterQuery == null) {
return new MatchAllDocsQuery(); // default if nothing -- match everything
} else {
return new ConstantScoreQuery(filterQuery);
}
} else {
if (filterQuery == null) {
return scoreQuery;
} else {
return new BooleanQuery.Builder()
.add(scoreQuery, Occur.MUST)
.add(filterQuery, Occur.FILTER)
.build();
}
}
}
}
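
A quick usage sketch of the new helper, covering its four cases (the term queries below are arbitrary placeholders, not taken from this commit):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.solr.search.QueryUtils;

    class CombineQueryAndFilterDemo {
      public static void main(String[] args) {
        Query score  = new TermQuery(new Term("title", "solr"));
        Query filter = new TermQuery(new Term("inStock", "true"));

        QueryUtils.combineQueryAndFilter(null, null);     // MatchAllDocsQuery: nothing to constrain
        QueryUtils.combineQueryAndFilter(score, null);    // the scoring query, unchanged
        QueryUtils.combineQueryAndFilter(null, filter);   // ConstantScoreQuery wrapping the filter
        QueryUtils.combineQueryAndFilter(score, filter);  // BooleanQuery: MUST(score) + FILTER(filter)
      }
    }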


@@ -1555,9 +1555,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
Query query = QueryUtils.makeQueryable(cmd.getQuery());
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
if (pf.filter != null) {
query = new BooleanQuery.Builder().add(query, Occur.MUST).add(pf.filter, Occur.FILTER).build();
}
query = QueryUtils.combineQueryAndFilter(query, pf.filter);
// handle zero case...
if (lastDocRequested <= 0) {


@@ -24,8 +24,6 @@ import java.util.List;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
@@ -228,12 +226,8 @@ public class CommandHandler {
collector = MultiCollector.wrap(collector, hitCountCollector);
}
if (filter.filter != null) {
query = new BooleanQuery.Builder()
.add(query, Occur.MUST)
.add(filter.filter, Occur.FILTER)
.build();
}
query = QueryUtils.combineQueryAndFilter(query, filter.filter);
if (filter.postFilter != null) {
filter.postFilter.setLastDelegate(collector);
collector = filter.postFilter;