SOLR-2429: ability to not cache filters and post filtering

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140252 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2011-06-27 17:52:02 +00:00
parent 615375d45a
commit e219ad401c
24 changed files with 1297 additions and 197 deletions

View File

@ -257,6 +257,15 @@ New Features
* LUCENE-3234: add a new parameter hl.phraseLimit for FastVectorHighlighter speed up. * LUCENE-3234: add a new parameter hl.phraseLimit for FastVectorHighlighter speed up.
(Mike Sokolov via koji) (Mike Sokolov via koji)
* SOLR-2429: Ability to add cache=false to queries and query filters to avoid
using the filterCache or queryCache. A cost may also be specified and is used
to order the evaluation of non-cached filters from least to greatest cost.
For very expensive query filters (cost >= 100) if the query implements
the PostFilter interface, it will be used to obtain a Collector that is
checked only for documents that match the main query and all other filters.
The "frange" query now implements the PostFilter interface. (yonik)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -153,5 +153,16 @@ public interface CommonParams {
public static final String THREADS = "threads"; public static final String THREADS = "threads";
public static final String TRUE = Boolean.TRUE.toString(); public static final String TRUE = Boolean.TRUE.toString();
public static final String FALSE = Boolean.FALSE.toString(); public static final String FALSE = Boolean.FALSE.toString();
/** Used as a local parameter on queries. {@code cache=false} means don't check any query or filter caches.
 * {@code cache=true} is the default. The value {@code sep} is also recognized by the query parser and
 * flags the query for separate caching of its clauses (not yet implemented).
 */
public static final String CACHE = "cache";
/** Used as a local param on filter queries in conjunction with {@code cache=false}. Filters are checked in order, from
 * smallest cost to largest. If {@code cost>=100} and the query implements PostFilter, then that interface will be
 * used to do post query filtering.
 */
public static final String COST = "cost";
} }

View File

@ -287,6 +287,7 @@ public class QueryComponent extends SearchComponent
DocListAndSet res = new DocListAndSet(); DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0); res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
if (rb.isNeedDocSet()) { if (rb.isNeedDocSet()) {
// TODO: create a cache for this!
List<Query> queries = new ArrayList<Query>(); List<Query> queries = new ArrayList<Query>();
queries.add(rb.getQuery()); queries.add(rb.getQuery());
List<Query> filters = rb.getFilters(); List<Query> filters = rb.getFilters();

View File

@ -54,10 +54,10 @@ public class BoostQParserPlugin extends QParserPlugin {
public Query parse() throws ParseException { public Query parse() throws ParseException {
b = localParams.get(BOOSTFUNC); b = localParams.get(BOOSTFUNC);
baseParser = subQuery(localParams.get(QueryParsing.V), null); baseParser = subQuery(localParams.get(QueryParsing.V), null);
Query q = baseParser.parse(); Query q = baseParser.getQuery();
if (b == null) return q; if (b == null) return q;
Query bq = subQuery(b, FunctionQParserPlugin.NAME).parse(); Query bq = subQuery(b, FunctionQParserPlugin.NAME).getQuery();
if (bq instanceof FunctionQuery) { if (bq instanceof FunctionQuery) {
vs = ((FunctionQuery)bq).getValueSource(); vs = ((FunctionQuery)bq).getValueSource();
} else { } else {

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import java.io.IOException;
/**
 * A basic pass-through collector whose delegate may be assigned after construction.
 * Every collector callback is forwarded to the current delegate; the most recently
 * supplied scorer, reader context and doc base are kept in protected fields for subclasses.
 */
public class DelegatingCollector extends Collector {

  /** For testing purposes only: counts the number of times a delegating collector chain was used. */
  static int setLastDelegateCount;

  protected Collector delegate;
  protected Scorer scorer;
  protected IndexReader.AtomicReaderContext context;
  protected int docBase;

  /** Returns the collector that calls are currently forwarded to (may be unset). */
  public Collector getDelegate() {
    return delegate;
  }

  /** Replaces the collector that calls are forwarded to. */
  public void setDelegate(Collector delegate) {
    this.delegate = delegate;
  }

  /** Sets the last delegate in a chain of DelegatingCollectors */
  public void setLastDelegate(Collector delegate) {
    // Walk down the chain until we reach the link whose delegate is not itself a DelegatingCollector.
    DelegatingCollector tail = this;
    while (tail.getDelegate() instanceof DelegatingCollector) {
      tail = (DelegatingCollector) tail.getDelegate();
    }
    tail.setDelegate(delegate);
    setLastDelegateCount++;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;
    delegate.setScorer(scorer);
  }

  @Override
  public void collect(int doc) throws IOException {
    delegate.collect(doc);
  }

  @Override
  public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
    // Remember the segment context and its doc base before handing it on.
    this.context = context;
    this.docBase = context.docBase;
    delegate.setNextReader(context);
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return delegate.acceptsDocsOutOfOrder();
  }
}

View File

@ -106,7 +106,7 @@ public class DisMaxQParser extends QParser {
if (null == boostFunc || "".equals(boostFunc)) continue; if (null == boostFunc || "".equals(boostFunc)) continue;
Map<String, Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc); Map<String, Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
for (String f : ff.keySet()) { for (String f : ff.keySet()) {
Query fq = subQuery(f, FunctionQParserPlugin.NAME).parse(); Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery();
Float b = ff.get(f); Float b = ff.get(f);
if (null != b) { if (null != b) {
fq.setBoost(b); fq.setBoost(b);
@ -125,7 +125,7 @@ public class DisMaxQParser extends QParser {
boostQueries = new ArrayList<Query>(); boostQueries = new ArrayList<Query>();
for (String qs : boostParams) { for (String qs : boostParams) {
if (qs.trim().length() == 0) continue; if (qs.trim().length() == 0) continue;
Query q = subQuery(qs, null).parse(); Query q = subQuery(qs, null).getQuery();
boostQueries.add(q); boostQueries.add(q);
} }
} }
@ -190,7 +190,7 @@ public class DisMaxQParser extends QParser {
String altQ = solrParams.get(DisMaxParams.ALTQ); String altQ = solrParams.get(DisMaxParams.ALTQ);
if (altQ != null) { if (altQ != null) {
QParser altQParser = subQuery(altQ, null); QParser altQParser = subQuery(altQ, null);
return altQParser.parse(); return altQParser.getQuery();
} else { } else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing query string"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "missing query string");
} }

View File

@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/** The ExtendedQuery interface provides extra metadata to a query.
 * Implementations of ExtendedQuery must also extend Query.
 */
public interface ExtendedQuery {
/** Should this query be cached in the query cache or filter cache. */
public boolean getCache();
/** Sets whether this query should be cached in the query cache or filter cache. */
public void setCache(boolean cache);
/** Returns the cost of this query, used to order checking of filters that are not cached.
 * If {@code getCache()==false && getCost()>=100 && this instanceof PostFilter}, then
 * the PostFilter interface will be used for filtering.
 */
public int getCost();
/** Sets the cost of this query; see {@link #getCost()} for how the value is used. */
public void setCost(int cost);
/** If true, the clauses of this boolean query should be cached separately. This is not yet implemented. */
public boolean getCacheSep();
/** Sets whether the clauses of this boolean query should be cached separately. This is not yet implemented. */
public void setCacheSep(boolean cacheSep);
}

View File

@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.Query;
/**
 * Base {@link Query} that stores the {@link ExtendedQuery} options (cache, cost, cacheSep)
 * in plain fields and can render them back in local-params syntax.
 * Caching is enabled by default; cost defaults to 0 and cacheSep to false.
 */
public class ExtendedQueryBase extends Query implements ExtendedQuery {
  private int cost;
  private boolean cache = true;
  private boolean cacheSep;

  @Override
  public void setCache(boolean cache) {
    this.cache = cache;
  }

  @Override
  public boolean getCache() {
    return cache;
  }

  @Override
  public void setCacheSep(boolean cacheSep) {
    this.cacheSep = cacheSep;
  }

  @Override
  public boolean getCacheSep() {
    return cacheSep;
  }

  @Override
  public void setCost(int cost) {
    this.cost = cost;
  }

  // @Override added for consistency with the other ExtendedQuery implementations in this class,
  // so the compiler flags any future signature drift from the interface.
  @Override
  public int getCost() {
    return cost;
  }

  /**
   * Renders the non-default options as a local-params prefix.
   *
   * @return <code>{!cache=false cost=N}</code> when caching is disabled,
   *         <code>{!cache=sep}</code> when caching is enabled and cacheSep is set,
   *         otherwise the empty string
   */
  public String getOptions() {
    StringBuilder sb = new StringBuilder();
    if (!cache) {
      // The cost is only reported alongside cache=false.
      sb.append("{!cache=false");
      sb.append(" cost=");
      sb.append(cost);
      sb.append("}");
    } else if (cacheSep) {
      sb.append("{!cache=sep");
      sb.append("}");
    }
    return sb.toString();
  }

  /** Returns just the option prefix produced by {@link #getOptions()}; the field argument is not used. */
  @Override
  public String toString(String field) {
    return getOptions();
  }
}

View File

@ -16,13 +16,18 @@
*/ */
package org.apache.solr.search; package org.apache.solr.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query; import org.apache.lucene.search.*;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.function.*; import org.apache.solr.search.function.*;
import java.io.IOException;
import java.util.Map;
/** /**
* Create a range query over a function. * Create a range query over a function.
* <br>Other parameters: * <br>Other parameters:
@ -48,7 +53,7 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
@Override @Override
public Query parse() throws ParseException { public Query parse() throws ParseException {
funcStr = localParams.get(QueryParsing.V, null); funcStr = localParams.get(QueryParsing.V, null);
Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).parse(); Query funcQ = subQuery(funcStr, FunctionQParserPlugin.NAME).getQuery();
if (funcQ instanceof FunctionQuery) { if (funcQ instanceof FunctionQuery) {
vs = ((FunctionQuery)funcQ).getValueSource(); vs = ((FunctionQuery)funcQ).getValueSource();
} else { } else {
@ -62,10 +67,51 @@ public class FunctionRangeQParserPlugin extends QParserPlugin {
// TODO: add a score=val option to allow score to be the value // TODO: add a score=val option to allow score to be the value
ValueSourceRangeFilter rf = new ValueSourceRangeFilter(vs, l, u, includeLower, includeUpper); ValueSourceRangeFilter rf = new ValueSourceRangeFilter(vs, l, u, includeLower, includeUpper);
SolrConstantScoreQuery csq = new SolrConstantScoreQuery(rf); FunctionRangeQuery frq = new FunctionRangeQuery(rf);
return csq; return frq;
} }
}; };
} }
} }
// Serves double duty: usable as a normal constant score query, or as a PostFilter that filters through a collector
class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFilter {
  final ValueSourceRangeFilter rangeFilt;

  public FunctionRangeQuery(ValueSourceRangeFilter filter) {
    super(filter);
    this.rangeFilt = filter;
  }

  /** Builds a collector that only forwards documents accepted by the range filter's scorer. */
  @Override
  public DelegatingCollector getFilterCollector(IndexSearcher searcher) {
    return new FunctionRangeCollector(ValueSource.newContext(searcher));
  }

  /** Collector that checks each document against a per-reader range scorer before delegating. */
  class FunctionRangeCollector extends DelegatingCollector {
    final Map fcontext;
    ValueSourceScorer scorer;
    int maxdoc;

    public FunctionRangeCollector(Map fcontext) {
      this.fcontext = fcontext;
    }

    @Override
    public void collect(int doc) throws IOException {
      // Drop documents at or beyond the current reader's maxDoc.
      if (doc >= maxdoc) {
        return;
      }
      // Drop documents the range scorer does not match.
      if (!scorer.matches(doc)) {
        return;
      }
      delegate.collect(doc);
    }

    @Override
    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
      maxdoc = context.reader.maxDoc();
      // Obtain the values for the new reader and build a fresh range scorer from them.
      DocValues values = rangeFilt.getValueSource().getValues(fcontext, context);
      scorer = values.getRangeScorer(
          context.reader,
          rangeFilt.getLowerVal(),
          rangeFilt.getUpperVal(),
          rangeFilt.isIncludeLower(),
          rangeFilt.isIncludeUpper());
      super.setNextReader(context);
    }
  }
}

View File

@ -267,8 +267,8 @@ public class Grouping {
DocListAndSet out = new DocListAndSet(); DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out); qr.setDocListAndSet(out);
filter = cmd.getFilter() != null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList()); SolrIndexSearcher.ProcessedFilter pf = searcher.getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
luceneFilter = filter == null ? null : filter.getTopFilter(); final Filter luceneFilter = pf.filter;
maxDoc = searcher.maxDoc(); maxDoc = searcher.maxDoc();
needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0; needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
@ -320,6 +320,11 @@ public class Grouping {
} }
} }
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(allCollectors);
allCollectors = pf.postFilter;
}
if (allCollectors != null) { if (allCollectors != null) {
searcher.search(query, luceneFilter, allCollectors); searcher.search(query, luceneFilter, allCollectors);
} }
@ -348,6 +353,10 @@ public class Grouping {
searcher.search(query, luceneFilter, secondPhaseCollectors); searcher.search(query, luceneFilter, secondPhaseCollectors);
} }
} else { } else {
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(secondPhaseCollectors);
secondPhaseCollectors = pf.postFilter;
}
searcher.search(query, luceneFilter, secondPhaseCollectors); searcher.search(query, luceneFilter, secondPhaseCollectors);
} }
} }

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.IndexSearcher;
/** The PostFilter interface provides a mechanism to further filter documents
 * after they have already gone through the main query and other filters.
 * This is appropriate for filters with a very high cost.
 * <p>
 * The filtering mechanism used is a {@link DelegatingCollector}
 * that allows the filter to not call the delegate for certain documents,
 * thus effectively filtering them out. This also avoids the normal
 * filter advancing mechanism which asks for the first acceptable document on
 * or after the target (which is undesirable for expensive filters).
 * This collector interface also enables better performance when an external system
 * must be consulted, since document ids may be buffered and batched into
 * a single request to the external system.
 * <p>
 * Implementations of this interface must also be a Query.
 * If an implementation can only support the collector method of
 * filtering through getFilterCollector, then ExtendedQuery.getCache()
 * should always return false, and ExtendedQuery.getCost() should
 * return no less than 100.
 *
 * @see ExtendedQueryBase
 */
public interface PostFilter extends ExtendedQuery {
/** Returns a DelegatingCollector to be run after the main query and all of its filters, but before any sorting or grouping collectors */
public DelegatingCollector getFilterCollector(IndexSearcher searcher);
}

View File

@ -141,10 +141,39 @@ public abstract class QParser {
public Query getQuery() throws ParseException { public Query getQuery() throws ParseException {
if (query==null) { if (query==null) {
query=parse(); query=parse();
if (localParams != null) {
String cacheStr = localParams.get(CommonParams.CACHE);
if (cacheStr != null) {
if (CommonParams.FALSE.equals(cacheStr)) {
extendedQuery().setCache(false);
} else if (CommonParams.TRUE.equals(cacheStr)) {
extendedQuery().setCache(true);
} else if ("sep".equals(cacheStr)) {
extendedQuery().setCacheSep(true);
}
}
int cost = localParams.getInt(CommonParams.COST, Integer.MIN_VALUE);
if (cost != Integer.MIN_VALUE) {
extendedQuery().setCost(cost);
}
}
} }
return query; return query;
} }
// Returns "query" viewed as an ExtendedQuery; when it is not one already, "query" is first
// replaced by a new WrappedQuery around it.
private ExtendedQuery extendedQuery() {
  if (!(query instanceof ExtendedQuery)) {
    query = new WrappedQuery(query);
  }
  return (ExtendedQuery) query;
}
private void checkRecurse() throws ParseException { private void checkRecurse() throws ParseException {
if (recurseCount++ >= 100) { if (recurseCount++ >= 100) {
throw new ParseException("Infinite Recursion detected parsing query '" + qstr + "'"); throw new ParseException("Infinite Recursion detected parsing query '" + qstr + "'");

View File

@ -52,6 +52,15 @@ public class QueryUtils {
* @return * @return
*/ */
static Query getAbs(Query q) { static Query getAbs(Query q) {
if (q instanceof WrappedQuery) {
Query subQ = ((WrappedQuery)q).getWrappedQuery();
Query absSubQ = getAbs(subQ);
if (absSubQ == subQ) return q;
WrappedQuery newQ = (WrappedQuery)q.clone();
newQ.setWrappedQuery(absSubQ);
return newQ;
}
if (!(q instanceof BooleanQuery)) return q; if (!(q instanceof BooleanQuery)) return q;
BooleanQuery bq = (BooleanQuery)q; BooleanQuery bq = (BooleanQuery)q;
@ -87,6 +96,9 @@ public class QueryUtils {
* lucene. * lucene.
*/ */
static Query makeQueryable(Query q) { static Query makeQueryable(Query q) {
if (q instanceof WrappedQuery) {
return makeQueryable(((WrappedQuery)q).getWrappedQuery());
}
return isNegative(q) ? fixNegativeQuery(q) : q; return isNegative(q) ? fixNegativeQuery(q) : q;
} }

View File

@ -34,7 +34,9 @@ import java.util.Map;
* *
* Experimental and subject to change. * Experimental and subject to change.
*/ */
public class SolrConstantScoreQuery extends ConstantScoreQuery { public class SolrConstantScoreQuery extends ConstantScoreQuery implements ExtendedQuery {
boolean cache = true; // cache by default
int cost;
public SolrConstantScoreQuery(Filter filter) { public SolrConstantScoreQuery(Filter filter) {
super(filter); super(filter);
@ -46,6 +48,36 @@ public class SolrConstantScoreQuery extends ConstantScoreQuery {
return filter; return filter;
} }
/** Sets whether this query should be cached. */
@Override
public void setCache(boolean cache) {
this.cache = cache;
}
/** Returns whether this query should be cached; true unless changed through setCache. */
@Override
public boolean getCache() {
return cache;
}
/** No-op: this class keeps no separate-caching flag, so the argument is ignored. */
@Override
public void setCacheSep(boolean cacheSep) {
}
/** Always returns false, regardless of any earlier setCacheSep call. */
@Override
public boolean getCacheSep() {
return false;
}
/** Sets the cost reported by getCost(). */
@Override
public void setCost(int cost) {
this.cost = cost;
}
/** Returns the cost last set through setCost (0 if never set). */
@Override
public int getCost() {
return cost;
}
@Override @Override
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
return this; return this;

View File

@ -542,6 +542,17 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
* The DocSet returned should <b>not</b> be modified. * The DocSet returned should <b>not</b> be modified.
*/ */
public DocSet getDocSet(Query query) throws IOException { public DocSet getDocSet(Query query) throws IOException {
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery)query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, null);
}
}
// Get the absolute value (positive version) of this query. If we // Get the absolute value (positive version) of this query. If we
// get back the same reference, we know it's positive. // get back the same reference, we know it's positive.
Query absQ = QueryUtils.getAbs(query); Query absQ = QueryUtils.getAbs(query);
@ -574,12 +585,29 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if (answer!=null) return answer; if (answer!=null) return answer;
} }
answer = getDocSetNC(q,null); answer = getDocSetNC(q,null);
if (filterCache != null) filterCache.put(q,answer); if (filterCache != null) filterCache.put(
q,answer);
return answer; return answer;
} }
private static Query matchAllDocsQuery = new MatchAllDocsQuery(); private static Query matchAllDocsQuery = new MatchAllDocsQuery();
/** Holder for the pieces produced by getProcessedFilter; any of the fields may be null. */
static class ProcessedFilter {
DocSet answer; // the answer, if non-null
Filter filter; // filter to pass to the search, or to iterate directly when computing a DocSet
DelegatingCollector postFilter; // head of the post-filter collector chain; callers attach their own collector via setLastDelegate
}
/**
 * Orders queries by ascending {@link ExtendedQuery#getCost()}.
 * Both arguments are cast to ExtendedQuery, so a ClassCastException is thrown
 * if either query does not implement that interface.
 */
private static Comparator<Query> sortByCost = new Comparator<Query>() {
  @Override
  public int compare(Query q1, Query q2) {
    int cost1 = ((ExtendedQuery)q1).getCost();
    int cost2 = ((ExtendedQuery)q2).getCost();
    // Compare explicitly instead of returning cost1 - cost2: the subtraction can overflow
    // for costs of opposite sign with large magnitude and yield the wrong ordering.
    return cost1 < cost2 ? -1 : (cost1 == cost2 ? 0 : 1);
  }
};
/** /**
* Returns the set of document ids matching all queries. * Returns the set of document ids matching all queries.
* This method is cache-aware and attempts to retrieve the answer from the cache if possible. * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
@ -589,123 +617,161 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
* The DocSet returned should <b>not</b> be modified. * The DocSet returned should <b>not</b> be modified.
*/ */
public DocSet getDocSet(List<Query> queries) throws IOException { public DocSet getDocSet(List<Query> queries) throws IOException {
if (queries==null) return null; ProcessedFilter pf = getProcessedFilter(null, queries);
if (queries.size()==1) return getDocSet(queries.get(0)); if (pf.answer != null) return pf.answer;
DocSetCollector setCollector = new DocSetCollector(maxDoc()>>6, maxDoc());
Collector collector = setCollector;
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
final AtomicReaderContext[] leaves = leafContexts;
for (int i=0; i<leaves.length; i++) {
final AtomicReaderContext leaf = leaves[i];
final IndexReader reader = leaf.reader;
DocIdSet idSet = null;
if (pf.filter != null) {
idSet = pf.filter.getDocIdSet(leaf);
if (idSet == null) continue;
}
DocIdSetIterator idIter = null;
if (idSet != null) {
idIter = idSet.iterator();
if (idIter == null) continue;
}
collector.setNextReader(leaf);
Bits skipDocs = reader.getDeletedDocs();
int max = reader.maxDoc();
if (idIter == null) {
for (int docid = 0; docid<max; docid++) {
if (skipDocs != null && skipDocs.get(docid)) continue;
collector.collect(docid);
}
} else {
for (int docid = -1; (docid = idIter.advance(docid+1)) < max; ) {
if (skipDocs != null && skipDocs.get(docid)) continue;
collector.collect(docid);
}
}
}
return setCollector.getDocSet();
}
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
ProcessedFilter pf = new ProcessedFilter();
if (queries==null || queries.size()==0) {
if (setFilter != null)
pf.filter = setFilter.getTopFilter();
return pf;
}
DocSet answer=null; DocSet answer=null;
boolean[] neg = new boolean[queries.size()]; boolean[] neg = new boolean[queries.size()+1];
DocSet[] sets = new DocSet[queries.size()]; DocSet[] sets = new DocSet[queries.size()+1];
List<Query> notCached = null;
List<Query> postFilters = null;
int end = 0;
int smallestIndex = -1; int smallestIndex = -1;
if (setFilter != null) {
answer = sets[end++] = setFilter;
smallestIndex = end;
}
int smallestCount = Integer.MAX_VALUE; int smallestCount = Integer.MAX_VALUE;
for (int i=0; i<sets.length; i++) { for (Query q : queries) {
Query q = queries.get(i); if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)q;
if (!eq.getCache()) {
if (eq.getCost() >= 100 && eq instanceof PostFilter) {
if (postFilters == null) postFilters = new ArrayList<Query>(sets.length-end);
postFilters.add(q);
} else {
if (notCached == null) notCached = new ArrayList<Query>(sets.length-end);
notCached.add(q);
}
continue;
}
}
Query posQuery = QueryUtils.getAbs(q); Query posQuery = QueryUtils.getAbs(q);
sets[i] = getPositiveDocSet(posQuery); sets[end] = getPositiveDocSet(posQuery);
// Negative query if absolute value different from original // Negative query if absolute value different from original
if (q==posQuery) { if (q==posQuery) {
neg[i] = false; neg[end] = false;
// keep track of the smallest positive set. // keep track of the smallest positive set.
// This optimization is only worth it if size() is cached, which it would // This optimization is only worth it if size() is cached, which it would
// be if we don't do any set operations. // be if we don't do any set operations.
int sz = sets[i].size(); int sz = sets[end].size();
if (sz<smallestCount) { if (sz<smallestCount) {
smallestCount=sz; smallestCount=sz;
smallestIndex=i; smallestIndex=end;
answer = sets[i]; answer = sets[end];
} }
} else { } else {
neg[i] = true; neg[end] = true;
}
} }
// if no positive queries, start off with all docs end++;
if (answer==null) answer = getPositiveDocSet(matchAllDocsQuery); }
// Are all of our normal cached filters negative?
if (end > 0 && answer==null) {
answer = getPositiveDocSet(matchAllDocsQuery);
}
// do negative queries first to shrink set size // do negative queries first to shrink set size
for (int i=0; i<sets.length; i++) { for (int i=0; i<end; i++) {
if (neg[i]) answer = answer.andNot(sets[i]); if (neg[i]) answer = answer.andNot(sets[i]);
} }
for (int i=0; i<sets.length; i++) { for (int i=0; i<end; i++) {
if (!neg[i] && i!=smallestIndex) answer = answer.intersection(sets[i]); if (!neg[i] && i!=smallestIndex) answer = answer.intersection(sets[i]);
} }
return answer; if (notCached != null) {
} Collections.sort(notCached, sortByCost);
List<Weight> weights = new ArrayList<Weight>(notCached.size());
Filter getFilter(Query q) throws IOException { for (Query q : notCached) {
if (q == null) return null; Query qq = QueryUtils.makeQueryable(q);
// TODO: support pure negative queries? weights.add(createNormalizedWeight(qq));
// if (q instanceof) {
// }
return getDocSet(q).getTopFilter();
}
Filter getFilter(DocSet setFilter, List<Query> queries) throws IOException {
Filter answer = setFilter == null ? null : setFilter.getTopFilter();
if (queries == null || queries.size() == 0) {
return answer;
}
if (answer == null && queries.size() == 1) {
return getFilter(queries.get(0));
}
DocSet finalSet=null;
int nDocSets =0;
boolean[] neg = new boolean[queries.size()];
DocSet[] sets = new DocSet[queries.size()];
Query[] nocache = new Query[queries.size()];
int smallestIndex = -1;
int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) {
// if (q instanceof)
Query posQuery = QueryUtils.getAbs(q);
sets[nDocSets] = getPositiveDocSet(posQuery);
// Negative query if absolute value different from original
if (q==posQuery) {
neg[nDocSets] = false;
// keep track of the smallest positive set.
// This optimization is only worth it if size() is cached, which it would
// be if we don't do any set operations.
int sz = sets[nDocSets].size();
if (sz<smallestCount) {
smallestCount=sz;
smallestIndex=nDocSets;
finalSet = sets[nDocSets];
} }
pf.filter = new FilterImpl(answer, weights);
} else { } else {
neg[nDocSets] = true; if (postFilters == null) {
if (answer == null) {
answer = getPositiveDocSet(matchAllDocsQuery);
}
// "answer" is the only part of the filter, so set it.
pf.answer = answer;
} }
nDocSets++; if (answer != null) {
pf.filter = answer.getTopFilter();
}
} }
// if no positive queries, start off with all docs if (postFilters != null) {
if (finalSet==null) finalSet = getPositiveDocSet(matchAllDocsQuery); Collections.sort(postFilters, sortByCost);
for (int i=postFilters.size()-1; i>=0; i--) {
// do negative queries first to shrink set size DelegatingCollector prev = pf.postFilter;
for (int i=0; i<sets.length; i++) { pf.postFilter = ((PostFilter)postFilters.get(i)).getFilterCollector(this);
if (neg[i]) finalSet = finalSet.andNot(sets[i]); if (prev != null) pf.postFilter.setDelegate(prev);
}
} }
for (int i=0; i<sets.length; i++) { return pf;
if (!neg[i] && i!=smallestIndex) finalSet = finalSet.intersection(sets[i]);
}
return finalSet.getTopFilter();
} }
/** lucene.internal */ /** lucene.internal */
@ -861,6 +927,17 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
public DocSet getDocSet(Query query, DocSet filter) throws IOException { public DocSet getDocSet(Query query, DocSet filter) throws IOException {
if (filter==null) return getDocSet(query); if (filter==null) return getDocSet(query);
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery)query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, filter);
}
}
// Negative query if absolute value different from original // Negative query if absolute value different from original
Query absQ = QueryUtils.getAbs(query); Query absQ = QueryUtils.getAbs(query);
boolean positive = absQ==query; boolean positive = absQ==query;
@ -942,6 +1019,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
static final int NO_CHECK_QCACHE = 0x80000000; static final int NO_CHECK_QCACHE = 0x80000000;
static final int GET_DOCSET = 0x40000000; static final int GET_DOCSET = 0x40000000;
static final int NO_CHECK_FILTERCACHE = 0x20000000; static final int NO_CHECK_FILTERCACHE = 0x20000000;
static final int NO_SET_QCACHE = 0x10000000;
public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response
public static final int GET_SCORES = 0x01; public static final int GET_SCORES = 0x01;
@ -959,21 +1037,33 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// check for overflow, and check for # docs in index // check for overflow, and check for # docs in index
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc(); if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc= maxDocRequested; int supersetMaxDoc= maxDocRequested;
DocList superset; DocList superset = null;
int flags = cmd.getFlags();
Query q = cmd.getQuery();
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)q;
if (!eq.getCache()) {
flags |= (NO_CHECK_QCACHE | NO_SET_QCACHE | NO_CHECK_FILTERCACHE);
}
}
// we can try and look up the complete query in the cache. // we can try and look up the complete query in the cache.
// we can't do that if filter!=null though (we don't want to // we can't do that if filter!=null though (we don't want to
// do hashCode() and equals() for a big DocSet). // do hashCode() and equals() for a big DocSet).
if (queryResultCache != null && cmd.getFilter()==null) { if (queryResultCache != null && cmd.getFilter()==null
&& (flags & (NO_CHECK_QCACHE|NO_SET_QCACHE)) != ((NO_CHECK_QCACHE|NO_SET_QCACHE)))
{
// all of the current flags can be reused during warming, // all of the current flags can be reused during warming,
// so set all of them on the cache key. // so set all of them on the cache key.
key = new QueryResultKey(cmd.getQuery(), cmd.getFilterList(), cmd.getSort(), cmd.getFlags()); key = new QueryResultKey(q, cmd.getFilterList(), cmd.getSort(), flags);
if ((cmd.getFlags() & NO_CHECK_QCACHE)==0) { if ((flags & NO_CHECK_QCACHE)==0) {
superset = queryResultCache.get(key); superset = queryResultCache.get(key);
if (superset != null) { if (superset != null) {
// check that the cache entry has scores recorded if we need them // check that the cache entry has scores recorded if we need them
if ((cmd.getFlags() & GET_SCORES)==0 || superset.hasScores()) { if ((flags & GET_SCORES)==0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than // NOTE: subset() returns null if the DocList has fewer docs than
// requested // requested
out.docList = superset.subset(cmd.getOffset(),cmd.getLen()); out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
@ -983,12 +1073,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// found the docList in the cache... now check if we need the docset too. // found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches, // OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query. // use it to make the docset instead of rerunning the query.
if (out.docSet==null && ((cmd.getFlags() & GET_DOCSET)!=0) ) { if (out.docSet==null && ((flags & GET_DOCSET)!=0) ) {
if (cmd.getFilterList()==null) { if (cmd.getFilterList()==null) {
out.docSet = getDocSet(cmd.getQuery()); out.docSet = getDocSet(cmd.getQuery());
} else { } else {
List<Query> newList = new ArrayList<Query>(cmd.getFilterList() List<Query> newList = new ArrayList<Query>(cmd.getFilterList().size()+1);
.size()+1);
newList.add(cmd.getQuery()); newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList()); newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList); out.docSet = getDocSet(newList);
@ -1001,6 +1090,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// If we are going to generate the result, bump up to the // If we are going to generate the result, bump up to the
// next resultWindowSize for better caching. // next resultWindowSize for better caching.
if ((flags & NO_SET_QCACHE) == 0) {
// handle 0 special case as well as avoid idiv in the common case. // handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) { if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc=queryResultWindowSize; supersetMaxDoc=queryResultWindowSize;
@ -1008,6 +1098,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize; supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize;
if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested; if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested;
} }
} else {
key = null; // we won't be caching the result
}
} }
@ -1020,7 +1113,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// check if we should try and use the filter cache // check if we should try and use the filter cache
boolean useFilterCache=false; boolean useFilterCache=false;
if ((cmd.getFlags() & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) { if ((flags & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) {
useFilterCache=true; useFilterCache=true;
SortField[] sfields = cmd.getSort().getSort(); SortField[] sfields = cmd.getSort().getSort();
for (SortField sf : sfields) { for (SortField sf : sfields) {
@ -1049,7 +1142,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
} else { } else {
// do it the normal way... // do it the normal way...
cmd.setSupersetMaxDoc(supersetMaxDoc); cmd.setSupersetMaxDoc(supersetMaxDoc);
if ((cmd.getFlags() & GET_DOCSET)!=0) { if ((flags & GET_DOCSET)!=0) {
// this currently conflates returning the docset for the base query vs // this currently conflates returning the docset for the base query vs
// the base query and all filters. // the base query and all filters.
DocSet qDocSet = getDocListAndSetNC(qr,cmd); DocSet qDocSet = getDocListAndSetNC(qr,cmd);
@ -1059,9 +1152,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
getDocListNC(qr,cmd); getDocListNC(qr,cmd);
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader); //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
} }
if (key != null) {
superset = out.docList; superset = out.docList;
out.docList = superset.subset(cmd.getOffset(),cmd.getLen()); out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
} }
}
// lastly, put the superset in the cache if the size is less than or equal // lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached // to queryResultMaxDocsCached
@ -1073,9 +1168,6 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException { private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
//Query query, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList());
final long timeAllowed = cmd.getTimeAllowed(); final long timeAllowed = cmd.getTimeAllowed();
int len = cmd.getSupersetMaxDoc(); int len = cmd.getSupersetMaxDoc();
int last = len; int last = len;
@ -1091,7 +1183,8 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
Query query = QueryUtils.makeQueryable(cmd.getQuery()); Query query = QueryUtils.makeQueryable(cmd.getQuery());
final Filter luceneFilter = filter==null ? null : filter.getTopFilter(); ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Filter luceneFilter = pf.filter;
// handle zero case... // handle zero case...
if (lastDocRequested<=0) { if (lastDocRequested<=0) {
@ -1143,6 +1236,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if( timeAllowed > 0 ) { if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, timeAllowed); collector = new TimeLimitingCollector(collector, timeAllowed);
} }
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try { try {
super.search(query, luceneFilter, collector); super.search(query, luceneFilter, collector);
} }
@ -1167,6 +1265,10 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if( timeAllowed > 0 ) { if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, timeAllowed); collector = new TimeLimitingCollector(collector, timeAllowed);
} }
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try { try {
super.search(query, luceneFilter, collector); super.search(query, luceneFilter, collector);
} }
@ -1199,7 +1301,6 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// be cached if desired. // be cached if desired.
private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOException { private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOException {
int len = cmd.getSupersetMaxDoc(); int len = cmd.getSupersetMaxDoc();
DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList());
int last = len; int last = len;
if (last < 0 || last > maxDoc()) last=maxDoc(); if (last < 0 || last > maxDoc()) last=maxDoc();
final int lastDocRequested = last; final int lastDocRequested = last;
@ -1214,11 +1315,12 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
int maxDoc = maxDoc(); int maxDoc = maxDoc();
int smallSetSize = maxDoc>>6; int smallSetSize = maxDoc>>6;
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Filter luceneFilter = pf.filter;
Query query = QueryUtils.makeQueryable(cmd.getQuery()); Query query = QueryUtils.makeQueryable(cmd.getQuery());
final long timeAllowed = cmd.getTimeAllowed(); final long timeAllowed = cmd.getTimeAllowed();
final Filter luceneFilter = filter==null ? null : filter.getTopFilter();
// handle zero case... // handle zero case...
if (lastDocRequested<=0) { if (lastDocRequested<=0) {
final float[] topscore = new float[] { Float.NEGATIVE_INFINITY }; final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
@ -1253,6 +1355,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if( timeAllowed > 0 ) { if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, timeAllowed); collector = new TimeLimitingCollector(collector, timeAllowed);
} }
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try { try {
super.search(query, luceneFilter, collector); super.search(query, luceneFilter, collector);
} }
@ -1284,6 +1391,10 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if( timeAllowed > 0 ) { if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, timeAllowed ); collector = new TimeLimitingCollector(collector, timeAllowed );
} }
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try { try {
super.search(query, luceneFilter, collector); super.search(query, luceneFilter, collector);
} }
@ -1320,7 +1431,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
// TODO: currently we don't generate the DocSet for the base query, // TODO: currently we don't generate the DocSet for the base query,
// but the QueryDocSet == CompleteDocSet if filter==null. // but the QueryDocSet == CompleteDocSet if filter==null.
return filter==null ? qr.getDocSet() : null; return pf.filter==null && pf.postFilter==null ? qr.getDocSet() : null;
} }
@ -1933,3 +2044,133 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
} }
/**
 * A {@link Filter} that intersects an optional pre-computed {@link DocSet}
 * with the matches of zero or more query {@link Weight}s. The intersection is
 * evaluated lazily, one segment at a time.
 */
class FilterImpl extends Filter {
  final DocSet filter;
  final Filter topFilter;
  final List<Weight> weights;

  /**
   * @param filter  pre-computed set of documents, or null when there is none
   * @param weights weights whose scorers are intersected with {@code filter}
   */
  public FilterImpl(DocSet filter, List<Weight> weights) {
    this.filter = filter;
    this.weights = weights;
    if (filter == null) {
      this.topFilter = null;
    } else {
      this.topFilter = filter.getTopFilter();
    }
  }

  /**
   * Returns the DocSet-backed set unchanged when there are no weights;
   * otherwise wraps it in a {@link FilterSet} that also consults the weights.
   */
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
    DocIdSet base = null;
    if (topFilter != null) {
      base = topFilter.getDocIdSet(context);
    }
    if (weights.isEmpty()) {
      return base;
    }
    return new FilterSet(base, context);
  }

  /** Per-segment view combining the base set (may be null) with the enclosing filter's weights. */
  private class FilterSet extends DocIdSet {
    DocIdSet docIdSet;
    AtomicReaderContext context;

    public FilterSet(DocIdSet docIdSet, AtomicReaderContext context) {
      this.docIdSet = docIdSet;
      this.context = context;
    }

    /**
     * Builds an iterator over the intersection of the base set and every
     * weight's scorer. Returns null as soon as any component has no
     * iterator/scorer for this segment.
     */
    @Override
    public DocIdSetIterator iterator() throws IOException {
      List<DocIdSetIterator> parts = new ArrayList<DocIdSetIterator>(weights.size() + 1);

      if (docIdSet != null) {
        DocIdSetIterator baseIter = docIdSet.iterator();
        if (baseIter == null) {
          return null;
        }
        parts.add(baseIter);
      }

      for (Weight weight : weights) {
        Scorer scorer = weight.scorer(context, Weight.ScorerContext.def());
        if (scorer == null) {
          return null;
        }
        parts.add(scorer);
      }

      // pick the cheapest iterator shape for the number of components
      switch (parts.size()) {
        case 0:
          return null;
        case 1:
          return parts.get(0);
        case 2:
          return new DualFilterIterator(parts.get(0), parts.get(1));
        default:
          return new FilterIterator(parts.toArray(new DocIdSetIterator[parts.size()]));
      }
    }
  }

  /** Intersection of an arbitrary number of iterators; the first one drives iteration. */
  private static class FilterIterator extends DocIdSetIterator {
    final DocIdSetIterator[] iterators;
    final DocIdSetIterator first;

    public FilterIterator(DocIdSetIterator[] iterators) {
      this.iterators = iterators;
      this.first = iterators[0];
    }

    @Override
    public int docID() {
      return first.docID();
    }

    /**
     * Advances the other iterators to {@code doc}. Whenever one of them lands
     * on a different id, that id becomes the new candidate and the scan
     * restarts from index 0, skipping the iterator that produced it.
     */
    private int doNext(int doc) throws IOException {
      int leader = 0; // index of the iterator that produced the current candidate
      int idx = 1;
      while (idx < iterators.length) {
        if (idx != leader) {
          int landed = iterators[idx].advance(doc);
          if (landed != doc) {
            doc = landed;
            leader = idx;
            idx = 0;
            continue; // restart the scan without stepping past index 0
          }
        }
        idx++;
      }
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      return doNext(first.nextDoc());
    }

    @Override
    public int advance(int target) throws IOException {
      return doNext(first.advance(target));
    }
  }

  /** Intersection specialised for exactly two iterators. */
  private static class DualFilterIterator extends DocIdSetIterator {
    final DocIdSetIterator a;
    final DocIdSetIterator b;

    public DualFilterIterator(DocIdSetIterator a, DocIdSetIterator b) {
      this.a = a;
      this.b = b;
    }

    @Override
    public int docID() {
      return a.docID();
    }

    /** Alternately advances b and a until both sit on the same id, starting from a's position {@code doc}. */
    private int align(int doc) throws IOException {
      while (true) {
        int other = b.advance(doc);
        if (other == doc) {
          return doc;
        }
        doc = a.advance(other);
        if (doc == other) {
          return doc;
        }
      }
    }

    @Override
    public int nextDoc() throws IOException {
      return align(a.nextDoc());
    }

    @Override
    public int advance(int target) throws IOException {
      return align(a.advance(target));
    }
  }
}

View File

@ -0,0 +1,96 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;
import java.io.IOException;
import java.util.Set;
/** A simple query that wraps another query and implements ExtendedQuery. */
/**
 * Thin {@link ExtendedQueryBase} wrapper around another query. Boost, weight
 * creation, rewriting, term extraction, hashing and equality are all delegated
 * to the wrapped query.
 */
public class WrappedQuery extends ExtendedQueryBase {
  private Query q;

  /** @param q the query to wrap */
  public WrappedQuery(Query q) {
    this.q = q;
  }

  /** Returns the query currently being wrapped. */
  public Query getWrappedQuery() {
    return this.q;
  }

  /** Replaces the wrapped query. */
  public void setWrappedQuery(Query q) {
    this.q = q;
  }

  /** Sets the boost on the wrapped query itself. */
  @Override
  public void setBoost(float b) {
    this.q.setBoost(b);
  }

  /** Reads the boost from the wrapped query. */
  @Override
  public float getBoost() {
    return this.q.getBoost();
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    return this.q.createWeight(searcher);
  }

  /** Rewrites the wrapped query; the result is returned unwrapped. */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    // currently no need to continue wrapping at this point.
    return this.q.rewrite(reader);
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    this.q.extractTerms(terms);
  }

  /** Clones this wrapper and the wrapped query, so the copy does not share it. */
  @Override
  public Object clone() {
    WrappedQuery copy = (WrappedQuery) super.clone();
    copy.q = (Query) this.q.clone();
    return copy;
  }

  /** Same hash as the wrapped query. */
  @Override
  public int hashCode() {
    return this.q.hashCode();
  }

  /**
   * Compares wrapped queries when {@code obj} is also a WrappedQuery;
   * otherwise compares the wrapped query directly against {@code obj}.
   */
  @Override
  public boolean equals(Object obj) {
    Object other = (obj instanceof WrappedQuery) ? ((WrappedQuery) obj).q : obj;
    return this.q.equals(other);
  }

  /** Options prefix followed by the wrapped query's own string form ({@code field} is not used). */
  @Override
  public String toString(String field) {
    String options = getOptions();
    return options + this.q.toString();
  }
}

View File

@ -136,7 +136,7 @@ public abstract class DocValues {
// A RangeValueSource can't easily be a ValueSource that takes another ValueSource // A RangeValueSource can't easily be a ValueSource that takes another ValueSource
// because it needs different behavior depending on the type of fields. There is also // because it needs different behavior depending on the type of fields. There is also
// a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex. // a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex.
// TODO: change "reader" to AtomicReaderContext
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
float lower; float lower;
float upper; float upper;

View File

@ -193,64 +193,3 @@ public abstract class ValueSource implements Serializable {
} }
/**
 * Scorer that walks every document id below {@code maxDoc}, returning those
 * accepted by {@link #matches(int)} and scoring them with the float value
 * from the supplied {@link DocValues}.
 */
class ValueSourceScorer extends Scorer {
  protected IndexReader reader;
  private int doc = -1;
  protected final int maxDoc;
  protected final DocValues values;
  protected boolean checkDeletes;
  private final Bits delDocs;

  /**
   * @param reader reader supplying maxDoc and the deleted-docs bits
   * @param values per-document values used for scoring
   */
  protected ValueSourceScorer(IndexReader reader, DocValues values) {
    super(null);
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    this.values = values;
    setCheckDeletes(true);
    this.delDocs = MultiFields.getDeletedDocs(reader);
  }

  public IndexReader getReader() {
    return this.reader;
  }

  /** Enables deletion checks only if requested AND the reader actually has deletions. */
  public void setCheckDeletes(boolean checkDeletes) {
    this.checkDeletes = checkDeletes && this.reader.hasDeletions();
  }

  /** A document matches when it is not filtered out as deleted and {@link #matchesValue(int)} accepts it. */
  public boolean matches(int doc) {
    if (checkDeletes && delDocs.get(doc)) {
      return false;
    }
    return matchesValue(doc);
  }

  /** Value test hook; this base implementation accepts every document. */
  public boolean matchesValue(int doc) {
    return true;
  }

  @Override
  public int docID() {
    return this.doc;
  }

  /** Steps forward one id at a time until a match is found or maxDoc is reached. */
  @Override
  public int nextDoc() throws IOException {
    while (++doc < maxDoc) {
      if (matches(doc)) {
        return doc;
      }
    }
    return doc = NO_MORE_DOCS;
  }

  @Override
  public int advance(int target) throws IOException {
    // also works fine when target==NO_MORE_DOCS
    this.doc = target - 1;
    return nextDoc();
  }

  /** The score is the float value of the current document. */
  @Override
  public float score() throws IOException {
    return values.floatVal(this.doc);
  }
}

View File

@ -49,6 +49,27 @@ public class ValueSourceRangeFilter extends SolrFilter {
this.includeUpper = upperVal != null && includeUpper; this.includeUpper = upperVal != null && includeUpper;
} }
/** Returns the value source held by this filter. */
public ValueSource getValueSource() {
  return this.valueSource;
}
/** Returns the lower bound of the range, as the string stored in this filter. */
public String getLowerVal() {
  return this.lowerVal;
}
/** Returns the upper bound of the range, as the string stored in this filter. */
public String getUpperVal() {
  return this.upperVal;
}
/** Returns the stored include-lower flag of this filter. */
public boolean isIncludeLower() {
  return this.includeLower;
}
/** Returns the stored include-upper flag; the constructor leaves it false when no upper value was given. */
public boolean isIncludeUpper() {
  return this.includeUpper;
}
@Override @Override
public DocIdSet getDocIdSet(final Map context, final AtomicReaderContext readerContext) throws IOException { public DocIdSet getDocIdSet(final Map context, final AtomicReaderContext readerContext) throws IOException {
return new DocIdSet() { return new DocIdSet() {

View File

@ -0,0 +1,85 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.Bits;
import java.io.IOException;
/**
 * Scorer that walks every document id below {@code maxDoc}, returning those
 * accepted by {@link #matches(int)} and scoring them with the float value
 * from the supplied {@link DocValues}.
 */
public class ValueSourceScorer extends Scorer {
  protected IndexReader reader;
  private int doc = -1;
  protected final int maxDoc;
  protected final DocValues values;
  protected boolean checkDeletes;
  private final Bits delDocs;

  /**
   * @param reader reader supplying maxDoc and the deleted-docs bits
   * @param values per-document values used for scoring
   */
  protected ValueSourceScorer(IndexReader reader, DocValues values) {
    super(null);
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    this.values = values;
    setCheckDeletes(true);
    this.delDocs = MultiFields.getDeletedDocs(reader);
  }

  public IndexReader getReader() {
    return this.reader;
  }

  /** Enables deletion checks only if requested AND the reader actually has deletions. */
  public void setCheckDeletes(boolean checkDeletes) {
    this.checkDeletes = checkDeletes && this.reader.hasDeletions();
  }

  /** A document matches when it is not filtered out as deleted and {@link #matchesValue(int)} accepts it. */
  public boolean matches(int doc) {
    if (checkDeletes && delDocs.get(doc)) {
      return false;
    }
    return matchesValue(doc);
  }

  /** Value test hook; this base implementation accepts every document. */
  public boolean matchesValue(int doc) {
    return true;
  }

  @Override
  public int docID() {
    return this.doc;
  }

  /** Steps forward one id at a time until a match is found or maxDoc is reached. */
  @Override
  public int nextDoc() throws IOException {
    while (++doc < maxDoc) {
      if (matches(doc)) {
        return doc;
      }
    }
    return doc = NO_MORE_DOCS;
  }

  @Override
  public int advance(int target) throws IOException {
    // also works fine when target==NO_MORE_DOCS
    this.doc = target - 1;
    return nextDoc();
  }

  /** The score is the float value of the current document. */
  @Override
  public float score() throws IOException {
    return values.floatVal(this.doc);
  }
}

View File

@ -430,6 +430,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
} }
for (String test : tests) { for (String test : tests) {
if (test == null || test.length()==0) continue;
String testJSON = test.replace('\'', '"'); String testJSON = test.replace('\'', '"');
try { try {

View File

@ -0,0 +1,322 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.*;
/**
 * Tests for non-cached filters (cache=false), filter cost ordering and post
 * filtering. Contains a deterministic check of when the post-filter collector
 * is used, plus a randomized test that compares Solr's counts against an
 * in-memory bitset model.
 */
public class TestFiltering extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeTests() throws Exception {
    initCore("solrconfig.xml","schema12.xml");
  }

  /**
   * Uses the change in DelegatingCollector.setLastDelegateCount across a
   * request to tell whether a post filter was installed for that request.
   */
  @Test
  public void testCaching() throws Exception {
    assertU(adoc("id","4", "val_i","1"));
    assertU(adoc("id","1", "val_i","2"));
    assertU(adoc("id","3", "val_i","3"));
    assertU(adoc("id","2", "val_i","4"));
    assertU(commit());

    int prevCount;

    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i")
        ,"/response/numFound==2"
    );
    assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);

    // The exact same query the second time will be cached by the queryCache
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i")
        ,"/response/numFound==2"
    );
    assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);

    // cache is true by default
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("q","*:*", "fq","{!frange l=2 u=4}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);

    // default cost avoids post filtering
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("q","*:*", "fq","{!frange l=2 u=5 cache=false}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);


    // now re-do the same tests w/ faceting on to get the full docset

    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);

    // since we need the docset and the filter was not cached, the collector will need to be used again
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);

    // cache is true by default
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);

    // default cost avoids post filtering
    prevCount = DelegatingCollector.setLastDelegateCount;
    assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=8 cache=false}val_i")
        ,"/response/numFound==3"
    );
    assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
  }


  /**
   * Expected results for one randomized request, one bit per indexed value.
   * answer = main query AND all filters; multiSelect = the same but ignoring
   * filters tagged for exclusion; facetQuery = answer further restricted by
   * the random facet.query.
   */
  class Model {
    int indexSize;
    OpenBitSet answer;
    OpenBitSet multiSelect;
    OpenBitSet facetQuery;

    /** Resets all three expectations to "every document matches". */
    void clear() {
      answer = new OpenBitSet(indexSize);
      answer.set(0, indexSize);

      multiSelect = new OpenBitSet(indexSize);
      multiSelect.set(0, indexSize);

      facetQuery = new OpenBitSet(indexSize);
      facetQuery.set(0, indexSize);
    }
  }

  // field that holds the same integer as the document id
  static String f = "val_i";

  /**
   * Builds an frange query string over field {@link #f} for the range [l,u],
   * optionally negated, with cache/cost/tag local params attached.
   */
  String frangeStr(boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {

    String topLev="";
    if (!cache || exclude) {
      topLev = "" + (cache || random.nextBoolean() ? " cache="+cache : "")
          + (cost!=0 ? " cost="+cost : "")
          + ((exclude) ? " tag=t" : "");
    }

    String ret = "{!frange v="+f+" l="+l+" u="+u;
    if (negative) {
      ret = "-_query_:\"" + ret + "}\"";
      if (topLev.length()>0) {
        ret = "{!" + topLev + "}" + ret;  // add options at top level (can't be on frange)
      }
    } else {
      ret += topLev + "}"; // add options right to frange
    }

    return ret;
  }

  /**
   * Generates a random query string (an frange, or a boolean of term clauses)
   * and applies its effect to the relevant bitsets of {@code model}.
   *
   * @param model      expectations to update
   * @param mainQuery  not used by this method
   * @param facetQuery true when the query is a facet.query; it then only
   *                   narrows model.facetQuery and is never tagged for exclusion
   * @return the query string to send to Solr
   */
  String makeRandomQuery(Model model, boolean mainQuery, boolean facetQuery) {

    boolean cache = random.nextBoolean();
    int cost = cache ? 0 : random.nextBoolean() ? random.nextInt(200) : 0;
    boolean positive = random.nextBoolean();
    boolean exclude = facetQuery ? false : random.nextBoolean();    // can't exclude a facet query from faceting

    OpenBitSet[] sets = facetQuery ? new OpenBitSet[]{model.facetQuery} :
        (exclude ? new OpenBitSet[]{model.answer, model.facetQuery} : new OpenBitSet[]{model.answer, model.multiSelect, model.facetQuery});

    if (random.nextInt(100) < 50) {
      // frange
      int l=0;
      int u=0;

      if (positive) {
        // positive frange, make it big by taking the max of 4 tries
        int n=-1;

        for (int i=0; i<4; i++) {
          int ll = random.nextInt(model.indexSize);
          // upper bound is drawn relative to this try's own lower bound so it
          // stays inside the index
          int uu = ll + ((ll==model.indexSize-1) ? 0 : random.nextInt(model.indexSize-ll));
          if (uu-ll+1 > n) {
            n = uu-ll+1;
            u = uu;
            l = ll;
          }
        }

        for (OpenBitSet set : sets) {
          set.clear(0,l);
          set.clear(u+1, model.indexSize);
        }
      } else {
        // negative frange.. make it relatively small
        l = random.nextInt(model.indexSize);
        // clamp to the last document instead of always stretching to it
        u = Math.min(model.indexSize-1, l+random.nextInt(Math.max(model.indexSize / 10, 2)));

        for (OpenBitSet set : sets) {
          set.clear(l,u+1);
        }
      }

      return frangeStr(!positive, l, u, cache, cost, exclude);
    } else {
      // term or boolean query
      OpenBitSet pset = new OpenBitSet(model.indexSize);
      for (int i=0; i<pset.getBits().length; i++) {
        pset.getBits()[i] = random.nextLong();    // set 50% of the bits on average
      }
      if (positive) {
        for (OpenBitSet set : sets) {
          set.and(pset);
        }
      } else {
        for (OpenBitSet set : sets) {
          set.andNot(pset);
        }
      }


      StringBuilder sb = new StringBuilder();
      for (int doc=-1;;) {
        doc = pset.nextSetBit(doc+1);
        if (doc < 0 || doc >= model.indexSize) break;
        sb.append((positive ? " ":" -") + f+":"+doc);
      }

      String ret = sb.toString();
      if (ret.length()==0) ret = (positive ? "":"-") + "id:99999999";

      if (!cache || exclude || random.nextBoolean()) {
        ret = "{!cache=" + cache
            + ((cost != 0) ? " cost="+cost : "")
            + ((exclude) ? " tag=t" : "")
            + "}" + ret;
      }

      return ret;
    }
  }

  /**
   * Repeatedly builds a small random index, then fires random combinations of
   * main query, filters, facet queries and (sometimes) grouping, checking
   * numFound and the facet counts against the bitset model.
   */
  @Test
  public void testRandomFiltering() throws Exception {
    int indexIter=5 * RANDOM_MULTIPLIER;
    int queryIter=250 * RANDOM_MULTIPLIER;
    Model model = new Model();

    for (int iiter = 0; iiter<indexIter; iiter++) {
      model.indexSize = random.nextInt(20 * RANDOM_MULTIPLIER) + 1;
      clearIndex();

      for (int i=0; i<model.indexSize; i++) {
        String val = Integer.toString(i);

        assertU(adoc("id",val,f,val));
        if (random.nextInt(100) < 20) {
          // duplicate doc 20% of the time (makes deletions)
          assertU(adoc("id",val,f,val));
        }
        if (random.nextInt(100) < 10) {
          // commit 10% of the time (forces a new segment)
          assertU(commit());
        }
      }
      assertU(commit());


      for (int qiter=0; qiter<queryIter; qiter++) {
        model.clear();
        List<String> params = new ArrayList<String>();
        params.add("q"); params.add(makeRandomQuery(model, true, false));

        int nFilters = random.nextInt(5);
        for (int i=0; i<nFilters; i++) {
          params.add("fq");  params.add(makeRandomQuery(model, false, false));
        }

        boolean facet = random.nextBoolean();
        if (facet) {
          // basic facet.query tests getDocListAndSet
          params.add("facet"); params.add("true");
          params.add("facet.query"); params.add("*:*");
          params.add("facet.query"); params.add("{!key=multiSelect ex=t}*:*");

          String facetQuery = makeRandomQuery(model, false, true);
          if (facetQuery.startsWith("{!")) {
            facetQuery = "{!key=facetQuery " + facetQuery.substring(2);
          } else {
            facetQuery = "{!key=facetQuery}" + facetQuery;
          }
          params.add("facet.query"); params.add(facetQuery);
        }

        if (random.nextInt(100) < 10) {
          params.add("group"); params.add("true");
          params.add("group.main"); params.add("true");
          params.add("group.field"); params.add("id");
          if (random.nextBoolean()) {
            params.add("group.cache.percent"); params.add("100");
          }
        }

        SolrQueryRequest sreq = req(params.toArray(new String[params.size()]));
        long expected = model.answer.cardinality();
        long expectedMultiSelect = model.multiSelect.cardinality();
        long expectedFacetQuery = model.facetQuery.cardinality();

        if (iiter==-1 && qiter==-1) {
          // set breakpoint here to debug a specific issue
          System.out.println("request="+params);
        }

        try {
          assertJQ(sreq
              ,"/response/numFound==" + expected
              , facet ? "/facet_counts/facet_queries/*:*/==" + expected : null
              , facet ? "/facet_counts/facet_queries/multiSelect/==" + expectedMultiSelect : null
              , facet ? "/facet_counts/facet_queries/facetQuery/==" + expectedFacetQuery : null
          );
        } catch (Exception e) {
          // show the indexIter and queryIter for easier debugging
          SolrException.log(log, e);
          String s= "FAILURE: iiter=" + iiter + " qiter=" + qiter + " request="+params;
          log.error(s);
          fail(s);
        }

      }
    }
  }

}

View File

@ -120,6 +120,16 @@ public class TestQueryTypes extends AbstractSolrTestCase {
,"//result[@numFound='1']" ,"//result[@numFound='1']"
); );
// frange as a filter that bypasses the filter cache (cache=false)
assertQ(req( "q","*:*", "fq", "{!frange cache=false v="+f+" l='"+v+"' u='"+v+"'}" )
,"//result[@numFound='1']"
);
// frange as a non-cached post filter (cache=false, cost=100): checked only after the main query matches
assertQ(req( "q","*:*", "fq", "{!frange cache=false cost=100 v="+f+" l='"+v+"' u='"+v+"'}" )
,"//result[@numFound='1']"
);
// exists() // exists()
assertQ(req( "fq","id:999", "q", "{!frange l=1 u=1}if(exists("+f+"),1,0)" ) assertQ(req( "fq","id:999", "q", "{!frange l=1 u=1}if(exists("+f+"),1,0)" )
,"//result[@numFound='1']" ,"//result[@numFound='1']"

View File

@ -202,10 +202,10 @@ public class TestSearchPerf extends AbstractSolrTestCase {
SolrQueryRequest req = lrf.makeRequest(); SolrQueryRequest req = lrf.makeRequest();
QParser parser = QParser.getParser("foomany_s:[" + l + " TO " + u + "]", null, req); QParser parser = QParser.getParser("foomany_s:[" + l + " TO " + u + "]", null, req);
Query range = parser.parse(); Query range = parser.getQuery();
QParser parser2 = QParser.getParser("{!frange l="+l+" u="+u+"}foomany_s", null, req); QParser parser2 = QParser.getParser("{!frange l="+l+" u="+u+"}foomany_s", null, req);
Query frange = parser2.parse(); Query frange = parser2.getQuery();
req.close(); req.close();
createIndex2(indexSize,"foomany_s"); createIndex2(indexSize,"foomany_s");
@ -228,13 +228,13 @@ public class TestSearchPerf extends AbstractSolrTestCase {
SolrQueryRequest req = lrf.makeRequest(); SolrQueryRequest req = lrf.makeRequest();
QParser parser = QParser.getParser("foomany_s:[" + l + " TO " + u + "]", null, req); QParser parser = QParser.getParser("foomany_s:[" + l + " TO " + u + "]", null, req);
Query rangeQ = parser.parse(); Query rangeQ = parser.getQuery();
List<Query> filters = new ArrayList<Query>(); List<Query> filters = new ArrayList<Query>();
filters.add(rangeQ); filters.add(rangeQ);
req.close(); req.close();
parser = QParser.getParser("{!dismax qf=t10_100_ws pf=t10_100_ws ps=20}"+ t(0) + ' ' + t(1) + ' ' + t(2), null, req); parser = QParser.getParser("{!dismax qf=t10_100_ws pf=t10_100_ws ps=20}"+ t(0) + ' ' + t(1) + ' ' + t(2), null, req);
Query q= parser.parse(); Query q= parser.getQuery();
// SolrIndexSearcher searcher = req.getSearcher(); // SolrIndexSearcher searcher = req.getSearcher();
// DocSet range = searcher.getDocSet(rangeQ, null); // DocSet range = searcher.getDocSet(rangeQ, null);