SOLR-8037: speed up term range queries, use filter cache for embedded ranges

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1702661 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2015-09-12 18:51:36 +00:00
parent f213b3bfdc
commit 996a3fb117
9 changed files with 845 additions and 25 deletions

lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java

@@ -21,8 +21,9 @@ import java.util.Arrays;
 /**
  * A LSB Radix sorter for unsigned int values.
+ * @lucene.internal
  */
-final class LSBRadixSorter {
+public final class LSBRadixSorter {
 
   private static final int INSERTION_SORT_THRESHOLD = 30;
 
   private static final int HISTOGRAM_SIZE = 256;
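
The class is made public so code outside org.apache.lucene.util (Solr's new DocSetBuilder below) can reuse it. A minimal standalone sketch, using only the sort(int[], int, int) entry point that DocSetBuilder calls:

    // Hypothetical caller: sorts the [0, len) prefix of the array in place.
    int[] docs = {42, 7, 42, 3};
    new LSBRadixSorter().sort(docs, 0, docs.length);
    // docs is now {3, 7, 42, 42}; duplicates are preserved.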

solr/CHANGES.txt

@@ -72,6 +72,11 @@ Optimizations
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
   specified.  Speedups of up to 8% were observed.  (yonik)
 
+* SOLR-8037: Speed up creation of filters from term range queries (i.e. non-numeric range queries)
+  and use the filter cache for term range queries that are part of larger queries.  Some observed
+  speedups were up to 2.5x for production of filters, and up to 10x for query evaluation with
+  embedded term range queries that resulted in filter cache hits.  (yonik)
+
 Other Changes
 ----------------------
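
To illustrate the "embedded" case (the field names here are invented, not part of the commit): in a request such as

    q=inStock:true AND title:[apple TO banana}

the term range is only one clause of a larger query, so it previously had to be re-enumerated for each request; with this change the range clause becomes a SolrRangeQuery whose matching doc set can be cached in, and served from, the filter cache.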

solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java (new file)

@@ -0,0 +1,502 @@
package org.apache.solr.query;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetProducer;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher;
/** @lucene.experimental */
public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetProducer {
private final String field;
private final BytesRef lower;
private final BytesRef upper;
private byte flags;
private static final byte FLAG_INC_LOWER = 0x01;
private static final byte FLAG_INC_UPPER = 0x02;
public SolrRangeQuery(String field, BytesRef lower, BytesRef upper, boolean includeLower, boolean includeUpper) {
this.field = field;
this.lower = lower;
this.upper = upper;
this.flags = (byte)((this.lower != null && includeLower ? FLAG_INC_LOWER : 0) | (this.upper != null && includeUpper ? FLAG_INC_UPPER : 0));
}
public String getField() {
return field;
}
public boolean includeLower() {
return (flags & FLAG_INC_LOWER) != 0;
}
public boolean includeUpper() {
return (flags & FLAG_INC_UPPER) != 0;
}
@Override
public int hashCode() {
int hash = 0x8f2c9ba7 * (flags+1); // avoid multiplying by 0
hash = hash * 29 + ((lower == null) ? 0 : lower.hashCode()); // TODO: simpler hash code here?
hash = hash * 29 + ((upper == null) ? 0 : upper.hashCode());
return hash;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof SolrRangeQuery)) {
return false;
}
SolrRangeQuery other = (SolrRangeQuery)obj;
return (this.flags == other.flags)
&& (this.field.equals(other.field))
&& (this.lower == other.lower || (this.lower != null && other.lower != null && this.lower.equals(other.lower)))
&& (this.upper == other.upper || (this.upper != null && other.upper != null && this.upper.equals(other.upper)))
;
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
// TODO: use our schema?
buffer.append(includeLower() ? '[' : '{');
buffer.append(endpoint(lower));
buffer.append(" TO ");
buffer.append(endpoint(upper));
buffer.append(includeUpper() ? ']' : '}');
return buffer.toString();
}
private String endpoint(BytesRef ref) {
if (ref == null) return "*";
String toStr = Term.toString(ref);
if ("*".equals(toStr)) {
toStr = "\\*";
}
// TODO: other escaping
return toStr;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needScores) throws IOException {
return new ConstWeight(searcher, needScores);
/*
DocSet docs = createDocSet(searcher.getIndexReader().leaves(), searcher.getIndexReader().maxDoc());
SolrConstantScoreQuery csq = new SolrConstantScoreQuery( docs.getTopFilter() );
return csq.createWeight(searcher, needScores);
*/
}
@Override
public DocSet createDocSet(SolrIndexSearcher searcher) throws IOException {
return createDocSet( searcher, Math.min(64,(searcher.maxDoc()>>>10)+4) );
}
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
int maxDoc = searcher.maxDoc();
BitDocSet liveDocs = searcher.getLiveDocs();
FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
int maxTermsPerSegment = 0;
for (LeafReaderContext ctx : leaves) {
TermsEnum te = getTermsEnum(ctx);
int termsVisited = builder.add(te, ctx.docBase);
maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
}
return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
}
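/**
 * A TermsEnum restricted to this query's range: the constructor seeks the wrapped
 * enum to the lower bound, and next() returns null once a term past the upper bound
 * is reached. The "positioned" flag records that the constructor already advanced
 * the wrapped enum, so the first call to next() must not advance it again.
 */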
private class RangeTermsEnum extends TermsEnum {
TermsEnum te;
BytesRef curr;
boolean positioned;
public RangeTermsEnum(Terms terms) throws IOException {
if (terms == null) {
positioned = true;
} else {
te = terms.iterator();
if (lower != null) {
TermsEnum.SeekStatus status = te.seekCeil(lower);
if (status == TermsEnum.SeekStatus.END) {
positioned = true;
curr = null;
} else if (status == SeekStatus.FOUND) {
positioned = includeLower();
curr = te.term();
} else {
// lower bound not found, so includeLower is irrelevant
positioned = true;
curr = te.term();
}
}
}
}
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
return te.seekCeil(text);
}
@Override
public void seekExact(long ord) throws IOException {
te.seekExact(ord);
}
@Override
public BytesRef term() throws IOException {
return te.term(); // should be equal to curr, except if we went past the end
}
@Override
public long ord() throws IOException {
return te.ord();
}
@Override
public int docFreq() throws IOException {
return te.docFreq();
}
@Override
public long totalTermFreq() throws IOException {
return te.totalTermFreq();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return te.postings(reuse, flags);
}
@Override
public BytesRef next() throws IOException {
if (positioned) {
positioned = false;
} else {
curr = te.next();
}
if (curr == null) return null;
if (upper != null) {
int cmp = curr.compareTo(upper);
if (cmp < 0 || cmp == 0 && includeUpper()) {
return curr;
} else {
curr = null;
}
}
return curr;
}
@Override
public AttributeSource attributes() {
return te.attributes();
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
return te.seekExact(text);
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
te.seekExact(term, state);
}
@Override
public TermState termState() throws IOException {
return te.termState();
}
}
public TermsEnum getTermsEnum(LeafReaderContext ctx) throws IOException {
return new RangeTermsEnum( ctx.reader().terms(getField()) );
}
private static class TermAndState {
final BytesRef term;
final TermState state;
final int docFreq;
final long totalTermFreq;
TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
this.term = term;
this.state = state;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
}
}
private static class SegState {
final Weight weight;
final DocIdSet set;
SegState(Weight weight) {
this.weight = weight;
this.set = null;
}
SegState(DocIdSet set) {
this.set = set;
this.weight = null;
}
}
// adapted from MultiTermQueryConstantScoreWrapper
class ConstWeight extends ConstantScoreWeight {
private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
final IndexSearcher searcher;
final boolean needScores;
boolean checkedFilterCache;
Filter filter;
final SegState[] segStates;
protected ConstWeight(IndexSearcher searcher, boolean needScores) {
super( SolrRangeQuery.this );
this.searcher = searcher;
this.segStates = new SegState[ searcher.getIndexReader().leaves().size() ];
this.needScores = needScores;
}
/** Try to collect terms from the given terms enum and return count=sum(df) for the terms visited so far,
 * or (-count - 1) if the enum was exhausted (in which case this should be rewritten into a boolean query).
 * The termsEnum will already be positioned on the next term if not exhausted.
 */
private long collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
long count = 0;
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
for (int i = 0; i < threshold; ++i) {
final BytesRef term = termsEnum.next();
if (term == null) {
return -count - 1;
}
TermState state = termsEnum.termState();
if (state.isRealTerm() == false) {
// TermQuery does not accept fake terms for now
return count;
}
int df = termsEnum.docFreq();
count += df;
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, df, termsEnum.totalTermFreq()));
}
return termsEnum.next() == null ? (-count - 1) : count;
}
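// Compute (and memoize) how to execute against this segment:
// - if the range matches few enough terms (collectTerms exhausts the enum below the
//   threshold), rewrite to a constant-score BooleanQuery of TermQuery clauses;
// - else, on the first segment only, consult the Solr filter cache and, on a miss,
//   build a full DocSet, cache it, and serve every segment from it;
// - otherwise build a DocIdSet for just this segment directly from the postings.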
private SegState getSegState(LeafReaderContext context) throws IOException {
SegState segState = segStates[context.ord];
if (segState != null) return segState;
// first time, check our filter cache
boolean doCheck = !checkedFilterCache && context.ord == 0;
checkedFilterCache = true;
SolrIndexSearcher solrSearcher = null;
if (doCheck && searcher instanceof SolrIndexSearcher) {
solrSearcher = (SolrIndexSearcher)searcher;
if (solrSearcher.getFilterCache() == null) {
doCheck = false;
} else {
DocSet answer = solrSearcher.getFilterCache().get(SolrRangeQuery.this);
if (answer != null) {
filter = answer.getTopFilter();
}
}
} else {
// not a SolrIndexSearcher (or no filter cache): never try to build or consult the cache
doCheck = false;
}
if (filter != null) {
return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
}
final Terms terms = context.reader().terms(SolrRangeQuery.this.getField());
if (terms == null) {
return segStates[context.ord] = new SegState((DocIdSet) null);
}
final TermsEnum termsEnum = SolrRangeQuery.this.getTermsEnum(context);
PostingsEnum docs = null;
final List<TermAndState> collectedTerms = new ArrayList<>();
long count = collectTerms(context, termsEnum, collectedTerms);
if (count < 0) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (TermAndState t : collectedTerms) {
final TermContext termContext = new TermContext(searcher.getTopReaderContext());
termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termContext), BooleanClause.Occur.SHOULD);
}
Query q = new ConstantScoreQuery(bq.build());
final Weight weight = searcher.rewrite(q).createWeight(searcher, needScores);
weight.normalize(1f, score());
return segStates[context.ord] = new SegState(weight);
}
// Too many terms for boolean query...
if (doCheck) {
DocSet answer = createDocSet(solrSearcher, count);
solrSearcher.getFilterCache().put(SolrRangeQuery.this, answer);
filter = answer.getTopFilter();
return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
}
/* FUTURE: reuse term states in the future to help build DocSet, use collected count so far...
Bits liveDocs = context.reader().getLiveDocs();
int base = context.docBase;
int termsVisited = collectedTerms.size();
DocSetBuilder builder = new DocSetBuilder(searcher.getIndexReader().maxDoc());
if (!collectedTerms.isEmpty()) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
builder.add(docs, context.docBase, liveDocs);
}
}
termsVisited += builder.add(termsEnum, base, liveDocs);
*/
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
builder.grow((int)Math.min(Integer.MAX_VALUE,count));
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
builder.add(docs);
}
}
do {
// already positioned on the next term, so don't call next() here...
docs = termsEnum.postings(docs, PostingsEnum.NONE);
builder.add(docs);
} while (termsEnum.next() != null);
DocIdSet segSet = builder.build();
return segStates[context.ord] = new SegState(segSet);
}
private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
return new ConstantScoreScorer(this, score(), disi);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final SegState weightOrBitSet = getSegState(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context);
} else {
final Scorer scorer = scorer(weightOrBitSet.set);
if (scorer == null) {
return null;
}
return new DefaultBulkScorer(scorer);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SegState weightOrBitSet = getSegState(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context);
} else {
return scorer(weightOrBitSet.set);
}
}
}
}
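
A usage sketch (assumed, not part of the commit; the field name and bounds are invented) showing how the constructor arguments map onto range-query syntax:

    // Builds title:[apple TO banana} -- inclusive lower bound, exclusive upper bound.
    SolrRangeQuery range = new SolrRangeQuery(
        "title",
        new BytesRef("apple"),    // lower bound; null would mean open-ended ("*")
        new BytesRef("banana"),   // upper bound; null would mean open-ended ("*")
        true,                     // includeLower -> '['
        false);                   // includeUpper -> '}'
    assert "title:[apple TO banana}".equals(range.toString("someOtherField"));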

solr/core/src/java/org/apache/solr/schema/FieldType.java

@@ -63,6 +63,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -720,12 +721,11 @@ public abstract class FieldType extends FieldProperties {
           part2 == null ? null : new BytesRef(toInternal(part2)),
           minInclusive, maxInclusive);
     } else {
-      MultiTermQuery rangeQuery = TermRangeQuery.newStringRange(
+      SolrRangeQuery rangeQuery = new SolrRangeQuery(
             field.getName(),
-            part1 == null ? null : toInternal(part1),
-            part2 == null ? null : toInternal(part2),
+            part1 == null ? null : new BytesRef(toInternal(part1)),
+            part2 == null ? null : new BytesRef(toInternal(part2)),
             minInclusive, maxInclusive);
-      rangeQuery.setRewriteMethod(getRewriteMethod(parser, field));
       return rangeQuery;
     }
   }
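
With this change a non-numeric range no longer goes through TermRangeQuery plus a rewrite method: for example (hypothetical field), cat:[book TO electronics] on a plain string field now builds new SolrRangeQuery("cat", new BytesRef("book"), new BytesRef("electronics"), true, true) directly.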

solr/core/src/java/org/apache/solr/schema/TextField.java

@@ -28,6 +28,7 @@ import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -136,7 +137,7 @@ public class TextField extends FieldType {
     Analyzer multiAnalyzer = getMultiTermAnalyzer();
     BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
     BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
-    return new TermRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
+    return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
   }
 
   public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {

solr/core/src/java/org/apache/solr/search/BitDocSet.java

@@ -285,8 +285,7 @@ public class BitDocSet extends DocSetBase {
     }
 
     final int base = context.docBase;
-    final int maxDoc = reader.maxDoc();
-    final int max = base + maxDoc;   // one past the max doc in this segment.
+    final int max = base + reader.maxDoc();   // one past the max doc in this segment.
 
     return BitsFilteredDocIdSet.wrap(new DocIdSet() {
       @Override
@@ -302,10 +301,11 @@ public class BitDocSet extends DocSetBase {
           @Override
           public int nextDoc() {
-            if (pos >= bs.length() - 1) {
+            int next = pos+1;
+            if (next >= max) {
               return adjustedDoc = NO_MORE_DOCS;
             } else {
-              pos = bs.nextSetBit(pos + 1);
+              pos = bs.nextSetBit(next);
               return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
             }
           }
@@ -314,7 +314,7 @@ public class BitDocSet extends DocSetBase {
           public int advance(int target) {
             if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
             int adjusted = target + base;
-            if (adjusted >= bs.length()) {
+            if (adjusted >= max) {
               return adjustedDoc = NO_MORE_DOCS;
             } else {
               pos = bs.nextSetBit(adjusted);
@@ -326,6 +326,7 @@ public class BitDocSet extends DocSetBase {
           public long cost() {
             // we don't want to actually compute cardinality, but
             // if it's already been computed, we use it (pro-rated for the segment)
+            int maxDoc = max-base;
             if (size != -1) {
               return (long)(size * ((FixedBitSet.bits2words(maxDoc)<<6) / (float)bs.length()));
             } else {
@@ -350,7 +351,7 @@ public class BitDocSet extends DocSetBase {
         @Override
         public int length() {
-          return maxDoc;
+          return max-base;
         }
       };
     }
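
A concrete reading of the iterator change (numbers invented for illustration): for a segment with docBase=100 and reader.maxDoc()=50, max is 150. A nextDoc() call at pos=149 now returns NO_MORE_DOCS immediately; the old code tested against bs.length() (the size of the whole top-level bitset) and so could scan into set bits belonging to later segments before discarding them against max.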

solr/core/src/java/org/apache/solr/search/DocSetBuilder.java (new file)

@@ -0,0 +1,222 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Adapted from DocIdSetBuilder to build DocSets
*
* @lucene.internal
*/
public final class DocSetBuilder {
private final int maxDoc;
private final int threshold;
private int[] buffer;
private int pos;
private FixedBitSet bitSet;
public DocSetBuilder(int maxDoc, long costEst) {
this.maxDoc = maxDoc;
// For ridiculously small sets, we'll just use a sorted int[]
// maxDoc >>> 7 is a good value if you want to save memory, lower values
// such as maxDoc >>> 11 should provide faster building but at the expense
// of using a full bitset even for quite sparse data
this.threshold = (maxDoc >>> 7) + 4; // the +4 is for better testing on small indexes
if (costEst > threshold) {
bitSet = new FixedBitSet(maxDoc);
} else {
this.buffer = new int[Math.max((int)costEst,1)];
}
}
private void upgradeToBitSet() {
assert bitSet == null;
bitSet = new FixedBitSet(maxDoc);
for (int i = 0; i < pos; ++i) {
bitSet.set(buffer[i]);
}
this.buffer = null;
this.pos = 0;
}
private void growBuffer(int minSize) {
if (minSize < buffer.length) return;
int newSize = buffer.length;
while (newSize < minSize) {
newSize = newSize << 1;
}
newSize = Math.min(newSize, threshold);
int[] newBuffer = new int[newSize];
System.arraycopy(buffer, 0, newBuffer, 0, pos);
buffer = newBuffer;
}
public void add(DocIdSetIterator iter, int base) throws IOException {
grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));
if (bitSet != null) {
add(bitSet, iter, base);
} else {
while (true) {
for (int i = pos; i < buffer.length; ++i) {
final int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
pos = i; // update pos
return;
}
buffer[i] = doc + base; // using the loop counter may help with removal of bounds checking
}
pos = buffer.length; // update pos
if (pos + 1 >= threshold) {
break;
}
growBuffer(pos + 1);
}
upgradeToBitSet();
add(bitSet, iter, base);
}
}
public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
bitSet.set(doc + base);
}
}
/** Returns the number of terms visited */
public int add(TermsEnum te, int base) throws IOException {
PostingsEnum postings = null;
int termCount = 0;
for(;;) {
BytesRef term = te.next();
if (term == null) break;
termCount++;
postings = te.postings(postings, PostingsEnum.NONE);
add(postings, base);
}
return termCount;
}
public void grow(int numDocs) {
if (bitSet == null) {
final long newLength = (long) pos + numDocs; // widen before adding to avoid int overflow
if (newLength < threshold) {
growBuffer((int) newLength);
} else {
upgradeToBitSet();
}
}
}
public void add(int doc) {
if (bitSet != null) {
bitSet.set(doc);
} else {
if (pos >= buffer.length) {
if (pos + 1 >= threshold) {
upgradeToBitSet();
bitSet.set(doc);
return;
}
growBuffer(pos + 1);
}
buffer[pos++] = doc;
}
}
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
if (length == 0) {
return 0;
}
int l = 1;
int previous = arr[0];
for (int i = 1; i < length; ++i) {
final int value = arr[i];
assert value >= previous;
if (value != previous) {
if (acceptDocs == null || acceptDocs.get(value)) {
arr[l++] = value;
previous = value;
}
}
}
return l;
}
public DocSet build(FixedBitSet filter) {
if (bitSet != null) {
if (filter != null) {
bitSet.and(filter);
}
return new BitDocSet(bitSet);
// TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
} else {
LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, pos);
final int l = dedup(buffer, pos, filter);
assert l <= pos;
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
}
}
/** Only use this if you know there were no duplicates and that docs were collected in-order! */
public DocSet buildUniqueInOrder(FixedBitSet filter) {
if (bitSet != null) {
if (filter != null) {
bitSet.and(filter);
}
return new BitDocSet(bitSet);
} else {
// don't need to sort, but still need to remove non accepted docs
int l = pos;
if (filter != null) {
l = dedup(buffer, pos, filter);
}
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
}
}
}
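
A minimal behavioral sketch (standalone usage assumed, not from the commit):

    // A small cost estimate starts the builder on an int[] buffer; it upgrades to a
    // FixedBitSet only once more than (maxDoc >>> 7) + 4 docs have been buffered.
    DocSetBuilder builder = new DocSetBuilder(1_000_000, 10 /* cost estimate */);
    builder.add(42);
    builder.add(7);
    builder.add(42);                    // out-of-order and duplicate adds are fine
    DocSet set = builder.build(null);   // null filter: accept all docs
    // build() radix-sorts and dedups the buffer, yielding a SortedIntDocSet of {7, 42}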

solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java

@@ -462,6 +462,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     return fieldNames;
   }
 
+  public SolrCache<Query,DocSet> getFilterCache() {
+    return filterCache;
+  }
+
   /**
    * Returns a collection of the names of all stored fields which can be
    * highlighted the index reader knows about.
@@ -919,12 +923,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
       DocSet absAnswer = filterCache.get(absQ);
       if (absAnswer!=null) {
         if (positive) return absAnswer;
-        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+        else return getLiveDocs().andNot(absAnswer);
       }
     }
 
     DocSet absAnswer = getDocSetNC(absQ, null);
-    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+    DocSet answer = positive ? absAnswer : getLiveDocs().andNot(absAnswer);
 
     if (filterCache != null) {
       // cache negative queries as positive
@@ -948,7 +952,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
   }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
+  private BitDocSet liveDocs;
+
+  public BitDocSet getLiveDocs() throws IOException {
+    // going through the filter cache will provide thread safety here
+    if (liveDocs == null) {
+      liveDocs = getDocSetBits(matchAllDocsQuery);
+    }
+    return liveDocs;
+  }
 
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
@@ -1129,7 +1141,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     // Are all of our normal cached filters negative?
     if (end > 0 && answer==null) {
-      answer = getPositiveDocSet(matchAllDocsQuery);
+      answer = getLiveDocs();
     }
 
     // do negative queries first to shrink set size
@@ -1152,7 +1164,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     } else {
       if (postFilters == null) {
         if (answer == null) {
-          answer = getPositiveDocSet(matchAllDocsQuery);
+          answer = getLiveDocs();
         }
         // "answer" is the only part of the filter, so set it.
         pf.answer = answer;
@@ -2150,7 +2162,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     // if both negative, we need to create a temp DocSet since we
     // don't have a counting method that takes three.
-    DocSet all = getPositiveDocSet(matchAllDocsQuery);
+    DocSet all = getLiveDocs();
 
     // -a -b == *:*.andNot(a).andNotSize(b) == *.*.andNotSize(a.union(b))
     // we use the last form since the intermediate DocSet should normally be smaller.
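
Net effect of the getLiveDocs() additions above: the *:* doc set is computed once per searcher and memoized (the first call still goes through getDocSetBits, hence the filter cache, for thread safety), and negative filters, e.g. a hypothetical -inStock:false, are evaluated as getLiveDocs().andNot(cachedPositiveSet) instead of re-fetching the match-all set each time.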

solr/core/src/test/org/apache/solr/search/TestFiltering.java

@@ -21,6 +21,7 @@ package org.apache.solr.search;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SolrQueryRequest;
@@ -34,7 +35,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeTests() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml","schema_latest.xml");
   }
@@ -132,8 +133,79 @@ public class TestFiltering extends SolrTestCaseJ4 {
   }
 
   static String f = "val_i";
+  static String f_s = "val_s";
+
+  static String f_s(int i) {
+    return String.format(Locale.ROOT, "%05d", i);
+  }
 
-  String frangeStr(boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+  String rangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+    String topLev="";
+    if (!cache || exclude) {
+      topLev = "{!" + (cache || random().nextBoolean() ? " cache=" + cache : "")
+          + (cost != 0 ? " cost=" + cost : "")
+          + ((exclude) ? " tag=t" : "") + "}";
+    }
+
+    String q = field + ":";
+    String q2 = q;
+    String lower1 = "[" + f_s(l);
+    String lower2 = l<=0 ? lower1 : ("{" + f_s(l-1));
+    String upper1 = f_s(u) + "]";
+    String upper2 = f_s(u+1) + "}";
+
+    if (random().nextBoolean()) {
+      q += lower1;
+      q2 += lower2;
+    } else {
+      q += lower2;
+      q2 += lower1;
+    }
+    q += " TO ";
+    q2 += " TO ";
+    if (random().nextBoolean()) {
+      q += upper1;
+      q2 += upper2;
+    } else {
+      q += upper2;
+      q2 += upper1;
+    }
+
+    // String q = field + ":[" + f_s(l) + " TO " + f_s(u) + "]";
+
+    if (negative) {
+      q = "-_query_:\"" + q + "\"";
+      // q = "-" + q;  // TODO: need to be encapsulated for some reason?
+    } else {
+      if (random().nextBoolean()) {
+        // try some different query structures - important for testing different code paths
+        switch (random().nextInt(5)) {
+          case 0:
+            q = q + " OR id:RAND"+random().nextInt();
+            break;
+          case 1:
+            q = "id:RAND"+random().nextInt() + " OR " + q;
+            break;
+          case 2:
+            q = "*:* AND " + q;
+            break;
+          case 3:
+            q = q + " AND " + q2;
+            break;
+          case 4:
+            q = q + " OR " + q2;
+            break;
+        }
+      }
+    }
+    return topLev + q;
+  }
+
+  String frangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
     String topLev="";
     if (!cache || exclude) {
@@ -142,7 +214,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
           + ((exclude) ? " tag=t" : "");
     }
 
-    String ret = "{!frange v="+f+" l="+l+" u="+u;
+    String ret = "{!frange v="+field+" l="+l+" u="+u;
     if (negative) {
       ret = "-_query_:\"" + ret + "}\"";
       if (topLev.length()>0) {
@@ -165,7 +237,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
     FixedBitSet[] sets = facetQuery ? new FixedBitSet[]{model.facetQuery} :
         (exclude ? new FixedBitSet[]{model.answer, model.facetQuery} : new FixedBitSet[]{model.answer, model.multiSelect, model.facetQuery});
 
-    if (random().nextInt(100) < 50) {
+    if (random().nextInt(100) < 60) {
       // frange
       int l=0;
       int u=0;
@@ -201,7 +273,10 @@ public class TestFiltering extends SolrTestCaseJ4 {
         }
       }
 
-      return frangeStr(!positive, l, u, cache, cost, exclude);
+      String whichField = random().nextBoolean() ? f : f_s;
+      return random().nextBoolean() ?
+          frangeStr(f, !positive, l, u, cache, cost, exclude)  // todo: frange doesn't work on the string field?
+          : rangeStr(whichField, !positive, l, u, cache, cost, exclude);
     } else {
       // term or boolean query
       int numWords = FixedBitSet.bits2words(model.indexSize);
@@ -256,16 +331,17 @@ public class TestFiltering extends SolrTestCaseJ4 {
     Model model = new Model();
 
     for (int iiter = 0; iiter<indexIter; iiter++) {
-      model.indexSize = random().nextInt(20 * RANDOM_MULTIPLIER) + 1;
+      model.indexSize = random().nextInt(40 * RANDOM_MULTIPLIER) + 1;
       clearIndex();
 
       for (int i=0; i<model.indexSize; i++) {
         String val = Integer.toString(i);
-        assertU(adoc("id",val,f,val));
+        SolrInputDocument doc = sdoc("id", val, f,val, f_s, f_s(i) );
+        updateJ(jsonAdd(doc), null);
         if (random().nextInt(100) < 20) {
           // duplicate doc 20% of the time (makes deletions)
-          assertU(adoc("id",val,f,val));
+          updateJ(jsonAdd(doc), null);
         }
         if (random().nextInt(100) < 10) {
           // commit 10% of the time (forces a new segment)