mirror of https://github.com/apache/lucene.git
SOLR-8037: speed up term range queries, use filter cache for embedded ranges
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1702661 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent f213b3bfdc
commit 996a3fb117
LSBRadixSorter.java:
@@ -21,8 +21,9 @@ import java.util.Arrays;
 /**
  * A LSB Radix sorter for unsigned int values.
+ * @lucene.internal
  */
-final class LSBRadixSorter {
+public final class LSBRadixSorter {
 
   private static final int INSERTION_SORT_THRESHOLD = 30;
   private static final int HISTOGRAM_SIZE = 256;
 
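Note: the visibility change above is what lets the new Solr DocSetBuilder (added later in this commit) reuse Lucene's radix sorter for its int[] buffer. A minimal sketch of the only call DocSetBuilder makes, with made-up sample data:

    // sorts docs[0..len) ascending; duplicates are preserved
    int[] docs = {42, 7, 19, 7, 3};
    new LSBRadixSorter().sort(docs, 0, docs.length);
    // docs is now {3, 7, 7, 19, 42}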
CHANGES.txt:
@@ -72,6 +72,11 @@ Optimizations
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
   specified.  Speedups of up to 8% were observed.  (yonik)
 
+* SOLR-8037: Speed up creation of filters from term range queries (i.e. non-numeric range queries)
+  and use the filter cache for term range queries that are part of larger queries.  Some observed
+  speedups were up to 2.5x for production of filters, and up to 10x for query evaluation with
+  embedded term range queries that resulted in filter cache hits.  (yonik)
+
 
 Other Changes
 ----------------------
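For context, a hypothetical pair of requests illustrating the two cases the entry describes (field and term values are made up): a term range used directly as a filter, which already produced a filter and is now built faster:

    fq=author:[smith TO thompson]

and a term range embedded inside a larger query, which previously could not use the filter cache and now can:

    q=+inStock:true +author:[smith TO thompson]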
SolrRangeQuery.java (new file, package org.apache.solr.query):
@@ -0,0 +1,502 @@
package org.apache.solr.query;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetProducer;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher;

/** @lucene.experimental */
public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetProducer {
  private final String field;
  private final BytesRef lower;
  private final BytesRef upper;
  private byte flags;
  private static byte FLAG_INC_LOWER = 0x01;
  private static byte FLAG_INC_UPPER = 0x02;

  public SolrRangeQuery(String field, BytesRef lower, BytesRef upper, boolean includeLower, boolean includeUpper) {
    this.field = field;
    this.lower = lower;
    this.upper = upper;
    this.flags = (byte)((this.lower != null && includeLower ? FLAG_INC_LOWER : 0) | (this.upper != null && includeUpper ? FLAG_INC_UPPER : 0));
  }

  public String getField() {
    return field;
  }

  public boolean includeLower() {
    return (flags & FLAG_INC_LOWER) != 0;
  }

  public boolean includeUpper() {
    return (flags & FLAG_INC_UPPER) != 0;
  }

  @Override
  public int hashCode() {
    int hash = 0x8f2c9ba7 * (flags+1);  // avoid multiplying by 0
    hash = hash * 29 + ((lower == null) ? 0 : lower.hashCode());  // TODO: simpler hash code here?
    hash = hash * 29 + ((upper == null) ? 0 : upper.hashCode());
    return hash;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof SolrRangeQuery)) {
      return false;
    }
    SolrRangeQuery other = (SolrRangeQuery)obj;

    return (this.flags == other.flags)
        && (this.field.equals(other.field))
        && (this.lower == other.lower || (this.lower != null && other.lower != null && this.lower.equals(other.lower)))
        && (this.upper == other.upper || (this.upper != null && other.upper != null && this.upper.equals(other.upper)))
        ;
  }

  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    if (!getField().equals(field)) {
      buffer.append(getField());
      buffer.append(":");
    }
    // TODO: use our schema?
    buffer.append(includeLower() ? '[' : '{');
    buffer.append(endpoint(lower));
    buffer.append(" TO ");
    buffer.append(endpoint(upper));
    buffer.append(includeUpper() ? ']' : '}');
    return buffer.toString();
  }

  private String endpoint(BytesRef ref) {
    if (ref == null) return "*";
    String toStr = Term.toString(ref);
    if ("*".equals(toStr)) {
      toStr = "\\*";
    }
    // TODO: other escaping
    return toStr;
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    return this;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needScores) throws IOException {
    return new ConstWeight(searcher, needScores);
    /*
    DocSet docs = createDocSet(searcher.getIndexReader().leaves(), searcher.getIndexReader().maxDoc());
    SolrConstantScoreQuery csq = new SolrConstantScoreQuery( docs.getTopFilter() );
    return csq.createWeight(searcher, needScores);
    */
  }

  @Override
  public DocSet createDocSet(SolrIndexSearcher searcher) throws IOException {
    return createDocSet( searcher, Math.min(64,(searcher.maxDoc()>>>10)+4) );
  }

  private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
    int maxDoc = searcher.maxDoc();
    BitDocSet liveDocs = searcher.getLiveDocs();
    FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();

    DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);

    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();

    int maxTermsPerSegment = 0;
    for (LeafReaderContext ctx : leaves) {
      TermsEnum te = getTermsEnum(ctx);
      int termsVisited = builder.add(te, ctx.docBase);
      maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
    }

    return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
  }


  private class RangeTermsEnum extends TermsEnum {

    TermsEnum te;
    BytesRef curr;
    boolean positioned;

    public RangeTermsEnum(Terms terms) throws IOException {
      if (terms == null) {
        positioned = true;
      } else {
        te = terms.iterator();
        if (lower != null) {
          TermsEnum.SeekStatus status = te.seekCeil(lower);
          if (status == TermsEnum.SeekStatus.END) {
            positioned = true;
            curr = null;
          } else if (status == SeekStatus.FOUND) {
            positioned = includeLower();
            curr = te.term();
          } else {
            // lower bound not found, so includeLower is irrelevant
            positioned = true;
            curr = te.term();
          }
        }
      }
    }

    @Override
    public SeekStatus seekCeil(BytesRef text) throws IOException {
      return te.seekCeil(text);
    }

    @Override
    public void seekExact(long ord) throws IOException {
      te.seekExact(ord);
    }

    @Override
    public BytesRef term() throws IOException {
      return te.term();  // should be equal to curr, except if we went past the end
    }

    @Override
    public long ord() throws IOException {
      return te.ord();
    }

    @Override
    public int docFreq() throws IOException {
      return te.docFreq();
    }

    @Override
    public long totalTermFreq() throws IOException {
      return te.totalTermFreq();
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
      return te.postings(reuse, flags);
    }

    @Override
    public BytesRef next() throws IOException {
      if (positioned) {
        positioned = false;
      } else {
        curr = te.next();
      }

      if (curr == null) return null;

      if (upper != null) {
        int cmp = curr.compareTo(upper);
        if (cmp < 0 || cmp == 0 && includeUpper()) {
          return curr;
        } else {
          curr = null;
        }
      }
      return curr;
    }

    @Override
    public AttributeSource attributes() {
      return te.attributes();
    }

    @Override
    public boolean seekExact(BytesRef text) throws IOException {
      return te.seekExact(text);
    }

    @Override
    public void seekExact(BytesRef term, TermState state) throws IOException {
      te.seekExact(term, state);
    }

    @Override
    public TermState termState() throws IOException {
      return te.termState();
    }
  }


  public TermsEnum getTermsEnum(LeafReaderContext ctx) throws IOException {
    return new RangeTermsEnum( ctx.reader().terms(getField()) );
  }


  private static class TermAndState {
    final BytesRef term;
    final TermState state;
    final int docFreq;
    final long totalTermFreq;

    TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
      this.term = term;
      this.state = state;
      this.docFreq = docFreq;
      this.totalTermFreq = totalTermFreq;
    }
  }

  private static class SegState {
    final Weight weight;
    final DocIdSet set;

    SegState(Weight weight) {
      this.weight = weight;
      this.set = null;
    }

    SegState(DocIdSet set) {
      this.set = set;
      this.weight = null;
    }
  }

  // adapted from MultiTermQueryConstantScoreWrapper
  class ConstWeight extends ConstantScoreWeight {

    private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;

    final IndexSearcher searcher;
    final boolean needScores;
    boolean checkedFilterCache;
    Filter filter;
    final SegState[] segStates;


    protected ConstWeight(IndexSearcher searcher, boolean needScores) {
      super( SolrRangeQuery.this );
      this.searcher = searcher;
      this.segStates = new SegState[ searcher.getIndexReader().leaves().size() ];
      this.needScores = needScores;
    }


    /** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far
     *  or (-count - 1) if this should be rewritten into a boolean query.
     *  The termEnum will already be positioned on the next term if not exhausted.
     */
    private long collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
      long count = 0;
      final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
      for (int i = 0; i < threshold; ++i) {
        final BytesRef term = termsEnum.next();
        if (term == null) {
          return -count - 1;
        }
        TermState state = termsEnum.termState();
        if (state.isRealTerm() == false) {
          // TermQuery does not accept fake terms for now
          return count;
        }
        int df = termsEnum.docFreq();
        count += df;
        terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, df, termsEnum.totalTermFreq()));
      }
      return termsEnum.next() == null ? (-count - 1) : count;
    }

    private SegState getSegState(LeafReaderContext context) throws IOException {
      SegState segState = segStates[context.ord];
      if (segState != null) return segState;

      // first time, check our filter cache
      boolean doCheck = !checkedFilterCache && context.ord == 0;
      checkedFilterCache = true;
      SolrIndexSearcher solrSearcher = null;
      if (doCheck && searcher instanceof SolrIndexSearcher) {
        solrSearcher = (SolrIndexSearcher)searcher;
        if (solrSearcher.getFilterCache() == null) {
          doCheck = false;
        } else {
          DocSet answer = solrSearcher.getFilterCache().get(SolrRangeQuery.this);
          if (answer != null) {
            filter = answer.getTopFilter();
          }
        }
      }

      if (filter != null) {
        return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
      }


      final Terms terms = context.reader().terms(SolrRangeQuery.this.getField());
      if (terms == null) {
        return segStates[context.ord] = new SegState((DocIdSet) null);
      }

      final TermsEnum termsEnum = SolrRangeQuery.this.getTermsEnum(context);

      PostingsEnum docs = null;

      final List<TermAndState> collectedTerms = new ArrayList<>();
      long count = collectTerms(context, termsEnum, collectedTerms);
      if (count < 0) {
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        for (TermAndState t : collectedTerms) {
          final TermContext termContext = new TermContext(searcher.getTopReaderContext());
          termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
          bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termContext), BooleanClause.Occur.SHOULD);
        }
        Query q = new ConstantScoreQuery(bq.build());
        final Weight weight = searcher.rewrite(q).createWeight(searcher, needScores);
        weight.normalize(1f, score());
        return segStates[context.ord] = new SegState(weight);
      }

      // Too many terms for boolean query...

      if (doCheck) {
        DocSet answer = createDocSet(solrSearcher, count);
        solrSearcher.getFilterCache().put(SolrRangeQuery.this, answer);
        filter = answer.getTopFilter();
        return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
      }

      /* FUTURE: reuse term states in the future to help build DocSet, use collected count so far...
      Bits liveDocs = context.reader().getLiveDocs();
      int base = context.docBase;
      int termsVisited = collectedTerms.size();

      DocSetBuilder builder = new DocSetBuilder(searcher.getIndexReader().maxDoc());
      if (!collectedTerms.isEmpty()) {
        TermsEnum termsEnum2 = terms.iterator();
        for (TermAndState t : collectedTerms) {
          termsEnum2.seekExact(t.term, t.state);
          docs = termsEnum2.postings(docs, PostingsEnum.NONE);
          builder.add(docs, context.docBase, liveDocs);
        }
      }

      termsVisited += builder.add(termsEnum, base, liveDocs);
      */

      DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
      builder.grow((int)Math.min(Integer.MAX_VALUE,count));
      if (collectedTerms.isEmpty() == false) {
        TermsEnum termsEnum2 = terms.iterator();
        for (TermAndState t : collectedTerms) {
          termsEnum2.seekExact(t.term, t.state);
          docs = termsEnum2.postings(docs, PostingsEnum.NONE);
          builder.add(docs);
        }
      }

      do {
        // already positioned on the next term, so don't call next() here...
        docs = termsEnum.postings(docs, PostingsEnum.NONE);
        builder.add(docs);
      } while (termsEnum.next() != null);

      DocIdSet segSet = builder.build();
      return segStates[context.ord] = new SegState(segSet);
    }

    private Scorer scorer(DocIdSet set) throws IOException {
      if (set == null) {
        return null;
      }
      final DocIdSetIterator disi = set.iterator();
      if (disi == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), disi);
    }

    @Override
    public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
      final SegState weightOrBitSet = getSegState(context);
      if (weightOrBitSet.weight != null) {
        return weightOrBitSet.weight.bulkScorer(context);
      } else {
        final Scorer scorer = scorer(weightOrBitSet.set);
        if (scorer == null) {
          return null;
        }
        return new DefaultBulkScorer(scorer);
      }
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final SegState weightOrBitSet = getSegState(context);
      if (weightOrBitSet.weight != null) {
        return weightOrBitSet.weight.scorer(context);
      } else {
        return scorer(weightOrBitSet.set);
      }
    }
  }
}
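A short sketch of how the new class can be exercised directly, assuming a SolrIndexSearcher is already open; the field name and bounds are illustrative only:

    // build an inclusive range over a hypothetical "name" field: name:[aaa TO zzz]
    SolrRangeQuery rq = new SolrRangeQuery("name",
        new BytesRef("aaa"), new BytesRef("zzz"), true, true);
    // DocSetProducer entry point, used when the query is a top-level filter
    DocSet docs = rq.createDocSet(solrIndexSearcher);
    // or use it as a normal Lucene query embedded in a larger request
    TopDocs hits = solrIndexSearcher.search(rq, 10);

The key design point visible in ConstWeight.getSegState: per segment, it first collects up to 16 terms; few terms rewrite to a constant-score boolean query, while many terms trigger a full DocSet build that is also stored in the filter cache for reuse.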
FieldType.java:
@@ -63,6 +63,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -720,12 +721,11 @@ public abstract class FieldType extends FieldProperties {
           part2 == null ? null : new BytesRef(toInternal(part2)),
           minInclusive, maxInclusive);
     } else {
-      MultiTermQuery rangeQuery = TermRangeQuery.newStringRange(
+      SolrRangeQuery rangeQuery = new SolrRangeQuery(
             field.getName(),
-            part1 == null ? null : toInternal(part1),
-            part2 == null ? null : toInternal(part2),
+            part1 == null ? null : new BytesRef(toInternal(part1)),
+            part2 == null ? null : new BytesRef(toInternal(part2)),
             minInclusive, maxInclusive);
-      rangeQuery.setRewriteMethod(getRewriteMethod(parser, field));
       return rangeQuery;
     }
   }
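With this change a non-numeric range no longer needs an explicit rewrite method: SolrRangeQuery decides per segment whether to rewrite into a small boolean query or build a cached doc set. A hedged illustration of what getRangeQuery now constructs (field name and bounds are placeholders):

    // roughly the new code path for a string field; toInternal output wrapped in BytesRef
    SolrRangeQuery q = new SolrRangeQuery(
        "cat",
        new BytesRef("a"),    // part1, already converted to index form
        new BytesRef("m"),    // part2
        true, true);          // minInclusive, maxInclusive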
TextField.java:
@@ -28,6 +28,7 @@ import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -136,7 +137,7 @@ public class TextField extends FieldType {
     Analyzer multiAnalyzer = getMultiTermAnalyzer();
     BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
     BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
     return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
   }
 
 public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
BitDocSet.java:
@@ -285,8 +285,7 @@ public class BitDocSet extends DocSetBase {
     }
 
     final int base = context.docBase;
-    final int maxDoc = reader.maxDoc();
-    final int max = base + maxDoc;   // one past the max doc in this segment.
+    final int max = base + reader.maxDoc();   // one past the max doc in this segment.
 
     return BitsFilteredDocIdSet.wrap(new DocIdSet() {
       @Override
@@ -302,10 +301,11 @@ public class BitDocSet extends DocSetBase {
 
         @Override
         public int nextDoc() {
-          if (pos >= bs.length() - 1) {
+          int next = pos+1;
+          if (next >= max) {
             return adjustedDoc = NO_MORE_DOCS;
           } else {
-            pos = bs.nextSetBit(pos + 1);
+            pos = bs.nextSetBit(next);
             return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
           }
         }
@@ -314,7 +314,7 @@ public class BitDocSet extends DocSetBase {
         public int advance(int target) {
           if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
           int adjusted = target + base;
-          if (adjusted >= bs.length()) {
+          if (adjusted >= max) {
             return adjustedDoc = NO_MORE_DOCS;
           } else {
             pos = bs.nextSetBit(adjusted);
@@ -326,6 +326,7 @@ public class BitDocSet extends DocSetBase {
         public long cost() {
           // we don't want to actually compute cardinality, but
           // if it's already been computed, we use it (pro-rated for the segment)
+          int maxDoc = max-base;
           if (size != -1) {
             return (long)(size * ((FixedBitSet.bits2words(maxDoc)<<6) / (float)bs.length()));
           } else {
@@ -350,7 +351,7 @@ public class BitDocSet extends DocSetBase {
 
         @Override
         public int length() {
-          return maxDoc;
+          return max-base;
         }
       };
     }
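The iterator fixes above all replace bounds derived from the whole top-level bitset (bs.length()) with the segment's own upper bound (max = base + reader.maxDoc()), so a per-segment view stops at its segment boundary instead of scanning to the end of the global set. A tiny worked example under assumed values:

    // suppose base = 100 and the segment holds 50 docs, so max = 150;
    // a set bit at global id 120 maps to local doc 120 - 100 = 20,
    // while a set bit at global id 160 belongs to a later segment and
    // must now yield NO_MORE_DOCS rather than a bogus local doc 60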
DocSetBuilder.java (new file, package org.apache.solr.search):
@@ -0,0 +1,222 @@
package org.apache.solr.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Adapted from DocIdSetBuilder to build DocSets
 *
 * @lucene.internal
 */
public final class DocSetBuilder {

  private final int maxDoc;
  private final int threshold;

  private int[] buffer;
  private int pos;

  private FixedBitSet bitSet;


  public DocSetBuilder(int maxDoc, long costEst) {
    this.maxDoc = maxDoc;
    // For ridiculously small sets, we'll just use a sorted int[]
    // maxDoc >>> 7 is a good value if you want to save memory, lower values
    // such as maxDoc >>> 11 should provide faster building but at the expense
    // of using a full bitset even for quite sparse data
    this.threshold = (maxDoc >>> 7) + 4;  // the +4 is for better testing on small indexes

    if (costEst > threshold) {
      bitSet = new FixedBitSet(maxDoc);
    } else {
      this.buffer = new int[Math.max((int)costEst,1)];
    }
  }

  private void upgradeToBitSet() {
    assert bitSet == null;
    bitSet = new FixedBitSet(maxDoc);
    for (int i = 0; i < pos; ++i) {
      bitSet.set(buffer[i]);
    }
    this.buffer = null;
    this.pos = 0;
  }

  private void growBuffer(int minSize) {
    if (minSize < buffer.length) return;

    int newSize = buffer.length;
    while (newSize < minSize) {
      newSize = newSize << 1;
    }
    newSize = Math.min(newSize, threshold);

    int[] newBuffer = new int[newSize];
    System.arraycopy(buffer, 0, newBuffer, 0, pos);
    buffer = newBuffer;
  }

  public void add(DocIdSetIterator iter, int base) throws IOException {
    grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));

    if (bitSet != null) {
      add(bitSet, iter, base);
    } else {
      while (true) {
        for (int i = pos; i < buffer.length; ++i) {
          final int doc = iter.nextDoc();
          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
            pos = i;  // update pos
            return;
          }
          buffer[i] = doc + base;  // using the loop counter may help with removal of bounds checking
        }

        pos = buffer.length;  // update pos
        if (pos + 1 >= threshold) {
          break;
        }

        growBuffer(pos + 1);
      }

      upgradeToBitSet();
      add(bitSet, iter, base);
    }
  }


  public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
      bitSet.set(doc + base);
    }
  }

  /** Returns the number of terms visited */
  public int add(TermsEnum te, int base) throws IOException {
    PostingsEnum postings = null;

    int termCount = 0;
    for(;;) {
      BytesRef term = te.next();
      if (term == null) break;
      termCount++;
      postings = te.postings(postings, PostingsEnum.NONE);
      add(postings, base);
    }

    return termCount;
  }


  public void grow(int numDocs) {
    if (bitSet == null) {
      final long newLength = pos + numDocs;
      if (newLength < threshold) {
        growBuffer((int) newLength);
      } else {
        upgradeToBitSet();
      }
    }
  }


  public void add(int doc) {
    if (bitSet != null) {
      bitSet.set(doc);
    } else {
      if (pos >= buffer.length) {
        if (pos + 1 >= threshold) {
          upgradeToBitSet();
          bitSet.set(doc);
          return;
        }
        growBuffer(pos + 1);
      }
      buffer[pos++] = doc;
    }
  }

  private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
    if (length == 0) {
      return 0;
    }
    int l = 1;
    int previous = arr[0];
    for (int i = 1; i < length; ++i) {
      final int value = arr[i];
      assert value >= previous;
      if (value != previous) {
        if (acceptDocs == null || acceptDocs.get(value)) {
          arr[l++] = value;
          previous = value;
        }
      }
    }
    return l;
  }


  public DocSet build(FixedBitSet filter) {
    if (bitSet != null) {
      if (filter != null) {
        bitSet.and(filter);
      }
      return new BitDocSet(bitSet);
      // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
    } else {
      LSBRadixSorter sorter = new LSBRadixSorter();
      sorter.sort(buffer, 0, pos);
      final int l = dedup(buffer, pos, filter);
      assert l <= pos;
      return new SortedIntDocSet(buffer, l);  // TODO: have option to not shrink in the future if it will be a temporary set
    }
  }

  /** Only use this if you know there were no duplicates and that docs were collected in-order! */
  public DocSet buildUniqueInOrder(FixedBitSet filter) {
    if (bitSet != null) {
      if (filter != null) {
        bitSet.and(filter);
      }
      return new BitDocSet(bitSet);
    } else {
      // don't need to sort, but still need to remove non accepted docs
      int l = pos;
      if (filter != null) {
        l = dedup(buffer, pos, filter);
      }
      return new SortedIntDocSet(buffer, l);  // TODO: have option to not shrink in the future if it will be a temporary set
    }
  }

}
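A sketch of typical use, mirroring how SolrRangeQuery.createDocSet drives the builder; the field name and cost estimate are illustrative:

    // collect all docs for every term the enum visits, then build a DocSet
    DocSetBuilder builder = new DocSetBuilder(searcher.maxDoc(), 64 /* cost estimate */);
    for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
      Terms terms = ctx.reader().terms("name");    // illustrative field
      if (terms == null) continue;
      builder.add(terms.iterator(), ctx.docBase);  // adds every term's postings
    }
    DocSet all = builder.build(null);              // null = no live-docs filter

The design choice: start with a sorted int[] for small result sets and transparently upgrade to a FixedBitSet once more than (maxDoc >>> 7) + 4 ids accumulate, which is what makes filter production cheap for both sparse and dense ranges.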
SolrIndexSearcher.java:
@@ -462,6 +462,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     return fieldNames;
   }
 
+  public SolrCache<Query,DocSet> getFilterCache() {
+    return filterCache;
+  }
+
   /**
    * Returns a collection of the names of all stored fields which can be
    * highlighted the index reader knows about.
@@ -919,12 +923,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
       DocSet absAnswer = filterCache.get(absQ);
       if (absAnswer!=null) {
         if (positive) return absAnswer;
-        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+        else return getLiveDocs().andNot(absAnswer);
       }
     }
 
     DocSet absAnswer = getDocSetNC(absQ, null);
-    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+    DocSet answer = positive ? absAnswer : getLiveDocs().andNot(absAnswer);
 
     if (filterCache != null) {
       // cache negative queries as positive
@@ -948,7 +952,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
   }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
+  private BitDocSet liveDocs;
+
+  public BitDocSet getLiveDocs() throws IOException {
+    // going through the filter cache will provide thread safety here
+    if (liveDocs == null) {
+      liveDocs = getDocSetBits(matchAllDocsQuery);
+    }
+    return liveDocs;
+  }
 
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
@@ -1129,7 +1141,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
 
     // Are all of our normal cached filters negative?
     if (end > 0 && answer==null) {
-      answer = getPositiveDocSet(matchAllDocsQuery);
+      answer = getLiveDocs();
     }
 
     // do negative queries first to shrink set size
@@ -1152,7 +1164,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     } else {
       if (postFilters == null) {
         if (answer == null) {
-          answer = getPositiveDocSet(matchAllDocsQuery);
+          answer = getLiveDocs();
         }
         // "answer" is the only part of the filter, so set it.
         pf.answer = answer;
@@ -2150,7 +2162,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
 
     // if both negative, we need to create a temp DocSet since we
     // don't have a counting method that takes three.
-    DocSet all = getPositiveDocSet(matchAllDocsQuery);
+    DocSet all = getLiveDocs();
 
     // -a -b == *:*.andNot(a).andNotSize(b) == *.*.andNotSize(a.union(b))
     // we use the last form since the intermediate DocSet should normally be smaller.
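The getLiveDocs() addition memoizes the set of all live documents (resolved once through the filter cache, which also provides thread safety) so that negative queries can subtract from it directly instead of re-resolving the *:* query each time. Conceptually (a sketch, not the searcher's literal code):

    // docs matching "NOT q" == all live docs minus docs matching q
    DocSet matching = searcher.getDocSet(q);
    DocSet negated  = searcher.getLiveDocs().andNot(matching);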
TestFiltering.java:
@@ -21,6 +21,7 @@ package org.apache.solr.search;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SolrQueryRequest;
@@ -34,7 +35,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeTests() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml","schema_latest.xml");
   }
 
 
@@ -132,8 +133,79 @@ public class TestFiltering extends SolrTestCaseJ4 {
   }
 
   static String f = "val_i";
+  static String f_s = "val_s";
+  static String f_s(int i) {
+    return String.format(Locale.ROOT, "%05d", i);
+  }
 
-  String frangeStr(boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+  String rangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+    String topLev="";
+    if (!cache || exclude) {
+      topLev = "{!" + (cache || random().nextBoolean() ? " cache=" + cache : "")
+          + (cost != 0 ? " cost=" + cost : "")
+          + ((exclude) ? " tag=t" : "") + "}";
+    }
+
+    String q = field + ":";
+    String q2 = q;
+
+    String lower1 = "[" + f_s(l);
+    String lower2 = l<=0 ? lower1 : ("{" + f_s(l-1));
+    String upper1 = f_s(u) + "]";
+    String upper2 = f_s(u+1) + "}";
+
+    if (random().nextBoolean()) {
+      q += lower1;
+      q2 += lower2;
+    } else {
+      q += lower2;
+      q2 += lower1;
+    }
+
+    q += " TO ";
+    q2 += " TO ";
+
+    if (random().nextBoolean()) {
+      q += upper1;
+      q2 += upper2;
+    } else {
+      q += upper2;
+      q2 += upper1;
+    }
+
+    // String q = field + ":[" + f_s(l) + " TO " + f_s(u) + "]";
+
+    if (negative) {
+      q = "-_query_:\"" + q + "\"";
+      // q = "-" + q;   // TODO: need to be encapsulated for some reason?
+    } else {
+      if (random().nextBoolean()) {
+        // try some different query structures - important for testing different code paths
+        switch (random().nextInt(5)) {
+          case 0:
+            q = q + " OR id:RAND" + random().nextInt();
+            break;
+          case 1:
+            q = "id:RAND" + random().nextInt() + " OR " + q;
+            break;
+          case 2:
+            q = "*:* AND " + q;
+            break;
+          case 3:
+            q = q + " AND " + q2;
+            break;
+          case 4:
+            q = q + " OR " + q2;
+            break;
+        }
+      }
+    }
+    return topLev + q;
+  }
+
+  String frangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+
     String topLev="";
     if (!cache || exclude) {
@@ -142,7 +214,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
           + ((exclude) ? " tag=t" : "");
     }
 
-    String ret = "{!frange v="+f+" l="+l+" u="+u;
+    String ret = "{!frange v="+field+" l="+l+" u="+u;
     if (negative) {
       ret = "-_query_:\"" + ret + "}\"";
       if (topLev.length()>0) {
@@ -165,7 +237,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
     FixedBitSet[] sets = facetQuery ? new FixedBitSet[]{model.facetQuery} :
         (exclude ? new FixedBitSet[]{model.answer, model.facetQuery} : new FixedBitSet[]{model.answer, model.multiSelect, model.facetQuery});
 
-    if (random().nextInt(100) < 50) {
+    if (random().nextInt(100) < 60) {
       // frange
       int l=0;
       int u=0;
@@ -201,7 +273,10 @@ public class TestFiltering extends SolrTestCaseJ4 {
       }
     }
 
-      return frangeStr(!positive, l, u, cache, cost, exclude);
+      String whichField = random().nextBoolean() ? f : f_s;
+      return random().nextBoolean() ?
+          frangeStr(f, !positive, l, u, cache, cost, exclude)   // todo: frange doesn't work on the string field?
+          : rangeStr(whichField, !positive, l, u, cache, cost, exclude);
     } else {
       // term or boolean query
       int numWords = FixedBitSet.bits2words(model.indexSize);
@@ -256,16 +331,17 @@ public class TestFiltering extends SolrTestCaseJ4 {
     Model model = new Model();
 
     for (int iiter = 0; iiter<indexIter; iiter++) {
-      model.indexSize = random().nextInt(20 * RANDOM_MULTIPLIER) + 1;
+      model.indexSize = random().nextInt(40 * RANDOM_MULTIPLIER) + 1;
       clearIndex();
 
       for (int i=0; i<model.indexSize; i++) {
        String val = Integer.toString(i);
 
-        assertU(adoc("id",val,f,val));
+        SolrInputDocument doc = sdoc("id", val, f,val, f_s, f_s(i) );
+        updateJ(jsonAdd(doc), null);
         if (random().nextInt(100) < 20) {
           // duplicate doc 20% of the time (makes deletions)
-          assertU(adoc("id",val,f,val));
+          updateJ(jsonAdd(doc), null);
         }
         if (random().nextInt(100) < 10) {
           // commit 10% of the time (forces a new segment)
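For reference, one possible string the new rangeStr helper can emit, assuming l=3, u=17, cache=false, cost=50, exclude=true (one random draw among the structures it generates):

    {! cache=false cost=50 tag=t}val_s:[00003 TO 00017]

It may equally produce the exclusive-endpoint form {00002 TO 00018}, which selects the same documents because the indexed string values are zero-padded integers; randomly mixing the two exercises both inclusive and exclusive bound handling in SolrRangeQuery.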