mirror of https://github.com/apache/lucene.git
LUCENE-8941: Build wildcard matches lazily
This commit is contained in:
parent
64884be044
commit
fa72da1c71
|
@ -91,6 +91,9 @@ Optimizations
|
||||||
* LUCENE-8935: BooleanQuery with no scoring clause can now early terminate the query when
|
* LUCENE-8935: BooleanQuery with no scoring clause can now early terminate the query when
|
||||||
the total hits is not requested.
|
the total hits is not requested.
|
||||||
|
|
||||||
|
* LUCENE-8941: Matches on wildcard queries will defer building their full
|
||||||
|
disjunction until a MatchesIterator is pulled (Alan Woodward)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
|
* LUCENE-8778 LUCENE-8911: Define analyzer SPI names as static final fields and document the names in Javadocs.
|
||||||
|
|
|
@ -74,7 +74,6 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
|
||||||
*/
|
*/
|
||||||
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
|
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
|
||||||
Objects.requireNonNull(field);
|
Objects.requireNonNull(field);
|
||||||
List<MatchesIterator> mis = new ArrayList<>();
|
|
||||||
Terms t = context.reader().terms(field);
|
Terms t = context.reader().terms(field);
|
||||||
if (t == null)
|
if (t == null)
|
||||||
return null;
|
return null;
|
||||||
|
@ -84,15 +83,92 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
|
||||||
if (te.seekExact(term)) {
|
if (te.seekExact(term)) {
|
||||||
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
|
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
|
||||||
if (pe.advance(doc) == doc) {
|
if (pe.advance(doc) == doc) {
|
||||||
mis.add(new TermMatchesIterator(query, pe));
|
return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
|
||||||
reuse = null;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
reuse = pe;
|
reuse = pe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return fromSubIterators(mis);
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchesIterator over a set of terms that only loads the first matching term at construction,
|
||||||
|
// waiting until next() is first called before it loads all other matching terms.
|
||||||
|
private static class TermsEnumDisjunctionMatchesIterator implements MatchesIterator {
|
||||||
|
|
||||||
|
private final MatchesIterator first;
|
||||||
|
private final BytesRefIterator terms;
|
||||||
|
private final TermsEnum te;
|
||||||
|
private final int doc;
|
||||||
|
private final Query query;
|
||||||
|
|
||||||
|
private MatchesIterator it = null;
|
||||||
|
|
||||||
|
TermsEnumDisjunctionMatchesIterator(MatchesIterator first, BytesRefIterator terms, TermsEnum te, int doc, Query query) {
|
||||||
|
this.first = first;
|
||||||
|
this.terms = terms;
|
||||||
|
this.te = te;
|
||||||
|
this.doc = doc;
|
||||||
|
this.query = query;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void init() throws IOException {
|
||||||
|
List<MatchesIterator> mis = new ArrayList<>();
|
||||||
|
mis.add(first);
|
||||||
|
PostingsEnum reuse = null;
|
||||||
|
for (BytesRef term = terms.next(); term != null; term = terms.next()) {
|
||||||
|
if (te.seekExact(term)) {
|
||||||
|
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
|
||||||
|
if (pe.advance(doc) == doc) {
|
||||||
|
mis.add(new TermMatchesIterator(query, pe));
|
||||||
|
reuse = null;
|
||||||
|
} else {
|
||||||
|
reuse = pe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
it = fromSubIterators(mis);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean next() throws IOException {
|
||||||
|
if (it == null) {
|
||||||
|
init();
|
||||||
|
}
|
||||||
|
assert it != null;
|
||||||
|
return it.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int startPosition() {
|
||||||
|
return it.startPosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int endPosition() {
|
||||||
|
return it.endPosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int startOffset() throws IOException {
|
||||||
|
return it.startOffset();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int endOffset() throws IOException {
|
||||||
|
return it.endOffset();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MatchesIterator getSubMatches() throws IOException {
|
||||||
|
return it.getSubMatches();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query getQuery() {
|
||||||
|
return it.getQuery();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
|
static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
|
||||||
|
|
|
@ -31,25 +31,30 @@ import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.PostingsEnum;
|
import org.apache.lucene.index.PostingsEnum;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.ReaderUtil;
|
import org.apache.lucene.index.ReaderUtil;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
public class TestMatchesIterator extends LuceneTestCase {
|
public class TestMatchesIterator extends LuceneTestCase {
|
||||||
|
|
||||||
protected IndexSearcher searcher;
|
protected IndexSearcher searcher;
|
||||||
protected Directory directory;
|
protected Directory directory;
|
||||||
protected IndexReader reader;
|
protected IndexReader reader = null;
|
||||||
|
|
||||||
private static final String FIELD_WITH_OFFSETS = "field_offsets";
|
private static final String FIELD_WITH_OFFSETS = "field_offsets";
|
||||||
private static final String FIELD_NO_OFFSETS = "field_no_offsets";
|
private static final String FIELD_NO_OFFSETS = "field_no_offsets";
|
||||||
|
@ -701,4 +706,63 @@ public class TestMatchesIterator extends LuceneTestCase {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMinimalSeekingWithWildcards() throws IOException {
|
||||||
|
SeekCountingLeafReader reader = new SeekCountingLeafReader(getOnlyLeafReader(this.reader));
|
||||||
|
this.searcher = new IndexSearcher(reader);
|
||||||
|
Query query = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "w"));
|
||||||
|
Weight w = searcher.createWeight(query.rewrite(reader), ScoreMode.COMPLETE, 1);
|
||||||
|
|
||||||
|
// docs 0-3 match several different terms here, but we only seek to the first term and
|
||||||
|
// then short-cut return; other terms are ignored until we try to iterate over matches
|
||||||
|
int[] expectedSeeks = new int[]{ 1, 1, 1, 1, 6, 6 };
|
||||||
|
int i = 0;
|
||||||
|
for (LeafReaderContext ctx : reader.leaves()) {
|
||||||
|
for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) {
|
||||||
|
reader.seeks = 0;
|
||||||
|
w.matches(ctx, doc);
|
||||||
|
assertEquals("Unexpected seek count on doc " + doc, expectedSeeks[i], reader.seeks);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SeekCountingLeafReader extends FilterLeafReader {
|
||||||
|
|
||||||
|
int seeks = 0;
|
||||||
|
|
||||||
|
public SeekCountingLeafReader(LeafReader in) {
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Terms terms(String field) throws IOException {
|
||||||
|
Terms terms = super.terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new FilterTerms(terms) {
|
||||||
|
@Override
|
||||||
|
public TermsEnum iterator() throws IOException {
|
||||||
|
return new FilterTermsEnum(super.iterator()) {
|
||||||
|
@Override
|
||||||
|
public boolean seekExact(BytesRef text) throws IOException {
|
||||||
|
seeks++;
|
||||||
|
return super.seekExact(text);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CacheHelper getCoreCacheHelper() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CacheHelper getReaderCacheHelper() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue