LUCENE-8229: Add Weight.matches() to iterate over match positions

This commit is contained in:
Alan Woodward 2018-04-11 09:43:27 +01:00
parent 5b250b4a40
commit 040a9601b1
26 changed files with 1317 additions and 10 deletions

View File

@ -134,6 +134,11 @@ New Features
soft deletes if the reader is opened from a directory. (Simon Willnauer, soft deletes if the reader is opened from a directory. (Simon Willnauer,
Mike McCandless, Uwe Schindler, Adrien Grand) Mike McCandless, Uwe Schindler, Adrien Grand)
* LUCENE-8229: Add a method Weight.matches(LeafReaderContext, doc) that returns
an iterator over matching positions for a given query and document. This
allows exact hit extraction and will enable implementation of accurate
highlighters. (Alan Woodward, Adrien Grand, David Smiley)
Bug Fixes Bug Fixes
* LUCENE-8234: Fixed bug in how spatial relationship is computed for * LUCENE-8234: Fixed bug in how spatial relationship is computed for

View File

@ -119,6 +119,41 @@ final class BooleanWeight extends Weight {
} }
} }
/**
 * Collects the {@link Matches} of every clause that contributes to a hit on {@code doc}.
 *
 * Returns {@code null} as soon as a prohibited clause matches or a required clause
 * does not, and also when fewer optional clauses match than the query's
 * minimum-should-match setting demands.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final int requiredOptionalHits = query.getMinimumNumberShouldMatch();
  final List<Matches> collected = new ArrayList<>();
  int optionalHits = 0;
  // weights and clauses are consumed in lock-step, one clause per weight
  final Iterator<BooleanClause> clauses = query.clauses().iterator();
  for (Weight weight : weights) {
    final BooleanClause clause = clauses.next();
    final Matches clauseMatches = weight.matches(context, doc);
    final boolean hit = clauseMatches != null;
    if (clause.isProhibited() && hit) {
      return null;
    }
    if (clause.isRequired()) {
      if (hit == false) {
        return null;
      }
      collected.add(clauseMatches);
    }
    if (clause.getOccur() == Occur.SHOULD && hit) {
      collected.add(clauseMatches);
      optionalHits++;
    }
  }
  if (optionalHits < requiredOptionalHits) {
    return null;
  }
  return Matches.fromSubMatches(collected);
}
static BulkScorer disableScoring(final BulkScorer scorer) { static BulkScorer disableScoring(final BulkScorer scorer) {
return new BulkScorer() { return new BulkScorer() {

View File

@ -158,6 +158,11 @@ public final class ConstantScoreQuery extends Query {
}; };
} }
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// Match extraction is forwarded as-is to the wrapped inner weight.
return innerWeight.matches(context, doc);
}
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context); ScorerSupplier scorerSupplier = scorerSupplier(context);

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
/**
* A {@link MatchesIterator} that combines matches from a set of sub-iterators
*
* Matches are sorted by their start positions, and then by their end positions, so that
* prefixes sort first. Matches may overlap, or be duplicated if they appear in more
* than one of the sub-iterators.
*/
final class DisjunctionMatchesIterator implements MatchesIterator {
/**
* Create a {@link DisjunctionMatchesIterator} over a list of terms
*
* Only terms that have at least one match in the given document will be included
*/
static MatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
Objects.requireNonNull(field);
for (Term term : terms) {
if (Objects.equals(field, term.field()) == false) {
throw new IllegalArgumentException("Tried to generate iterator from terms in multiple fields: expected [" + field + "] but got [" + term.field() + "]");
}
}
return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
}
private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
return new BytesRefIterator() {
int i = 0;
@Override
public BytesRef next() {
if (i >= terms.size())
return null;
return terms.get(i++).bytes();
}
};
}
/**
* Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
*
* Only terms that have at least one match in the given document will be included
*/
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
Objects.requireNonNull(field);
List<MatchesIterator> mis = new ArrayList<>();
Terms t = context.reader().terms(field);
if (t == null)
return null;
TermsEnum te = t.iterator();
PostingsEnum reuse = null;
for (BytesRef term = terms.next(); term != null; term = terms.next()) {
if (te.seekExact(term)) {
PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
if (pe.advance(doc) == doc) {
// TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
reuse = null;
}
else {
reuse = pe;
}
}
}
if (mis.size() == 0)
return null;
if (mis.size() == 1)
return mis.get(0);
return new DisjunctionMatchesIterator(mis);
}
static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
if (mis.size() == 0)
return null;
if (mis.size() == 1)
return mis.get(0);
return new DisjunctionMatchesIterator(mis);
}
private final PriorityQueue<MatchesIterator> queue;
private boolean started = false;
private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
queue = new PriorityQueue<MatchesIterator>(matches.size()){
@Override
protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
return a.startPosition() < b.startPosition() ||
(a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition()) ||
(a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition() && a.term().compareTo(b.term()) < 0);
}
};
for (MatchesIterator mi : matches) {
if (mi.next()) {
queue.add(mi);
}
}
}
@Override
public boolean next() throws IOException {
if (started == false) {
return started = true;
}
if (queue.top().next() == false) {
queue.pop();
}
if (queue.size() > 0) {
queue.updateTop();
return true;
}
return false;
}
@Override
public int startPosition() {
return queue.top().startPosition();
}
@Override
public int endPosition() {
return queue.top().endPosition();
}
@Override
public int startOffset() throws IOException {
return queue.top().startOffset();
}
@Override
public int endOffset() throws IOException {
return queue.top().endOffset();
}
@Override
public BytesRef term() {
return queue.top().term();
}
}

View File

@ -118,6 +118,18 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
} }
} }
/** Gathers the {@link Matches} of every disjunct that matches {@code doc} and combines them. */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final List<Matches> hits = new ArrayList<>();
  for (Weight disjunct : weights) {
    final Matches disjunctMatches = disjunct.matches(context, doc);
    if (disjunctMatches == null) {
      // this disjunct does not match the document
      continue;
    }
    hits.add(disjunctMatches);
  }
  return Matches.fromSubMatches(hits);
}
/** Create the scorer used to score our associated DisjunctionMaxQuery */ /** Create the scorer used to score our associated DisjunctionMaxQuery */
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {

View File

@ -74,11 +74,16 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
@Override @Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) { return new ConstantScoreWeight(this, boost) {
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Matches matches(LeafReaderContext context, int doc) throws IOException {
final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field); final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
TermsEnum termsEnum = query.getTermsEnum(new Terms() { return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, getTermsEnum(fcsi)));
}
private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
return query.getTermsEnum(new Terms() {
@Override @Override
public TermsEnum iterator() throws IOException { public TermsEnum iterator() throws IOException {
return fcsi.termsEnum(); return fcsi.termsEnum();
@ -118,13 +123,18 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
public boolean hasPositions() { public boolean hasPositions() {
return false; return false;
} }
@Override @Override
public boolean hasPayloads() { public boolean hasPayloads() {
return false; return false;
} }
}); });
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
TermsEnum termsEnum = getTermsEnum(fcsi);
assert termsEnum != null; assert termsEnum != null;
if (termsEnum.next() == null) { if (termsEnum.next() == null) {
// no matching terms // no matching terms

View File

@ -75,4 +75,8 @@ public abstract class FilterWeight extends Weight {
return in.scorer(context); return in.scorer(context);
} }
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// Pure delegation: the wrapped weight decides whether and where the document matches.
return in.matches(context, doc);
}
} }

View File

@ -119,6 +119,12 @@ public final class IndexOrDocValuesQuery extends Query {
indexWeight.extractTerms(terms); indexWeight.extractTerms(terms);
} }
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better;
// only dvWeight is consulted here, indexWeight is not involved.
return dvWeight.matches(context, doc);
}
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better // We need to check a single doc, so the dv query should perform better

View File

@ -678,6 +678,11 @@ public class LRUQueryCache implements QueryCache, Accountable {
in.extractTerms(terms); in.extractTerms(terms);
} }
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// Match extraction goes straight to the wrapped weight.
return in.matches(context, doc);
}
private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) { private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
// The worst-case (dense) is a bit set which needs one bit per document // The worst-case (dense) is a bit set which needs one bit per document
final long worstCaseRamUsage = maxDoc / 8; final long worstCaseRamUsage = maxDoc / 8;

View File

@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Reports the positions and optionally offsets of all matching terms in a query
 * for a single document
 *
 * Iterating over a {@code Matches} yields the names of the fields that have matches.
 * To obtain a {@link MatchesIterator} for a particular field, call {@link #getMatches(String)}.
 * Note that you can call {@link #getMatches(String)} multiple times to retrieve new
 * iterators, but it is not thread-safe.
 */
public interface Matches extends Iterable<String> {

  /**
   * Returns a {@link MatchesIterator} over the matches for a single field,
   * or {@code null} if there are no matches in that field.
   */
  MatchesIterator getMatches(String field) throws IOException;

  /**
   * Indicates a match with no term positions, for example on a Point or DocValues field,
   * or a field indexed as docs and freqs only
   */
  Matches MATCH_WITH_NO_TERMS = new Matches() {
    @Override
    public MatchesIterator getMatches(String field) {
      return null;
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.emptyIterator();
    }
  };

  /**
   * Amalgamate a collection of {@link Matches} into a single object
   *
   * @return {@code null} if {@code subMatches} is {@code null} or empty,
   *         {@link #MATCH_WITH_NO_TERMS} if every entry is that sentinel,
   *         the sole remaining entry if only one carries terms,
   *         otherwise a combined view over all entries that carry terms
   */
  static Matches fromSubMatches(List<Matches> subMatches) {
    if (subMatches == null || subMatches.isEmpty()) {
      return null;
    }
    // entries without term information contribute nothing to per-field iteration
    final List<Matches> withTerms = subMatches.stream()
        .filter(m -> m != MATCH_WITH_NO_TERMS)
        .collect(Collectors.toList());
    if (withTerms.isEmpty()) {
      return MATCH_WITH_NO_TERMS;
    }
    if (withTerms.size() == 1) {
      return withTerms.get(0);
    }
    final Set<String> fieldNames = new HashSet<>();
    for (Matches m : withTerms) {
      m.forEach(fieldNames::add);
    }
    return new Matches() {
      @Override
      public Iterator<String> iterator() {
        return fieldNames.iterator();
      }

      @Override
      public MatchesIterator getMatches(String field) throws IOException {
        final List<MatchesIterator> perSource = new ArrayList<>();
        for (Matches m : withTerms) {
          final MatchesIterator candidate = m.getMatches(field);
          if (candidate == null) {
            continue;
          }
          perSource.add(candidate);
        }
        return DisjunctionMatchesIterator.fromSubIterators(perSource);
      }
    };
  }

  /**
   * A functional interface that supplies a {@link MatchesIterator}
   */
  @FunctionalInterface
  interface MatchesIteratorSupplier {
    /** Return a new {@link MatchesIterator} */
    MatchesIterator get() throws IOException;
  }

  /**
   * Create a Matches for a single field
   *
   * @return {@code null} if the supplier yields no iterator, i.e. there is no hit
   */
  static Matches forField(String field, MatchesIteratorSupplier mis) throws IOException {
    // A supplier is taken instead of a ready-made MatchesIterator so that repeated
    // calls to Matches.getMatches() can hand out fresh iterators. The supplier is
    // still invoked once up front, because that is how we learn whether there is
    // a hit at all.
    final MatchesIterator first = mis.get();
    if (first == null) {
      return null;
    }
    return new Matches() {
      // true while the eagerly created iterator has not been handed out yet
      boolean firstAvailable = true;

      @Override
      public Iterator<String> iterator() {
        return Collections.singleton(field).iterator();
      }

      @Override
      public MatchesIterator getMatches(String f) throws IOException {
        if (Objects.equals(field, f) == false) {
          return null;
        }
        if (firstAvailable) {
          firstAvailable = false;
          return first;
        }
        return mis.get();
      }
    };
  }
}

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
/**
* An iterator over match positions (and optionally offsets) for a single document and field
*
* To iterate over the matches, call {@link #next()} until it returns {@code false}, retrieving
* positions and/or offsets after each call. You should not call the position or offset methods
* before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
*
* Matches are ordered by start position, and then by end position. Match intervals may overlap.
*
* @see Weight#matches(LeafReaderContext, int)
*/
public interface MatchesIterator {
/**
* Advance the iterator to the next match position
* @return {@code true} if matches have not been exhausted
*/
boolean next() throws IOException;
/**
* The start position of the current match
*
* Should only be called after {@link #next()} has returned {@code true}
*
* @return the start position of the current match
*/
int startPosition();
/**
* The end position of the current match
*
* Should only be called after {@link #next()} has returned {@code true}
*
* @return the end position of the current match
*/
int endPosition();
/**
* The starting offset of the current match, or {@code -1} if offsets are not available
*
* Should only be called after {@link #next()} has returned {@code true}
*
* @return the starting offset, or {@code -1} if offsets are not available
*/
int startOffset() throws IOException;
/**
* The ending offset of the current match, or {@code -1} if offsets are not available
*
* Should only be called after {@link #next()} has returned {@code true}
*
* @return the ending offset, or {@code -1} if offsets are not available
*/
int endOffset() throws IOException;
/**
* The underlying term of the current match
*
* Should only be called after {@link #next()} has returned {@code true}
*
* @return the term of the current match
*/
BytesRef term();
}

View File

@ -25,8 +25,8 @@ import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
@ -202,6 +202,18 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
} }
} }
/**
 * Reports term positions for {@code doc} when the query's field indexes positions;
 * otherwise defers to the inherited implementation.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final Terms indexedTerms = context.reader().terms(query.field);
  if (indexedTerms == null) {
    // the reader exposes no terms for this field, so nothing can match
    return null;
  }
  if (indexedTerms.hasPositions()) {
    return Matches.forField(query.field,
        () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, query.getTermsEnum(indexedTerms)));
  }
  return super.matches(context, doc);
}
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
final WeightOrDocIdSet weightOrBitSet = rewrite(context); final WeightOrDocIdSet weightOrBitSet = rewrite(context);

View File

@ -30,8 +30,9 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -159,6 +160,16 @@ public final class SynonymQuery extends Query {
} }
} }
/**
 * Reports positions of the synonym terms in {@code doc} when the field indexes positions;
 * otherwise defers to the inherited implementation.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  // the field is taken from the first synonym term
  final String field = terms[0].field();
  final Terms indexedTerms = context.reader().terms(field);
  final boolean positionsIndexed = indexedTerms != null && indexedTerms.hasPositions();
  if (positionsIndexed == false) {
    return super.matches(context, doc);
  }
  return Matches.forField(field,
      () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, Arrays.asList(SynonymQuery.this.terms)));
}
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context); Scorer scorer = scorer(context);

View File

@ -220,6 +220,15 @@ public class TermInSetQuery extends Query implements Accountable {
// order to protect highlighters // order to protect highlighters
} }
/**
 * Reports positions of the query's terms in {@code doc} when the field indexes positions;
 * otherwise defers to the inherited implementation.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final Terms indexedTerms = context.reader().terms(field);
  if (indexedTerms != null && indexedTerms.hasPositions()) {
    return Matches.forField(field,
        () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, field, termData.iterator()));
  }
  // no terms or no positions for this field: let the inherited implementation decide
  return super.matches(context, doc);
}
/** /**
* On the given leaf context, try to either rewrite to a disjunction if * On the given leaf context, try to either rewrite to a disjunction if
* there are few matching terms, or build a bitset containing matching docs. * there are few matching terms, or build a bitset containing matching docs.

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link MatchesIterator} over a single term's postings list
 */
class TermMatchesIterator implements MatchesIterator {

  private final BytesRef term;
  private final PostingsEnum pe;

  // number of positions that have not been consumed yet
  private int remaining;
  // position returned by the most recent successful next()
  private int position;

  /**
   * Create a new {@link TermMatchesIterator} for the given term and postings list
   *
   * The number of positions to iterate is read from {@code pe.freq()} at construction time.
   */
  TermMatchesIterator(BytesRef term, PostingsEnum pe) throws IOException {
    this.term = term;
    this.pe = pe;
    this.remaining = pe.freq();
  }

  @Override
  public boolean next() throws IOException {
    if (remaining-- <= 0) {
      return false;
    }
    position = pe.nextPosition();
    return true;
  }

  /** A single term spans exactly one position, so start and end coincide. */
  @Override
  public int startPosition() {
    return position;
  }

  @Override
  public int endPosition() {
    return position;
  }

  @Override
  public int startOffset() throws IOException {
    return pe.startOffset();
  }

  @Override
  public int endOffset() throws IOException {
    return pe.endOffset();
  }

  @Override
  public BytesRef term() {
    return term;
  }
}

View File

@ -25,10 +25,11 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
@ -80,6 +81,24 @@ public class TermQuery extends Query {
terms.add(getTerm()); terms.add(getTerm());
} }
/**
 * Reports the positions of this query's term in {@code doc}.
 *
 * Returns {@code null} when no terms enum is available for the context or the term does
 * not occur in the document; defers to the inherited implementation when the field
 * does not index positions.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final TermsEnum termsEnum = getTermsEnum(context);
  if (termsEnum == null) {
    return null;
  }
  final String fieldName = term.field();
  final boolean positionsIndexed = context.reader().terms(fieldName).hasPositions();
  if (positionsIndexed == false) {
    return super.matches(context, doc);
  }
  return Matches.forField(fieldName, () -> {
    // a fresh postings enum per call, so each iterator starts from the first position
    final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.OFFSETS);
    return postings.advance(doc) == doc ? new TermMatchesIterator(term.bytes(), postings) : null;
  });
}
@Override @Override
public String toString() { public String toString() {
return "weight(" + TermQuery.this + ")"; return "weight(" + TermQuery.this + ")";

View File

@ -69,6 +69,35 @@ public abstract class Weight implements SegmentCacheable {
*/ */
public abstract void extractTerms(Set<Term> terms); public abstract void extractTerms(Set<Term> terms);
/**
 * Returns {@link Matches} for a specific document, or {@code null} if the document
 * does not match the parent query
 *
 * A query match that contains no position information (for example, a Point or
 * DocValues query) will return {@link Matches#MATCH_WITH_NO_TERMS}
 *
 * This default implementation only establishes whether the document matches, by
 * advancing the scorer (or its two-phase iterator) to {@code doc}; it never reports
 * positions.
 *
 * @param context the reader's context to create the {@link Matches} for
 * @param doc the document's id relative to the given context's reader
 */
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final Scorer scorer = scorer(context);
  if (scorer == null) {
    return null;
  }
  final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
  final boolean hit;
  if (twoPhase != null) {
    // landing on the doc in the approximation is not enough; it must also be confirmed
    hit = twoPhase.approximation().advance(doc) == doc && twoPhase.matches();
  } else {
    hit = scorer.iterator().advance(doc) == doc;
  }
  return hit ? Matches.MATCH_WITH_NO_TERMS : null;
}
/** /**
* An explanation of the score computation for the named document. * An explanation of the score computation for the named document.
* *

View File

@ -0,0 +1,440 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestMatchesIterator extends LuceneTestCase {
// Index fixtures, created in setUp() and released in tearDown()
protected IndexSearcher searcher;
protected Directory directory;
protected IndexReader reader;
// Every test document is indexed into each of these fields; they differ only in index options
private static final String FIELD_WITH_OFFSETS = "field_offsets";
private static final String FIELD_NO_OFFSETS = "field_no_offsets";
private static final String FIELD_DOCS_ONLY = "field_docs_only";
private static final String FIELD_FREQS = "field_freqs";
// TextField.TYPE_STORED with index options raised to include positions and offsets
private static final FieldType OFFSETS = new FieldType(TextField.TYPE_STORED);
static {
OFFSETS.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
}
// TextField.TYPE_STORED with index options reduced to IndexOptions.DOCS
private static final FieldType DOCS = new FieldType(TextField.TYPE_STORED);
static {
DOCS.setIndexOptions(IndexOptions.DOCS);
}
// TextField.TYPE_STORED with index options reduced to IndexOptions.DOCS_AND_FREQS
private static final FieldType DOCS_AND_FREQS = new FieldType(TextField.TYPE_STORED);
static {
DOCS_AND_FREQS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
}
// Releases the reader and directory opened in setUp() before the base class tears down.
@Override
public void tearDown() throws Exception {
reader.close();
directory.close();
super.tearDown();
}
// Builds the test index: one document per entry of docFields, with the same text
// indexed into every test field, then opens a searcher over the merged index.
@Override
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(newField(FIELD_WITH_OFFSETS, docFields[i], OFFSETS));
doc.add(newField(FIELD_NO_OFFSETS, docFields[i], TextField.TYPE_STORED));
doc.add(newField(FIELD_DOCS_ONLY, docFields[i], DOCS));
doc.add(newField(FIELD_FREQS, docFields[i], DOCS_AND_FREQS));
// the array index doubles as the document's "id", both as doc values and as text
doc.add(new NumericDocValuesField("id", i));
doc.add(newField("id", Integer.toString(i), TextField.TYPE_STORED));
writer.addDocument(doc);
}
// merge before opening the reader that is handed to getOnlyLeafReader below
writer.forceMerge(1);
reader = writer.getReader();
writer.close();
searcher = newSearcher(getOnlyLeafReader(reader));
}
// Text of the test documents; setUp() indexes entry i with "id" value i.
protected String[] docFields = {
"w1 w2 w3 w4 w5",
"w1 w3 w2 w3 zz",
"w1 xx w2 yy w4",
"w1 w2 w1 w4 w2 w3",
"nothing matches this document"
};
/**
 * Verifies the matches reported for {@code q} in {@code field} against {@code expected}.
 *
 * Each row of {@code expected} starts with a top-level doc id, followed by zero or more
 * groups of four values: start position, end position, start offset, end offset.
 * A row holding only the doc id means no matches are expected in {@code field}
 * for that document.
 */
void checkMatches(Query q, String field, int[][] expected) throws IOException {
  Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
  for (int i = 0; i < expected.length; i++) {
    LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
    int doc = expected[i][0] - ctx.docBase;
    Matches matches = w.matches(ctx, doc);
    if (matches == null) {
      assertEquals(1, expected[i].length);
      continue;
    }
    MatchesIterator it = matches.getMatches(field);
    if (expected[i].length == 1) {
      assertNull(it);
      // move on to the next row; returning here would silently skip all remaining documents
      continue;
    }
    assertNotNull(it);
    checkFieldMatches(it, expected[i]);
    checkFieldMatches(matches.getMatches(field), expected[i]); // test multiple calls
  }
}
/**
 * Drains {@code it} and compares every match with {@code expected}.
 *
 * {@code expected[0]} is skipped (it holds the doc id); the remaining values are read
 * in groups of four: start position, end position, start offset, end offset.
 */
void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
  int pos = 1;
  while (it.next()) {
    // fail with an assertion rather than an ArrayIndexOutOfBoundsException on surplus matches
    assertTrue("iterator returned more matches than expected", pos + 3 < expected.length);
    assertEquals(expected[pos], it.startPosition());
    assertEquals(expected[pos + 1], it.endPosition());
    assertEquals(expected[pos + 2], it.startOffset());
    assertEquals(expected[pos + 3], it.endOffset());
    pos += 4;
  }
  // every expected group must have been consumed
  assertEquals(expected.length, pos);
}
/**
 * Verifies match reporting on a field without positions.
 *
 * For document {@code i}, {@code expected[i] == true} means the query must report a
 * match that has no iterator for {@code field}; {@code false} means the query must
 * report no match at all.
 */
void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
  Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
  for (int i = 0; i < expected.length; i++) {
    LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
    int doc = i - ctx.docBase;
    Matches matches = w.matches(ctx, doc);
    if (expected[i]) {
      // a missing match should be an assertion failure, not a NullPointerException
      assertNotNull(matches);
      MatchesIterator mi = matches.getMatches(field);
      assertNull(mi);
    } else {
      assertNull(matches);
    }
  }
}
/**
 * Verifies the terms that {@code q} reports, in iteration order, for {@code field}.
 *
 * Row {@code i} of {@code expected} lists the terms expected for document
 * {@code i}; an empty row means the document has no matches in the field.
 *
 * @param q        the query to test; it is rewritten before the weight is created
 * @param field    the field whose matches are inspected
 * @param expected expected terms per document, indexed by doc id
 */
void checkTerms(Query q, String field, String[][] expected) throws IOException {
  Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
  for (int i = 0; i < expected.length; i++) {
    LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
    int doc = i - ctx.docBase;
    Matches matches = w.matches(ctx, doc);
    if (matches == null) {
      assertEquals(0, expected[i].length);
      continue;
    }
    MatchesIterator it = matches.getMatches(field);
    if (it == null) {
      assertEquals(0, expected[i].length);
      continue;
    }
    int pos = 0;
    while (it.next()) {
      // report surplus terms as an assertion failure rather than an ArrayIndexOutOfBoundsException
      assertTrue("Doc " + i + " returned more than the " + expected[i].length + " expected terms",
          pos < expected[i].length);
      assertEquals(expected[i][pos], it.term().utf8ToString());
      pos += 1;
    }
    assertEquals(expected[i].length, pos);
  }
}
/** Checks positions and offsets reported for the single term "w1" on FIELD_WITH_OFFSETS. */
public void testTermQuery() throws IOException {
  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, 0, 2 },
      { 1, 0, 0, 0, 2 },
      { 2, 0, 0, 0, 2 },
      { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 4 }
  };
  final Term w1 = new Term(FIELD_WITH_OFFSETS, "w1");
  checkMatches(new TermQuery(w1), FIELD_WITH_OFFSETS, expectedMatches);
}
/**
 * Checks the term "w1" on FIELD_NO_OFFSETS: positions are expected as usual,
 * while both offsets are expected to be -1 for every match.
 */
public void testTermQueryNoStoredOffsets() throws IOException {
  final Query termQuery = new TermQuery(new Term(FIELD_NO_OFFSETS, "w1"));

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, -1, -1 },
      { 1, 0, 0, -1, -1 },
      { 2, 0, 0, -1, -1 },
      { 3, 0, 0, -1, -1, 2, 2, -1, -1 },
      { 4 }
  };
  checkMatches(termQuery, FIELD_NO_OFFSETS, expectedMatches);

  // one row of matching terms per document
  final String[][] expectedTerms = {
      { "w1" },
      { "w1" },
      { "w1" },
      { "w1", "w1" },
      {}
  };
  checkTerms(termQuery, FIELD_NO_OFFSETS, expectedTerms);
}
/** Runs the "w1" term query against FIELD_DOCS_ONLY and FIELD_FREQS, where no match iterator is expected. */
public void testTermQueryNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_DOCS_ONLY, FIELD_FREQS };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, true, true, true, false };
  for (String fieldName : fieldNames) {
    Query termQuery = new TermQuery(new Term(fieldName, "w1"));
    checkNoPositionsMatches(termQuery, fieldName, hits);
  }
}
/** Checks matches and terms for the disjunction "w1 OR w3" on FIELD_WITH_OFFSETS. */
public void testDisjunction() throws IOException {
  final BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD);
  disjunction.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD);
  final Query query = disjunction.build();

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
      { 2, 0, 0, 0, 2 },
      { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
      { 4 }
  };
  checkMatches(query, FIELD_WITH_OFFSETS, expectedMatches);

  // one row of matching terms per document, in iteration order
  final String[][] expectedTerms = {
      { "w1", "w3" },
      { "w1", "w3", "w3" },
      { "w1" },
      { "w1", "w1", "w3" },
      {}
  };
  checkTerms(query, FIELD_WITH_OFFSETS, expectedTerms);
}
/** Runs the disjunction "w1 OR w3" against FIELD_DOCS_ONLY and FIELD_FREQS, where no match iterator is expected. */
public void testDisjunctionNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_DOCS_ONLY, FIELD_FREQS };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, true, true, true, false };
  for (String fieldName : fieldNames) {
    BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
    disjunction.add(new TermQuery(new Term(fieldName, "w1")), BooleanClause.Occur.SHOULD);
    disjunction.add(new TermQuery(new Term(fieldName, "w3")), BooleanClause.Occur.SHOULD);
    checkNoPositionsMatches(disjunction.build(), fieldName, hits);
  }
}
/** Checks matches for an optional "w1" combined with a required "w3" on FIELD_WITH_OFFSETS. */
public void testReqOpt() throws IOException {
  final BooleanQuery.Builder reqOpt = new BooleanQuery.Builder();
  reqOpt.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD);
  reqOpt.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST);

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
      { 2 },
      { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
      { 4 }
  };
  checkMatches(reqOpt.build(), FIELD_WITH_OFFSETS, expectedMatches);
}
/** Runs optional "w1" plus required "w3" against FIELD_DOCS_ONLY and FIELD_FREQS, where no match iterator is expected. */
public void testReqOptNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_DOCS_ONLY, FIELD_FREQS };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, true, false, true, false };
  for (String fieldName : fieldNames) {
    BooleanQuery.Builder reqOpt = new BooleanQuery.Builder();
    reqOpt.add(new TermQuery(new Term(fieldName, "w1")), BooleanClause.Occur.SHOULD);
    reqOpt.add(new TermQuery(new Term(fieldName, "w3")), BooleanClause.Occur.MUST);
    checkNoPositionsMatches(reqOpt.build(), fieldName, hits);
  }
}
/**
 * Checks matches and terms for "w3" OR (at least two of "w1", "w4", "xx")
 * on FIELD_WITH_OFFSETS.
 */
public void testMinShouldMatch() throws IOException {
  // nested clause: any two of w1 / w4 / xx
  final BooleanQuery.Builder twoOfThree = new BooleanQuery.Builder();
  twoOfThree.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD);
  twoOfThree.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.SHOULD);
  twoOfThree.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")), BooleanClause.Occur.SHOULD);
  twoOfThree.setMinimumNumberShouldMatch(2);

  final BooleanQuery.Builder outer = new BooleanQuery.Builder();
  outer.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD);
  outer.add(twoOfThree.build(), BooleanClause.Occur.SHOULD);
  final Query query = outer.build();

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11 },
      { 1, 1, 1, 3, 5, 3, 3, 9, 11 },
      { 2, 0, 0, 0, 2, 1, 1, 3, 5, 4, 4, 12, 14 },
      { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
      { 4 }
  };
  checkMatches(query, FIELD_WITH_OFFSETS, expectedMatches);

  // one row of matching terms per document, in iteration order
  final String[][] expectedTerms = {
      { "w1", "w3", "w4" },
      { "w3", "w3" },
      { "w1", "xx", "w4" },
      { "w1", "w1", "w4", "w3" },
      {}
  };
  checkTerms(query, FIELD_WITH_OFFSETS, expectedTerms);
}
/**
 * Runs "w3" OR (at least two of "w1", "w4", "xx") against FIELD_FREQS and
 * FIELD_DOCS_ONLY, where no match iterator is expected.
 */
public void testMinShouldMatchNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_FREQS, FIELD_DOCS_ONLY };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, true, true, true, false };
  for (String fieldName : fieldNames) {
    BooleanQuery.Builder twoOfThree = new BooleanQuery.Builder();
    twoOfThree.add(new TermQuery(new Term(fieldName, "w1")), BooleanClause.Occur.SHOULD);
    twoOfThree.add(new TermQuery(new Term(fieldName, "w4")), BooleanClause.Occur.SHOULD);
    twoOfThree.add(new TermQuery(new Term(fieldName, "xx")), BooleanClause.Occur.SHOULD);
    twoOfThree.setMinimumNumberShouldMatch(2);

    BooleanQuery.Builder outer = new BooleanQuery.Builder();
    outer.add(new TermQuery(new Term(fieldName, "w3")), BooleanClause.Occur.SHOULD);
    outer.add(twoOfThree.build(), BooleanClause.Occur.SHOULD);

    checkNoPositionsMatches(outer.build(), fieldName, hits);
  }
}
/** Checks matches for "w3" with "zz" prohibited on FIELD_WITH_OFFSETS. */
public void testExclusion() throws IOException {
  final BooleanQuery.Builder exclusion = new BooleanQuery.Builder();
  exclusion.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD);
  exclusion.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "zz")), BooleanClause.Occur.MUST_NOT);

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 2, 2, 6, 8 },
      { 1 },
      { 2 },
      { 3, 5, 5, 15, 17 },
      { 4 }
  };
  checkMatches(exclusion.build(), FIELD_WITH_OFFSETS, expectedMatches);
}
/** Runs "w3" with "zz" prohibited against FIELD_FREQS and FIELD_DOCS_ONLY, where no match iterator is expected. */
public void testExclusionNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_FREQS, FIELD_DOCS_ONLY };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, false, false, true, false };
  for (String fieldName : fieldNames) {
    BooleanQuery.Builder exclusion = new BooleanQuery.Builder();
    exclusion.add(new TermQuery(new Term(fieldName, "w3")), BooleanClause.Occur.SHOULD);
    exclusion.add(new TermQuery(new Term(fieldName, "zz")), BooleanClause.Occur.MUST_NOT);
    checkNoPositionsMatches(exclusion.build(), fieldName, hits);
  }
}
/** Checks matches for the conjunction "w3 AND w4" on FIELD_WITH_OFFSETS. */
public void testConjunction() throws IOException {
  final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
  conjunction.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST);
  conjunction.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.MUST);

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 2, 2, 6, 8, 3, 3, 9, 11 },
      { 1 },
      { 2 },
      { 3, 3, 3, 9, 11, 5, 5, 15, 17 },
      { 4 }
  };
  checkMatches(conjunction.build(), FIELD_WITH_OFFSETS, expectedMatches);
}
/** Runs the conjunction "w3 AND w4" against FIELD_FREQS and FIELD_DOCS_ONLY, where no match iterator is expected. */
public void testConjunctionNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_FREQS, FIELD_DOCS_ONLY };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, false, false, true, false };
  for (String fieldName : fieldNames) {
    BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
    conjunction.add(new TermQuery(new Term(fieldName, "w3")), BooleanClause.Occur.MUST);
    conjunction.add(new TermQuery(new Term(fieldName, "w4")), BooleanClause.Occur.MUST);
    checkNoPositionsMatches(conjunction.build(), fieldName, hits);
  }
}
/** Checks matches for a prefix query ("x") and a regexp query ("w[1-2]") on FIELD_WITH_OFFSETS. */
public void testWildcards() throws IOException {
  // --- prefix query ---
  final Query prefixQuery = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "x"));
  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] prefixMatches = {
      { 0 },
      { 1 },
      { 2, 1, 1, 3, 5 },
      { 3 },
      { 4 }
  };
  checkMatches(prefixQuery, FIELD_WITH_OFFSETS, prefixMatches);
  // only the first four documents are covered by the term expectations
  final String[][] prefixTerms = {
      {}, {}, { "xx" }, {}
  };
  checkTerms(prefixQuery, FIELD_WITH_OFFSETS, prefixTerms);

  // --- regexp query ---
  final Query regexpQuery = new RegexpQuery(new Term(FIELD_WITH_OFFSETS, "w[1-2]"));
  final int[][] regexpMatches = {
      { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
      { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
      { 4 }
  };
  checkMatches(regexpQuery, FIELD_WITH_OFFSETS, regexpMatches);
}
/** A prefix query for "wibble" must yield a null Matches for doc 0 of the first leaf. */
public void testNoMatchWildcards() throws IOException {
  final Query nomatch = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "wibble"));
  final Weight weight = searcher.createWeight(searcher.rewrite(nomatch), ScoreMode.COMPLETE_NO_SCORES, 1);
  final LeafReaderContext firstLeaf = searcher.leafContexts.get(0);
  assertNull(weight.matches(firstLeaf, 0));
}
/** Runs the prefix query "x" against FIELD_FREQS and FIELD_DOCS_ONLY, where no match iterator is expected. */
public void testWildcardsNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_FREQS, FIELD_DOCS_ONLY };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { false, false, true, false, false };
  for (String fieldName : fieldNames) {
    Query prefixQuery = new PrefixQuery(new Term(fieldName, "x"));
    checkNoPositionsMatches(prefixQuery, fieldName, hits);
  }
}
/** Checks matches for a SynonymQuery over "w1" and "w2" on FIELD_WITH_OFFSETS. */
public void testSynonymQuery() throws IOException {
  final Term w1 = new Term(FIELD_WITH_OFFSETS, "w1");
  final Term w2 = new Term(FIELD_WITH_OFFSETS, "w2");

  // row layout: { docId, startPos, endPos, startOffset, endOffset, ... }
  final int[][] expectedMatches = {
      { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
      { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
      { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
      { 4 }
  };
  checkMatches(new SynonymQuery(w1, w2), FIELD_WITH_OFFSETS, expectedMatches);
}
/** Runs a SynonymQuery over "w1" and "w2" against FIELD_FREQS and FIELD_DOCS_ONLY, where no match iterator is expected. */
public void testSynonymQueryNoPositions() throws IOException {
  final String[] fieldNames = { FIELD_FREQS, FIELD_DOCS_ONLY };
  // one flag per document: true = the document is a hit
  final boolean[] hits = { true, true, true, true, false };
  for (String fieldName : fieldNames) {
    Query synonyms = new SynonymQuery(new Term(fieldName, "w1"), new Term(fieldName, "w2"));
    checkNoPositionsMatches(synonyms, fieldName, hits);
  }
}
/**
 * Checks a query spanning two fields ("id" and FIELD_WITH_OFFSETS) on doc 1:
 * each field must expose its own matches, an unknown field must expose none,
 * and iterating the Matches must list exactly the two matching fields.
 */
public void testMultipleFields() throws IOException {
  final BooleanQuery.Builder twoFields = new BooleanQuery.Builder();
  twoFields.add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.SHOULD);
  twoFields.add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST);
  final Query q = twoFields.build();

  final Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
  final int leafIndex = ReaderUtil.subIndex(1, searcher.leafContexts);
  final LeafReaderContext ctx = searcher.leafContexts.get(leafIndex);
  final Matches m = w.matches(ctx, 1 - ctx.docBase);
  assertNotNull(m);

  // the leading -1 is a placeholder: checkFieldMatches starts reading at index 1
  final int[] idMatches = { -1, 0, 0, -1, -1 };
  final int[] textMatches = { -1, 1, 1, 3, 5, 3, 3, 9, 11 };
  checkFieldMatches(m.getMatches("id"), idMatches);
  checkFieldMatches(m.getMatches(FIELD_WITH_OFFSETS), textMatches);
  assertNull(m.getMatches("bogus"));

  // collect the field names reported by iterating the Matches itself
  final Set<String> fields = new HashSet<>();
  m.forEach(fields::add);
  assertEquals(2, fields.size());
  assertTrue(fields.contains(FIELD_WITH_OFFSETS));
  assertTrue(fields.contains("id"));
}
// Holds the same text as the first four entries of docFields.
// NOTE(review): nothing in the visible part of this file reads this field -
// confirm whether a subclass uses it or whether it can be removed.
protected String[] doc1Fields = {
"w1 w2 w3 w4 w5",
"w1 w3 w2 w3 zz",
"w1 xx w2 yy w4",
"w1 w2 w1 w4 w2 w3"
};
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight; import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
@ -151,6 +152,28 @@ public class ToParentBlockJoinQuery extends Query {
} }
return Explanation.noMatch("Not a match"); return Explanation.noMatch("Not a match");
} }
/**
 * Reports whether {@code doc} is a hit for this weight, without any term positions.
 *
 * The default implementation would delegate to the joinQuery's Weight, which
 * matches on children, so the parent-level scorer is consulted instead.
 *
 * @return {@link Matches#MATCH_WITH_NO_TERMS} if the scorer lands on {@code doc}
 *         (and, for a two-phase scorer, confirms it); {@code null} otherwise
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final Scorer parentScorer = scorer(context);
  if (parentScorer == null) {
    return null;
  }
  final TwoPhaseIterator twoPhase = parentScorer.twoPhaseIterator();
  final boolean isHit;
  if (twoPhase != null) {
    // the approximation must land on doc before the confirmation step is consulted
    isHit = twoPhase.approximation().advance(doc) == doc && twoPhase.matches();
  } else {
    isHit = parentScorer.iterator().advance(doc) == doc;
  }
  return isHit ? Matches.MATCH_WITH_NO_TERMS : null;
}
} }
private static class ParentApproximation extends DocIdSetIterator { private static class ParentApproximation extends DocIdSetIterator {

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer; import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
@ -148,6 +149,11 @@ public final class FunctionScoreQuery extends Query {
this.inner.extractTerms(terms); this.inner.extractTerms(terms);
} }
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// Pure delegation: the result of inner.matches() is returned unchanged.
return inner.matches(context, doc);
}
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Explanation scoreExplanation = inner.explain(context, doc); Explanation scoreExplanation = inner.explain(context, doc);

View File

@ -136,6 +136,28 @@ public final class CoveringQuery extends Query {
} }
} }
/**
 * Collects the matches of every sub-weight for {@code doc} and combines them,
 * provided enough sub-weights match.
 *
 * @return {@code null} if the per-document minimum has no value for {@code doc}
 *         or fewer sub-weights than required (at least one) match; otherwise the
 *         result of {@link Matches#fromSubMatches} over the matching sub-weights
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final LongValues perDocMinimum = minimumNumberMatch.getValues(context, null);
  if (!perDocMinimum.advanceExact(doc)) {
    return null;
  }
  // at least one sub-weight must always match
  final long required = Math.max(1, perDocMinimum.longValue());

  final List<Matches> collected = new ArrayList<>();
  for (Weight subWeight : weights) {
    final Matches subMatch = subWeight.matches(context, doc);
    if (subMatch != null) {
      collected.add(subMatch);
    }
  }
  // the list size is the number of sub-weights that matched
  if (collected.size() < required) {
    return null;
  }
  return Matches.fromSubMatches(collected);
}
@Override @Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException { public Explanation explain(LeafReaderContext context, int doc) throws IOException {
LongValues minMatchValues = minimumNumberMatch.getValues(context, null); LongValues minMatchValues = minimumNumberMatch.getValues(context, null);

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Iterator;
/**
 * A {@link Matches} wrapper that hands out {@link AssertingMatchesIterator}s
 * around the iterators of the wrapped instance.
 */
class AssertingMatches implements Matches {

  private final Matches in;

  AssertingMatches(Matches matches) {
    this.in = matches;
  }

  /** Wraps the delegate's iterator for {@code field}, or returns null if the delegate has none. */
  @Override
  public MatchesIterator getMatches(String field) throws IOException {
    final MatchesIterator delegate = in.getMatches(field);
    return delegate == null ? null : new AssertingMatchesIterator(delegate);
  }

  /** Field names are taken straight from the wrapped instance. */
  @Override
  public Iterator<String> iterator() {
    return in.iterator();
  }

}

View File

@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link MatchesIterator} wrapper that asserts it is used in the right state:
 * {@link #next()} must not be called once it has returned false, and the
 * accessors may only be called after {@link #next()} has returned true.
 */
class AssertingMatchesIterator implements MatchesIterator {

  private enum State { UNPOSITIONED, ITERATING, EXHAUSTED }

  private final MatchesIterator in;
  private State state = State.UNPOSITIONED;

  AssertingMatchesIterator(MatchesIterator in) {
    this.in = in;
  }

  @Override
  public boolean next() throws IOException {
    assert state != State.EXHAUSTED : state;
    final boolean positioned = in.next();
    // track whether the delegate is on a match or has run out
    state = positioned ? State.ITERATING : State.EXHAUSTED;
    return positioned;
  }

  @Override
  public int startPosition() {
    assert state == State.ITERATING : state;
    return in.startPosition();
  }

  @Override
  public int endPosition() {
    assert state == State.ITERATING : state;
    return in.endPosition();
  }

  @Override
  public int startOffset() throws IOException {
    assert state == State.ITERATING : state;
    return in.startOffset();
  }

  @Override
  public int endOffset() throws IOException {
    assert state == State.ITERATING : state;
    return in.endOffset();
  }

  @Override
  public BytesRef term() {
    assert state == State.ITERATING : state;
    return in.term();
  }

}

View File

@ -31,6 +31,14 @@ class AssertingWeight extends FilterWeight {
this.scoreMode = scoreMode; this.scoreMode = scoreMode;
} }
/** Wraps a non-null result of the delegate weight in an {@link AssertingMatches}; null passes through. */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  final Matches delegate = in.matches(context, doc);
  return delegate == null ? null : new AssertingMatches(delegate);
}
@Override @Override
public Scorer scorer(LeafReaderContext context) throws IOException { public Scorer scorer(LeafReaderContext context) throws IOException {
if (random.nextBoolean()) { if (random.nextBoolean()) {

View File

@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import static junit.framework.Assert.assertNotNull;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
/** /**
@ -56,7 +58,7 @@ public class CheckHits {
if (ignore.contains(Integer.valueOf(doc))) continue; if (ignore.contains(Integer.valueOf(doc))) continue;
Explanation exp = searcher.explain(q, doc); Explanation exp = searcher.explain(q, doc);
Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
exp); exp);
Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+ Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
" doesn't indicate non-match: " + exp.toString(), " doesn't indicate non-match: " + exp.toString(),
@ -300,6 +302,16 @@ public class CheckHits {
(query, defaultFieldName, searcher, deep)); (query, defaultFieldName, searcher, deep));
} }
/**
* Asserts that the result of calling {@link Weight#matches(LeafReaderContext, int)}
* for every document matching a query returns a non-null {@link Matches}.
* The checks are performed by running the query with a {@link MatchesAsserter}
* as the collector.
* @param query the query to test
* @param searcher the searcher to test against
* @throws IOException if the search fails
*/
public static void checkMatches(Query query, IndexSearcher searcher) throws IOException {
searcher.search(query, new MatchesAsserter(query, searcher));
}
private static final Pattern COMPUTED_FROM_PATTERN = Pattern.compile(".*, computed as .* from:"); private static final Pattern COMPUTED_FROM_PATTERN = Pattern.compile(".*, computed as .* from:");
@ -505,7 +517,7 @@ public class CheckHits {
("exception in hitcollector of [["+d+"]] for #"+doc, e); ("exception in hitcollector of [["+d+"]] for #"+doc, e);
} }
Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
verifyExplanation(d,doc,scorer.score(),deep,exp); verifyExplanation(d,doc,scorer.score(),deep,exp);
Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc + Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
" does not indicate match: " + exp.toString(), " does not indicate match: " + exp.toString(),
@ -522,6 +534,45 @@ public class CheckHits {
} }
} }
/**
 * Asserts that the {@link Matches} from a query is non-null whenever
 * the document it is created for is a hit.
 *
 * Also checks that the document immediately before each hit, when that
 * document was not itself collected, has a {@code null} {@link Matches}
 */
public static class MatchesAsserter extends SimpleCollector {

  private final Weight weight;
  private LeafReaderContext context;
  // last document collected in the current leaf, or -1 if none yet
  int lastCheckedDoc = -1;

  /**
   * @param query    the query whose matches are checked; it is rewritten before the weight is created
   * @param searcher the searcher used to create the weight
   */
  public MatchesAsserter(Query query, IndexSearcher searcher) throws IOException {
    this.weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    this.context = context;
    // doc ids restart for each leaf
    this.lastCheckedDoc = -1;
  }

  @Override
  public void collect(int doc) throws IOException {
    Matches matches = this.weight.matches(context, doc);
    assertNotNull("Unexpected null Matches object in doc" + doc + " for query " + this.weight.getQuery(), matches);
    if (lastCheckedDoc != doc - 1) {
      // the previous document was not collected, so it must not produce matches;
      // name that document (not the current hit) in the failure message
      final int previousDoc = doc - 1;
      assertNull("Unexpected non-null Matches object in non-matching doc" + previousDoc + " for query " + this.weight.getQuery(),
          this.weight.matches(context, previousDoc));
    }
    lastCheckedDoc = doc;
  }

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.COMPLETE_NO_SCORES;
  }

}
public static void checkTopScores(Random random, Query query, IndexSearcher searcher) throws IOException { public static void checkTopScores(Random random, Query query, IndexSearcher searcher) throws IOException {
// Check it computed the top hits correctly // Check it computed the top hits correctly
doCheckTopScores(query, searcher, 1); doCheckTopScores(query, searcher, 1);

View File

@ -108,6 +108,7 @@ public class QueryUtils {
* @see #checkSkipTo * @see #checkSkipTo
* @see #checkExplanations * @see #checkExplanations
* @see #checkEqual * @see #checkEqual
* @see CheckHits#checkMatches(Query, IndexSearcher)
*/ */
public static void check(Random random, Query q1, IndexSearcher s) { public static void check(Random random, Query q1, IndexSearcher s) {
check(random, q1, s, true); check(random, q1, s, true);
@ -125,6 +126,7 @@ public class QueryUtils {
check(random, q1, wrapUnderlyingReader(random, s, +1), false); check(random, q1, wrapUnderlyingReader(random, s, +1), false);
} }
checkExplanations(q1,s); checkExplanations(q1,s);
CheckHits.checkMatches(q1, s);
} }
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);