mirror of https://github.com/apache/lucene.git

LUCENE-8229: Add Weight.matches() to iterate over match positions

This commit is contained in:
parent 5b250b4a40
commit 040a9601b1
@@ -134,6 +134,11 @@ New Features
  soft deletes if the reader is opened from a directory. (Simon Willnauer,
  Mike McCandless, Uwe Schindler, Adrien Grand)

* LUCENE-8229: Add a method Weight.matches(LeafReaderContext, doc) that returns
  an iterator over matching positions for a given query and document. This
  allows exact hit extraction and will enable implementation of accurate
  highlighters. (Alan Woodward, Adrien Grand, David Smiley)

Bug Fixes

* LUCENE-8234: Fixed bug in how spatial relationship is computed for
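Editor's note (not part of this commit): a minimal sketch of how the new API is meant to be consumed, using only entry points visible in this change. The query, the "body" field, and the printMatches name are hypothetical; the createWeight/matches call shape mirrors the tests later in this diff.

// Sketch: print every match position of a hypothetical term query in a
// hypothetical "body" field, for one global doc id. Assumes java.util.List
// plus the usual org.apache.lucene.index.* and org.apache.lucene.search.*
// imports.
static void printMatches(IndexSearcher searcher, int docId) throws IOException {
  Query query = new TermQuery(new Term("body", "lucene"));
  Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
  List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  LeafReaderContext ctx = leaves.get(ReaderUtil.subIndex(docId, leaves));
  Matches matches = weight.matches(ctx, docId - ctx.docBase);
  if (matches == null) {
    return;        // the document is not a hit for this query
  }
  MatchesIterator it = matches.getMatches("body");
  if (it == null) {
    return;        // a hit, but with no position data in this field
  }
  while (it.next()) {
    System.out.println(it.startPosition() + "-" + it.endPosition()
        + " @ offsets " + it.startOffset() + "-" + it.endOffset());
  }
}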
@@ -119,6 +119,41 @@ final class BooleanWeight extends Weight {
    }
  }

  @Override
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    final int minShouldMatch = query.getMinimumNumberShouldMatch();
    List<Matches> matches = new ArrayList<>();
    int shouldMatchCount = 0;
    Iterator<Weight> wIt = weights.iterator();
    Iterator<BooleanClause> cIt = query.clauses().iterator();
    while (wIt.hasNext()) {
      Weight w = wIt.next();
      BooleanClause bc = cIt.next();
      Matches m = w.matches(context, doc);
      if (bc.isProhibited()) {
        if (m != null) {
          return null;
        }
      }
      if (bc.isRequired()) {
        if (m == null) {
          return null;
        }
        matches.add(m);
      }
      if (bc.getOccur() == Occur.SHOULD) {
        if (m != null) {
          matches.add(m);
          shouldMatchCount++;
        }
      }
    }
    if (shouldMatchCount < minShouldMatch) {
      return null;
    }
    return Matches.fromSubMatches(matches);
  }

  static BulkScorer disableScoring(final BulkScorer scorer) {
    return new BulkScorer() {
@@ -158,6 +158,11 @@ public final class ConstantScoreQuery extends Query {
        };
      }

      @Override
      public Matches matches(LeafReaderContext context, int doc) throws IOException {
        return innerWeight.matches(context, doc);
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        ScorerSupplier scorerSupplier = scorerSupplier(context);
@@ -0,0 +1,171 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;

/**
 * A {@link MatchesIterator} that combines matches from a set of sub-iterators
 *
 * Matches are sorted by their start positions, and then by their end positions, so that
 * prefixes sort first.  Matches may overlap, or be duplicated if they appear in more
 * than one of the sub-iterators.
 */
final class DisjunctionMatchesIterator implements MatchesIterator {

  /**
   * Create a {@link DisjunctionMatchesIterator} over a list of terms
   *
   * Only terms that have at least one match in the given document will be included
   */
  static MatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
    Objects.requireNonNull(field);
    for (Term term : terms) {
      if (Objects.equals(field, term.field()) == false) {
        throw new IllegalArgumentException("Tried to generate iterator from terms in multiple fields: expected [" + field + "] but got [" + term.field() + "]");
      }
    }
    return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
  }

  private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
    return new BytesRefIterator() {
      int i = 0;
      @Override
      public BytesRef next() {
        if (i >= terms.size())
          return null;
        return terms.get(i++).bytes();
      }
    };
  }

  /**
   * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
   *
   * Only terms that have at least one match in the given document will be included
   */
  static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
    Objects.requireNonNull(field);
    List<MatchesIterator> mis = new ArrayList<>();
    Terms t = context.reader().terms(field);
    if (t == null)
      return null;
    TermsEnum te = t.iterator();
    PostingsEnum reuse = null;
    for (BytesRef term = terms.next(); term != null; term = terms.next()) {
      if (te.seekExact(term)) {
        PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
        if (pe.advance(doc) == doc) {
          // TODO do we want to use the copied term here, or instead create a label that associates all of the TMIs with a single term?
          mis.add(new TermMatchesIterator(BytesRef.deepCopyOf(term), pe));
          reuse = null;
        }
        else {
          reuse = pe;
        }
      }
    }
    if (mis.size() == 0)
      return null;
    if (mis.size() == 1)
      return mis.get(0);
    return new DisjunctionMatchesIterator(mis);
  }

  static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {
    if (mis.size() == 0)
      return null;
    if (mis.size() == 1)
      return mis.get(0);
    return new DisjunctionMatchesIterator(mis);
  }

  private final PriorityQueue<MatchesIterator> queue;

  private boolean started = false;

  private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
    queue = new PriorityQueue<MatchesIterator>(matches.size()) {
      @Override
      protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
        return a.startPosition() < b.startPosition() ||
            (a.startPosition() == b.startPosition() && a.endPosition() < b.endPosition()) ||
            (a.startPosition() == b.startPosition() && a.endPosition() == b.endPosition() && a.term().compareTo(b.term()) < 0);
      }
    };
    for (MatchesIterator mi : matches) {
      if (mi.next()) {
        queue.add(mi);
      }
    }
  }

  @Override
  public boolean next() throws IOException {
    if (started == false) {
      return started = true;
    }
    if (queue.top().next() == false) {
      queue.pop();
    }
    if (queue.size() > 0) {
      queue.updateTop();
      return true;
    }
    return false;
  }

  @Override
  public int startPosition() {
    return queue.top().startPosition();
  }

  @Override
  public int endPosition() {
    return queue.top().endPosition();
  }

  @Override
  public int startOffset() throws IOException {
    return queue.top().startOffset();
  }

  @Override
  public int endOffset() throws IOException {
    return queue.top().endOffset();
  }

  @Override
  public BytesRef term() {
    return queue.top().term();
  }

}
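Editor's note (not part of this commit): a worked illustration of the ordering the priority queue above enforces. Given two sub-iterators over the same document, one yielding matches at (start, end) positions (0, 0) then (2, 4), and the other (0, 1) then (2, 2), the merged iterator yields (0, 0), (0, 1), (2, 2), (2, 4): sorted by start position, then end position, so a prefix match surfaces before a longer match starting at the same position, exactly as the class javadoc promises.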
@@ -118,6 +118,18 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
      }
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      List<Matches> mis = new ArrayList<>();
      for (Weight weight : weights) {
        Matches mi = weight.matches(context, doc);
        if (mi != null) {
          mis.add(mi);
        }
      }
      return Matches.fromSubMatches(mis);
    }

    /** Create the scorer used to score our associated DisjunctionMaxQuery */
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
@@ -74,10 +74,15 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
      public Matches matches(LeafReaderContext context, int doc) throws IOException {
        final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
        TermsEnum termsEnum = query.getTermsEnum(new Terms() {
        return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, getTermsEnum(fcsi)));
      }

      private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
        return query.getTermsEnum(new Terms() {

          @Override
          public TermsEnum iterator() throws IOException {

@@ -124,7 +129,12 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
            return false;
          }
        });
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
        TermsEnum termsEnum = getTermsEnum(fcsi);
        assert termsEnum != null;
        if (termsEnum.next() == null) {
          // no matching terms
@@ -75,4 +75,8 @@ public abstract class FilterWeight extends Weight {
    return in.scorer(context);
  }

  @Override
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    return in.matches(context, doc);
  }
}
@@ -119,6 +119,12 @@ public final class IndexOrDocValuesQuery extends Query {
        indexWeight.extractTerms(terms);
      }

      @Override
      public Matches matches(LeafReaderContext context, int doc) throws IOException {
        // We need to check a single doc, so the dv query should perform better
        return dvWeight.matches(context, doc);
      }

      @Override
      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        // We need to check a single doc, so the dv query should perform better
@@ -678,6 +678,11 @@ public class LRUQueryCache implements QueryCache, Accountable {
      in.extractTerms(terms);
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      return in.matches(context, doc);
    }

    private boolean cacheEntryHasReasonableWorstCaseSize(int maxDoc) {
      // The worst-case (dense) is a bit set which needs one bit per document
      final long worstCaseRamUsage = maxDoc / 8;
@@ -0,0 +1,146 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Reports the positions and optionally offsets of all matching terms in a query
 * for a single document
 *
 * To obtain a {@link MatchesIterator} for a particular field, call {@link #getMatches(String)}.
 * Note that you can call {@link #getMatches(String)} multiple times to retrieve new
 * iterators, but it is not thread-safe.
 */
public interface Matches extends Iterable<String> {

  /**
   * Returns a {@link MatchesIterator} over the matches for a single field,
   * or {@code null} if there are no matches in that field.
   */
  MatchesIterator getMatches(String field) throws IOException;

  /**
   * Indicates a match with no term positions, for example on a Point or DocValues field,
   * or a field indexed as docs and freqs only
   */
  Matches MATCH_WITH_NO_TERMS = new Matches() {
    @Override
    public Iterator<String> iterator() {
      return Collections.emptyIterator();
    }

    @Override
    public MatchesIterator getMatches(String field) {
      return null;
    }
  };

  /**
   * Amalgamate a collection of {@link Matches} into a single object
   */
  static Matches fromSubMatches(List<Matches> subMatches) {
    if (subMatches == null || subMatches.size() == 0) {
      return null;
    }
    List<Matches> sm = subMatches.stream().filter(m -> m != MATCH_WITH_NO_TERMS).collect(Collectors.toList());
    if (sm.size() == 0) {
      return MATCH_WITH_NO_TERMS;
    }
    if (sm.size() == 1) {
      return sm.get(0);
    }
    Set<String> fields = new HashSet<>();
    for (Matches m : sm) {
      for (String field : m) {
        fields.add(field);
      }
    }
    return new Matches() {
      @Override
      public MatchesIterator getMatches(String field) throws IOException {
        List<MatchesIterator> subIterators = new ArrayList<>();
        for (Matches m : sm) {
          MatchesIterator it = m.getMatches(field);
          if (it != null) {
            subIterators.add(it);
          }
        }
        return DisjunctionMatchesIterator.fromSubIterators(subIterators);
      }

      @Override
      public Iterator<String> iterator() {
        return fields.iterator();
      }
    };
  }

  /**
   * A functional interface that supplies a {@link MatchesIterator}
   */
  @FunctionalInterface
  interface MatchesIteratorSupplier {
    /** Return a new {@link MatchesIterator} */
    MatchesIterator get() throws IOException;
  }

  /**
   * Create a Matches for a single field
   */
  static Matches forField(String field, MatchesIteratorSupplier mis) throws IOException {

    // The indirection here, using a Supplier object rather than a MatchesIterator
    // directly, is to allow for multiple calls to Matches.getMatches() to return
    // new iterators.  We still need to call MatchesIteratorSupplier.get() eagerly
    // to work out if we have a hit or not.

    MatchesIterator mi = mis.get();
    if (mi == null) {
      return null;
    }
    return new Matches() {
      boolean cached = true;
      @Override
      public MatchesIterator getMatches(String f) throws IOException {
        if (Objects.equals(field, f) == false) {
          return null;
        }
        if (cached == false) {
          return mis.get();
        }
        cached = false;
        return mi;
      }

      @Override
      public Iterator<String> iterator() {
        return Collections.singleton(field).iterator();
      }
    };
  }

}
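Editor's note (not part of this commit): the forField/fromSubMatches pair above is the main extension surface for query authors. A hypothetical Weight for a single-field, multi-term query might plug in as sketched below; field and queryTerms are assumed members, and since DisjunctionMatchesIterator is package-private, the pattern only works inside org.apache.lucene.search, which is where all of the commit's own queries live.

// Hypothetical Weight.matches() implementation; mirrors the pattern used by
// TermInSetQuery and SynonymQuery elsewhere in this commit.
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  Terms terms = context.reader().terms(field);
  if (terms == null || terms.hasPositions() == false) {
    return super.matches(context, doc);   // falls back to MATCH_WITH_NO_TERMS
  }
  return Matches.forField(field,
      () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, queryTerms));
}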
@@ -0,0 +1,79 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;

/**
 * An iterator over match positions (and optionally offsets) for a single document and field
 *
 * To iterate over the matches, call {@link #next()} until it returns {@code false}, retrieving
 * positions and/or offsets after each call.  You should not call the position or offset methods
 * before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
 *
 * Matches are ordered by start position, and then by end position.  Match intervals may overlap.
 *
 * @see Weight#matches(LeafReaderContext, int)
 */
public interface MatchesIterator {

  /**
   * Advance the iterator to the next match position
   * @return {@code true} if matches have not been exhausted
   */
  boolean next() throws IOException;

  /**
   * The start position of the current match
   *
   * Should only be called after {@link #next()} has returned {@code true}
   */
  int startPosition();

  /**
   * The end position of the current match
   *
   * Should only be called after {@link #next()} has returned {@code true}
   */
  int endPosition();

  /**
   * The starting offset of the current match, or {@code -1} if offsets are not available
   *
   * Should only be called after {@link #next()} has returned {@code true}
   */
  int startOffset() throws IOException;

  /**
   * The ending offset of the current match, or {@code -1} if offsets are not available
   *
   * Should only be called after {@link #next()} has returned {@code true}
   */
  int endOffset() throws IOException;

  /**
   * The underlying term of the current match
   *
   * Should only be called after {@link #next()} has returned {@code true}
   */
  BytesRef term();

}
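Editor's note (not part of this commit): a consumption sketch for this contract. next() must be called before any accessor, and start/end offsets may legitimately be -1 when the field was indexed without offsets, so a highlighter has to be ready to fall back to positions. The helper name and its java.util imports are assumptions.

// Collect (startOffset, endOffset) pairs for one field, skipping matches
// whose offsets were not indexed.
static List<int[]> collectOffsets(Matches matches, String field) throws IOException {
  List<int[]> offsets = new ArrayList<>();
  MatchesIterator it = matches.getMatches(field);
  if (it == null) {
    return offsets;                 // no positional matches in this field
  }
  while (it.next()) {               // accessors are only valid inside this loop
    if (it.startOffset() != -1) {
      offsets.add(new int[] { it.startOffset(), it.endOffset() });
    }
  }
  return offsets;
}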
@@ -25,8 +25,8 @@ import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -202,6 +202,18 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
      }
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      final Terms terms = context.reader().terms(query.field);
      if (terms == null) {
        return null;
      }
      if (terms.hasPositions() == false) {
        return super.matches(context, doc);
      }
      return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, query.getTermsEnum(terms)));
    }

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final WeightOrDocIdSet weightOrBitSet = rewrite(context);
@@ -30,8 +30,9 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
@@ -159,6 +160,16 @@ public final class SynonymQuery extends Query {
      }
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      String field = terms[0].field();
      Terms terms = context.reader().terms(field);
      if (terms == null || terms.hasPositions() == false) {
        return super.matches(context, doc);
      }
      return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, Arrays.asList(SynonymQuery.this.terms)));
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      Scorer scorer = scorer(context);
@@ -220,6 +220,15 @@ public class TermInSetQuery extends Query implements Accountable {
        // order to protect highlighters
      }

      @Override
      public Matches matches(LeafReaderContext context, int doc) throws IOException {
        Terms terms = context.reader().terms(field);
        if (terms == null || terms.hasPositions() == false) {
          return super.matches(context, doc);
        }
        return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, field, termData.iterator()));
      }

      /**
       * On the given leaf context, try to either rewrite to a disjunction if
       * there are few matching terms, or build a bitset containing matching docs.
@@ -0,0 +1,77 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.util.BytesRef;

/**
 * A {@link MatchesIterator} over a single term's postings list
 */
class TermMatchesIterator implements MatchesIterator {

  private int upto;
  private int pos;
  private final PostingsEnum pe;
  private final BytesRef term;

  /**
   * Create a new {@link TermMatchesIterator} for the given term and postings list
   */
  TermMatchesIterator(BytesRef term, PostingsEnum pe) throws IOException {
    this.pe = pe;
    this.upto = pe.freq();
    this.term = term;
  }

  @Override
  public boolean next() throws IOException {
    if (upto-- > 0) {
      pos = pe.nextPosition();
      return true;
    }
    return false;
  }

  @Override
  public int startPosition() {
    return pos;
  }

  @Override
  public int endPosition() {
    return pos;
  }

  @Override
  public int startOffset() throws IOException {
    return pe.startOffset();
  }

  @Override
  public int endOffset() throws IOException {
    return pe.endOffset();
  }

  @Override
  public BytesRef term() {
    return term;
  }
}
@@ -25,10 +25,11 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
@@ -80,6 +81,24 @@ public class TermQuery extends Query {
      terms.add(getTerm());
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      TermsEnum te = getTermsEnum(context);
      if (te == null) {
        return null;
      }
      if (context.reader().terms(term.field()).hasPositions() == false) {
        return super.matches(context, doc);
      }
      return Matches.forField(term.field(), () -> {
        PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
        if (pe.advance(doc) != doc) {
          return null;
        }
        return new TermMatchesIterator(term.bytes(), pe);
      });
    }

    @Override
    public String toString() {
      return "weight(" + TermQuery.this + ")";
@@ -69,6 +69,35 @@ public abstract class Weight implements SegmentCacheable {
   */
  public abstract void extractTerms(Set<Term> terms);

  /**
   * Returns {@link Matches} for a specific document, or {@code null} if the document
   * does not match the parent query
   *
   * A query match that contains no position information (for example, a Point or
   * DocValues query) will return {@link Matches#MATCH_WITH_NO_TERMS}
   *
   * @param context the reader's context to create the {@link Matches} for
   * @param doc the document's id relative to the given context's reader
   */
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    Scorer scorer = scorer(context);
    if (scorer == null) {
      return null;
    }
    final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
    if (twoPhase == null) {
      if (scorer.iterator().advance(doc) != doc) {
        return null;
      }
    }
    else {
      if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
        return null;
      }
    }
    return Matches.MATCH_WITH_NO_TERMS;
  }

  /**
   * An explanation of the score computation for the named document.
   *
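Editor's note (not part of this commit): one consequence of this default implementation is that callers must distinguish "not a hit" from "a hit with no positions". A sketch, with "body" standing in for any field name:

// matches() == null      -> the document does not match the query at all
// getMatches(f) == null  -> the document matches, but exposes no positions
Matches m = weight.matches(ctx, doc);
if (m == null) {
  // not a hit
} else if (m.getMatches("body") == null) {
  // a hit via a Points/DocValues query, or a field indexed without positions
} else {
  // a hit with iterable match positions in "body"
}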
@@ -0,0 +1,440 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestMatchesIterator extends LuceneTestCase {

  protected IndexSearcher searcher;
  protected Directory directory;
  protected IndexReader reader;

  private static final String FIELD_WITH_OFFSETS = "field_offsets";
  private static final String FIELD_NO_OFFSETS = "field_no_offsets";
  private static final String FIELD_DOCS_ONLY = "field_docs_only";
  private static final String FIELD_FREQS = "field_freqs";

  private static final FieldType OFFSETS = new FieldType(TextField.TYPE_STORED);
  static {
    OFFSETS.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  }

  private static final FieldType DOCS = new FieldType(TextField.TYPE_STORED);
  static {
    DOCS.setIndexOptions(IndexOptions.DOCS);
  }

  private static final FieldType DOCS_AND_FREQS = new FieldType(TextField.TYPE_STORED);
  static {
    DOCS_AND_FREQS.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  }

  @Override
  public void tearDown() throws Exception {
    reader.close();
    directory.close();
    super.tearDown();
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    for (int i = 0; i < docFields.length; i++) {
      Document doc = new Document();
      doc.add(newField(FIELD_WITH_OFFSETS, docFields[i], OFFSETS));
      doc.add(newField(FIELD_NO_OFFSETS, docFields[i], TextField.TYPE_STORED));
      doc.add(newField(FIELD_DOCS_ONLY, docFields[i], DOCS));
      doc.add(newField(FIELD_FREQS, docFields[i], DOCS_AND_FREQS));
      doc.add(new NumericDocValuesField("id", i));
      doc.add(newField("id", Integer.toString(i), TextField.TYPE_STORED));
      writer.addDocument(doc);
    }
    writer.forceMerge(1);
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(getOnlyLeafReader(reader));
  }

  protected String[] docFields = {
      "w1 w2 w3 w4 w5",
      "w1 w3 w2 w3 zz",
      "w1 xx w2 yy w4",
      "w1 w2 w1 w4 w2 w3",
      "nothing matches this document"
  };

  void checkMatches(Query q, String field, int[][] expected) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
      int doc = expected[i][0] - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals(expected[i].length, 1);
        continue;
      }
      MatchesIterator it = matches.getMatches(field);
      if (expected[i].length == 1) {
        assertNull(it);
        return;
      }
      checkFieldMatches(it, expected[i]);
      checkFieldMatches(matches.getMatches(field), expected[i]);  // test multiple calls
    }
  }

  void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
    int pos = 1;
    while (it.next()) {
      //System.out.println(expected[i][pos] + "->" + expected[i][pos + 1] + "[" + expected[i][pos + 2] + "->" + expected[i][pos + 3] + "]");
      assertEquals(expected[pos], it.startPosition());
      assertEquals(expected[pos + 1], it.endPosition());
      assertEquals(expected[pos + 2], it.startOffset());
      assertEquals(expected[pos + 3], it.endOffset());
      pos += 4;
    }
    assertEquals(expected.length, pos);
  }

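  // Editor's note (not part of this commit): each row of the expected[][] arrays
  // used with checkMatches/checkFieldMatches reads
  //   { docid, startPos, endPos, startOffset, endOffset, startPos, ... }
  // i.e. the doc id followed by one 4-tuple per expected match. For example,
  // { 3, 0, 0, 0, 2, 2, 2, 6, 8 } asserts that doc 3 matches at position 0
  // (offsets 0-2) and again at position 2 (offsets 6-8); a bare { 4 } asserts
  // that doc 4 is not a hit, and offsets of -1 mean offsets were not indexed.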
  void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (expected[i]) {
        MatchesIterator mi = matches.getMatches(field);
        assertNull(mi);
      }
      else {
        assertNull(matches);
      }
    }
  }

  void checkTerms(Query q, String field, String[][] expected) throws IOException {
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
    for (int i = 0; i < expected.length; i++) {
      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
      int doc = i - ctx.docBase;
      Matches matches = w.matches(ctx, doc);
      if (matches == null) {
        assertEquals(expected[i].length, 0);
        continue;
      }
      MatchesIterator it = matches.getMatches(field);
      if (it == null) {
        assertEquals(expected[i].length, 0);
        continue;
      }
      int pos = 0;
      while (it.next()) {
        assertEquals(expected[i][pos], it.term().utf8ToString());
        pos += 1;
      }
      assertEquals(expected[i].length, pos);
    }
  }

  public void testTermQuery() throws IOException {
    Query q = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2 },
        { 1, 0, 0, 0, 2 },
        { 2, 0, 0, 0, 2 },
        { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 4 }
    });
  }

  public void testTermQueryNoStoredOffsets() throws IOException {
    Query q = new TermQuery(new Term(FIELD_NO_OFFSETS, "w1"));
    checkMatches(q, FIELD_NO_OFFSETS, new int[][]{
        { 0, 0, 0, -1, -1 },
        { 1, 0, 0, -1, -1 },
        { 2, 0, 0, -1, -1 },
        { 3, 0, 0, -1, -1, 2, 2, -1, -1 },
        { 4 }
    });
    checkTerms(q, FIELD_NO_OFFSETS, new String[][]{
        { "w1" },
        { "w1" },
        { "w1" },
        { "w1", "w1" },
        {}
    });
  }

  public void testTermQueryNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
      Query q = new TermQuery(new Term(field, "w1"));
      checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
    }
  }

  public void testDisjunction() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
        .build();
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
        { 2, 0, 0, 0, 2 },
        { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
        { 4 }
    });
    checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
        { "w1", "w3" },
        { "w1", "w3", "w3" },
        { "w1" },
        { "w1", "w1", "w3" },
        {}
    });
  }

  public void testDisjunctionNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
      Query q = new BooleanQuery.Builder()
          .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
          .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
          .build();
      checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
    }
  }

  public void testReqOpt() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
        .build();
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 1, 0, 0, 0, 2, 1, 1, 3, 5, 3, 3, 9, 11 },
        { 2 },
        { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
        { 4 }
    });
  }

  public void testReqOptNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_DOCS_ONLY, FIELD_FREQS }) {
      Query q = new BooleanQuery.Builder()
          .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
          .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
          .build();
      checkNoPositionsMatches(q, field, new boolean[]{ true, true, false, true, false });
    }
  }

  public void testMinShouldMatch() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
        .add(new BooleanQuery.Builder()
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "xx")), BooleanClause.Occur.SHOULD)
            .setMinimumNumberShouldMatch(2)
            .build(), BooleanClause.Occur.SHOULD)
        .build();
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11 },
        { 1, 1, 1, 3, 5, 3, 3, 9, 11 },
        { 2, 0, 0, 0, 2, 1, 1, 3, 5, 4, 4, 12, 14 },
        { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
        { 4 }
    });
    checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
        { "w1", "w3", "w4" },
        { "w3", "w3" },
        { "w1", "xx", "w4" },
        { "w1", "w1", "w4", "w3" },
        {}
    });
  }

  public void testMinShouldMatchNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
      Query q = new BooleanQuery.Builder()
          .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
          .add(new BooleanQuery.Builder()
              .add(new TermQuery(new Term(field, "w1")), BooleanClause.Occur.SHOULD)
              .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.SHOULD)
              .add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.SHOULD)
              .setMinimumNumberShouldMatch(2)
              .build(), BooleanClause.Occur.SHOULD)
          .build();
      checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
    }
  }

  public void testExclusion() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "zz")), BooleanClause.Occur.MUST_NOT)
        .build();
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 2, 2, 6, 8 },
        { 1 },
        { 2 },
        { 3, 5, 5, 15, 17 },
        { 4 }
    });
  }

  public void testExclusionNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
      Query q = new BooleanQuery.Builder()
          .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.SHOULD)
          .add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.MUST_NOT)
          .build();
      checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
    }
  }

  public void testConjunction() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w4")), BooleanClause.Occur.MUST)
        .build();
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 2, 2, 6, 8, 3, 3, 9, 11 },
        { 1 },
        { 2 },
        { 3, 3, 3, 9, 11, 5, 5, 15, 17 },
        { 4 }
    });
  }

  public void testConjunctionNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
      Query q = new BooleanQuery.Builder()
          .add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST)
          .add(new TermQuery(new Term(field, "w4")), BooleanClause.Occur.MUST)
          .build();
      checkNoPositionsMatches(q, field, new boolean[]{ true, false, false, true, false });
    }
  }

  public void testWildcards() throws IOException {
    Query q = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "x"));
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0 },
        { 1 },
        { 2, 1, 1, 3, 5 },
        { 3 },
        { 4 }
    });
    checkTerms(q, FIELD_WITH_OFFSETS, new String[][]{
        {}, {}, { "xx" }, {}
    });

    Query rq = new RegexpQuery(new Term(FIELD_WITH_OFFSETS, "w[1-2]"));
    checkMatches(rq, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
        { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
        { 4 }
    });

  }

  public void testNoMatchWildcards() throws IOException {
    Query nomatch = new PrefixQuery(new Term(FIELD_WITH_OFFSETS, "wibble"));
    Matches matches = searcher.createWeight(searcher.rewrite(nomatch), ScoreMode.COMPLETE_NO_SCORES, 1)
        .matches(searcher.leafContexts.get(0), 0);
    assertNull(matches);
  }

  public void testWildcardsNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
      Query q = new PrefixQuery(new Term(field, "x"));
      checkNoPositionsMatches(q, field, new boolean[]{ false, false, true, false, false });
    }
  }

  public void testSynonymQuery() throws IOException {
    Query q = new SynonymQuery(new Term(FIELD_WITH_OFFSETS, "w1"), new Term(FIELD_WITH_OFFSETS, "w2"));
    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
        { 0, 0, 0, 0, 2, 1, 1, 3, 5 },
        { 1, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 2, 0, 0, 0, 2, 2, 2, 6, 8 },
        { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
        { 4 }
    });
  }

  public void testSynonymQueryNoPositions() throws IOException {
    for (String field : new String[]{ FIELD_FREQS, FIELD_DOCS_ONLY }) {
      Query q = new SynonymQuery(new Term(field, "w1"), new Term(field, "w2"));
      checkNoPositionsMatches(q, field, new boolean[]{ true, true, true, true, false });
    }
  }

  public void testMultipleFields() throws IOException {
    Query q = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term(FIELD_WITH_OFFSETS, "w3")), BooleanClause.Occur.MUST)
        .build();
    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);

    LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(1, searcher.leafContexts));
    Matches m = w.matches(ctx, 1 - ctx.docBase);
    assertNotNull(m);
    checkFieldMatches(m.getMatches("id"), new int[]{ -1, 0, 0, -1, -1 });
    checkFieldMatches(m.getMatches(FIELD_WITH_OFFSETS), new int[]{ -1, 1, 1, 3, 5, 3, 3, 9, 11 });
    assertNull(m.getMatches("bogus"));

    Set<String> fields = new HashSet<>();
    for (String field : m) {
      fields.add(field);
    }
    assertEquals(2, fields.size());
    assertTrue(fields.contains(FIELD_WITH_OFFSETS));
    assertTrue(fields.contains("id"));
  }

  protected String[] doc1Fields = {
      "w1 w2 w3 w4 w5",
      "w1 w3 w2 w3 zz",
      "w1 xx w2 yy w4",
      "w1 w2 w1 w4 w2 w3"
  };

}
@@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@@ -151,6 +152,28 @@ public class ToParentBlockJoinQuery extends Query {
      }
      return Explanation.noMatch("Not a match");
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      // The default implementation would delegate to the joinQuery's Weight, which
      // matches on children.  We need to match on the parent instead
      Scorer scorer = scorer(context);
      if (scorer == null) {
        return null;
      }
      final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
      if (twoPhase == null) {
        if (scorer.iterator().advance(doc) != doc) {
          return null;
        }
      }
      else {
        if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) {
          return null;
        }
      }
      return Matches.MATCH_WITH_NO_TERMS;
    }
  }

  private static class ParentApproximation extends DocIdSetIterator {
@@ -29,6 +29,7 @@ import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterScorer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
@@ -148,6 +149,11 @@ public final class FunctionScoreQuery extends Query {
      this.inner.extractTerms(terms);
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      return inner.matches(context, doc);
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      Explanation scoreExplanation = inner.explain(context, doc);
@@ -136,6 +136,28 @@ public final class CoveringQuery extends Query {
      }
    }

    @Override
    public Matches matches(LeafReaderContext context, int doc) throws IOException {
      LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
      if (minMatchValues.advanceExact(doc) == false) {
        return null;
      }
      final long minimumNumberMatch = Math.max(1, minMatchValues.longValue());
      long matchCount = 0;
      List<Matches> subMatches = new ArrayList<>();
      for (Weight weight : weights) {
        Matches matches = weight.matches(context, doc);
        if (matches != null) {
          matchCount++;
          subMatches.add(matches);
        }
      }
      if (matchCount < minimumNumberMatch) {
        return null;
      }
      return Matches.fromSubMatches(subMatches);
    }

    @Override
    public Explanation explain(LeafReaderContext context, int doc) throws IOException {
      LongValues minMatchValues = minimumNumberMatch.getValues(context, null);
@@ -0,0 +1,43 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;
import java.util.Iterator;

class AssertingMatches implements Matches {

  private final Matches in;

  AssertingMatches(Matches matches) {
    this.in = matches;
  }

  @Override
  public MatchesIterator getMatches(String field) throws IOException {
    MatchesIterator mi = in.getMatches(field);
    if (mi == null)
      return null;
    return new AssertingMatchesIterator(mi);
  }

  @Override
  public Iterator<String> iterator() {
    return in.iterator();
  }
}
@@ -0,0 +1,77 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.util.BytesRef;

class AssertingMatchesIterator implements MatchesIterator {

  private final MatchesIterator in;
  private State state = State.UNPOSITIONED;

  private enum State { UNPOSITIONED, ITERATING, EXHAUSTED }

  AssertingMatchesIterator(MatchesIterator in) {
    this.in = in;
  }

  @Override
  public boolean next() throws IOException {
    assert state != State.EXHAUSTED : state;
    boolean more = in.next();
    if (more == false) {
      state = State.EXHAUSTED;
    }
    else {
      state = State.ITERATING;
    }
    return more;
  }

  @Override
  public int startPosition() {
    assert state == State.ITERATING : state;
    return in.startPosition();
  }

  @Override
  public int endPosition() {
    assert state == State.ITERATING : state;
    return in.endPosition();
  }

  @Override
  public int startOffset() throws IOException {
    assert state == State.ITERATING : state;
    return in.startOffset();
  }

  @Override
  public int endOffset() throws IOException {
    assert state == State.ITERATING : state;
    return in.endOffset();
  }

  @Override
  public BytesRef term() {
    assert state == State.ITERATING : state;
    return in.term();
  }
}
@@ -31,6 +31,14 @@ class AssertingWeight extends FilterWeight {
    this.scoreMode = scoreMode;
  }

  @Override
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    Matches matches = in.matches(context, doc);
    if (matches == null)
      return null;
    return new AssertingMatches(matches);
  }

  @Override
  public Scorer scorer(LeafReaderContext context) throws IOException {
    if (random.nextBoolean()) {
@@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.LuceneTestCase;

import static junit.framework.Assert.assertNotNull;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

/**

@@ -56,7 +58,7 @@ public class CheckHits {
      if (ignore.contains(Integer.valueOf(doc))) continue;

      Explanation exp = searcher.explain(q, doc);
      Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
      assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
                    exp);
      Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
                         " doesn't indicate non-match: " + exp.toString(),

@@ -301,6 +303,16 @@ public class CheckHits {

  }

  /**
   * Asserts that the result of calling {@link Weight#matches(LeafReaderContext, int)}
   * for every document matching a query returns a non-null {@link Matches}
   * @param query the query to test
   * @param searcher the search to test against
   */
  public static void checkMatches(Query query, IndexSearcher searcher) throws IOException {
    searcher.search(query, new MatchesAsserter(query, searcher));
  }

  private static final Pattern COMPUTED_FROM_PATTERN = Pattern.compile(".*, computed as .* from:");

  /**

@@ -505,7 +517,7 @@ public class CheckHits {
          ("exception in hitcollector of [["+d+"]] for #"+doc, e);
      }

      Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
      assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
      verifyExplanation(d,doc,scorer.score(),deep,exp);
      Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
                        " does not indicate match: " + exp.toString(),

@@ -522,6 +534,45 @@ public class CheckHits {
    }
  }

  /**
   * Asserts that the {@link Matches} from a query is non-null whenever
   * the document it's created for is a hit.
   *
   * Also checks that the previous non-matching document has a {@code null} {@link Matches}
   */
  public static class MatchesAsserter extends SimpleCollector {

    private final Weight weight;
    private LeafReaderContext context;
    int lastCheckedDoc = -1;

    public MatchesAsserter(Query query, IndexSearcher searcher) throws IOException {
      this.weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
    }

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      this.context = context;
      this.lastCheckedDoc = -1;
    }

    @Override
    public void collect(int doc) throws IOException {
      Matches matches = this.weight.matches(context, doc);
      assertNotNull("Unexpected null Matches object in doc" + doc + " for query " + this.weight.getQuery(), matches);
      if (lastCheckedDoc != doc - 1) {
        assertNull("Unexpected non-null Matches object in non-matching doc" + doc + " for query " + this.weight.getQuery(),
            this.weight.matches(context, doc - 1));
      }
      lastCheckedDoc = doc;
    }

    @Override
    public ScoreMode scoreMode() {
      return ScoreMode.COMPLETE_NO_SCORES;
    }
  }

  public static void checkTopScores(Random random, Query query, IndexSearcher searcher) throws IOException {
    // Check it computed the top hits correctly
    doCheckTopScores(query, searcher, 1);

@@ -108,6 +108,7 @@ public class QueryUtils {
   * @see #checkSkipTo
   * @see #checkExplanations
   * @see #checkEqual
   * @see CheckHits#checkMatches(Query, IndexSearcher)
   */
  public static void check(Random random, Query q1, IndexSearcher s) {
    check(random, q1, s, true);

@@ -125,6 +126,7 @@ public class QueryUtils {
        check(random, q1, wrapUnderlyingReader(random, s, +1), false);
      }
      checkExplanations(q1,s);
      CheckHits.checkMatches(q1, s);
    }
  } catch (IOException e) {
    throw new RuntimeException(e);