LUCENE-3041: QueryVisitor (#581)

This commit adds an introspection API to Query, allowing users to traverse
the nested structure of a query and examine its leaves. It replaces the existing
`extractTerms` method on Weight, and alters some highlighting code to use
the new API.
This commit is contained in:
Alan Woodward 2019-03-14 15:04:33 +00:00 committed by GitHub
parent ffb1fc83de
commit fbd05167f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
180 changed files with 1853 additions and 931 deletions

View File

@ -10,6 +10,9 @@ API Changes
* LUCENE-8474: RAMDirectory and associated deprecated classes have been
removed. (Dawid Weiss)
* LUCENE-3041: The deprecated Weight#extractTerms() method has been
removed. (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
Bug fixes:
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
@ -17,6 +20,16 @@ Bug fixes:
======================= Lucene 8.1.0 =======================
API Changes
* LUCENE-3041: A query introspection API has been added. Queries should
implement a visit() method, taking a QueryVisitor, and either pass the
visitor down to any child queries, or call a visitX() or consumeX() method
on it. All locations in the code that called Weight.extractTerms()
have been changed to use this API, and the extractTerms() method has
been deprecated. (Alan Woodward, Simon Willnauer, David Smiley, Luca
Cavanna)
Bug fixes
* LUCENE-8712: Polygon2D does not detect crossings through segment edges.

View File

@ -39,6 +39,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
@ -327,4 +328,9 @@ public class NearestFuzzyQuery extends Query {
Objects.equals(fieldVals, other.fieldVals);
}
/**
 * Reports this query to the visitor as a leaf. No field check is made and
 * no child queries are visited.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
}

View File

@ -18,14 +18,12 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.document.FeatureField.FeatureFunction;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@ -33,6 +31,7 @@ import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.ImpactsDISI;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@ -89,18 +88,6 @@ final class FeatureQuery extends Query {
return false;
}
/**
 * Adds the term built from {@code fieldName} and {@code featureName} to
 * {@code terms}, but only when {@code scoreMode.needsScores()} is true;
 * otherwise the set is left untouched.
 *
 * @param terms the set that receives the term
 */
@Override
public void extractTerms(Set<Term> terms) {
if (scoreMode.needsScores() == false) {
// features are irrelevant to highlighting, skip
} else {
// extracting the term here will help get better scoring with
// distributed term statistics if the saturation function is used
// and the pivot value is computed automatically
terms.add(new Term(fieldName, featureName));
}
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
String desc = "weight(" + getQuery() + " in " + doc + ") [" + function + "]";
@ -174,6 +161,13 @@ final class FeatureQuery extends Query {
};
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts {@code fieldName}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(fieldName) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
return "FeatureQuery(field=" + fieldName + ", feature=" + featureName + ", function=" + function + ")";

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -95,6 +96,13 @@ final class LatLonDocValuesBoxQuery extends Query {
return h;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -54,6 +55,13 @@ final class LatLonDocValuesDistanceQuery extends Query {
this.radiusMeters = radiusMeters;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
StringBuilder sb = new StringBuilder();

View File

@ -18,7 +18,6 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils;
@ -30,11 +29,11 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@ -64,6 +63,13 @@ final class LatLonPointDistanceFeatureQuery extends Query {
this.pivotDistance = pivotDistance;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public final boolean equals(Object o) {
return sameClassAs(o) &&
@ -101,9 +107,6 @@ final class LatLonPointDistanceFeatureQuery extends Query {
return false;
}
/**
 * Intentionally empty: nothing is added to {@code terms}.
 *
 * @param terms the set of terms, left unmodified
 */
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedNumericDocValues multiDocValues = DocValues.getSortedNumeric(context.reader(), field);

View File

@ -32,6 +32,7 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@ -71,6 +72,13 @@ final class LatLonPointDistanceQuery extends Query {
this.radiusMeters = radiusMeters;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@ -75,6 +76,13 @@ final class LatLonPointInPolygonQuery extends Query {
// TODO: we could also compute the maximal inner bounding box, to make relations faster to compute?
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

View File

@ -18,7 +18,6 @@ package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
@ -27,11 +26,11 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@ -75,6 +74,13 @@ final class LongDistanceFeatureQuery extends Query {
return h;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
return getClass().getSimpleName() + "(field=" + field + ",origin=" + origin + ",pivotDistance=" + pivotDistance + ")";
@ -89,9 +95,6 @@ final class LongDistanceFeatureQuery extends Query {
return false;
}
/**
 * Intentionally empty: nothing is added to {@code terms}.
 *
 * @param terms the set of terms, left unmodified
 */
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedNumericDocValues multiDocValues = DocValues.getSortedNumeric(context.reader(), field);

View File

@ -31,10 +31,11 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FutureArrays;
@ -261,6 +262,13 @@ abstract class RangeFieldQuery extends Query {
}
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -67,6 +68,13 @@ abstract class SortedNumericDocValuesRangeQuery extends Query {
return h;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
StringBuilder b = new StringBuilder();

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -78,6 +79,13 @@ abstract class SortedSetDocValuesRangeQuery extends Query {
return h;
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
StringBuilder b = new StringBuilder();

View File

@ -151,7 +151,14 @@ public class AutomatonQuery extends MultiTermQuery {
buffer.append("}");
return buffer.toString();
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts the field returned by {@code getField()}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
/** Returns the automaton used to create this query */
public Automaton getAutomaton() {
return automaton;

View File

@ -25,8 +25,8 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
@ -294,6 +294,15 @@ public final class BlendedTermQuery extends Query {
return rewriteMethod.rewrite(termQueries);
}
/**
 * Passes the blended terms whose fields the visitor accepts to a SHOULD
 * sub-visitor. When no term is accepted, no sub-visitor is requested and
 * nothing is reported.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  Term[] accepted = Arrays.stream(terms)
      .filter(term -> visitor.acceptField(term.field()))
      .toArray(Term[]::new);
  if (accepted.length == 0) {
    return;
  }
  visitor.getSubVisitor(Occur.SHOULD, this).consumeTerms(this, accepted);
}
private static TermStates adjustFrequencies(IndexReaderContext readerContext,
TermStates ctx, int artificialDf, long artificialTtf) throws IOException {
List<LeafReaderContext> leaves = readerContext.leaves();

View File

@ -451,6 +451,18 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return super.rewrite(reader);
}
/**
 * Visits the clauses of this query grouped by their {@code Occur}: for each
 * occur that has at least one clause, a sub-visitor is requested from
 * {@code visitor} and every clause query of that group is visited with it.
 * Occurs without clauses are skipped, so no sub-visitor is requested for them.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  for (BooleanClause.Occur occur : clauseSets.keySet()) {
    if (clauseSets.get(occur).size() <= 0) {
      continue;
    }
    QueryVisitor occurVisitor = visitor.getSubVisitor(occur, this);
    for (Query clause : clauseSets.get(occur)) {
      clause.visit(occurVisitor);
    }
  }
}
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {

View File

@ -24,10 +24,8 @@ import java.util.EnumMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
@ -56,17 +54,6 @@ final class BooleanWeight extends Weight {
}
}
/**
 * Collects terms from the sub-weights of the clauses that qualify. A clause
 * qualifies when it is scoring, or when scores are not needed and the clause
 * is not prohibited.
 *
 * @param terms the set that receives the extracted terms
 */
@Override
public void extractTerms(Set<Term> terms) {
// i is advanced once per clause and used to look up that clause's entry in weights
int i = 0;
for (BooleanClause clause : query) {
if (clause.isScoring() || (scoreMode.needsScores() == false && clause.isProhibited() == false)) {
weights.get(i).extractTerms(terms);
}
i++;
}
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
final int minShouldMatch = query.getMinimumNumberShouldMatch();

View File

@ -104,6 +104,11 @@ public final class BoostQuery extends Query {
return super.rewrite(reader);
}
/**
 * Visits the wrapped query with a MUST sub-visitor obtained from
 * {@code visitor}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  QueryVisitor wrappedVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  query.visit(wrappedVisitor);
}
@Override
public String toString(String field) {
StringBuilder builder = new StringBuilder();

View File

@ -64,6 +64,11 @@ public final class ConstantScoreQuery extends Query {
return super.rewrite(reader);
}
/**
 * Visits the wrapped query with a FILTER sub-visitor obtained from
 * {@code visitor}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  QueryVisitor wrappedVisitor = visitor.getSubVisitor(BooleanClause.Occur.FILTER, this);
  query.visit(wrappedVisitor);
}
/** We return this as our {@link BulkScorer} so that if the CSQ
* wraps a query with its own optimized top-level
* scorer (e.g. BooleanScorer) we can use that

View File

@ -18,10 +18,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/**
* A Weight that has a constant score equal to the boost of the wrapped query.
@ -39,13 +37,6 @@ public abstract class ConstantScoreWeight extends Weight {
this.score = score;
}
/**
 * Default implementation that adds nothing to {@code terms}.
 *
 * @param terms the set of terms, left unmodified
 */
@Override
public void extractTerms(Set<Term> terms) {
// most constant-score queries don't wrap index terms
// eg. geo filters, doc values queries, ...
// override if your constant-score query does wrap terms
}
/** Return the score produced by this {@link Weight}. */
protected final float score() {
return score;

View File

@ -24,11 +24,9 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.LeafReaderContext;
/**
* A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum
@ -111,13 +109,6 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
this.scoreMode = scoreMode;
}
/**
 * Collects terms by delegating to every weight in {@code weights}.
 *
 * @param terms the set that receives the extracted terms
 */
@Override
public void extractTerms(Set<Term> terms) {
for (Weight weight : weights) {
weight.extractTerms(terms);
}
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
List<Matches> mis = new ArrayList<>();
@ -237,6 +228,14 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
return super.rewrite(reader);
}
/**
 * Requests a single SHOULD sub-visitor from {@code visitor} and visits every
 * disjunct with it.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  final QueryVisitor disjunctVisitor = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this);
  for (Query disjunct : disjuncts) {
    disjunct.visit(disjunctVisitor);
  }
}
/** Prettyprint us.
* @param field the field to which we are applied
* @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"

View File

@ -59,6 +59,13 @@ public final class DocValuesFieldExistsQuery extends Query {
return "DocValuesFieldExistsQuery [field=" + this.field + "]";
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) {
return new ConstantScoreWeight(this, boost) {

View File

@ -70,6 +70,13 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
/** Returns the field name for this query */
public final String getField() { return query.getField(); }
/**
 * Offers this wrapper to {@code visitor} as a leaf, but only when the visitor
 * accepts the field of the wrapped query.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(query.getField()) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

View File

@ -17,10 +17,8 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/**
* A {@code FilterWeight} contains another {@code Weight} and implements
@ -60,11 +58,6 @@ public abstract class FilterWeight extends Weight {
return in.isCacheable(ctx);
}
/**
 * Delegates term extraction to the wrapped weight {@code in}.
 *
 * @param terms the set that receives the extracted terms
 */
@Override
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return in.explain(context, doc);

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/** Implements the fuzzy search query. The similarity measurement
@ -146,6 +147,21 @@ public class FuzzyQuery extends MultiTermQuery {
return transpositions;
}
/**
 * Expert: Constructs an equivalent Automaton accepting terms matched by this query.
 * The automaton is built by {@link FuzzyTermsEnum#buildAutomaton} from this query's
 * term text, prefix length, transpositions flag and maximum edit distance.
 *
 * @return the automaton built for this query's settings
 */
public Automaton toAutomaton() {
  final String text = term.text();
  return FuzzyTermsEnum.buildAutomaton(text, prefixLength, transpositions, maxEdits);
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts the field of the pattern term.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  // TODO find some way of consuming Automata
  if (visitor.acceptField(term.field()) == false) {
    return;
  }
  visitor.visitLeaf(this);
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (maxEdits == 0 || prefixLength >= term.text().length()) { // can only match if it's exact
@ -153,7 +169,7 @@ public class FuzzyQuery extends MultiTermQuery {
}
return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
}
/**
* Returns the pattern term.
*/
@ -173,7 +189,7 @@ public class FuzzyQuery extends MultiTermQuery {
buffer.append(Integer.toString(maxEdits));
return buffer.toString();
}
@Override
public int hashCode() {
final int prime = 31;

View File

@ -17,6 +17,9 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
@ -35,9 +38,6 @@ import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import java.io.IOException;
import java.util.Arrays;
/** Subclass of TermsEnum for enumerating all terms that are similar
* to the specified filter term.
*
@ -111,11 +111,7 @@ public final class FuzzyTermsEnum extends BaseTermsEnum {
this.term = term;
// convert the string into a utf32 int[] representation for fast comparisons
final String utf16 = term.text();
this.termText = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
termText[j++] = cp = utf16.codePointAt(i);
}
this.termText = stringToUTF32(term.text());
this.termLength = termText.length;
this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);
@ -133,16 +129,10 @@ public final class FuzzyTermsEnum extends BaseTermsEnum {
CompiledAutomaton[] prevAutomata = dfaAtt.automata();
if (prevAutomata == null) {
prevAutomata = new CompiledAutomaton[maxEdits+1];
LevenshteinAutomata builder =
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
Automaton[] automata = buildAutomata(termText, prefixLength, transpositions, maxEdits);
for (int i = 0; i <= maxEdits; i++) {
Automaton a = builder.toAutomaton(i, prefix);
prevAutomata[i] = new CompiledAutomaton(a, true, false);
prevAutomata[i] = new CompiledAutomaton(automata[i], true, false);
}
// first segment computes the automata, and we share with subsequent segments via this Attribute:
dfaAtt.setAutomata(prevAutomata);
}
@ -152,6 +142,46 @@ public final class FuzzyTermsEnum extends BaseTermsEnum {
bottomTerm = maxBoostAtt.getCompetitiveTerm();
bottomChanged(null);
}
/**
 * Builds a binary Automaton to match a fuzzy term
 * @param text the term to match
 * @param prefixLength length of a required common prefix
 * @param transpositions {@code true} if transpositions should count as a single edit
 * @param maxEdits the maximum edit distance of matching terms
 * @return the automaton built for an edit distance of {@code maxEdits}
 * @throws IllegalArgumentException if {@code maxEdits} or {@code prefixLength} is rejected
 *         by {@code buildAutomata}
 */
public static Automaton buildAutomaton(String text, int prefixLength, boolean transpositions, int maxEdits) {
  final Automaton[] byEditDistance = buildAutomata(stringToUTF32(text), prefixLength, transpositions, maxEdits);
  // one automaton is built per edit distance 0..maxEdits; the last one covers maxEdits
  return byEditDistance[byEditDistance.length - 1];
}
/**
 * Converts a UTF-16 string into an array holding one int per Unicode code point.
 * Surrogate pairs are combined into a single code point.
 *
 * @param text the string to convert
 * @return the code points of {@code text}, in order
 */
private static int[] stringToUTF32(String text) {
  return text.codePoints().toArray();
}
/**
 * Builds one automaton per edit distance from 0 to {@code maxEdits}, inclusive.
 * The first {@code prefixLength} code points (capped at the term length) are
 * passed to the Levenshtein builder as a prefix; the automata are built from
 * the remaining suffix.
 *
 * @param termText the term as an array of code points
 * @param prefixLength length of the prefix, in code points
 * @param transpositions passed through to {@link LevenshteinAutomata}
 * @param maxEdits the largest edit distance to build an automaton for
 * @return an array of {@code maxEdits + 1} automata, indexed by edit distance
 * @throws IllegalArgumentException if {@code maxEdits} is outside
 *         0..{@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} or {@code prefixLength} is negative
 */
private static Automaton[] buildAutomata(int[] termText, int prefixLength, boolean transpositions, int maxEdits) {
  final boolean editsInRange = maxEdits >= 0 && maxEdits <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
  if (editsInRange == false) {
    throw new IllegalArgumentException("max edits must be 0.." + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got: " + maxEdits);
  }
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength cannot be less than 0");
  }
  // a prefix can never be longer than the term itself
  final int cappedPrefix = Math.min(prefixLength, termText.length);
  final String suffix = UnicodeUtil.newString(termText, cappedPrefix, termText.length - cappedPrefix);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);
  final String prefix = UnicodeUtil.newString(termText, 0, cappedPrefix);
  final Automaton[] automata = new Automaton[maxEdits + 1];
  for (int distance = 0; distance < automata.length; distance++) {
    automata[distance] = builder.toAutomaton(distance, prefix);
  }
  return automata;
}
/**
* return an automata-based enum for matching up to editDistance from

View File

@ -17,13 +17,11 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/**
* A query that uses either an index structure (points or terms) or doc values
@ -109,16 +107,18 @@ public final class IndexOrDocValuesQuery extends Query {
return this;
}
/**
 * Requests a single MUST sub-visitor from {@code visitor} and visits the
 * index query first, then the doc values query, with it.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  final QueryVisitor shared = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  indexQuery.visit(shared);
  dvQuery.visit(shared);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final Weight indexWeight = indexQuery.createWeight(searcher, scoreMode, boost);
final Weight dvWeight = dvQuery.createWeight(searcher, scoreMode, boost);
return new Weight(this) {
/**
 * Delegates term extraction to {@code indexWeight} only; {@code dvWeight}
 * is not consulted.
 *
 * @param terms the set that receives the extracted terms
 */
@Override
public void extractTerms(Set<Term> terms) {
indexWeight.extractTerms(terms);
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
// We need to check a single doc, so the dv query should perform better

View File

@ -36,7 +36,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
@ -673,11 +672,6 @@ public class LRUQueryCache implements QueryCache, Accountable {
used = new AtomicBoolean(false);
}
/**
 * Delegates term extraction to the wrapped weight {@code in}.
 *
 * @param terms the set that receives the extracted terms
 */
@Override
public void extractTerms(Set<Term> terms) {
in.extractTerms(terms);
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
return in.matches(context, doc);

View File

@ -89,4 +89,9 @@ public final class MatchAllDocsQuery extends Query {
public int hashCode() {
return classHash();
}
/**
 * Reports this query to the visitor as a leaf. No field check is made and
 * no child queries are visited.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
}

View File

@ -18,10 +18,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
/**
* A query that matches no documents.
@ -44,10 +42,6 @@ public class MatchNoDocsQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new Weight(this) {
/**
 * Intentionally empty: nothing is added to {@code terms}.
 *
 * @param terms the set of terms, left unmodified
 */
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return Explanation.noMatch(reason);
@ -65,6 +59,11 @@ public class MatchNoDocsQuery extends Query {
};
}
/**
 * Reports this query to the visitor as a leaf. No field check is made and
 * no child queries are visited.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
return "MatchNoDocsQuery(\"" + reason + "\")";

View File

@ -21,12 +21,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
@ -203,6 +201,18 @@ public class MultiPhraseQuery extends Query {
}
}
/**
 * Reports the terms of this query, provided the visitor accepts its field.
 * A MUST sub-visitor is requested for the query as a whole; for each entry of
 * {@code termArrays} a SHOULD sub-visitor is requested from it and given
 * that entry's terms.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field)) {
    QueryVisitor phraseVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
    for (Term[] alternatives : termArrays) {
      phraseVisitor.getSubVisitor(BooleanClause.Occur.SHOULD, this).consumeTerms(this, alternatives);
    }
  }
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final Map<Term,TermStates> termStates = new HashMap<>();
@ -298,13 +308,6 @@ public class MultiPhraseQuery extends Query {
}
}
/**
 * Adds every term of every array in {@code termArrays} to {@code terms}.
 *
 * @param terms the set that receives the terms
 */
@Override
public void extractTerms(Set<Term> terms) {
for (final Term[] arr : termArrays) {
Collections.addAll(terms, arr);
}
}
};
}

View File

@ -231,4 +231,11 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
};
}
/**
 * Visits the wrapped query with a FILTER sub-visitor, but only when the
 * visitor accepts the field returned by {@code getField()}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    // the visitor is not interested in this field
    return;
  }
  QueryVisitor wrappedVisitor = visitor.getSubVisitor(Occur.FILTER, this);
  query.visit(wrappedVisitor);
}
}

View File

@ -77,6 +77,11 @@ public class NGramPhraseQuery extends Query {
return builder.build();
}
/**
 * Visits the underlying phrase query with a MUST sub-visitor obtained from
 * {@code visitor}.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  QueryVisitor phraseVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  phraseQuery.visit(phraseVisitor);
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&

View File

@ -61,6 +61,13 @@ public final class NormsFieldExistsQuery extends Query {
return "NormsFieldExistsQuery [field=" + this.field + "]";
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {

View File

@ -20,9 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
@ -255,6 +253,9 @@ public class PhraseQuery extends Query {
*/
public int getSlop() { return slop; }
/**
 * Returns the field this query applies to.
 *
 * @return the value of this query's {@code field}
 */
public String getField() {
  return field;
}
/** Returns the list of terms in this phrase. */
public Term[] getTerms() {
return terms;
@ -284,6 +285,15 @@ public class PhraseQuery extends Query {
}
}
/**
 * Passes this phrase's terms to a MUST sub-visitor, provided the visitor
 * accepts the phrase's field; otherwise nothing is reported.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field)) {
    visitor.getSubVisitor(BooleanClause.Occur.MUST, this).consumeTerms(this, terms);
  }
}
static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
final PostingsEnum postings;
final int position;
@ -460,11 +470,6 @@ public class PhraseQuery extends Query {
return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
}
}
/**
 * Adds all of this phrase's {@code terms} to {@code queryTerms}.
 *
 * @param queryTerms the set that receives the terms
 */
@Override
public void extractTerms(Set<Term> queryTerms) {
Collections.addAll(queryTerms, terms);
}
};
}

View File

@ -105,6 +105,13 @@ public abstract class PointInSetQuery extends Query {
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

View File

@ -99,6 +99,13 @@ public abstract class PointRangeQuery extends Query {
}
}
/**
 * Offers this query to {@code visitor} as a leaf, but only when the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor traversing the query tree
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    // the visitor is not interested in this field
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

View File

@ -74,6 +74,12 @@ public abstract class Query {
return this;
}
/**
 * Recurse through the query tree, visiting any child queries.
 * <p>
 * Implementations should either pass the visitor down to any child queries, or
 * report themselves by calling one of the visitor's callbacks such as
 * {@link QueryVisitor#consumeTerms(Query, org.apache.lucene.index.Term...)} or
 * {@link QueryVisitor#visitLeaf(Query)}.
 *
 * @param visitor a QueryVisitor to be called by each query in the tree
 */
public abstract void visit(QueryVisitor visitor);
/**
* Override and implement query instance equivalence properly in a subclass.
* This is required so that {@link QueryCache} works properly.

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.index.Term;
/**
 * Visitor used to walk the nested structure of a query and inspect its leaves
 *
 * @see Query#visit(QueryVisitor)
 */
public abstract class QueryVisitor {

  /**
   * A QueryVisitor whose callbacks all do nothing
   */
  public static final QueryVisitor EMPTY_VISITOR = new QueryVisitor() {};

  /**
   * Creates a {@code QueryVisitor} that adds every term handed to it to the given set
   * @param termSet a {@code Set} that receives the collected terms
   * @return a visitor that collects terms into {@code termSet}
   */
  public static QueryVisitor termCollector(Set<Term> termSet) {
    return new TermCollector(termSet);
  }

  /**
   * Callback for leaf queries that match on specific terms; does nothing by default
   *
   * @param query the leaf query
   * @param terms the terms the query will match on
   */
  public void consumeTerms(Query query, Term... terms) { }

  // TODO it would be nice to have a way to consume 'classes' of Terms from
  // things like AutomatonQuery

  /**
   * Callback for leaf queries that do not match on terms; does nothing by default
   * @param query the query
   */
  public void visitLeaf(Query query) { }

  /**
   * Tells queries whether this visitor is interested in the given field
   *
   * Override this to avoid collecting terms from heavy queries such as {@link TermInSetQuery}
   * that are not running on fields of interest. The default implementation accepts every field.
   *
   * @param field the field a query runs on
   * @return {@code true} if queries on {@code field} should report to this visitor
   */
  public boolean acceptField(String field) {
    return true;
  }

  /**
   * Supplies the visitor that child clauses of a query should report to
   *
   * The default implementation hands back {@link #EMPTY_VISITOR} when {@code occur} is
   * {@link BooleanClause.Occur#MUST_NOT}, and {@code this} for every other value
   *
   * @param occur the relationship between the parent and its children
   * @param parent the query visited
   * @return the visitor to pass to the children of {@code parent}
   */
  public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
    return occur == BooleanClause.Occur.MUST_NOT ? EMPTY_VISITOR : this;
  }

  /**
   * Visitor backing {@link #termCollector(Set)}: copies all consumed terms into a set
   */
  private static final class TermCollector extends QueryVisitor {

    private final Set<Term> sink;

    TermCollector(Set<Term> sink) {
      this.sink = sink;
    }

    @Override
    public void consumeTerms(Query query, Term... terms) {
      sink.addAll(Arrays.asList(terms));
    }
  }

}

View File

@ -25,7 +25,6 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.index.Impact;
@ -54,7 +53,9 @@ import org.apache.lucene.util.PriorityQueue;
* term frequencies for the document.
*/
public final class SynonymQuery extends Query {
private final TermAndBoost terms[];
private final String field;
/**
* A builder for {@link SynonymQuery}.
@ -102,7 +103,7 @@ public final class SynonymQuery extends Query {
*/
public SynonymQuery build() {
Collections.sort(terms, Comparator.comparing(a -> a.term));
return new SynonymQuery(terms.toArray(new TermAndBoost[0]));
return new SynonymQuery(terms.toArray(new TermAndBoost[0]), field);
}
}
@ -111,8 +112,9 @@ public final class SynonymQuery extends Query {
* <p>
* The terms must all have the same field.
*/
private SynonymQuery(TermAndBoost[] terms) {
private SynonymQuery(TermAndBoost[] terms, String field) {
this.terms = Objects.requireNonNull(terms);
this.field = field;
}
public List<Term> getTerms() {
@ -164,6 +166,16 @@ public final class SynonymQuery extends Query {
return this;
}
/**
 * Hands the synonym terms to the visitor as one SHOULD group, provided the visitor
 * accepts this query's field.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field)) {
    QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this);
    // strip the boosts: the visitor only receives the bare terms
    Term[] synonyms = new Term[terms.length];
    for (int i = 0; i < synonyms.length; i++) {
      synonyms[i] = terms[i].term;
    }
    sub.consumeTerms(this, synonyms);
  }
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
if (scoreMode.needsScores()) {
@ -209,13 +221,6 @@ public final class SynonymQuery extends Query {
}
}
@Override
public void extractTerms(Set<Term> terms) {
for (TermAndBoost term : SynonymQuery.this.terms) {
terms.add(term.term);
}
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
String field = terms[0].term.field();

View File

@ -23,7 +23,6 @@ import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import org.apache.lucene.index.IndexReader;
@ -33,8 +32,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
@ -122,6 +121,20 @@ public class TermInSetQuery extends Query implements Accountable {
return super.rewrite(reader);
}
/**
 * Reports all terms of this query to the visitor as one SHOULD group.
 * <p>
 * Nothing is reported when the visitor rejects this query's field. The terms are only
 * copied out of {@code termData} when the sub-visitor is something other than
 * {@link QueryVisitor#EMPTY_VISITOR}, because that visitor ignores every callback and
 * this query may hold a very large number of terms.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    return;
  }
  QueryVisitor v = visitor.getSubVisitor(Occur.SHOULD, this);
  if (v == QueryVisitor.EMPTY_VISITOR) {
    // the no-op visitor would discard the array, so skip building it
    return;
  }
  List<Term> terms = new ArrayList<>();
  TermIterator iterator = termData.iterator();
  for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
    // deep copy, presumably because the iterator reuses its BytesRef between calls
    terms.add(new Term(field, BytesRef.deepCopyOf(term)));
  }
  v.consumeTerms(this, terms.toArray(new Term[0]));
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
@ -212,14 +225,6 @@ public class TermInSetQuery extends Query implements Accountable {
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public void extractTerms(Set<Term> terms) {
// no-op
// This query is for abuse cases when the number of terms is too high to
// run efficiently as a BooleanQuery. So likewise we hide its terms in
// order to protect highlighters
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
Terms terms = context.reader().terms(field);

View File

@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
@ -75,11 +74,6 @@ public class TermQuery extends Query {
}
}
@Override
public void extractTerms(Set<Term> terms) {
terms.add(getTerm());
}
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
TermsEnum te = getTermsEnum(context);
@ -205,6 +199,13 @@ public class TermQuery extends Query {
return new TermWeight(searcher, scoreMode, boost, termState);
}
/**
 * Hands this query's single term to the visitor, unless the visitor rejects the term's field.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(term.field()) == false) {
    return;
  }
  visitor.consumeTerms(this, term);
}
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {

View File

@ -18,12 +18,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
/**
@ -61,14 +59,6 @@ public abstract class Weight implements SegmentCacheable {
this.parentQuery = query;
}
/**
* Expert: adds all terms occurring in this query to the terms set. If the
* {@link Weight} was created with {@code needsScores == true} then this
* method will only extract terms which are used for scoring, otherwise it
* will extract all terms which are used for matching.
*/
public abstract void extractTerms(Set<Term> terms);
/**
* Returns {@link Matches} for a specific document, or {@code null} if the document
* does not match the parent query

View File

@ -398,12 +398,6 @@
* {@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(Explanation, long) SimScorer#explain(Explanation freq, long norm)}.
* </li>
* <li>
* {@link org.apache.lucene.search.Weight#extractTerms(java.util.Set) extractTerms(Set&lt;Term&gt; terms)} &mdash; Extract terms that
* this query operates on. This is typically used to support distributed search: knowing the terms that a query operates on helps
* merge index statistics of these terms so that scores are computed over a subset of the data like they would if all documents
* were in the same index.
* </li>
* <li>
* {@link org.apache.lucene.search.Weight#matches matches(LeafReaderContext context, int doc)} &mdash; Give information about positions
* and offsets of matches. This is typically useful to implement highlighting.
* </li>

View File

@ -17,14 +17,16 @@
package org.apache.lucene.search.spans;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
/**
* <p>Wrapper to allow {@link SpanQuery} objects to participate in composite
* single-field SpanQueries by 'lying' about their search field. That is,
@ -104,6 +106,13 @@ public final class FieldMaskingSpanQuery extends SpanQuery {
return super.rewrite(reader);
}
/**
 * Visits the masked query through a MUST sub-visitor, unless the visitor rejects
 * this query's {@code field}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    return;
  }
  QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  maskedQuery.visit(sub);
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();

View File

@ -21,9 +21,11 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
/**
@ -93,6 +95,13 @@ public final class SpanBoostQuery extends SpanQuery {
return super.rewrite(reader);
}
/**
 * Visits the wrapped query through a MUST sub-visitor, unless the visitor rejects
 * the field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    return;
  }
  QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  query.visit(sub);
}
@Override
public String toString(String field) {
StringBuilder builder = new StringBuilder();

View File

@ -17,18 +17,19 @@
package org.apache.lucene.search.spans;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
abstract class SpanContainQuery extends SpanQuery implements Cloneable {
@ -68,15 +69,6 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
this.littleWeight = littleWeight;
}
/**
* Extract terms from both <code>big</code> and <code>little</code>.
*/
@Override
public void extractTerms(Set<Term> terms) {
bigWeight.extractTerms(terms);
littleWeight.extractTerms(terms);
}
ArrayList<Spans> prepareConjunction(final LeafReaderContext context, Postings postings) throws IOException {
Spans bigSpans = bigWeight.getSpans(context, postings);
if (bigSpans == null) {
@ -128,6 +120,15 @@ abstract class SpanContainQuery extends SpanQuery implements Cloneable {
return super.rewrite(reader);
}
/**
 * Visits {@code big} and then {@code little} with one shared MUST sub-visitor, unless
 * the visitor rejects the field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    return;
  }
  QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  big.visit(sub);
  little.visit(sub);
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
@ -121,7 +122,14 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
public Query rewrite(IndexReader reader) throws IOException {
return rewriteMethod.rewrite(reader, query);
}
/**
 * Visits the wrapped multi-term query through a MUST sub-visitor, unless the visitor
 * rejects that query's field.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(query.getField()) == false) {
    return;
  }
  QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this);
  query.visit(sub);
}
@Override
public int hashCode() {
return classHash() * 31 + query.hashCode();

View File

@ -24,15 +24,16 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;
@ -225,13 +226,6 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
: new NearSpansOrdered(slop, subSpans);
}
@Override
public void extractTerms(Set<Term> terms) {
for (SpanWeight w : subWeights) {
w.extractTerms(terms);
}
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
for (Weight w : subWeights) {
@ -265,6 +259,17 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
return super.rewrite(reader);
}
/**
 * Visits every clause, in order, with one shared MUST sub-visitor, provided the
 * visitor accepts the field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField())) {
    QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
    for (SpanQuery child : clauses) {
      child.visit(sub);
    }
  }
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
@ -301,6 +306,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
return field;
}
/**
 * Reports this gap to the visitor as a leaf, provided the visitor accepts the field
 * returned by {@code getField()}.
 * <p>
 * The field check matches how the other leaf queries guard their
 * {@code visitLeaf} calls, so a visitor that rejects the field is not notified.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField())) {
    visitor.visitLeaf(this);
  }
}
@Override
public String toString(String field) {
return "SpanGap(" + field + ":" + width + ")";
@ -327,11 +337,6 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
return new GapSpans(width);
}
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;

View File

@ -20,15 +20,16 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TwoPhaseIterator;
@ -188,11 +189,6 @@ public final class SpanNotQuery extends SpanQuery {
};
}
@Override
public void extractTerms(Set<Term> terms) {
includeWeight.extractTerms(terms);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return includeWeight.isCacheable(ctx) && excludeWeight.isCacheable(ctx);
@ -209,7 +205,16 @@ public final class SpanNotQuery extends SpanQuery {
}
return super.rewrite(reader);
}
/** Returns true iff <code>o</code> is equal to this. */
/**
 * Visits {@code include} through a MUST sub-visitor and then {@code exclude} through a
 * MUST_NOT sub-visitor, unless the visitor rejects the field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    return;
  }
  // each sub-visitor is requested immediately before its clause is visited
  QueryVisitor includeVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  include.visit(includeVisitor);
  QueryVisitor excludeVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST_NOT, this);
  exclude.visit(excludeVisitor);
}
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&

View File

@ -22,17 +22,18 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.DisiPriorityQueue;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
@ -88,6 +89,17 @@ public final class SpanOrQuery extends SpanQuery {
return super.rewrite(reader);
}
/**
 * Visits every clause, in order, with one shared SHOULD sub-visitor, provided the
 * visitor accepts the field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField())) {
    QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this);
    for (SpanQuery child : clauses) {
      child.visit(sub);
    }
  }
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
@ -133,13 +145,6 @@ public final class SpanOrQuery extends SpanQuery {
this.subWeights = subWeights;
}
@Override
public void extractTerms(Set<Term> terms) {
for (final SpanWeight w: subWeights) {
w.extractTerms(terms);
}
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
for (Weight w : subWeights) {

View File

@ -20,14 +20,15 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
@ -82,11 +83,6 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
this.matchWeight = matchWeight;
}
@Override
public void extractTerms(Set<Term> terms) {
matchWeight.extractTerms(terms);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return matchWeight.isCacheable(ctx);
@ -126,6 +122,13 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
return super.rewrite(reader);
}
/**
 * Visits {@code match} through a MUST sub-visitor, unless the visitor rejects the
 * field returned by {@code getField()}.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(getField()) == false) {
    return;
  }
  QueryVisitor sub = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  match.visit(sub);
}
/** Returns true iff <code>other</code> is equal to this. */
@Override
public boolean equals(Object other) {

View File

@ -21,18 +21,18 @@ import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
/** Matches spans containing a term.
@ -84,6 +84,13 @@ public class SpanTermQuery extends SpanQuery {
return new SpanTermWeight(context, searcher, scoreMode.needsScores() ? Collections.singletonMap(term, context) : null, boost);
}
/**
 * Hands this query's single term to the visitor, unless the visitor rejects the term's field.
 *
 * @param visitor the visitor to report to
 */
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(term.field()) == false) {
    return;
  }
  visitor.consumeTerms(this, term);
}
public class SpanTermWeight extends SpanWeight {
final TermStates termStates;
@ -94,11 +101,6 @@ public class SpanTermQuery extends SpanQuery {
assert termStates != null : "TermStates must not be null";
}
@Override
public void extractTerms(Set<Term> terms) {
terms.add(term);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;

View File

@ -17,15 +17,10 @@
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -272,27 +267,6 @@ public class TestFeatureField extends LuceneTestCase {
dir.close();
}
public void testExtractTerms() throws IOException {
IndexReader reader = new MultiReader();
IndexSearcher searcher = newSearcher(reader);
Query query = FeatureField.newLogQuery("field", "term", 2f, 42);
Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1f);
Set<Term> terms = new HashSet<>();
weight.extractTerms(terms);
assertEquals(Collections.emptySet(), terms);
terms = new HashSet<>();
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
weight.extractTerms(terms);
assertEquals(Collections.singleton(new Term("field", "term")), terms);
terms = new HashSet<>();
weight = searcher.createWeight(query, ScoreMode.TOP_SCORES, 1f);
weight.extractTerms(terms);
assertEquals(Collections.singleton(new Term("field", "term")), terms);
}
public void testDemo() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig()

View File

@ -18,11 +18,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.PriorityQueue;
@ -153,6 +151,11 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public void visit(QueryVisitor visitor) {
  // Stub only: this method is not implemented and always throws
  throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public boolean equals(Object obj) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
@ -242,11 +245,6 @@ final class JustCompileSearch {
super(null);
}
@Override
public void extractTerms(Set<Term> terms) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);

View File

@ -22,16 +22,13 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.Map;
import java.util.Set;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestBoolean2ScorerSupplier extends LuceneTestCase {
private static class FakeWeight extends Weight {
@ -40,11 +37,6 @@ public class TestBoolean2ScorerSupplier extends LuceneTestCase {
super(new MatchNoDocsQuery());
}
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;

View File

@ -22,14 +22,13 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -52,8 +51,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestBooleanQuery extends LuceneTestCase {
public void testEquality() throws Exception {
@ -754,7 +751,9 @@ public class TestBooleanQuery extends LuceneTestCase {
assertEquals("a +b -c #d", bq.build().toString("field"));
}
public void testExtractTerms() throws IOException {
public void testQueryVisitor() throws IOException {
Term a = new Term("f", "a");
Term b = new Term("f", "b");
Term c = new Term("f", "c");
@ -764,15 +763,37 @@ public class TestBooleanQuery extends LuceneTestCase {
bqBuilder.add(new TermQuery(b), Occur.MUST);
bqBuilder.add(new TermQuery(c), Occur.FILTER);
bqBuilder.add(new TermQuery(d), Occur.MUST_NOT);
IndexSearcher searcher = new IndexSearcher(new MultiReader());
BooleanQuery bq = bqBuilder.build();
Set<Term> scoringTerms = new HashSet<>();
searcher.createWeight(searcher.rewrite(bq), ScoreMode.COMPLETE, 1).extractTerms(scoringTerms);
assertEquals(new HashSet<>(Arrays.asList(a, b)), scoringTerms);
bq.visit(new QueryVisitor() {
Set<Term> matchingTerms = new HashSet<>();
searcher.createWeight(searcher.rewrite(bq), ScoreMode.COMPLETE_NO_SCORES, 1).extractTerms(matchingTerms);
assertEquals(new HashSet<>(Arrays.asList(a, b, c)), matchingTerms);
Term expected;
@Override
public QueryVisitor getSubVisitor(Occur occur, Query parent) {
switch (occur) {
case SHOULD:
expected = a;
break;
case MUST:
expected = b;
break;
case FILTER:
expected = c;
break;
case MUST_NOT:
expected = d;
break;
default:
throw new IllegalStateException();
}
return this;
}
@Override
public void consumeTerms(Query query, Term... terms) {
assertEquals(expected, terms[0]);
}
});
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -79,11 +78,6 @@ public class TestBooleanScorer extends LuceneTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new Weight(CrazyMustUseBulkScorerQuery.this) {
@Override
public void extractTerms(Set<Term> terms) {
throw new UnsupportedOperationException();
}
@Override
public Explanation explain(LeafReaderContext context, int doc) {
throw new UnsupportedOperationException();
@ -118,6 +112,11 @@ public class TestBooleanScorer extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
  // Intentionally empty: this test query reports nothing to the visitor
}
@Override
public boolean equals(Object obj) {
return this == obj;

View File

@ -22,10 +22,8 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
@ -91,11 +89,6 @@ public class TestConjunctionDISI extends LuceneTestCase {
super(new MatchNoDocsQuery());
}
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;

View File

@ -19,15 +19,11 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@ -157,6 +153,11 @@ public class TestConstantScoreQuery extends LuceneTestCase {
return in.createWeight(searcher, scoreMode, boost);
}
@Override
public void visit(QueryVisitor visitor) {
  // Forward the visitor unchanged to the query held in 'in'; no sub-visitor is requested
  in.visit(visitor);
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
@ -230,17 +231,4 @@ public class TestConstantScoreQuery extends LuceneTestCase {
dir.close();
}
public void testExtractTerms() throws Exception {
final IndexSearcher searcher = newSearcher(new MultiReader());
final TermQuery termQuery = new TermQuery(new Term("foo", "bar"));
final Query csq = searcher.rewrite(new ConstantScoreQuery(termQuery));
final Set<Term> scoringTerms = new HashSet<>();
searcher.createWeight(csq, ScoreMode.COMPLETE, 1).extractTerms(scoringTerms);
assertEquals(Collections.emptySet(), scoringTerms);
final Set<Term> matchingTerms = new HashSet<>();
searcher.createWeight(csq, ScoreMode.COMPLETE_NO_SCORES, 1).extractTerms(matchingTerms);
assertEquals(Collections.singleton(new Term("foo", "bar")), matchingTerms);
}
}

View File

@ -29,7 +29,6 @@ import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@ -378,6 +377,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
  // Intentionally empty: this test query reports nothing to the visitor
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
@ -972,6 +976,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
  // Intentionally empty: this test query reports nothing to the visitor
}
@Override
public String toString(String field) {
return "BadQuery";
@ -1304,11 +1313,6 @@ public class TestLRUQueryCache extends LuceneTestCase {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;
@ -1326,6 +1330,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public String toString(String field) {
return "NoCacheQuery";
@ -1410,6 +1419,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public boolean equals(Object other) {
return sameClassAs(other);
@ -1494,6 +1508,11 @@ public class TestLRUQueryCache extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
}
public void testDocValuesUpdatesDontBreakCache() throws IOException {

View File

@ -22,10 +22,8 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -39,11 +37,6 @@ public class TestMaxScoreSumPropagator extends LuceneTestCase {
super(new MatchNoDocsQuery());
}
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return null;

View File

@ -183,6 +183,11 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
public String toString(String field) {
return "dummy";
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
};
mtq.setRewriteMethod(method);
final Query q1 = searcher.rewrite(mtq);

View File

@ -121,6 +121,11 @@ public class TestNeedsScores extends LuceneTestCase {
}
}
/** Forwards the visitor unchanged to the {@code in} delegate. */
@Override
public void visit(QueryVisitor visitor) {
in.visit(visitor);
}
@Override
public int hashCode() {
final int prime = 31;

View File

@ -108,6 +108,11 @@ public class TestPrefixRandom extends LuceneTestCase {
return field.toString() + ":" + prefix.toString();
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj) == false) {

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -422,10 +421,6 @@ public class TestQueryRescorer extends LuceneTestCase {
return new Weight(FixedScoreQuery.this) {
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Scorer scorer(final LeafReaderContext context) throws IOException {
@ -499,6 +494,11 @@ public class TestQueryRescorer extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public String toString(String field) {
return "FixedScoreQuery " + idToNum.length + " ids; reverse=" + reverse;

View File

@ -0,0 +1,333 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.LuceneTestCase;
import static org.hamcrest.CoreMatchers.equalTo;
public class TestQueryVisitor extends LuceneTestCase {
// Shared fixture visited by every test in this class. It is a BooleanQuery with:
//   MUST     - TermQuery "t1"
//   MUST     - nested BooleanQuery with SHOULD "tm2" and SHOULD "tm3" (boost 2)
//   MUST     - PhraseQuery "term4 term5" (boost 3)
//   MUST     - SpanNearQuery over "term6" and "term7"
//   MUST_NOT - TermQuery "term8"
//   SHOULD   - PrefixQuery "term9"
//   SHOULD   - BooleanQuery (boost 2) with MUST "term10" (boost 3)
// Every term is on "field1" except "term10", which is on "field2".
private static final Query query = new BooleanQuery.Builder()
.add(new TermQuery(new Term("field1", "t1")), BooleanClause.Occur.MUST)
.add(new BooleanQuery.Builder()
.add(new TermQuery(new Term("field1", "tm2")), BooleanClause.Occur.SHOULD)
.add(new BoostQuery(new TermQuery(new Term("field1", "tm3")), 2), BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.MUST)
.add(new BoostQuery(new PhraseQuery.Builder()
.add(new Term("field1", "term4"))
.add(new Term("field1", "term5"))
.build(), 3), BooleanClause.Occur.MUST)
.add(new SpanNearQuery(new SpanQuery[]{
new SpanTermQuery(new Term("field1", "term6")),
new SpanTermQuery(new Term("field1", "term7"))
}, 2, true), BooleanClause.Occur.MUST)
.add(new TermQuery(new Term("field1", "term8")), BooleanClause.Occur.MUST_NOT)
.add(new PrefixQuery(new Term("field1", "term9")), BooleanClause.Occur.SHOULD)
.add(new BoostQuery(new BooleanQuery.Builder()
.add(new BoostQuery(new TermQuery(new Term("field2", "term10")), 3), BooleanClause.Occur.MUST)
.build(), 2), BooleanClause.Occur.SHOULD)
.build();
/**
 * Runs the stock {@code QueryVisitor.termCollector} visitor over the shared fixture query.
 * The expected set holds every literal term of the fixture except "term8" (the MUST_NOT
 * clause) and "term9" (the PrefixQuery).
 */
public void testExtractTermsEquivalent() {
  Set<Term> expectedTerms = new HashSet<>();
  for (String text : new String[] {"t1", "tm2", "tm3", "term4", "term5", "term6", "term7"}) {
    expectedTerms.add(new Term("field1", text));
  }
  expectedTerms.add(new Term("field2", "term10"));

  Set<Term> collected = new HashSet<>();
  query.visit(QueryVisitor.termCollector(collected));

  assertThat(collected, equalTo(expectedTerms));
}
/**
 * Collects terms with a visitor whose {@code getSubVisitor} always returns itself, so the
 * visitor never prunes a clause. The expected set therefore also contains "term8" from the
 * MUST_NOT clause; "term9" is absent because it only appears in a PrefixQuery.
 *
 * <p>The method name carries the {@code test} prefix used by the other tests in this class
 * so that name-based test discovery actually runs it.
 */
public void testExtractAllTerms() {
  Set<Term> terms = new HashSet<>();
  QueryVisitor visitor = new QueryVisitor() {
    @Override
    public void consumeTerms(Query query, Term... ts) {
      terms.addAll(Arrays.asList(ts));
    }

    @Override
    public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
      // Never prune: keep using this visitor for every clause type, MUST_NOT included.
      return this;
    }
  };
  Set<Term> expected = new HashSet<>(Arrays.asList(
      new Term("field1", "t1"), new Term("field1", "tm2"),
      new Term("field1", "tm3"), new Term("field1", "term4"),
      new Term("field1", "term5"), new Term("field1", "term6"),
      new Term("field1", "term7"), new Term("field1", "term8"),
      new Term("field2", "term10")
  ));
  query.visit(visitor);
  assertThat(terms, equalTo(expected));
}
/**
 * Collects terms with a visitor that only accepts "field2"; the single expected term is
 * "term10", the only term of the fixture query on that field.
 *
 * <p>The method name carries the {@code test} prefix used by the other tests in this class
 * so that name-based test discovery actually runs it.
 */
public void testExtractTermsFromField() {
  final Set<Term> actual = new HashSet<>();
  Set<Term> expected = new HashSet<>(Arrays.asList(new Term("field2", "term10")));
  query.visit(new QueryVisitor() {
    @Override
    public boolean acceptField(String field) {
      // Constant-first comparison is also safe if field is null.
      return "field2".equals(field);
    }

    @Override
    public void consumeTerms(Query query, Term... terms) {
      actual.addAll(Arrays.asList(terms));
    }
  });
  assertThat(actual, equalTo(expected));
}
/**
 * Visitor that maps every consumed term to the boost in effect where it was found. Each
 * time it descends through a {@link BoostQuery} it hands out a new extractor whose boost
 * is multiplied by that query's boost; all extractors write into the same map.
 */
static class BoostedTermExtractor extends QueryVisitor {

  final float boost;
  final Map<Term, Float> termsToBoosts;

  BoostedTermExtractor(float boost, Map<Term, Float> termsToBoosts) {
    this.boost = boost;
    this.termsToBoosts = termsToBoosts;
  }

  /** Records each term with this extractor's boost, replacing any earlier entry. */
  @Override
  public void consumeTerms(Query query, Term... terms) {
    for (int i = 0; i < terms.length; i++) {
      termsToBoosts.put(terms[i], boost);
    }
  }

  /** Multiplies the boost below a BoostQuery; otherwise defers to the default behaviour. */
  @Override
  public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
    if (!(parent instanceof BoostQuery)) {
      return super.getSubVisitor(occur, parent);
    }
    BoostQuery boosted = (BoostQuery) parent;
    return new BoostedTermExtractor(boost * boosted.getBoost(), termsToBoosts);
  }
}
/**
 * Visits the fixture query with a {@code BoostedTermExtractor} starting at boost 1 and
 * checks the boost recorded per term, e.g. 6 for "term10", which sits below boosts 2 and 3.
 */
public void testExtractTermsAndBoosts() {
  Map<Term, Float> expectedBoosts = new HashMap<>();
  expectedBoosts.put(new Term("field1", "t1"), 1f);
  expectedBoosts.put(new Term("field1", "tm2"), 1f);
  expectedBoosts.put(new Term("field1", "tm3"), 2f);
  expectedBoosts.put(new Term("field1", "term4"), 3f);
  expectedBoosts.put(new Term("field1", "term5"), 3f);
  expectedBoosts.put(new Term("field1", "term6"), 1f);
  expectedBoosts.put(new Term("field1", "term7"), 1f);
  expectedBoosts.put(new Term("field2", "term10"), 6f);

  Map<Term, Float> collectedBoosts = new HashMap<>();
  query.visit(new BoostedTermExtractor(1, collectedBoosts));

  assertThat(collectedBoosts, equalTo(expectedBoosts));
}
/**
 * Counts, per concrete query class, how often the visitor's {@code consumeTerms} or
 * {@code visitLeaf} callback fires for the fixture query, then checks the totals for
 * TermQuery (4) and PhraseQuery (1).
 */
public void testLeafQueryTypeCounts() {
  Map<Class<? extends Query>, Integer> countsByType = new HashMap<>();
  QueryVisitor counter = new QueryVisitor() {
    // Adds one to the tally for q's runtime class, starting at 1 for a class not seen before.
    private void tally(Query q) {
      countsByType.merge(q.getClass(), 1, Integer::sum);
    }

    @Override
    public void consumeTerms(Query query, Term... terms) {
      tally(query);
    }

    @Override
    public void visitLeaf(Query query) {
      tally(query);
    }
  };
  query.visit(counter);

  assertEquals(4, countsByType.get(TermQuery.class).intValue());
  assertEquals(1, countsByType.get(PhraseQuery.class).intValue());
}
/**
 * Visitor that records what it is shown as a tree of nodes: every sub-visitor it hands out
 * (other than {@code EMPTY_VISITOR}) is a new node appended to {@link #children}.
 */
static abstract class QueryNode extends QueryVisitor {
// Child nodes, in creation order until a subclass re-sorts them.
final List<QueryNode> children = new ArrayList<>();
// Weight of this node; ConjunctionNode and DisjunctionNode sort their children by it.
abstract int getWeight();
// Adds the terms currently selected by this node to the given set.
abstract void collectTerms(Set<Term> terms);
// Tries to move on to another term selection and reports whether it did
// (TermNode always returns false).
abstract boolean nextTermSet();
/**
 * Chooses the node type for a clause group: a conjunction for MUST/FILTER, nothing for
 * MUST_NOT, and a disjunction otherwise, unless the parent BooleanQuery already has
 * MUST or FILTER clauses.
 */
@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
// Required clauses are collected under a new conjunction child.
if (occur == BooleanClause.Occur.MUST || occur == BooleanClause.Occur.FILTER) {
QueryNode n = new ConjunctionNode();
children.add(n);
return n;
}
// Prohibited clauses are not recorded at all.
if (occur == BooleanClause.Occur.MUST_NOT) {
return QueryVisitor.EMPTY_VISITOR;
}
// Remaining clauses of a BooleanQuery that also has MUST or FILTER clauses are ignored.
if (parent instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) parent;
if (bq.getClauses(BooleanClause.Occur.MUST).size() > 0 || bq.getClauses(BooleanClause.Occur.FILTER).size() > 0) {
return QueryVisitor.EMPTY_VISITOR;
}
}
// Anything else is collected under a new disjunction child.
DisjunctionNode n = new DisjunctionNode();
children.add(n);
return n;
}
}
/** Leaf node wrapping a single term; it never offers an alternative term set. */
static class TermNode extends QueryNode {

  final Term term;

  TermNode(Term term) {
    this.term = term;
  }

  /** The weight is the character length of the term's text. */
  @Override
  int getWeight() {
    String text = term.text();
    return text.length();
  }

  @Override
  void collectTerms(Set<Term> terms) {
    terms.add(this.term);
  }

  @Override
  boolean nextTermSet() {
    return false;
  }

  @Override
  public String toString() {
    String rendered = term.toString();
    return "TERM(" + rendered + ")";
  }
}
/**
 * Node for clauses that must all match. It is represented by whichever child currently has
 * the smallest weight. All accessors expect at least one child and throw otherwise.
 */
static class ConjunctionNode extends QueryNode {

  /** Sorts the children by ascending weight (in place) and returns the first one. */
  private QueryNode lightestChild() {
    children.sort(Comparator.comparingInt(QueryNode::getWeight));
    return children.get(0);
  }

  @Override
  int getWeight() {
    return lightestChild().getWeight();
  }

  @Override
  void collectTerms(Set<Term> terms) {
    lightestChild().collectTerms(terms);
  }

  /**
   * Lets the lightest child advance first. If it cannot and other children remain, drops
   * it so the next child in weight order takes over. Returns false only when a single
   * child that cannot advance is left.
   */
  @Override
  boolean nextTermSet() {
    QueryNode lightest = lightestChild();
    if (lightest.nextTermSet()) {
      return true;
    }
    if (children.size() == 1) {
      return false;
    }
    // The list has not been re-sorted since lightestChild(), so index 0 is still that child.
    children.remove(0);
    return true;
  }

  /** Every term consumed directly by this node becomes a TermNode child. */
  @Override
  public void consumeTerms(Query query, Term... terms) {
    for (int i = 0; i < terms.length; i++) {
      children.add(new TermNode(terms[i]));
    }
  }

  @Override
  public String toString() {
    return children.stream()
        .map(node -> node.toString())
        .collect(Collectors.joining(",", "AND(", ")"));
  }
}
/**
 * Node for alternative clauses: it contributes the terms of all its children, and its
 * weight is that of its heaviest child.
 */
static class DisjunctionNode extends QueryNode {

  /** Sorts the children by descending weight (in place) and returns the largest weight. */
  @Override
  int getWeight() {
    Comparator<QueryNode> heaviestFirst = Comparator.comparingInt(QueryNode::getWeight).reversed();
    children.sort(heaviestFirst);
    return children.get(0).getWeight();
  }

  @Override
  void collectTerms(Set<Term> terms) {
    children.forEach(child -> child.collectTerms(terms));
  }

  /** Advances every child (no short-circuit) and reports whether any of them moved. */
  @Override
  boolean nextTermSet() {
    boolean advanced = false;
    for (QueryNode child : children) {
      boolean childAdvanced = child.nextTermSet();
      advanced = advanced || childAdvanced;
    }
    return advanced;
  }

  /** Every term consumed directly by this node becomes a TermNode child. */
  @Override
  public void consumeTerms(Query query, Term... terms) {
    for (int i = 0; i < terms.length; i++) {
      children.add(new TermNode(terms[i]));
    }
  }

  @Override
  public String toString() {
    return children.stream()
        .map(node -> node.toString())
        .collect(Collectors.joining(",", "OR(", ")"));
  }
}
/**
 * Builds a node tree from the fixture query with a ConjunctionNode root, then checks the
 * first collected term set ("t1") and, after one {@code nextTermSet()} step, the second
 * ("tm2" and "tm3").
 */
public void testExtractMatchingTermSet() {
  QueryNode root = new ConjunctionNode();
  query.visit(root);

  Set<Term> firstExpected = new HashSet<>(Collections.singletonList(new Term("field1", "t1")));
  Set<Term> firstActual = new HashSet<>();
  root.collectTerms(firstActual);
  assertThat(firstActual, equalTo(firstExpected));

  assertTrue(root.nextTermSet());

  Set<Term> secondExpected = new HashSet<>(Arrays.asList(new Term("field1", "tm2"), new Term("field1", "tm3")));
  Set<Term> secondActual = new HashSet<>();
  root.collectTerms(secondActual);
  assertThat(secondActual, equalTo(secondExpected));
}
}

View File

@ -40,9 +40,9 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;
/**
@ -138,6 +138,11 @@ public class TestRegexpRandom2 extends LuceneTestCase {
return field.toString() + automaton.toString();
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj) == false) {

View File

@ -142,7 +142,12 @@ public class TestScorerPerf extends LuceneTestCase {
}
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public String toString(String field) {
return "randomBitSetFilter";

View File

@ -257,6 +257,11 @@ public class TestSortRandom extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public String toString(String field) {
return "RandomFilter(density=" + density + ")";

View File

@ -132,6 +132,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
}
}

View File

@ -354,6 +354,11 @@ public class TestWANDScorer extends LuceneTestCase {
return super.rewrite(reader);
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
return new FilterWeight(query.createWeight(searcher, scoreMode, boost)) {

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
/**
@ -98,6 +99,11 @@ final class JustCompileSearchSpans {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
/**
 * Always throws {@link UnsupportedOperationException} with {@code UNSUPPORTED_MSG}; the
 * visitor is never called.
 */
@Override
public void visit(QueryVisitor visitor) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public String toString(String field) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);

View File

@ -17,6 +17,9 @@
package org.apache.lucene.search.spans;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -27,6 +30,7 @@ import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
@ -34,9 +38,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.util.HashSet;
import java.util.Set;
import static org.apache.lucene.search.spans.SpanTestUtil.assertFinished;
import static org.apache.lucene.search.spans.SpanTestUtil.assertNext;
@ -143,7 +144,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
QueryUtils.checkEqual(q, qr);
Set<Term> terms = new HashSet<>();
qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms);
qr.visit(QueryVisitor.termCollector(terms));
assertEquals(1, terms.size());
}
@ -163,7 +164,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
QueryUtils.checkUnequal(q, qr);
Set<Term> terms = new HashSet<>();
qr.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(terms);
qr.visit(QueryVisitor.termCollector(terms));
assertEquals(2, terms.size());
}
@ -177,7 +178,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase {
QueryUtils.checkEqual(q, qr);
HashSet<Term> set = new HashSet<>();
qr.createWeight(searcher, ScoreMode.COMPLETE, 1f).extractTerms(set);
qr.visit(QueryVisitor.termCollector(set));
assertEquals(2, set.size());
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
/**
@ -149,6 +150,11 @@ public final class DrillDownQuery extends Query {
return getBooleanQuery().toString(field);
}
/** Reports this query to the visitor as a single leaf; no sub-queries are visited here. */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
private BooleanQuery getBooleanQuery() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
if (baseQuery != null) {

View File

@ -20,12 +20,10 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.facet.DrillSidewaysScorer.DocsAndCost;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreScorer;
@ -33,6 +31,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@ -78,6 +77,11 @@ class DrillSidewaysQuery extends Query {
return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
}
}
/** Reports this query to the visitor as a single leaf; no sub-queries are visited here. */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
@ -88,9 +92,6 @@ class DrillSidewaysQuery extends Query {
}
return new Weight(DrillSidewaysQuery.this) {
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return baseWeight.explain(context, doc);

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -141,6 +142,11 @@ public final class DoubleRange extends Range {
return "Filter(" + range.toString() + ")";
}
/** Reports this query to the visitor as a single leaf; no sub-queries are visited here. */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (fastMatchQuery != null) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -133,6 +134,11 @@ public final class LongRange extends Range {
return "Filter(" + range.toString() + ")";
}
/** Reports this query to the visitor as a single leaf; no sub-queries are visited here. */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
if (fastMatchQuery != null) {

View File

@ -52,6 +52,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
@ -749,6 +750,11 @@ public class TestDrillSideways extends FacetTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
// Intentionally empty: this query reports nothing to the visitor.
}
@Override
public String toString(String field) {
return "drillSidewaysTestFilter";

View File

@ -53,6 +53,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
@ -714,6 +715,11 @@ public class TestRangeFacetCounts extends FacetTestCase {
return super.rewrite(reader);
}
/** Forwards the visitor unchanged to the {@code in} delegate. */
@Override
public void visit(QueryVisitor visitor) {
in.visit(visitor);
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final Weight in = this.in.createWeight(searcher, scoreMode, boost);

View File

@ -15,19 +15,20 @@
* limitations under the License.
*/
package org.apache.lucene.search.highlight;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.QueryVisitor;
/**
* Utility class used to extract the terms used in a query, plus any weights.
@ -100,10 +101,10 @@ public final class QueryTermExtractor
* @param fieldName The fieldName used to filter query terms
* @return an array of the terms used in a query, plus their weights.
*/
public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
{
HashSet<WeightedTerm> terms=new HashSet<>();
getTerms(query, 1f, terms,prohibited,fieldName);
public static WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName) {
  // A null fieldName means "no filtering": every field is accepted.
  Predicate<String> fieldSelector;
  if (fieldName == null) {
    fieldSelector = f -> true;
  } else {
    fieldSelector = fieldName::equals;
  }

  // The visitor starts at boost 1 and adds every term it consumes to this set.
  Set<WeightedTerm> collected = new HashSet<>();
  query.visit(new BoostedTermExtractor(1, collected, prohibited, fieldSelector));
  return collected.toArray(new WeightedTerm[0]);
}
@ -119,50 +120,45 @@ public final class QueryTermExtractor
return getTerms(query,prohibited,null);
}
private static final void getTerms(Query query, float boost, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName) {
try {
if (query instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) query;
getTerms(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms, prohibited, fieldName);
} else if (query instanceof BooleanQuery)
getTermsFromBooleanQuery((BooleanQuery) query, boost, terms, prohibited, fieldName);
else {
HashSet<Term> nonWeightedTerms = new HashSet<>();
try {
EMPTY_INDEXSEARCHER.createWeight(EMPTY_INDEXSEARCHER.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1).extractTerms(nonWeightedTerms);
} catch (IOException bogus) {
throw new RuntimeException("Should not happen on an empty index", bogus);
}
for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext(); ) {
Term term = iter.next();
if ((fieldName == null) || (term.field().equals(fieldName))) {
terms.add(new WeightedTerm(boost, term.text()));
}
}
}
} catch (UnsupportedOperationException ignore) {
//this is non-fatal for our purposes
}
}
private static class BoostedTermExtractor extends QueryVisitor {
/**
* extractTerms is currently the only query-independent means of introspecting queries but it only reveals
* a list of terms for that query - not the boosts each individual term in that query may or may not have.
* "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held
* in each child element.
* Some discussion around this topic here:
* http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
* Unfortunately there seemed to be limited interest in requiring all Query objects to implement
* something common which would allow access to child queries so what follows here are query-specific
* implementations for accessing embedded query elements.
*/
private static final void getTermsFromBooleanQuery(BooleanQuery query, float boost, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
{
for (BooleanClause clause : query)
{
if (prohibited || clause.getOccur()!=BooleanClause.Occur.MUST_NOT)
getTerms(clause.getQuery(), boost, terms, prohibited, fieldName);
final float boost;
final Set<WeightedTerm> terms;
final boolean includeProhibited;
final Predicate<String> fieldSelector;
/**
 * @param boost weight given to every term this visitor consumes
 * @param terms set that consumed terms are added to
 * @param includeProhibited when false, getSubVisitor returns the empty visitor for MUST_NOT
 * @param fieldSelector predicate consulted by acceptField
 */
private BoostedTermExtractor(float boost, Set<WeightedTerm> terms, boolean includeProhibited,
Predicate<String> fieldSelector) {
this.boost = boost;
this.terms = terms;
this.includeProhibited = includeProhibited;
this.fieldSelector = fieldSelector;
}
/** Accepts a field exactly when the configured {@code fieldSelector} predicate does. */
@Override
public boolean acceptField(String field) {
return fieldSelector.test(field);
}
/** Adds the text of each supplied term to the result set, weighted with this visitor's boost. */
@Override
public void consumeTerms(Query query, Term... terms) {
  // The parameter shadows the field of the same name, hence the explicit this.terms.
  for (int i = 0; i < terms.length; i++) {
    this.terms.add(new WeightedTerm(boost, terms[i].text()));
  }
}
/**
 * Picks the visitor for a child clause: below a BoostQuery a new extractor with the
 * multiplied boost, the empty visitor for MUST_NOT clauses when prohibited terms are
 * excluded, and this visitor otherwise. The BoostQuery check takes precedence.
 */
@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
  if (parent instanceof BoostQuery) {
    BoostQuery boosted = (BoostQuery) parent;
    return new BoostedTermExtractor(boost * boosted.getBoost(), terms, includeProhibited, fieldSelector);
  }
  boolean skipClause = !includeProhibited && occur == BooleanClause.Occur.MUST_NOT;
  return skipClause ? QueryVisitor.EMPTY_VISITOR : this;
}
}
}

View File

@ -51,6 +51,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
@ -308,10 +309,10 @@ public class WeightedSpanTermExtractor {
for (final String field : fieldNames) {
final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
queries.put(field, rewrittenQuery);
rewrittenQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost).extractTerms(nonWeightedTerms);
rewrittenQuery.visit(QueryVisitor.termCollector(nonWeightedTerms));
}
} else {
spanQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost).extractTerms(nonWeightedTerms);
spanQuery.visit(QueryVisitor.termCollector(nonWeightedTerms));
}
List<PositionSpan> spanPositions = new ArrayList<>();
@ -378,7 +379,7 @@ public class WeightedSpanTermExtractor {
protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query, float boost) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<>();
final IndexSearcher searcher = new IndexSearcher(getLeafContext());
searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1).extractTerms(nonWeightedTerms);
searcher.rewrite(query).visit(QueryVisitor.termCollector(nonWeightedTerms));
for (final Term queryTerm : nonWeightedTerms) {

View File

@ -19,10 +19,8 @@ package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.FilteringTokenFilter;
@ -30,7 +28,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@ -47,21 +44,19 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
private final LeafReader memIndexLeafReader;
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
super(components, analyzer);
boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
// preFilter for MemoryIndex
preMemIndexFilterAutomaton = buildCombinedAutomaton(components, multiTermQueryRewrite);
preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
/**
* Build one {@link CharacterRunAutomaton} matching any term the query might match.
*/
private static CharacterRunAutomaton buildCombinedAutomaton(UHComponents components,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
private static CharacterRunAutomaton buildCombinedAutomaton(UHComponents components) {
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
if (components.getTerms().length > 0) {
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(components.getTerms()))));
@ -69,7 +64,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
Collections.addAll(allAutomata, components.getAutomata());
for (SpanQuery spanQuery : components.getPhraseHelper().getSpanQueries()) {
Collections.addAll(allAutomata,
MultiTermHighlighting.extractAutomata(spanQuery, components.getFieldMatcher(), true, multiTermQueryRewrite));//true==lookInSpan
MultiTermHighlighting.extractAutomata(spanQuery, components.getFieldMatcher(), true));//true==lookInSpan
}
if (allAutomata.size() == 1) {

View File

@ -17,33 +17,20 @@
package org.apache.lucene.search.uhighlight;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanPositionCheckQuery;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.Operations;
/**
@ -51,7 +38,7 @@ import org.apache.lucene.util.automaton.Operations;
*
* @lucene.internal
*/
class MultiTermHighlighting {
final class MultiTermHighlighting {
private MultiTermHighlighting() {
}
@ -59,138 +46,117 @@ class MultiTermHighlighting {
* Extracts MultiTermQueries that match the provided field predicate.
* Returns equivalent automata that will match terms.
*/
public static CharacterRunAutomaton[] extractAutomata(Query query,
Predicate<String> fieldMatcher,
boolean lookInSpan,
Function<Query, Collection<Query>> preRewriteFunc) {
// TODO Lucene needs a Query visitor API! LUCENE-3041
static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan) {
List<CharacterRunAutomaton> list = new ArrayList<>();
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (query instanceof BoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (query instanceof FunctionScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((FunctionScoreQuery) query).getWrappedQuery(), fieldMatcher,
lookInSpan, preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanBoostQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof FuzzyQuery) {
final FuzzyQuery fq = (FuzzyQuery) query;
if (fieldMatcher.test(fq.getField())) {
String utf16 = fq.getTerm().text();
int termText[] = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
termText[j++] = cp = utf16.codePointAt(i);
}
int termLength = termText.length;
int prefixLength = Math.min(fq.getPrefixLength(), termLength);
String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
list.add(new CharacterRunAutomaton(automaton) {
@Override
public String toString() {
return fq.toString();
}
});
}
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (fieldMatcher.test(aq.getField())) {
if (aq.isAutomatonBinary() == false) { // note: is the case for WildcardQuery, RegexpQuery
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
return aq.toString();
}
});
} else { // note: is the case for PrefixQuery, TermRangeQuery
// byte oriented automaton:
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) { // empty here is bogus just to satisfy API
// TODO can we get access to the aq.compiledAutomaton.runAutomaton ?
ByteRunAutomaton byteRunAutomaton =
new ByteRunAutomaton(aq.getAutomaton(), true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
@Override
public boolean run(char[] chars, int offset, int length) {
int state = 0;
final int maxIdx = offset + length;
for (int i = offset; i < maxIdx; i++) {
final int code = chars[i];
int b;
// UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 )
if (code < 0x80) {
state = byteRunAutomaton.step(state, code);
if (state == -1) return false;
} else if (code < 0x800) {
b = (0xC0 | (code >> 6));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
b = (0x80 | (code & 0x3F));
state = byteRunAutomaton.step(state, b);
if (state == -1) return false;
} else {
// more complex
byte[] utf8Bytes = new byte[4 * (maxIdx - i)];
int utf8Len = UnicodeUtil.UTF16toUTF8(chars, i, maxIdx - i, utf8Bytes);
for (int utfIdx = 0; utfIdx < utf8Len; utfIdx++) {
state = byteRunAutomaton.step(state, utf8Bytes[utfIdx] & 0xFF);
if (state == -1) return false;
}
break;
}
}
return byteRunAutomaton.isAccept(state);
}
@Override
public String toString() {
return aq.toString();
}
});
}
}
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
AutomataCollector collector = new AutomataCollector(lookInSpan, fieldMatcher);
query.visit(collector);
return collector.runAutomata.toArray(new CharacterRunAutomaton[0]);
}
/**
 * {@link QueryVisitor} that accumulates a {@link CharacterRunAutomaton} for the
 * {@link AutomatonQuery} and {@link FuzzyQuery} leaves it is shown.
 */
private static class AutomataCollector extends QueryVisitor {

  /** Automata gathered so far, in the order the leaves were visited. */
  List<CharacterRunAutomaton> runAutomata = new ArrayList<>();
  /** When false, the children of any {@link SpanQuery} parent are handed an empty visitor. */
  final boolean lookInSpan;
  /** Predicate backing {@link #acceptField(String)}. */
  final Predicate<String> fieldMatcher;

  private AutomataCollector(boolean lookInSpan, Predicate<String> fieldMatcher) {
    this.lookInSpan = lookInSpan;
    this.fieldMatcher = fieldMatcher;
  }

  @Override
  public boolean acceptField(String field) {
    return fieldMatcher.test(field);
  }

  @Override
  public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
    // Descend unless span queries are excluded and the parent is one.
    boolean descend = lookInSpan || (parent instanceof SpanQuery) == false;
    return descend ? super.getSubVisitor(occur, parent) : QueryVisitor.EMPTY_VISITOR;
  }

  @Override
  public void visitLeaf(Query query) {
    if (query instanceof AutomatonQuery) {
      AutomatonQuery automatonQuery = (AutomatonQuery) query;
      runAutomata.add(automatonQuery.isAutomatonBinary()
          ? binaryToCharRunAutomaton(automatonQuery.getAutomaton(), query.toString())
          : describedBy(automatonQuery.getAutomaton(), query)); // WildcardQuery, RegexpQuery
      return;
    }
    if (query instanceof FuzzyQuery) {
      FuzzyQuery fuzzy = (FuzzyQuery) query;
      // With no edits allowed, or a prefix at least as long as the term text, no automaton
      // is built; the term itself is passed to the inherited consumeTerms instead.
      boolean useTermDirectly = fuzzy.getMaxEdits() == 0
          || fuzzy.getPrefixLength() >= fuzzy.getTerm().text().length();
      if (useTermDirectly) {
        consumeTerms(query, fuzzy.getTerm());
      } else {
        runAutomata.add(describedBy(fuzzy.toAutomaton(), query));
      }
    }
  }

  /** Wraps {@code automaton} so that its {@code toString()} reports the originating query. */
  private static CharacterRunAutomaton describedBy(Automaton automaton, Query source) {
    return new CharacterRunAutomaton(automaton) {
      @Override
      public String toString() {
        return source.toString();
      }
    };
  }
}
/**
 * Wraps a byte-oriented automaton in a {@link CharacterRunAutomaton} whose
 * {@code run(char[], int, int)} converts the UTF-16 input to UTF-8 bytes and steps
 * a {@link ByteRunAutomaton} over them.
 *
 * @param binaryAutomaton the automaton to run over the UTF-8 bytes
 * @param description     the text returned by the result's {@code toString()}
 * @return an automaton whose {@code char[]} run method is backed by {@code binaryAutomaton}
 */
private static CharacterRunAutomaton binaryToCharRunAutomaton(Automaton binaryAutomaton, String description) {
  return new CharacterRunAutomaton(Automata.makeEmpty()) { // empty here is bogus just to satisfy API
    // TODO can we get access to the aq.compiledAutomaton.runAutomaton ?
    ByteRunAutomaton byteRunAutomaton =
        new ByteRunAutomaton(binaryAutomaton, true, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

    @Override
    public boolean run(char[] chars, int offset, int length) {
      final int end = offset + length;
      int state = 0;
      int pos = offset;
      while (pos < end) {
        final int unit = chars[pos];
        if (unit >= 0x800) {
          // more complex: hand everything from here to the end to UnicodeUtil in one go,
          // feed the resulting bytes to the automaton, and stop scanning.
          final byte[] encoded = new byte[4 * (end - pos)];
          final int encodedLen = UnicodeUtil.UTF16toUTF8(chars, pos, end - pos, encoded);
          for (int k = 0; k < encodedLen; k++) {
            state = byteRunAutomaton.step(state, encoded[k] & 0xFF);
            if (state == -1) {
              return false;
            }
          }
          break;
        }
        // UTF16 to UTF8 (inlined logic from UnicodeUtil.UTF16toUTF8 )
        if (unit >= 0x80) {
          // two bytes: lead byte here, trailing byte below
          state = byteRunAutomaton.step(state, 0xC0 | (unit >> 6));
          if (state == -1) {
            return false;
          }
          state = byteRunAutomaton.step(state, 0x80 | (unit & 0x3F));
        } else {
          // single byte
          state = byteRunAutomaton.step(state, unit);
        }
        if (state == -1) {
          return false;
        }
        pos++;
      }
      return byteRunAutomaton.isAccept(state);
    }

    @Override
    public String toString() {
      return description;
    }
  };
}
}

View File

@ -23,14 +23,13 @@ import java.util.HashMap;
import java.util.List;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.TreeSet;
import java.util.function.Supplier;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
@ -245,30 +244,24 @@ public abstract class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable
* See {@link Passage#getMatchTerms()}. */
private BytesRef queryToTerm(Query query) {
// compute an approximate BytesRef term of a Query. We cache this since we're likely to see the same query again.
// Our approach is to call extractTerms and visit each term in order, concatenating them with an adjoining space.
// Our approach is to visit each matching term in order, concatenating them with an adjoining space.
// If we don't have any (perhaps due to an MTQ like a wildcard) then we fall back on the toString() of the query.
return queryToTermMap.computeIfAbsent(query, (Query q) -> {
try {
BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
UnifiedHighlighter.EMPTY_INDEXSEARCHER
.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1f)
.extractTerms(new TreeSet<Term>() {
@Override
public boolean add(Term term) {
BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
q.visit(new QueryVisitor() {
@Override
public void consumeTerms(Query query, Term... terms) {
for (Term term : terms) {
if (bytesRefBuilder.length() > 0) {
bytesRefBuilder.append((byte) ' ');
}
bytesRefBuilder.append(term.bytes());
return true;
}
});
if (bytesRefBuilder.length() > 0) {
return bytesRefBuilder.get();
}
} catch (IOException e) {//ignore
// go to fallback...
});
if (bytesRefBuilder.length() > 0) {
return bytesRefBuilder.get();
}
// fallback: (likely a MultiTermQuery)
return new BytesRef(q.toString());
});

View File

@ -24,7 +24,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.function.Predicate;
@ -40,6 +39,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -97,20 +97,6 @@ public class PhraseHelper {
boolean[] mustRewriteHolder = {false}; // boolean wrapped in 1-ary array so it's mutable from inner class
// When we call Weight.extractTerms, we do it on clauses that are NOT position sensitive.
// We only want to track a Set of bytes for the Term, not the Term class with its field part.
Set<Term> extractPosInsensitiveTermsTarget = new TreeSet<Term>() {
@Override
public boolean add(Term term) {
// don't call super.add; we don't actually use the superclass
if (fieldMatcher.test(term.field())) {
return positionInsensitiveTerms.add(term.bytes());
} else {
return false;
}
}
};
// For TermQueries or other position insensitive queries, collect the Terms.
// For other Query types, WSTE will convert to an equivalent SpanQuery. NOT extracting position spans here.
new WeightedSpanTermExtractor(field) {
@ -147,10 +133,19 @@ public class PhraseHelper {
// called on Query types that are NOT position sensitive, e.g. TermQuery
@Override
protected void extractWeightedTerms(Map<String, WeightedSpanTerm> terms, Query query, float boost)
throws IOException {
query.createWeight(UnifiedHighlighter.EMPTY_INDEXSEARCHER, ScoreMode.COMPLETE_NO_SCORES, boost)
.extractTerms(extractPosInsensitiveTermsTarget);
// Visits the query and records the bytes of every term handed to the visitor in
// positionInsensitiveTerms. The terms map and boost arguments are not used in this body.
protected void extractWeightedTerms(Map<String, WeightedSpanTerm> terms, Query query, float boost) {
query.visit(new QueryVisitor() {
@Override
public boolean acceptField(String field) {
// field acceptance is decided by the enclosing fieldMatcher
return fieldMatcher.test(field);
}
@Override
public void consumeTerms(Query query, Term... terms) {
// keep only the term bytes; the field part of each Term is not stored
for (Term term : terms) {
positionInsensitiveTerms.add(term.bytes());
}
}
});
}
// called on SpanQueries. Some other position-sensitive queries like PhraseQuery are converted beforehand

View File

@ -54,8 +54,8 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.spans.SpanQuery;
@ -140,11 +140,11 @@ public class UnifiedHighlighter {
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
/**
* Calls {@link Weight#extractTerms(Set)} on an empty index for the query.
* Extracts matching terms after rewriting against an empty index
*/
protected static Set<Term> extractTerms(Query query) throws IOException {
Set<Term> queryTerms = new HashSet<>();
EMPTY_INDEXSEARCHER.createWeight(EMPTY_INDEXSEARCHER.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1).extractTerms(queryTerms);
EMPTY_INDEXSEARCHER.rewrite(query).visit(QueryVisitor.termCollector(queryTerms));
return queryTerms;
}
@ -816,7 +816,7 @@ public class UnifiedHighlighter {
|| highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES); // Weight.Matches will find all
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), lookInSpan, this::preMultiTermQueryRewrite)
? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), lookInSpan)
: ZERO_LEN_AUTOMATA_ARRAY;
}
@ -863,7 +863,7 @@ public class UnifiedHighlighter {
//skip using a memory index since it's pure term filtering
return new TokenStreamOffsetStrategy(components, getIndexAnalyzer());
} else {
return new MemoryIndexOffsetStrategy(components, getIndexAnalyzer(), this::preMultiTermQueryRewrite);
return new MemoryIndexOffsetStrategy(components, getIndexAnalyzer());
}
case NONE_NEEDED:
return NoOpOffsetStrategy.INSTANCE;
@ -902,19 +902,6 @@ public class UnifiedHighlighter {
return null;
}
/**
* When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
* by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
* of queries if appropriate, or null if nothing to do. If query is not custom, simply returning null will allow the
* default rules to apply.
*
* @param query Query to be highlighted
* @return A Collection of Query object(s) if the query needs to be rewritten, otherwise null.
*         This default implementation always returns null.
*/
protected Collection<Query> preMultiTermQueryRewrite(Query query) {
return null;
}
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
return new DocIdSetIterator() {
int idx = -1;

View File

@ -72,6 +72,7 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PhraseQuery.Builder;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@ -255,6 +256,11 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
return query;
}
@Override
public void visit(QueryVisitor visitor) {
// no-op: nothing is reported to the visitor
}
@Override
public String toString(String field) {
return null;

View File

@ -16,6 +16,11 @@
*/
package org.apache.lucene.search.highlight.custom;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
@ -25,6 +30,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
@ -35,11 +41,6 @@ import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
/**
* Tests the extensibility of {@link WeightedSpanTermExtractor} and
* {@link QueryScorer} in a user defined package
@ -175,6 +176,11 @@ public class HighlightCustomQueryTest extends LuceneTestCase {
return new TermQuery(term);
}
// Passes this query together with its term field to the visitor's consumeTerms.
@Override
public void visit(QueryVisitor visitor) {
visitor.consumeTerms(this, term);
}
@Override
public int hashCode() {
return classHash() + Objects.hashCode(term);

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Objects;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
@ -50,6 +49,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
@ -999,15 +999,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected List<Query> preMultiTermQueryRewrite(Query query) {
if (query instanceof MyWrapperSpanQuery) {
return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
}
return null;
}
};
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
@ -1051,6 +1043,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
return originalQuery.createWeight(searcher, scoreMode, boost);
}
// Visits originalQuery through the sub-visitor obtained for this wrapper with Occur.MUST.
@Override
public void visit(QueryVisitor visitor) {
  QueryVisitor subVisitor = visitor.getSubVisitor(BooleanClause.Occur.MUST, this);
  originalQuery.visit(subVisitor);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query newOriginalQuery = originalQuery.rewrite(reader);

View File

@ -42,6 +42,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
@ -588,6 +589,11 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
public int hashCode() {
return wrapped.hashCode();
}
// Delegates to the wrapped query with the same visitor; no sub-visitor is requested.
@Override
public void visit(QueryVisitor visitor) {
wrapped.visit(visitor);
}
}
// Ported from LUCENE-5455 (fixed in LUCENE-8121). Also see LUCENE-2287.

View File

@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
@ -29,6 +30,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
@ -926,6 +928,12 @@ public class FieldQueryTest extends AbstractTestCase {
public String toString(String field) {
return "DummyQuery";
}
@Override
public void visit(QueryVisitor visitor) {
// no-op: nothing is reported to the visitor
}
@Override
public boolean equals(Object o) {
throw new AssertionError();

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
@ -29,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
@ -60,6 +60,11 @@ final class GlobalOrdinalsQuery extends Query {
this.indexReaderContextId = indexReaderContextId;
}
// Reports this query to the visitor as a single leaf; nothing else is passed on.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
if (searcher.getTopReaderContext().id() != indexReaderContextId) {
@ -107,9 +112,6 @@ final class GlobalOrdinalsQuery extends Query {
this.approximationWeight = approximationWeight;
}
// No-op: adds nothing to the supplied set.
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
@ -29,6 +28,7 @@ import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
@ -65,6 +65,11 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
this.indexReaderContextId = indexReaderContextId;
}
// Reports this query to the visitor as a single leaf; nothing else is passed on.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
if (searcher.getTopReaderContext().id() != indexReaderContextId) {
@ -125,9 +130,6 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
super(query, approximationWeight);
}
// No-op: adds nothing to the supplied set.
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
SortedDocValues values = DocValues.getSorted(context.reader(), joinField);

View File

@ -18,16 +18,15 @@
package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
@ -83,6 +82,11 @@ public class ParentChildrenBlockJoinQuery extends Query {
return "ParentChildrenBlockJoinQuery (" + childQuery + ")";
}
// Reports this query to the visitor as a single leaf; childQuery is not visited here.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
final Query childRewrite = childQuery.rewrite(reader);
@ -99,11 +103,6 @@ public class ParentChildrenBlockJoinQuery extends Query {
final int readerIndex = ReaderUtil.subIndex(parentDocId, searcher.getIndexReader().leaves());
return new Weight(this) {
// Delegates term extraction to childWeight, which receives the caller's set.
@Override
public void extractTerms(Set<Term> terms) {
childWeight.extractTerms(terms);
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
return Explanation.noMatch("Not implemented, use ToParentBlockJoinQuery explain why a document matched");

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.BiFunction;
import org.apache.lucene.document.DoublePoint;
@ -36,12 +35,12 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSetIterator;
@ -119,14 +118,17 @@ abstract class PointInSetIncludingScoreQuery extends Query {
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
}
// Reports this query as a leaf, but only when the visitor accepts its field.
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(field) == false) {
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public final Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
return new Weight(this) {
// No-op: adds nothing to the supplied set.
@Override
public void extractTerms(Set<Term> terms) {
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);

View File

@ -19,17 +19,16 @@ package org.apache.lucene.search.join;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSetIterator;
@ -71,6 +70,13 @@ class TermsIncludingScoreQuery extends Query {
return String.format(Locale.ROOT, "TermsIncludingScoreQuery{field=%s;fromQuery=%s}", toField, fromQuery);
}
// Reports this query as a leaf, but only when the visitor accepts toField.
@Override
public void visit(QueryVisitor visitor) {
  if (visitor.acceptField(toField) == false) {
    return;
  }
  visitor.visitLeaf(this);
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
@ -99,9 +105,6 @@ class TermsIncludingScoreQuery extends Query {
}
return new Weight(TermsIncludingScoreQuery.this) {
// No-op: adds nothing to the supplied set.
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Terms terms = context.reader().terms(toField);

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -59,6 +60,11 @@ class TermsQuery extends MultiTermQuery {
this.indexReaderContextId = indexReaderContextId;
}
// Reports this query to the visitor as a single leaf; acceptField is not consulted here.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (this.terms.size() == 0) {

View File

@ -20,13 +20,15 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
@ -64,6 +66,11 @@ public class ToChildBlockJoinQuery extends Query {
this.parentsFilter = parentsFilter;
}
// Reports this query to the visitor as a single leaf; parentQuery is not visited here.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode scoreMode, float boost) throws IOException {
return new ToChildBlockJoinWeight(this, parentQuery.createWeight(searcher, scoreMode, boost), parentsFilter, scoreMode.needsScores());

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
@ -85,6 +86,11 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
// Reports this query to the visitor as a single leaf; childQuery is not visited here.
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode weightScoreMode, float boost) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, weightScoreMode, boost), parentsFilter, weightScoreMode.needsScores() ? scoreMode : ScoreMode.None);

View File

@ -20,15 +20,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -63,8 +62,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestBlockJoin extends LuceneTestCase {
// One resume...
@ -93,34 +90,6 @@ public class TestBlockJoin extends LuceneTestCase {
return job;
}
/**
 * Checks that the weights created for a {@link ToParentBlockJoinQuery} and for a
 * {@link ToChildBlockJoinQuery} wrapping it each extract exactly one term (the wrapped
 * {@link TermQuery}'s) when built against an empty index.
 */
public void testExtractTerms() throws Exception {
  TermQuery termQuery = new TermQuery(new Term("field", "value"));
  QueryBitSetProducer bitSetProducer = new QueryBitSetProducer(new MatchNoDocsQuery());
  ToParentBlockJoinQuery toParentBlockJoinQuery = new ToParentBlockJoinQuery(termQuery, bitSetProducer, ScoreMode.None);
  ToChildBlockJoinQuery toChildBlockJoinQuery = new ToChildBlockJoinQuery(toParentBlockJoinQuery, bitSetProducer);

  // try-with-resources: the directory and reader are released even when an assertion fails,
  // instead of only on the success path.
  try (Directory directory = newDirectory()) {
    // Open and immediately close a writer so the directory holds an (empty) index.
    final IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())));
    w.close();

    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);

      Weight weight = toParentBlockJoinQuery.createWeight(indexSearcher, org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
      Set<Term> terms = new HashSet<>();
      weight.extractTerms(terms);
      assertEquals(1, terms.size());

      weight = toChildBlockJoinQuery.createWeight(indexSearcher, org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
      terms = new HashSet<>();
      weight.extractTerms(terms);
      assertEquals(1, terms.size());
    }
  }
}
public void testEmptyChildFilter() throws Exception {
final Directory dir = newDirectory();
final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));

Some files were not shown because too many files have changed in this diff Show More