mirror of https://github.com/apache/lucene.git
Add field as a separate input to newSynonymQuery (#11941)
QueryBuilder#newSynonymQuery takes an array of TermAndBoost objects as a parameter and uses the field of the first term in the array as its field. However, there are cases where this array may be empty, which will result in an ArrayOutOfBoundsException. This commit reworks QueryBuilder so that TermAndBoost contains plain BytesRefs, and passes the field as a separate parameter. This guards against accidental calls to newSynonymQuery with an empty list - in this case, an empty synonym query is generated rather than an exception. It also refactors SynonymQuery itself to hold BytesRefs rather than Terms, which needlessly repeat the field for each entry. Fixes #11864
This commit is contained in:
parent
cd82a9bbdc
commit
332679886c
|
@ -111,6 +111,10 @@ API Changes
|
|||
memory to provide stronger guarantees on query latency.
|
||||
(Adrien Grand, Uwe Schindler)
|
||||
|
||||
* GITHUB#11941: QueryBuilder#add and #newSynonymQuery methods now take a `field` parameter,
|
||||
to avoid possible exceptions when building queries from an empty term list. The helper
|
||||
TermAndBoost class now holds a BytesRef rather than a Term. (Alan Woodward)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
|
||||
|
|
|
@ -80,6 +80,14 @@ public final class SynonymQuery extends Query {
|
|||
if (field.equals(term.field()) == false) {
|
||||
throw new IllegalArgumentException("Synonyms must be across the same field");
|
||||
}
|
||||
return addTerm(term.bytes(), boost);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the provided {@code term} as a synonym, document frequencies of this term will be
|
||||
* boosted by {@code boost}.
|
||||
*/
|
||||
public Builder addTerm(BytesRef term, float boost) {
|
||||
if (Float.isNaN(boost) || Float.compare(boost, 0f) <= 0 || Float.compare(boost, 1f) > 0) {
|
||||
throw new IllegalArgumentException(
|
||||
"boost must be a positive float between 0 (exclusive) and 1 (inclusive)");
|
||||
|
@ -110,7 +118,7 @@ public final class SynonymQuery extends Query {
|
|||
|
||||
public List<Term> getTerms() {
|
||||
return Collections.unmodifiableList(
|
||||
Arrays.stream(terms).map(TermAndBoost::getTerm).collect(Collectors.toList()));
|
||||
Arrays.stream(terms).map(t -> new Term(field, t.term)).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -120,7 +128,7 @@ public final class SynonymQuery extends Query {
|
|||
if (i != 0) {
|
||||
builder.append(" ");
|
||||
}
|
||||
Query termQuery = new TermQuery(terms[i].term);
|
||||
Query termQuery = new TermQuery(new Term(this.field, terms[i].term));
|
||||
builder.append(termQuery.toString(field));
|
||||
if (terms[i].boost != 1f) {
|
||||
builder.append("^");
|
||||
|
@ -148,7 +156,7 @@ public final class SynonymQuery extends Query {
|
|||
return new BooleanQuery.Builder().build();
|
||||
}
|
||||
if (terms.length == 1 && terms[0].boost == 1f) {
|
||||
return new TermQuery(terms[0].term);
|
||||
return new TermQuery(new Term(field, terms[0].term));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -159,7 +167,7 @@ public final class SynonymQuery extends Query {
|
|||
return;
|
||||
}
|
||||
QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this);
|
||||
Term[] ts = Arrays.stream(terms).map(t -> t.term).toArray(Term[]::new);
|
||||
Term[] ts = Arrays.stream(terms).map(t -> new Term(field, t.term)).toArray(Term[]::new);
|
||||
v.consumeTerms(this, ts);
|
||||
}
|
||||
|
||||
|
@ -172,7 +180,7 @@ public final class SynonymQuery extends Query {
|
|||
// if scores are not needed, let BooleanWeight deal with optimizing that case.
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
for (TermAndBoost term : terms) {
|
||||
bq.add(new TermQuery(term.term), BooleanClause.Occur.SHOULD);
|
||||
bq.add(new TermQuery(new Term(field, term.term)), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return searcher
|
||||
.rewrite(bq.build())
|
||||
|
@ -191,16 +199,17 @@ public final class SynonymQuery extends Query {
|
|||
super(query);
|
||||
assert scoreMode.needsScores();
|
||||
this.scoreMode = scoreMode;
|
||||
CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].term.field());
|
||||
CollectionStatistics collectionStats = searcher.collectionStatistics(field);
|
||||
long docFreq = 0;
|
||||
long totalTermFreq = 0;
|
||||
termStates = new TermStates[terms.length];
|
||||
for (int i = 0; i < termStates.length; i++) {
|
||||
TermStates ts = TermStates.build(searcher.getTopReaderContext(), terms[i].term, true);
|
||||
Term term = new Term(field, terms[i].term);
|
||||
TermStates ts = TermStates.build(searcher.getTopReaderContext(), term, true);
|
||||
termStates[i] = ts;
|
||||
if (ts.docFreq() > 0) {
|
||||
TermStatistics termStats =
|
||||
searcher.termStatistics(terms[i].term, ts.docFreq(), ts.totalTermFreq());
|
||||
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq());
|
||||
docFreq = Math.max(termStats.docFreq(), docFreq);
|
||||
totalTermFreq += termStats.totalTermFreq();
|
||||
}
|
||||
|
@ -217,13 +226,12 @@ public final class SynonymQuery extends Query {
|
|||
|
||||
@Override
|
||||
public Matches matches(LeafReaderContext context, int doc) throws IOException {
|
||||
String field = terms[0].term.field();
|
||||
Terms indexTerms = context.reader().terms(field);
|
||||
if (indexTerms == null) {
|
||||
return super.matches(context, doc);
|
||||
}
|
||||
List<Term> termList =
|
||||
Arrays.stream(terms).map(TermAndBoost::getTerm).collect(Collectors.toList());
|
||||
Arrays.stream(terms).map(t -> new Term(field, t.term)).collect(Collectors.toList());
|
||||
return MatchesUtils.forField(
|
||||
field,
|
||||
() -> DisjunctionMatchesIterator.fromTerms(context, doc, getQuery(), field, termList));
|
||||
|
@ -244,8 +252,7 @@ public final class SynonymQuery extends Query {
|
|||
assert scorer instanceof TermScorer;
|
||||
freq = ((TermScorer) scorer).freq();
|
||||
}
|
||||
LeafSimScorer docScorer =
|
||||
new LeafSimScorer(simWeight, context.reader(), terms[0].term.field(), true);
|
||||
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
|
||||
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
|
||||
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
|
||||
return Explanation.match(
|
||||
|
@ -271,8 +278,8 @@ public final class SynonymQuery extends Query {
|
|||
for (int i = 0; i < terms.length; i++) {
|
||||
TermState state = termStates[i].get(context);
|
||||
if (state != null) {
|
||||
TermsEnum termsEnum = context.reader().terms(terms[i].term.field()).iterator();
|
||||
termsEnum.seekExact(terms[i].term.bytes(), state);
|
||||
TermsEnum termsEnum = context.reader().terms(field).iterator();
|
||||
termsEnum.seekExact(terms[i].term, state);
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
|
||||
iterators.add(impactsEnum);
|
||||
|
@ -290,8 +297,7 @@ public final class SynonymQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
|
||||
LeafSimScorer simScorer =
|
||||
new LeafSimScorer(simWeight, context.reader(), terms[0].term.field(), true);
|
||||
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
|
||||
|
||||
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
|
||||
if (iterators.size() == 1) {
|
||||
|
@ -443,7 +449,7 @@ public final class SynonymQuery extends Query {
|
|||
}
|
||||
|
||||
PriorityQueue<SubIterator> pq =
|
||||
new PriorityQueue<SubIterator>(impacts.length) {
|
||||
new PriorityQueue<>(impacts.length) {
|
||||
@Override
|
||||
protected boolean lessThan(SubIterator a, SubIterator b) {
|
||||
if (a.current == null) { // means iteration is finished
|
||||
|
@ -626,18 +632,14 @@ public final class SynonymQuery extends Query {
|
|||
}
|
||||
|
||||
private static class TermAndBoost {
|
||||
final Term term;
|
||||
final BytesRef term;
|
||||
final float boost;
|
||||
|
||||
TermAndBoost(Term term, float boost) {
|
||||
TermAndBoost(BytesRef term, float boost) {
|
||||
this.term = term;
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
Term getTerm() {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
|
|
|
@ -65,13 +65,13 @@ public class QueryBuilder {
|
|||
/** Wraps a term and boost */
|
||||
public static class TermAndBoost {
|
||||
/** the term */
|
||||
public final Term term;
|
||||
public final BytesRef term;
|
||||
/** the boost */
|
||||
public final float boost;
|
||||
|
||||
/** Creates a new TermAndBoost */
|
||||
public TermAndBoost(Term term, float boost) {
|
||||
this.term = term;
|
||||
public TermAndBoost(BytesRef term, float boost) {
|
||||
this.term = BytesRef.deepCopyOf(term);
|
||||
this.boost = boost;
|
||||
}
|
||||
}
|
||||
|
@ -390,21 +390,24 @@ public class QueryBuilder {
|
|||
stream.reset();
|
||||
List<TermAndBoost> terms = new ArrayList<>();
|
||||
while (stream.incrementToken()) {
|
||||
terms.add(new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
|
||||
terms.add(new TermAndBoost(termAtt.getBytesRef(), boostAtt.getBoost()));
|
||||
}
|
||||
|
||||
return newSynonymQuery(terms.toArray(new TermAndBoost[0]));
|
||||
return newSynonymQuery(field, terms.toArray(TermAndBoost[]::new));
|
||||
}
|
||||
|
||||
protected void add(
|
||||
BooleanQuery.Builder q, List<TermAndBoost> current, BooleanClause.Occur operator) {
|
||||
String field,
|
||||
BooleanQuery.Builder q,
|
||||
List<TermAndBoost> current,
|
||||
BooleanClause.Occur operator) {
|
||||
if (current.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
if (current.size() == 1) {
|
||||
q.add(newTermQuery(current.get(0).term, current.get(0).boost), operator);
|
||||
q.add(newTermQuery(new Term(field, current.get(0).term), current.get(0).boost), operator);
|
||||
} else {
|
||||
q.add(newSynonymQuery(current.toArray(new TermAndBoost[0])), operator);
|
||||
q.add(newSynonymQuery(field, current.toArray(TermAndBoost[]::new)), operator);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -421,13 +424,12 @@ public class QueryBuilder {
|
|||
stream.reset();
|
||||
while (stream.incrementToken()) {
|
||||
if (posIncrAtt.getPositionIncrement() != 0) {
|
||||
add(q, currentQuery, operator);
|
||||
add(field, q, currentQuery, operator);
|
||||
currentQuery.clear();
|
||||
}
|
||||
currentQuery.add(
|
||||
new TermAndBoost(new Term(field, termAtt.getBytesRef()), boostAtt.getBoost()));
|
||||
currentQuery.add(new TermAndBoost(termAtt.getBytesRef(), boostAtt.getBoost()));
|
||||
}
|
||||
add(q, currentQuery, operator);
|
||||
add(field, q, currentQuery, operator);
|
||||
|
||||
return q.build();
|
||||
}
|
||||
|
@ -518,7 +520,7 @@ public class QueryBuilder {
|
|||
if (graph.hasSidePath(start)) {
|
||||
final Iterator<TokenStream> sidePathsIterator = graph.getFiniteStrings(start, end);
|
||||
Iterator<Query> queries =
|
||||
new Iterator<Query>() {
|
||||
new Iterator<>() {
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return sidePathsIterator.hasNext();
|
||||
|
@ -544,14 +546,14 @@ public class QueryBuilder {
|
|||
s -> {
|
||||
TermToBytesRefAttribute t = s.addAttribute(TermToBytesRefAttribute.class);
|
||||
BoostAttribute b = s.addAttribute(BoostAttribute.class);
|
||||
return new TermAndBoost(new Term(field, t.getBytesRef()), b.getBoost());
|
||||
return new TermAndBoost(t.getBytesRef(), b.getBoost());
|
||||
})
|
||||
.toArray(TermAndBoost[]::new);
|
||||
assert terms.length > 0;
|
||||
if (terms.length == 1) {
|
||||
positionalQuery = newTermQuery(terms[0].term, terms[0].boost);
|
||||
positionalQuery = newTermQuery(new Term(field, terms[0].term), terms[0].boost);
|
||||
} else {
|
||||
positionalQuery = newSynonymQuery(terms);
|
||||
positionalQuery = newSynonymQuery(field, terms);
|
||||
}
|
||||
}
|
||||
if (positionalQuery != null) {
|
||||
|
@ -599,8 +601,8 @@ public class QueryBuilder {
|
|||
*
|
||||
* @return new Query instance
|
||||
*/
|
||||
protected Query newSynonymQuery(TermAndBoost[] terms) {
|
||||
SynonymQuery.Builder builder = new SynonymQuery.Builder(terms[0].term.field());
|
||||
protected Query newSynonymQuery(String field, TermAndBoost[] terms) {
|
||||
SynonymQuery.Builder builder = new SynonymQuery.Builder(field);
|
||||
for (TermAndBoost t : terms) {
|
||||
builder.addTerm(t.term, t.boost);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue