mirror of https://github.com/apache/lucene.git
LUCENE-9940: DisjunctionMaxQuery shouldn't depend on disjunct order for equals checks (#110)
DisjunctionMaxQuery stores its disjuncts in a Query[], and uses Arrays.equals() for comparisons in its equals() implementation. This means that the order in which disjuncts are added to the query matters for equality checks. This commit changes DMQ to instead store its disjuncts in a Multiset, meaning that ordering no longer matters. The getDisjuncts() method now returns a Collection<Query> rather than a List, and some tests are changed to use query equality checks rather than iterating over disjuncts and expecting a particular order.
This commit is contained in:
parent
043ed3a91f
commit
f7a3587091
|
@ -109,7 +109,6 @@ API Changes
|
|||
only applicable for fields that are indexed with doc values only. (Mayya Sharipova,
|
||||
Adrien Grand, Simon Willnauer)
|
||||
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
|
||||
|
@ -246,6 +245,9 @@ Bug fixes
|
|||
* LUCENE-9930: The Ukrainian analyzer was reloading its dictionary for every new
|
||||
TokenStreamComponents, which could lead to memory leaks. (Alan Woodward)
|
||||
|
||||
* LUCENE-9940: The order of disjuncts in DisjunctionMaxQuery does not matter
|
||||
for equality checks (Alan Woodward)
|
||||
|
||||
Changes in Backwards Compatibility Policy
|
||||
|
||||
* LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
@ -45,7 +44,7 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
public final class DisjunctionMaxQuery extends Query implements Iterable<Query> {
|
||||
|
||||
/* The subqueries */
|
||||
private final Query[] disjuncts;
|
||||
private final Multiset<Query> disjuncts = new Multiset<>();
|
||||
|
||||
/* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */
|
||||
private final float tieBreakerMultiplier;
|
||||
|
@ -66,7 +65,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
|
||||
}
|
||||
this.tieBreakerMultiplier = tieBreakerMultiplier;
|
||||
this.disjuncts = disjuncts.toArray(new Query[disjuncts.size()]);
|
||||
this.disjuncts.addAll(disjuncts);
|
||||
}
|
||||
|
||||
/** @return An {@code Iterator<Query>} over the disjuncts */
|
||||
|
@ -76,8 +75,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
}
|
||||
|
||||
/** @return the disjuncts. */
|
||||
public List<Query> getDisjuncts() {
|
||||
return Collections.unmodifiableList(Arrays.asList(disjuncts));
|
||||
public Collection<Query> getDisjuncts() {
|
||||
return Collections.unmodifiableCollection(disjuncts);
|
||||
}
|
||||
|
||||
/** @return tie breaker value for multiple matches. */
|
||||
|
@ -208,8 +207,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
*/
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
if (disjuncts.length == 1) {
|
||||
return disjuncts[0];
|
||||
if (disjuncts.size() == 1) {
|
||||
return disjuncts.iterator().next();
|
||||
}
|
||||
|
||||
if (tieBreakerMultiplier == 1.0f) {
|
||||
|
@ -254,14 +253,15 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
buffer.append("(");
|
||||
for (int i = 0; i < disjuncts.length; i++) {
|
||||
Query subquery = disjuncts[i];
|
||||
Iterator<Query> it = disjuncts.iterator();
|
||||
for (int i = 0; it.hasNext(); i++) {
|
||||
Query subquery = it.next();
|
||||
if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
|
||||
buffer.append("(");
|
||||
buffer.append(subquery.toString(field));
|
||||
buffer.append(")");
|
||||
} else buffer.append(subquery.toString(field));
|
||||
if (i != disjuncts.length - 1) buffer.append(" | ");
|
||||
if (i != disjuncts.size() - 1) buffer.append(" | ");
|
||||
}
|
||||
buffer.append(")");
|
||||
if (tieBreakerMultiplier != 0.0f) {
|
||||
|
@ -285,7 +285,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
|
||||
private boolean equalsTo(DisjunctionMaxQuery other) {
|
||||
return tieBreakerMultiplier == other.tieBreakerMultiplier
|
||||
&& Arrays.equals(disjuncts, other.disjuncts);
|
||||
&& Objects.equals(disjuncts, other.disjuncts);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -297,7 +297,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
|
|||
public int hashCode() {
|
||||
int h = classHash();
|
||||
h = 31 * h + Float.floatToIntBits(tieBreakerMultiplier);
|
||||
h = 31 * h + Arrays.hashCode(disjuncts);
|
||||
h = 31 * h + Objects.hashCode(disjuncts);
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -496,11 +496,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
Query sub2 = tq("hed", "elephant");
|
||||
DisjunctionMaxQuery q = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
|
||||
Query rewritten = s.rewrite(q);
|
||||
assertTrue(rewritten instanceof BooleanQuery);
|
||||
BooleanQuery bq = (BooleanQuery) rewritten;
|
||||
assertEquals(bq.clauses().size(), 2);
|
||||
assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
|
||||
assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
|
||||
Query expected =
|
||||
new BooleanQuery.Builder()
|
||||
.add(sub1, BooleanClause.Occur.SHOULD)
|
||||
.add(sub2, BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
assertEquals(expected, rewritten);
|
||||
}
|
||||
|
||||
public void testDisjunctOrderAndEquals() throws Exception {
|
||||
// the order that disjuncts are provided in should not matter for equals() comparisons
|
||||
Query sub1 = tq("hed", "albino");
|
||||
Query sub2 = tq("hed", "elephant");
|
||||
Query q1 = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
|
||||
Query q2 = new DisjunctionMaxQuery(Arrays.asList(sub2, sub1), 1.0f);
|
||||
assertEquals(q1, q2);
|
||||
}
|
||||
|
||||
public void testRandomTopDocs() throws Exception {
|
||||
|
|
|
@ -18,16 +18,19 @@ package org.apache.lucene.queryparser.xml;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenFilter;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -99,13 +102,14 @@ public class TestCoreParser extends LuceneTestCase {
|
|||
|
||||
public void testDisjunctionMaxQueryXML() throws ParserException, IOException {
|
||||
Query q = parse("DisjunctionMaxQuery.xml");
|
||||
assertTrue(q instanceof DisjunctionMaxQuery);
|
||||
DisjunctionMaxQuery d = (DisjunctionMaxQuery) q;
|
||||
assertEquals(0.0f, d.getTieBreakerMultiplier(), 0.0001f);
|
||||
assertEquals(2, d.getDisjuncts().size());
|
||||
DisjunctionMaxQuery ndq = (DisjunctionMaxQuery) d.getDisjuncts().get(1);
|
||||
assertEquals(0.3f, ndq.getTieBreakerMultiplier(), 0.0001f);
|
||||
assertEquals(1, ndq.getDisjuncts().size());
|
||||
Query expected =
|
||||
new DisjunctionMaxQuery(
|
||||
Arrays.asList(
|
||||
new TermQuery(new Term("a", "merger")),
|
||||
new DisjunctionMaxQuery(
|
||||
Arrays.asList(new TermQuery(new Term("b", "verger"))), 0.3f)),
|
||||
0.0f);
|
||||
assertEquals(expected, q);
|
||||
}
|
||||
|
||||
public void testRangeQueryXML() throws ParserException, IOException {
|
||||
|
|
Loading…
Reference in New Issue