diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4eed756ccd1..b7f1b55f167 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -109,7 +109,6 @@ API Changes only applicable for fields that are indexed with doc values only. (Mayya Sharipova, Adrien Grand, Simon Willnauer) - Improvements * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words, @@ -246,6 +245,9 @@ Bug fixes * LUCENE-9930: The Ukrainian analyzer was reloading its dictionary for every new TokenStreamComponents, which could lead to memory leaks. (Alan Woodward) +* LUCENE-9940: The order of disjuncts in DisjunctionMaxQuery does not matter + for equality checks (Alan Woodward) + Changes in Backwards Compatibility Policy * LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 97ca0fd3de0..3b2e1e2d04c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; @@ -45,7 +44,7 @@ import org.apache.lucene.index.LeafReaderContext; public final class DisjunctionMaxQuery extends Query implements Iterable { /* The subqueries */ - private final Query[] disjuncts; + private final Multiset disjuncts = new Multiset<>(); /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */ private final float tieBreakerMultiplier; @@ -66,7 +65,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]"); } this.tieBreakerMultiplier = tieBreakerMultiplier; - this.disjuncts = disjuncts.toArray(new Query[disjuncts.size()]); + this.disjuncts.addAll(disjuncts); } /** @return An {@code Iterator} over the disjuncts */ @@ -76,8 +75,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable } /** @return the disjuncts. */ - public List getDisjuncts() { - return Collections.unmodifiableList(Arrays.asList(disjuncts)); + public Collection getDisjuncts() { + return Collections.unmodifiableCollection(disjuncts); } /** @return tie breaker value for multiple matches. */ @@ -208,8 +207,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable */ @Override public Query rewrite(IndexReader reader) throws IOException { - if (disjuncts.length == 1) { - return disjuncts[0]; + if (disjuncts.size() == 1) { + return disjuncts.iterator().next(); } if (tieBreakerMultiplier == 1.0f) { @@ -254,14 +253,15 @@ public final class DisjunctionMaxQuery extends Query implements Iterable public String toString(String field) { StringBuilder buffer = new StringBuilder(); buffer.append("("); - for (int i = 0; i < disjuncts.length; i++) { - Query subquery = disjuncts[i]; + Iterator it = disjuncts.iterator(); + for (int i = 0; it.hasNext(); i++) { + Query subquery = it.next(); if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subquery.toString(field)); buffer.append(")"); } else buffer.append(subquery.toString(field)); - if (i != disjuncts.length - 1) buffer.append(" | "); + if (i != disjuncts.size() - 1) buffer.append(" | "); } buffer.append(")"); if (tieBreakerMultiplier != 0.0f) { @@ -285,7 +285,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable private boolean equalsTo(DisjunctionMaxQuery other) { return tieBreakerMultiplier == other.tieBreakerMultiplier - && Arrays.equals(disjuncts, other.disjuncts); + && Objects.equals(disjuncts, other.disjuncts); } /** @@ -297,7 +297,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable public int hashCode() { int h = classHash(); h = 31 * h + Float.floatToIntBits(tieBreakerMultiplier); - h = 31 * h + Arrays.hashCode(disjuncts); + h = 31 * h + Objects.hashCode(disjuncts); return h; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index d73f3c0d99b..b5d0eddaf03 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -496,11 +496,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { Query sub2 = tq("hed", "elephant"); DisjunctionMaxQuery q = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f); Query rewritten = s.rewrite(q); - assertTrue(rewritten instanceof BooleanQuery); - BooleanQuery bq = (BooleanQuery) rewritten; - assertEquals(bq.clauses().size(), 2); - assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD)); - assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD)); + Query expected = + new BooleanQuery.Builder() + .add(sub1, BooleanClause.Occur.SHOULD) + .add(sub2, BooleanClause.Occur.SHOULD) + .build(); + assertEquals(expected, rewritten); + } + + public void testDisjunctOrderAndEquals() throws Exception { + // the order that disjuncts are provided in should not matter for equals() comparisons + Query sub1 = tq("hed", "albino"); + Query sub2 = tq("hed", "elephant"); + Query q1 = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f); + Query q2 = new DisjunctionMaxQuery(Arrays.asList(sub2, sub1), 1.0f); + assertEquals(q1, q2); } public void testRandomTopDocs() throws Exception { diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java index 06c05bab6de..ab3953f3fe7 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java @@ -18,16 +18,19 @@ package org.apache.lucene.queryparser.xml; import java.io.IOException; import java.io.InputStream; +import java.util.Arrays; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.LuceneTestCase; @@ -99,13 +102,14 @@ public class TestCoreParser extends LuceneTestCase { public void testDisjunctionMaxQueryXML() throws ParserException, IOException { Query q = parse("DisjunctionMaxQuery.xml"); - assertTrue(q instanceof DisjunctionMaxQuery); - DisjunctionMaxQuery d = (DisjunctionMaxQuery) q; - assertEquals(0.0f, d.getTieBreakerMultiplier(), 0.0001f); - assertEquals(2, d.getDisjuncts().size()); - DisjunctionMaxQuery ndq = (DisjunctionMaxQuery) d.getDisjuncts().get(1); - assertEquals(0.3f, ndq.getTieBreakerMultiplier(), 0.0001f); - assertEquals(1, ndq.getDisjuncts().size()); + Query expected = + new DisjunctionMaxQuery( + Arrays.asList( + new TermQuery(new Term("a", "merger")), + new DisjunctionMaxQuery( + Arrays.asList(new TermQuery(new Term("b", "verger"))), 0.3f)), + 0.0f); + assertEquals(expected, q); } public void testRangeQueryXML() throws ParserException, IOException {