From f525b1a6c96f176c2b5f938202de334573da3c9d Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 12 Oct 2015 19:43:22 +0000 Subject: [PATCH] LUCENE-6305: BooleanQuery.equals/hashcode ignore clause order. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1708211 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/BooleanQuery.java | 60 ++++++++- .../org/apache/lucene/search/Multiset.java | 123 ++++++++++++++++++ .../lucene/search/TestBooleanQuery.java | 114 ++++++++++++++++ .../apache/lucene/search/TestMultiset.java | 99 ++++++++++++++ 5 files changed, 396 insertions(+), 3 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/Multiset.java create mode 100644 lucene/core/src/test/org/apache/lucene/search/TestMultiset.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 26d99b29a21..504e3cfbd36 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -276,6 +276,9 @@ Changes in Runtime Behavior * LUCENE-6784: IndexSearcher's query caching is enabled by default. Run indexSearcher.setQueryCache(null) to disable. (Adrien Grand) +* LUCENE-6305: BooleanQuery.equals and hashcode do not depend on the order of + clauses anymore. (Adrien Grand) + ======================= Lucene 5.3.1 ======================= Bug Fixes diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index fcae9fcc506..b7dd2aa1630 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -20,9 +20,13 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.EnumMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import org.apache.lucene.index.IndexReader; @@ -109,12 +113,21 @@ public class BooleanQuery extends Query implements Iterable { return this; } + /** + * Add a new clause to this {@link Builder}. Note that the order in which + * clauses are added does not have any impact on matching documents or query + * performance. + * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number + */ public Builder add(BooleanClause clause) { add(clause.getQuery(), clause.getOccur()); return this; } /** + * Add a new clause to this {@link Builder}. Note that the order in which + * clauses are added does not have any impact on matching documents or query + * performance. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */ public Builder add(Query query, Occur occur) { @@ -135,13 +148,24 @@ public class BooleanQuery extends Query implements Iterable { private final boolean disableCoord; private final int minimumNumberShouldMatch; - private final List clauses; + private final List clauses; // used for toString() and getClauses() + private final Map> clauseSets; // used for equals/hashcode private BooleanQuery(boolean disableCoord, int minimumNumberShouldMatch, BooleanClause[] clauses) { this.disableCoord = disableCoord; this.minimumNumberShouldMatch = minimumNumberShouldMatch; this.clauses = Collections.unmodifiableList(Arrays.asList(clauses)); + clauseSets = new EnumMap<>(Occur.class); + // duplicates matter for SHOULD and MUST + clauseSets.put(Occur.SHOULD, new Multiset<>()); + clauseSets.put(Occur.MUST, new Multiset<>()); + // but not for FILTER and MUST_NOT + clauseSets.put(Occur.FILTER, new HashSet<>()); + clauseSets.put(Occur.MUST_NOT, new HashSet<>()); + for (BooleanClause clause : clauses) { + clauseSets.get(clause.getOccur()).add(clause.getQuery()); + } } /** @@ -271,6 +295,19 @@ public class BooleanQuery extends Query implements Iterable { return buffer.toString(); } + /** + * Compares the specified object with this boolean query for equality. + * Returns true if and only if the provided object
    + *
  • is also a {@link BooleanQuery},
  • + *
  • has the same value of {@link #isCoordDisabled()}
  • + *
  • has the same value of {@link #getMinimumNumberShouldMatch()}
  • + *
  • has the same {@link Occur#SHOULD} clauses, regardless of the order
  • + *
  • has the same {@link Occur#MUST} clauses, regardless of the order
  • + *
  • has the same set of {@link Occur#FILTER} clauses, regardless of the + * order and regardless of duplicates
  • + *
  • has the same set of {@link Occur#MUST_NOT} clauses, regardless of + * the order and regardless of duplicates
+ */ @Override public boolean equals(Object o) { if (super.equals(o) == false) { @@ -279,12 +316,29 @@ public class BooleanQuery extends Query implements Iterable { BooleanQuery that = (BooleanQuery)o; return this.getMinimumNumberShouldMatch() == that.getMinimumNumberShouldMatch() && this.disableCoord == that.disableCoord - && clauses.equals(that.clauses); + && clauseSets.equals(that.clauseSets); } + private int computeHashCode() { + int hashCode = 31 * super.hashCode() + Objects.hash(disableCoord, minimumNumberShouldMatch, clauseSets); + if (hashCode == 0) { + hashCode = 1; + } + return hashCode; + } + + // cached hash code is ok since boolean queries are immutable + private int hashCode; + @Override public int hashCode() { - return 31 * super.hashCode() + Objects.hash(disableCoord, minimumNumberShouldMatch, clauses); + if (hashCode == 0) { + // no need for synchronization, in the worst case we would just compute the hash several times + hashCode = computeHashCode(); + assert hashCode != 0; + } + assert hashCode == computeHashCode(); + return hashCode; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/Multiset.java b/lucene/core/src/java/org/apache/lucene/search/Multiset.java new file mode 100644 index 00000000000..ff346512eff --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/Multiset.java @@ -0,0 +1,123 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.AbstractCollection; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +/** + * A {@link Multiset} is a set that allows for duplicate elements. Two + * {@link Multiset}s are equal if they contain the same unique elements and if + * each unique element has as many occurrences in both multisets. + * Iteration order is not specified. + * @lucene.internal + */ +final class Multiset extends AbstractCollection { + + private final Map map = new HashMap<>(); + private int size; + + /** Create an empty {@link Multiset}. */ + Multiset() { + super(); + } + + @Override + public Iterator iterator() { + final Iterator> mapIterator = map.entrySet().iterator(); + return new Iterator() { + + T current; + int remaining; + + @Override + public boolean hasNext() { + return remaining > 0 || mapIterator.hasNext(); + } + + @Override + public T next() { + if (remaining == 0) { + Map.Entry next = mapIterator.next(); + current = next.getKey(); + remaining = next.getValue(); + } + assert remaining > 0; + remaining -= 1; + return current; + } + }; + } + + @Override + public int size() { + return size; + } + + @Override + public void clear() { + map.clear(); + size = 0; + } + + @Override + public boolean add(T e) { + map.put(e, map.getOrDefault(e, 0) + 1); + size += 1; + return true; + } + + @Override + public boolean remove(Object o) { + final Integer count = map.get(o); + if (count == null) { + return false; + } else if (1 == count.intValue()) { + map.remove(o); + } else { + map.put((T) o, count - 1); + } + size -= 1; + return true; + } + + @Override + public boolean contains(Object o) { + return map.containsKey(o); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + Multiset that = (Multiset) obj; + return size == that.size // not necessary but helps escaping early + && map.equals(that.map); + } + + @Override + public int hashCode() { + return 31 * getClass().hashCode() + map.hashCode(); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java index b8e6a8deae8..355e591a8bb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -52,6 +53,8 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NamedThreadFactory; import org.apache.lucene.util.TestUtil; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; + public class TestBooleanQuery extends LuceneTestCase { public void testEquality() throws Exception { @@ -74,6 +77,117 @@ public class TestBooleanQuery extends LuceneTestCase { assertEquals(bq1.build(), bq2.build()); } + public void testEqualityDoesNotDependOnOrder() { + TermQuery[] queries = new TermQuery[] { + new TermQuery(new Term("foo", "bar")), + new TermQuery(new Term("foo", "baz")) + }; + for (int iter = 0; iter < 10; ++iter) { + List clauses = new ArrayList<>(); + final int numClauses = random().nextInt(20); + for (int i = 0; i < numClauses; ++i) { + Query query = RandomPicks.randomFrom(random(), queries); + if (random().nextBoolean()) { + query = new BoostQuery(query, random().nextFloat()); + } + Occur occur = RandomPicks.randomFrom(random(), Occur.values()); + clauses.add(new BooleanClause(query, occur)); + } + + final boolean disableCoord = random().nextBoolean(); + final int minShouldMatch = random().nextInt(5); + BooleanQuery.Builder bq1Builder = new BooleanQuery.Builder(); + bq1Builder.setDisableCoord(disableCoord); + bq1Builder.setMinimumNumberShouldMatch(minShouldMatch); + for (BooleanClause clause : clauses) { + bq1Builder.add(clause); + } + final BooleanQuery bq1 = bq1Builder.build(); + + Collections.shuffle(clauses, random()); + BooleanQuery.Builder bq2Builder = new BooleanQuery.Builder(); + bq2Builder.setDisableCoord(disableCoord); + bq2Builder.setMinimumNumberShouldMatch(minShouldMatch); + for (BooleanClause clause : clauses) { + bq2Builder.add(clause); + } + final BooleanQuery bq2 = bq2Builder.build(); + + QueryUtils.checkEqual(bq1, bq2); + } + } + + public void testEqualityOnDuplicateShouldClauses() { + BooleanQuery bq1 = new BooleanQuery.Builder() + .setDisableCoord(random().nextBoolean()) + .setMinimumNumberShouldMatch(random().nextInt(2)) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .build(); + BooleanQuery bq2 = new BooleanQuery.Builder() + .setDisableCoord(bq1.isCoordDisabled()) + .setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch()) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .build(); + QueryUtils.checkUnequal(bq1, bq2); + } + + public void testEqualityOnDuplicateMustClauses() { + BooleanQuery bq1 = new BooleanQuery.Builder() + .setDisableCoord(random().nextBoolean()) + .setMinimumNumberShouldMatch(random().nextInt(2)) + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .build(); + BooleanQuery bq2 = new BooleanQuery.Builder() + .setDisableCoord(bq1.isCoordDisabled()) + .setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch()) + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .build(); + QueryUtils.checkUnequal(bq1, bq2); + } + + public void testEqualityOnDuplicateFilterClauses() { + BooleanQuery bq1 = new BooleanQuery.Builder() + .setDisableCoord(random().nextBoolean()) + .setMinimumNumberShouldMatch(random().nextInt(2)) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .build(); + BooleanQuery bq2 = new BooleanQuery.Builder() + .setDisableCoord(bq1.isCoordDisabled()) + .setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch()) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .build(); + QueryUtils.checkEqual(bq1, bq2); + } + + public void testEqualityOnDuplicateMustNotClauses() { + BooleanQuery bq1 = new BooleanQuery.Builder() + .setDisableCoord(random().nextBoolean()) + .setMinimumNumberShouldMatch(random().nextInt(2)) + .add(new MatchAllDocsQuery(), Occur.MUST) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .build(); + BooleanQuery bq2 = new BooleanQuery.Builder() + .setDisableCoord(bq1.isCoordDisabled()) + .setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch()) + .add(new MatchAllDocsQuery(), Occur.MUST) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .build(); + QueryUtils.checkEqual(bq1, bq2); + } + + public void testHashCodeIsStable() { + BooleanQuery bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", TestUtil.randomSimpleString(random()))), Occur.SHOULD) + .add(new TermQuery(new Term("foo", TestUtil.randomSimpleString(random()))), Occur.SHOULD) + .build(); + final int hashCode = bq.hashCode(); + assertEquals(hashCode, bq.hashCode()); + } + public void testException() { try { BooleanQuery.setMaxClauseCount(0); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiset.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiset.java new file mode 100644 index 00000000000..ce65d85707a --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiset.java @@ -0,0 +1,99 @@ +package org.apache.lucene.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.util.LuceneTestCase; + +public class TestMultiset extends LuceneTestCase { + + public void testDuplicatesMatter() { + Multiset s1 = new Multiset<>(); + Multiset s2 = new Multiset<>(); + assertEquals(s1.size(), s2.size()); + assertEquals(s1, s2); + + assertTrue(s1.add(42)); + assertTrue(s2.add(42)); + assertEquals(s1, s2); + + s2.add(42); + assertFalse(s1.equals(s2)); + + s1.add(43); + s1.add(43); + s2.add(43); + assertEquals(s1.size(), s2.size()); + assertFalse(s1.equals(s2)); + } + + private static Map toCountMap(Multiset set) { + Map map = new HashMap<>(); + int recomputedSize = 0; + for (T element : set) { + add(map, element); + recomputedSize += 1; + } + assertEquals(set.toString(), recomputedSize, set.size()); + return map; + } + + private static void add(Map map, T element) { + map.put(element, map.getOrDefault(element, 0) + 1); + } + + private static void remove(Map map, T element) { + Integer count = map.get(element); + if (count == null) { + return; + } else if (count.intValue() == 1) { + map.remove(element); + } else { + map.put(element, count - 1); + } + } + + public void testRandom() { + Map reference = new HashMap<>(); + Multiset multiset = new Multiset<>(); + final int iters = atLeast(100); + for (int i = 0; i < iters; ++i) { + final int value = random().nextInt(10); + switch (random().nextInt(10)) { + case 0: + case 1: + case 2: + remove(reference, value); + multiset.remove(value); + break; + case 3: + reference.clear(); + multiset.clear(); + break; + default: + add(reference, value); + multiset.add(value); + break; + } + assertEquals(reference, toCountMap(multiset)); + } + } + +}