LUCENE-6305: BooleanQuery.equals/hashcode ignore clause order.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1708211 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-10-12 19:43:22 +00:00
parent 1a68f4b67c
commit f525b1a6c9
5 changed files with 396 additions and 3 deletions

View File

@ -276,6 +276,9 @@ Changes in Runtime Behavior
* LUCENE-6784: IndexSearcher's query caching is enabled by default. Run * LUCENE-6784: IndexSearcher's query caching is enabled by default. Run
indexSearcher.setQueryCache(null) to disable. (Adrien Grand) indexSearcher.setQueryCache(null) to disable. (Adrien Grand)
* LUCENE-6305: BooleanQuery.equals and hashcode do not depend on the order of
clauses anymore. (Adrien Grand)
======================= Lucene 5.3.1 ======================= ======================= Lucene 5.3.1 =======================
Bug Fixes Bug Fixes

View File

@ -20,9 +20,13 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Objects; import java.util.Objects;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
@ -109,12 +113,21 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return this; return this;
} }
/**
* Add a new clause to this {@link Builder}. Note that the order in which
* clauses are added does not have any impact on matching documents or query
* performance.
* @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
*/
public Builder add(BooleanClause clause) { public Builder add(BooleanClause clause) {
add(clause.getQuery(), clause.getOccur()); add(clause.getQuery(), clause.getOccur());
return this; return this;
} }
/** /**
* Add a new clause to this {@link Builder}. Note that the order in which
* clauses are added does not have any impact on matching documents or query
* performance.
* @throws TooManyClauses if the new number of clauses exceeds the maximum clause number * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
*/ */
public Builder add(Query query, Occur occur) { public Builder add(Query query, Occur occur) {
@ -135,13 +148,24 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
private final boolean disableCoord; private final boolean disableCoord;
private final int minimumNumberShouldMatch; private final int minimumNumberShouldMatch;
private final List<BooleanClause> clauses; private final List<BooleanClause> clauses; // used for toString() and getClauses()
private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashcode
private BooleanQuery(boolean disableCoord, int minimumNumberShouldMatch, private BooleanQuery(boolean disableCoord, int minimumNumberShouldMatch,
BooleanClause[] clauses) { BooleanClause[] clauses) {
this.disableCoord = disableCoord; this.disableCoord = disableCoord;
this.minimumNumberShouldMatch = minimumNumberShouldMatch; this.minimumNumberShouldMatch = minimumNumberShouldMatch;
this.clauses = Collections.unmodifiableList(Arrays.asList(clauses)); this.clauses = Collections.unmodifiableList(Arrays.asList(clauses));
clauseSets = new EnumMap<>(Occur.class);
// duplicates matter for SHOULD and MUST
clauseSets.put(Occur.SHOULD, new Multiset<>());
clauseSets.put(Occur.MUST, new Multiset<>());
// but not for FILTER and MUST_NOT
clauseSets.put(Occur.FILTER, new HashSet<>());
clauseSets.put(Occur.MUST_NOT, new HashSet<>());
for (BooleanClause clause : clauses) {
clauseSets.get(clause.getOccur()).add(clause.getQuery());
}
} }
/** /**
@ -271,6 +295,19 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return buffer.toString(); return buffer.toString();
} }
/**
* Compares the specified object with this boolean query for equality.
* Returns true if and only if the provided object<ul>
* <li>is also a {@link BooleanQuery},</li>
* <li>has the same value of {@link #isCoordDisabled()}</li>
* <li>has the same value of {@link #getMinimumNumberShouldMatch()}</li>
* <li>has the same {@link Occur#SHOULD} clauses, regardless of the order</li>
* <li>has the same {@link Occur#MUST} clauses, regardless of the order</li>
* <li>has the same set of {@link Occur#FILTER} clauses, regardless of the
* order and regardless of duplicates</li>
* <li>has the same set of {@link Occur#MUST_NOT} clauses, regardless of
* the order and regardless of duplicates</li></ul>
*/
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (super.equals(o) == false) { if (super.equals(o) == false) {
@ -279,12 +316,29 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
BooleanQuery that = (BooleanQuery)o; BooleanQuery that = (BooleanQuery)o;
return this.getMinimumNumberShouldMatch() == that.getMinimumNumberShouldMatch() return this.getMinimumNumberShouldMatch() == that.getMinimumNumberShouldMatch()
&& this.disableCoord == that.disableCoord && this.disableCoord == that.disableCoord
&& clauses.equals(that.clauses); && clauseSets.equals(that.clauseSets);
} }
private int computeHashCode() {
int hashCode = 31 * super.hashCode() + Objects.hash(disableCoord, minimumNumberShouldMatch, clauseSets);
if (hashCode == 0) {
hashCode = 1;
}
return hashCode;
}
// cached hash code is ok since boolean queries are immutable
private int hashCode;
@Override @Override
public int hashCode() { public int hashCode() {
return 31 * super.hashCode() + Objects.hash(disableCoord, minimumNumberShouldMatch, clauses); if (hashCode == 0) {
// no need for synchronization, in the worst case we would just compute the hash several times
hashCode = computeHashCode();
assert hashCode != 0;
}
assert hashCode == computeHashCode();
return hashCode;
} }
} }

View File

@ -0,0 +1,123 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.AbstractCollection;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* A {@link Multiset} is a set that allows for duplicate elements. Two
* {@link Multiset}s are equal if they contain the same unique elements and if
* each unique element has as many occurrences in both multisets.
* Iteration order is not specified.
* @lucene.internal
*/
final class Multiset<T> extends AbstractCollection<T> {
private final Map<T, Integer> map = new HashMap<>();
private int size;
/** Create an empty {@link Multiset}. */
Multiset() {
super();
}
@Override
public Iterator<T> iterator() {
final Iterator<Map.Entry<T, Integer>> mapIterator = map.entrySet().iterator();
return new Iterator<T>() {
T current;
int remaining;
@Override
public boolean hasNext() {
return remaining > 0 || mapIterator.hasNext();
}
@Override
public T next() {
if (remaining == 0) {
Map.Entry<T, Integer> next = mapIterator.next();
current = next.getKey();
remaining = next.getValue();
}
assert remaining > 0;
remaining -= 1;
return current;
}
};
}
@Override
public int size() {
return size;
}
@Override
public void clear() {
map.clear();
size = 0;
}
@Override
public boolean add(T e) {
map.put(e, map.getOrDefault(e, 0) + 1);
size += 1;
return true;
}
@Override
public boolean remove(Object o) {
final Integer count = map.get(o);
if (count == null) {
return false;
} else if (1 == count.intValue()) {
map.remove(o);
} else {
map.put((T) o, count - 1);
}
size -= 1;
return true;
}
@Override
public boolean contains(Object o) {
return map.containsKey(o);
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != getClass()) {
return false;
}
Multiset<?> that = (Multiset<?>) obj;
return size == that.size // not necessary but helps escaping early
&& map.equals(that.map);
}
@Override
public int hashCode() {
return 31 * getClass().hashCode() + map.hashCode();
}
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.BitSet; import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -52,6 +53,8 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory; import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestBooleanQuery extends LuceneTestCase { public class TestBooleanQuery extends LuceneTestCase {
public void testEquality() throws Exception { public void testEquality() throws Exception {
@ -74,6 +77,117 @@ public class TestBooleanQuery extends LuceneTestCase {
assertEquals(bq1.build(), bq2.build()); assertEquals(bq1.build(), bq2.build());
} }
public void testEqualityDoesNotDependOnOrder() {
TermQuery[] queries = new TermQuery[] {
new TermQuery(new Term("foo", "bar")),
new TermQuery(new Term("foo", "baz"))
};
for (int iter = 0; iter < 10; ++iter) {
List<BooleanClause> clauses = new ArrayList<>();
final int numClauses = random().nextInt(20);
for (int i = 0; i < numClauses; ++i) {
Query query = RandomPicks.randomFrom(random(), queries);
if (random().nextBoolean()) {
query = new BoostQuery(query, random().nextFloat());
}
Occur occur = RandomPicks.randomFrom(random(), Occur.values());
clauses.add(new BooleanClause(query, occur));
}
final boolean disableCoord = random().nextBoolean();
final int minShouldMatch = random().nextInt(5);
BooleanQuery.Builder bq1Builder = new BooleanQuery.Builder();
bq1Builder.setDisableCoord(disableCoord);
bq1Builder.setMinimumNumberShouldMatch(minShouldMatch);
for (BooleanClause clause : clauses) {
bq1Builder.add(clause);
}
final BooleanQuery bq1 = bq1Builder.build();
Collections.shuffle(clauses, random());
BooleanQuery.Builder bq2Builder = new BooleanQuery.Builder();
bq2Builder.setDisableCoord(disableCoord);
bq2Builder.setMinimumNumberShouldMatch(minShouldMatch);
for (BooleanClause clause : clauses) {
bq2Builder.add(clause);
}
final BooleanQuery bq2 = bq2Builder.build();
QueryUtils.checkEqual(bq1, bq2);
}
}
public void testEqualityOnDuplicateShouldClauses() {
BooleanQuery bq1 = new BooleanQuery.Builder()
.setDisableCoord(random().nextBoolean())
.setMinimumNumberShouldMatch(random().nextInt(2))
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.build();
BooleanQuery bq2 = new BooleanQuery.Builder()
.setDisableCoord(bq1.isCoordDisabled())
.setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch())
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.build();
QueryUtils.checkUnequal(bq1, bq2);
}
public void testEqualityOnDuplicateMustClauses() {
BooleanQuery bq1 = new BooleanQuery.Builder()
.setDisableCoord(random().nextBoolean())
.setMinimumNumberShouldMatch(random().nextInt(2))
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.build();
BooleanQuery bq2 = new BooleanQuery.Builder()
.setDisableCoord(bq1.isCoordDisabled())
.setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch())
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.build();
QueryUtils.checkUnequal(bq1, bq2);
}
public void testEqualityOnDuplicateFilterClauses() {
BooleanQuery bq1 = new BooleanQuery.Builder()
.setDisableCoord(random().nextBoolean())
.setMinimumNumberShouldMatch(random().nextInt(2))
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.build();
BooleanQuery bq2 = new BooleanQuery.Builder()
.setDisableCoord(bq1.isCoordDisabled())
.setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch())
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.build();
QueryUtils.checkEqual(bq1, bq2);
}
public void testEqualityOnDuplicateMustNotClauses() {
BooleanQuery bq1 = new BooleanQuery.Builder()
.setDisableCoord(random().nextBoolean())
.setMinimumNumberShouldMatch(random().nextInt(2))
.add(new MatchAllDocsQuery(), Occur.MUST)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.build();
BooleanQuery bq2 = new BooleanQuery.Builder()
.setDisableCoord(bq1.isCoordDisabled())
.setMinimumNumberShouldMatch(bq1.getMinimumNumberShouldMatch())
.add(new MatchAllDocsQuery(), Occur.MUST)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.build();
QueryUtils.checkEqual(bq1, bq2);
}
public void testHashCodeIsStable() {
BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", TestUtil.randomSimpleString(random()))), Occur.SHOULD)
.add(new TermQuery(new Term("foo", TestUtil.randomSimpleString(random()))), Occur.SHOULD)
.build();
final int hashCode = bq.hashCode();
assertEquals(hashCode, bq.hashCode());
}
public void testException() { public void testException() {
try { try {
BooleanQuery.setMaxClauseCount(0); BooleanQuery.setMaxClauseCount(0);

View File

@ -0,0 +1,99 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.LuceneTestCase;
public class TestMultiset extends LuceneTestCase {
public void testDuplicatesMatter() {
Multiset<Integer> s1 = new Multiset<>();
Multiset<Integer> s2 = new Multiset<>();
assertEquals(s1.size(), s2.size());
assertEquals(s1, s2);
assertTrue(s1.add(42));
assertTrue(s2.add(42));
assertEquals(s1, s2);
s2.add(42);
assertFalse(s1.equals(s2));
s1.add(43);
s1.add(43);
s2.add(43);
assertEquals(s1.size(), s2.size());
assertFalse(s1.equals(s2));
}
private static <T> Map<T, Integer> toCountMap(Multiset<T> set) {
Map<T, Integer> map = new HashMap<>();
int recomputedSize = 0;
for (T element : set) {
add(map, element);
recomputedSize += 1;
}
assertEquals(set.toString(), recomputedSize, set.size());
return map;
}
private static <T> void add(Map<T, Integer> map, T element) {
map.put(element, map.getOrDefault(element, 0) + 1);
}
private static <T> void remove(Map<T, Integer> map, T element) {
Integer count = map.get(element);
if (count == null) {
return;
} else if (count.intValue() == 1) {
map.remove(element);
} else {
map.put(element, count - 1);
}
}
public void testRandom() {
Map<Integer, Integer> reference = new HashMap<>();
Multiset<Integer> multiset = new Multiset<>();
final int iters = atLeast(100);
for (int i = 0; i < iters; ++i) {
final int value = random().nextInt(10);
switch (random().nextInt(10)) {
case 0:
case 1:
case 2:
remove(reference, value);
multiset.remove(value);
break;
case 3:
reference.clear();
multiset.clear();
break;
default:
add(reference, value);
multiset.add(value);
break;
}
assertEquals(reference, toCountMap(multiset));
}
}
}