mirror of https://github.com/apache/lucene.git
LUCENE-9142 Refactor IntSet operations for determinize (#1184)
* LUCENE-9142 Refactor SortedIntSet for equality Split SortedIntSet into a class heirarchy to make comparisons to FrozenIntSet more meaningful. Use Arrays.equals for more efficient comparison. Add tests for IntSet to verify correctness.
This commit is contained in:
parent
f3cd1dbde3
commit
abd282d258
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
final class FrozenIntSet extends IntSet {
|
||||
final int[] values;
|
||||
final int hashCode;
|
||||
final int state;
|
||||
|
||||
FrozenIntSet(int[] values, int hashCode, int state) {
|
||||
this.values = values;
|
||||
this.hashCode = hashCode;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
@Override
|
||||
int[] getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
int size() {
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Arrays.toString(values);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
abstract class IntSet {
|
||||
/**
|
||||
* Return an array representation of this int set's values. Values are valid for indices [0, {@link #size()}).
|
||||
* If this is a mutable int set, then changes to the set are not guaranteed to be visible in this array.
|
||||
* @return an array containing the values for this set, guaranteed to be at least {@link #size()} elements
|
||||
*/
|
||||
abstract int[] getArray();
|
||||
|
||||
/**
|
||||
* Guaranteed to be less than or equal to the length of the array returned by {@link #getArray()}.
|
||||
* @return The number of values in this set.
|
||||
*/
|
||||
abstract int size();
|
||||
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof IntSet)) return false;
|
||||
IntSet that = (IntSet) o;
|
||||
return hashCode() == that.hashCode()
|
||||
&& Arrays.equals(getArray(), 0, size(), that.getArray(), 0, that.size());
|
||||
}
|
||||
}
|
|
@ -682,13 +682,14 @@ final public class Operations {
|
|||
//System.out.println("DET:");
|
||||
//a.writeDot("/l/la/lucene/core/detin.dot");
|
||||
|
||||
SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(0, 0);
|
||||
// Same initial values and state will always have the same hashCode
|
||||
FrozenIntSet initialset = new FrozenIntSet(new int[] { 0 }, 683, 0);
|
||||
|
||||
// Create state 0:
|
||||
b.createState();
|
||||
|
||||
ArrayDeque<SortedIntSet.FrozenIntSet> worklist = new ArrayDeque<>();
|
||||
Map<SortedIntSet.FrozenIntSet,Integer> newstate = new HashMap<>();
|
||||
ArrayDeque<FrozenIntSet> worklist = new ArrayDeque<>();
|
||||
Map<IntSet,Integer> newstate = new HashMap<>();
|
||||
|
||||
worklist.add(initialset);
|
||||
|
||||
|
@ -704,7 +705,7 @@ final public class Operations {
|
|||
Transition t = new Transition();
|
||||
|
||||
while (worklist.size() > 0) {
|
||||
SortedIntSet.FrozenIntSet s = worklist.removeFirst();
|
||||
FrozenIntSet s = worklist.removeFirst();
|
||||
//System.out.println("det: pop set=" + s);
|
||||
|
||||
// Collate all outgoing transitions by min/1+max:
|
||||
|
@ -745,7 +746,7 @@ final public class Operations {
|
|||
if (q >= maxDeterminizedStates) {
|
||||
throw new TooComplexToDeterminizeException(a, maxDeterminizedStates);
|
||||
}
|
||||
final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
|
||||
final FrozenIntSet p = statesSet.freeze(q);
|
||||
//System.out.println(" make new state=" + q + " -> " + p + " accCount=" + accCount);
|
||||
worklist.add(p);
|
||||
b.setAccept(q, accCount > 0);
|
||||
|
|
|
@ -21,9 +21,8 @@ import java.util.Map;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
// Just holds a set of int[] states, plus a corresponding
|
||||
// int[] count per state. Used by
|
||||
// BasicOperations.determinize
|
||||
final class SortedIntSet {
|
||||
// int[] count per state. Used by Operations.determinize
|
||||
final class SortedIntSet extends IntSet {
|
||||
int[] values;
|
||||
int[] counts;
|
||||
int upto;
|
||||
|
@ -37,9 +36,7 @@ final class SortedIntSet {
|
|||
|
||||
private boolean useTreeMap;
|
||||
|
||||
int state;
|
||||
|
||||
public SortedIntSet(int capacity) {
|
||||
SortedIntSet(int capacity) {
|
||||
values = new int[capacity];
|
||||
counts = new int[capacity];
|
||||
}
|
||||
|
@ -47,13 +44,7 @@ final class SortedIntSet {
|
|||
// Adds this state to the set
|
||||
public void incr(int num) {
|
||||
if (useTreeMap) {
|
||||
final Integer key = num;
|
||||
Integer val = map.get(key);
|
||||
if (val == null) {
|
||||
map.put(key, 1);
|
||||
} else {
|
||||
map.put(key, 1+val);
|
||||
}
|
||||
map.merge(num, 1, Integer::sum);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -130,7 +121,7 @@ final class SortedIntSet {
|
|||
assert false;
|
||||
}
|
||||
|
||||
public void computeHash() {
|
||||
void computeHash() {
|
||||
if (useTreeMap) {
|
||||
if (map.size() > values.length) {
|
||||
final int size = ArrayUtil.oversize(map.size(), Integer.BYTES);
|
||||
|
@ -151,41 +142,34 @@ final class SortedIntSet {
|
|||
}
|
||||
}
|
||||
|
||||
public FrozenIntSet freeze(int state) {
|
||||
final int[] c = new int[upto];
|
||||
System.arraycopy(values, 0, c, 0, upto);
|
||||
/**
|
||||
* Create a snapshot of this int set associated with a given state. The snapshot will not retain any frequency
|
||||
* information about the elements of this set, only existence.
|
||||
* <p>
|
||||
* It is the caller's responsibility to ensure that the hashCode and data are up to date via the {@link #computeHash()} method before calling this method.
|
||||
* @param state the state to associate with the frozen set.
|
||||
* @return A new FrozenIntSet with the same values as this set.
|
||||
*/
|
||||
FrozenIntSet freeze(int state) {
|
||||
final int[] c = ArrayUtil.copyOfSubArray(values, 0, upto);
|
||||
return new FrozenIntSet(c, hashCode, state);
|
||||
}
|
||||
|
||||
@Override
|
||||
int[] getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
int size() {
|
||||
return upto;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object _other) {
|
||||
if (_other == null) {
|
||||
return false;
|
||||
}
|
||||
if (!(_other instanceof FrozenIntSet)) {
|
||||
return false;
|
||||
}
|
||||
FrozenIntSet other = (FrozenIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
if (other.values.length != upto) {
|
||||
return false;
|
||||
}
|
||||
for(int i=0;i<upto;i++) {
|
||||
if (other.values[i] != values[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder().append('[');
|
||||
|
@ -199,78 +183,5 @@ final class SortedIntSet {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
public final static class FrozenIntSet {
|
||||
final int[] values;
|
||||
final int hashCode;
|
||||
final int state;
|
||||
|
||||
public FrozenIntSet(int[] values, int hashCode, int state) {
|
||||
this.values = values;
|
||||
this.hashCode = hashCode;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public FrozenIntSet(int num, int state) {
|
||||
this.values = new int[] {num};
|
||||
this.state = state;
|
||||
this.hashCode = 683+num;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object _other) {
|
||||
if (_other == null) {
|
||||
return false;
|
||||
}
|
||||
if (_other instanceof FrozenIntSet) {
|
||||
FrozenIntSet other = (FrozenIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
if (other.values.length != values.length) {
|
||||
return false;
|
||||
}
|
||||
for(int i=0;i<values.length;i++) {
|
||||
if (other.values[i] != values[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else if (_other instanceof SortedIntSet) {
|
||||
SortedIntSet other = (SortedIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
if (other.values.length != values.length) {
|
||||
return false;
|
||||
}
|
||||
for(int i=0;i<values.length;i++) {
|
||||
if (other.values[i] != values[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder().append('[');
|
||||
for(int i=0;i<values.length;i++) {
|
||||
if (i > 0) {
|
||||
sb.append(' ');
|
||||
}
|
||||
sb.append(values[i]);
|
||||
}
|
||||
sb.append(']');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestIntSet extends LuceneTestCase {
|
||||
@Test
|
||||
public void testFreezeEqualitySmallSet() {
|
||||
testFreezeEquality(10);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFreezeEqualityLargeSet() {
|
||||
testFreezeEquality(100);
|
||||
}
|
||||
|
||||
private void testFreezeEquality(int size) {
|
||||
SortedIntSet sortedSet = new SortedIntSet(0);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
// Some duplicates is nice but not critical
|
||||
sortedSet.incr(random().nextInt(i + 1));
|
||||
}
|
||||
|
||||
sortedSet.computeHash();
|
||||
IntSet frozen0 = sortedSet.freeze(0);
|
||||
|
||||
assertEquals("Frozen set not equal to origin sorted set.", sortedSet, frozen0);
|
||||
assertEquals("Symmetry: Sorted set not equal to frozen set.", frozen0, sortedSet);
|
||||
|
||||
IntSet frozen1 = sortedSet.freeze(random().nextInt());
|
||||
assertEquals("Sorted set modified while freezing?", sortedSet, frozen1);
|
||||
assertEquals("Frozen sets were not equal", frozen0, frozen1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMapCutover() {
|
||||
SortedIntSet set = new SortedIntSet(10);
|
||||
for (int i = 0; i < 35; i++) {
|
||||
// No duplicates so there are enough elements to trigger impl cutover
|
||||
set.incr(i);
|
||||
}
|
||||
|
||||
set.computeHash();
|
||||
assertTrue(set.size() > 32);
|
||||
|
||||
for (int i = 0; i < 35; i++) {
|
||||
// This is pretty much the worst case, perf wise
|
||||
set.decr(i);
|
||||
}
|
||||
|
||||
set.computeHash();
|
||||
assertTrue(set.size() == 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModify() {
|
||||
SortedIntSet set = new SortedIntSet(2);
|
||||
set.incr(1);
|
||||
set.incr(2);
|
||||
set.computeHash();
|
||||
|
||||
FrozenIntSet set2 = set.freeze(0);
|
||||
assertEquals(set, set2);
|
||||
|
||||
set.incr(1);
|
||||
set.computeHash();
|
||||
assertEquals(set, set2);
|
||||
|
||||
set.decr(1);
|
||||
set.computeHash();
|
||||
assertEquals(set, set2);
|
||||
|
||||
set.decr(1);
|
||||
set.computeHash();
|
||||
assertNotEquals(set, set2);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue