Implement Accountable for NFARunAutomaton (#13741)

This commit is contained in:
Patrick Zhai 2024-09-10 17:35:37 -07:00 committed by GitHub
parent e4efae6ab9
commit 7c529ce092
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 57 additions and 4 deletions

View File

@ -159,6 +159,8 @@ New Features
searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
create slices that target segment partitions. (Luca Cavanna)
* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai)
Improvements
---------------------

View File

@ -510,6 +510,7 @@ public class CompiledAutomaton implements Accountable {
final int prime = 31;
int result = 1;
result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
result = prime * result + ((nfaRunAutomaton == null) ? 0 : nfaRunAutomaton.hashCode());
result = prime * result + ((term == null) ? 0 : term.hashCode());
result = prime * result + ((type == null) ? 0 : type.hashCode());
return result;
@ -538,6 +539,7 @@ public class CompiledAutomaton implements Accountable {
+ RamUsageEstimator.sizeOfObject(automaton)
+ RamUsageEstimator.sizeOfObject(commonSuffixRef)
+ RamUsageEstimator.sizeOfObject(runAutomaton)
+ RamUsageEstimator.sizeOfObject(nfaRunAutomaton)
+ RamUsageEstimator.sizeOfObject(term)
+ RamUsageEstimator.sizeOfObject(transition);
}

View File

@ -21,7 +21,9 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.internal.hppc.BitMixer;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
@ -31,13 +33,16 @@ import org.apache.lucene.util.ArrayUtil;
*
* @lucene.internal
*/
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor, Accountable {
/** state ordinal of "no such state" */
public static final int MISSING = -1;
private static final int MISSING = -1;
private static final int NOT_COMPUTED = -2;
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(NFARunAutomaton.class);
private final Automaton automaton;
private final int[] points;
private final Map<DState, Integer> dStateToOrd = new HashMap<>(); // could init lazily?
@ -229,7 +234,17 @@ public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
setTransitionAccordingly(t);
}
private class DState {
/**
 * Estimates the heap footprint of this run automaton: the shallow instance size plus the
 * estimated sizes of the wrapped automaton, the points array, the DState-to-ordinal map, the
 * dStates storage and the classmap.
 */
@Override
public long ramBytesUsed() {
  long total = BASE_RAM_BYTES;
  total += RamUsageEstimator.sizeOfObject(automaton);
  total += RamUsageEstimator.sizeOfObject(points);
  total += RamUsageEstimator.sizeOfMap(dStateToOrd);
  total += RamUsageEstimator.sizeOfObject(dStates);
  total += RamUsageEstimator.sizeOfObject(classmap);
  return total;
}
private class DState implements Accountable {
private final int[] nfaStates;
// this field is lazily initialized the first time a caller wants to add a new transition
private int[] transitions;
@ -426,5 +441,17 @@ public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
DState dState = (DState) o;
return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates);
}
/**
 * Estimates the heap bytes held by this state: an aligned, hand-computed shallow size plus the
 * estimated sizes of the {@code nfaStates} and {@code transitions} arrays.
 */
@Override
public long ramBytesUsed() {
  // The counts below are hard-coded rather than measured.
  // NOTE(review): presumably they mirror this class's field list - re-check when fields change.
  final long primitiveBytes = Integer.BYTES * 3 + 1;
  final long transitionBytes = Transition.BYTES_USED * 2;
  final long headerAndRefBytes =
      RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF * 4L;
  final long shallowBytes =
      RamUsageEstimator.alignObjectSize(primitiveBytes + transitionBytes + headerAndRefBytes);

  final long nfaStatesBytes = RamUsageEstimator.sizeOfObject(nfaStates);
  final long transitionsBytes = RamUsageEstimator.sizeOfObject(transitions);
  return shallowBytes + nfaStatesBytes + transitionsBytes;
}
}
}

View File

@ -16,12 +16,18 @@
*/
package org.apache.lucene.util.automaton;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Holds one transition from an {@link Automaton}. This is typically used temporarily when iterating
* through transitions by invoking {@link Automaton#initTransition} and {@link
* Automaton#getNextTransition}.
*/
public class Transition {
public class Transition implements Accountable {
/** static estimation of bytes used */
public static final long BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Transition.class);
/** Sole constructor. */
public Transition() {}
@ -48,4 +54,9 @@ public class Transition {
public String toString() {
// Renders as "source --> dest min-max", with min and max printed as characters.
// NOTE(review): the (char) casts keep only the low 16 bits if min/max are wider than char -
// confirm the field types before relying on this output for large labels.
return source + " --> " + dest + " " + (char) min + "-" + (char) max;
}
/**
 * Returns the precomputed {@link #BYTES_USED} constant, so the reported size is the same for
 * every instance.
 */
@Override
public long ramBytesUsed() {
return BYTES_USED;
}
}

View File

@ -32,14 +32,25 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.IntsRef;
import org.junit.Assert;
public class TestNFARunAutomaton extends LuceneTestCase {
private static final String FIELD = "field";
/**
 * Checks that {@code ramBytesUsed()} of an {@link NFARunAutomaton} built from a random regular
 * expression is within 30% of the size measured by {@link RamUsageTester}.
 */
public void testRamUsageEstimation() {
  RegExp parsed = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
  NFARunAutomaton underTest = new NFARunAutomaton(parsed.toAutomaton());

  // Take the estimate first, then the measured size, on the same untouched instance.
  long estimated = underTest.ramBytesUsed();
  long measured = RamUsageTester.ramUsed(underTest);

  double tolerance = measured * 0.3;
  Assert.assertEquals((double) measured, (double) estimated, tolerance);
}
@SuppressWarnings("unused")
public void testWithRandomRegex() {
for (int i = 0; i < 100; i++) {