mirror of https://github.com/apache/lucene.git
Implement Accountable for NFARunAutomaton (#13741)
This commit is contained in:
parent
e4efae6ab9
commit
7c529ce092
|
@ -159,6 +159,8 @@ New Features
|
||||||
searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
|
searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
|
||||||
create slices that target segment partitions. (Luca Cavanna)
|
create slices that target segment partitions. (Luca Cavanna)
|
||||||
|
|
||||||
|
* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -510,6 +510,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
final int prime = 31;
|
final int prime = 31;
|
||||||
int result = 1;
|
int result = 1;
|
||||||
result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
|
result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
|
||||||
|
result = prime * result + ((nfaRunAutomaton == null) ? 0 : nfaRunAutomaton.hashCode());
|
||||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||||
result = prime * result + ((type == null) ? 0 : type.hashCode());
|
result = prime * result + ((type == null) ? 0 : type.hashCode());
|
||||||
return result;
|
return result;
|
||||||
|
@ -538,6 +539,7 @@ public class CompiledAutomaton implements Accountable {
|
||||||
+ RamUsageEstimator.sizeOfObject(automaton)
|
+ RamUsageEstimator.sizeOfObject(automaton)
|
||||||
+ RamUsageEstimator.sizeOfObject(commonSuffixRef)
|
+ RamUsageEstimator.sizeOfObject(commonSuffixRef)
|
||||||
+ RamUsageEstimator.sizeOfObject(runAutomaton)
|
+ RamUsageEstimator.sizeOfObject(runAutomaton)
|
||||||
|
+ RamUsageEstimator.sizeOfObject(nfaRunAutomaton)
|
||||||
+ RamUsageEstimator.sizeOfObject(term)
|
+ RamUsageEstimator.sizeOfObject(term)
|
||||||
+ RamUsageEstimator.sizeOfObject(transition);
|
+ RamUsageEstimator.sizeOfObject(transition);
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,9 @@ import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.lucene.internal.hppc.BitMixer;
|
import org.apache.lucene.internal.hppc.BitMixer;
|
||||||
|
import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
|
* A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
|
||||||
|
@ -31,13 +33,16 @@ import org.apache.lucene.util.ArrayUtil;
|
||||||
*
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
|
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor, Accountable {
|
||||||
|
|
||||||
/** state ordinal of "no such state" */
|
/** state ordinal of "no such state" */
|
||||||
public static final int MISSING = -1;
|
private static final int MISSING = -1;
|
||||||
|
|
||||||
private static final int NOT_COMPUTED = -2;
|
private static final int NOT_COMPUTED = -2;
|
||||||
|
|
||||||
|
private static final long BASE_RAM_BYTES =
|
||||||
|
RamUsageEstimator.shallowSizeOfInstance(NFARunAutomaton.class);
|
||||||
|
|
||||||
private final Automaton automaton;
|
private final Automaton automaton;
|
||||||
private final int[] points;
|
private final int[] points;
|
||||||
private final Map<DState, Integer> dStateToOrd = new HashMap<>(); // could init lazily?
|
private final Map<DState, Integer> dStateToOrd = new HashMap<>(); // could init lazily?
|
||||||
|
@ -229,7 +234,17 @@ public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
|
||||||
setTransitionAccordingly(t);
|
setTransitionAccordingly(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class DState {
|
/**
 * Estimates the heap usage of this run automaton in bytes.
 *
 * <p>The result is the shallow size of an {@code NFARunAutomaton} instance ({@code
 * BASE_RAM_BYTES}) plus the estimated sizes of the wrapped {@code automaton}, the {@code points}
 * array, the {@code dStateToOrd} map, and the {@code dStates} and {@code classmap} fields.
 *
 * <p>NOTE(review): this class determinizes lazily, so the value presumably grows as more states
 * are discovered; callers should not cache it -- confirm. The accompanying test only requires the
 * estimate to be within 30% of the measured size, so treat it as approximate.
 *
 * @return estimated number of bytes used by this instance and the structures it references
 */
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BASE_RAM_BYTES
|
||||||
|
+ RamUsageEstimator.sizeOfObject(automaton)
|
||||||
|
+ RamUsageEstimator.sizeOfObject(points)
|
||||||
|
+ RamUsageEstimator.sizeOfMap(dStateToOrd)
|
||||||
|
+ RamUsageEstimator.sizeOfObject(dStates)
|
||||||
|
+ RamUsageEstimator.sizeOfObject(classmap);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class DState implements Accountable {
|
||||||
private final int[] nfaStates;
|
private final int[] nfaStates;
|
||||||
// this field is lazily init'd when first time caller wants to add a new transition
|
// this field is lazily init'd when first time caller wants to add a new transition
|
||||||
private int[] transitions;
|
private int[] transitions;
|
||||||
|
@ -426,5 +441,17 @@ public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
|
||||||
DState dState = (DState) o;
|
DState dState = (DState) o;
|
||||||
return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates);
|
return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Estimates the heap usage of this determinized state in bytes.
 *
 * <p>The fixed part is computed by hand and then aligned: three ints, one extra byte, two {@link
 * Transition} shallow sizes, one object header and four object references. The variable part adds
 * the estimated sizes of the {@code nfaStates} and {@code transitions} arrays.
 *
 * <p>NOTE(review): the hand-written constants presumably mirror the fields declared on {@code
 * DState}; not all of those fields are visible here, so re-check the counts whenever a field is
 * added or removed.
 *
 * @return estimated number of bytes used by this state
 */
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return RamUsageEstimator.alignObjectSize(
|
||||||
|
Integer.BYTES * 3
|
||||||
|
+ 1
|
||||||
|
+ Transition.BYTES_USED * 2
|
||||||
|
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||||
|
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF * 4L)
|
||||||
|
+ RamUsageEstimator.sizeOfObject(nfaStates)
|
||||||
|
// transitions is initialized lazily, so it may still be unset here; presumably
// sizeOfObject tolerates null -- TODO confirm
+ RamUsageEstimator.sizeOfObject(transitions);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,12 +16,18 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.util.automaton;
|
package org.apache.lucene.util.automaton;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.Accountable;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds one transition from an {@link Automaton}. This is typically used temporarily when iterating
|
* Holds one transition from an {@link Automaton}. This is typically used temporarily when iterating
|
||||||
* through transitions by invoking {@link Automaton#initTransition} and {@link
|
* through transitions by invoking {@link Automaton#initTransition} and {@link
|
||||||
* Automaton#getNextTransition}.
|
* Automaton#getNextTransition}.
|
||||||
*/
|
*/
|
||||||
public class Transition {
|
public class Transition implements Accountable {
|
||||||
|
|
||||||
|
/** static estimation of bytes used */
|
||||||
|
public static final long BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Transition.class);
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Transition() {}
|
public Transition() {}
|
||||||
|
@ -48,4 +54,9 @@ public class Transition {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return source + " --> " + dest + " " + (char) min + "-" + (char) max;
|
return source + " --> " + dest + " " + (char) min + "-" + (char) max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the static per-instance estimate {@link #BYTES_USED}, i.e. the shallow size of a {@code
 * Transition} instance; the value is the same for every instance.
 *
 * @return estimated number of bytes used by one {@code Transition}
 */
@Override
|
||||||
|
public long ramBytesUsed() {
|
||||||
|
return BYTES_USED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,14 +32,25 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.tests.util.RamUsageTester;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
import org.junit.Assert;
|
||||||
|
|
||||||
public class TestNFARunAutomaton extends LuceneTestCase {
|
public class TestNFARunAutomaton extends LuceneTestCase {
|
||||||
|
|
||||||
private static final String FIELD = "field";
|
private static final String FIELD = "field";
|
||||||
|
|
||||||
|
/**
 * Checks that {@code NFARunAutomaton#ramBytesUsed()} stays close to the size reported by {@code
 * RamUsageTester} for an automaton built from a random regular expression.
 *
 * <p>NOTE(review): the automaton is measured immediately after construction and no input is run
 * through it first, so states that are created on demand are presumably not exercised by this
 * test -- confirm whether that is intended.
 */
public void testRamUsageEstimation() {
|
||||||
|
RegExp regExp = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
|
||||||
|
Automaton nfa = regExp.toAutomaton();
|
||||||
|
NFARunAutomaton runAutomaton = new NFARunAutomaton(nfa);
|
||||||
|
long estimation = runAutomaton.ramBytesUsed();
|
||||||
|
long actual = RamUsageTester.ramUsed(runAutomaton);
|
||||||
|
// the estimate must fall within 30% of the measured size (delta = actual * 0.3)
Assert.assertEquals((double) actual, (double) estimation, (double) actual * 0.3);
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public void testWithRandomRegex() {
|
public void testWithRandomRegex() {
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
|
|
Loading…
Reference in New Issue