mirror of https://github.com/apache/lucene.git
Rename NodeHash to FSTSuffixNodeCache (#13259)
parent cfdd20f5bc
commit 13285279c2
lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java

@@ -98,8 +98,8 @@ public class FSTCompiler<T> {
   // it will throw exceptions if attempt to call getReverseBytesReader() or writeTo(DataOutput)
   private static final FSTReader NULL_FST_READER = new NullFSTReader();

-  private final NodeHash<T> dedupHash;
-  // a temporary FST used during building for NodeHash cache
+  private final FSTSuffixNodeCache<T> suffixDedupCache;
+  // a temporary FST used during building for FSTSuffixNodeCache cache
   final FST<T> fst;
   private final T NO_OUTPUT;
@@ -178,9 +178,9 @@ public class FSTCompiler<T> {
     if (suffixRAMLimitMB < 0) {
       throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
     } else if (suffixRAMLimitMB > 0) {
-      dedupHash = new NodeHash<>(this, suffixRAMLimitMB);
+      suffixDedupCache = new FSTSuffixNodeCache<>(this, suffixRAMLimitMB);
     } else {
-      dedupHash = null;
+      suffixDedupCache = null;
     }
     NO_OUTPUT = outputs.getNoOutput();
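For context, the suffix RAM limit reaches this constructor from FSTCompiler.Builder: a positive value enables suffix deduplication, zero disables it, and negative values throw. Below is a minimal sketch of building an FST with the cache enabled. It assumes the Builder API of roughly this vintage (suffixRAMLimitMB, compile(), getFSTReader()); exact signatures vary across Lucene versions, and the class name SuffixLimitExample is ours, not Lucene's.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class SuffixLimitExample {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    // suffixRAMLimitMB > 0 enables the suffix cache (smaller, near-minimal FST);
    // 0 disables suffix sharing entirely; negative values throw IllegalArgumentException
    FSTCompiler<Long> compiler =
        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
            .suffixRAMLimitMB(32.0)
            .build();

    // inputs must be added in sorted order
    IntsRefBuilder scratch = new IntsRefBuilder();
    compiler.add(Util.toIntsRef(new BytesRef("dog"), scratch), 1L);
    compiler.add(Util.toIntsRef(new BytesRef("log"), scratch), 2L);

    // "dog" and "log" can share the frozen "og" suffix when the cache is enabled
    FST<Long> fst = FST.fromFSTReader(compiler.compile(), compiler.getFSTReader());
    System.out.println(Util.get(fst, new BytesRef("log"))); // prints 2
  }
}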
@@ -379,12 +379,12 @@ public class FSTCompiler<T> {
   private CompiledNode compileNode(UnCompiledNode<T> nodeIn) throws IOException {
     final long node;
     long bytesPosStart = numBytesWritten;
-    if (dedupHash != null) {
+    if (suffixDedupCache != null) {
       if (nodeIn.numArcs == 0) {
         node = addNode(nodeIn);
         lastFrozenNode = node;
       } else {
-        node = dedupHash.add(nodeIn);
+        node = suffixDedupCache.add(nodeIn);
       }
     } else {
       node = addNode(nodeIn);
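After the rename, compileNode() still routes every node with at least one outgoing arc through the cache's add() method, which either returns the address of an identical already-frozen node or freezes this one and remembers it. Here is a toy, self-contained model of that lookup-or-freeze contract; SuffixDedupSketch and its string keys are illustrative stand-ins, not Lucene's types.

import java.util.HashMap;
import java.util.Map;

final class SuffixDedupSketch {
  private final Map<String, Long> frozen = new HashMap<>(); // node key -> FST address
  private long nextAddress = 1;

  // the key stands in for "same label, arcs, outputs & other properties"
  long add(String nodeKey) {
    Long existing = frozen.get(nodeKey);
    if (existing != null) {
      return existing; // an identical suffix is already frozen: share it
    }
    long address = nextAddress++; // in Lucene, freezing writes the node bytes to the FST
    frozen.put(nodeKey, address);
    return address;
  }

  public static void main(String[] args) {
    SuffixDedupSketch cache = new SuffixDedupSketch();
    long a = cache.add("arc=g->[final]"); // e.g. the shared tail of "dog" and "log"
    long b = cache.add("arc=g->[final]");
    System.out.println(a == b); // true: identical suffix nodes collapse to one address
  }
}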
lucene/core/src/java/org/apache/lucene/util/fst/{NodeHash.java → FSTSuffixNodeCache.java}

@@ -31,8 +31,24 @@ import org.apache.lucene.util.packed.PagedGrowableWriter;
 // TODO: couldn't we prune naturally back until we see a transition with an output? it's highly
 // unlikely (mostly impossible) such suffixes can be shared?

-// Used to dedup states (lookup already-frozen states)
-final class NodeHash<T> {
+/**
+ * This is essentially an LRU cache used to maintain and look up node suffixes. An un-compiled
+ * node can be added into the cache, and if a similar node already exists we will return its
+ * address in the FST. A node is defined as similar if it has the same label, arcs, outputs &
+ * other properties that identify a node.
+ *
+ * <p>The total size of the cache is controlled through the constructor parameter
+ * <code>ramLimitMB</code>. Implementation-wise, we maintain two lookup tables: a primary table
+ * that nodes are looked up from, and a fallback lookup table in case the lookup in the primary
+ * table fails. Nodes from the fallback table can also be promoted to the primary table when
+ * that happens. When the primary table is full, we swap it with the fallback table and clear
+ * out the primary table.
+ *
+ * <p>To look up a node address, we build a special hash table called
+ * <code>PagedGrowableHash</code>, which maps from the node hash value to the node address in
+ * the FST. Internally it uses {@link PagedGrowableWriter} to store the mapping, which allows
+ * efficiently packing the hash & address long values, and uses {@link ByteBlockPool} to store
+ * the actual node content (arcs & outputs).
+ */
+final class FSTSuffixNodeCache<T> {

   // primary table -- we add nodes into this until it reaches the requested tableSizeLimit/2, then
   // we move it to fallback
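The new Javadoc describes a two-generation scheme: lookups try the primary table first, then the fallback; fallback hits are promoted back into the primary table; and when the primary table fills up, it is demoted wholesale to become the new fallback. Below is a minimal self-contained sketch of that policy, with plain HashMaps standing in for PagedGrowableHash; all names here are illustrative, not Lucene's.

import java.util.HashMap;
import java.util.Map;

final class TwoGenerationCache {
  private final int primaryLimit;
  private Map<Long, Long> primary = new HashMap<>();  // node hash -> FST address
  private Map<Long, Long> fallback = new HashMap<>();

  TwoGenerationCache(int primaryLimit) {
    this.primaryLimit = primaryLimit;
  }

  Long lookup(long hash) {
    Long address = primary.get(hash);
    if (address != null) {
      return address;
    }
    address = fallback.get(hash);
    if (address != null) {
      put(hash, address); // promote recently used entries back into the primary table
    }
    return address;
  }

  void put(long hash, long address) {
    if (primary.size() >= primaryLimit) {
      // primary is full: it becomes the new fallback, and the old fallback
      // (holding the least recently used suffixes) is discarded
      fallback = primary;
      primary = new HashMap<>();
    }
    primary.put(hash, address);
  }

  public static void main(String[] args) {
    TwoGenerationCache cache = new TwoGenerationCache(2);
    cache.put(101L, 1L);
    cache.put(102L, 2L);
    cache.put(103L, 3L); // swap: {101, 102} becomes fallback, primary holds {103}
    System.out.println(cache.lookup(101L)); // 1: found in fallback and promoted
  }
}

Discarding the old fallback on each swap is what bounds memory at the configured limit; the cost is that rarely used suffixes get re-frozen, so the FST is no longer strictly minimal.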
@@ -60,7 +76,7 @@ final class NodeHash<T> {
    * recently used suffixes are discarded, and the FST is no longer minimal. Still, larger
    * ramLimitMB will make the FST smaller (closer to minimal).
    */
-  public NodeHash(FSTCompiler<T> fstCompiler, double ramLimitMB) {
+  public FSTSuffixNodeCache(FSTCompiler<T> fstCompiler, double ramLimitMB) {
     if (ramLimitMB <= 0) {
       throw new IllegalArgumentException("ramLimitMB must be > 0; got: " + ramLimitMB);
     }
lucene/core/src/test/org/apache/lucene/util/fst/{TestNodeHash.java → TestFSTSuffixNodeCache.java}

@@ -19,14 +19,16 @@ package org.apache.lucene.util.fst;
 import com.carrotsearch.randomizedtesting.generators.RandomBytes;
 import org.apache.lucene.tests.util.LuceneTestCase;

-public class TestNodeHash extends LuceneTestCase {
+public class TestFSTSuffixNodeCache extends LuceneTestCase {

   public void testCopyFallbackNodeBytes() {
     // we don't need the FSTCompiler in this test
-    NodeHash<Object> nodeHash = new NodeHash<>(null, 1);
+    FSTSuffixNodeCache<Object> suffixCache = new FSTSuffixNodeCache<>(null, 1);

-    NodeHash<Object>.PagedGrowableHash primaryHashTable = nodeHash.new PagedGrowableHash();
-    NodeHash<Object>.PagedGrowableHash fallbackHashTable = nodeHash.new PagedGrowableHash();
+    FSTSuffixNodeCache<Object>.PagedGrowableHash primaryHashTable =
+        suffixCache.new PagedGrowableHash();
+    FSTSuffixNodeCache<Object>.PagedGrowableHash fallbackHashTable =
+        suffixCache.new PagedGrowableHash();
     int nodeLength = atLeast(500);
     long fallbackHashSlot = 1;
     byte[] fallbackBytes = RandomBytes.randomBytesOfLength(random(), nodeLength);