From cdc7d87fcc321f63b6e9d9f8eceb295b1c919bd5 Mon Sep 17 00:00:00 2001 From: Dzung Bui Date: Thu, 2 Nov 2023 17:34:36 +0900 Subject: [PATCH] Clean up UnCompiledNode.inputCount (#12735) * Clean up inputCount * Update CHANGES.txt --- lucene/CHANGES.txt | 4 +++- .../org/apache/lucene/util/fst/FSTCompiler.java | 16 ---------------- .../org/apache/lucene/util/fst/TestFSTs.java | 1 - 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 76293f6dbbd..09e34a55e3e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -62,9 +62,11 @@ API Changes * GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera) -* GITHUB#12709 Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods +* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods of the two (Anh Dung Bui) +* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui) + New Features --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java index f17c220f83d..3af62410070 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/FSTCompiler.java @@ -270,10 +270,6 @@ public class FSTCompiler { return directAddressingMaxOversizingFactor; } - public long getTermCount() { - return frontier[0].inputCount; - } - public long getNodeCount() { // 1+ in order to count the -1 implicit final node return 1 + nodeCount; @@ -749,7 +745,6 @@ public class FSTCompiler { // format cannot represent the empty input since // 'finalness' is stored on the incoming arc, not on // the node - frontier[0].inputCount++; frontier[0].isFinal = true; fst.setEmptyOutput(output); return; @@ -760,9 +755,6 @@ public class FSTCompiler { int pos2 = input.offset; final int pos1Stop = Math.min(lastInput.length(), input.length); while (true) { - frontier[pos1].inputCount++; - // System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + - // frontier[pos1]); if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) { break; } @@ -786,7 +778,6 @@ public class FSTCompiler { // init tail states for current input for (int idx = prefixLenPlus1; idx <= input.length; idx++) { frontier[idx - 1].addArc(input.ints[input.offset + idx - 1], frontier[idx]); - frontier[idx].inputCount++; } final UnCompiledNode lastNode = frontier[input.length]; @@ -835,8 +826,6 @@ public class FSTCompiler { // save last input lastInput.copyInts(input); - - // System.out.println(" count[0]=" + frontier[0].inputCount); } private boolean validOutput(T output) { @@ -906,10 +895,6 @@ public class FSTCompiler { T output; boolean isFinal; - // TODO: remove this tracking? we used to use it for confusingly pruning NodeHash, but - // we switched to LRU by RAM usage instead: - long inputCount; - /** This node's depth, starting from the automaton root. */ final int depth; @@ -935,7 +920,6 @@ public class FSTCompiler { numArcs = 0; isFinal = false; output = owner.NO_OUTPUT; - inputCount = 0; // We don't clear the depth here because it never changes // for nodes on the frontier (even when reused). diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java index 927fe058ef0..f6dd84efd0e 100644 --- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -568,7 +568,6 @@ public class TestFSTs extends LuceneTestCase { System.out.println( ((tMid - tStart) / (double) TimeUnit.SECONDS.toNanos(1)) + " sec to add all terms"); - assert fstCompiler.getTermCount() == ord; FST fst = fstCompiler.compile(); long tEnd = System.nanoTime(); System.out.println(