diff --git a/lucene/src/java/org/apache/lucene/util/fst/FST.java b/lucene/src/java/org/apache/lucene/util/fst/FST.java index fb2079fc7a6..4e5dcb6d7b3 100644 --- a/lucene/src/java/org/apache/lucene/util/fst/FST.java +++ b/lucene/src/java/org/apache/lucene/util/fst/FST.java @@ -231,7 +231,7 @@ public final class FST { b.append(" hasOutput"); } if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) { - b.append(" hasOutput"); + b.append(" hasFinalOutput"); } if (bytesPerArc != 0) { b.append(" arcArray(idx=" + arcIdx + " of " + numArcs + ")"); @@ -1447,6 +1447,7 @@ public final class FST { // Find top nodes with highest number of incoming arcs: NodeQueue q = new NodeQueue(topN); + // TODO: we could use more RAM efficient selection algo here... NodeAndInCount bottom = null; for(int node=0;node= minInCountDeref) { @@ -1515,6 +1516,8 @@ public final class FST { int addressError = 0; + //int totWasted = 0; + // Since we re-reverse the bytes, we now write the // nodes backwards, so that BIT_TARGET_NEXT is // unchanged: @@ -1554,10 +1557,11 @@ public final class FST { writer.writeByte(ARCS_AS_FIXED_ARRAY); writer.writeVInt(arc.numArcs); writer.writeVInt(bytesPerArc); + //System.out.println("node " + node + ": " + arc.numArcs + " arcs"); } int maxBytesPerArc = 0; - + //int wasted = 0; while(true) { // iterate over all arcs for this node //System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite); @@ -1680,6 +1684,7 @@ public final class FST { // incoming FST did... but in this case we // will retry (below) so it's OK to ovewrite // bytes: + //wasted += bytesPerArc - arcBytes; writer.setPosWrite(arcStartPos + bytesPerArc); } @@ -1693,6 +1698,8 @@ public final class FST { if (useArcArray) { if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) { // converged + //System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted); + //totWasted += wasted; break; } } else { @@ -1719,6 +1726,7 @@ public final class FST { // other nodes because we only produce acyclic FSTs // w/ nodes only pointing "forwards": assert !negDelta; + //System.out.println("TOT wasted=" + totWasted); // Converged! break; } @@ -1730,7 +1738,7 @@ public final class FST { } fst.startNode = newNodeAddress[startNode]; - //System.out.println("new startNode=" + startNode); + //System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode); if (emptyOutput != null) { fst.setEmptyOutput(emptyOutput); diff --git a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java index bd0516529b6..3082c61c3d4 100644 --- a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -530,7 +530,7 @@ public class TestFSTs extends LuceneTestCase { if (VERBOSE) { System.out.println("TEST: now rewrite"); } - final FST packed =fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000)); + final FST packed = fst.pack(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 0, 10000000)); if (VERBOSE) { System.out.println("TEST: now verify packed FST"); } @@ -1308,13 +1308,13 @@ public class TestFSTs extends LuceneTestCase { System.out.println("Pack..."); fst = fst.pack(4, 100000000); System.out.println("New size " + fst.sizeInBytes() + " bytes"); - } else { - Directory dir = FSDirectory.open(new File(dirOut)); - IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT); - fst.save(out); - out.close(); - System.out.println("Saved FST to fst.bin."); } + + Directory dir = FSDirectory.open(new File(dirOut)); + IndexOutput out = dir.createOutput("fst.bin", IOContext.DEFAULT); + fst.save(out); + out.close(); + System.out.println("Saved FST to fst.bin."); if (!verify) { return;