From 5f33d8d2ae7f639947254009c60ba586389e10b7 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 19 Jan 2012 23:54:55 +0000 Subject: [PATCH] use singletons in FST outputs; add 2 commented out test cases showing non-minimality git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1233696 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/util/fst/ByteSequenceOutputs.java | 3 +- .../lucene/util/fst/IntSequenceOutputs.java | 3 +- .../org/apache/lucene/util/fst/TestFSTs.java | 44 +++++++++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java b/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java index 10f566c5396..cb8101155af 100644 --- a/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java +++ b/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java @@ -32,12 +32,13 @@ import org.apache.lucene.util.BytesRef; public final class ByteSequenceOutputs extends Outputs { private final static BytesRef NO_OUTPUT = new BytesRef(); + private final static ByteSequenceOutputs singleton = new ByteSequenceOutputs(); private ByteSequenceOutputs() { } public static ByteSequenceOutputs getSingleton() { - return new ByteSequenceOutputs(); + return singleton; } @Override diff --git a/lucene/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java b/lucene/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java index 8f3ad732814..aa0ca0264cd 100644 --- a/lucene/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java +++ b/lucene/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java @@ -32,12 +32,13 @@ import org.apache.lucene.util.IntsRef; public final class IntSequenceOutputs extends Outputs { private final static IntsRef NO_OUTPUT = new IntsRef(); + private final static IntSequenceOutputs singleton = new IntSequenceOutputs(); private IntSequenceOutputs() { } public static IntSequenceOutputs getSingleton() { - return new 
IntSequenceOutputs(); + return singleton; } @Override diff --git a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java index 68ec08794ea..c9ac1b50605 100644 --- a/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java +++ b/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java @@ -1055,6 +1055,50 @@ public class TestFSTs extends LuceneTestCase { } } + // NOTE: this test shows a case where our current builder + // fails to produce minimal FST: + /* + public void test3() throws Exception { + final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); + Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + IntsRef scratchIntsRef = new IntsRef(); + builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.get(0)); + builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), 1L); + builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), 2L); + final FST fst = builder.finish(); + //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount()); + // NOTE: we produce 7 nodes today + assertEquals(6, fst.getNodeCount()); + // NOTE: we produce 8 arcs today + assertEquals(7, fst.getArcCount()); + //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + //Util.toDot(fst, w, false, false); + //w.close(); + } + */ + + // NOTE: this test shows a case where our current builder + // fails to produce minimal FST: + /* + public void test4() throws Exception { + final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); + Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + IntsRef scratchIntsRef = new IntsRef(); + builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.getNoOutput()); + builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), new BytesRef("1")); + builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), new BytesRef("11")); + final FST fst = 
builder.finish(); + //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount()); + // NOTE: we produce 7 nodes today + assertEquals(6, fst.getNodeCount()); + // NOTE: we produce 8 arcs today + assertEquals(7, fst.getArcCount()); + //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + //Util.toDot(fst, w, false, false); + //w.close(); + } + */ + // Build FST for all unique terms in the test line docs // file, up until a time limit public void testRealTerms() throws Exception {