From 9e4a656592d83f4190858611686f7b833ef58bea Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 3 Jun 2011 10:39:33 +0000 Subject: [PATCH] Adding more JavaDoc to FST Builder because the arguments are rather cryptic right now. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1130976 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/util/fst/Builder.java | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lucene/src/java/org/apache/lucene/util/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/fst/Builder.java index b5736264e2d..9d25a1482a5 100644 --- a/lucene/src/java/org/apache/lucene/util/fst/Builder.java +++ b/lucene/src/java/org/apache/lucene/util/fst/Builder.java @@ -21,6 +21,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.fst.FST.INPUT_TYPE; import java.io.IOException; @@ -69,6 +70,42 @@ public class Builder { // current "frontier" private UnCompiledNode[] frontier; + /** + * Instantiates an FST/FSA builder without any pruning. A shortcut + * to {@link #Builder(INPUT_TYPE, int, int, boolean, Outputs)} with + * pruning options turned off. + */ + public Builder(FST.INPUT_TYPE inputType, Outputs outputs) + { + this(inputType, 0, 0, true, outputs); + } + + /** + * Instantiates an FST/FSA builder with all the possible tuning and construction + * tweaks. Read parameter documentation carefully. + * + * @param inputType + * The input type (transition labels). Can be anything from {@link INPUT_TYPE} + * enumeration. Shorter types will consume less memory. Strings (character sequences) are + * represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints). + * + * @param minSuffixCount1 + * If pruning the input graph during construction, this threshold is used for telling + * if a node is kept or pruned. If transition_count(node) >= minSuffixCount1, the node + * is kept. + * + * @param minSuffixCount2 + * (Note: only Mike McCandless knows what this one is really doing...) + * + * @param doMinSuffix + * If true, the shared suffixes will be compacted into unique paths. + * This requires an additional hash map for lookups in memory. Setting this parameter to + * false creates a single path for all input sequences. This will result in a larger + * graph, but may require less memory and will speed up construction. + * @param outputs The output type for each input sequence. Applies only if building an FST. For + * FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the + * singleton output object. + */ public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs outputs) { this.minSuffixCount1 = minSuffixCount1; this.minSuffixCount2 = minSuffixCount2;