mirror of https://github.com/apache/lucene.git
Alternative depth-based DOT layout ordering in FST's Utils.
https://issues.apache.org/jira/browse/LUCENE-2934 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1072978 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3f1e769798
commit
7097021329
|
@ -467,9 +467,13 @@ public class FST<T> {
|
||||||
return arc;
|
return arc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Follow the follow arc and read the first arc of its
|
/**
|
||||||
* target; this changes the provide arc (2nd arg) in-place
|
* Follow the <code>follow</code> arc and read the first arc of its target;
|
||||||
* and returns it. */
|
* this changes the provided <code>arc</code> (2nd arg) in-place and returns
|
||||||
|
* it.
|
||||||
|
*
|
||||||
|
* @return Returns the second argument (<code>arc</code>).
|
||||||
|
*/
|
||||||
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
|
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
|
||||||
//int pos = address;
|
//int pos = address;
|
||||||
//System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());
|
//System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());
|
||||||
|
|
|
@ -17,12 +17,8 @@ package org.apache.lucene.util.automaton.fst;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.*;
|
||||||
import java.io.PrintStream;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.HashSet;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IntsRef;
|
import org.apache.lucene.util.IntsRef;
|
||||||
|
@ -160,90 +156,165 @@ public final class Util {
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
// NOTE: this consumes alot of RAM!
|
* Dumps an {@link FST} to a GraphViz's <code>dot</code> language description
|
||||||
// arcs w/ NEXT opto are in blue
|
* for visualization. Example of use:
|
||||||
/*
|
*
|
||||||
eg:
|
* <pre>
|
||||||
PrintStream ps = new PrintStream("out.dot");
|
* PrintStream ps = new PrintStream("out.dot");
|
||||||
fst.toDot(ps);
|
* fst.toDot(ps);
|
||||||
ps.close();
|
* ps.close();
|
||||||
System.out.println("SAVED out.dot");
|
* </pre>
|
||||||
|
*
|
||||||
then dot -Tpng out.dot > /x/tmp/out.png
|
* and then, from command line:
|
||||||
*/
|
*
|
||||||
|
* <pre>
|
||||||
public static<T> void toDot(FST<T> fst, PrintStream out) throws IOException {
|
* dot -Tpng -o out.png out.dot
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
|
||||||
|
*
|
||||||
|
* @param sameRank
|
||||||
|
* If <code>true</code>, the resulting <code>dot</code> file will try
|
||||||
|
* to order states in layers of breadth-first traversal. This may
|
||||||
|
* mess up arcs, but makes the output FST's structure a bit clearer.
|
||||||
|
*
|
||||||
|
* @param labelStates
|
||||||
|
* If <code>true</code> states will have labels equal to their offsets in their
|
||||||
|
* binary format. Expands the graph considerably.
|
||||||
|
*
|
||||||
|
* @see "http://www.graphviz.org/"
|
||||||
|
*/
|
||||||
|
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
|
||||||
|
throws IOException {
|
||||||
|
// This is the start arc in the automaton (from the epsilon state to the first state
|
||||||
|
// with outgoing transitions).
|
||||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
||||||
|
|
||||||
final List<FST.Arc<T>> queue = new ArrayList<FST.Arc<T>>();
|
// A queue of transitions to consider for the current level.
|
||||||
queue.add(startArc);
|
final List<FST.Arc<T>> thisLevelQueue = new ArrayList<FST.Arc<T>>();
|
||||||
|
|
||||||
final Set<Integer> seen = new HashSet<Integer>();
|
// A queue of transitions to consider when processing the next level.
|
||||||
seen.add(startArc.target);
|
final List<FST.Arc<T>> nextLevelQueue = new ArrayList<FST.Arc<T>>();
|
||||||
|
nextLevelQueue.add(startArc);
|
||||||
|
|
||||||
out.println("digraph FST {");
|
// A list of states on the same level (for ranking).
|
||||||
out.println(" rankdir = LR;");
|
final List<Integer> sameLevelStates = new ArrayList<Integer>();
|
||||||
//out.println(" " + startNode + " [shape=circle label=" + startNode + "];");
|
|
||||||
out.println(" " + startArc.target + " [label=\"\" shape=circle];");
|
// A bitset of already seen states (target offset).
|
||||||
out.println(" initial [shape=point color=white label=\"\"];");
|
final BitSet seen = new BitSet();
|
||||||
out.println(" initial -> " + startArc.target);
|
seen.set(startArc.target);
|
||||||
|
|
||||||
|
// Shape for states.
|
||||||
|
final String stateShape = "circle";
|
||||||
|
|
||||||
|
// Emit DOT prologue.
|
||||||
|
out.write("digraph FST {\n");
|
||||||
|
out.write(" rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
|
||||||
|
|
||||||
|
if (!labelStates) {
|
||||||
|
out.write(" node [shape=circle, width=.2, height=.2, style=filled]\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
emitDotState(out, "initial", "point", "white", "");
|
||||||
|
emitDotState(out, Integer.toString(startArc.target), stateShape, null, "");
|
||||||
|
out.write(" initial -> " + startArc.target + "\n");
|
||||||
|
|
||||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||||
|
int level = 0;
|
||||||
|
|
||||||
while(queue.size() != 0) {
|
while (!nextLevelQueue.isEmpty()) {
|
||||||
FST.Arc<T> arc = queue.get(queue.size()-1);
|
// we could double buffer here, but it doesn't matter probably.
|
||||||
queue.remove(queue.size()-1);
|
thisLevelQueue.addAll(nextLevelQueue);
|
||||||
//System.out.println("dot cycle target=" + arc.target);
|
nextLevelQueue.clear();
|
||||||
|
|
||||||
if (fst.targetHasArcs(arc)) {
|
level++;
|
||||||
|
out.write("\n // Transitions and states at level: " + level + "\n");
|
||||||
// scan all arcs
|
while (!thisLevelQueue.isEmpty()) {
|
||||||
final int node = arc.target;
|
final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
|
||||||
fst.readFirstTargetArc(arc, arc);
|
|
||||||
while(true) {
|
|
||||||
|
|
||||||
//System.out.println(" cycle label=" + arc.label + " (" + (char) arc.label + ") target=" + arc.target);
|
|
||||||
if (!seen.contains(arc.target)) {
|
|
||||||
final String shape;
|
|
||||||
if (arc.target == -1) {
|
|
||||||
shape = "doublecircle";
|
|
||||||
} else {
|
|
||||||
shape = "circle";
|
|
||||||
}
|
|
||||||
out.println(" " + arc.target + " [shape=" + shape + "];");
|
|
||||||
seen.add(arc.target);
|
|
||||||
queue.add(new FST.Arc<T>().copyFrom(arc));
|
|
||||||
//System.out.println(" new!");
|
|
||||||
}
|
|
||||||
String outs;
|
|
||||||
if (arc.output != NO_OUTPUT) {
|
|
||||||
outs = "/" + fst.outputs.outputToString(arc.output);
|
|
||||||
} else {
|
|
||||||
outs = "";
|
|
||||||
}
|
|
||||||
final char cl;
|
|
||||||
if (arc.label == FST.END_LABEL) {
|
|
||||||
cl = '~';
|
|
||||||
} else {
|
|
||||||
cl = (char) arc.label;
|
|
||||||
}
|
|
||||||
out.println(" " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]");
|
|
||||||
//if (arc.flag(FST.BIT_TARGET_NEXT)) {
|
|
||||||
//out.print(" color=blue");
|
|
||||||
//}
|
|
||||||
//out.println("];");
|
|
||||||
|
|
||||||
if (arc.isLast()) {
|
if (fst.targetHasArcs(arc)) {
|
||||||
break;
|
// scan all arcs
|
||||||
} else {
|
final int node = arc.target;
|
||||||
|
fst.readFirstTargetArc(arc, arc);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
// Emit the unseen state and add it to the queue for the next level.
|
||||||
|
if (arc.target >= 0 && !seen.get(arc.target)) {
|
||||||
|
emitDotState(out, Integer.toString(arc.target), stateShape, null,
|
||||||
|
labelStates ? Integer.toString(arc.target) : "");
|
||||||
|
seen.set(arc.target);
|
||||||
|
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
|
||||||
|
sameLevelStates.add(arc.target);
|
||||||
|
}
|
||||||
|
|
||||||
|
String outs;
|
||||||
|
if (arc.output != NO_OUTPUT) {
|
||||||
|
outs = "/" + fst.outputs.outputToString(arc.output);
|
||||||
|
} else {
|
||||||
|
outs = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
final String cl;
|
||||||
|
if (arc.label == FST.END_LABEL) {
|
||||||
|
cl = "~";
|
||||||
|
} else {
|
||||||
|
cl = printableLabel(arc.label);
|
||||||
|
}
|
||||||
|
|
||||||
|
out.write(" " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]\n");
|
||||||
|
|
||||||
|
// Break the loop if we're on the last arc of this state.
|
||||||
|
if (arc.isLast()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
fst.readNextArc(arc);
|
fst.readNextArc(arc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit state ranking information.
|
||||||
|
if (sameRank && sameLevelStates.size() > 1) {
|
||||||
|
out.write(" {rank=same; ");
|
||||||
|
for (int state : sameLevelStates) {
|
||||||
|
out.write(state + "; ");
|
||||||
|
}
|
||||||
|
out.write(" }\n");
|
||||||
|
}
|
||||||
|
sameLevelStates.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit terminating state (always there anyway).
|
||||||
|
out.write(" -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
|
||||||
|
out.write(" {rank=sink; -1 } ");
|
||||||
|
|
||||||
|
out.write("}\n");
|
||||||
|
out.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emit a single state in the <code>dot</code> language.
|
||||||
|
*/
|
||||||
|
private static void emitDotState(Writer out, String name, String shape,
|
||||||
|
String color, String label) throws IOException {
|
||||||
|
out.write(" " + name
|
||||||
|
+ " ["
|
||||||
|
+ (shape != null ? "shape=" + shape : "") + " "
|
||||||
|
+ (color != null ? "color=" + color : "") + " "
|
||||||
|
+ (label != null ? "label=\"" + label + "\"" : "label=\"\"") + " "
|
||||||
|
+ "]\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensures an arc's label is indeed printable (dot uses US-ASCII).
|
||||||
|
*/
|
||||||
|
private static String printableLabel(int label) {
|
||||||
|
if (label >= 0x20 && label <= 0x7d) {
|
||||||
|
return Character.toString((char) label);
|
||||||
|
} else {
|
||||||
|
return "0x" + Integer.toHexString(label);
|
||||||
}
|
}
|
||||||
out.println("}");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue