diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java index 4af5add0149..98a654cbed2 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java @@ -99,7 +99,7 @@ public class FST { public int arcWithOutputCount; // If arc has this label then that arc is final/accepted - public static int END_LABEL = -1; + public static final int END_LABEL = -1; public final static class Arc { public int label; @@ -297,9 +297,7 @@ public class FST { final int v; if (inputType == INPUT_TYPE.BYTE1) { v = in.readByte()&0xFF; - } else if (inputType == INPUT_TYPE.BYTE2) { - v = in.readVInt(); - } else { + } else { v = in.readVInt(); } return v; @@ -478,6 +476,59 @@ public class FST { return arc; } + /** Follows the follow arc and reads the last + * arc of its target; this changes the provided + * arc (2nd arg) in-place and returns it. + * + * @return Returns the second argument + * (arc). */ + public Arc readLastTargetArc(Arc follow, Arc arc) throws IOException { + //System.out.println("readLast"); + if (!targetHasArcs(follow)) { + //System.out.println(" end node"); + assert follow.isFinal(); + arc.label = -1; + arc.output = follow.nextFinalOutput; + arc.flags = BIT_LAST_ARC; + return arc; + } else { + final BytesReader in = getBytesReader(follow.target); + arc.flags = in.readByte(); + if (arc.flag(BIT_ARCS_AS_FIXED_ARRAY)) { + // array: jump straight to end + arc.numArcs = in.readVInt(); + arc.bytesPerArc = in.readByte() & 0xFF; + //System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc); + arc.posArcsStart = in.pos; + arc.arcIdx = arc.numArcs - 2; + } else { + // non-array: linear scan + arc.bytesPerArc = 0; + //System.out.println(" scan"); + while(!arc.isLast()) { + // skip this arc: + readLabel(in); + if (arc.flag(BIT_ARC_HAS_OUTPUT)) { + outputs.read(in); + } + if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) { + outputs.read(in); + } + if (arc.flag(BIT_STOP_NODE)) { + } else if (arc.flag(BIT_TARGET_NEXT)) { + } else { + in.pos -= 4; + } + arc.flags = in.readByte(); + } + arc.nextArc = in.pos+1; + } + readNextRealArc(arc); + assert arc.isLast(); + return arc; + } + } + /** * Follow the follow arc and read the first arc of its target; * this changes the provided arc (2nd arg) in-place and returns @@ -518,15 +569,13 @@ public class FST { arc.numArcs = in.readVInt(); arc.bytesPerArc = in.readByte() & 0xFF; arc.arcIdx = -1; - arc.posArcsStart = in.pos; + arc.nextArc = arc.posArcsStart = in.pos; //System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos); } else { - in.pos++; + arc.nextArc = address; arc.bytesPerArc = 0; } - arc.nextArc = in.pos; - arc.label = 0; - return readNextArc(arc); + return readNextRealArc(arc); } /** @@ -598,6 +647,7 @@ public class FST { if (arc.bytesPerArc != 0) { // arcs are at fixed entries arc.arcIdx++; + assert arc.arcIdx < arc.numArcs; in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc); } else { // arcs are packed diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java index c5e0471180e..77484164c01 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FSTEnum.java @@ -274,7 +274,7 @@ abstract class FSTEnum { while(true) { //System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast()); - if (arc.bytesPerArc != 0 && arc.label != -1) { + if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) { // Arcs are fixed array -- use binary search to find // the target. @@ -465,12 +465,7 @@ abstract class FSTEnum { } incr(); - final FST.Arc nextArc = getArc(upto); - fst.readFirstTargetArc(arc, nextArc); - arc = nextArc; - while(!arc.isLast()) { - fst.readNextArc(arc); - } + arc = fst.readLastTargetArc(arc, getArc(upto)); } }