LUCENE-4117: add BytesReader to all FST arc reading APIs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348354 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-06-09 10:41:29 +00:00
parent ced7b9c7cc
commit 0063ec8cf8
5 changed files with 66 additions and 60 deletions

View File

@ -246,7 +246,7 @@ public final class FST<T> {
}
};
private final static boolean flag(int flags, int bit) {
private static boolean flag(int flags, int bit) {
return (flags & bit) != 0;
}
@ -755,7 +755,7 @@ public final class FST<T> {
*
* @return Returns the second argument
* (<code>arc</code>). */
public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc, FST.BytesReader in) throws IOException {
//System.out.println("readLast");
if (!targetHasArcs(follow)) {
//System.out.println(" end node");
@ -766,7 +766,7 @@ public final class FST<T> {
arc.flags = BIT_LAST_ARC;
return arc;
} else {
final BytesReader in = getBytesReader(getNodeAddress(follow.target));
in.pos = getNodeAddress(follow.target);
arc.node = follow.target;
final byte b = in.readByte();
if (b == ARCS_AS_FIXED_ARRAY) {
@ -822,7 +822,7 @@ public final class FST<T> {
*
* @return Returns the second argument (<code>arc</code>).
*/
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
//int pos = address;
//System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());
if (follow.isFinal()) {
@ -841,7 +841,7 @@ public final class FST<T> {
//System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
return arc;
} else {
return readFirstRealTargetArc(follow.target, arc, getBytesReader(0));
return readFirstRealTargetArc(follow.target, arc, in);
}
}
@ -881,37 +881,36 @@ public final class FST<T> {
* @return Returns <code>true</code> if <code>arc</code> points to a state in an
* expanded array format.
*/
boolean isExpandedTarget(Arc<T> follow) throws IOException {
boolean isExpandedTarget(Arc<T> follow, FST.BytesReader in) throws IOException {
if (!targetHasArcs(follow)) {
return false;
} else {
final BytesReader in = getBytesReader(getNodeAddress(follow.target));
in.pos = getNodeAddress(follow.target);
return in.readByte() == ARCS_AS_FIXED_ARRAY;
}
}
/** In-place read; returns the arc. */
public Arc<T> readNextArc(Arc<T> arc) throws IOException {
public Arc<T> readNextArc(Arc<T> arc, BytesReader in) throws IOException {
if (arc.label == END_LABEL) {
// This was a fake inserted "final" arc
if (arc.nextArc <= 0) {
throw new IllegalArgumentException("cannot readNextArc when arc.isLast()=true");
}
return readFirstRealTargetArc(arc.nextArc, arc, getBytesReader(0));
return readFirstRealTargetArc(arc.nextArc, arc, in);
} else {
return readNextRealArc(arc, getBytesReader(0));
return readNextRealArc(arc, in);
}
}
/** Peeks at next arc's label; does not alter arc. Do
* not call this if arc.isLast()! */
public int readNextArcLabel(Arc<T> arc) throws IOException {
public int readNextArcLabel(Arc<T> arc, BytesReader in) throws IOException {
assert !arc.isLast();
final BytesReader in;
if (arc.label == END_LABEL) {
//System.out.println(" nextArc fake " + arc.nextArc);
in = getBytesReader(getNodeAddress(arc.nextArc));
in.pos = getNodeAddress(arc.nextArc);
final byte b = bytes[in.pos];
if (b == ARCS_AS_FIXED_ARRAY) {
//System.out.println(" nextArc fake array");
@ -927,12 +926,12 @@ public final class FST<T> {
if (arc.bytesPerArc != 0) {
//System.out.println(" nextArc real array");
// arcs are at fixed entries
in = getBytesReader(arc.posArcsStart);
in.pos = arc.posArcsStart;
in.skip((1+arc.arcIdx)*arc.bytesPerArc);
} else {
// arcs are packed
//System.out.println(" nextArc real packed");
in = getBytesReader(arc.nextArc);
in.pos = arc.nextArc;
}
}
// skip flags
@ -1223,7 +1222,7 @@ public final class FST<T> {
}
}
public final BytesReader getBytesReader(int pos) {
public BytesReader getBytesReader(int pos) {
// TODO: maybe re-use via ThreadLocal?
if (packed) {
return new ForwardBytesReader(bytes, pos);

View File

@ -35,6 +35,7 @@ abstract class FSTEnum<T> {
@SuppressWarnings({"rawtypes","unchecked"}) protected T[] output = (T[]) new Object[10];
protected final T NO_OUTPUT;
protected final FST.BytesReader fstReader;
protected final FST.Arc<T> scratchArc = new FST.Arc<T>();
protected int upto;
@ -45,6 +46,7 @@ abstract class FSTEnum<T> {
* term before target. */
protected FSTEnum(FST<T> fst) {
this.fst = fst;
fstReader = fst.getBytesReader(0);
NO_OUTPUT = fst.outputs.getNoOutput();
fst.getFirstArc(getArc(0));
output[0] = NO_OUTPUT;
@ -62,7 +64,7 @@ abstract class FSTEnum<T> {
if (upto == 0) {
//System.out.println(" init");
upto = 1;
fst.readFirstTargetArc(getArc(0), getArc(1));
fst.readFirstTargetArc(getArc(0), getArc(1), fstReader);
return;
}
//System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength);
@ -78,7 +80,7 @@ abstract class FSTEnum<T> {
} else if (cmp > 0) {
// seek backwards -- reset this arc to the first arc
final FST.Arc<T> arc = getArc(upto);
fst.readFirstTargetArc(getArc(upto-1), arc);
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
//System.out.println(" seek first arc");
break;
}
@ -92,7 +94,7 @@ abstract class FSTEnum<T> {
if (upto == 0) {
//System.out.println(" init");
upto = 1;
fst.readFirstTargetArc(getArc(0), getArc(1));
fst.readFirstTargetArc(getArc(0), getArc(1), fstReader);
} else {
// pop
//System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast());
@ -103,7 +105,7 @@ abstract class FSTEnum<T> {
return;
}
}
fst.readNextArc(arcs[upto]);
fst.readNextArc(arcs[upto], fstReader);
}
pushFirst();
@ -180,7 +182,7 @@ abstract class FSTEnum<T> {
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
arc = fst.readFirstTargetArc(arc, getArc(upto), fstReader);
targetLabel = getTargetLabel();
continue;
} else if (low == arc.numArcs) {
@ -198,7 +200,7 @@ abstract class FSTEnum<T> {
final FST.Arc<T> prevArc = getArc(upto);
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
if (!prevArc.isLast()) {
fst.readNextArc(prevArc);
fst.readNextArc(prevArc, fstReader);
pushFirst();
return;
}
@ -221,7 +223,7 @@ abstract class FSTEnum<T> {
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
arc = fst.readFirstTargetArc(arc, getArc(upto), fstReader);
targetLabel = getTargetLabel();
} else if (arc.label > targetLabel) {
pushFirst();
@ -237,7 +239,7 @@ abstract class FSTEnum<T> {
final FST.Arc<T> prevArc = getArc(upto);
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
if (!prevArc.isLast()) {
fst.readNextArc(prevArc);
fst.readNextArc(prevArc, fstReader);
pushFirst();
return;
}
@ -246,7 +248,7 @@ abstract class FSTEnum<T> {
} else {
// keep scanning
//System.out.println(" next scan");
fst.readNextArc(arc);
fst.readNextArc(arc, fstReader);
}
}
}
@ -320,7 +322,7 @@ abstract class FSTEnum<T> {
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
arc = fst.readFirstTargetArc(arc, getArc(upto), fstReader);
targetLabel = getTargetLabel();
continue;
} else if (high == -1) {
@ -333,12 +335,12 @@ abstract class FSTEnum<T> {
while(true) {
// First, walk backwards until we find a first arc
// that's before our target label:
fst.readFirstTargetArc(getArc(upto-1), arc);
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
if (arc.label < targetLabel) {
// Then, scan forwards to the arc just before
// the targetLabel:
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
fst.readNextArc(arc);
while(!arc.isLast() && fst.readNextArcLabel(arc, in) < targetLabel) {
fst.readNextArc(arc, fstReader);
}
pushLast();
return;
@ -355,7 +357,7 @@ abstract class FSTEnum<T> {
arc.arcIdx = (low > high ? high : low)-1;
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
fst.readNextRealArc(arc, in);
assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
pushLast();
return;
@ -370,7 +372,7 @@ abstract class FSTEnum<T> {
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
arc = fst.readFirstTargetArc(arc, getArc(upto), fstReader);
targetLabel = getTargetLabel();
} else if (arc.label > targetLabel) {
// TODO: if each arc could somehow read the arc just
@ -380,12 +382,12 @@ abstract class FSTEnum<T> {
while(true) {
// First, walk backwards until we find a first arc
// that's before our target label:
fst.readFirstTargetArc(getArc(upto-1), arc);
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
if (arc.label < targetLabel) {
// Then, scan forwards to the arc just before
// the targetLabel:
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
fst.readNextArc(arc);
while(!arc.isLast() && fst.readNextArcLabel(arc, fstReader) < targetLabel) {
fst.readNextArc(arc, fstReader);
}
pushLast();
return;
@ -399,12 +401,12 @@ abstract class FSTEnum<T> {
}
} else if (!arc.isLast()) {
//System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
if (fst.readNextArcLabel(arc) > targetLabel) {
if (fst.readNextArcLabel(arc, fstReader) > targetLabel) {
pushLast();
return;
} else {
// keep scanning
fst.readNextArc(arc);
fst.readNextArc(arc, fstReader);
}
} else {
pushLast();
@ -441,7 +443,7 @@ abstract class FSTEnum<T> {
// short circuit
//upto--;
//upto = 0;
fst.readFirstTargetArc(arc, getArc(upto));
fst.readFirstTargetArc(arc, getArc(upto), fstReader);
//System.out.println(" no match upto=" + upto);
return false;
}
@ -493,7 +495,7 @@ abstract class FSTEnum<T> {
incr();
final FST.Arc<T> nextArc = getArc(upto);
fst.readFirstTargetArc(arc, nextArc);
fst.readFirstTargetArc(arc, nextArc, fstReader);
arc = nextArc;
}
}
@ -514,7 +516,7 @@ abstract class FSTEnum<T> {
}
incr();
arc = fst.readLastTargetArc(arc, getArc(upto));
arc = fst.readLastTargetArc(arc, getArc(upto), fstReader);
}
}

View File

@ -335,6 +335,7 @@ public final class Util {
final List<MinResult<T>> results = new ArrayList<MinResult<T>>();
final FST.BytesReader fstReader = fst.getBytesReader(0);
final T NO_OUTPUT = fst.outputs.getNoOutput();
// TODO: we could enable FST to sorting arcs by weight
@ -366,7 +367,7 @@ public final class Util {
FST.Arc<T> minArc = null;
path = new FSTPath<T>(NO_OUTPUT, fromNode, comparator);
fst.readFirstTargetArc(fromNode, path.arc);
fst.readFirstTargetArc(fromNode, path.arc, fstReader);
// Bootstrap: find the min starting arc
while (true) {
@ -383,7 +384,7 @@ public final class Util {
if (path.arc.isLast()) {
break;
}
fst.readNextArc(path.arc);
fst.readNextArc(path.arc, fstReader);
}
assert minArc != null;
@ -439,7 +440,7 @@ public final class Util {
while (true) {
//System.out.println("\n cycle path: " + path);
fst.readFirstTargetArc(path.arc, path.arc);
fst.readFirstTargetArc(path.arc, path.arc, fstReader);
// For each arc leaving this node:
boolean foundZero = false;
@ -463,7 +464,7 @@ public final class Util {
if (path.arc.isLast()) {
break;
}
fst.readNextArc(path.arc);
fst.readNextArc(path.arc, fstReader);
}
assert foundZero;
@ -598,12 +599,13 @@ public final class Util {
emitDotState(out, "initial", "point", "white", "");
final T NO_OUTPUT = fst.outputs.getNoOutput();
final FST.BytesReader r = fst.getBytesReader(0);
// final FST.Arc<T> scratchArc = new FST.Arc<T>();
{
final String stateColor;
if (fst.isExpandedTarget(startArc)) {
if (fst.isExpandedTarget(startArc, r)) {
stateColor = expandedNodeColor;
} else {
stateColor = null;
@ -626,8 +628,6 @@ public final class Util {
int level = 0;
final FST.BytesReader r = fst.getBytesReader(0);
while (!nextLevelQueue.isEmpty()) {
// we could double buffer here, but it doesn't matter probably.
//System.out.println("next level=" + level);
@ -666,7 +666,7 @@ public final class Util {
}
*/
final String stateColor;
if (fst.isExpandedTarget(arc)) {
if (fst.isExpandedTarget(arc, r)) {
stateColor = expandedNodeColor;
} else {
stateColor = null;

View File

@ -436,13 +436,14 @@ public class TestFSTs extends LuceneTestCase {
in.offset = 0;
final T NO_OUTPUT = fst.outputs.getNoOutput();
T output = NO_OUTPUT;
final FST.BytesReader fstReader = fst.getBytesReader(0);
while(true) {
// read all arcs:
fst.readFirstTargetArc(arc, arc);
fst.readFirstTargetArc(arc, arc, fstReader);
arcs.add(new FST.Arc<T>().copyFrom(arc));
while(!arc.isLast()) {
fst.readNextArc(arc);
fst.readNextArc(arc, fstReader);
arcs.add(new FST.Arc<T>().copyFrom(arc));
}
@ -1847,10 +1848,11 @@ public class TestFSTs extends LuceneTestCase {
throws IOException {
if (FST.targetHasArcs(arc)) {
int childCount = 0;
for (arc = fst.readFirstTargetArc(arc, arc);;
arc = fst.readNextArc(arc), childCount++)
FST.BytesReader fstReader = fst.getBytesReader(0);
for (arc = fst.readFirstTargetArc(arc, arc, fstReader);;
arc = fst.readNextArc(arc, fstReader), childCount++)
{
boolean expanded = fst.isExpandedTarget(arc);
boolean expanded = fst.isExpandedTarget(arc, fstReader);
int children = verifyStateAndBelow(fst, new FST.Arc<Object>().copyFrom(arc), depth + 1);
assertEquals(
@ -1982,12 +1984,13 @@ public class TestFSTs extends LuceneTestCase {
assertEquals(nothing, startArc.output);
assertEquals(nothing, startArc.nextFinalOutput);
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>());
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(),
fst.getBytesReader(0));
assertEquals('a', arc.label);
assertEquals(17, arc.nextFinalOutput.longValue());
assertTrue(arc.isFinal());
arc = fst.readNextArc(arc);
arc = fst.readNextArc(arc, fst.getBytesReader(0));
assertEquals('b', arc.label);
assertFalse(arc.isFinal());
assertEquals(42, arc.output.longValue());

View File

@ -135,11 +135,12 @@ public class FSTCompletion {
try {
List<Arc<Object>> rootArcs = new ArrayList<Arc<Object>>();
Arc<Object> arc = automaton.getFirstArc(new Arc<Object>());
automaton.readFirstTargetArc(arc, arc);
FST.BytesReader fstReader = automaton.getBytesReader(0);
automaton.readFirstTargetArc(arc, arc, fstReader);
while (true) {
rootArcs.add(new Arc<Object>().copyFrom(arc));
if (arc.isLast()) break;
automaton.readNextArc(arc);
automaton.readNextArc(arc, fstReader);
}
Collections.reverse(rootArcs); // we want highest weights first.
@ -168,13 +169,14 @@ public class FSTCompletion {
// Get the UTF-8 bytes representation of the input key.
try {
final FST.Arc<Object> scratch = new FST.Arc<Object>();
FST.BytesReader fstReader = automaton.getBytesReader(0);
for (; rootArcIndex < rootArcs.length; rootArcIndex++) {
final FST.Arc<Object> rootArc = rootArcs[rootArcIndex];
final FST.Arc<Object> arc = scratch.copyFrom(rootArc);
// Descend into the automaton using the key as prefix.
if (descendWithPrefix(arc, utf8)) {
automaton.readFirstTargetArc(arc, arc);
automaton.readFirstTargetArc(arc, arc, fstReader);
if (arc.label == FST.END_LABEL) {
// Normalize prefix-encoded weight.
return rootArc.label;
@ -356,8 +358,8 @@ public class FSTCompletion {
}
assert output.offset == 0;
output.bytes[output.length++] = (byte) arc.label;
automaton.readFirstTargetArc(arc, arc);
FST.BytesReader fstReader = automaton.getBytesReader(0);
automaton.readFirstTargetArc(arc, arc, fstReader);
while (true) {
if (arc.label == FST.END_LABEL) {
res.add(new Completion(output, bucket));
@ -373,7 +375,7 @@ public class FSTCompletion {
if (arc.isLast()) {
break;
}
automaton.readNextArc(arc);
automaton.readNextArc(arc, fstReader);
}
return false;
}