LUCENE-8920: remove Arc setters, moving implementations into Arc, or copying data into consumers

This commit is contained in:
Michael Sokolov 2019-07-04 09:45:51 -04:00 committed by Michael Sokolov
parent 760f2dbdcb
commit fe0c042470
6 changed files with 90 additions and 104 deletions

View File

@ -1115,10 +1115,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
}
}
if (found) {
// Keep recursing
arc.arcIdx(mid - 1);
} else {
if (found == false) {
result.setLength(bestUpto);
InputOutput io = new InputOutput();
io.input = result.get();
@ -1127,9 +1124,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
return io;
}
fr.index.readNextRealArc(arc, fstReader);
// Recurse on this arc:
fr.index.readArcByIndex(arc, fstReader, mid);
result.setIntAt(upto++, arc.label());
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());

View File

@ -65,7 +65,7 @@ import org.apache.lucene.util.fst.Util;
* FST-based terms dictionary reader.
*
* The FST index maps each term and its ord, and during seek
* the ord is used fetch metadata from a single block.
* the ord is used to fetch metadata from a single block.
* The term dictionary is fully memory resident.
*
* @lucene.experimental
@ -305,7 +305,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
// Only wraps common operations for PBF interact
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
/* Current term's ord, starts from 0 */
long ord;
@ -563,6 +563,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
/* fst stats */
FST.Arc<Long> arc;
Long output;
/* automaton stats */
int state;
@ -620,9 +622,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
@Override
void decodeStats() throws IOException {
final FST.Arc<Long> arc = topFrame().arc;
assert arc.nextFinalOutput() == fstOutputs.getNoOutput();
ord = arc.output();
ord = topFrame().output;
super.decodeStats();
}
@ -704,8 +704,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
/** Virtual frame, never pop */
Frame loadVirtualFrame(Frame frame) {
frame.arc.output(fstOutputs.getNoOutput());
frame.arc.nextFinalOutput(fstOutputs.getNoOutput());
frame.output = fstOutputs.getNoOutput();
frame.state = -1;
return frame;
}
@ -713,6 +712,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) {
frame.arc = fst.getFirstArc(frame.arc);
frame.output = frame.arc.output();
frame.state = 0;
return frame;
}
@ -724,6 +724,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader);
frame.state = fsa.step(top.state, frame.arc.label());
frame.output = frame.arc.output();
//if (TEST) System.out.println(" loadExpand frame="+frame);
if (frame.state == -1) {
return loadNextFrame(top, frame);
@ -738,6 +739,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
while (!frame.arc.isLast()) {
frame.arc = fst.readNextRealArc(frame.arc, fstReader);
frame.output = frame.arc.output();
frame.state = fsa.step(top.state, frame.arc.label());
if (frame.state != -1) {
break;
@ -763,6 +765,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
if (frame.state == -1) {
return loadNextFrame(top, frame);
}
frame.output = arc.output();
return frame;
}
@ -781,7 +784,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
void pushFrame(Frame frame) {
final FST.Arc<Long> arc = frame.arc;
arc.output(fstOutputs.add(topFrame().arc.output(), arc.output()));
frame.output = fstOutputs.add(topFrame().output, frame.output);
term = grow(arc.label());
level++;
assert frame == stack[level];

View File

@ -438,6 +438,8 @@ public class FSTTermsReader extends FieldsProducer {
/* fst stats */
FST.Arc<FSTTermOutputs.TermData> fstArc;
FSTTermOutputs.TermData output;
/* automaton stats */
int fsaState;
@ -464,11 +466,9 @@ public class FSTTermsReader extends FieldsProducer {
this.stack[i] = new Frame();
}
Frame frame;
frame = loadVirtualFrame(newFrame());
loadVirtualFrame(newFrame());
this.level++;
frame = loadFirstFrame(newFrame());
pushFrame(frame);
pushFrame(loadFirstFrame(newFrame()));
this.meta = null;
this.metaUpto = 1;
@ -502,18 +502,18 @@ public class FSTTermsReader extends FieldsProducer {
/** Lazily accumulate metadata once we have an accepted term */
void loadMetaData() {
FST.Arc<FSTTermOutputs.TermData> last, next;
last = stack[metaUpto].fstArc;
Frame last, next;
last = stack[metaUpto];
while (metaUpto != level) {
metaUpto++;
next = stack[metaUpto].fstArc;
next.output(fstOutputs.add(next.output(), last.output()));
next = stack[metaUpto];
next.output = fstOutputs.add(next.output, last.output);
last = next;
}
if (last.isFinal()) {
meta = fstOutputs.add(last.output(), last.nextFinalOutput());
if (last.fstArc.isFinal()) {
meta = fstOutputs.add(last.output, last.fstArc.nextFinalOutput());
} else {
meta = last.output();
meta = last.output;
}
state.docFreq = meta.docFreq;
state.totalTermFreq = meta.totalTermFreq;
@ -604,8 +604,7 @@ public class FSTTermsReader extends FieldsProducer {
/** Virtual frame, never pop */
Frame loadVirtualFrame(Frame frame) {
frame.fstArc.output(fstOutputs.getNoOutput());
frame.fstArc.nextFinalOutput(fstOutputs.getNoOutput());
frame.output = fstOutputs.getNoOutput();
frame.fsaState = -1;
return frame;
}
@ -613,6 +612,7 @@ public class FSTTermsReader extends FieldsProducer {
/** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) throws IOException {
frame.fstArc = fst.getFirstArc(frame.fstArc);
frame.output = frame.fstArc.output();
frame.fsaState = 0;
return frame;
}
@ -628,6 +628,7 @@ public class FSTTermsReader extends FieldsProducer {
if (frame.fsaState == -1) {
return loadNextFrame(top, frame);
}
frame.output = frame.fstArc.output();
return frame;
}
@ -647,6 +648,7 @@ public class FSTTermsReader extends FieldsProducer {
if (frame.fsaState == -1) {
return null;
}
frame.output = frame.fstArc.output();
return frame;
}
@ -663,6 +665,7 @@ public class FSTTermsReader extends FieldsProducer {
if (frame.fsaState == -1) {
return loadNextFrame(top, frame);
}
frame.output = frame.fstArc.output();
return frame;
}

View File

@ -186,7 +186,7 @@ public final class FST<T> implements Accountable {
}
boolean flag(int flag) {
return FST.flag(flags(), flag);
return FST.flag(flags, flag);
}
public boolean isLast() {
@ -230,18 +230,10 @@ public final class FST<T> implements Accountable {
return label;
}
public void label(int label) {
this.label = label;
}
public T output() {
return output;
}
public void output(T output) {
this.output = output;
}
/** To node (ord or address) */
public long target() {
return target;
@ -251,30 +243,14 @@ public final class FST<T> implements Accountable {
return flags;
}
public void flags(byte flags) {
this.flags = flags;
}
public T nextFinalOutput() {
return nextFinalOutput;
}
public void nextFinalOutput(T output) {
nextFinalOutput = output;
}
long nextArc() {
return nextArc;
}
/**
* Set the position of the next arc to read
* @param nextArc the position to set
*/
public void nextArc(long nextArc) {
this.nextArc = nextArc;
}
/** Where the first arc in the array starts; only valid if
* bytesPerArc != 0 */
public long posArcsStart() {
@ -298,14 +274,6 @@ public final class FST<T> implements Accountable {
return arcIdx;
}
/**
* Set the arcIdx
* @param idx the value to set
*/
public void arcIdx(int idx) {
arcIdx = idx;
}
/** How many arcs, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is
* direct, this may include holes. Otherwise it is also how many arcs are in the array */
public int numArcs() {
@ -818,7 +786,7 @@ public final class FST<T> implements Accountable {
arc.flags = BIT_FINAL_ARC | BIT_LAST_ARC;
arc.nextFinalOutput = emptyOutput;
if (emptyOutput != NO_OUTPUT) {
arc.flags |= BIT_ARC_HAS_FINAL_OUTPUT;
arc.flags = (byte) (arc.flags() | BIT_ARC_HAS_FINAL_OUTPUT);
}
} else {
arc.flags = BIT_LAST_ARC;
@ -1033,6 +1001,27 @@ public final class FST<T> implements Accountable {
return readLabel(in);
}
/**
 * Reads the arc whose flags byte is located at {@code pos}.
 *
 * <p>The reader is moved to {@code pos}, the flags byte found there is stored on {@code arc},
 * and {@code arc.nextArc} is set to {@code pos}. For as long as the flags carry
 * {@code BIT_MISSING_ARC}, the slot is treated as empty: {@code nextArc} is lowered by one
 * {@code bytesPerArc} stride, the rest of the slot is skipped, and the following flags byte is
 * read. The remaining fields are then decoded by {@code readArc}.
 *
 * @param arc the arc to fill in; its {@code bytesPerArc} is used as the slot stride
 * @param in reader over the FST bytes
 * @param pos position of the flags byte of the first candidate arc
 * @return whatever {@code readArc} returns for the first non-missing slot
 * @throws IOException if reading from {@code in} fails
 */
public Arc<T> readArcAtPosition(Arc<T> arc, final BytesReader in, long pos) throws IOException {
  in.setPosition(pos);
  arc.flags = in.readByte();
  arc.nextArc = pos;
  if (flag(arc.flags(), BIT_MISSING_ARC)) {
    do {
      // Empty slot: step over it and look at the flags byte of the next one.
      arc.nextArc -= arc.bytesPerArc();
      // One byte of this slot (its flags) has already been consumed.
      in.skipBytes(arc.bytesPerArc() - 1);
      arc.flags = in.readByte();
    } while (flag(arc.flags(), BIT_MISSING_ARC));
  }
  return readArc(arc, in);
}
/**
 * Reads the arc stored at slot {@code idx} of the fixed-stride arc array that {@code arc}
 * currently describes.
 *
 * <p>Records {@code idx} as the arc's {@code arcIdx}, positions the reader at
 * {@code posArcsStart - idx * bytesPerArc}, reads the flags byte, and decodes the remaining
 * fields via {@code readArc}.
 *
 * @param arc the arc to fill in; its {@code posArcsStart}, {@code bytesPerArc} and
 *     {@code numArcs} must already describe the array being indexed
 * @param in reader over the FST bytes
 * @param idx zero-based slot to read; must satisfy {@code 0 <= idx < arc.numArcs()}
 * @return whatever {@code readArc} returns for that slot
 * @throws IOException if reading from {@code in} fails
 */
public Arc<T> readArcByIndex(Arc<T> arc, final BytesReader in, int idx) throws IOException {
  // A negative index would silently seek past the start of the array, so check both bounds.
  assert idx >= 0 && idx < arc.numArcs() : "idx=" + idx + " numArcs=" + arc.numArcs();
  arc.arcIdx = idx;
  // Widen before multiplying: int * int would wrap around before being subtracted from the
  // long start position.
  in.setPosition(arc.posArcsStart() - (long) idx * arc.bytesPerArc());
  arc.flags = in.readByte();
  return readArc(arc, in);
}
/** Never returns null, but you should never call this if
* arc.isLast() is true. */
public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
@ -1064,7 +1053,10 @@ public final class FST<T> implements Accountable {
in.setPosition(arc.nextArc());
arc.flags = in.readByte();
}
return readArc(arc, in);
}
private Arc<T> readArc(Arc<T> arc, BytesReader in) throws IOException {
arc.label = readLabel(in);
if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
@ -1118,6 +1110,23 @@ public final class FST<T> implements Accountable {
return arc;
}
/**
 * Fills {@code arc} with the end-of-input ({@code END_LABEL}) arc that follows {@code follow}.
 *
 * <p>The resulting arc carries {@code follow}'s final output as its output. If {@code follow}
 * has a positive target, the flags are cleared and {@code nextArc} is set to that target;
 * otherwise the arc is flagged as the last arc.
 *
 * @param follow the arc being followed
 * @param arc the arc to overwrite and return
 * @return {@code arc}, or {@code null} when {@code follow} is not final
 */
static <T> Arc<T> readEndArc(Arc<T> follow, Arc<T> arc) {
  if (!follow.isFinal()) {
    return null;
  }
  if (follow.target() > 0) {
    arc.flags = 0;
    // NOTE: nextArc is a node (not an address!) in this case:
    arc.nextArc = follow.target();
  } else {
    arc.flags = FST.BIT_LAST_ARC;
  }
  arc.output = follow.nextFinalOutput();
  arc.label = FST.END_LABEL;
  return arc;
}
// LUCENE-5152: called only from asserts, to validate that the
// non-cached arc lookup would produce the same result, to
// catch callers that illegally modify shared structures with

View File

@ -161,8 +161,7 @@ abstract class FSTEnum<T> {
int arcOffset = targetLabel - firstLabel;
if (arcOffset >= arc.numArcs()) {
// target is beyond the last arc
arc.nextArc(arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc());
fst.readNextRealArc(arc, in);
fst.readArcAtPosition(arc, in, arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc());
assert arc.isLast();
// Dead end (target is after the last arc);
// rollback to last fork then push
@ -182,12 +181,13 @@ abstract class FSTEnum<T> {
}
} else {
// TODO: if firstLabel == targetLabel
long pos;
if (arcOffset >= 0) {
arc.nextArc(arc.posArcsStart() - (arc.bytesPerArc() * arcOffset));
pos = arc.posArcsStart() - (arc.bytesPerArc() * arcOffset);
} else {
arc.nextArc(arc.posArcsStart());
pos = arc.posArcsStart();
}
fst.readNextRealArc(arc, in);
fst.readArcAtPosition(arc, in, pos);
if (arc.label() == targetLabel) {
// found -- copy pasta from below
output[upto] = fst.outputs.add(output[upto-1], arc.output());
@ -234,8 +234,7 @@ abstract class FSTEnum<T> {
// the outer else clause):
if (found) {
// Match
arc.arcIdx(mid - 1);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, mid);
assert arc.arcIdx() == mid;
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output());
@ -247,8 +246,7 @@ abstract class FSTEnum<T> {
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
} else if (low == arc.numArcs()) {
// Dead end
arc.arcIdx(arc.numArcs() - 2);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, arc.numArcs() - 1);
assert arc.isLast();
// Dead end (target is after the last arc);
// rollback to last fork then push
@ -267,8 +265,7 @@ abstract class FSTEnum<T> {
upto--;
}
} else {
arc.arcIdx(low - 1);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, low);
assert arc.label() > targetLabel;
pushFirst();
return null;
@ -386,15 +383,13 @@ abstract class FSTEnum<T> {
}
} else {
if (targetOffset >= arc.numArcs()) {
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1));
fst.readNextRealArc(arc, in);
fst.readArcAtPosition(arc, in, arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1));
assert arc.isLast();
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
pushLast();
return null;
}
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * targetOffset);
fst.readNextRealArc(arc, in);
fst.readArcAtPosition(arc, in, arc.posArcsStart() - arc.bytesPerArc() * targetOffset);
if (arc.label() == targetLabel) {
// found -- copy pasta from below
output[upto] = fst.outputs.add(output[upto-1], arc.output());
@ -408,8 +403,7 @@ abstract class FSTEnum<T> {
// Scan backwards to find a floor arc that is not missing
for (long arcOffset = arc.posArcsStart() - targetOffset * arc.bytesPerArc(); arcOffset <= arc.posArcsStart(); arcOffset += arc.bytesPerArc()) {
// TODO: we can do better here by skipping missing arcs
arc.nextArc(arcOffset);
fst.readNextRealArc(arc, in);
fst.readArcAtPosition(arc, in, arcOffset);
if (arc.label() < targetLabel) {
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
pushLast();
@ -451,8 +445,7 @@ abstract class FSTEnum<T> {
if (found) {
// Match -- recurse
//System.out.println(" match! arcIdx=" + mid);
arc.arcIdx(mid - 1);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, mid);
assert arc.arcIdx() == mid;
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output());
@ -491,8 +484,7 @@ abstract class FSTEnum<T> {
}
} else {
// There is a floor arc:
arc.arcIdx(high - 1);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, high);
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
pushLast();

View File

@ -191,8 +191,7 @@ public final class Util {
idx = low - 1;
}
arc.arcIdx(idx - 1);
fst.readNextRealArc(arc, in);
fst.readArcByIndex(arc, in, idx);
result.setIntAt(upto++, arc.label());
output += arc.output();
@ -934,20 +933,7 @@ public final class Util {
*/
public static <T> Arc<T> readCeilArc(int label, FST<T> fst, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
if (label == FST.END_LABEL) {
if (follow.isFinal()) {
if (follow.target() <= 0) {
arc.flags((byte) FST.BIT_LAST_ARC);
} else {
arc.flags((byte) 0);
// NOTE: nextArc is a node (not an address!) in this case:
arc.nextArc(follow.target());
}
arc.output(follow.nextFinalOutput());
arc.label(FST.END_LABEL);
return arc;
} else {
return null;
}
return FST.readEndArc(follow, arc);
}
if (!FST.targetHasArcs(follow)) {
return null;
@ -962,8 +948,7 @@ public final class Util {
} else if (offset < 0) {
return arc;
} else {
arc.nextArc(arc.posArcsStart() - offset * arc.bytesPerArc());
return fst.readNextRealArc(arc, in);
return fst.readArcAtPosition(arc, in, arc.posArcsStart() - offset * arc.bytesPerArc());
}
}
// Arcs are packed array -- use binary search to find
@ -987,16 +972,14 @@ public final class Util {
} else if (cmp > 0) {
high = mid - 1;
} else {
arc.arcIdx(mid - 1);
return fst.readNextRealArc(arc, in);
return fst.readArcByIndex(arc, in, mid);
}
}
if (low == arc.numArcs()) {
// DEAD END!
return null;
}
arc.arcIdx(high + 1);
return fst.readNextRealArc(arc, in );
return fst.readArcByIndex(arc, in , high + 1);
}
// Linear scan