mirror of https://github.com/apache/lucene.git
LUCENE-8920: remove Arc setters, moving implementations into Arc, or copying data into consumers
This commit is contained in:
parent
4836dfc8b8
commit
0f4dcde4d9
|
@ -1115,10 +1115,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
// Keep recursing
|
||||
arc.arcIdx(mid - 1);
|
||||
} else {
|
||||
if (found == false) {
|
||||
result.setLength(bestUpto);
|
||||
InputOutput io = new InputOutput();
|
||||
io.input = result.get();
|
||||
|
@ -1127,9 +1124,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
return io;
|
||||
}
|
||||
|
||||
fr.index.readNextRealArc(arc, fstReader);
|
||||
|
||||
// Recurse on this arc:
|
||||
fr.index.readArcByIndex(arc, fstReader, mid);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ import org.apache.lucene.util.fst.Util;
|
|||
* FST-based terms dictionary reader.
|
||||
*
|
||||
* The FST index maps each term and its ord, and during seek
|
||||
* the ord is used fetch metadata from a single block.
|
||||
* the ord is used to fetch metadata from a single block.
|
||||
* The term dictionary is fully memory resident.
|
||||
*
|
||||
* @lucene.experimental
|
||||
|
@ -305,7 +305,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
// Only wraps common operations for PBF interact
|
||||
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
|
||||
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
|
||||
|
||||
/* Current term's ord, starts from 0 */
|
||||
long ord;
|
||||
|
@ -563,6 +563,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
/* fst stats */
|
||||
FST.Arc<Long> arc;
|
||||
|
||||
Long output;
|
||||
|
||||
/* automaton stats */
|
||||
int state;
|
||||
|
||||
|
@ -620,9 +622,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
void decodeStats() throws IOException {
|
||||
final FST.Arc<Long> arc = topFrame().arc;
|
||||
assert arc.nextFinalOutput() == fstOutputs.getNoOutput();
|
||||
ord = arc.output();
|
||||
ord = topFrame().output;
|
||||
super.decodeStats();
|
||||
}
|
||||
|
||||
|
@ -704,8 +704,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
|
||||
/** Virtual frame, never pop */
|
||||
Frame loadVirtualFrame(Frame frame) {
|
||||
frame.arc.output(fstOutputs.getNoOutput());
|
||||
frame.arc.nextFinalOutput(fstOutputs.getNoOutput());
|
||||
frame.output = fstOutputs.getNoOutput();
|
||||
frame.state = -1;
|
||||
return frame;
|
||||
}
|
||||
|
@ -713,6 +712,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) {
|
||||
frame.arc = fst.getFirstArc(frame.arc);
|
||||
frame.output = frame.arc.output();
|
||||
frame.state = 0;
|
||||
return frame;
|
||||
}
|
||||
|
@ -724,6 +724,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader);
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
frame.output = frame.arc.output();
|
||||
//if (TEST) System.out.println(" loadExpand frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
|
@ -738,6 +739,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
while (!frame.arc.isLast()) {
|
||||
frame.arc = fst.readNextRealArc(frame.arc, fstReader);
|
||||
frame.output = frame.arc.output();
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
if (frame.state != -1) {
|
||||
break;
|
||||
|
@ -763,6 +765,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
}
|
||||
frame.output = arc.output();
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -781,7 +784,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
|
||||
void pushFrame(Frame frame) {
|
||||
final FST.Arc<Long> arc = frame.arc;
|
||||
arc.output(fstOutputs.add(topFrame().arc.output(), arc.output()));
|
||||
frame.output = fstOutputs.add(topFrame().output, frame.output);
|
||||
term = grow(arc.label());
|
||||
level++;
|
||||
assert frame == stack[level];
|
||||
|
|
|
@ -438,6 +438,8 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
/* fst stats */
|
||||
FST.Arc<FSTTermOutputs.TermData> fstArc;
|
||||
|
||||
FSTTermOutputs.TermData output;
|
||||
|
||||
/* automaton stats */
|
||||
int fsaState;
|
||||
|
||||
|
@ -464,11 +466,9 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
this.stack[i] = new Frame();
|
||||
}
|
||||
|
||||
Frame frame;
|
||||
frame = loadVirtualFrame(newFrame());
|
||||
loadVirtualFrame(newFrame());
|
||||
this.level++;
|
||||
frame = loadFirstFrame(newFrame());
|
||||
pushFrame(frame);
|
||||
pushFrame(loadFirstFrame(newFrame()));
|
||||
|
||||
this.meta = null;
|
||||
this.metaUpto = 1;
|
||||
|
@ -502,18 +502,18 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
|
||||
/** Lazily accumulate metadata, once we have an accepted term */
|
||||
void loadMetaData() {
|
||||
FST.Arc<FSTTermOutputs.TermData> last, next;
|
||||
last = stack[metaUpto].fstArc;
|
||||
Frame last, next;
|
||||
last = stack[metaUpto];
|
||||
while (metaUpto != level) {
|
||||
metaUpto++;
|
||||
next = stack[metaUpto].fstArc;
|
||||
next.output(fstOutputs.add(next.output(), last.output()));
|
||||
next = stack[metaUpto];
|
||||
next.output = fstOutputs.add(next.output, last.output);
|
||||
last = next;
|
||||
}
|
||||
if (last.isFinal()) {
|
||||
meta = fstOutputs.add(last.output(), last.nextFinalOutput());
|
||||
if (last.fstArc.isFinal()) {
|
||||
meta = fstOutputs.add(last.output, last.fstArc.nextFinalOutput());
|
||||
} else {
|
||||
meta = last.output();
|
||||
meta = last.output;
|
||||
}
|
||||
state.docFreq = meta.docFreq;
|
||||
state.totalTermFreq = meta.totalTermFreq;
|
||||
|
@ -604,8 +604,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
|
||||
/** Virtual frame, never pop */
|
||||
Frame loadVirtualFrame(Frame frame) {
|
||||
frame.fstArc.output(fstOutputs.getNoOutput());
|
||||
frame.fstArc.nextFinalOutput(fstOutputs.getNoOutput());
|
||||
frame.output = fstOutputs.getNoOutput();
|
||||
frame.fsaState = -1;
|
||||
return frame;
|
||||
}
|
||||
|
@ -613,6 +612,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||
frame.fstArc = fst.getFirstArc(frame.fstArc);
|
||||
frame.output = frame.fstArc.output();
|
||||
frame.fsaState = 0;
|
||||
return frame;
|
||||
}
|
||||
|
@ -628,6 +628,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (frame.fsaState == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
}
|
||||
frame.output = frame.fstArc.output();
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -647,6 +648,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (frame.fsaState == -1) {
|
||||
return null;
|
||||
}
|
||||
frame.output = frame.fstArc.output();
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -663,6 +665,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (frame.fsaState == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
}
|
||||
frame.output = frame.fstArc.output();
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
|
|
@ -184,7 +184,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
boolean flag(int flag) {
|
||||
return FST.flag(flags(), flag);
|
||||
return FST.flag(flags, flag);
|
||||
}
|
||||
|
||||
public boolean isLast() {
|
||||
|
@ -228,18 +228,10 @@ public final class FST<T> implements Accountable {
|
|||
return label;
|
||||
}
|
||||
|
||||
public void label(int label) {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public T output() {
|
||||
return output;
|
||||
}
|
||||
|
||||
public void output(T output) {
|
||||
this.output = output;
|
||||
}
|
||||
|
||||
/** To node (ord or address) */
|
||||
public long target() {
|
||||
return target;
|
||||
|
@ -249,30 +241,14 @@ public final class FST<T> implements Accountable {
|
|||
return flags;
|
||||
}
|
||||
|
||||
public void flags(byte flags) {
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
public T nextFinalOutput() {
|
||||
return nextFinalOutput;
|
||||
}
|
||||
|
||||
public void nextFinalOutput(T output) {
|
||||
nextFinalOutput = output;
|
||||
}
|
||||
|
||||
long nextArc() {
|
||||
return nextArc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the position of the next arc to read
|
||||
* @param nextArc the position to set
|
||||
*/
|
||||
public void nextArc(long nextArc) {
|
||||
this.nextArc = nextArc;
|
||||
}
|
||||
|
||||
/** Where the first arc in the array starts; only valid if
|
||||
* bytesPerArc != 0 */
|
||||
public long posArcsStart() {
|
||||
|
@ -296,14 +272,6 @@ public final class FST<T> implements Accountable {
|
|||
return arcIdx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the arcIdx
|
||||
* @param idx the value to set
|
||||
*/
|
||||
public void arcIdx(int idx) {
|
||||
arcIdx = idx;
|
||||
}
|
||||
|
||||
/** How many arcs, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is
|
||||
* direct, this may include holes. Otherwise it is also how many arcs are in the array */
|
||||
public int numArcs() {
|
||||
|
@ -765,7 +733,7 @@ public final class FST<T> implements Accountable {
|
|||
arc.flags = BIT_FINAL_ARC | BIT_LAST_ARC;
|
||||
arc.nextFinalOutput = emptyOutput;
|
||||
if (emptyOutput != NO_OUTPUT) {
|
||||
arc.flags |= BIT_ARC_HAS_FINAL_OUTPUT;
|
||||
arc.flags = (byte) (arc.flags() | BIT_ARC_HAS_FINAL_OUTPUT);
|
||||
}
|
||||
} else {
|
||||
arc.flags = BIT_LAST_ARC;
|
||||
|
@ -975,6 +943,27 @@ public final class FST<T> implements Accountable {
|
|||
return readLabel(in);
|
||||
}
|
||||
|
||||
public Arc<T> readArcAtPosition(Arc<T> arc, final BytesReader in, long pos) throws IOException {
|
||||
in.setPosition(pos);
|
||||
arc.flags = in.readByte();
|
||||
arc.nextArc = pos;
|
||||
while (flag(arc.flags(), BIT_MISSING_ARC)) {
|
||||
// skip empty arcs
|
||||
arc.nextArc -= arc.bytesPerArc();
|
||||
in.skipBytes(arc.bytesPerArc() - 1);
|
||||
arc.flags = in.readByte();
|
||||
}
|
||||
return readArc(arc, in);
|
||||
}
|
||||
|
||||
public Arc<T> readArcByIndex(Arc<T> arc, final BytesReader in, int idx) throws IOException {
|
||||
arc.arcIdx = idx;
|
||||
assert arc.arcIdx() < arc.numArcs();
|
||||
in.setPosition(arc.posArcsStart() - arc.arcIdx() * arc.bytesPerArc());
|
||||
arc.flags = in.readByte();
|
||||
return readArc(arc, in);
|
||||
}
|
||||
|
||||
/** Never returns null, but you should never call this if
|
||||
* arc.isLast() is true. */
|
||||
public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
|
||||
|
@ -1006,7 +995,10 @@ public final class FST<T> implements Accountable {
|
|||
in.setPosition(arc.nextArc());
|
||||
arc.flags = in.readByte();
|
||||
}
|
||||
return readArc(arc, in);
|
||||
}
|
||||
|
||||
private Arc<T> readArc(Arc<T> arc, BytesReader in) throws IOException {
|
||||
arc.label = readLabel(in);
|
||||
|
||||
if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
|
||||
|
@ -1060,6 +1052,23 @@ public final class FST<T> implements Accountable {
|
|||
return arc;
|
||||
}
|
||||
|
||||
static <T> Arc<T> readEndArc(Arc<T> follow, Arc<T> arc) {
|
||||
if (follow.isFinal()) {
|
||||
if (follow.target() <= 0) {
|
||||
arc.flags = FST.BIT_LAST_ARC;
|
||||
} else {
|
||||
arc.flags = 0;
|
||||
// NOTE: nextArc is a node (not an address!) in this case:
|
||||
arc.nextArc = follow.target();
|
||||
}
|
||||
arc.output = follow.nextFinalOutput();
|
||||
arc.label = FST.END_LABEL;
|
||||
return arc;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// LUCENE-5152: called only from asserts, to validate that the
|
||||
// non-cached arc lookup would produce the same result, to
|
||||
// catch callers that illegally modify shared structures with
|
||||
|
|
|
@ -161,8 +161,7 @@ abstract class FSTEnum<T> {
|
|||
int arcOffset = targetLabel - firstLabel;
|
||||
if (arcOffset >= arc.numArcs()) {
|
||||
// target is beyond the last arc
|
||||
arc.nextArc(arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc());
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcAtPosition(arc, in, arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc());
|
||||
assert arc.isLast();
|
||||
// Dead end (target is after the last arc);
|
||||
// rollback to last fork then push
|
||||
|
@ -182,12 +181,13 @@ abstract class FSTEnum<T> {
|
|||
}
|
||||
} else {
|
||||
// TODO: if firstLabel == targetLabel
|
||||
long pos;
|
||||
if (arcOffset >= 0) {
|
||||
arc.nextArc(arc.posArcsStart() - (arc.bytesPerArc() * arcOffset));
|
||||
pos = arc.posArcsStart() - (arc.bytesPerArc() * arcOffset);
|
||||
} else {
|
||||
arc.nextArc(arc.posArcsStart());
|
||||
pos = arc.posArcsStart();
|
||||
}
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcAtPosition(arc, in, pos);
|
||||
if (arc.label() == targetLabel) {
|
||||
// found -- copy pasta from below
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
|
@ -234,8 +234,7 @@ abstract class FSTEnum<T> {
|
|||
// the outer else clause):
|
||||
if (found) {
|
||||
// Match
|
||||
arc.arcIdx(mid - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, mid);
|
||||
assert arc.arcIdx() == mid;
|
||||
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
|
@ -247,8 +246,7 @@ abstract class FSTEnum<T> {
|
|||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
} else if (low == arc.numArcs()) {
|
||||
// Dead end
|
||||
arc.arcIdx(arc.numArcs() - 2);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, arc.numArcs() - 1);
|
||||
assert arc.isLast();
|
||||
// Dead end (target is after the last arc);
|
||||
// rollback to last fork then push
|
||||
|
@ -267,8 +265,7 @@ abstract class FSTEnum<T> {
|
|||
upto--;
|
||||
}
|
||||
} else {
|
||||
arc.arcIdx(low - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, low);
|
||||
assert arc.label() > targetLabel;
|
||||
pushFirst();
|
||||
return null;
|
||||
|
@ -386,15 +383,13 @@ abstract class FSTEnum<T> {
|
|||
}
|
||||
} else {
|
||||
if (targetOffset >= arc.numArcs()) {
|
||||
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1));
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcAtPosition(arc, in, arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1));
|
||||
assert arc.isLast();
|
||||
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
|
||||
pushLast();
|
||||
return null;
|
||||
}
|
||||
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * targetOffset);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcAtPosition(arc, in, arc.posArcsStart() - arc.bytesPerArc() * targetOffset);
|
||||
if (arc.label() == targetLabel) {
|
||||
// found -- copy pasta from below
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
|
@ -408,8 +403,7 @@ abstract class FSTEnum<T> {
|
|||
// Scan backwards to find a floor arc that is not missing
|
||||
for (long arcOffset = arc.posArcsStart() - targetOffset * arc.bytesPerArc(); arcOffset <= arc.posArcsStart(); arcOffset += arc.bytesPerArc()) {
|
||||
// TODO: we can do better here by skipping missing arcs
|
||||
arc.nextArc(arcOffset);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcAtPosition(arc, in, arcOffset);
|
||||
if (arc.label() < targetLabel) {
|
||||
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
|
||||
pushLast();
|
||||
|
@ -451,8 +445,7 @@ abstract class FSTEnum<T> {
|
|||
if (found) {
|
||||
// Match -- recurse
|
||||
//System.out.println(" match! arcIdx=" + mid);
|
||||
arc.arcIdx(mid - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, mid);
|
||||
assert arc.arcIdx() == mid;
|
||||
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
|
@ -491,8 +484,7 @@ abstract class FSTEnum<T> {
|
|||
}
|
||||
} else {
|
||||
// There is a floor arc:
|
||||
arc.arcIdx(high - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, high);
|
||||
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
|
||||
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
|
||||
pushLast();
|
||||
|
|
|
@ -191,8 +191,7 @@ public final class Util {
|
|||
idx = low - 1;
|
||||
}
|
||||
|
||||
arc.arcIdx(idx - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
fst.readArcByIndex(arc, in, idx);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output += arc.output();
|
||||
|
||||
|
@ -934,20 +933,7 @@ public final class Util {
|
|||
*/
|
||||
public static <T> Arc<T> readCeilArc(int label, FST<T> fst, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
|
||||
if (label == FST.END_LABEL) {
|
||||
if (follow.isFinal()) {
|
||||
if (follow.target() <= 0) {
|
||||
arc.flags((byte) FST.BIT_LAST_ARC);
|
||||
} else {
|
||||
arc.flags((byte) 0);
|
||||
// NOTE: nextArc is a node (not an address!) in this case:
|
||||
arc.nextArc(follow.target());
|
||||
}
|
||||
arc.output(follow.nextFinalOutput());
|
||||
arc.label(FST.END_LABEL);
|
||||
return arc;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
return FST.readEndArc(follow, arc);
|
||||
}
|
||||
if (!FST.targetHasArcs(follow)) {
|
||||
return null;
|
||||
|
@ -962,8 +948,7 @@ public final class Util {
|
|||
} else if (offset < 0) {
|
||||
return arc;
|
||||
} else {
|
||||
arc.nextArc(arc.posArcsStart() - offset * arc.bytesPerArc());
|
||||
return fst.readNextRealArc(arc, in);
|
||||
return fst.readArcAtPosition(arc, in, arc.posArcsStart() - offset * arc.bytesPerArc());
|
||||
}
|
||||
}
|
||||
// Arcs are packed array -- use binary search to find
|
||||
|
@ -987,16 +972,14 @@ public final class Util {
|
|||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
arc.arcIdx(mid - 1);
|
||||
return fst.readNextRealArc(arc, in);
|
||||
return fst.readArcByIndex(arc, in, mid);
|
||||
}
|
||||
}
|
||||
if (low == arc.numArcs()) {
|
||||
// DEAD END!
|
||||
return null;
|
||||
}
|
||||
arc.arcIdx(high + 1);
|
||||
return fst.readNextRealArc(arc, in );
|
||||
return fst.readArcByIndex(arc, in , high + 1);
|
||||
}
|
||||
|
||||
// Linear scan
|
||||
|
|
Loading…
Reference in New Issue