mirror of https://github.com/apache/lucene.git
LUCENE-8920: encapsulate FST.Arc data
This commit is contained in:
parent
16ec64f7b2
commit
760f2dbdcb
|
@ -106,17 +106,17 @@ public class MappingCharFilter extends BaseCharFilter {
|
|||
// Fast pass for single character match:
|
||||
assert arc.isFinal();
|
||||
lastMatchLen = 1;
|
||||
lastMatch = arc.output;
|
||||
lastMatch = arc.output();
|
||||
} else {
|
||||
int lookahead = 0;
|
||||
CharsRef output = arc.output;
|
||||
CharsRef output = arc.output();
|
||||
while (true) {
|
||||
lookahead++;
|
||||
|
||||
if (arc.isFinal()) {
|
||||
// Match! (to node is final)
|
||||
lastMatchLen = lookahead;
|
||||
lastMatch = outputs.add(output, arc.nextFinalOutput);
|
||||
lastMatch = outputs.add(output, arc.nextFinalOutput());
|
||||
// Greedy: keep searching to see if there's a
|
||||
// longer match...
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ public class MappingCharFilter extends BaseCharFilter {
|
|||
// Dead end
|
||||
break;
|
||||
}
|
||||
output = outputs.add(output, arc.output);
|
||||
output = outputs.add(output, arc.output());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,10 +50,10 @@ public class NormalizeCharMap {
|
|||
final FST.BytesReader fstReader = map.getBytesReader();
|
||||
map.getFirstArc(scratchArc);
|
||||
if (FST.targetHasArcs(scratchArc)) {
|
||||
map.readFirstRealTargetArc(scratchArc.target, scratchArc, fstReader);
|
||||
map.readFirstRealTargetArc(scratchArc.target(), scratchArc, fstReader);
|
||||
while(true) {
|
||||
assert scratchArc.label != FST.END_LABEL;
|
||||
cachedRootArcs.put(Character.valueOf((char) scratchArc.label), new FST.Arc<CharsRef>().copyFrom(scratchArc));
|
||||
assert scratchArc.label() != FST.END_LABEL;
|
||||
cachedRootArcs.put(Character.valueOf((char) scratchArc.label()), new FST.Arc<CharsRef>().copyFrom(scratchArc));
|
||||
if (scratchArc.isLast()) {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -280,14 +280,14 @@ public class Dictionary {
|
|||
cp = Character.codePointAt(word, i, l);
|
||||
if (fst.findTargetArc(cp, arc, arc, bytesReader) == null) {
|
||||
return null;
|
||||
} else if (arc.output != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
} else if (arc.output() != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
}
|
||||
if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null) {
|
||||
return null;
|
||||
} else if (arc.output != NO_OUTPUT) {
|
||||
return fst.outputs.add(output, arc.output);
|
||||
} else if (arc.output() != NO_OUTPUT) {
|
||||
return fst.outputs.add(output, arc.output());
|
||||
} else {
|
||||
return output;
|
||||
}
|
||||
|
@ -1228,10 +1228,10 @@ public class Dictionary {
|
|||
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
|
||||
break;
|
||||
} else {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
|
||||
longestOutput = fst.outputs.add(output, arc.nextFinalOutput());
|
||||
longestMatch = j;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -311,15 +311,15 @@ final class Stemmer {
|
|||
int ch = word[i-1];
|
||||
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
|
||||
break;
|
||||
} else if (arc.output != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
} else if (arc.output() != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
}
|
||||
IntsRef prefixes = null;
|
||||
if (!arc.isFinal()) {
|
||||
continue;
|
||||
} else {
|
||||
prefixes = fst.outputs.add(output, arc.nextFinalOutput);
|
||||
prefixes = fst.outputs.add(output, arc.nextFinalOutput());
|
||||
}
|
||||
|
||||
for (int j = 0; j < prefixes.length; j++) {
|
||||
|
@ -395,15 +395,15 @@ final class Stemmer {
|
|||
int ch = word[i];
|
||||
if (fst.findTargetArc(ch, arc, arc, bytesReader) == null) {
|
||||
break;
|
||||
} else if (arc.output != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
} else if (arc.output() != NO_OUTPUT) {
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
}
|
||||
IntsRef suffixes = null;
|
||||
if (!arc.isFinal()) {
|
||||
continue;
|
||||
} else {
|
||||
suffixes = fst.outputs.add(output, arc.nextFinalOutput);
|
||||
suffixes = fst.outputs.add(output, arc.nextFinalOutput());
|
||||
}
|
||||
|
||||
for (int j = 0; j < suffixes.length; j++) {
|
||||
|
|
|
@ -132,11 +132,11 @@ public final class StemmerOverrideFilter extends TokenFilter {
|
|||
if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) {
|
||||
return null;
|
||||
}
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
|
||||
bufUpto += Character.charCount(codePoint);
|
||||
}
|
||||
if (scratchArc.isFinal()) {
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput());
|
||||
}
|
||||
return matchOutput;
|
||||
}
|
||||
|
|
|
@ -330,7 +330,7 @@ public final class SynonymFilter extends TokenFilter {
|
|||
BytesRef pendingOutput = fst.outputs.getNoOutput();
|
||||
fst.getFirstArc(scratchArc);
|
||||
|
||||
assert scratchArc.output == fst.outputs.getNoOutput();
|
||||
assert scratchArc.output() == fst.outputs.getNoOutput();
|
||||
|
||||
int tokenCount = 0;
|
||||
|
||||
|
@ -399,7 +399,7 @@ public final class SynonymFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
// Accum the output
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
|
||||
//System.out.println(" char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output());
|
||||
bufUpto += Character.charCount(codePoint);
|
||||
}
|
||||
|
@ -407,7 +407,7 @@ public final class SynonymFilter extends TokenFilter {
|
|||
// OK, entire token matched; now see if this is a final
|
||||
// state:
|
||||
if (scratchArc.isFinal()) {
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput());
|
||||
matchInputLength = tokenCount;
|
||||
matchEndOffset = inputEndOffset;
|
||||
//System.out.println(" found matchLength=" + matchInputLength + " output=" + matchOutput);
|
||||
|
@ -423,7 +423,7 @@ public final class SynonymFilter extends TokenFilter {
|
|||
} else {
|
||||
// More matching is possible -- accum the output (if
|
||||
// any) of the WORD_SEP arc:
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
|
||||
if (nextRead == nextWrite) {
|
||||
capture();
|
||||
}
|
||||
|
|
|
@ -290,7 +290,7 @@ public final class SynonymGraphFilter extends TokenFilter {
|
|||
BytesRef pendingOutput = fst.outputs.getNoOutput();
|
||||
fst.getFirstArc(scratchArc);
|
||||
|
||||
assert scratchArc.output == fst.outputs.getNoOutput();
|
||||
assert scratchArc.output() == fst.outputs.getNoOutput();
|
||||
|
||||
// How many tokens in the current match
|
||||
int matchLength = 0;
|
||||
|
@ -360,7 +360,7 @@ public final class SynonymGraphFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
// Accum the output
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
|
||||
bufUpto += Character.charCount(codePoint);
|
||||
}
|
||||
|
||||
|
@ -369,7 +369,7 @@ public final class SynonymGraphFilter extends TokenFilter {
|
|||
// OK, entire token matched; now see if this is a final
|
||||
// state in the FST (a match):
|
||||
if (scratchArc.isFinal()) {
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
|
||||
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput());
|
||||
matchInputLength = matchLength;
|
||||
matchEndOffset = inputEndOffset;
|
||||
//System.out.println(" ** match");
|
||||
|
@ -385,7 +385,7 @@ public final class SynonymGraphFilter extends TokenFilter {
|
|||
} else {
|
||||
// More matching is possible -- accum the output (if
|
||||
// any) of the WORD_SEP arc:
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
|
||||
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
|
||||
doFinalCapture = true;
|
||||
if (liveToken) {
|
||||
capture();
|
||||
|
|
|
@ -772,12 +772,12 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
if (userFST.findTargetArc(ch, arc, arc, posAhead == posData.pos, userFSTReader) == null) {
|
||||
break;
|
||||
}
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
if (arc.isFinal()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" USER word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1));
|
||||
}
|
||||
add(userDictionary, posData, posAhead+1, output + arc.nextFinalOutput.intValue(), Type.USER, false);
|
||||
add(userDictionary, posData, posAhead+1, output + arc.nextFinalOutput().intValue(), Type.USER, false);
|
||||
anyMatches = true;
|
||||
}
|
||||
}
|
||||
|
@ -803,7 +803,7 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
break;
|
||||
}
|
||||
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
|
||||
// Optimization: for known words that are too-long
|
||||
// (compound), we should pre-compute the 2nd
|
||||
|
@ -812,7 +812,7 @@ public final class JapaneseTokenizer extends Tokenizer {
|
|||
// match is found.
|
||||
|
||||
if (arc.isFinal()) {
|
||||
dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef);
|
||||
dictionary.lookupWordIds(output + arc.nextFinalOutput().intValue(), wordIdRef);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs");
|
||||
}
|
||||
|
|
|
@ -162,9 +162,9 @@ public final class UserDictionary implements Dictionary {
|
|||
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
|
||||
break; // continue to next position
|
||||
}
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
if (arc.isFinal()) {
|
||||
final int finalOutput = output + arc.nextFinalOutput.intValue();
|
||||
final int finalOutput = output + arc.nextFinalOutput().intValue();
|
||||
result.put(startOffset-off, segmentations[finalOutput]);
|
||||
found = true;
|
||||
}
|
||||
|
|
|
@ -681,11 +681,11 @@ public final class KoreanTokenizer extends Tokenizer {
|
|||
if (userFST.findTargetArc(ch, arc, arc, posAhead == pos, userFSTReader) == null) {
|
||||
break;
|
||||
}
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
if (arc.isFinal()) {
|
||||
maxPosAhead = posAhead;
|
||||
outputMaxPosAhead = output;
|
||||
arcFinalOutMaxPosAhead = arc.nextFinalOutput.intValue();
|
||||
arcFinalOutMaxPosAhead = arc.nextFinalOutput().intValue();
|
||||
anyMatches = true;
|
||||
}
|
||||
}
|
||||
|
@ -720,7 +720,7 @@ public final class KoreanTokenizer extends Tokenizer {
|
|||
break;
|
||||
}
|
||||
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
|
||||
// Optimization: for known words that are too-long
|
||||
// (compound), we should pre-compute the 2nd
|
||||
|
@ -729,7 +729,7 @@ public final class KoreanTokenizer extends Tokenizer {
|
|||
// match is found.
|
||||
|
||||
if (arc.isFinal()) {
|
||||
dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef);
|
||||
dictionary.lookupWordIds(output + arc.nextFinalOutput().intValue(), wordIdRef);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs");
|
||||
}
|
||||
|
|
|
@ -221,9 +221,9 @@ public final class UserDictionary implements Dictionary {
|
|||
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
|
||||
break; // continue to next position
|
||||
}
|
||||
output += arc.output.intValue();
|
||||
output += arc.output().intValue();
|
||||
if (arc.isFinal()) {
|
||||
final int finalOutput = output + arc.nextFinalOutput.intValue();
|
||||
final int finalOutput = output + arc.nextFinalOutput().intValue();
|
||||
result.add(finalOutput);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -95,7 +95,7 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
f.prefix = 0;
|
||||
f.setState(0);
|
||||
f.arc = arc;
|
||||
f.outputPrefix = arc.output;
|
||||
f.outputPrefix = arc.output();
|
||||
f.load(fr.rootCode);
|
||||
|
||||
// for assert:
|
||||
|
@ -168,14 +168,14 @@ final class OrdsIntersectTermsEnum extends BaseTermsEnum {
|
|||
// passed to findTargetArc
|
||||
arc = fr.index.findTargetArc(target, arc, getArc(1+idx), fstReader);
|
||||
assert arc != null;
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
idx++;
|
||||
}
|
||||
|
||||
f.arc = arc;
|
||||
f.outputPrefix = output;
|
||||
assert arc.isFinal();
|
||||
f.load(OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput));
|
||||
f.load(OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()));
|
||||
return f;
|
||||
}
|
||||
|
||||
|
|
|
@ -271,7 +271,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
OrdsSegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -294,9 +294,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -374,19 +374,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
positioned = true;
|
||||
|
@ -443,9 +443,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
arc = nextArc;
|
||||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -455,7 +455,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -529,7 +529,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
OrdsSegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -552,14 +552,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// TODO: we could save the outputs in local
|
||||
// byte[][] instead of making new objs every
|
||||
// seek; but, often the FST doesn't have any
|
||||
// shared bytes (but this could change if we
|
||||
// reverse vLong byte order)
|
||||
if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -632,19 +632,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" no seek state; push root frame");
|
||||
//}
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
positioned = true;
|
||||
|
@ -701,9 +701,9 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
arc = nextArc;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != OrdsBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -713,7 +713,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -766,8 +766,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
if (fr.index != null) {
|
||||
assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
throw new RuntimeException("seek state is broken");
|
||||
}
|
||||
Output output = Util.get(fr.index, prefix);
|
||||
|
@ -1052,7 +1052,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
final IntsRefBuilder result = new IntsRefBuilder();
|
||||
|
||||
fr.index.getFirstArc(arc);
|
||||
Output output = arc.output;
|
||||
Output output = arc.output();
|
||||
int upto = 0;
|
||||
|
||||
int bestUpto = 0;
|
||||
|
@ -1069,7 +1069,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
while (true) {
|
||||
// System.out.println(" loop: output=" + output.startOrd + "-" + (Long.MAX_VALUE-output.endOrd) + " upto=" + upto + " arc=" + arc + " final?=" + arc.isFinal());
|
||||
if (arc.isFinal()) {
|
||||
final Output finalOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput);
|
||||
final Output finalOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput());
|
||||
// System.out.println(" isFinal: " + finalOutput.startOrd + "-" + (Long.MAX_VALUE-finalOutput.endOrd));
|
||||
if (targetOrd >= finalOutput.startOrd && targetOrd <= Long.MAX_VALUE-finalOutput.endOrd) {
|
||||
// Only one range should match across all arcs leaving this node
|
||||
|
@ -1082,19 +1082,19 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
if (FST.targetHasArcs(arc)) {
|
||||
// System.out.println(" targetHasArcs");
|
||||
result.grow(1+upto);
|
||||
fr.index.readFirstRealTargetArc(arc.target, arc, fstReader);
|
||||
fr.index.readFirstRealTargetArc(arc.target(), arc, fstReader);
|
||||
|
||||
if (arc.bytesPerArc != 0 && arc.arcIdx > Integer.MIN_VALUE) {
|
||||
if (arc.bytesPerArc() != 0 && arc.arcIdx() > Integer.MIN_VALUE) {
|
||||
// System.out.println(" array arcs");
|
||||
int low = 0;
|
||||
int high = arc.numArcs-1;
|
||||
int high = arc.numArcs() -1;
|
||||
int mid = 0;
|
||||
//System.out.println("bsearch: numArcs=" + arc.numArcs() + " target=" + targetOutput + " output=" + output);
|
||||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
fstReader.setPosition(arc.posArcsStart);
|
||||
fstReader.skipBytes(arc.bytesPerArc*mid);
|
||||
fstReader.setPosition(arc.posArcsStart());
|
||||
fstReader.skipBytes(arc.bytesPerArc() *mid);
|
||||
final byte flags = fstReader.readByte();
|
||||
fr.index.readLabel(fstReader);
|
||||
final Output minArcOutput;
|
||||
|
@ -1116,8 +1116,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
|
||||
if (found) {
|
||||
// Keep recursing
|
||||
arc.arcIdx = mid-1;
|
||||
// Keep recursing
|
||||
arc.arcIdx(mid - 1);
|
||||
} else {
|
||||
result.setLength(bestUpto);
|
||||
InputOutput io = new InputOutput();
|
||||
|
@ -1130,8 +1130,8 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
fr.index.readNextRealArc(arc, fstReader);
|
||||
|
||||
// Recurse on this arc:
|
||||
result.setIntAt(upto++, arc.label);
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
|
||||
} else {
|
||||
// System.out.println(" non-array arc");
|
||||
|
@ -1141,14 +1141,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// This is the min output we'd hit if we follow
|
||||
// this arc:
|
||||
final Output minArcOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
final Output minArcOutput = OrdsBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
long endOrd = Long.MAX_VALUE - minArcOutput.endOrd;
|
||||
// System.out.println(" endOrd=" + endOrd + " targetOrd=" + targetOrd);
|
||||
|
||||
if (targetOrd >= minArcOutput.startOrd && targetOrd <= endOrd) {
|
||||
// Recurse on this arc:
|
||||
output = minArcOutput;
|
||||
result.setIntAt(upto++, arc.label);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
break;
|
||||
} else if (targetOrd < endOrd || arc.isLast()) {
|
||||
result.setLength(bestUpto);
|
||||
|
|
|
@ -621,8 +621,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
@Override
|
||||
void decodeStats() throws IOException {
|
||||
final FST.Arc<Long> arc = topFrame().arc;
|
||||
assert arc.nextFinalOutput == fstOutputs.getNoOutput();
|
||||
ord = arc.output;
|
||||
assert arc.nextFinalOutput() == fstOutputs.getNoOutput();
|
||||
ord = arc.output();
|
||||
super.decodeStats();
|
||||
}
|
||||
|
||||
|
@ -675,7 +675,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
frame = newFrame();
|
||||
label = target.bytes[upto] & 0xff;
|
||||
frame = loadCeilFrame(label, topFrame(), frame);
|
||||
if (frame == null || frame.arc.label != label) {
|
||||
if (frame == null || frame.arc.label() != label) {
|
||||
break;
|
||||
}
|
||||
assert isValid(frame); // target must be fetched from automaton
|
||||
|
@ -703,15 +703,15 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
/** Virtual frame, never pop */
|
||||
Frame loadVirtualFrame(Frame frame) throws IOException {
|
||||
frame.arc.output = fstOutputs.getNoOutput();
|
||||
frame.arc.nextFinalOutput = fstOutputs.getNoOutput();
|
||||
Frame loadVirtualFrame(Frame frame) {
|
||||
frame.arc.output(fstOutputs.getNoOutput());
|
||||
frame.arc.nextFinalOutput(fstOutputs.getNoOutput());
|
||||
frame.state = -1;
|
||||
return frame;
|
||||
}
|
||||
|
||||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) throws IOException {
|
||||
Frame loadFirstFrame(Frame frame) {
|
||||
frame.arc = fst.getFirstArc(frame.arc);
|
||||
frame.state = 0;
|
||||
return frame;
|
||||
|
@ -722,8 +722,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
if (!canGrow(top)) {
|
||||
return null;
|
||||
}
|
||||
frame.arc = fst.readFirstRealTargetArc(top.arc.target, frame.arc, fstReader);
|
||||
frame.state = fsa.step(top.state, frame.arc.label);
|
||||
frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader);
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
//if (TEST) System.out.println(" loadExpand frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
|
@ -738,7 +738,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
}
|
||||
while (!frame.arc.isLast()) {
|
||||
frame.arc = fst.readNextRealArc(frame.arc, fstReader);
|
||||
frame.state = fsa.step(top.state, frame.arc.label);
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
if (frame.state != -1) {
|
||||
break;
|
||||
}
|
||||
|
@ -758,7 +758,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
if (arc == null) {
|
||||
return null;
|
||||
}
|
||||
frame.state = fsa.step(top.state, arc.label);
|
||||
frame.state = fsa.step(top.state, arc.label());
|
||||
//if (TEST) System.out.println(" loadCeil frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
|
@ -781,8 +781,8 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
|
||||
void pushFrame(Frame frame) {
|
||||
final FST.Arc<Long> arc = frame.arc;
|
||||
arc.output = fstOutputs.add(topFrame().arc.output, arc.output);
|
||||
term = grow(arc.label);
|
||||
arc.output(fstOutputs.add(topFrame().arc.output(), arc.output()));
|
||||
term = grow(arc.label());
|
||||
level++;
|
||||
assert frame == stack[level];
|
||||
}
|
||||
|
@ -836,7 +836,7 @@ public class FSTOrdTermsReader extends FieldsProducer {
|
|||
queue.add(startArc);
|
||||
while (!queue.isEmpty()) {
|
||||
final FST.Arc<T> arc = queue.remove(0);
|
||||
final long node = arc.target;
|
||||
final long node = arc.target();
|
||||
//System.out.println(arc);
|
||||
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
|
||||
seen.set((int) node);
|
||||
|
|
|
@ -415,7 +415,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
/* True when there is pending term when calling next() */
|
||||
boolean pending;
|
||||
|
||||
/* stack to record how current term is constructed,
|
||||
/* stack to record how current term is constructed,
|
||||
* used to accumulate metadata or rewind term:
|
||||
* level == term.length + 1,
|
||||
* == 0 when term is null */
|
||||
|
@ -501,19 +501,19 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
/** Lazily accumulate metadata, once we have an accepted term */
|
||||
void loadMetaData() throws IOException {
|
||||
void loadMetaData() {
|
||||
FST.Arc<FSTTermOutputs.TermData> last, next;
|
||||
last = stack[metaUpto].fstArc;
|
||||
while (metaUpto != level) {
|
||||
metaUpto++;
|
||||
next = stack[metaUpto].fstArc;
|
||||
next.output = fstOutputs.add(next.output, last.output);
|
||||
next.output(fstOutputs.add(next.output(), last.output()));
|
||||
last = next;
|
||||
}
|
||||
if (last.isFinal()) {
|
||||
meta = fstOutputs.add(last.output, last.nextFinalOutput);
|
||||
meta = fstOutputs.add(last.output(), last.nextFinalOutput());
|
||||
} else {
|
||||
meta = last.output;
|
||||
meta = last.output();
|
||||
}
|
||||
state.docFreq = meta.docFreq;
|
||||
state.totalTermFreq = meta.totalTermFreq;
|
||||
|
@ -575,7 +575,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
frame = newFrame();
|
||||
label = target.bytes[upto] & 0xff;
|
||||
frame = loadCeilFrame(label, topFrame(), frame);
|
||||
if (frame == null || frame.fstArc.label != label) {
|
||||
if (frame == null || frame.fstArc.label() != label) {
|
||||
break;
|
||||
}
|
||||
assert isValid(frame); // target must be fetched from automaton
|
||||
|
@ -603,9 +603,9 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
/** Virtual frame, never pop */
|
||||
Frame loadVirtualFrame(Frame frame) throws IOException {
|
||||
frame.fstArc.output = fstOutputs.getNoOutput();
|
||||
frame.fstArc.nextFinalOutput = fstOutputs.getNoOutput();
|
||||
Frame loadVirtualFrame(Frame frame) {
|
||||
frame.fstArc.output(fstOutputs.getNoOutput());
|
||||
frame.fstArc.nextFinalOutput(fstOutputs.getNoOutput());
|
||||
frame.fsaState = -1;
|
||||
return frame;
|
||||
}
|
||||
|
@ -622,8 +622,8 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (!canGrow(top)) {
|
||||
return null;
|
||||
}
|
||||
frame.fstArc = fst.readFirstRealTargetArc(top.fstArc.target, frame.fstArc, fstReader);
|
||||
frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label);
|
||||
frame.fstArc = fst.readFirstRealTargetArc(top.fstArc.target(), frame.fstArc, fstReader);
|
||||
frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label());
|
||||
//if (TEST) System.out.println(" loadExpand frame="+frame);
|
||||
if (frame.fsaState == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
|
@ -638,7 +638,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
while (!frame.fstArc.isLast()) {
|
||||
frame.fstArc = fst.readNextRealArc(frame.fstArc, fstReader);
|
||||
frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label);
|
||||
frame.fsaState = fsa.step(top.fsaState, frame.fstArc.label());
|
||||
if (frame.fsaState != -1) {
|
||||
break;
|
||||
}
|
||||
|
@ -658,7 +658,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (arc == null) {
|
||||
return null;
|
||||
}
|
||||
frame.fsaState = fsa.step(top.fsaState, arc.label);
|
||||
frame.fsaState = fsa.step(top.fsaState, arc.label());
|
||||
//if (TEST) System.out.println(" loadCeil frame="+frame);
|
||||
if (frame.fsaState == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
|
@ -680,7 +680,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
void pushFrame(Frame frame) {
|
||||
term = grow(frame.fstArc.label);
|
||||
term = grow(frame.fstArc.label());
|
||||
level++;
|
||||
//if (TEST) System.out.println(" term=" + term + " level=" + level);
|
||||
}
|
||||
|
@ -737,7 +737,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
queue.add(startArc);
|
||||
while (!queue.isEmpty()) {
|
||||
final FST.Arc<T> arc = queue.remove(0);
|
||||
final long node = arc.target;
|
||||
final long node = arc.target();
|
||||
//System.out.println(arc);
|
||||
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
|
||||
seen.set((int) node);
|
||||
|
|
|
@ -111,7 +111,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
f.prefix = 0;
|
||||
f.setState(0);
|
||||
f.arc = arc;
|
||||
f.outputPrefix = arc.output;
|
||||
f.outputPrefix = arc.output();
|
||||
f.load(fr.rootCode);
|
||||
|
||||
// for assert:
|
||||
|
@ -186,14 +186,14 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
// passed to findTargetArc
|
||||
arc = fr.index.findTargetArc(target, arc, getArc(1+idx), fstReader);
|
||||
assert arc != null;
|
||||
output = fstOutputs.add(output, arc.output);
|
||||
output = fstOutputs.add(output, arc.output());
|
||||
idx++;
|
||||
}
|
||||
|
||||
f.arc = arc;
|
||||
f.outputPrefix = output;
|
||||
assert arc.isFinal();
|
||||
f.load(fstOutputs.add(output, arc.nextFinalOutput));
|
||||
f.load(fstOutputs.add(output, arc.nextFinalOutput()));
|
||||
return f;
|
||||
}
|
||||
|
||||
|
|
|
@ -351,7 +351,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
SegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -374,9 +374,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -454,19 +454,19 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -521,9 +521,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
arc = nextArc;
|
||||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -533,7 +533,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -608,7 +608,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
SegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -631,14 +631,14 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// TODO: we could save the outputs in local
|
||||
// byte[][] instead of making new objs ever
|
||||
// seek; but, often the FST doesn't have any
|
||||
// shared bytes (but this could change if we
|
||||
// reverse vLong byte order)
|
||||
if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
|
||||
if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -711,19 +711,19 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" no seek state; push root frame");
|
||||
//}
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -779,9 +779,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
arc = nextArc;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -791,7 +791,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -844,8 +844,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
if (fr.index != null) {
|
||||
assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
throw new RuntimeException("seek state is broken");
|
||||
}
|
||||
BytesRef output = Util.get(fr.index, prefix);
|
||||
|
|
|
@ -62,24 +62,24 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*/
|
||||
public final class FST<T> implements Accountable {
|
||||
|
||||
/** Specifies allowed range of each int input label for
|
||||
* this FST. */
|
||||
public enum INPUT_TYPE {BYTE1, BYTE2, BYTE4}
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FST.class);
|
||||
private static final long ARC_SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Arc.class);
|
||||
|
||||
/** Specifies allowed range of each int input label for
|
||||
* this FST. */
|
||||
public static enum INPUT_TYPE {BYTE1, BYTE2, BYTE4};
|
||||
|
||||
static final int BIT_FINAL_ARC = 1 << 0;
|
||||
private static final int BIT_FINAL_ARC = 1 << 0;
|
||||
static final int BIT_LAST_ARC = 1 << 1;
|
||||
static final int BIT_TARGET_NEXT = 1 << 2;
|
||||
|
||||
// TODO: we can free up a bit if we can nuke this:
|
||||
static final int BIT_STOP_NODE = 1 << 3;
|
||||
private static final int BIT_STOP_NODE = 1 << 3;
|
||||
|
||||
/** This flag is set if the arc has an output. */
|
||||
public static final int BIT_ARC_HAS_OUTPUT = 1 << 4;
|
||||
|
||||
static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
|
||||
private static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
|
||||
|
||||
// We use this as a marker (because this one flag is
|
||||
// illegal by itself ...):
|
||||
|
@ -119,10 +119,13 @@ public final class FST<T> implements Accountable {
|
|||
// non-final node w/ no arcs:
|
||||
private static final long NON_FINAL_END_NODE = 0;
|
||||
|
||||
/* Used for memory accounting */
|
||||
private int cachedArcsBytesUsed;
|
||||
|
||||
/** If arc has this label then that arc is final/accepted */
|
||||
public static final int END_LABEL = -1;
|
||||
|
||||
public final INPUT_TYPE inputType;
|
||||
final INPUT_TYPE inputType;
|
||||
|
||||
// if non-null, this FST accepts the empty string and
|
||||
// produces this output
|
||||
|
@ -139,62 +142,51 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
public final Outputs<T> outputs;
|
||||
|
||||
private Arc<T> cachedRootArcs[];
|
||||
private Arc<T>[] cachedRootArcs;
|
||||
|
||||
/** Represents a single arc. */
|
||||
public static final class Arc<T> {
|
||||
public int label;
|
||||
public T output;
|
||||
|
||||
/** To node (ord or address) */
|
||||
public long target;
|
||||
private int label;
|
||||
|
||||
byte flags;
|
||||
public T nextFinalOutput;
|
||||
private T output;
|
||||
|
||||
private long target;
|
||||
|
||||
private byte flags;
|
||||
|
||||
private T nextFinalOutput;
|
||||
|
||||
// address (into the byte[]), or ord/address if label == END_LABEL
|
||||
long nextArc;
|
||||
private long nextArc;
|
||||
|
||||
/** Where the first arc in the array starts; only valid if
|
||||
* bytesPerArc != 0 */
|
||||
public long posArcsStart;
|
||||
|
||||
/** Non-zero if this arc is part of an array, which means all
|
||||
* arcs for the node are encoded with a fixed number of bytes so
|
||||
* that we can random access by index. We do when there are enough
|
||||
* arcs leaving one node. It wastes some bytes but gives faster
|
||||
* lookups. */
|
||||
public int bytesPerArc;
|
||||
private long posArcsStart;
|
||||
|
||||
/** Where we are in the array; only valid if bytesPerArc != 0, and the array has no holes.
|
||||
* arcIdx = Integer.MIN_VALUE indicates that the arc is part of a direct array, addressed by
|
||||
* label.
|
||||
*/
|
||||
public int arcIdx;
|
||||
private int bytesPerArc;
|
||||
|
||||
/** How many arc, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is
|
||||
* direct, this may include holes. Otherwise it is also how many arcs are in the array */
|
||||
public int numArcs;
|
||||
private int arcIdx;
|
||||
|
||||
private int numArcs;
|
||||
|
||||
/** Returns this */
|
||||
public Arc<T> copyFrom(Arc<T> other) {
|
||||
label = other.label;
|
||||
target = other.target;
|
||||
flags = other.flags;
|
||||
output = other.output;
|
||||
nextFinalOutput = other.nextFinalOutput;
|
||||
nextArc = other.nextArc;
|
||||
bytesPerArc = other.bytesPerArc;
|
||||
if (bytesPerArc != 0) {
|
||||
posArcsStart = other.posArcsStart;
|
||||
arcIdx = other.arcIdx;
|
||||
numArcs = other.numArcs;
|
||||
label = other.label();
|
||||
target = other.target();
|
||||
flags = other.flags();
|
||||
output = other.output();
|
||||
nextFinalOutput = other.nextFinalOutput();
|
||||
nextArc = other.nextArc();
|
||||
bytesPerArc = other.bytesPerArc();
|
||||
if (bytesPerArc() != 0) {
|
||||
posArcsStart = other.posArcsStart();
|
||||
arcIdx = other.arcIdx();
|
||||
numArcs = other.numArcs();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
boolean flag(int flag) {
|
||||
return FST.flag(flags, flag);
|
||||
return FST.flag(flags(), flag);
|
||||
}
|
||||
|
||||
public boolean isLast() {
|
||||
|
@ -208,8 +200,8 @@ public final class FST<T> implements Accountable {
|
|||
@Override
|
||||
public String toString() {
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append(" target=").append(target);
|
||||
b.append(" label=0x").append(Integer.toHexString(label));
|
||||
b.append(" target=").append(target());
|
||||
b.append(" label=0x").append(Integer.toHexString(label()));
|
||||
if (flag(BIT_FINAL_ARC)) {
|
||||
b.append(" final");
|
||||
}
|
||||
|
@ -223,40 +215,121 @@ public final class FST<T> implements Accountable {
|
|||
b.append(" stop");
|
||||
}
|
||||
if (flag(BIT_ARC_HAS_OUTPUT)) {
|
||||
b.append(" output=").append(output);
|
||||
b.append(" output=").append(output());
|
||||
}
|
||||
if (flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
|
||||
b.append(" nextFinalOutput=").append(nextFinalOutput);
|
||||
b.append(" nextFinalOutput=").append(nextFinalOutput());
|
||||
}
|
||||
if (bytesPerArc != 0) {
|
||||
b.append(" arcArray(idx=").append(arcIdx).append(" of ").append(numArcs).append(")");
|
||||
if (bytesPerArc() != 0) {
|
||||
b.append(" arcArray(idx=").append(arcIdx()).append(" of ").append(numArcs()).append(")");
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
};
|
||||
|
||||
public int label() {
|
||||
return label;
|
||||
}
|
||||
|
||||
public void label(int label) {
|
||||
this.label = label;
|
||||
}
|
||||
|
||||
public T output() {
|
||||
return output;
|
||||
}
|
||||
|
||||
public void output(T output) {
|
||||
this.output = output;
|
||||
}
|
||||
|
||||
/** To node (ord or address) */
|
||||
public long target() {
|
||||
return target;
|
||||
}
|
||||
|
||||
public byte flags() {
|
||||
return flags;
|
||||
}
|
||||
|
||||
public void flags(byte flags) {
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
public T nextFinalOutput() {
|
||||
return nextFinalOutput;
|
||||
}
|
||||
|
||||
public void nextFinalOutput(T output) {
|
||||
nextFinalOutput = output;
|
||||
}
|
||||
|
||||
long nextArc() {
|
||||
return nextArc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the position of the next arc to read
|
||||
* @param nextArc the position to set
|
||||
*/
|
||||
public void nextArc(long nextArc) {
|
||||
this.nextArc = nextArc;
|
||||
}
|
||||
|
||||
/** Where the first arc in the array starts; only valid if
|
||||
* bytesPerArc != 0 */
|
||||
public long posArcsStart() {
|
||||
return posArcsStart;
|
||||
}
|
||||
|
||||
/** Non-zero if this arc is part of an array, which means all
|
||||
* arcs for the node are encoded with a fixed number of bytes so
|
||||
* that we can random access by index. We do when there are enough
|
||||
* arcs leaving one node. It wastes some bytes but gives faster
|
||||
* lookups. */
|
||||
public int bytesPerArc() {
|
||||
return bytesPerArc;
|
||||
}
|
||||
|
||||
/** Where we are in the array; only valid if bytesPerArc != 0, and the array has no holes.
|
||||
* arcIdx = Integer.MIN_VALUE indicates that the arc is part of a direct array, addressed by
|
||||
* label.
|
||||
*/
|
||||
public int arcIdx() {
|
||||
return arcIdx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the arcIdx
|
||||
* @param idx the value to set
|
||||
*/
|
||||
public void arcIdx(int idx) {
|
||||
arcIdx = idx;
|
||||
}
|
||||
|
||||
/** How many arc, if bytesPerArc == 0. Otherwise, the size of the arc array. If the array is
|
||||
* direct, this may include holes. Otherwise it is also how many arcs are in the array */
|
||||
public int numArcs() {
|
||||
return numArcs;
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean flag(int flags, int bit) {
|
||||
return (flags & bit) != 0;
|
||||
}
|
||||
|
||||
private final int version;
|
||||
|
||||
// make a new empty FST, for building; Builder invokes
|
||||
// this ctor
|
||||
// make a new empty FST, for building; Builder invokes this
|
||||
FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
|
||||
this.inputType = inputType;
|
||||
this.outputs = outputs;
|
||||
version = VERSION_CURRENT;
|
||||
fstStore = null;
|
||||
bytes = new BytesStore(bytesPageBits);
|
||||
// pad: ensure no node gets address 0 which is reserved to mean
|
||||
// the stop state w/ no arcs
|
||||
bytes.writeByte((byte) 0);
|
||||
|
||||
emptyOutput = null;
|
||||
}
|
||||
|
||||
public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
|
||||
private static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
|
||||
|
||||
/** Load a previously saved FST. */
|
||||
public FST(DataInput in, Outputs<T> outputs) throws IOException {
|
||||
|
@ -270,9 +343,9 @@ public final class FST<T> implements Accountable {
|
|||
this.fstStore = fstStore;
|
||||
this.outputs = outputs;
|
||||
|
||||
// NOTE: only reads most recent format; we don't have
|
||||
// back-compat promise for FSTs (they are experimental):
|
||||
version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
|
||||
// NOTE: only reads formats VERSION_START up to VERSION_CURRENT; we don't have
|
||||
// back-compat promise for FSTs (they are experimental), but we are sometimes able to offer it
|
||||
CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
|
||||
if (in.readByte() == 1) {
|
||||
// accepts empty string
|
||||
// 1 KB blocks:
|
||||
|
@ -313,10 +386,6 @@ public final class FST<T> implements Accountable {
|
|||
cacheRootArcs();
|
||||
}
|
||||
|
||||
public INPUT_TYPE getInputType() {
|
||||
return inputType;
|
||||
}
|
||||
|
||||
private long ramBytesUsed(Arc<T>[] arcs) {
|
||||
long size = 0;
|
||||
if (arcs != null) {
|
||||
|
@ -324,11 +393,11 @@ public final class FST<T> implements Accountable {
|
|||
for (Arc<T> arc : arcs) {
|
||||
if (arc != null) {
|
||||
size += ARC_SHALLOW_RAM_BYTES_USED;
|
||||
if (arc.output != null && arc.output != outputs.getNoOutput()) {
|
||||
size += outputs.ramBytesUsed(arc.output);
|
||||
if (arc.output() != null && arc.output() != outputs.getNoOutput()) {
|
||||
size += outputs.ramBytesUsed(arc.output());
|
||||
}
|
||||
if (arc.nextFinalOutput != null && arc.nextFinalOutput != outputs.getNoOutput()) {
|
||||
size += outputs.ramBytesUsed(arc.nextFinalOutput);
|
||||
if (arc.nextFinalOutput() != null && arc.nextFinalOutput() != outputs.getNoOutput()) {
|
||||
size += outputs.ramBytesUsed(arc.nextFinalOutput());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -336,8 +405,6 @@ public final class FST<T> implements Accountable {
|
|||
return size;
|
||||
}
|
||||
|
||||
private int cachedArcsBytesUsed;
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long size = BASE_RAM_BYTES_USED;
|
||||
|
@ -380,12 +447,12 @@ public final class FST<T> implements Accountable {
|
|||
if (targetHasArcs(arc)) {
|
||||
final BytesReader in = getBytesReader();
|
||||
Arc<T>[] arcs = (Arc<T>[]) new Arc[0x80];
|
||||
readFirstRealTargetArc(arc.target, arc, in);
|
||||
readFirstRealTargetArc(arc.target(), arc, in);
|
||||
int count = 0;
|
||||
while(true) {
|
||||
assert arc.label != END_LABEL;
|
||||
if (arc.label < arcs.length) {
|
||||
arcs[arc.label] = new Arc<T>().copyFrom(arc);
|
||||
assert arc.label() != END_LABEL;
|
||||
if (arc.label() < arcs.length) {
|
||||
arcs[arc.label()] = new Arc<T>().copyFrom(arc);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -410,7 +477,7 @@ public final class FST<T> implements Accountable {
|
|||
return emptyOutput;
|
||||
}
|
||||
|
||||
void setEmptyOutput(T v) throws IOException {
|
||||
void setEmptyOutput(T v) {
|
||||
if (emptyOutput != null) {
|
||||
emptyOutput = outputs.merge(emptyOutput, v);
|
||||
} else {
|
||||
|
@ -433,18 +500,19 @@ public final class FST<T> implements Accountable {
|
|||
ByteBuffersDataOutput ros = new ByteBuffersDataOutput();
|
||||
outputs.writeFinalOutput(emptyOutput, ros);
|
||||
byte[] emptyOutputBytes = ros.toArrayCopy();
|
||||
int emptyLen = emptyOutputBytes.length;
|
||||
|
||||
// reverse
|
||||
final int stopAt = emptyOutputBytes.length/2;
|
||||
final int stopAt = emptyLen / 2;
|
||||
int upto = 0;
|
||||
while (upto < stopAt) {
|
||||
final byte b = emptyOutputBytes[upto];
|
||||
emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
|
||||
emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
|
||||
emptyOutputBytes[upto] = emptyOutputBytes[emptyLen - upto - 1];
|
||||
emptyOutputBytes[emptyLen - upto - 1] = b;
|
||||
upto++;
|
||||
}
|
||||
out.writeVInt(emptyOutputBytes.length);
|
||||
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
|
||||
out.writeVInt(emptyLen);
|
||||
out.writeBytes(emptyOutputBytes, 0, emptyLen);
|
||||
} else {
|
||||
out.writeByte((byte) 0);
|
||||
}
|
||||
|
@ -517,7 +585,7 @@ public final class FST<T> implements Accountable {
|
|||
/** returns true if the node at this address has any
|
||||
* outgoing arcs */
|
||||
public static<T> boolean targetHasArcs(Arc<T> arc) {
|
||||
return arc.target > 0;
|
||||
return arc.target() > 0;
|
||||
}
|
||||
|
||||
// serializes new node by appending its bytes to the end
|
||||
|
@ -652,7 +720,7 @@ public final class FST<T> implements Accountable {
|
|||
//System.out.println("write int @pos=" + (fixedArrayStart-4) + " numArcs=" + nodeIn.numArcs);
|
||||
// create the header
|
||||
// TODO: clean this up: or just rewind+reuse and deal with it
|
||||
byte header[] = new byte[MAX_HEADER_SIZE];
|
||||
byte[] header = new byte[MAX_HEADER_SIZE];
|
||||
ByteArrayDataOutput bad = new ByteArrayDataOutput(header);
|
||||
// write a "false" first arc:
|
||||
if (writeDirectly) {
|
||||
|
@ -742,8 +810,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Fills virtual 'start' arc, ie, an empty incoming arc to
|
||||
* the FST's start node */
|
||||
/** Fills virtual 'start' arc, ie, an empty incoming arc to the FST's start node */
|
||||
public Arc<T> getFirstArc(Arc<T> arc) {
|
||||
T NO_OUTPUT = outputs.getNoOutput();
|
||||
|
||||
|
@ -771,18 +838,18 @@ public final class FST<T> implements Accountable {
|
|||
*
|
||||
* @return Returns the second argument
|
||||
* (<code>arc</code>). */
|
||||
public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
|
||||
Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
|
||||
//System.out.println("readLast");
|
||||
if (!targetHasArcs(follow)) {
|
||||
//System.out.println(" end node");
|
||||
assert follow.isFinal();
|
||||
arc.label = END_LABEL;
|
||||
arc.target = FINAL_END_NODE;
|
||||
arc.output = follow.nextFinalOutput;
|
||||
arc.output = follow.nextFinalOutput();
|
||||
arc.flags = BIT_LAST_ARC;
|
||||
return arc;
|
||||
} else {
|
||||
in.setPosition(follow.target);
|
||||
in.setPosition(follow.target());
|
||||
final byte b = in.readByte();
|
||||
if (b == ARCS_AS_ARRAY_PACKED || b == ARCS_AS_ARRAY_WITH_GAPS) {
|
||||
// array: jump straight to end
|
||||
|
@ -792,9 +859,9 @@ public final class FST<T> implements Accountable {
|
|||
arc.posArcsStart = in.getPosition();
|
||||
if (b == ARCS_AS_ARRAY_WITH_GAPS) {
|
||||
arc.arcIdx = Integer.MIN_VALUE;
|
||||
arc.nextArc = arc.posArcsStart - (arc.numArcs - 1) * arc.bytesPerArc;
|
||||
arc.nextArc = arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc();
|
||||
} else {
|
||||
arc.arcIdx = arc.numArcs - 2;
|
||||
arc.arcIdx = arc.numArcs() - 2;
|
||||
}
|
||||
} else {
|
||||
arc.flags = b;
|
||||
|
@ -844,25 +911,24 @@ public final class FST<T> implements Accountable {
|
|||
if (follow.isFinal()) {
|
||||
// Insert "fake" final first arc:
|
||||
arc.label = END_LABEL;
|
||||
arc.output = follow.nextFinalOutput;
|
||||
arc.output = follow.nextFinalOutput();
|
||||
arc.flags = BIT_FINAL_ARC;
|
||||
if (follow.target <= 0) {
|
||||
if (follow.target() <= 0) {
|
||||
arc.flags |= BIT_LAST_ARC;
|
||||
} else {
|
||||
// NOTE: nextArc is a node (not an address!) in this case:
|
||||
arc.nextArc = follow.target;
|
||||
arc.nextArc = follow.target();
|
||||
}
|
||||
arc.target = FINAL_END_NODE;
|
||||
//System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
|
||||
return arc;
|
||||
} else {
|
||||
return readFirstRealTargetArc(follow.target, arc, in);
|
||||
return readFirstRealTargetArc(follow.target(), arc, in);
|
||||
}
|
||||
}
|
||||
|
||||
public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException {
|
||||
final long address = node;
|
||||
in.setPosition(address);
|
||||
public Arc<T> readFirstRealTargetArc(long nodeAddress, Arc<T> arc, final BytesReader in) throws IOException {
|
||||
in.setPosition(nodeAddress);
|
||||
//System.out.println(" flags=" + arc.flags);
|
||||
|
||||
byte flags = in.readByte();
|
||||
|
@ -880,7 +946,7 @@ public final class FST<T> implements Accountable {
|
|||
//System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
|
||||
} else {
|
||||
//arc.flags = b;
|
||||
arc.nextArc = address;
|
||||
arc.nextArc = nodeAddress;
|
||||
arc.bytesPerArc = 0;
|
||||
}
|
||||
|
||||
|
@ -897,7 +963,7 @@ public final class FST<T> implements Accountable {
|
|||
if (!targetHasArcs(follow)) {
|
||||
return false;
|
||||
} else {
|
||||
in.setPosition(follow.target);
|
||||
in.setPosition(follow.target());
|
||||
byte flags = in.readByte();
|
||||
return flags == ARCS_AS_ARRAY_PACKED || flags == ARCS_AS_ARRAY_WITH_GAPS;
|
||||
}
|
||||
|
@ -905,12 +971,12 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
/** In-place read; returns the arc. */
|
||||
public Arc<T> readNextArc(Arc<T> arc, BytesReader in) throws IOException {
|
||||
if (arc.label == END_LABEL) {
|
||||
if (arc.label() == END_LABEL) {
|
||||
// This was a fake inserted "final" arc
|
||||
if (arc.nextArc <= 0) {
|
||||
if (arc.nextArc() <= 0) {
|
||||
throw new IllegalArgumentException("cannot readNextArc when arc.isLast()=true");
|
||||
}
|
||||
return readFirstRealTargetArc(arc.nextArc, arc, in);
|
||||
return readFirstRealTargetArc(arc.nextArc(), arc, in);
|
||||
} else {
|
||||
return readNextRealArc(arc, in);
|
||||
}
|
||||
|
@ -918,14 +984,14 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
/** Peeks at next arc's label; does not alter arc. Do
|
||||
* not call this if arc.isLast()! */
|
||||
public int readNextArcLabel(Arc<T> arc, BytesReader in) throws IOException {
|
||||
int readNextArcLabel(Arc<T> arc, BytesReader in) throws IOException {
|
||||
assert !arc.isLast();
|
||||
|
||||
if (arc.label == END_LABEL) {
|
||||
if (arc.label() == END_LABEL) {
|
||||
//System.out.println(" nextArc fake " +
|
||||
//arc.nextArc);
|
||||
|
||||
long pos = arc.nextArc;
|
||||
long pos = arc.nextArc();
|
||||
in.setPosition(pos);
|
||||
|
||||
final byte flags = in.readByte();
|
||||
|
@ -941,19 +1007,19 @@ public final class FST<T> implements Accountable {
|
|||
// skip flags
|
||||
in.readByte();
|
||||
} else {
|
||||
if (arc.bytesPerArc != 0) {
|
||||
if (arc.bytesPerArc() != 0) {
|
||||
//System.out.println(" nextArc real array");
|
||||
// arcs are in an array
|
||||
if (arc.arcIdx >= 0) {
|
||||
in.setPosition(arc.posArcsStart);
|
||||
if (arc.arcIdx() >= 0) {
|
||||
in.setPosition(arc.posArcsStart());
|
||||
// point at next arc, -1 to skip flags
|
||||
in.skipBytes((1 + arc.arcIdx) * arc.bytesPerArc + 1);
|
||||
in.skipBytes((1 + arc.arcIdx()) * arc.bytesPerArc() + 1);
|
||||
} else {
|
||||
in.setPosition(arc.nextArc);
|
||||
in.setPosition(arc.nextArc());
|
||||
byte flags = in.readByte();
|
||||
// skip missing arcs
|
||||
while (flag(flags, BIT_MISSING_ARC)) {
|
||||
in.skipBytes(arc.bytesPerArc - 1);
|
||||
in.skipBytes(arc.bytesPerArc() - 1);
|
||||
flags = in.readByte();
|
||||
}
|
||||
}
|
||||
|
@ -961,7 +1027,7 @@ public final class FST<T> implements Accountable {
|
|||
// arcs are packed
|
||||
//System.out.println(" nextArc real packed");
|
||||
// -1 to skip flags
|
||||
in.setPosition(arc.nextArc - 1);
|
||||
in.setPosition(arc.nextArc() - 1);
|
||||
}
|
||||
}
|
||||
return readLabel(in);
|
||||
|
@ -975,29 +1041,30 @@ public final class FST<T> implements Accountable {
|
|||
// assert !flag(arc.flags, BIT_LAST_ARC);
|
||||
|
||||
// this is a continuing arc in a fixed array
|
||||
if (arc.bytesPerArc != 0) {
|
||||
if (arc.bytesPerArc() != 0) {
|
||||
// arcs are in an array
|
||||
if (arc.arcIdx > Integer.MIN_VALUE) {
|
||||
if (arc.arcIdx() > Integer.MIN_VALUE) {
|
||||
arc.arcIdx++;
|
||||
assert arc.arcIdx < arc.numArcs;
|
||||
in.setPosition(arc.posArcsStart - arc.arcIdx * arc.bytesPerArc);
|
||||
in.setPosition(arc.posArcsStart() - arc.arcIdx() * arc.bytesPerArc());
|
||||
arc.flags = in.readByte();
|
||||
} else {
|
||||
assert arc.nextArc <= arc.posArcsStart && arc.nextArc > arc.posArcsStart - arc.numArcs * arc.bytesPerArc;
|
||||
in.setPosition(arc.nextArc);
|
||||
assert arc.nextArc() <= arc.posArcsStart() && arc.nextArc() > arc.posArcsStart() - arc.numArcs() * arc.bytesPerArc();
|
||||
in.setPosition(arc.nextArc());
|
||||
arc.flags = in.readByte();
|
||||
while (flag(arc.flags, BIT_MISSING_ARC)) {
|
||||
while (flag(arc.flags(), BIT_MISSING_ARC)) {
|
||||
// skip empty arcs
|
||||
arc.nextArc -= arc.bytesPerArc;
|
||||
in.skipBytes(arc.bytesPerArc - 1);
|
||||
arc.nextArc = arc.nextArc() - arc.bytesPerArc();
|
||||
in.skipBytes(arc.bytesPerArc() - 1);
|
||||
arc.flags = in.readByte();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// arcs are packed
|
||||
in.setPosition(arc.nextArc);
|
||||
in.setPosition(arc.nextArc());
|
||||
arc.flags = in.readByte();
|
||||
}
|
||||
|
||||
arc.label = readLabel(in);
|
||||
|
||||
if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
|
||||
|
@ -1018,31 +1085,31 @@ public final class FST<T> implements Accountable {
|
|||
} else {
|
||||
arc.target = NON_FINAL_END_NODE;
|
||||
}
|
||||
if (arc.bytesPerArc == 0) {
|
||||
if (arc.bytesPerArc() == 0) {
|
||||
arc.nextArc = in.getPosition();
|
||||
} else {
|
||||
arc.nextArc -= arc.bytesPerArc;
|
||||
arc.nextArc -= arc.bytesPerArc();
|
||||
}
|
||||
} else if (arc.flag(BIT_TARGET_NEXT)) {
|
||||
arc.nextArc = in.getPosition();
|
||||
// TODO: would be nice to make this lazy -- maybe
|
||||
// caller doesn't need the target and is scanning arcs...
|
||||
if (!arc.flag(BIT_LAST_ARC)) {
|
||||
if (arc.bytesPerArc == 0) {
|
||||
if (arc.bytesPerArc() == 0) {
|
||||
// must scan
|
||||
seekToNextNode(in);
|
||||
} else {
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * arc.numArcs);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * arc.numArcs());
|
||||
}
|
||||
}
|
||||
arc.target = in.getPosition();
|
||||
} else {
|
||||
arc.target = readUnpackedNodeTarget(in);
|
||||
if (arc.bytesPerArc > 0 && arc.arcIdx == Integer.MIN_VALUE) {
|
||||
if (arc.bytesPerArc() > 0 && arc.arcIdx() == Integer.MIN_VALUE) {
|
||||
// nextArc was pointing to *this* arc when we entered; advance to the next
|
||||
// if it is a missing arc, we will skip it later
|
||||
arc.nextArc -= arc.bytesPerArc;
|
||||
arc.nextArc = arc.nextArc() - arc.bytesPerArc();
|
||||
} else {
|
||||
// in list and fixed table encodings, the next arc always follows this one
|
||||
arc.nextArc = in.getPosition();
|
||||
|
@ -1065,19 +1132,16 @@ public final class FST<T> implements Accountable {
|
|||
assert cachedArc == null;
|
||||
} else {
|
||||
assert cachedArc != null;
|
||||
assert cachedArc.arcIdx == result.arcIdx;
|
||||
assert cachedArc.bytesPerArc == result.bytesPerArc;
|
||||
assert cachedArc.flags == result.flags;
|
||||
assert cachedArc.label == result.label;
|
||||
if (cachedArc.bytesPerArc == 0 || cachedArc.arcIdx == Integer.MIN_VALUE) {
|
||||
// in the sparse array case, this value is not valid, so don't assert it
|
||||
assert cachedArc.nextArc == result.nextArc;
|
||||
}
|
||||
assert cachedArc.nextFinalOutput.equals(result.nextFinalOutput);
|
||||
assert cachedArc.numArcs == result.numArcs;
|
||||
assert cachedArc.output.equals(result.output);
|
||||
assert cachedArc.posArcsStart == result.posArcsStart;
|
||||
assert cachedArc.target == result.target;
|
||||
assert cachedArc.arcIdx() == result.arcIdx();
|
||||
assert cachedArc.bytesPerArc() == result.bytesPerArc();
|
||||
assert cachedArc.flags() == result.flags();
|
||||
assert cachedArc.label() == result.label();
|
||||
assert (cachedArc.bytesPerArc() != 0 && cachedArc.arcIdx() != Integer.MIN_VALUE) || cachedArc.nextArc() == result.nextArc();
|
||||
assert cachedArc.nextFinalOutput().equals(result.nextFinalOutput());
|
||||
assert cachedArc.numArcs() == result.numArcs();
|
||||
assert cachedArc.output().equals(result.output());
|
||||
assert cachedArc.posArcsStart() == result.posArcsStart();
|
||||
assert cachedArc.target() == result.target();
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -1098,14 +1162,14 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
if (labelToMatch == END_LABEL) {
|
||||
if (follow.isFinal()) {
|
||||
if (follow.target <= 0) {
|
||||
if (follow.target() <= 0) {
|
||||
arc.flags = BIT_LAST_ARC;
|
||||
} else {
|
||||
arc.flags = 0;
|
||||
// NOTE: nextArc is a node (not an address!) in this case:
|
||||
arc.nextArc = follow.target;
|
||||
arc.nextArc = follow.target();
|
||||
}
|
||||
arc.output = follow.nextFinalOutput;
|
||||
arc.output = follow.nextFinalOutput();
|
||||
arc.label = END_LABEL;
|
||||
return arc;
|
||||
} else {
|
||||
|
@ -1114,7 +1178,7 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
// Short-circuit if this arc is in the root arc cache:
|
||||
if (useRootArcCache && cachedRootArcs != null && follow.target == startNode && labelToMatch < cachedRootArcs.length) {
|
||||
if (useRootArcCache && cachedRootArcs != null && follow.target() == startNode && labelToMatch < cachedRootArcs.length) {
|
||||
final Arc<T> result = cachedRootArcs[labelToMatch];
|
||||
|
||||
// LUCENE-5152: detect tricky cases where caller
|
||||
|
@ -1133,7 +1197,7 @@ public final class FST<T> implements Accountable {
|
|||
return null;
|
||||
}
|
||||
|
||||
in.setPosition(follow.target);
|
||||
in.setPosition(follow.target());
|
||||
|
||||
// System.out.println("fta label=" + (char) labelToMatch);
|
||||
|
||||
|
@ -1148,12 +1212,12 @@ public final class FST<T> implements Accountable {
|
|||
int firstLabel = readLabel(in);
|
||||
int arcPos = labelToMatch - firstLabel;
|
||||
if (arcPos == 0) {
|
||||
arc.nextArc = arc.posArcsStart;
|
||||
arc.nextArc = arc.posArcsStart();
|
||||
} else if (arcPos > 0) {
|
||||
if (arcPos >= arc.numArcs) {
|
||||
if (arcPos >= arc.numArcs()) {
|
||||
return null;
|
||||
}
|
||||
in.setPosition(arc.posArcsStart - arc.bytesPerArc * arcPos);
|
||||
in.setPosition(arc.posArcsStart() - arc.bytesPerArc() * arcPos);
|
||||
flags = in.readByte();
|
||||
if (flag(flags, BIT_MISSING_ARC)) {
|
||||
return null;
|
||||
|
@ -1172,12 +1236,12 @@ public final class FST<T> implements Accountable {
|
|||
|
||||
// Array is sparse; do binary search:
|
||||
int low = 0;
|
||||
int high = arc.numArcs - 1;
|
||||
int high = arc.numArcs() - 1;
|
||||
while (low <= high) {
|
||||
//System.out.println(" cycle");
|
||||
int mid = (low + high) >>> 1;
|
||||
// +1 to skip over flags
|
||||
in.setPosition(arc.posArcsStart - (arc.bytesPerArc * mid + 1));
|
||||
in.setPosition(arc.posArcsStart() - (arc.bytesPerArc() * mid + 1));
|
||||
int midLabel = readLabel(in);
|
||||
final int cmp = midLabel - labelToMatch;
|
||||
if (cmp < 0) {
|
||||
|
@ -1194,17 +1258,17 @@ public final class FST<T> implements Accountable {
|
|||
}
|
||||
|
||||
// Linear scan
|
||||
readFirstRealTargetArc(follow.target, arc, in);
|
||||
readFirstRealTargetArc(follow.target(), arc, in);
|
||||
|
||||
while(true) {
|
||||
//System.out.println(" non-bs cycle");
|
||||
// TODO: we should fix this code to not have to create
|
||||
// object for the output of every arc we scan... only
|
||||
// for the matching arc, if found
|
||||
if (arc.label == labelToMatch) {
|
||||
if (arc.label() == labelToMatch) {
|
||||
//System.out.println(" found!");
|
||||
return arc;
|
||||
} else if (arc.label > labelToMatch) {
|
||||
} else if (arc.label() > labelToMatch) {
|
||||
return null;
|
||||
} else if (arc.isLast()) {
|
||||
return null;
|
||||
|
|
|
@ -39,12 +39,12 @@ abstract class FSTEnum<T> {
|
|||
protected final FST.Arc<T> scratchArc = new FST.Arc<>();
|
||||
|
||||
protected int upto;
|
||||
protected int targetLength;
|
||||
int targetLength;
|
||||
|
||||
/** doFloor controls the behavior of advance: if
|
||||
* doFloor is true, advance positions to the biggest
|
||||
* term before target. */
|
||||
protected FSTEnum(FST<T> fst) {
|
||||
FSTEnum(FST<T> fst) {
|
||||
this.fst = fst;
|
||||
fstReader = fst.getBytesReader();
|
||||
NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
|
@ -60,7 +60,7 @@ abstract class FSTEnum<T> {
|
|||
|
||||
/** Rewinds enum state to match the shared prefix between
|
||||
* current term and target term */
|
||||
protected final void rewindPrefix() throws IOException {
|
||||
private void rewindPrefix() throws IOException {
|
||||
if (upto == 0) {
|
||||
//System.out.println(" init");
|
||||
upto = 1;
|
||||
|
@ -138,10 +138,10 @@ abstract class FSTEnum<T> {
|
|||
while(arc != null) {
|
||||
int targetLabel = getTargetLabel();
|
||||
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);
|
||||
if (arc.bytesPerArc != 0 && arc.label != -1) {
|
||||
if (arc.bytesPerArc() != 0 && arc.label() != -1) {
|
||||
// Arcs are in an array
|
||||
final FST.BytesReader in = fst.getBytesReader();
|
||||
if (arc.arcIdx == Integer.MIN_VALUE) {
|
||||
if (arc.arcIdx() == Integer.MIN_VALUE) {
|
||||
arc = doSeekCeilArrayWithGaps(arc, targetLabel, in);
|
||||
} else {
|
||||
arc = doSeekCeilArrayPacked(arc, targetLabel, in);
|
||||
|
@ -155,13 +155,13 @@ abstract class FSTEnum<T> {
|
|||
private FST.Arc<T> doSeekCeilArrayWithGaps(final FST.Arc<T> arc, final int targetLabel, final FST.BytesReader in) throws IOException {
|
||||
// The array is addressed directly by label and may contain holes.
|
||||
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(1);
|
||||
int firstLabel = fst.readLabel(in);
|
||||
int arcOffset = targetLabel - firstLabel;
|
||||
if (arcOffset >= arc.numArcs) {
|
||||
if (arcOffset >= arc.numArcs()) {
|
||||
// target is beyond the last arc
|
||||
arc.nextArc = arc.posArcsStart - (arc.numArcs - 1) * arc.bytesPerArc;
|
||||
arc.nextArc(arc.posArcsStart() - (arc.numArcs() - 1) * arc.bytesPerArc());
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.isLast();
|
||||
// Dead end (target is after the last arc);
|
||||
|
@ -183,23 +183,23 @@ abstract class FSTEnum<T> {
|
|||
} else {
|
||||
// TODO: if firstLabel == targetLabel
|
||||
if (arcOffset >= 0) {
|
||||
arc.nextArc = arc.posArcsStart - (arc.bytesPerArc * arcOffset);
|
||||
arc.nextArc(arc.posArcsStart() - (arc.bytesPerArc() * arcOffset));
|
||||
} else {
|
||||
arc.nextArc = arc.posArcsStart;
|
||||
arc.nextArc(arc.posArcsStart());
|
||||
}
|
||||
fst.readNextRealArc(arc, in);
|
||||
if (arc.label == targetLabel) {
|
||||
if (arc.label() == targetLabel) {
|
||||
// found -- copy pasta from below
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
}
|
||||
// not found, return the next highest
|
||||
assert arc.label > targetLabel;
|
||||
assert arc.label() > targetLabel;
|
||||
pushFirst();
|
||||
return null;
|
||||
}
|
||||
|
@ -208,15 +208,15 @@ abstract class FSTEnum<T> {
|
|||
private FST.Arc<T> doSeekCeilArrayPacked(final FST.Arc<T> arc, final int targetLabel, final FST.BytesReader in) throws IOException {
|
||||
// The array is packed -- use binary search to find the target.
|
||||
|
||||
int low = arc.arcIdx;
|
||||
int high = arc.numArcs-1;
|
||||
int low = arc.arcIdx();
|
||||
int high = arc.numArcs() -1;
|
||||
int mid = 0;
|
||||
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
|
||||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * mid + 1);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * mid + 1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - targetLabel;
|
||||
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
|
||||
|
@ -234,20 +234,20 @@ abstract class FSTEnum<T> {
|
|||
// the outer else clause):
|
||||
if (found) {
|
||||
// Match
|
||||
arc.arcIdx = mid-1;
|
||||
arc.arcIdx(mid - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.arcIdx == mid;
|
||||
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
assert arc.arcIdx() == mid;
|
||||
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
} else if (low == arc.numArcs) {
|
||||
} else if (low == arc.numArcs()) {
|
||||
// Dead end
|
||||
arc.arcIdx = arc.numArcs-2;
|
||||
arc.arcIdx(arc.numArcs() - 2);
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.isLast();
|
||||
// Dead end (target is after the last arc);
|
||||
|
@ -267,9 +267,9 @@ abstract class FSTEnum<T> {
|
|||
upto--;
|
||||
}
|
||||
} else {
|
||||
arc.arcIdx = (low > high ? low : high)-1;
|
||||
arc.arcIdx(low - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.label > targetLabel;
|
||||
assert arc.label() > targetLabel;
|
||||
pushFirst();
|
||||
return null;
|
||||
}
|
||||
|
@ -277,16 +277,16 @@ abstract class FSTEnum<T> {
|
|||
|
||||
private FST.Arc<T> doSeekCeilList(final FST.Arc<T> arc, final int targetLabel) throws IOException {
|
||||
// Arcs are not array'd -- must do linear scan:
|
||||
if (arc.label == targetLabel) {
|
||||
if (arc.label() == targetLabel) {
|
||||
// recurse
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
} else if (arc.label > targetLabel) {
|
||||
} else if (arc.label() > targetLabel) {
|
||||
pushFirst();
|
||||
return null;
|
||||
} else if (arc.isLast()) {
|
||||
|
@ -340,10 +340,10 @@ abstract class FSTEnum<T> {
|
|||
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + arc.bytesPerArc);
|
||||
int targetLabel = getTargetLabel();
|
||||
|
||||
if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
|
||||
if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) {
|
||||
// Arcs are in an array
|
||||
final FST.BytesReader in = fst.getBytesReader();
|
||||
if (arc.arcIdx == Integer.MIN_VALUE) {
|
||||
if (arc.arcIdx() == Integer.MIN_VALUE) {
|
||||
arc = doSeekFloorArrayWithGaps(arc, targetLabel, in);
|
||||
} else {
|
||||
arc = doSeekFloorArrayPacked(arc, targetLabel, in);
|
||||
|
@ -356,7 +356,7 @@ abstract class FSTEnum<T> {
|
|||
|
||||
private FST.Arc<T> doSeekFloorArrayWithGaps(FST.Arc<T> arc, int targetLabel, final FST.BytesReader in) throws IOException {
|
||||
// The array is addressed directly by label and may contain holes.
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(1);
|
||||
int firstLabel = fst.readLabel(in);
|
||||
int targetOffset = targetLabel - firstLabel;
|
||||
|
@ -368,7 +368,7 @@ abstract class FSTEnum<T> {
|
|||
// First, walk backwards until we find a first arc
|
||||
// that's before our target label:
|
||||
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
|
||||
if (arc.label < targetLabel) {
|
||||
if (arc.label() < targetLabel) {
|
||||
// Then, scan forwards to the arc just before
|
||||
// the targetLabel:
|
||||
while(!arc.isLast() && fst.readNextArcLabel(arc, in) < targetLabel) {
|
||||
|
@ -385,39 +385,38 @@ abstract class FSTEnum<T> {
|
|||
arc = getArc(upto);
|
||||
}
|
||||
} else {
|
||||
if (targetOffset >= arc.numArcs) {
|
||||
arc.nextArc = arc.posArcsStart - arc.bytesPerArc * (arc.numArcs - 1);
|
||||
if (targetOffset >= arc.numArcs()) {
|
||||
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * (arc.numArcs() - 1));
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.isLast();
|
||||
assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
|
||||
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
|
||||
pushLast();
|
||||
return null;
|
||||
}
|
||||
arc.nextArc = arc.posArcsStart - arc.bytesPerArc * targetOffset;
|
||||
arc.nextArc(arc.posArcsStart() - arc.bytesPerArc() * targetOffset);
|
||||
fst.readNextRealArc(arc, in);
|
||||
if (arc.label == targetLabel) {
|
||||
if (arc.label() == targetLabel) {
|
||||
// found -- copy pasta from below
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
}
|
||||
// Scan backwards to find a floor arc that is not missing
|
||||
for (long arcOffset = arc.posArcsStart - targetOffset * arc.bytesPerArc; arcOffset <= arc.posArcsStart; arcOffset += arc.bytesPerArc) {
|
||||
for (long arcOffset = arc.posArcsStart() - targetOffset * arc.bytesPerArc(); arcOffset <= arc.posArcsStart(); arcOffset += arc.bytesPerArc()) {
|
||||
// TODO: we can do better here by skipping missing arcs
|
||||
arc.nextArc = arcOffset;
|
||||
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
|
||||
arc.nextArc(arcOffset);
|
||||
fst.readNextRealArc(arc, in);
|
||||
if (arc.label < targetLabel) {
|
||||
if (arc.label() < targetLabel) {
|
||||
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
|
||||
pushLast();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
assert false: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
|
||||
assert false: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
|
||||
return arc; // unreachable
|
||||
}
|
||||
}
|
||||
|
@ -425,15 +424,15 @@ abstract class FSTEnum<T> {
|
|||
private FST.Arc<T> doSeekFloorArrayPacked(FST.Arc<T> arc, int targetLabel, final FST.BytesReader in) throws IOException {
|
||||
// Arcs are fixed array -- use binary search to find the target.
|
||||
|
||||
int low = arc.arcIdx;
|
||||
int high = arc.numArcs-1;
|
||||
int low = arc.arcIdx();
|
||||
int high = arc.numArcs() -1;
|
||||
int mid = 0;
|
||||
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
|
||||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc*mid+1);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * mid + 1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - targetLabel;
|
||||
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
|
||||
|
@ -452,15 +451,15 @@ abstract class FSTEnum<T> {
|
|||
if (found) {
|
||||
// Match -- recurse
|
||||
//System.out.println(" match! arcIdx=" + mid);
|
||||
arc.arcIdx = mid-1;
|
||||
arc.arcIdx(mid - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.arcIdx == mid;
|
||||
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
assert arc.arcIdx() == mid;
|
||||
assert arc.label() == targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel + " mid=" + mid;
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
} else if (high == -1) {
|
||||
|
@ -474,7 +473,7 @@ abstract class FSTEnum<T> {
|
|||
// First, walk backwards until we find a first arc
|
||||
// that's before our target label:
|
||||
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
|
||||
if (arc.label < targetLabel) {
|
||||
if (arc.label() < targetLabel) {
|
||||
// Then, scan forwards to the arc just before
|
||||
// the targetLabel:
|
||||
while(!arc.isLast() && fst.readNextArcLabel(arc, in) < targetLabel) {
|
||||
|
@ -492,27 +491,26 @@ abstract class FSTEnum<T> {
|
|||
}
|
||||
} else {
|
||||
// There is a floor arc:
|
||||
arc.arcIdx = (low > high ? high : low)-1;
|
||||
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
|
||||
arc.arcIdx(high - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
assert arc.isLast() || fst.readNextArcLabel(arc, in) > targetLabel;
|
||||
assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
|
||||
assert arc.label() < targetLabel: "arc.label=" + arc.label() + " vs targetLabel=" + targetLabel;
|
||||
pushLast();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private FST.Arc<T> doSeekFloorList(FST.Arc<T> arc, int targetLabel) throws IOException {
|
||||
if (arc.label == targetLabel) {
|
||||
if (arc.label() == targetLabel) {
|
||||
// Match -- recurse
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
return null;
|
||||
}
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
return fst.readFirstTargetArc(arc, getArc(upto), fstReader);
|
||||
} else if (arc.label > targetLabel) {
|
||||
} else if (arc.label() > targetLabel) {
|
||||
// TODO: if each arc could somehow read the arc just
|
||||
// before, we can save this re-scan. The ceil case
|
||||
// doesn't need this because it reads the next arc
|
||||
|
@ -521,7 +519,7 @@ abstract class FSTEnum<T> {
|
|||
// First, walk backwards until we find a first arc
|
||||
// that's before our target label:
|
||||
fst.readFirstTargetArc(getArc(upto-1), arc, fstReader);
|
||||
if (arc.label < targetLabel) {
|
||||
if (arc.label() < targetLabel) {
|
||||
// Then, scan forwards to the arc just before
|
||||
// the targetLabel:
|
||||
while(!arc.isLast() && fst.readNextArcLabel(arc, fstReader) < targetLabel) {
|
||||
|
@ -553,7 +551,7 @@ abstract class FSTEnum<T> {
|
|||
}
|
||||
|
||||
/** Seeks to exactly target term. */
|
||||
protected boolean doSeekExact() throws IOException {
|
||||
boolean doSeekExact() throws IOException {
|
||||
|
||||
// TODO: possibly caller could/should provide common
|
||||
// prefix length? ie this work may be redundant if
|
||||
|
@ -584,7 +582,7 @@ abstract class FSTEnum<T> {
|
|||
return false;
|
||||
}
|
||||
// Match -- recurse:
|
||||
output[upto] = fst.outputs.add(output[upto-1], nextArc.output);
|
||||
output[upto] = fst.outputs.add(output[upto-1], nextArc.output());
|
||||
if (targetLabel == FST.END_LABEL) {
|
||||
//System.out.println(" return found; upto=" + upto + " output=" + output[upto] + " nextArc=" + nextArc.isLast());
|
||||
return true;
|
||||
|
@ -621,13 +619,13 @@ abstract class FSTEnum<T> {
|
|||
assert arc != null;
|
||||
|
||||
while (true) {
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
if (arc.label == FST.END_LABEL) {
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (arc.label() == FST.END_LABEL) {
|
||||
// Final node
|
||||
break;
|
||||
}
|
||||
//System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto]));
|
||||
setCurrentLabel(arc.label);
|
||||
setCurrentLabel(arc.label());
|
||||
incr();
|
||||
|
||||
final FST.Arc<T> nextArc = getArc(upto);
|
||||
|
@ -644,9 +642,9 @@ abstract class FSTEnum<T> {
|
|||
assert arc != null;
|
||||
|
||||
while (true) {
|
||||
setCurrentLabel(arc.label);
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output);
|
||||
if (arc.label == FST.END_LABEL) {
|
||||
setCurrentLabel(arc.label());
|
||||
output[upto] = fst.outputs.add(output[upto-1], arc.output());
|
||||
if (arc.label() == FST.END_LABEL) {
|
||||
// Final node
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -41,15 +41,15 @@ final class NodeHash<T> {
|
|||
|
||||
private boolean nodesEqual(Builder.UnCompiledNode<T> node, long address) throws IOException {
|
||||
fst.readFirstRealTargetArc(address, scratchArc, in);
|
||||
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
|
||||
if (scratchArc.bytesPerArc() != 0 && node.numArcs != scratchArc.numArcs()) {
|
||||
return false;
|
||||
}
|
||||
for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
|
||||
final Builder.Arc<T> arc = node.arcs[arcUpto];
|
||||
if (arc.label != scratchArc.label ||
|
||||
!arc.output.equals(scratchArc.output) ||
|
||||
((Builder.CompiledNode) arc.target).node != scratchArc.target ||
|
||||
!arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
|
||||
if (arc.label != scratchArc.label() ||
|
||||
!arc.output.equals(scratchArc.output()) ||
|
||||
((Builder.CompiledNode) arc.target).node != scratchArc.target() ||
|
||||
!arc.nextFinalOutput.equals(scratchArc.nextFinalOutput()) ||
|
||||
arc.isFinal != scratchArc.isFinal()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -98,10 +98,10 @@ final class NodeHash<T> {
|
|||
fst.readFirstRealTargetArc(node, scratchArc, in);
|
||||
while(true) {
|
||||
// System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
|
||||
h = PRIME * h + scratchArc.label;
|
||||
h = PRIME * h + (int) (scratchArc.target^(scratchArc.target>>32));
|
||||
h = PRIME * h + scratchArc.output.hashCode();
|
||||
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
|
||||
h = PRIME * h + scratchArc.label();
|
||||
h = PRIME * h + (int) (scratchArc.target() ^(scratchArc.target() >>32));
|
||||
h = PRIME * h + scratchArc.output().hashCode();
|
||||
h = PRIME * h + scratchArc.nextFinalOutput().hashCode();
|
||||
if (scratchArc.isFinal()) {
|
||||
h += 17;
|
||||
}
|
||||
|
|
|
@ -55,11 +55,11 @@ public final class Util {
|
|||
if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
|
||||
return null;
|
||||
}
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
|
||||
if (arc.isFinal()) {
|
||||
return fst.outputs.add(output, arc.nextFinalOutput);
|
||||
return fst.outputs.add(output, arc.nextFinalOutput());
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ public final class Util {
|
|||
final BytesReader fstReader = fst.getBytesReader();
|
||||
|
||||
// TODO: would be nice not to alloc this on every lookup
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<>());
|
||||
|
||||
// Accumulate output as we go
|
||||
T output = fst.outputs.getNoOutput();
|
||||
|
@ -83,11 +83,11 @@ public final class Util {
|
|||
if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) {
|
||||
return null;
|
||||
}
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
|
||||
if (arc.isFinal()) {
|
||||
return fst.outputs.add(output, arc.nextFinalOutput);
|
||||
return fst.outputs.add(output, arc.nextFinalOutput());
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
@ -125,7 +125,7 @@ public final class Util {
|
|||
*/
|
||||
@Deprecated
|
||||
public static IntsRef getByOutput(FST<Long> fst, long targetOutput, BytesReader in, Arc<Long> arc, Arc<Long> scratchArc, IntsRefBuilder result) throws IOException {
|
||||
long output = arc.output;
|
||||
long output = arc.output();
|
||||
int upto = 0;
|
||||
|
||||
//System.out.println("reverseLookup output=" + targetOutput);
|
||||
|
@ -133,7 +133,7 @@ public final class Util {
|
|||
while(true) {
|
||||
//System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
|
||||
if (arc.isFinal()) {
|
||||
final long finalOutput = output + arc.nextFinalOutput;
|
||||
final long finalOutput = output + arc.nextFinalOutput();
|
||||
//System.out.println(" isFinal finalOutput=" + finalOutput);
|
||||
if (finalOutput == targetOutput) {
|
||||
result.setLength(upto);
|
||||
|
@ -149,19 +149,19 @@ public final class Util {
|
|||
//System.out.println(" targetHasArcs");
|
||||
result.grow(1+upto);
|
||||
|
||||
fst.readFirstRealTargetArc(arc.target, arc, in);
|
||||
fst.readFirstRealTargetArc(arc.target(), arc, in);
|
||||
|
||||
if (arc.bytesPerArc != 0 && arc.arcIdx > Integer.MIN_VALUE) {
|
||||
if (arc.bytesPerArc() != 0 && arc.arcIdx() > Integer.MIN_VALUE) {
|
||||
|
||||
int low = 0;
|
||||
int high = arc.numArcs-1;
|
||||
int high = arc.numArcs() -1;
|
||||
int mid = 0;
|
||||
//System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output);
|
||||
boolean exact = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc*mid);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() *mid);
|
||||
final byte flags = in.readByte();
|
||||
fst.readLabel(in);
|
||||
final long minArcOutput;
|
||||
|
@ -182,17 +182,19 @@ public final class Util {
|
|||
}
|
||||
}
|
||||
|
||||
int idx;
|
||||
if (high == -1) {
|
||||
return null;
|
||||
} else if (exact) {
|
||||
arc.arcIdx = mid-1;
|
||||
idx = mid;
|
||||
} else {
|
||||
arc.arcIdx = low-2;
|
||||
idx = low - 1;
|
||||
}
|
||||
|
||||
arc.arcIdx(idx - 1);
|
||||
fst.readNextRealArc(arc, in);
|
||||
result.setIntAt(upto++, arc.label);
|
||||
output += arc.output;
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output += arc.output();
|
||||
|
||||
} else {
|
||||
|
||||
|
@ -203,13 +205,13 @@ public final class Util {
|
|||
|
||||
// This is the min output we'd hit if we follow
|
||||
// this arc:
|
||||
final long minArcOutput = output + arc.output;
|
||||
final long minArcOutput = output + arc.output();
|
||||
|
||||
if (minArcOutput == targetOutput) {
|
||||
// Recurse on this arc:
|
||||
//System.out.println(" match! break");
|
||||
output = minArcOutput;
|
||||
result.setIntAt(upto++, arc.label);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
break;
|
||||
} else if (minArcOutput > targetOutput) {
|
||||
if (prevArc == null) {
|
||||
|
@ -218,8 +220,8 @@ public final class Util {
|
|||
} else {
|
||||
// Recurse on previous arc:
|
||||
arc.copyFrom(prevArc);
|
||||
result.setIntAt(upto++, arc.label);
|
||||
output += arc.output;
|
||||
result.setIntAt(upto++, arc.label());
|
||||
output += arc.output();
|
||||
//System.out.println(" recurse prev label=" + (char) arc.label + " output=" + output);
|
||||
break;
|
||||
}
|
||||
|
@ -227,7 +229,7 @@ public final class Util {
|
|||
// Recurse on this arc:
|
||||
output = minArcOutput;
|
||||
//System.out.println(" recurse last label=" + (char) arc.label + " output=" + output);
|
||||
result.setIntAt(upto++, arc.label);
|
||||
result.setIntAt(upto++, arc.label());
|
||||
break;
|
||||
} else {
|
||||
// Read next arc in this node:
|
||||
|
@ -261,12 +263,7 @@ public final class Util {
|
|||
// Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form
|
||||
public int payload;
|
||||
|
||||
/** Sole constructor */
|
||||
public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input) {
|
||||
this(output, arc, input, 0, null, -1);
|
||||
}
|
||||
|
||||
public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
|
||||
FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
|
||||
this.arc = new FST.Arc<T>().copyFrom(arc);
|
||||
this.output = output;
|
||||
this.input = input;
|
||||
|
@ -275,7 +272,7 @@ public final class Util {
|
|||
this.payload = payload;
|
||||
}
|
||||
|
||||
public FSTPath<T> newPath(T output, IntsRefBuilder input) {
|
||||
FSTPath<T> newPath(T output, IntsRefBuilder input) {
|
||||
return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload);
|
||||
}
|
||||
|
||||
|
@ -289,7 +286,8 @@ public final class Util {
|
|||
* tie breaks by path.input. */
|
||||
private static class TieBreakByInputComparator<T> implements Comparator<FSTPath<T>> {
|
||||
private final Comparator<T> comparator;
|
||||
public TieBreakByInputComparator(Comparator<T> comparator) {
|
||||
|
||||
TieBreakByInputComparator(Comparator<T> comparator) {
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
|
@ -318,7 +316,7 @@ public final class Util {
|
|||
private final Comparator<T> comparator;
|
||||
private final Comparator<FSTPath<T>> pathComparator;
|
||||
|
||||
TreeSet<FSTPath<T>> queue = null;
|
||||
TreeSet<FSTPath<T>> queue;
|
||||
|
||||
/**
|
||||
* Creates an unbounded TopNSearcher
|
||||
|
@ -347,7 +345,7 @@ public final class Util {
|
|||
|
||||
assert queue != null;
|
||||
|
||||
T output = fst.outputs.add(path.output, path.arc.output);
|
||||
T output = fst.outputs.add(path.output, path.arc.output());
|
||||
|
||||
if (queue.size() == maxQueueDepth) {
|
||||
FSTPath<T> bottom = queue.last();
|
||||
|
@ -357,7 +355,7 @@ public final class Util {
|
|||
return;
|
||||
} else if (comp == 0) {
|
||||
// Tie break by alpha sort on the input:
|
||||
path.input.append(path.arc.label);
|
||||
path.input.append(path.arc.label());
|
||||
final int cmp = bottom.input.get().compareTo(path.input.get());
|
||||
path.input.setLength(path.input.length() - 1);
|
||||
|
||||
|
@ -370,15 +368,14 @@ public final class Util {
|
|||
}
|
||||
}
|
||||
// Competes
|
||||
} else {
|
||||
// Queue isn't full yet, so any path we hit competes:
|
||||
}
|
||||
// else ... Queue isn't full yet, so any path we hit competes:
|
||||
|
||||
// copy over the current input to the new input
|
||||
// and add the arc.label to the end
|
||||
IntsRefBuilder newInput = new IntsRefBuilder();
|
||||
newInput.copyInts(path.input.get());
|
||||
newInput.append(path.arc.label);
|
||||
newInput.append(path.arc.label());
|
||||
|
||||
FSTPath<T> newPath = path.newPath(output, newInput);
|
||||
if (acceptPartialPath(newPath)) {
|
||||
|
@ -408,7 +405,7 @@ public final class Util {
|
|||
|
||||
// Bootstrap: find the min starting arc
|
||||
while (true) {
|
||||
if (allowEmptyString || path.arc.label != FST.END_LABEL) {
|
||||
if (allowEmptyString || path.arc.label() != FST.END_LABEL) {
|
||||
addIfCompetitive(path);
|
||||
}
|
||||
if (path.arc.isLast()) {
|
||||
|
@ -457,7 +454,7 @@ public final class Util {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (path.arc.label == FST.END_LABEL) {
|
||||
if (path.arc.label() == FST.END_LABEL) {
|
||||
// Empty string!
|
||||
path.input.setLength(path.input.length() - 1);
|
||||
results.add(new Result<>(path.input.get(), path.output));
|
||||
|
@ -485,7 +482,7 @@ public final class Util {
|
|||
while(true) {
|
||||
// tricky: instead of comparing output == 0, we must
|
||||
// express it via the comparator compare(output, 0) == 0
|
||||
if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
|
||||
if (comparator.compare(NO_OUTPUT, path.arc.output()) == 0) {
|
||||
if (queue == null) {
|
||||
foundZero = true;
|
||||
break;
|
||||
|
@ -514,9 +511,9 @@ public final class Util {
|
|||
path.arc.copyFrom(scratchArc);
|
||||
}
|
||||
|
||||
if (path.arc.label == FST.END_LABEL) {
|
||||
if (path.arc.label() == FST.END_LABEL) {
|
||||
// Add final output:
|
||||
path.output = fst.outputs.add(path.output, path.arc.output);
|
||||
path.output = fst.outputs.add(path.output, path.arc.output());
|
||||
if (acceptResult(path)) {
|
||||
results.add(new Result<>(path.input.get(), path.output));
|
||||
} else {
|
||||
|
@ -524,8 +521,8 @@ public final class Util {
|
|||
}
|
||||
break;
|
||||
} else {
|
||||
path.input.append(path.arc.label);
|
||||
path.output = fst.outputs.add(path.output, path.arc.output);
|
||||
path.input.append(path.arc.label());
|
||||
path.output = fst.outputs.add(path.output, path.arc.output());
|
||||
if (acceptPartialPath(path) == false) {
|
||||
break;
|
||||
}
|
||||
|
@ -641,7 +638,7 @@ public final class Util {
|
|||
|
||||
// This is the start arc in the automaton (from the epsilon state to the first state
|
||||
// with outgoing transitions).
|
||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<>());
|
||||
|
||||
// A queue of transitions to consider for the next level.
|
||||
final List<FST.Arc<T>> thisLevelQueue = new ArrayList<>();
|
||||
|
@ -656,7 +653,7 @@ public final class Util {
|
|||
|
||||
// A bitset of already seen states (target offset).
|
||||
final BitSet seen = new BitSet();
|
||||
seen.set((int) startArc.target);
|
||||
seen.set((int) startArc.target());
|
||||
|
||||
// Shape for states.
|
||||
final String stateShape = "circle";
|
||||
|
@ -689,16 +686,16 @@ public final class Util {
|
|||
final T finalOutput;
|
||||
if (startArc.isFinal()) {
|
||||
isFinal = true;
|
||||
finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
|
||||
finalOutput = startArc.nextFinalOutput() == NO_OUTPUT ? null : startArc.nextFinalOutput();
|
||||
} else {
|
||||
isFinal = false;
|
||||
finalOutput = null;
|
||||
}
|
||||
|
||||
emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
|
||||
emitDotState(out, Long.toString(startArc.target()), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
|
||||
}
|
||||
|
||||
out.write(" initial -> " + startArc.target + "\n");
|
||||
out.write(" initial -> " + startArc.target() + "\n");
|
||||
|
||||
int level = 0;
|
||||
|
||||
|
@ -717,9 +714,9 @@ public final class Util {
|
|||
// scan all target arcs
|
||||
//System.out.println(" readFirstTarget...");
|
||||
|
||||
final long node = arc.target;
|
||||
final long node = arc.target();
|
||||
|
||||
fst.readFirstRealTargetArc(arc.target, arc, r);
|
||||
fst.readFirstRealTargetArc(arc.target(), arc, r);
|
||||
|
||||
//System.out.println(" firstTarget: " + arc);
|
||||
|
||||
|
@ -727,7 +724,7 @@ public final class Util {
|
|||
|
||||
//System.out.println(" cycle arc=" + arc);
|
||||
// Emit the unseen state and add it to the queue for the next level.
|
||||
if (arc.target >= 0 && !seen.get((int) arc.target)) {
|
||||
if (arc.target() >= 0 && !seen.get((int) arc.target())) {
|
||||
|
||||
/*
|
||||
boolean isFinal = false;
|
||||
|
@ -748,35 +745,35 @@ public final class Util {
|
|||
}
|
||||
|
||||
final String finalOutput;
|
||||
if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
|
||||
finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
|
||||
if (arc.nextFinalOutput() != null && arc.nextFinalOutput() != NO_OUTPUT) {
|
||||
finalOutput = fst.outputs.outputToString(arc.nextFinalOutput());
|
||||
} else {
|
||||
finalOutput = "";
|
||||
}
|
||||
|
||||
emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput);
|
||||
emitDotState(out, Long.toString(arc.target()), stateShape, stateColor, finalOutput);
|
||||
// To see the node address, use this instead:
|
||||
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
|
||||
seen.set((int) arc.target);
|
||||
seen.set((int) arc.target());
|
||||
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
|
||||
sameLevelStates.add((int) arc.target);
|
||||
sameLevelStates.add((int) arc.target());
|
||||
}
|
||||
|
||||
String outs;
|
||||
if (arc.output != NO_OUTPUT) {
|
||||
outs = "/" + fst.outputs.outputToString(arc.output);
|
||||
if (arc.output() != NO_OUTPUT) {
|
||||
outs = "/" + fst.outputs.outputToString(arc.output());
|
||||
} else {
|
||||
outs = "";
|
||||
}
|
||||
|
||||
if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
|
||||
if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput() != NO_OUTPUT) {
|
||||
// Tricky special case: sometimes, due to
|
||||
// pruning, the builder can [sillily] produce
|
||||
// an FST with an arc into the final end state
|
||||
// (-1) but also with a next final output; in
|
||||
// this case we pull that output up onto this
|
||||
// arc
|
||||
outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
|
||||
outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput()) + "]";
|
||||
}
|
||||
|
||||
final String arcColor;
|
||||
|
@ -786,8 +783,8 @@ public final class Util {
|
|||
arcColor = "black";
|
||||
}
|
||||
|
||||
assert arc.label != FST.END_LABEL;
|
||||
out.write(" " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n");
|
||||
assert arc.label() != FST.END_LABEL;
|
||||
out.write(" " + node + " -> " + arc.target() + " [label=\"" + printableLabel(arc.label()) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n");
|
||||
|
||||
// Break the loop if we're on the last arc of this state.
|
||||
if (arc.isLast()) {
|
||||
|
@ -935,55 +932,52 @@ public final class Util {
|
|||
* @param arc the arc to read into in place
|
||||
* @param in the fst's {@link BytesReader}
|
||||
*/
|
||||
public static <T> Arc<T> readCeilArc(int label, FST<T> fst, Arc<T> follow,
|
||||
Arc<T> arc, BytesReader in) throws IOException {
|
||||
// TODO: maybe this would be useful in the FST class - we could simplify some other code like FSTEnum?
|
||||
public static <T> Arc<T> readCeilArc(int label, FST<T> fst, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
|
||||
if (label == FST.END_LABEL) {
|
||||
if (follow.isFinal()) {
|
||||
if (follow.target <= 0) {
|
||||
arc.flags = FST.BIT_LAST_ARC;
|
||||
if (follow.target() <= 0) {
|
||||
arc.flags((byte) FST.BIT_LAST_ARC);
|
||||
} else {
|
||||
arc.flags = 0;
|
||||
arc.flags((byte) 0);
|
||||
// NOTE: nextArc is a node (not an address!) in this case:
|
||||
arc.nextArc = follow.target;
|
||||
arc.nextArc(follow.target());
|
||||
}
|
||||
arc.output = follow.nextFinalOutput;
|
||||
arc.label = FST.END_LABEL;
|
||||
arc.output(follow.nextFinalOutput());
|
||||
arc.label(FST.END_LABEL);
|
||||
return arc;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
if (!FST.targetHasArcs(follow)) {
|
||||
return null;
|
||||
}
|
||||
fst.readFirstTargetArc(follow, arc, in);
|
||||
if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
|
||||
if (arc.arcIdx == Integer.MIN_VALUE) {
|
||||
if (arc.bytesPerArc() != 0 && arc.label() != FST.END_LABEL) {
|
||||
if (arc.arcIdx() == Integer.MIN_VALUE) {
|
||||
// Arcs are in an array-with-gaps
|
||||
int offset = label - arc.label;
|
||||
if (offset >= arc.numArcs) {
|
||||
int offset = label - arc.label();
|
||||
if (offset >= arc.numArcs()) {
|
||||
return null;
|
||||
} else if (offset < 0) {
|
||||
return arc;
|
||||
} else {
|
||||
arc.nextArc = arc.posArcsStart - offset * arc.bytesPerArc;
|
||||
arc.nextArc(arc.posArcsStart() - offset * arc.bytesPerArc());
|
||||
return fst.readNextRealArc(arc, in);
|
||||
}
|
||||
}
|
||||
// Arcs are in a packed array -- use binary search to find
|
||||
// the target.
|
||||
|
||||
int low = arc.arcIdx;
|
||||
int high = arc.numArcs - 1;
|
||||
int low = arc.arcIdx();
|
||||
int mid = 0;
|
||||
int high = arc.numArcs() - 1;
|
||||
// System.out.println("do arc array low=" + low + " high=" + high +
|
||||
// " targetLabel=" + targetLabel);
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * mid + 1);
|
||||
in.setPosition(arc.posArcsStart());
|
||||
in.skipBytes(arc.bytesPerArc() * mid + 1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - label;
|
||||
// System.out.println(" cycle low=" + low + " high=" + high + " mid=" +
|
||||
|
@ -993,28 +987,27 @@ public final class Util {
|
|||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
arc.arcIdx = mid-1;
|
||||
arc.arcIdx(mid - 1);
|
||||
return fst.readNextRealArc(arc, in);
|
||||
}
|
||||
}
|
||||
if (low == arc.numArcs) {
|
||||
if (low == arc.numArcs()) {
|
||||
// DEAD END!
|
||||
return null;
|
||||
}
|
||||
|
||||
arc.arcIdx = (low > high ? high : low);
|
||||
return fst.readNextRealArc(arc, in);
|
||||
arc.arcIdx(high + 1);
|
||||
return fst.readNextRealArc(arc, in );
|
||||
}
|
||||
|
||||
// Linear scan
|
||||
fst.readFirstRealTargetArc(follow.target, arc, in);
|
||||
fst.readFirstRealTargetArc(follow.target(), arc, in);
|
||||
|
||||
while (true) {
|
||||
// System.out.println(" non-bs cycle");
|
||||
// TODO: we should fix this code to not have to create
|
||||
// object for the output of every arc we scan... only
|
||||
// for the matching arc, if found
|
||||
if (arc.label >= label) {
|
||||
if (arc.label() >= label) {
|
||||
// System.out.println(" found!");
|
||||
return arc;
|
||||
} else if (arc.isLast()) {
|
||||
|
@ -1024,4 +1017,5 @@ public final class Util {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1201,19 +1201,19 @@ public class TestFSTs extends LuceneTestCase {
|
|||
private void checkStopNodes(FST<Long> fst, PositiveIntOutputs outputs) throws Exception {
|
||||
final Long nothing = outputs.getNoOutput();
|
||||
FST.Arc<Long> startArc = fst.getFirstArc(new FST.Arc<Long>());
|
||||
assertEquals(nothing, startArc.output);
|
||||
assertEquals(nothing, startArc.nextFinalOutput);
|
||||
assertEquals(nothing, startArc.output());
|
||||
assertEquals(nothing, startArc.nextFinalOutput());
|
||||
|
||||
FST.Arc<Long> arc = fst.readFirstTargetArc(startArc, new FST.Arc<Long>(),
|
||||
fst.getBytesReader());
|
||||
assertEquals('a', arc.label);
|
||||
assertEquals(17, arc.nextFinalOutput.longValue());
|
||||
assertEquals('a', arc.label());
|
||||
assertEquals(17, arc.nextFinalOutput().longValue());
|
||||
assertTrue(arc.isFinal());
|
||||
|
||||
arc = fst.readNextArc(arc, fst.getBytesReader());
|
||||
assertEquals('b', arc.label);
|
||||
assertEquals('b', arc.label());
|
||||
assertFalse(arc.isFinal());
|
||||
assertEquals(42, arc.output.longValue());
|
||||
assertEquals(42, arc.output().longValue());
|
||||
}
|
||||
|
||||
static final Comparator<Long> minLongComparator = new Comparator<Long> () {
|
||||
|
@ -1404,7 +1404,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
fail();
|
||||
}
|
||||
prefixOutput += arc.output;
|
||||
prefixOutput += arc.output();
|
||||
}
|
||||
|
||||
final int topN = TestUtil.nextInt(random, 1, 10);
|
||||
|
@ -1526,7 +1526,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
if (fst.findTargetArc((int) prefix.charAt(idx), arc, arc, reader) == null) {
|
||||
fail();
|
||||
}
|
||||
prefixOutput = outputs.add(prefixOutput, arc.output);
|
||||
prefixOutput = outputs.add(prefixOutput, arc.output());
|
||||
}
|
||||
|
||||
final int topN = TestUtil.nextInt(random, 1, 10);
|
||||
|
@ -1623,10 +1623,10 @@ public class TestFSTs extends LuceneTestCase {
|
|||
FST.BytesReader reader = fst.getBytesReader();
|
||||
arc = fst.findTargetArc((int) 'm', arc, arc, reader);
|
||||
assertNotNull(arc);
|
||||
assertEquals(new BytesRef("m"), arc.output);
|
||||
assertEquals(new BytesRef("m"), arc.output());
|
||||
|
||||
// NOTE: illegal:
|
||||
arc.output.length = 0;
|
||||
arc.output().length = 0;
|
||||
|
||||
fst.getFirstArc(arc);
|
||||
try {
|
||||
|
|
|
@ -79,9 +79,9 @@ public class TestFstDirect extends LuceneTestCase {
|
|||
BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
|
||||
int sparseArrayArcCount = 0, directArrayArcCount = 0, listArcCount = 0;
|
||||
while(fstEnum.next() != null) {
|
||||
if (fstEnum.arcs[fstEnum.upto].bytesPerArc == 0) {
|
||||
if (fstEnum.arcs[fstEnum.upto].bytesPerArc() == 0) {
|
||||
listArcCount ++;
|
||||
} else if (fstEnum.arcs[fstEnum.upto].arcIdx == Integer.MIN_VALUE) {
|
||||
} else if (fstEnum.arcs[fstEnum.upto].arcIdx() == Integer.MIN_VALUE) {
|
||||
directArrayArcCount ++;
|
||||
} else {
|
||||
sparseArrayArcCount ++;
|
||||
|
|
|
@ -277,7 +277,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -303,9 +303,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
//if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
|
||||
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
|
||||
//}
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -404,19 +404,19 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -517,9 +517,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
termExists = false;
|
||||
}
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -529,7 +529,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
// if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -619,7 +619,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
targetUpto = 0;
|
||||
|
||||
IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -642,14 +642,14 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
break;
|
||||
}
|
||||
arc = arcs[1+targetUpto];
|
||||
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label() + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// TODO: we could save the outputs in local
|
||||
// byte[][] instead of making new objs on every
|
||||
// seek; but, often the FST doesn't have any
|
||||
// shared bytes (but this could change if we
|
||||
// reverse vLong byte order)
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1+lastFrame.ord];
|
||||
|
@ -722,19 +722,19 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// Empty string prefix must have an output (block) in the index!
|
||||
assert arc.isFinal();
|
||||
assert arc.output != null;
|
||||
assert arc.output() != null;
|
||||
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" no seek state; push root frame");
|
||||
//}
|
||||
|
||||
output = arc.output;
|
||||
output = arc.output();
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
//term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), 0);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -789,9 +789,9 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
arc = nextArc;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output != null;
|
||||
if (arc.output != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output);
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
|
||||
output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
|
@ -801,7 +801,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
//if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput), targetUpto);
|
||||
currentFrame = pushFrame(arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), targetUpto);
|
||||
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
|
||||
}
|
||||
}
|
||||
|
@ -854,8 +854,8 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
|||
}
|
||||
if (fr.index != null) {
|
||||
assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix-1)&0xFF)) {
|
||||
out.println(" broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" + (char) (term.byteAt(f.prefix-1)&0xFF));
|
||||
throw new RuntimeException("seek state is broken");
|
||||
}
|
||||
Pair<BytesRef,Long> output = Util.get(fr.index, prefix);
|
||||
|
|
|
@ -727,7 +727,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
|
|||
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
|
||||
// This node has END_BYTE arc leaving, meaning it's an
|
||||
// "exact" match:
|
||||
searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output), false, path.input);
|
||||
searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output()), false, path.input);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ public class FSTUtil {
|
|||
newInput.append(t.min);
|
||||
queue.add(new Path<>(t.dest, new FST.Arc<T>()
|
||||
.copyFrom(nextArc), fst.outputs
|
||||
.add(path.output, nextArc.output), newInput));
|
||||
.add(path.output, nextArc.output()), newInput));
|
||||
}
|
||||
} else {
|
||||
// TODO: if this transition's TO state is accepting, and
|
||||
|
@ -119,21 +119,21 @@ public class FSTUtil {
|
|||
// done in AnalyzingSuggester).
|
||||
FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode,
|
||||
scratchArc, fstReader);
|
||||
while (nextArc != null && nextArc.label <= max) {
|
||||
assert nextArc.label <= max;
|
||||
assert nextArc.label >= min : nextArc.label + " "
|
||||
while (nextArc != null && nextArc.label() <= max) {
|
||||
assert nextArc.label() <= max;
|
||||
assert nextArc.label() >= min : nextArc.label() + " "
|
||||
+ min;
|
||||
final IntsRefBuilder newInput = new IntsRefBuilder();
|
||||
newInput.copyInts(currentInput.get());
|
||||
newInput.append(nextArc.label);
|
||||
newInput.append(nextArc.label());
|
||||
queue.add(new Path<>(t.dest, new FST.Arc<T>()
|
||||
.copyFrom(nextArc), fst.outputs
|
||||
.add(path.output, nextArc.output), newInput));
|
||||
final int label = nextArc.label; // used in assert
|
||||
.add(path.output, nextArc.output()), newInput));
|
||||
final int label = nextArc.label(); // used in assert
|
||||
nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc,
|
||||
fstReader);
|
||||
assert nextArc == null || label < nextArc.label : "last: " + label
|
||||
+ " next: " + nextArc.label;
|
||||
assert nextArc == null || label < nextArc.label() : "last: " + label
|
||||
+ " next: " + nextArc.label();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -599,7 +599,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
|
|||
|
||||
@Override
|
||||
protected void addIfCompetitive(Util.FSTPath<Long> path) {
|
||||
if (path.arc.label != separator) {
|
||||
if (path.arc.label() != separator) {
|
||||
//System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc);
|
||||
super.addIfCompetitive(path);
|
||||
} else {
|
||||
|
@ -718,7 +718,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
|
|||
if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) {
|
||||
return null;
|
||||
} else {
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -159,7 +159,7 @@ public final class NRTSuggester implements Accountable {
|
|||
// We are removing dups
|
||||
if (path.payload == -1) {
|
||||
// This path didn't yet see the complete surface form; let's see if it just did with the arc output we just added:
|
||||
BytesRef arcOutput = path.arc.output.output2;
|
||||
BytesRef arcOutput = path.arc.output().output2;
|
||||
BytesRef output = path.output.output2;
|
||||
for(int i=0;i<arcOutput.length;i++) {
|
||||
if (arcOutput.bytes[arcOutput.offset + i] == payloadSep) {
|
||||
|
|
|
@ -180,9 +180,9 @@ public class FSTCompletion {
|
|||
// Descend into the automaton using the key as prefix.
|
||||
if (descendWithPrefix(arc, utf8)) {
|
||||
automaton.readFirstTargetArc(arc, arc, fstReader);
|
||||
if (arc.label == FST.END_LABEL) {
|
||||
if (arc.label() == FST.END_LABEL) {
|
||||
// Normalize prefix-encoded weight.
|
||||
return rootArc.label;
|
||||
return rootArc.label();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -273,7 +273,7 @@ public class FSTCompletion {
|
|||
// of the key prefix. The arc we're at is the last key's byte,
|
||||
// so we will collect it too.
|
||||
output.length = key.length - 1;
|
||||
if (collect(res, num, rootArc.label, output, arc) && !collectAll) {
|
||||
if (collect(res, num, rootArc.label(), output, arc) && !collectAll) {
|
||||
// We have enough suggestions to return immediately. Keep on looking
|
||||
// for an
|
||||
// exact match, if requested.
|
||||
|
@ -360,11 +360,11 @@ public class FSTCompletion {
|
|||
output.bytes = ArrayUtil.grow(output.bytes);
|
||||
}
|
||||
assert output.offset == 0;
|
||||
output.bytes[output.length++] = (byte) arc.label;
|
||||
output.bytes[output.length++] = (byte) arc.label();
|
||||
FST.BytesReader fstReader = automaton.getBytesReader();
|
||||
automaton.readFirstTargetArc(arc, arc, fstReader);
|
||||
while (true) {
|
||||
if (arc.label == FST.END_LABEL) {
|
||||
if (arc.label() == FST.END_LABEL) {
|
||||
res.add(new Completion(output, bucket));
|
||||
if (res.size() >= num) return true;
|
||||
} else {
|
||||
|
|
|
@ -186,7 +186,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
CharsRefBuilder spare = new CharsRefBuilder();
|
||||
if (exactFirst && arc.isFinal()) {
|
||||
spare.copyUTF8Bytes(scratch.get());
|
||||
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
|
||||
results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput())));
|
||||
if (--num == 0) {
|
||||
return results; // that was quick
|
||||
}
|
||||
|
@ -227,7 +227,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) {
|
||||
return null;
|
||||
} else {
|
||||
output += arc.output.longValue();
|
||||
output += arc.output().longValue();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -250,7 +250,7 @@ public class WFSTCompletionLookup extends Lookup implements Accountable {
|
|||
if (result == null || !arc.isFinal()) {
|
||||
return null;
|
||||
} else {
|
||||
return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput));
|
||||
return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -220,7 +220,7 @@ public class FSTTester<T> {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
}
|
||||
|
||||
if (prefixLength != null) {
|
||||
|
@ -253,14 +253,14 @@ public class FSTTester<T> {
|
|||
arcs.clear();
|
||||
|
||||
// accumulate output
|
||||
output = fst.outputs.add(output, arc.output);
|
||||
output = fst.outputs.add(output, arc.output());
|
||||
|
||||
// append label
|
||||
if (arc.label == FST.END_LABEL) {
|
||||
if (arc.label() == FST.END_LABEL) {
|
||||
break;
|
||||
}
|
||||
|
||||
in.append(arc.label);
|
||||
in.append(arc.label());
|
||||
}
|
||||
|
||||
return output;
|
||||
|
|
Loading…
Reference in New Issue