mirror of https://github.com/apache/lucene.git
Use Arrays.compareUnsigned instead of a hand-written loop to compare suffixes.
This commit is contained in:
parent
4d3d219146
commit
2cbf5ff761
|
@ -184,7 +184,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||||
// term.length + " vs prefix=" + f.prefix);
|
// term.length + " vs prefix=" + f.prefix);
|
||||||
if (f.prefix > targetBeforeCurrentLength) {
|
if (f.prefixLength > targetBeforeCurrentLength) {
|
||||||
// System.out.println(" do rewind!");
|
// System.out.println(" do rewind!");
|
||||||
f.rewind();
|
f.rewind();
|
||||||
} else {
|
} else {
|
||||||
|
@ -192,11 +192,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// System.out.println(" skip rewind!");
|
// System.out.println(" skip rewind!");
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
assert length == f.prefix;
|
assert length == f.prefixLength;
|
||||||
assert termOrd == f.termOrdOrig;
|
assert termOrd == f.termOrdOrig;
|
||||||
} else {
|
} else {
|
||||||
f.nextEnt = -1;
|
f.nextEnt = -1;
|
||||||
f.prefix = length;
|
f.prefixLength = length;
|
||||||
f.state.termBlockOrd = 0;
|
f.state.termBlockOrd = 0;
|
||||||
f.termOrdOrig = termOrd;
|
f.termOrdOrig = termOrd;
|
||||||
// System.out.println("set termOrdOrig=" + termOrd);
|
// System.out.println("set termOrdOrig=" + termOrd);
|
||||||
|
@ -412,7 +412,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// toHex(targetLabel));
|
// toHex(targetLabel));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
@ -472,7 +472,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
|
||||||
|
@ -686,7 +686,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// toHex(targetLabel));
|
// toHex(targetLabel));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
@ -747,7 +747,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
|
||||||
|
@ -785,7 +785,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
while (true) {
|
while (true) {
|
||||||
OrdsSegmentTermsEnumFrame f = getFrame(ord);
|
OrdsSegmentTermsEnumFrame f = getFrame(ord);
|
||||||
assert f != null;
|
assert f != null;
|
||||||
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
|
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength);
|
||||||
if (f.nextEnt == -1) {
|
if (f.nextEnt == -1) {
|
||||||
out.println(
|
out.println(
|
||||||
" frame "
|
" frame "
|
||||||
|
@ -796,7 +796,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
+ f.fp
|
+ f.fp
|
||||||
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
||||||
+ " prefixLen="
|
+ " prefixLen="
|
||||||
+ f.prefix
|
+ f.prefixLength
|
||||||
+ " prefix="
|
+ " prefix="
|
||||||
+ ToStringUtils.bytesRefToString(prefix)
|
+ ToStringUtils.bytesRefToString(prefix)
|
||||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||||
|
@ -826,7 +826,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
+ f.fp
|
+ f.fp
|
||||||
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
||||||
+ " prefixLen="
|
+ " prefixLen="
|
||||||
+ f.prefix
|
+ f.prefixLength
|
||||||
+ " prefix="
|
+ " prefix="
|
||||||
+ ToStringUtils.bytesRefToString(prefix)
|
+ ToStringUtils.bytesRefToString(prefix)
|
||||||
+ " nextEnt="
|
+ " nextEnt="
|
||||||
|
@ -853,12 +853,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
if (fr.index != null) {
|
if (fr.index != null) {
|
||||||
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||||
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
|
if (f.prefixLength > 0
|
||||||
|
&& isSeekFrame
|
||||||
|
&& f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) {
|
||||||
out.println(
|
out.println(
|
||||||
" broken seek state: arc.label="
|
" broken seek state: arc.label="
|
||||||
+ (char) f.arc.label()
|
+ (char) f.arc.label()
|
||||||
+ " vs term byte="
|
+ " vs term byte="
|
||||||
+ (char) (term.byteAt(f.prefix - 1) & 0xFF));
|
+ (char) (term.byteAt(f.prefixLength - 1) & 0xFF));
|
||||||
throw new RuntimeException("seek state is broken");
|
throw new RuntimeException("seek state is broken");
|
||||||
}
|
}
|
||||||
Output output = Util.get(fr.index, prefix);
|
Output output = Util.get(fr.index, prefix);
|
||||||
|
@ -887,7 +889,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
if (f == currentFrame) {
|
if (f == currentFrame) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (f.prefix == validIndexPrefix) {
|
if (f.prefixLength == validIndexPrefix) {
|
||||||
isSeekFrame = false;
|
isSeekFrame = false;
|
||||||
}
|
}
|
||||||
ord++;
|
ord++;
|
||||||
|
@ -969,7 +971,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum {
|
||||||
|
|
||||||
// Note that the seek state (last seek) has been
|
// Note that the seek state (last seek) has been
|
||||||
// invalidated beyond this depth
|
// invalidated beyond this depth
|
||||||
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
|
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength);
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
|
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
|
||||||
// }
|
// }
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.codecs.blocktreeords;
|
package org.apache.lucene.codecs.blocktreeords;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.BlockTermState;
|
import org.apache.lucene.codecs.BlockTermState;
|
||||||
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
@ -54,7 +55,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
||||||
|
|
||||||
// Length of prefix shared by all terms in this block
|
// Length of prefix shared by all terms in this block
|
||||||
int prefix;
|
int prefixLength;
|
||||||
|
|
||||||
// Number of entries (term or sub-block) in this block
|
// Number of entries (term or sub-block) in this block
|
||||||
int entCount;
|
int entCount;
|
||||||
|
@ -295,11 +296,11 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
|
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd;
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
termOrd++;
|
termOrd++;
|
||||||
suffix = suffixesReader.readVInt();
|
suffixLength = suffixesReader.readVInt();
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(ste.term.length());
|
ste.term.grow(ste.term.length());
|
||||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength);
|
||||||
// A normal term
|
// A normal term
|
||||||
ste.termExists = true;
|
ste.termExists = true;
|
||||||
return false;
|
return false;
|
||||||
|
@ -312,11 +313,11 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
final int code = suffixesReader.readVInt();
|
final int code = suffixesReader.readVInt();
|
||||||
suffix = code >>> 1;
|
suffixLength = code >>> 1;
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(ste.term.length());
|
ste.term.grow(ste.term.length());
|
||||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength);
|
||||||
if ((code & 1) == 0) {
|
if ((code & 1) == 0) {
|
||||||
// A normal term
|
// A normal term
|
||||||
ste.termExists = true;
|
ste.termExists = true;
|
||||||
|
@ -342,7 +343,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
// floor blocks we "typically" get
|
// floor blocks we "typically" get
|
||||||
public void scanToFloorFrame(BytesRef target) {
|
public void scanToFloorFrame(BytesRef target) {
|
||||||
|
|
||||||
if (!isFloor || target.length <= prefix) {
|
if (!isFloor || target.length <= prefixLength) {
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" +
|
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" +
|
||||||
// target.length + " vs prefix=" + prefix);
|
// target.length + " vs prefix=" + prefix);
|
||||||
|
@ -350,7 +351,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
|
final int targetLabel = target.bytes[target.offset + prefixLength] & 0xFF;
|
||||||
|
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char)
|
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char)
|
||||||
|
@ -532,7 +533,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
|
|
||||||
// Used only by assert
|
// Used only by assert
|
||||||
private boolean prefixMatches(BytesRef target) {
|
private boolean prefixMatches(BytesRef target) {
|
||||||
for (int bytePos = 0; bytePos < prefix; bytePos++) {
|
for (int bytePos = 0; bytePos < prefixLength; bytePos++) {
|
||||||
if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) {
|
if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -586,7 +587,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
}
|
}
|
||||||
|
|
||||||
private int startBytePos;
|
private int startBytePos;
|
||||||
private int suffix;
|
private int suffixLength;
|
||||||
private long subCode;
|
private long subCode;
|
||||||
|
|
||||||
// Target's prefix matches this block's prefix; we
|
// Target's prefix matches this block's prefix; we
|
||||||
|
@ -613,13 +614,11 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
assert prefixMatches(target);
|
assert prefixMatches(target);
|
||||||
|
|
||||||
// Loop over each entry (term or sub-block) in this block:
|
// Loop over each entry (term or sub-block) in this block:
|
||||||
// nextTerm: while(nextEnt < entCount) {
|
do {
|
||||||
nextTerm:
|
|
||||||
while (true) {
|
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
termOrd++;
|
termOrd++;
|
||||||
|
|
||||||
suffix = suffixesReader.readVInt();
|
suffixLength = suffixesReader.readVInt();
|
||||||
|
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// BytesRef suffixBytesRef = new BytesRef();
|
// BytesRef suffixBytesRef = new BytesRef();
|
||||||
|
@ -630,63 +629,41 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
final int termLen = prefix + suffix;
|
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
suffixesReader.skipBytes(suffix);
|
suffixesReader.skipBytes(suffixLength);
|
||||||
|
|
||||||
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
|
// Compare suffix and target.
|
||||||
int targetPos = target.offset + prefix;
|
final int cmp =
|
||||||
|
Arrays.compareUnsigned(
|
||||||
// Loop over bytes in the suffix, comparing to
|
suffixBytes,
|
||||||
// the target
|
startBytePos,
|
||||||
int bytePos = startBytePos;
|
startBytePos + suffixLength,
|
||||||
while (true) {
|
target.bytes,
|
||||||
final int cmp;
|
target.offset + prefixLength,
|
||||||
final boolean stop;
|
target.offset + target.length);
|
||||||
if (targetPos < targetLimit) {
|
|
||||||
cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
|
|
||||||
stop = false;
|
|
||||||
} else {
|
|
||||||
assert targetPos == targetLimit;
|
|
||||||
cmp = termLen - target.length;
|
|
||||||
stop = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
// Current entry is still before the target;
|
// Current entry is still before the target;
|
||||||
// keep scanning
|
// keep scanning
|
||||||
|
|
||||||
if (nextEnt == entCount) {
|
|
||||||
if (exactOnly) {
|
|
||||||
fillTerm();
|
|
||||||
}
|
|
||||||
// We are done scanning this block
|
|
||||||
break nextTerm;
|
|
||||||
} else {
|
|
||||||
continue nextTerm;
|
|
||||||
}
|
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
|
|
||||||
// Done! Current entry is after target --
|
// Done! Current entry is after target --
|
||||||
// return NOT_FOUND:
|
// return NOT_FOUND:
|
||||||
fillTerm();
|
fillTerm();
|
||||||
|
|
||||||
// if (DEBUG) System.out.println(" not found");
|
// if (DEBUG) System.out.println(" not found");
|
||||||
return SeekStatus.NOT_FOUND;
|
return SeekStatus.NOT_FOUND;
|
||||||
} else if (stop) {
|
} else {
|
||||||
// Exact match!
|
// Exact match!
|
||||||
|
|
||||||
// This cannot be a sub-block because we
|
// This cannot be a sub-block because we
|
||||||
// would have followed the index to this
|
// would have followed the index to this
|
||||||
// sub-block from the start:
|
// sub-block from the start:
|
||||||
|
|
||||||
assert ste.termExists;
|
|
||||||
fillTerm();
|
fillTerm();
|
||||||
// if (DEBUG) System.out.println(" found!");
|
// if (DEBUG) System.out.println(" found!");
|
||||||
return SeekStatus.FOUND;
|
return SeekStatus.FOUND;
|
||||||
}
|
}
|
||||||
}
|
} while (nextEnt < entCount);
|
||||||
}
|
|
||||||
|
|
||||||
// It is possible (and OK) that terms index pointed us
|
// It is possible (and OK) that terms index pointed us
|
||||||
// at this block, but, we scanned the entire block and
|
// at this block, but, we scanned the entire block and
|
||||||
|
@ -730,13 +707,11 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
assert prefixMatches(target);
|
assert prefixMatches(target);
|
||||||
|
|
||||||
// Loop over each entry (term or sub-block) in this block:
|
// Loop over each entry (term or sub-block) in this block:
|
||||||
// nextTerm: while(nextEnt < entCount) {
|
while (nextEnt < entCount) {
|
||||||
nextTerm:
|
|
||||||
while (true) {
|
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
|
|
||||||
final int code = suffixesReader.readVInt();
|
final int code = suffixesReader.readVInt();
|
||||||
suffix = code >>> 1;
|
suffixLength = code >>> 1;
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// BytesRef suffixBytesRef = new BytesRef();
|
// BytesRef suffixBytesRef = new BytesRef();
|
||||||
// suffixBytesRef.bytes = suffixBytes;
|
// suffixBytesRef.bytes = suffixBytes;
|
||||||
|
@ -748,9 +723,8 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
ste.termExists = (code & 1) == 0;
|
ste.termExists = (code & 1) == 0;
|
||||||
final int termLen = prefix + suffix;
|
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
suffixesReader.skipBytes(suffix);
|
suffixesReader.skipBytes(suffixLength);
|
||||||
// Must save ord before we skip over a sub-block in case we push, below:
|
// Must save ord before we skip over a sub-block in case we push, below:
|
||||||
long prevTermOrd = termOrd;
|
long prevTermOrd = termOrd;
|
||||||
if (ste.termExists) {
|
if (ste.termExists) {
|
||||||
|
@ -763,40 +737,20 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
lastSubFP = fp - subCode;
|
lastSubFP = fp - subCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
|
// Compare suffix and target.
|
||||||
int targetPos = target.offset + prefix;
|
final int cmp =
|
||||||
|
Arrays.compareUnsigned(
|
||||||
// Loop over bytes in the suffix, comparing to
|
suffixBytes,
|
||||||
// the target
|
startBytePos,
|
||||||
int bytePos = startBytePos;
|
startBytePos + suffixLength,
|
||||||
while (true) {
|
target.bytes,
|
||||||
final int cmp;
|
target.offset + prefixLength,
|
||||||
final boolean stop;
|
target.offset + target.length);
|
||||||
if (targetPos < targetLimit) {
|
|
||||||
cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
|
|
||||||
stop = false;
|
|
||||||
} else {
|
|
||||||
assert targetPos == targetLimit;
|
|
||||||
cmp = termLen - target.length;
|
|
||||||
stop = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
// Current entry is still before the target;
|
// Current entry is still before the target;
|
||||||
// keep scanning
|
// keep scanning
|
||||||
|
|
||||||
if (nextEnt == entCount) {
|
|
||||||
if (exactOnly) {
|
|
||||||
fillTerm();
|
|
||||||
// termExists = true;
|
|
||||||
}
|
|
||||||
// We are done scanning this block
|
|
||||||
break nextTerm;
|
|
||||||
} else {
|
|
||||||
continue nextTerm;
|
|
||||||
}
|
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
|
|
||||||
// Done! Current entry is after target --
|
// Done! Current entry is after target --
|
||||||
// return NOT_FOUND:
|
// return NOT_FOUND:
|
||||||
fillTerm();
|
fillTerm();
|
||||||
|
@ -807,7 +761,8 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
// the target, so we must recurse into the
|
// the target, so we must recurse into the
|
||||||
// sub-frame(s):
|
// sub-frame(s):
|
||||||
ste.currentFrame =
|
ste.currentFrame =
|
||||||
ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen, prevTermOrd);
|
ste.pushFrame(
|
||||||
|
null, ste.currentFrame.lastSubFP, prefixLength + suffixLength, prevTermOrd);
|
||||||
ste.currentFrame.loadBlock();
|
ste.currentFrame.loadBlock();
|
||||||
while (ste.currentFrame.next()) {
|
while (ste.currentFrame.next()) {
|
||||||
ste.currentFrame =
|
ste.currentFrame =
|
||||||
|
@ -818,7 +773,7 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
|
|
||||||
// if (DEBUG) System.out.println(" not found");
|
// if (DEBUG) System.out.println(" not found");
|
||||||
return SeekStatus.NOT_FOUND;
|
return SeekStatus.NOT_FOUND;
|
||||||
} else if (stop) {
|
} else {
|
||||||
// Exact match!
|
// Exact match!
|
||||||
|
|
||||||
// This cannot be a sub-block because we
|
// This cannot be a sub-block because we
|
||||||
|
@ -831,7 +786,6 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
return SeekStatus.FOUND;
|
return SeekStatus.FOUND;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// It is possible (and OK) that terms index pointed us
|
// It is possible (and OK) that terms index pointed us
|
||||||
// at this block, but, we scanned the entire block and
|
// at this block, but, we scanned the entire block and
|
||||||
|
@ -854,9 +808,9 @@ final class OrdsSegmentTermsEnumFrame {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillTerm() {
|
private void fillTerm() {
|
||||||
final int termLength = prefix + suffix;
|
final int termLength = prefixLength + suffixLength;
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(termLength);
|
ste.term.grow(termLength);
|
||||||
System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefix, suffix);
|
System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefixLength, suffixLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -181,17 +181,17 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
// " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
|
||||||
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
// f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
|
||||||
// term.length + " vs prefix=" + f.prefix);
|
// term.length + " vs prefix=" + f.prefix);
|
||||||
if (f.prefix > targetBeforeCurrentLength) {
|
if (f.prefixLength > targetBeforeCurrentLength) {
|
||||||
f.rewind();
|
f.rewind();
|
||||||
} else {
|
} else {
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" skip rewind!");
|
// System.out.println(" skip rewind!");
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
assert length == f.prefix;
|
assert length == f.prefixLength;
|
||||||
} else {
|
} else {
|
||||||
f.nextEnt = -1;
|
f.nextEnt = -1;
|
||||||
f.prefix = length;
|
f.prefixLength = length;
|
||||||
f.state.termBlockOrd = 0;
|
f.state.termBlockOrd = 0;
|
||||||
f.fpOrig = f.fp = fp;
|
f.fpOrig = f.fp = fp;
|
||||||
f.lastSubFP = -1;
|
f.lastSubFP = -1;
|
||||||
|
@ -459,7 +459,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// Integer.toHexString(targetLabel) + " termExists=" + termExists);
|
// Integer.toHexString(targetLabel) + " termExists=" + termExists);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
@ -573,7 +573,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
|
||||||
|
@ -802,7 +802,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
// toHex(targetLabel));
|
// toHex(targetLabel));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
@ -863,7 +863,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
// validIndexPrefix = targetUpto;
|
// validIndexPrefix = targetUpto;
|
||||||
validIndexPrefix = currentFrame.prefix;
|
validIndexPrefix = currentFrame.prefixLength;
|
||||||
|
|
||||||
currentFrame.scanToFloorFrame(target);
|
currentFrame.scanToFloorFrame(target);
|
||||||
|
|
||||||
|
@ -901,7 +901,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
while (true) {
|
while (true) {
|
||||||
IDVersionSegmentTermsEnumFrame f = getFrame(ord);
|
IDVersionSegmentTermsEnumFrame f = getFrame(ord);
|
||||||
assert f != null;
|
assert f != null;
|
||||||
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
|
final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength);
|
||||||
if (f.nextEnt == -1) {
|
if (f.nextEnt == -1) {
|
||||||
out.println(
|
out.println(
|
||||||
" frame "
|
" frame "
|
||||||
|
@ -912,7 +912,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
+ f.fp
|
+ f.fp
|
||||||
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
||||||
+ " prefixLen="
|
+ " prefixLen="
|
||||||
+ f.prefix
|
+ f.prefixLength
|
||||||
+ " prefix="
|
+ " prefix="
|
||||||
+ ToStringUtils.bytesRefToString(prefix)
|
+ ToStringUtils.bytesRefToString(prefix)
|
||||||
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
+ (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
|
||||||
|
@ -940,7 +940,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
+ f.fp
|
+ f.fp
|
||||||
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
+ (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
|
||||||
+ " prefixLen="
|
+ " prefixLen="
|
||||||
+ f.prefix
|
+ f.prefixLength
|
||||||
+ " prefix="
|
+ " prefix="
|
||||||
+ ToStringUtils.bytesRefToString(prefix)
|
+ ToStringUtils.bytesRefToString(prefix)
|
||||||
+ " nextEnt="
|
+ " nextEnt="
|
||||||
|
@ -965,12 +965,14 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
}
|
}
|
||||||
if (fr.index != null) {
|
if (fr.index != null) {
|
||||||
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
|
||||||
if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
|
if (f.prefixLength > 0
|
||||||
|
&& isSeekFrame
|
||||||
|
&& f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) {
|
||||||
out.println(
|
out.println(
|
||||||
" broken seek state: arc.label="
|
" broken seek state: arc.label="
|
||||||
+ (char) f.arc.label()
|
+ (char) f.arc.label()
|
||||||
+ " vs term byte="
|
+ " vs term byte="
|
||||||
+ (char) (term.byteAt(f.prefix - 1) & 0xFF));
|
+ (char) (term.byteAt(f.prefixLength - 1) & 0xFF));
|
||||||
throw new RuntimeException("seek state is broken");
|
throw new RuntimeException("seek state is broken");
|
||||||
}
|
}
|
||||||
Pair<BytesRef, Long> output = Util.get(fr.index, prefix);
|
Pair<BytesRef, Long> output = Util.get(fr.index, prefix);
|
||||||
|
@ -999,7 +1001,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
if (f == currentFrame) {
|
if (f == currentFrame) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (f.prefix == validIndexPrefix) {
|
if (f.prefixLength == validIndexPrefix) {
|
||||||
isSeekFrame = false;
|
isSeekFrame = false;
|
||||||
}
|
}
|
||||||
ord++;
|
ord++;
|
||||||
|
@ -1079,7 +1081,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {
|
||||||
|
|
||||||
// Note that the seek state (last seek) has been
|
// Note that the seek state (last seek) has been
|
||||||
// invalidated beyond this depth
|
// invalidated beyond this depth
|
||||||
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
|
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength);
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
|
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
|
||||||
// }
|
// }
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.sandbox.codecs.idversion;
|
package org.apache.lucene.sandbox.codecs.idversion;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.BlockTermState;
|
import org.apache.lucene.codecs.BlockTermState;
|
||||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||||
import org.apache.lucene.store.ByteArrayDataInput;
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
|
@ -52,7 +53,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
||||||
|
|
||||||
// Length of prefix shared by all terms in this block
|
// Length of prefix shared by all terms in this block
|
||||||
int prefix;
|
int prefixLength;
|
||||||
|
|
||||||
// Number of entries (term or sub-block) in this block
|
// Number of entries (term or sub-block) in this block
|
||||||
int entCount;
|
int entCount;
|
||||||
|
@ -262,11 +263,11 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
assert nextEnt != -1 && nextEnt < entCount
|
assert nextEnt != -1 && nextEnt < entCount
|
||||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
suffix = suffixesReader.readVInt();
|
suffixLength = suffixesReader.readVInt();
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(ste.term.length());
|
ste.term.grow(ste.term.length());
|
||||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength);
|
||||||
// A normal term
|
// A normal term
|
||||||
ste.termExists = true;
|
ste.termExists = true;
|
||||||
return false;
|
return false;
|
||||||
|
@ -279,11 +280,11 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
final int code = suffixesReader.readVInt();
|
final int code = suffixesReader.readVInt();
|
||||||
suffix = code >>> 1;
|
suffixLength = code >>> 1;
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(ste.term.length());
|
ste.term.grow(ste.term.length());
|
||||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength);
|
||||||
if ((code & 1) == 0) {
|
if ((code & 1) == 0) {
|
||||||
// A normal term
|
// A normal term
|
||||||
ste.termExists = true;
|
ste.termExists = true;
|
||||||
|
@ -307,7 +308,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
// floor blocks we "typically" get
|
// floor blocks we "typically" get
|
||||||
public void scanToFloorFrame(BytesRef target) {
|
public void scanToFloorFrame(BytesRef target) {
|
||||||
|
|
||||||
if (!isFloor || target.length <= prefix) {
|
if (!isFloor || target.length <= prefixLength) {
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" +
|
// System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" +
|
||||||
// target.length + " vs prefix=" + prefix);
|
// target.length + " vs prefix=" + prefix);
|
||||||
|
@ -315,7 +316,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
|
final int targetLabel = target.bytes[target.offset + prefixLength] & 0xFF;
|
||||||
|
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char)
|
// System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char)
|
||||||
|
@ -415,7 +416,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
|
|
||||||
// Used only by assert
|
// Used only by assert
|
||||||
private boolean prefixMatches(BytesRef target) {
|
private boolean prefixMatches(BytesRef target) {
|
||||||
for (int bytePos = 0; bytePos < prefix; bytePos++) {
|
for (int bytePos = 0; bytePos < prefixLength; bytePos++) {
|
||||||
if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) {
|
if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -466,7 +467,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
}
|
}
|
||||||
|
|
||||||
private int startBytePos;
|
private int startBytePos;
|
||||||
private int suffix;
|
private int suffixLength;
|
||||||
private long subCode;
|
private long subCode;
|
||||||
|
|
||||||
// Target's prefix matches this block's prefix; we
|
// Target's prefix matches this block's prefix; we
|
||||||
|
@ -493,12 +494,10 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
assert prefixMatches(target);
|
assert prefixMatches(target);
|
||||||
|
|
||||||
// Loop over each entry (term or sub-block) in this block:
|
// Loop over each entry (term or sub-block) in this block:
|
||||||
// nextTerm: while(nextEnt < entCount) {
|
do {
|
||||||
nextTerm:
|
|
||||||
while (true) {
|
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
|
|
||||||
suffix = suffixesReader.readVInt();
|
suffixLength = suffixesReader.readVInt();
|
||||||
|
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// BytesRef suffixBytesRef = new BytesRef();
|
// BytesRef suffixBytesRef = new BytesRef();
|
||||||
|
@ -509,76 +508,41 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
// + ToStringUtils.bytesRefToString(suffixBytesRef));
|
||||||
// }
|
// }
|
||||||
|
|
||||||
final int termLen = prefix + suffix;
|
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
suffixesReader.skipBytes(suffix);
|
suffixesReader.skipBytes(suffixLength);
|
||||||
|
|
||||||
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
|
// Compare suffix and target.
|
||||||
int targetPos = target.offset + prefix;
|
final int cmp =
|
||||||
|
Arrays.compareUnsigned(
|
||||||
// Loop over bytes in the suffix, comparing to
|
suffixBytes,
|
||||||
// the target
|
startBytePos,
|
||||||
int bytePos = startBytePos;
|
startBytePos + suffixLength,
|
||||||
while (true) {
|
target.bytes,
|
||||||
final int cmp;
|
target.offset + prefixLength,
|
||||||
final boolean stop;
|
target.offset + target.length);
|
||||||
if (targetPos < targetLimit) {
|
|
||||||
cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
|
|
||||||
stop = false;
|
|
||||||
} else {
|
|
||||||
assert targetPos == targetLimit;
|
|
||||||
cmp = termLen - target.length;
|
|
||||||
stop = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
// Current entry is still before the target;
|
// Current entry is still before the target;
|
||||||
// keep scanning
|
// keep scanning
|
||||||
|
|
||||||
if (nextEnt == entCount) {
|
|
||||||
if (exactOnly) {
|
|
||||||
fillTerm();
|
|
||||||
}
|
|
||||||
// We are done scanning this block
|
|
||||||
break nextTerm;
|
|
||||||
} else {
|
|
||||||
continue nextTerm;
|
|
||||||
}
|
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
|
|
||||||
// Done! Current entry is after target --
|
// Done! Current entry is after target --
|
||||||
// return NOT_FOUND:
|
// return NOT_FOUND:
|
||||||
fillTerm();
|
fillTerm();
|
||||||
|
|
||||||
if (!exactOnly && !ste.termExists) {
|
|
||||||
// We are on a sub-block, and caller wants
|
|
||||||
// us to position to the next term after
|
|
||||||
// the target, so we must recurse into the
|
|
||||||
// sub-frame(s):
|
|
||||||
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen);
|
|
||||||
ste.currentFrame.loadBlock();
|
|
||||||
while (ste.currentFrame.next()) {
|
|
||||||
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length());
|
|
||||||
ste.currentFrame.loadBlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// if (DEBUG) System.out.println(" not found");
|
// if (DEBUG) System.out.println(" not found");
|
||||||
return SeekStatus.NOT_FOUND;
|
return SeekStatus.NOT_FOUND;
|
||||||
} else if (stop) {
|
} else {
|
||||||
// Exact match!
|
// Exact match!
|
||||||
|
|
||||||
// This cannot be a sub-block because we
|
// This cannot be a sub-block because we
|
||||||
// would have followed the index to this
|
// would have followed the index to this
|
||||||
// sub-block from the start:
|
// sub-block from the start:
|
||||||
|
|
||||||
assert ste.termExists;
|
|
||||||
fillTerm();
|
fillTerm();
|
||||||
// if (DEBUG) System.out.println(" found!");
|
// if (DEBUG) System.out.println(" found!");
|
||||||
return SeekStatus.FOUND;
|
return SeekStatus.FOUND;
|
||||||
}
|
}
|
||||||
}
|
} while (nextEnt < entCount);
|
||||||
}
|
|
||||||
|
|
||||||
// It is possible (and OK) that terms index pointed us
|
// It is possible (and OK) that terms index pointed us
|
||||||
// at this block, but, we scanned the entire block and
|
// at this block, but, we scanned the entire block and
|
||||||
|
@ -622,13 +586,11 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
assert prefixMatches(target);
|
assert prefixMatches(target);
|
||||||
|
|
||||||
// Loop over each entry (term or sub-block) in this block:
|
// Loop over each entry (term or sub-block) in this block:
|
||||||
// nextTerm: while(nextEnt < entCount) {
|
while (nextEnt < entCount) {
|
||||||
nextTerm:
|
|
||||||
while (true) {
|
|
||||||
nextEnt++;
|
nextEnt++;
|
||||||
|
|
||||||
final int code = suffixesReader.readVInt();
|
final int code = suffixesReader.readVInt();
|
||||||
suffix = code >>> 1;
|
suffixLength = code >>> 1;
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// BytesRef suffixBytesRef = new BytesRef();
|
// BytesRef suffixBytesRef = new BytesRef();
|
||||||
// suffixBytesRef.bytes = suffixBytes;
|
// suffixBytesRef.bytes = suffixBytes;
|
||||||
|
@ -640,9 +602,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
ste.termExists = (code & 1) == 0;
|
ste.termExists = (code & 1) == 0;
|
||||||
final int termLen = prefix + suffix;
|
|
||||||
startBytePos = suffixesReader.getPosition();
|
startBytePos = suffixesReader.getPosition();
|
||||||
suffixesReader.skipBytes(suffix);
|
suffixesReader.skipBytes(suffixLength);
|
||||||
if (ste.termExists) {
|
if (ste.termExists) {
|
||||||
state.termBlockOrd++;
|
state.termBlockOrd++;
|
||||||
subCode = 0;
|
subCode = 0;
|
||||||
|
@ -651,40 +612,20 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
lastSubFP = fp - subCode;
|
lastSubFP = fp - subCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
|
// Compare suffix and target.
|
||||||
int targetPos = target.offset + prefix;
|
final int cmp =
|
||||||
|
Arrays.compareUnsigned(
|
||||||
// Loop over bytes in the suffix, comparing to
|
suffixBytes,
|
||||||
// the target
|
startBytePos,
|
||||||
int bytePos = startBytePos;
|
startBytePos + suffixLength,
|
||||||
while (true) {
|
target.bytes,
|
||||||
final int cmp;
|
target.offset + prefixLength,
|
||||||
final boolean stop;
|
target.offset + target.length);
|
||||||
if (targetPos < targetLimit) {
|
|
||||||
cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF);
|
|
||||||
stop = false;
|
|
||||||
} else {
|
|
||||||
assert targetPos == targetLimit;
|
|
||||||
cmp = termLen - target.length;
|
|
||||||
stop = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
// Current entry is still before the target;
|
// Current entry is still before the target;
|
||||||
// keep scanning
|
// keep scanning
|
||||||
|
|
||||||
if (nextEnt == entCount) {
|
|
||||||
if (exactOnly) {
|
|
||||||
fillTerm();
|
|
||||||
// termExists = true;
|
|
||||||
}
|
|
||||||
// We are done scanning this block
|
|
||||||
break nextTerm;
|
|
||||||
} else {
|
|
||||||
continue nextTerm;
|
|
||||||
}
|
|
||||||
} else if (cmp > 0) {
|
} else if (cmp > 0) {
|
||||||
|
|
||||||
// Done! Current entry is after target --
|
// Done! Current entry is after target --
|
||||||
// return NOT_FOUND:
|
// return NOT_FOUND:
|
||||||
fillTerm();
|
fillTerm();
|
||||||
|
@ -694,7 +635,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
// us to position to the next term after
|
// us to position to the next term after
|
||||||
// the target, so we must recurse into the
|
// the target, so we must recurse into the
|
||||||
// sub-frame(s):
|
// sub-frame(s):
|
||||||
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen);
|
ste.currentFrame =
|
||||||
|
ste.pushFrame(null, ste.currentFrame.lastSubFP, prefixLength + suffixLength);
|
||||||
ste.currentFrame.loadBlock();
|
ste.currentFrame.loadBlock();
|
||||||
while (ste.currentFrame.next()) {
|
while (ste.currentFrame.next()) {
|
||||||
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length());
|
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length());
|
||||||
|
@ -704,7 +646,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
|
|
||||||
// if (DEBUG) System.out.println(" not found");
|
// if (DEBUG) System.out.println(" not found");
|
||||||
return SeekStatus.NOT_FOUND;
|
return SeekStatus.NOT_FOUND;
|
||||||
} else if (stop) {
|
} else {
|
||||||
// Exact match!
|
// Exact match!
|
||||||
|
|
||||||
// This cannot be a sub-block because we
|
// This cannot be a sub-block because we
|
||||||
|
@ -717,7 +659,6 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
return SeekStatus.FOUND;
|
return SeekStatus.FOUND;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// It is possible (and OK) that terms index pointed us
|
// It is possible (and OK) that terms index pointed us
|
||||||
// at this block, but, we scanned the entire block and
|
// at this block, but, we scanned the entire block and
|
||||||
|
@ -740,9 +681,9 @@ final class IDVersionSegmentTermsEnumFrame {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillTerm() {
|
private void fillTerm() {
|
||||||
final int termLength = prefix + suffix;
|
final int termLength = prefixLength + suffixLength;
|
||||||
ste.term.setLength(prefix + suffix);
|
ste.term.setLength(prefixLength + suffixLength);
|
||||||
ste.term.grow(termLength);
|
ste.term.grow(termLength);
|
||||||
System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefix, suffix);
|
System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefixLength, suffixLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue