LUCENE-9118: BlockTreeTermsReader uses `Arrays#compareUnsigned` to compare suffixes. (#1150)

This commit is contained in:
Adrien Grand 2020-01-09 15:09:21 +01:00 committed by GitHub
parent ffe75fb441
commit b11c3cffe4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 62 additions and 102 deletions

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.IndexOptions;
@ -523,8 +524,7 @@ final class SegmentTermsEnumFrame {
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
//nextTerm: while(nextEnt < entCount) {
nextTerm: while (true) {
do {
nextEnt++;
suffix = suffixesReader.readVInt();
@ -537,60 +537,37 @@ final class SegmentTermsEnumFrame {
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
int targetPos = target.offset + prefix;
// Loop over bytes in the suffix, comparing to the target
final int cmp = Arrays.compareUnsigned(
suffixBytes, startBytePos, startBytePos + suffix,
target.bytes, target.offset + prefix, target.offset + target.length);
// Loop over bytes in the suffix, comparing to
// the target
int bytePos = startBytePos;
while(true) {
final int cmp;
final boolean stop;
if (targetPos < targetLimit) {
cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
stop = false;
} else {
assert targetPos == targetLimit;
cmp = termLen - target.length;
stop = true;
}
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
} else if (cmp > 0) {
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else {
// Exact match!
if (nextEnt == entCount) {
// We are done scanning this block
break nextTerm;
} else {
continue nextTerm;
}
} else if (cmp > 0) {
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else if (stop) {
// Exact match!
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
assert ste.termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
assert ste.termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
}
} while (nextEnt < entCount);
// It is possible (and OK) that terms index pointed us
// at this block, but, we scanned the entire block and
@ -631,7 +608,7 @@ final class SegmentTermsEnumFrame {
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
nextTerm: while(nextEnt < entCount) {
while(nextEnt < entCount) {
nextEnt++;
@ -658,65 +635,48 @@ final class SegmentTermsEnumFrame {
lastSubFP = fp - subCode;
}
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
int targetPos = target.offset + prefix;
final int cmp = Arrays.compareUnsigned(
suffixBytes, startBytePos, startBytePos + suffix,
target.bytes, target.offset + prefix, target.offset + target.length);
// Loop over bytes in the suffix, comparing to
// the target
int bytePos = startBytePos;
while (true) {
final int cmp;
final boolean stop;
if (targetPos < targetLimit) {
cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
stop = false;
} else {
assert targetPos == targetLimit;
cmp = termLen - target.length;
stop = true;
}
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
} else if (cmp > 0) {
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
continue nextTerm;
} else if (cmp > 0) {
//if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists);
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
//if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists);
if (!exactOnly && !ste.termExists) {
//System.out.println(" now pushFrame");
// TODO this
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the
// sub-frame(s):
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen);
if (!exactOnly && !ste.termExists) {
//System.out.println(" now pushFrame");
// TODO this
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the
// sub-frame(s):
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen);
ste.currentFrame.loadBlock();
while (ste.currentFrame.next()) {
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length());
ste.currentFrame.loadBlock();
while (ste.currentFrame.next()) {
ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length());
ste.currentFrame.loadBlock();
}
}
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else if (stop) {
// Exact match!
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
assert ste.termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else {
// Exact match!
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
assert ste.termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
}