From b11c3cffe4954f35afd3461c0d3a7d0ac5a34054 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 9 Jan 2020 15:09:21 +0100 Subject: [PATCH] LUCENE-9118: BlockTreeTermsReader uses `Arrays#compareUnsigned` to compare suffixes. (#1150) --- .../blocktree/SegmentTermsEnumFrame.java | 164 +++++++----------- 1 file changed, 62 insertions(+), 102 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java index a32bdac427c..fdb4cc6955b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java @@ -18,6 +18,7 @@ package org.apache.lucene.codecs.blocktree; import java.io.IOException; +import java.util.Arrays; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.index.IndexOptions; @@ -523,8 +524,7 @@ final class SegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - //nextTerm: while(nextEnt < entCount) { - nextTerm: while (true) { + do { nextEnt++; suffix = suffixesReader.readVInt(); @@ -537,60 +537,37 @@ final class SegmentTermsEnumFrame { // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); // } - final int termLen = prefix + suffix; startBytePos = suffixesReader.getPosition(); suffixesReader.skipBytes(suffix); - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + // Loop over bytes in the suffix, comparing to the target + final int cmp = Arrays.compareUnsigned( + suffixBytes, startBytePos, startBytePos + suffix, + target.bytes, target.offset + prefix, target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while(true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning + //if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! - if (nextEnt == entCount) { - // We are done scanning this block - break nextTerm; - } else { - continue nextTerm; - } - } else if (cmp > 0) { + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - //if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - //if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; - } + assert ste.termExists; + fillTerm(); + //if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } - } + } while (nextEnt < entCount); // It is possible (and OK) that terms index pointed us // at this block, but, we scanned the entire block and @@ -631,7 +608,7 @@ final class SegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - nextTerm: while(nextEnt < entCount) { + while(nextEnt < entCount) { nextEnt++; @@ -658,65 +635,48 @@ final class SegmentTermsEnumFrame { lastSubFP = fp - subCode; } - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + final int cmp = Arrays.compareUnsigned( + suffixBytes, startBytePos, startBytePos + suffix, + target.bytes, target.offset + prefix, target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while (true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning - continue nextTerm; - } else if (cmp > 0) { + //if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists); - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - //if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists); - - if (!exactOnly && !ste.termExists) { - //System.out.println(" now pushFrame"); - // TODO this - // We are on a sub-block, and caller wants - // us to position to the next term after - // the target, so we must recurse into the - // sub-frame(s): - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen); + if (!exactOnly && !ste.termExists) { + //System.out.println(" now pushFrame"); + // TODO this + // We are on a sub-block, and caller wants + // us to position to the next term after + // the target, so we must recurse into the + // sub-frame(s): + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen); + ste.currentFrame.loadBlock(); + while (ste.currentFrame.next()) { + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); ste.currentFrame.loadBlock(); - while (ste.currentFrame.next()) { - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); - ste.currentFrame.loadBlock(); - } } - - //if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - //if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; } + + //if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + //if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } }