From 2cbf5ff761310ede0f8947bde5c422fc5bc14b41 Mon Sep 17 00:00:00 2001 From: zhouhui Date: Wed, 18 Sep 2024 14:36:49 +0800 Subject: [PATCH] Use Arrays.compareUnsigned instead of loop compare suffix. --- .../blocktreeords/OrdsSegmentTermsEnum.java | 30 +-- .../OrdsSegmentTermsEnumFrame.java | 224 +++++++---------- .../idversion/IDVersionSegmentTermsEnum.java | 30 +-- .../IDVersionSegmentTermsEnumFrame.java | 235 +++++++----------- 4 files changed, 209 insertions(+), 310 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java index 13d64097cfe..39e35cc8360 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java @@ -184,7 +184,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); - if (f.prefix > targetBeforeCurrentLength) { + if (f.prefixLength > targetBeforeCurrentLength) { // System.out.println(" do rewind!"); f.rewind(); } else { @@ -192,11 +192,11 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // System.out.println(" skip rewind!"); // } } - assert length == f.prefix; + assert length == f.prefixLength; assert termOrd == f.termOrdOrig; } else { f.nextEnt = -1; - f.prefix = length; + f.prefixLength = length; f.state.termBlockOrd = 0; f.termOrdOrig = termOrd; // System.out.println("set termOrdOrig=" + termOrd); @@ -412,7 +412,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // toHex(targetLabel)); // } - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); @@ -472,7 +472,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { } // validIndexPrefix = targetUpto; - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; currentFrame.scanToFloorFrame(target); @@ -686,7 +686,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // toHex(targetLabel)); // } - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); @@ -747,7 +747,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { } // validIndexPrefix = targetUpto; - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; currentFrame.scanToFloorFrame(target); @@ -785,7 +785,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { while (true) { OrdsSegmentTermsEnumFrame f = getFrame(ord); assert f != null; - final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix); + final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength); if (f.nextEnt == -1) { out.println( " frame " @@ -796,7 +796,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" - + f.prefix + + f.prefixLength + " prefix=" + ToStringUtils.bytesRefToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) @@ -826,7 +826,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" - + f.prefix + + f.prefixLength + " prefix=" + ToStringUtils.bytesRefToString(prefix) + " nextEnt=" @@ -853,12 +853,14 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { } if (fr.index != null) { assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; - if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) { + if (f.prefixLength > 0 + && isSeekFrame + && f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) { out.println( " broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" - + (char) (term.byteAt(f.prefix - 1) & 0xFF)); + + (char) (term.byteAt(f.prefixLength - 1) & 0xFF)); throw new RuntimeException("seek state is broken"); } Output output = Util.get(fr.index, prefix); @@ -887,7 +889,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { if (f == currentFrame) { break; } - if (f.prefix == validIndexPrefix) { + if (f.prefixLength == validIndexPrefix) { isSeekFrame = false; } ord++; @@ -969,7 +971,7 @@ public final class OrdsSegmentTermsEnum extends BaseTermsEnum { // Note that the seek state (last seek) has been // invalidated beyond this depth - validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix); + validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength); // if (DEBUG) { // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); // } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java index cd98a3e0f8c..aeab06a9440 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.blocktreeords; import java.io.IOException; +import java.util.Arrays; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; import org.apache.lucene.index.IndexOptions; @@ -54,7 +55,7 @@ final class OrdsSegmentTermsEnumFrame { final ByteArrayDataInput floorDataReader = new ByteArrayDataInput(); // Length of prefix shared by all terms in this block - int prefix; + int prefixLength; // Number of entries (term or sub-block) in this block int entCount; @@ -295,11 +296,11 @@ final class OrdsSegmentTermsEnumFrame { : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd; nextEnt++; termOrd++; - suffix = suffixesReader.readVInt(); + suffixLength = suffixesReader.readVInt(); startBytePos = suffixesReader.getPosition(); - ste.term.setLength(prefix + suffix); + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(ste.term.length()); - suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); // A normal term ste.termExists = true; return false; @@ -312,11 +313,11 @@ final class OrdsSegmentTermsEnumFrame { : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; final int code = suffixesReader.readVInt(); - suffix = code >>> 1; + suffixLength = code >>> 1; startBytePos = suffixesReader.getPosition(); - ste.term.setLength(prefix + suffix); + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(ste.term.length()); - suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); if ((code & 1) == 0) { // A normal term ste.termExists = true; @@ -342,7 +343,7 @@ final class OrdsSegmentTermsEnumFrame { // floor blocks we "typically" get public void scanToFloorFrame(BytesRef target) { - if (!isFloor || target.length <= prefix) { + if (!isFloor || target.length <= prefixLength) { // if (DEBUG) { // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + // target.length + " vs prefix=" + prefix); @@ -350,7 +351,7 @@ final class OrdsSegmentTermsEnumFrame { return; } - final int targetLabel = target.bytes[target.offset + prefix] & 0xFF; + final int targetLabel = target.bytes[target.offset + prefixLength] & 0xFF; // if (DEBUG) { // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char) @@ -532,7 +533,7 @@ final class OrdsSegmentTermsEnumFrame { // Used only by assert private boolean prefixMatches(BytesRef target) { - for (int bytePos = 0; bytePos < prefix; bytePos++) { + for (int bytePos = 0; bytePos < prefixLength; bytePos++) { if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) { return false; } @@ -586,7 +587,7 @@ final class OrdsSegmentTermsEnumFrame { } private int startBytePos; - private int suffix; + private int suffixLength; private long subCode; // Target's prefix matches this block's prefix; we @@ -613,13 +614,11 @@ final class OrdsSegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - // nextTerm: while(nextEnt < entCount) { - nextTerm: - while (true) { + do { nextEnt++; termOrd++; - suffix = suffixesReader.readVInt(); + suffixLength = suffixesReader.readVInt(); // if (DEBUG) { // BytesRef suffixBytesRef = new BytesRef(); @@ -630,63 +629,41 @@ final class OrdsSegmentTermsEnumFrame { // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } - final int termLen = prefix + suffix; startBytePos = suffixesReader.getPosition(); - suffixesReader.skipBytes(suffix); + suffixesReader.skipBytes(suffixLength); - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + // Compare suffix and target. + final int cmp = + Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while (true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! - if (nextEnt == entCount) { - if (exactOnly) { - fillTerm(); - } - // We are done scanning this block - break nextTerm; - } else { - continue nextTerm; - } - } else if (cmp > 0) { + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - // if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - // if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; - } + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } - } + } while (nextEnt < entCount); // It is possible (and OK) that terms index pointed us // at this block, but, we scanned the entire block and @@ -730,13 +707,11 @@ final class OrdsSegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - // nextTerm: while(nextEnt < entCount) { - nextTerm: - while (true) { + while (nextEnt < entCount) { nextEnt++; final int code = suffixesReader.readVInt(); - suffix = code >>> 1; + suffixLength = code >>> 1; // if (DEBUG) { // BytesRef suffixBytesRef = new BytesRef(); // suffixBytesRef.bytes = suffixBytes; @@ -748,9 +723,8 @@ final class OrdsSegmentTermsEnumFrame { // } ste.termExists = (code & 1) == 0; - final int termLen = prefix + suffix; startBytePos = suffixesReader.getPosition(); - suffixesReader.skipBytes(suffix); + suffixesReader.skipBytes(suffixLength); // Must save ord before we skip over a sub-block in case we push, below: long prevTermOrd = termOrd; if (ste.termExists) { @@ -763,73 +737,53 @@ final class OrdsSegmentTermsEnumFrame { lastSubFP = fp - subCode; } - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + // Compare suffix and target. + final int cmp = + Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while (true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning - - if (nextEnt == entCount) { - if (exactOnly) { - fillTerm(); - // termExists = true; - } - // We are done scanning this block - break nextTerm; - } else { - continue nextTerm; - } - } else if (cmp > 0) { - - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - if (!exactOnly && !ste.termExists) { - // We are on a sub-block, and caller wants - // us to position to the next term after - // the target, so we must recurse into the - // sub-frame(s): + if (!exactOnly && !ste.termExists) { + // We are on a sub-block, and caller wants + // us to position to the next term after + // the target, so we must recurse into the + // sub-frame(s): + ste.currentFrame = + ste.pushFrame( + null, ste.currentFrame.lastSubFP, prefixLength + suffixLength, prevTermOrd); + ste.currentFrame.loadBlock(); + while (ste.currentFrame.next()) { ste.currentFrame = - ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen, prevTermOrd); + ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length(), prevTermOrd); ste.currentFrame.loadBlock(); - while (ste.currentFrame.next()) { - ste.currentFrame = - ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length(), prevTermOrd); - ste.currentFrame.loadBlock(); - } } - - // if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - // if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; } + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } } @@ -854,9 +808,9 @@ final class OrdsSegmentTermsEnumFrame { } private void fillTerm() { - final int termLength = prefix + suffix; - ste.term.setLength(prefix + suffix); + final int termLength = prefixLength + suffixLength; + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(termLength); - System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefix, suffix); + System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefixLength, suffixLength); } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java index dd7732b342e..e380196b98f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -181,17 +181,17 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); - if (f.prefix > targetBeforeCurrentLength) { + if (f.prefixLength > targetBeforeCurrentLength) { f.rewind(); } else { // if (DEBUG) { // System.out.println(" skip rewind!"); // } } - assert length == f.prefix; + assert length == f.prefixLength; } else { f.nextEnt = -1; - f.prefix = length; + f.prefixLength = length; f.state.termBlockOrd = 0; f.fpOrig = f.fp = fp; f.lastSubFP = -1; @@ -459,7 +459,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // Integer.toHexString(targetLabel) + " termExists=" + termExists); // } - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); @@ -573,7 +573,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { } // validIndexPrefix = targetUpto; - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; currentFrame.scanToFloorFrame(target); @@ -802,7 +802,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // toHex(targetLabel)); // } - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; // validIndexPrefix = targetUpto; currentFrame.scanToFloorFrame(target); @@ -863,7 +863,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { } // validIndexPrefix = targetUpto; - validIndexPrefix = currentFrame.prefix; + validIndexPrefix = currentFrame.prefixLength; currentFrame.scanToFloorFrame(target); @@ -901,7 +901,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { while (true) { IDVersionSegmentTermsEnumFrame f = getFrame(ord); assert f != null; - final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix); + final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefixLength); if (f.nextEnt == -1) { out.println( " frame " @@ -912,7 +912,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" - + f.prefix + + f.prefixLength + " prefix=" + ToStringUtils.bytesRefToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) @@ -940,7 +940,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" - + f.prefix + + f.prefixLength + " prefix=" + ToStringUtils.bytesRefToString(prefix) + " nextEnt=" @@ -965,12 +965,14 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { } if (fr.index != null) { assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc; - if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) { + if (f.prefixLength > 0 + && isSeekFrame + && f.arc.label() != (term.byteAt(f.prefixLength - 1) & 0xFF)) { out.println( " broken seek state: arc.label=" + (char) f.arc.label() + " vs term byte=" - + (char) (term.byteAt(f.prefix - 1) & 0xFF)); + + (char) (term.byteAt(f.prefixLength - 1) & 0xFF)); throw new RuntimeException("seek state is broken"); } Pair output = Util.get(fr.index, prefix); @@ -999,7 +1001,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { if (f == currentFrame) { break; } - if (f.prefix == validIndexPrefix) { + if (f.prefixLength == validIndexPrefix) { isSeekFrame = false; } ord++; @@ -1079,7 +1081,7 @@ public final class IDVersionSegmentTermsEnum extends BaseTermsEnum { // Note that the seek state (last seek) has been // invalidated beyond this depth - validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix); + validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefixLength); // if (DEBUG) { // System.out.println(" reset validIndexPrefix=" + validIndexPrefix); // } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java index 4ecac0a93ad..45f5aba6fa7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java @@ -17,6 +17,7 @@ package org.apache.lucene.sandbox.codecs.idversion; import java.io.IOException; +import java.util.Arrays; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.store.ByteArrayDataInput; @@ -52,7 +53,7 @@ final class IDVersionSegmentTermsEnumFrame { final ByteArrayDataInput floorDataReader = new ByteArrayDataInput(); // Length of prefix shared by all terms in this block - int prefix; + int prefixLength; // Number of entries (term or sub-block) in this block int entCount; @@ -262,11 +263,11 @@ final class IDVersionSegmentTermsEnumFrame { assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; - suffix = suffixesReader.readVInt(); + suffixLength = suffixesReader.readVInt(); startBytePos = suffixesReader.getPosition(); - ste.term.setLength(prefix + suffix); + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(ste.term.length()); - suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); // A normal term ste.termExists = true; return false; @@ -279,11 +280,11 @@ final class IDVersionSegmentTermsEnumFrame { : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; final int code = suffixesReader.readVInt(); - suffix = code >>> 1; + suffixLength = code >>> 1; startBytePos = suffixesReader.getPosition(); - ste.term.setLength(prefix + suffix); + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(ste.term.length()); - suffixesReader.readBytes(ste.term.bytes(), prefix, suffix); + suffixesReader.readBytes(ste.term.bytes(), prefixLength, suffixLength); if ((code & 1) == 0) { // A normal term ste.termExists = true; @@ -307,7 +308,7 @@ final class IDVersionSegmentTermsEnumFrame { // floor blocks we "typically" get public void scanToFloorFrame(BytesRef target) { - if (!isFloor || target.length <= prefix) { + if (!isFloor || target.length <= prefixLength) { // if (DEBUG) { // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + // target.length + " vs prefix=" + prefix); @@ -315,7 +316,7 @@ final class IDVersionSegmentTermsEnumFrame { return; } - final int targetLabel = target.bytes[target.offset + prefix] & 0xFF; + final int targetLabel = target.bytes[target.offset + prefixLength] & 0xFF; // if (DEBUG) { // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + ((char) @@ -415,7 +416,7 @@ final class IDVersionSegmentTermsEnumFrame { // Used only by assert private boolean prefixMatches(BytesRef target) { - for (int bytePos = 0; bytePos < prefix; bytePos++) { + for (int bytePos = 0; bytePos < prefixLength; bytePos++) { if (target.bytes[target.offset + bytePos] != ste.term.byteAt(bytePos)) { return false; } @@ -466,7 +467,7 @@ final class IDVersionSegmentTermsEnumFrame { } private int startBytePos; - private int suffix; + private int suffixLength; private long subCode; // Target's prefix matches this block's prefix; we @@ -493,12 +494,10 @@ final class IDVersionSegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - // nextTerm: while(nextEnt < entCount) { - nextTerm: - while (true) { + do { nextEnt++; - suffix = suffixesReader.readVInt(); + suffixLength = suffixesReader.readVInt(); // if (DEBUG) { // BytesRef suffixBytesRef = new BytesRef(); @@ -509,76 +508,41 @@ final class IDVersionSegmentTermsEnumFrame { // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } - final int termLen = prefix + suffix; startBytePos = suffixesReader.getPosition(); - suffixesReader.skipBytes(suffix); + suffixesReader.skipBytes(suffixLength); - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + // Compare suffix and target. + final int cmp = + Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while (true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! - if (nextEnt == entCount) { - if (exactOnly) { - fillTerm(); - } - // We are done scanning this block - break nextTerm; - } else { - continue nextTerm; - } - } else if (cmp > 0) { + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - if (!exactOnly && !ste.termExists) { - // We are on a sub-block, and caller wants - // us to position to the next term after - // the target, so we must recurse into the - // sub-frame(s): - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen); - ste.currentFrame.loadBlock(); - while (ste.currentFrame.next()) { - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); - ste.currentFrame.loadBlock(); - } - } - - // if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - // if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; - } + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } - } + } while (nextEnt < entCount); // It is possible (and OK) that terms index pointed us // at this block, but, we scanned the entire block and @@ -622,13 +586,11 @@ final class IDVersionSegmentTermsEnumFrame { assert prefixMatches(target); // Loop over each entry (term or sub-block) in this block: - // nextTerm: while(nextEnt < entCount) { - nextTerm: - while (true) { + while (nextEnt < entCount) { nextEnt++; final int code = suffixesReader.readVInt(); - suffix = code >>> 1; + suffixLength = code >>> 1; // if (DEBUG) { // BytesRef suffixBytesRef = new BytesRef(); // suffixBytesRef.bytes = suffixBytes; @@ -640,9 +602,8 @@ final class IDVersionSegmentTermsEnumFrame { // } ste.termExists = (code & 1) == 0; - final int termLen = prefix + suffix; startBytePos = suffixesReader.getPosition(); - suffixesReader.skipBytes(suffix); + suffixesReader.skipBytes(suffixLength); if (ste.termExists) { state.termBlockOrd++; subCode = 0; @@ -651,71 +612,51 @@ final class IDVersionSegmentTermsEnumFrame { lastSubFP = fp - subCode; } - final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen); - int targetPos = target.offset + prefix; + // Compare suffix and target. + final int cmp = + Arrays.compareUnsigned( + suffixBytes, + startBytePos, + startBytePos + suffixLength, + target.bytes, + target.offset + prefixLength, + target.offset + target.length); - // Loop over bytes in the suffix, comparing to - // the target - int bytePos = startBytePos; - while (true) { - final int cmp; - final boolean stop; - if (targetPos < targetLimit) { - cmp = (suffixBytes[bytePos++] & 0xFF) - (target.bytes[targetPos++] & 0xFF); - stop = false; - } else { - assert targetPos == targetLimit; - cmp = termLen - target.length; - stop = true; - } + if (cmp < 0) { + // Current entry is still before the target; + // keep scanning + } else if (cmp > 0) { + // Done! Current entry is after target -- + // return NOT_FOUND: + fillTerm(); - if (cmp < 0) { - // Current entry is still before the target; - // keep scanning - - if (nextEnt == entCount) { - if (exactOnly) { - fillTerm(); - // termExists = true; - } - // We are done scanning this block - break nextTerm; - } else { - continue nextTerm; - } - } else if (cmp > 0) { - - // Done! Current entry is after target -- - // return NOT_FOUND: - fillTerm(); - - if (!exactOnly && !ste.termExists) { - // We are on a sub-block, and caller wants - // us to position to the next term after - // the target, so we must recurse into the - // sub-frame(s): - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, termLen); + if (!exactOnly && !ste.termExists) { + // We are on a sub-block, and caller wants + // us to position to the next term after + // the target, so we must recurse into the + // sub-frame(s): + ste.currentFrame = + ste.pushFrame(null, ste.currentFrame.lastSubFP, prefixLength + suffixLength); + ste.currentFrame.loadBlock(); + while (ste.currentFrame.next()) { + ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); ste.currentFrame.loadBlock(); - while (ste.currentFrame.next()) { - ste.currentFrame = ste.pushFrame(null, ste.currentFrame.lastSubFP, ste.term.length()); - ste.currentFrame.loadBlock(); - } } - - // if (DEBUG) System.out.println(" not found"); - return SeekStatus.NOT_FOUND; - } else if (stop) { - // Exact match! - - // This cannot be a sub-block because we - // would have followed the index to this - // sub-block from the start: - - assert ste.termExists; - fillTerm(); - // if (DEBUG) System.out.println(" found!"); - return SeekStatus.FOUND; } + + // if (DEBUG) System.out.println(" not found"); + return SeekStatus.NOT_FOUND; + } else { + // Exact match! + + // This cannot be a sub-block because we + // would have followed the index to this + // sub-block from the start: + + assert ste.termExists; + fillTerm(); + // if (DEBUG) System.out.println(" found!"); + return SeekStatus.FOUND; } } @@ -740,9 +681,9 @@ final class IDVersionSegmentTermsEnumFrame { } private void fillTerm() { - final int termLength = prefix + suffix; - ste.term.setLength(prefix + suffix); + final int termLength = prefixLength + suffixLength; + ste.term.setLength(prefixLength + suffixLength); ste.term.grow(termLength); - System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefix, suffix); + System.arraycopy(suffixBytes, startBytePos, ste.term.bytes(), prefixLength, suffixLength); } }