Use Arrays.compareUnsigned instead of a hand-written comparison loop. (#13252)

This commit is contained in:
zhouhui 2024-04-19 16:01:16 +08:00 committed by GitHub
parent 1f1181a079
commit 3024e66e4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 55 additions and 41 deletions

View File

@ -288,6 +288,8 @@ Improvements
Optimizations Optimizations
--------------------- ---------------------
* GITHUB#13252: Replace handwritten comparison loops with Arrays.compareUnsigned in SegmentTermsEnum. (zhouhui)
* GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao) * GITHUB#12996: Reduce ArrayUtil#grow in decompress. (Zhang Chao)
* GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma) * GITHUB#13115: Short circuit queued flush check when flush on update is disabled (Prabhat Sharma)

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene90.blocktree;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Arrays;
import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.BaseTermsEnum; import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum; import org.apache.lucene.index.ImpactsEnum;
@ -387,31 +388,18 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
if (cmp == 0) { if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but // Second compare the rest of the term, but
// don't save arc/output/frame; we only do this // don't save arc/output/frame; we only do this
// to find out if the target term is before, // to find out if the target term is before,
// equal or after the current term // equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length()); cmp =
while (targetUpto < targetLimit2) { Arrays.compareUnsigned(
cmp = term.bytes(),
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); targetUpto,
// if (DEBUG) { term.length(),
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + target.bytes,
// targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + target.offset + targetUpto,
// targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"); target.offset + target.length);
// }
if (cmp != 0) {
break;
}
targetUpto++;
}
if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
} }
if (cmp < 0) { if (cmp < 0) {
@ -666,28 +654,16 @@ final class SegmentTermsEnum extends BaseTermsEnum {
} }
if (cmp == 0) { if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but // Second compare the rest of the term, but
// don't save arc/output/frame: // don't save arc/output/frame:
final int targetLimit2 = Math.min(target.length, term.length()); cmp =
while (targetUpto < targetLimit2) { Arrays.compareUnsigned(
cmp = term.bytes(),
(term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); targetUpto,
// if (DEBUG) { term.length(),
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit target.bytes,
// + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) target.offset + targetUpto,
// + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")"); target.offset + target.length);
// }
if (cmp != 0) {
break;
}
targetUpto++;
}
if (cmp == 0) {
cmp = term.length() - target.length;
}
targetUpto = targetUptoMid;
} }
if (cmp < 0) { if (cmp < 0) {

View File

@ -369,6 +369,42 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
dir.close(); dir.close();
} }
/**
 * Seeks terms in random (non-sorted) order on a single, reused {@link TermsEnum}.
 *
 * <p>Indexes the ids {@code 0..9999} as single-token string terms, force-merges to one segment,
 * and then repeatedly seeks to a randomly chosen existing id with both {@code seekExact} and
 * {@code seekCeil}. Every seek must land exactly on the requested term, regardless of where the
 * enum was positioned by the previous seek.
 */
public void testDisorder() throws Exception {
  final int docCount = 10000;
  final int seekCount = 20000;
  // try-with-resources closes reader, writer and directory (in that order) even when an
  // assertion fails, so a failure is not followed by unclosed-resource noise.
  try (Directory dir = newDirectory()) {
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    iwc.setCodec(getCodec());
    iwc.setMergePolicy(newTieredMergePolicy());
    try (IndexWriter iw = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < docCount; i++) {
        Document document = new Document();
        document.add(new StringField("id", Integer.toString(i), Field.Store.NO));
        iw.addDocument(document);
      }
      iw.commit();
      // A single segment is required by getOnlyLeafReader below.
      iw.forceMerge(1);
      try (DirectoryReader reader = DirectoryReader.open(iw)) {
        TermsEnum termsEnum = getOnlyLeafReader(reader).terms("id").iterator();
        for (int i = 0; i < seekCount; i++) {
          int n = random().nextInt(0, docCount);
          BytesRef target = new BytesRef(Integer.toString(n));
          // seekExact: the term was indexed, so it must be found.
          assertTrue(termsEnum.seekExact(target));
          assertEquals(target, termsEnum.term());
          // seekCeil: an existing term must be reported as FOUND, not as its successor.
          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(target));
          assertEquals(target, termsEnum.term());
        }
      }
    }
  }
}
protected void subCheckBinarySearch(TermsEnum termsEnum) throws Exception {} protected void subCheckBinarySearch(TermsEnum termsEnum) throws Exception {}
public void testBinarySearchTermLeaf() throws Exception { public void testBinarySearchTermLeaf() throws Exception {