mirror of https://github.com/apache/lucene.git
LUCENE-8836: Speed up TermsEnum#lookupOrd on increasing sequences of ords. (#827)
parent 1089b482fc
commit 2a4c21bb58
@@ -121,6 +121,9 @@ Optimizations
 * LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
 
+* LUCENE-8836: Speed up calls to TermsEnum#lookupOrd on doc values terms enums
+  and sequences of increasing ords. (Bruno Roustant, Adrien Grand)
+
 Bug Fixes
 ---------------------
 
 * LUCENE-10477: Highlighter: WeightedSpanTermExtractor.extractWeightedSpanTerms to Query#rewrite
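
As a hedged illustration of the CHANGES entry above (not part of the commit): the access pattern it speeds up is resolving an increasing sequence of ords back to terms through a doc values field. A minimal sketch, assuming a LeafReader and a hypothetical sorted-set field named "category"; the class and method names are made up for the example.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

class LookupOrdSketch {
  // Resolve every ord of a sorted-set doc values field in increasing order,
  // the pattern that can now reuse the currently decoded terms-dictionary block.
  static void printTermsInOrdOrder(LeafReader leafReader) throws IOException {
    SortedSetDocValues values = DocValues.getSortedSet(leafReader, "category"); // field name is hypothetical
    for (long ord = 0; ord < values.getValueCount(); ++ord) {
      BytesRef term = values.lookupOrd(ord); // consecutive ords stay within the same block
      System.out.println(term.utf8ToString());
    }
  }
}
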
@@ -1111,13 +1111,19 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
       if (ord < 0 || ord >= entry.termsDictSize) {
         throw new IndexOutOfBoundsException();
       }
-      final long blockIndex = ord >>> TERMS_DICT_BLOCK_LZ4_SHIFT;
-      final long blockAddress = blockAddresses.get(blockIndex);
-      bytes.seek(blockAddress);
-      this.ord = (blockIndex << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1;
-      do {
+      // Signed shift since ord is -1 when the terms enum is not positioned
+      final long currentBlockIndex = this.ord >> TERMS_DICT_BLOCK_LZ4_SHIFT;
+      final long blockIndex = ord >> TERMS_DICT_BLOCK_LZ4_SHIFT;
+      if (ord < this.ord || blockIndex != currentBlockIndex) {
+        // The looked up ord is before the current ord or belongs to a different block, seek again
+        final long blockAddress = blockAddresses.get(blockIndex);
+        bytes.seek(blockAddress);
+        this.ord = (blockIndex << TERMS_DICT_BLOCK_LZ4_SHIFT) - 1;
+      }
+      // Scan to the looked up ord
+      while (this.ord < ord) {
         next();
-      } while (this.ord < ord);
+      }
     }
 
     private BytesRef getTermFromIndex(long index) throws IOException {
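
The hunk above is the whole optimization: seekExact now remembers which block the enum is currently positioned in and only seeks back to a block start (forcing decompression) when the looked-up ord is behind the current ord or lives in a different block; otherwise it simply scans forward with next(). The following standalone toy sketch restates that pattern with made-up names and a shrunken block size; it is an illustration under those assumptions, not Lucene's actual classes.

class ToyBlockEnum {
  static final int BLOCK_SHIFT = 4;      // 16 values per block; stands in for TERMS_DICT_BLOCK_LZ4_SHIFT
  private final long[] blockAddresses;   // pretend on-disk offsets, one per block
  private long ord = -1;                 // -1 means "not positioned", hence the signed shifts below

  ToyBlockEnum(long[] blockAddresses) {
    this.blockAddresses = blockAddresses;
  }

  void seekExact(long target) {
    final long currentBlockIndex = this.ord >> BLOCK_SHIFT; // signed: stays -1 while unpositioned
    final long blockIndex = target >> BLOCK_SHIFT;
    if (target < this.ord || blockIndex != currentBlockIndex) {
      // Target is behind us or in another block: jump to the block start and rewind the ord.
      seekBlock(blockAddresses[(int) blockIndex]);
      this.ord = (blockIndex << BLOCK_SHIFT) - 1;
    }
    // Scan forward to the target; on increasing ords within one block this is the only cost.
    while (this.ord < target) {
      next();
    }
  }

  private void seekBlock(long address) { /* a real enum would decompress the block at `address` */ }

  private void next() { ++ord; /* a real enum would also decode the next term */ }
}

Rewinding to (blockIndex << BLOCK_SHIFT) - 1 mirrors the real code: the forward scan always restarts from a block-aligned position, so decoding stays consistent with the block's compressed layout.
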
@@ -865,4 +865,69 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
       ireader.close();
     }
   }
+
+  public void testSortedTermsDictLookupOrd() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
+    Document doc = new Document();
+    SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef());
+    doc.add(field);
+    final int numDocs = atLeast(Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SIZE + 1);
+    for (int i = 0; i < numDocs; ++i) {
+      field.setBytesValue(new BytesRef("" + i));
+      writer.addDocument(doc);
+    }
+    writer.forceMerge(1);
+    IndexReader reader = DirectoryReader.open(writer);
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    doTestTermsDictLookupOrd(leafReader.getSortedDocValues("foo").termsEnum());
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+
+  public void testSortedSetTermsDictLookupOrd() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
+    Document doc = new Document();
+    SortedSetDocValuesField field = new SortedSetDocValuesField("foo", new BytesRef());
+    doc.add(field);
+    final int numDocs = atLeast(2 * Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SIZE + 1);
+    for (int i = 0; i < numDocs; ++i) {
+      field.setBytesValue(new BytesRef("" + i));
+      writer.addDocument(doc);
+    }
+    writer.forceMerge(1);
+    IndexReader reader = DirectoryReader.open(writer);
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    doTestTermsDictLookupOrd(leafReader.getSortedSetDocValues("foo").termsEnum());
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+
+  private void doTestTermsDictLookupOrd(TermsEnum te) throws IOException {
+    List<BytesRef> terms = new ArrayList<>();
+    for (BytesRef term = te.next(); term != null; term = te.next()) {
+      terms.add(BytesRef.deepCopyOf(term));
+    }
+
+    // iterate in order
+    for (int i = 0; i < terms.size(); ++i) {
+      te.seekExact(i);
+      assertEquals(terms.get(i), te.term());
+    }
+
+    // iterate in reverse order
+    for (int i = terms.size() - 1; i >= 0; --i) {
+      te.seekExact(i);
+      assertEquals(terms.get(i), te.term());
+    }
+
+    // iterate in forward order with random gaps
+    for (int i = random().nextInt(5); i < terms.size(); i += random().nextInt(5)) {
+      te.seekExact(i);
+      assertEquals(terms.get(i), te.term());
+    }
+  }
 }