mirror of https://github.com/apache/lucene.git
LUCENE-9674: Use binary search in VectorValues.advance()
Lucene90VectorReader now implements advance() with a binary search in place of the prior linear scan. Co-authored-by: Anand Kotriwal <anakot@amazon.com>
This commit is contained in:
parent
37e31f2ac7
commit
e5a16f0b0f
|
@ -180,6 +180,9 @@ Improvements
|
|||
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
|
||||
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
|
||||
|
||||
* LUCENE-9674: Implement faster advance on VectorValues using binary search.
|
||||
(Anand Kotriwal, Mike Sokolov)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
|||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.FloatBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
@ -386,9 +387,19 @@ public final class Lucene90VectorReader extends VectorReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
// We could do better by log-binary search in ordToDoc, but this is never used
|
||||
return slowAdvance(target);
|
||||
public int advance(int target) {
|
||||
assert docID() < target;
|
||||
ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
|
||||
if (ord < 0) {
|
||||
ord = -(ord + 1);
|
||||
}
|
||||
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
|
||||
if (ord == fieldEntry.ordToDoc.length) {
|
||||
doc = NO_MORE_DOCS;
|
||||
} else {
|
||||
doc = fieldEntry.ordToDoc[ord];
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -815,4 +815,50 @@ public class TestVectorValues extends LuceneTestCase {
|
|||
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
|
||||
assertEquals(3, VectorValues.SearchStrategy.values().length);
|
||||
}
|
||||
|
||||
public void testAdvance() throws Exception {
|
||||
try (Directory dir = newDirectory()) {
|
||||
try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
|
||||
int numdocs = atLeast(1500);
|
||||
String fieldName = "field";
|
||||
for (int i = 0; i < numdocs; i++) {
|
||||
Document doc = new Document();
|
||||
// randomly add a vector field
|
||||
if (random().nextInt(4) == 3) {
|
||||
doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
w.forceMerge(1);
|
||||
try (IndexReader reader = w.getReader()) {
|
||||
LeafReader r = getOnlyLeafReader(reader);
|
||||
VectorValues vectorValues = r.getVectorValues(fieldName);
|
||||
int[] vectorDocs = new int[vectorValues.size() + 1];
|
||||
int cur = -1;
|
||||
while (++cur < vectorValues.size() + 1) {
|
||||
vectorDocs[cur] = vectorValues.nextDoc();
|
||||
if (cur != 0) {
|
||||
assertTrue(vectorDocs[cur] > vectorDocs[cur - 1]);
|
||||
}
|
||||
}
|
||||
vectorValues = r.getVectorValues(fieldName);
|
||||
cur = -1;
|
||||
for (int i = 0; i < numdocs; i++) {
|
||||
// randomly advance to i
|
||||
if (random().nextInt(4) == 3) {
|
||||
while (vectorDocs[++cur] < i)
|
||||
;
|
||||
assertEquals(vectorDocs[cur], vectorValues.advance(i));
|
||||
assertEquals(vectorDocs[cur], vectorValues.docID());
|
||||
if (vectorValues.docID() == NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
// make i equal to docid so that it is greater than docId in the next loop iteration
|
||||
i = vectorValues.docID();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue