LUCENE-9674: Use binary search in VectorValues.advance()

Lucene90VectorReader now implements advance() with binary search in place of prior linear scan
Co-authored-by: Anand Kotriwal <anakot@amazon.com>
This commit is contained in:
Anand 2021-01-21 20:32:21 +05:30 committed by GitHub
parent 37e31f2ac7
commit e5a16f0b0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 3 deletions

View File

@ -180,6 +180,9 @@ Improvements
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
* LUCENE-9674: Implement faster advance on VectorValues using binary search.
(Anand Kotriwal, Mike Sokolov)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
@ -386,9 +387,19 @@ public final class Lucene90VectorReader extends VectorReader {
}
@Override
public int advance(int target) throws IOException {
// We could do better by log-binary search in ordToDoc, but this is never used
return slowAdvance(target);
public int advance(int target) {
assert docID() < target;
ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
if (ord < 0) {
ord = -(ord + 1);
}
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
if (ord == fieldEntry.ordToDoc.length) {
doc = NO_MORE_DOCS;
} else {
doc = fieldEntry.ordToDoc[ord];
}
return doc;
}
@Override

View File

@ -815,4 +815,50 @@ public class TestVectorValues extends LuceneTestCase {
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
assertEquals(3, VectorValues.SearchStrategy.values().length);
}
public void testAdvance() throws Exception {
try (Directory dir = newDirectory()) {
try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
int numdocs = atLeast(1500);
String fieldName = "field";
for (int i = 0; i < numdocs; i++) {
Document doc = new Document();
// randomly add a vector field
if (random().nextInt(4) == 3) {
doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
}
w.addDocument(doc);
}
w.forceMerge(1);
try (IndexReader reader = w.getReader()) {
LeafReader r = getOnlyLeafReader(reader);
VectorValues vectorValues = r.getVectorValues(fieldName);
int[] vectorDocs = new int[vectorValues.size() + 1];
int cur = -1;
while (++cur < vectorValues.size() + 1) {
vectorDocs[cur] = vectorValues.nextDoc();
if (cur != 0) {
assertTrue(vectorDocs[cur] > vectorDocs[cur - 1]);
}
}
vectorValues = r.getVectorValues(fieldName);
cur = -1;
for (int i = 0; i < numdocs; i++) {
// randomly advance to i
if (random().nextInt(4) == 3) {
while (vectorDocs[++cur] < i)
;
assertEquals(vectorDocs[cur], vectorValues.advance(i));
assertEquals(vectorDocs[cur], vectorValues.docID());
if (vectorValues.docID() == NO_MORE_DOCS) {
break;
}
// make i equal to docid so that it is greater than docId in the next loop iteration
i = vectorValues.docID();
}
}
}
}
}
}
}