diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fa1d09cffa6..fce70e979ee 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -180,6 +180,9 @@ Improvements
 * LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
   and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
 
+* LUCENE-9674: Implement faster advance on VectorValues using binary search.
+  (Anand Kotriwal, Mike Sokolov)
+
 Bug fixes
 
 * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
index cab992220b8..91c49e5851a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.FloatBuffer;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
@@ -386,9 +387,17 @@ public final class Lucene90VectorReader extends VectorReader {
     }
 
     @Override
-    public int advance(int target) throws IOException {
-      // We could do better by log-binary search in ordToDoc, but this is never used
-      return slowAdvance(target);
+    public int advance(int target) {
+      assert docID() < target;
+      final int[] docs = fieldEntry.ordToDoc;
+      // Search only past the current ordinal. A miss is reported as -(insertionPoint) - 1;
+      // the insertion point is the first entry above target (binarySearch needs sorted input).
+      final int found = Arrays.binarySearch(docs, ord + 1, docs.length, target);
+      ord = found < 0 ? -(found + 1) : found;
+      assert ord >= 0 && ord <= docs.length;
+      // An ordinal one past the last slot means nothing at or after target is left.
+      doc = ord == docs.length ? NO_MORE_DOCS : docs[ord];
+      return doc;
     }
 
     @Override
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
index f61b4b86193..c66e7cf1e18 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
@@ -815,4 +815,57 @@ public class TestVectorValues extends LuceneTestCase {
     assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
     assertEquals(3, VectorValues.SearchStrategy.values().length);
   }
+
+  /**
+   * Verifies that {@code advance(target)} lands on the document found by scanning the doc ids
+   * collected beforehand with {@code nextDoc()}, for randomly chosen increasing targets.
+   */
+  public void testAdvance() throws Exception {
+    final String fieldName = "field";
+    try (Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+      final int numdocs = atLeast(1500);
+      for (int n = 0; n < numdocs; n++) {
+        Document doc = new Document();
+        // a vector is added only when the draw is 3, so many documents carry none
+        if (random().nextInt(4) == 3) {
+          doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
+        }
+        w.addDocument(doc);
+      }
+      w.forceMerge(1);
+      try (IndexReader reader = w.getReader()) {
+        LeafReader leaf = getOnlyLeafReader(reader);
+        VectorValues vectorValues = leaf.getVectorValues(fieldName);
+        // size() + 1 calls to nextDoc(): every vector doc plus whatever the iterator returns next
+        int[] expected = new int[vectorValues.size() + 1];
+        for (int k = 0; k < vectorValues.size() + 1; k++) {
+          expected[k] = vectorValues.nextDoc();
+          if (k > 0) {
+            assertTrue(expected[k] > expected[k - 1]);
+          }
+        }
+        // fetch the values again (presumably an unpositioned iterator) and replay via advance()
+        vectorValues = leaf.getVectorValues(fieldName);
+        int pos = -1;
+        for (int target = 0; target < numdocs; target++) {
+          // only targets whose draw is 3 are exercised
+          if (random().nextInt(4) != 3) {
+            continue;
+          }
+          // step to the first collected doc that is not below target
+          do {
+            pos++;
+          } while (expected[pos] < target);
+          assertEquals(expected[pos], vectorValues.advance(target));
+          assertEquals(expected[pos], vectorValues.docID());
+          if (vectorValues.docID() == NO_MORE_DOCS) {
+            break;
+          }
+          // continue from the doc just reached so the next target is greater than docID()
+          target = vectorValues.docID();
+        }
+      }
+    }
+  }
 }