LUCENE-9674: Use binary search in VectorValues.advance()

Lucene90VectorReader now implements advance() with a binary search in place of the prior linear scan.
Co-authored-by: Anand Kotriwal <anakot@amazon.com>
This commit is contained in:
Anand 2021-01-21 20:32:21 +05:30 committed by GitHub
parent 37e31f2ac7
commit e5a16f0b0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 3 deletions

View File

@ -180,6 +180,9 @@ Improvements
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag, * LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss). and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
* LUCENE-9674: Implement faster advance on VectorValues using binary search.
(Anand Kotriwal, Mike Sokolov)
Bug fixes Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.FloatBuffer; import java.nio.FloatBuffer;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
@ -386,9 +387,19 @@ public final class Lucene90VectorReader extends VectorReader {
} }
/**
 * Positions this iterator using a binary search over fieldEntry.ordToDoc, starting at ord + 1,
 * and returns the document found there.
 *
 * NOTE(review): this span is a side-by-side diff rendering. Rows that carry two statements show
 * the removed code (the slowAdvance delegation) on the left and the added code (the binary
 * search) on the right; the comments here describe the added code.
 *
 * Assumes fieldEntry.ordToDoc is sorted ascending, as Arrays.binarySearch requires; confirm
 * against the code that fills it.
 *
 * @param target the document to advance to; the added code asserts docID() < target
 * @return fieldEntry.ordToDoc[ord] for the position the search lands on, or NO_MORE_DOCS when
 *     that position equals fieldEntry.ordToDoc.length
 */
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) {
// We could do better by log-binary search in ordToDoc, but this is never used assert docID() < target;
return slowAdvance(target); ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
// Arrays.binarySearch reports a miss as -(insertionPoint) - 1; decode it back to the
// insertion point, i.e. the index of the first element greater than target.
if (ord < 0) {
ord = -(ord + 1);
}
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
// Landing on the array length means no element in the searched range is >= target.
if (ord == fieldEntry.ordToDoc.length) {
doc = NO_MORE_DOCS;
} else {
doc = fieldEntry.ordToDoc[ord];
}
return doc;
} }
@Override @Override

View File

@ -815,4 +815,50 @@ public class TestVectorValues extends LuceneTestCase {
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal()); assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
assertEquals(3, VectorValues.SearchStrategy.values().length); assertEquals(3, VectorValues.SearchStrategy.values().length);
} }
/**
 * Indexes documents of which a random subset carries a vector, records the documents that
 * nextDoc() enumerates, and then checks that advance() called with random targets lands on the
 * same documents.
 */
public void testAdvance() throws Exception {
  try (Directory dir = newDirectory();
      IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
    final int numdocs = atLeast(1500);
    final String fieldName = "field";
    for (int docIndex = 0; docIndex < numdocs; docIndex++) {
      Document doc = new Document();
      // a document gets a vector only when the random draw out of four values hits 3
      boolean withVector = random().nextInt(4) == 3;
      if (withVector) {
        doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
      }
      w.addDocument(doc);
    }
    // merge down to one segment so getOnlyLeafReader() can be used below
    w.forceMerge(1);
    try (IndexReader reader = w.getReader()) {
      LeafReader leaf = getOnlyLeafReader(reader);

      // Pass 1: enumerate with nextDoc(). One call more than size() is made, so the last slot
      // holds whatever nextDoc() returns after the final vector.
      VectorValues vectorValues = leaf.getVectorValues(fieldName);
      int[] expectedDocs = new int[vectorValues.size() + 1];
      for (int pos = 0; pos < vectorValues.size() + 1; pos++) {
        expectedDocs[pos] = vectorValues.nextDoc();
        if (pos > 0) {
          assertTrue(expectedDocs[pos] > expectedDocs[pos - 1]);
        }
      }

      // Pass 2: a fresh iterator, advanced to randomly chosen targets.
      vectorValues = leaf.getVectorValues(fieldName);
      int cursor = -1;
      for (int i = 0; i < numdocs; i++) {
        // only some targets are exercised
        if (random().nextInt(4) != 3) {
          continue;
        }
        // step to the first recorded doc that is not below the target
        do {
          cursor++;
        } while (expectedDocs[cursor] < i);
        assertEquals(expectedDocs[cursor], vectorValues.advance(i));
        assertEquals(expectedDocs[cursor], vectorValues.docID());
        if (vectorValues.docID() == NO_MORE_DOCS) {
          break;
        }
        // resume from the current doc so the next target is strictly greater than docID()
        i = vectorValues.docID();
      }
    }
  }
}
} }