mirror of https://github.com/apache/lucene.git
LUCENE-9674: Use binary search in VectorValues.advance()
Lucene90VectorReader now implements advance() with a binary search in place of the prior linear scan. Co-authored-by: Anand Kotriwal <anakot@amazon.com>
This commit is contained in:
parent
37e31f2ac7
commit
e5a16f0b0f
|
@ -180,6 +180,9 @@ Improvements
|
||||||
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
|
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
|
||||||
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
|
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
|
||||||
|
|
||||||
|
* LUCENE-9674: Implement faster advance on VectorValues using binary search.
|
||||||
|
(Anand Kotriwal, Mike Sokolov)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.FloatBuffer;
|
import java.nio.FloatBuffer;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
@ -386,9 +387,19 @@ public final class Lucene90VectorReader extends VectorReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) {
|
||||||
// We could do better by log-binary search in ordToDoc, but this is never used
|
assert docID() < target;
|
||||||
return slowAdvance(target);
|
ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
|
||||||
|
if (ord < 0) {
|
||||||
|
ord = -(ord + 1);
|
||||||
|
}
|
||||||
|
assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
|
||||||
|
if (ord == fieldEntry.ordToDoc.length) {
|
||||||
|
doc = NO_MORE_DOCS;
|
||||||
|
} else {
|
||||||
|
doc = fieldEntry.ordToDoc[ord];
|
||||||
|
}
|
||||||
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -815,4 +815,50 @@ public class TestVectorValues extends LuceneTestCase {
|
||||||
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
|
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
|
||||||
assertEquals(3, VectorValues.SearchStrategy.values().length);
|
assertEquals(3, VectorValues.SearchStrategy.values().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAdvance() throws Exception {
|
||||||
|
try (Directory dir = newDirectory()) {
|
||||||
|
try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
|
||||||
|
int numdocs = atLeast(1500);
|
||||||
|
String fieldName = "field";
|
||||||
|
for (int i = 0; i < numdocs; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
// randomly add a vector field
|
||||||
|
if (random().nextInt(4) == 3) {
|
||||||
|
doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
w.forceMerge(1);
|
||||||
|
try (IndexReader reader = w.getReader()) {
|
||||||
|
LeafReader r = getOnlyLeafReader(reader);
|
||||||
|
VectorValues vectorValues = r.getVectorValues(fieldName);
|
||||||
|
int[] vectorDocs = new int[vectorValues.size() + 1];
|
||||||
|
int cur = -1;
|
||||||
|
while (++cur < vectorValues.size() + 1) {
|
||||||
|
vectorDocs[cur] = vectorValues.nextDoc();
|
||||||
|
if (cur != 0) {
|
||||||
|
assertTrue(vectorDocs[cur] > vectorDocs[cur - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vectorValues = r.getVectorValues(fieldName);
|
||||||
|
cur = -1;
|
||||||
|
for (int i = 0; i < numdocs; i++) {
|
||||||
|
// randomly advance to i
|
||||||
|
if (random().nextInt(4) == 3) {
|
||||||
|
while (vectorDocs[++cur] < i)
|
||||||
|
;
|
||||||
|
assertEquals(vectorDocs[cur], vectorValues.advance(i));
|
||||||
|
assertEquals(vectorDocs[cur], vectorValues.docID());
|
||||||
|
if (vectorValues.docID() == NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// make i equal to docid so that it is greater than docId in the next loop iteration
|
||||||
|
i = vectorValues.docID();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue