mirror of https://github.com/apache/lucene.git
LUCENE-10040: Add test for vector search with skewed deletions (#527)
This exercises a challenging case where the documents to skip all happen to be closest to the query vector. In many cases, HNSW appears to be robust to this case and maintains good recall.
This commit is contained in:
parent
c74642d9a7
commit
394472d4b8
|
@ -196,6 +196,41 @@ public class TestHnswGraph extends LuceneTestCase {
|
|||
assertTrue("sum(result docs)=" + sum, sum < 75);
|
||||
}
|
||||
|
||||
public void testSearchWithSkewedAcceptOrds() throws IOException {
|
||||
int nDoc = 1000;
|
||||
CircularVectorValues vectors = new CircularVectorValues(nDoc);
|
||||
HnswGraphBuilder builder =
|
||||
new HnswGraphBuilder(
|
||||
vectors, VectorSimilarityFunction.DOT_PRODUCT, 16, 100, random().nextInt());
|
||||
HnswGraph hnsw = builder.build(vectors);
|
||||
|
||||
// Skip over half of the documents that are closest to the query vector
|
||||
FixedBitSet acceptOrds = new FixedBitSet(nDoc);
|
||||
for (int i = 500; i < nDoc; i++) {
|
||||
acceptOrds.set(i);
|
||||
}
|
||||
NeighborQueue nn =
|
||||
HnswGraph.search(
|
||||
new float[] {1, 0},
|
||||
10,
|
||||
10,
|
||||
vectors.randomAccess(),
|
||||
VectorSimilarityFunction.DOT_PRODUCT,
|
||||
hnsw,
|
||||
acceptOrds,
|
||||
new SplittableRandom(random().nextLong()));
|
||||
int[] nodes = nn.nodes();
|
||||
assertTrue("Number of found results is not equal to [10].", nodes.length == 10);
|
||||
int sum = 0;
|
||||
for (int node : nodes) {
|
||||
assertTrue("the results include a deleted document: " + node, acceptOrds.get(node));
|
||||
sum += node;
|
||||
}
|
||||
// We still expect to get reasonable recall. The lowest non-skipped docIds
|
||||
// are closest to the query vector: sum(500,509) = 5045
|
||||
assertTrue("sum(result docs)=" + sum, sum < 5100);
|
||||
}
|
||||
|
||||
public void testBoundsCheckerMax() {
|
||||
BoundsChecker max = BoundsChecker.create(false);
|
||||
float f = random().nextFloat() - 0.5f;
|
||||
|
|
Loading…
Reference in New Issue