mirror of https://github.com/apache/lucene.git
Fix failing BaseVectorSimilarityQueryTestCase#testApproximate (#12922)
Discovered in #12921, and introduced in #12679.
The first issue is that we weren't advancing the `VectorScorer` [here](cf13a92950/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java#L257-L262) -- so it was still un-positioned while trying to compute the similarity score.
Earlier in the PR, the underlying delegate of the `FilteredDocIdSetIterator` was `scorer.iterator()` (see [here](cad565439b/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java#L107)) -- so we didn't need to explicitly advance it.
Later, we decided to maintain parity with `AbstractKnnVectorQuery` and introduce filtering in `AbstractVectorSimilarityQuery` (see [this commit](5096790f28)) to determine the `visitLimit` of approximate search -- after which the underlying iterator changed to the accepted docs (see [here](5096790f28/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java#L255)), and I missed advancing the `VectorScorer` explicitly.
After doing so, we no longer get the original `java.lang.ArrayIndexOutOfBoundsException` -- but `BaseVectorSimilarityQueryTestCase#testApproximate` starts failing because it falls back to exact search, as the limit of the prefilter is met during graph search.
Relaxed the parameters of the test to fix this (making the filter less restrictive, and trying to visit fewer nodes so that approximate search completes without hitting its limit).
Sorry for missing this earlier!
This commit is contained in:
parent
98d2df17d5
commit
6c5dcc1795
|
@ -255,6 +255,11 @@ abstract class AbstractVectorSimilarityQuery extends Query {
|
|||
new FilteredDocIdSetIterator(acceptDocs) {
|
||||
@Override
|
||||
protected boolean match(int doc) throws IOException {
|
||||
// Advance the scorer
|
||||
if (!scorer.advanceExact(doc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Compute the dot product
|
||||
float score = scorer.score();
|
||||
cachedScore[0] = score * boost;
|
||||
|
|
|
@ -87,6 +87,7 @@ abstract class VectorScorer {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
assert values.docID() != -1 : getClass().getSimpleName() + " is not positioned";
|
||||
return similarity.compare(query, values.vectorValue());
|
||||
}
|
||||
}
|
||||
|
@ -117,6 +118,7 @@ abstract class VectorScorer {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
assert values.docID() != -1 : getClass().getSimpleName() + " is not positioned";
|
||||
return similarity.compare(query, values.vectorValue());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -433,8 +433,8 @@ abstract class BaseVectorSimilarityQueryTestCase<
|
|||
|
||||
public void testApproximate() throws IOException {
|
||||
// Non-restrictive filter, along with similarity to visit a small number of nodes
|
||||
int numFiltered = random().nextInt((numDocs * 4) / 5, numDocs);
|
||||
int targetVisited = random().nextInt(numFiltered / 10, numFiltered / 8);
|
||||
int numFiltered = numDocs - 1;
|
||||
int targetVisited = random().nextInt(1, numFiltered / 10);
|
||||
|
||||
V[] vectors = getRandomVectors(numDocs, dim);
|
||||
V queryVector = getRandomVector(dim);
|
||||
|
|
Loading…
Reference in New Issue