Stop exploring HNSW graph if scores are not getting better. (#12770)

I noticed while testing lower dimensionality and quantization, we would explore the HNSW graph way too much. I was stuck figuring out why until I noticed the searcher checks for distance equality (not just if the distance is better) when exploring neighbors-of-neighbors. This seems like a bad heuristic, but to double check I looked at what nmslib does. This pointed me back to this commit: nmslib/nmslib#106

Seems like this performance hitch was discovered awhile ago :).

This commit adjusts HNSW to only explore the graph layer if the distance is actually better.
This commit is contained in:
Benjamin Trent 2023-11-07 06:55:42 -05:00 committed by GitHub
parent 3231028070
commit e1ce1d692c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 8 deletions

View File

@ -288,6 +288,8 @@ Bug Fixes
* GITHUB#12736: Fix NullPointerException when Monitor.getQuery cannot find the requested queryId (Davis Cook)
* GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
Build
---------------------

View File

@ -174,8 +174,7 @@ public class HnswGraphSearcher {
}
float friendSimilarity = scorer.score(friendOrd);
visitedCount++;
if (friendSimilarity > currentScore
|| (friendSimilarity == currentScore && friendOrd < currentEp)) {
if (friendSimilarity > currentScore) {
currentScore = friendSimilarity;
currentEp = friendOrd;
foundBetter = true;
@ -243,7 +242,7 @@ public class HnswGraphSearcher {
}
float friendSimilarity = scorer.score(friendOrd);
results.incVisitedCount(1);
if (friendSimilarity >= minAcceptedSimilarity) {
if (friendSimilarity > minAcceptedSimilarity) {
candidates.add(friendOrd, friendSimilarity);
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
if (results.collect(friendOrd, friendSimilarity)) {

View File

@ -281,12 +281,12 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
DocIdSetIterator it = scorer.iterator();
assertEquals(3, it.cost());
assertEquals(1, it.nextDoc());
assertEquals(1 / 6f, scorer.score(), 0);
assertEquals(3, it.advance(3));
assertEquals(2, it.nextDoc());
assertEquals(1 / 2f, scorer.score(), 0);
assertEquals(4, it.advance(4));
assertEquals(1 / 6f, scorer.score(), 0);
assertEquals(NO_MORE_DOCS, it.advance(4));
assertEquals(NO_MORE_DOCS, it.advance(5));
expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
}
}
@ -384,7 +384,7 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
assertEquals(0, matched.getDetails().length);
assertEquals("within top 3 docs", matched.getDescription());
Explanation nomatch = searcher.explain(query, 4);
Explanation nomatch = searcher.explain(query, 1);
assertFalse(nomatch.isMatch());
assertEquals(0f, nomatch.getValue());
assertEquals(0, matched.getDetails().length);