mirror of https://github.com/apache/lucene.git
Stop exploring HNSW graph if scores are not getting better. (#12770)
I noticed while testing lower dimensionality and quantization, we would explore the HNSW graph way too much. I was stuck figuring out why until I noticed the searcher checks for distance equality (not just if the distance is better) when exploring neighbors-of-neighbors. This seems like a bad heuristic, but to double check I looked at what nmslib does. This pointed me back to this commit: nmslib/nmslib#106 Seems like this performance hitch was discovered awhile ago :). This commit adjusts HNSW to only explore the graph layer if the distance is actually better.
This commit is contained in:
parent
3231028070
commit
e1ce1d692c
|
@ -288,6 +288,8 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12736: Fix NullPointerException when Monitor.getQuery cannot find the requested queryId (Davis Cook)
|
||||
|
||||
* GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
|
||||
|
||||
Build
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -174,8 +174,7 @@ public class HnswGraphSearcher {
|
|||
}
|
||||
float friendSimilarity = scorer.score(friendOrd);
|
||||
visitedCount++;
|
||||
if (friendSimilarity > currentScore
|
||||
|| (friendSimilarity == currentScore && friendOrd < currentEp)) {
|
||||
if (friendSimilarity > currentScore) {
|
||||
currentScore = friendSimilarity;
|
||||
currentEp = friendOrd;
|
||||
foundBetter = true;
|
||||
|
@ -243,7 +242,7 @@ public class HnswGraphSearcher {
|
|||
}
|
||||
float friendSimilarity = scorer.score(friendOrd);
|
||||
results.incVisitedCount(1);
|
||||
if (friendSimilarity >= minAcceptedSimilarity) {
|
||||
if (friendSimilarity > minAcceptedSimilarity) {
|
||||
candidates.add(friendOrd, friendSimilarity);
|
||||
if (acceptOrds == null || acceptOrds.get(friendOrd)) {
|
||||
if (results.collect(friendOrd, friendSimilarity)) {
|
||||
|
|
|
@ -281,12 +281,12 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
|
|||
|
||||
DocIdSetIterator it = scorer.iterator();
|
||||
assertEquals(3, it.cost());
|
||||
assertEquals(1, it.nextDoc());
|
||||
assertEquals(1 / 6f, scorer.score(), 0);
|
||||
assertEquals(3, it.advance(3));
|
||||
assertEquals(2, it.nextDoc());
|
||||
assertEquals(1 / 2f, scorer.score(), 0);
|
||||
assertEquals(4, it.advance(4));
|
||||
assertEquals(1 / 6f, scorer.score(), 0);
|
||||
|
||||
assertEquals(NO_MORE_DOCS, it.advance(4));
|
||||
assertEquals(NO_MORE_DOCS, it.advance(5));
|
||||
expectThrows(ArrayIndexOutOfBoundsException.class, scorer::score);
|
||||
}
|
||||
}
|
||||
|
@ -384,7 +384,7 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
|
|||
assertEquals(0, matched.getDetails().length);
|
||||
assertEquals("within top 3 docs", matched.getDescription());
|
||||
|
||||
Explanation nomatch = searcher.explain(query, 4);
|
||||
Explanation nomatch = searcher.explain(query, 1);
|
||||
assertFalse(nomatch.isMatch());
|
||||
assertEquals(0f, nomatch.getValue());
|
||||
assertEquals(0, matched.getDetails().length);
|
||||
|
|
Loading…
Reference in New Issue