LUCENE-10016: remove fanout parameter from nearest neighbor vector search (#210)

This commit is contained in:
Michael Sokolov 2021-07-17 11:12:15 -04:00 committed by GitHub
parent 2bd6924f07
commit 9b5e233960
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 32 additions and 46 deletions

View File

@ -143,7 +143,7 @@ public class SimpleTextVectorReader extends VectorReader {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
public TopDocs search(String field, float[] target, int k) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -99,7 +99,7 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) {
public TopDocs search(String field, float[] target, int k) {
return TopDocsCollector.EMPTY_TOPDOCS;
}

View File

@ -51,12 +51,9 @@ public abstract class VectorReader implements Closeable, Accountable {
* @param field the vector field to search
* @param target the vector-valued query
* @param k the number of docs to return
* @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
* cost
* @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
*/
public abstract TopDocs search(String field, float[] target, int k, int fanout)
throws IOException;
public abstract TopDocs search(String field, float[] target, int k) throws IOException;
/**
* Returns an instance optimized for merging. This instance may only be consumed in the thread

View File

@ -241,7 +241,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
public TopDocs search(String field, float[] target, int k) throws IOException {
FieldEntry fieldEntry = fields.get(field);
if (fieldEntry == null || fieldEntry.dimension == 0) {
return null;
@ -252,7 +252,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
// use a seed that is fixed for the index so we get reproducible results for the same query
final Random random = new Random(checksumSeed);
NeighborQueue results =
HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
HnswGraph.search(target, k, k, vectorValues, getGraphValues(fieldEntry), random);
int i = 0;
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
boolean reversed = fieldEntry.similarityFunction.reversed;

View File

@ -240,12 +240,12 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
public TopDocs search(String field, float[] target, int k) throws IOException {
VectorReader vectorReader = fields.get(field);
if (vectorReader == null) {
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
} else {
return vectorReader.search(field, target, k, fanout);
return vectorReader.search(field, target, k);
}
}

View File

@ -211,7 +211,7 @@ public abstract class CodecReader extends LeafReader {
}
@Override
public final TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
public final TopDocs searchNearestVectors(String field, float[] target, int k)
throws IOException {
ensureOpen();
FieldInfo fi = getFieldInfos().fieldInfo(field);
@ -220,7 +220,7 @@ public abstract class CodecReader extends LeafReader {
return null;
}
return getVectorReader().search(field, target, k, fanout);
return getVectorReader().search(field, target, k);
}
@Override

View File

@ -53,8 +53,7 @@ abstract class DocValuesLeafReader extends LeafReader {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
throws IOException {
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -345,9 +345,8 @@ public abstract class FilterLeafReader extends LeafReader {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
throws IOException {
return in.searchNearestVectors(field, target, k, fanout);
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
return in.searchNearestVectors(field, target, k);
}
@Override

View File

@ -222,12 +222,10 @@ public abstract class LeafReader extends IndexReader {
* @param field the vector field to search
* @param target the vector-valued query
* @param k the number of docs to return
* @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
* cost
* @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
* @lucene.experimental
*/
public abstract TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
public abstract TopDocs searchNearestVectors(String field, float[] target, int k)
throws IOException;
/**

View File

@ -209,9 +209,8 @@ class MergeReaderWrapper extends LeafReader {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
throws IOException {
return in.searchNearestVectors(field, target, k, fanout);
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
return in.searchNearestVectors(field, target, k);
}
@Override

View File

@ -398,11 +398,10 @@ public class ParallelLeafReader extends LeafReader {
}
@Override
public TopDocs searchNearestVectors(String fieldName, float[] target, int k, int fanout)
throws IOException {
public TopDocs searchNearestVectors(String fieldName, float[] target, int k) throws IOException {
ensureOpen();
LeafReader reader = fieldToReader.get(fieldName);
return reader == null ? null : reader.searchNearestVectors(fieldName, target, k, fanout);
return reader == null ? null : reader.searchNearestVectors(fieldName, target, k);
}
@Override

View File

@ -167,8 +167,8 @@ public final class SlowCodecReaderWrapper {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
return reader.searchNearestVectors(field, target, k, fanout);
public TopDocs search(String field, float[] target, int k) throws IOException {
return reader.searchNearestVectors(field, target, k);
}
@Override

View File

@ -315,7 +315,7 @@ public final class SortingCodecReader extends FilterCodecReader {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) {
public TopDocs search(String field, float[] target, int k) {
throw new UnsupportedOperationException();
}

View File

@ -82,8 +82,8 @@ public abstract class VectorValues extends DocIdSetIterator {
public enum SimilarityFunction {
/**
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int,
* int)} is not supported for fields specifying this.
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int)}
* is not supported for fields specifying this.
*/
NONE,

View File

@ -23,7 +23,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.index.KnnGraphValues;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
@ -47,10 +46,6 @@ import org.apache.lucene.util.SparseFixedBitSet;
* searching the graph for each newly inserted node.
* <li><code>maxConn</code> has the same meaning as <code>M</code> in the later paper; it controls
* how many of the <code>efConst</code> neighbors are connected to the new node
* <li><code>fanout</code> the fanout parameter of {@link VectorReader#search(String, float[],
* int, int)} is used to control the values of <code>numSeed</code> and <code>topK</code> that
* are passed to this API. Thus <code>fanout</code> is like a combination of <code>ef</code>
* (search beam width) from the 2016 paper and <code>m</code> from the 2014 paper.
* </ul>
*
* <p>Note: The graph may be searched by multiple threads concurrently, but updates are not

View File

@ -106,14 +106,14 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
.leaves()
.get(0)
.reader()
.searchNearestVectors("field1", new float[] {1, 2, 3}, 10, 1);
.searchNearestVectors("field1", new float[] {1, 2, 3}, 10);
assertEquals(1, hits1.scoreDocs.length);
TopDocs hits2 =
ireader
.leaves()
.get(0)
.reader()
.searchNearestVectors("field2", new float[] {1, 2, 3}, 10, 1);
.searchNearestVectors("field2", new float[] {1, 2, 3}, 10);
assertEquals(1, hits2.scoreDocs.length);
}
}

View File

@ -292,7 +292,7 @@ public class TestKnnGraph extends LuceneTestCase {
private static TopDocs doKnnSearch(IndexReader reader, float[] vector, int k) throws IOException {
TopDocs[] results = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
results[ctx.ord] = ctx.reader().searchNearestVectors(KNN_GRAPH_FIELD, vector, k, 10);
results[ctx.ord] = ctx.reader().searchNearestVectors(KNN_GRAPH_FIELD, vector, k);
if (ctx.docBase > 0) {
for (ScoreDoc doc : results[ctx.ord].scoreDocs) {
doc.doc += ctx.docBase;

View File

@ -112,7 +112,7 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
public TopDocs searchNearestVectors(String field, float[] target, int k) {
return null;
}

View File

@ -423,7 +423,7 @@ public class KnnGraphTester {
IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException {
TopDocs[] results = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
results[ctx.ord] = ctx.reader().searchNearestVectors(field, vector, k, fanout);
results[ctx.ord] = ctx.reader().searchNearestVectors(field, vector, k + fanout);
int docBase = ctx.docBase;
for (ScoreDoc scoreDoc : results[ctx.ord].scoreDocs) {
scoreDoc.doc += docBase;

View File

@ -161,7 +161,7 @@ public class TermVectorLeafReader extends LeafReader {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
public TopDocs searchNearestVectors(String field, float[] target, int k) {
return null;
}

View File

@ -1373,7 +1373,7 @@ public class MemoryIndex {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
public TopDocs searchNearestVectors(String field, float[] target, int k) {
return null;
}

View File

@ -99,8 +99,8 @@ public class AssertingVectorFormat extends VectorFormat {
}
@Override
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
TopDocs hits = delegate.search(field, target, k, fanout);
public TopDocs search(String field, float[] target, int k) throws IOException {
TopDocs hits = delegate.search(field, target, k);
assert hits != null;
assert hits.scoreDocs.length <= k;
return hits;

View File

@ -216,7 +216,7 @@ public class QueryUtils {
}
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
public TopDocs searchNearestVectors(String field, float[] target, int k) {
return null;
}