mirror of https://github.com/apache/lucene.git
LUCENE-10016: remove fanout parameter from nearest neighbor vector search (#210)
This commit is contained in:
parent
2bd6924f07
commit
9b5e233960
|
@ -143,7 +143,7 @@ public class SimpleTextVectorReader extends VectorReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ public abstract class VectorFormat implements NamedSPILoader.NamedSPI {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs search(String field, float[] target, int k) {
|
||||
return TopDocsCollector.EMPTY_TOPDOCS;
|
||||
}
|
||||
|
||||
|
|
|
@ -51,12 +51,9 @@ public abstract class VectorReader implements Closeable, Accountable {
|
|||
* @param field the vector field to search
|
||||
* @param target the vector-valued query
|
||||
* @param k the number of docs to return
|
||||
* @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
|
||||
* cost
|
||||
* @return the k nearest neighbor documents, along with their (similarity-function-specific) scores.
|
||||
*/
|
||||
public abstract TopDocs search(String field, float[] target, int k, int fanout)
|
||||
throws IOException;
|
||||
public abstract TopDocs search(String field, float[] target, int k) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns an instance optimized for merging. This instance may only be consumed in the thread
|
||||
|
|
|
@ -241,7 +241,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
FieldEntry fieldEntry = fields.get(field);
|
||||
if (fieldEntry == null || fieldEntry.dimension == 0) {
|
||||
return null;
|
||||
|
@ -252,7 +252,7 @@ public final class Lucene90HnswVectorReader extends VectorReader {
|
|||
// use a seed that is fixed for the index so we get reproducible results for the same query
|
||||
final Random random = new Random(checksumSeed);
|
||||
NeighborQueue results =
|
||||
HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
|
||||
HnswGraph.search(target, k, k, vectorValues, getGraphValues(fieldEntry), random);
|
||||
int i = 0;
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
|
||||
boolean reversed = fieldEntry.similarityFunction.reversed;
|
||||
|
|
|
@ -240,12 +240,12 @@ public abstract class PerFieldVectorFormat extends VectorFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
VectorReader vectorReader = fields.get(field);
|
||||
if (vectorReader == null) {
|
||||
return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
|
||||
} else {
|
||||
return vectorReader.search(field, target, k, fanout);
|
||||
return vectorReader.search(field, target, k);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -211,7 +211,7 @@ public abstract class CodecReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
|
||||
public final TopDocs searchNearestVectors(String field, float[] target, int k)
|
||||
throws IOException {
|
||||
ensureOpen();
|
||||
FieldInfo fi = getFieldInfos().fieldInfo(field);
|
||||
|
@ -220,7 +220,7 @@ public abstract class CodecReader extends LeafReader {
|
|||
return null;
|
||||
}
|
||||
|
||||
return getVectorReader().search(field, target, k, fanout);
|
||||
return getVectorReader().search(field, target, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -53,8 +53,7 @@ abstract class DocValuesLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
|
||||
throws IOException {
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -345,9 +345,8 @@ public abstract class FilterLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
|
||||
throws IOException {
|
||||
return in.searchNearestVectors(field, target, k, fanout);
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
|
||||
return in.searchNearestVectors(field, target, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -222,12 +222,10 @@ public abstract class LeafReader extends IndexReader {
|
|||
* @param field the vector field to search
|
||||
* @param target the vector-valued query
|
||||
* @param k the number of docs to return
|
||||
* @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
|
||||
* cost
|
||||
* @return the k nearest neighbor documents, along with their (similarity-function-specific) scores.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
|
||||
public abstract TopDocs searchNearestVectors(String field, float[] target, int k)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
|
|
|
@ -209,9 +209,8 @@ class MergeReaderWrapper extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
|
||||
throws IOException {
|
||||
return in.searchNearestVectors(field, target, k, fanout);
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) throws IOException {
|
||||
return in.searchNearestVectors(field, target, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -398,11 +398,10 @@ public class ParallelLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String fieldName, float[] target, int k, int fanout)
|
||||
throws IOException {
|
||||
public TopDocs searchNearestVectors(String fieldName, float[] target, int k) throws IOException {
|
||||
ensureOpen();
|
||||
LeafReader reader = fieldToReader.get(fieldName);
|
||||
return reader == null ? null : reader.searchNearestVectors(fieldName, target, k, fanout);
|
||||
return reader == null ? null : reader.searchNearestVectors(fieldName, target, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -167,8 +167,8 @@ public final class SlowCodecReaderWrapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
|
||||
return reader.searchNearestVectors(field, target, k, fanout);
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
return reader.searchNearestVectors(field, target, k);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -315,7 +315,7 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs search(String field, float[] target, int k) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -82,8 +82,8 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||
public enum SimilarityFunction {
|
||||
|
||||
/**
|
||||
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int,
|
||||
* int)} is not supported for fields specifying this.
|
||||
* No similarity function is provided. Note: {@link VectorReader#search(String, float[], int)}
|
||||
* is not supported for fields specifying this.
|
||||
*/
|
||||
NONE,
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.codecs.VectorReader;
|
||||
import org.apache.lucene.index.KnnGraphValues;
|
||||
import org.apache.lucene.index.RandomAccessVectorValues;
|
||||
import org.apache.lucene.index.VectorValues;
|
||||
|
@ -47,10 +46,6 @@ import org.apache.lucene.util.SparseFixedBitSet;
|
|||
* searching the graph for each newly inserted node.
|
||||
* <li><code>maxConn</code> has the same meaning as <code>M</code> in the later paper; it controls
|
||||
* how many of the <code>efConst</code> neighbors are connected to the new node
|
||||
* <li><code>fanout</code> the fanout parameter of {@link VectorReader#search(String, float[],
|
||||
* int, int)} is used to control the values of <code>numSeed</code> and <code>topK</code> that
|
||||
* are passed to this API. Thus <code>fanout</code> is like a combination of <code>ef</code>
|
||||
* (search beam width) from the 2016 paper and <code>m</code> from the 2014 paper.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Note: The graph may be searched by multiple threads concurrently, but updates are not
|
||||
|
|
|
@ -106,14 +106,14 @@ public class TestPerFieldVectorFormat extends BaseVectorFormatTestCase {
|
|||
.leaves()
|
||||
.get(0)
|
||||
.reader()
|
||||
.searchNearestVectors("field1", new float[] {1, 2, 3}, 10, 1);
|
||||
.searchNearestVectors("field1", new float[] {1, 2, 3}, 10);
|
||||
assertEquals(1, hits1.scoreDocs.length);
|
||||
TopDocs hits2 =
|
||||
ireader
|
||||
.leaves()
|
||||
.get(0)
|
||||
.reader()
|
||||
.searchNearestVectors("field2", new float[] {1, 2, 3}, 10, 1);
|
||||
.searchNearestVectors("field2", new float[] {1, 2, 3}, 10);
|
||||
assertEquals(1, hits2.scoreDocs.length);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -292,7 +292,7 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
private static TopDocs doKnnSearch(IndexReader reader, float[] vector, int k) throws IOException {
|
||||
TopDocs[] results = new TopDocs[reader.leaves().size()];
|
||||
for (LeafReaderContext ctx : reader.leaves()) {
|
||||
results[ctx.ord] = ctx.reader().searchNearestVectors(KNN_GRAPH_FIELD, vector, k, 10);
|
||||
results[ctx.ord] = ctx.reader().searchNearestVectors(KNN_GRAPH_FIELD, vector, k);
|
||||
if (ctx.docBase > 0) {
|
||||
for (ScoreDoc doc : results[ctx.ord].scoreDocs) {
|
||||
doc.doc += ctx.docBase;
|
||||
|
|
|
@ -112,7 +112,7 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -423,7 +423,7 @@ public class KnnGraphTester {
|
|||
IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException {
|
||||
TopDocs[] results = new TopDocs[reader.leaves().size()];
|
||||
for (LeafReaderContext ctx : reader.leaves()) {
|
||||
results[ctx.ord] = ctx.reader().searchNearestVectors(field, vector, k, fanout);
|
||||
results[ctx.ord] = ctx.reader().searchNearestVectors(field, vector, k + fanout);
|
||||
int docBase = ctx.docBase;
|
||||
for (ScoreDoc scoreDoc : results[ctx.ord].scoreDocs) {
|
||||
scoreDoc.doc += docBase;
|
||||
|
|
|
@ -161,7 +161,7 @@ public class TermVectorLeafReader extends LeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -1373,7 +1373,7 @@ public class MemoryIndex {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -99,8 +99,8 @@ public class AssertingVectorFormat extends VectorFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
|
||||
TopDocs hits = delegate.search(field, target, k, fanout);
|
||||
public TopDocs search(String field, float[] target, int k) throws IOException {
|
||||
TopDocs hits = delegate.search(field, target, k);
|
||||
assert hits != null;
|
||||
assert hits.scoreDocs.length <= k;
|
||||
return hits;
|
||||
|
|
|
@ -216,7 +216,7 @@ public class QueryUtils {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
|
||||
public TopDocs searchNearestVectors(String field, float[] target, int k) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue