mirror of https://github.com/apache/lucene.git
Have byte[] vectors also trigger a timeout in ExitableDirectoryReader (#12423)
`ExitableDirectoryReader` did not wrap searches over `byte[]` vectors, so query timeouts were not respected when searching `byte[]` vector fields through this reader. This commit fixes that bug.
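For context, here is a minimal usage sketch (not part of the commit) of the behavior this fix restores: wrap a reader in `ExitableDirectoryReader` with a `QueryTimeout`, and a `byte[]` kNN search is aborted with `ExitingReaderException` once the budget is exceeded. The class name, the `"vector"` field, the deadline-based timeout, and the assumption that `QueryTimeout` can be written as a lambda over `shouldExit()` are illustrative only, not taken from the commit; package locations follow the 9.x line.

// Sketch only: "vector" is a hypothetical KnnByteVectorField name; adjust to your index schema.
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ExitableDirectoryReader;
import org.apache.lucene.index.ExitableDirectoryReader.ExitingReaderException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.QueryTimeout;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class TimedByteVectorSearch {

  /** Runs a per-leaf byte[] kNN search that gives up once the time budget is spent. */
  public static void search(Directory dir, byte[] target, int k, long budgetMillis)
      throws IOException {
    final long deadline = System.nanoTime() + budgetMillis * 1_000_000L;
    // Assumption: QueryTimeout exposes a single shouldExit() method in this version,
    // so a lambda suffices; otherwise implement it as a small anonymous class.
    QueryTimeout timeout = () -> System.nanoTime() - deadline > 0;

    try (DirectoryReader reader =
        new ExitableDirectoryReader(DirectoryReader.open(dir), timeout)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        try {
          // With this fix, the byte[] overload also polls the timeout through the
          // wrapped acceptDocs while candidate vectors are visited.
          TopDocs perLeaf =
              ctx.reader()
                  .searchNearestVectors(
                      "vector", target, k, ctx.reader().getLiveDocs(), Integer.MAX_VALUE);
          System.out.println("leaf " + ctx.ord + ": " + perLeaf.scoreDocs.length + " hits");
        } catch (ExitingReaderException e) {
          // Budget exceeded mid-search on this leaf.
          System.out.println("leaf " + ctx.ord + ": timed out");
        }
      }
    }
  }
}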
parent 861153020a
commit d03c8f16d9
@@ -157,6 +157,8 @@ Bug Fixes
 * GITHUB#12413: Fix HNSW graph search bug that potentially leaked unapproved docs (Ben Trent).
 
+* GITHUB#12423: Respect timeouts in ExitableDirectoryReader when searching with byte[] vectors (Ben Trent).
+
 Other
 ---------------------
@@ -364,6 +364,37 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
       return in.searchNearestVectors(field, target, k, timeoutCheckingAcceptDocs, visitedLimit);
     }
 
+    @Override
+    public TopDocs searchNearestVectors(
+        String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
+      // when acceptDocs is null due to no doc deleted, we will instantiate a new one that would
+      // match all docs to allow timeout checking.
+      final Bits updatedAcceptDocs =
+          acceptDocs == null ? new Bits.MatchAllBits(maxDoc()) : acceptDocs;
+
+      Bits timeoutCheckingAcceptDocs =
+          new Bits() {
+            private static final int MAX_CALLS_BEFORE_QUERY_TIMEOUT_CHECK = 16;
+            private int calls;
+
+            @Override
+            public boolean get(int index) {
+              if (calls++ % MAX_CALLS_BEFORE_QUERY_TIMEOUT_CHECK == 0) {
+                checkAndThrowForSearchVectors();
+              }
+
+              return updatedAcceptDocs.get(index);
+            }
+
+            @Override
+            public int length() {
+              return updatedAcceptDocs.length();
+            }
+          };
+
+      return in.searchNearestVectors(field, target, k, timeoutCheckingAcceptDocs, visitedLimit);
+    }
+
     private void checkAndThrowForSearchVectors() {
       if (queryTimeout.shouldExit()) {
         throw new ExitingReaderException(
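The `byte[]` overload above mirrors the existing `float[]` path: a `null` `acceptDocs` is replaced with a `Bits.MatchAllBits` so the timeout is still polled when no documents are deleted, and `QueryTimeout.shouldExit()` is consulted only once every `MAX_CALLS_BEFORE_QUERY_TIMEOUT_CHECK` (16) calls to `Bits#get`, which keeps the cost of timeout checking low on the per-candidate hot path.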
@@ -25,6 +25,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.KnnByteVectorField;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
@@ -404,7 +405,7 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
     directory.close();
   }
 
-  public void testVectorValues() throws IOException {
+  public void testFloatVectorValues() throws IOException {
     Directory directory = newDirectory();
     IndexWriter writer =
         new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -485,6 +486,81 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
     directory.close();
   }
 
+  public void testByteVectorValues() throws IOException {
+    Directory directory = newDirectory();
+    IndexWriter writer =
+        new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    int numDoc = atLeast(20);
+    int deletedDoc = atMost(5);
+    int dimension = atLeast(3);
+
+    for (int i = 0; i < numDoc; i++) {
+      Document doc = new Document();
+      byte[] value = new byte[dimension];
+      random().nextBytes(value);
+      doc.add(new KnnByteVectorField("vector", value, VectorSimilarityFunction.COSINE));
+      doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
+      writer.addDocument(doc);
+    }
+
+    writer.forceMerge(1);
+    writer.commit();
+
+    for (int i = 0; i < deletedDoc; i++) {
+      writer.deleteDocuments(new Term("id", Integer.toString(i)));
+    }
+
+    writer.close();
+
+    QueryTimeout queryTimeout;
+    if (random().nextBoolean()) {
+      queryTimeout = immediateQueryTimeout();
+    } else {
+      queryTimeout = infiniteQueryTimeout();
+    }
+
+    DirectoryReader directoryReader = DirectoryReader.open(directory);
+    DirectoryReader exitableDirectoryReader = directoryReader;
+    exitableDirectoryReader = new ExitableDirectoryReader(directoryReader, queryTimeout);
+    IndexReader reader = new TestReader(getOnlyLeafReader(exitableDirectoryReader));
+
+    LeafReaderContext context = reader.leaves().get(0);
+    LeafReader leaf = context.reader();
+
+    if (queryTimeout.shouldExit()) {
+      expectThrows(
+          ExitingReaderException.class,
+          () -> {
+            DocIdSetIterator iter = leaf.getByteVectorValues("vector");
+            scanAndRetrieve(leaf, iter);
+          });
+
+      expectThrows(
+          ExitingReaderException.class,
+          () ->
+              leaf.searchNearestVectors(
+                  "vector",
+                  TestVectorUtil.randomVectorBytes(dimension),
+                  5,
+                  leaf.getLiveDocs(),
+                  Integer.MAX_VALUE));
+    } else {
+      DocIdSetIterator iter = leaf.getByteVectorValues("vector");
+      scanAndRetrieve(leaf, iter);
+
+      leaf.searchNearestVectors(
+          "vector",
+          TestVectorUtil.randomVectorBytes(dimension),
+          5,
+          leaf.getLiveDocs(),
+          Integer.MAX_VALUE);
+    }
+
+    reader.close();
+    directory.close();
+  }
+
   private static void scanAndRetrieve(LeafReader leaf, DocIdSetIterator iter) throws IOException {
     for (iter.nextDoc();
         iter.docID() != DocIdSetIterator.NO_MORE_DOCS && iter.docID() < leaf.maxDoc(); ) {