mirror of https://github.com/apache/lucene.git
LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)
TestKnnVectorQuery#testDeletes assumes that if there are n total documents, we can perform a kNN search with k=n and retrieve all documents. This isn't true with our implementation: due to randomization, we may select fewer than n entry points and never visit some vectors.
This commit is contained in:
parent
83ba5d859c
commit
782c3cca3a
|
@ -313,15 +313,11 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
|||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||
final int numDocs = atLeast(100);
|
||||
final int dim = 30;
|
||||
int docIndex = 0;
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document d = new Document();
|
||||
d.add(new StringField("index", String.valueOf(i), Field.Store.YES));
|
||||
if (frequently()) {
|
||||
d.add(new StringField("index", String.valueOf(docIndex), Field.Store.YES));
|
||||
d.add(new KnnVectorField("vector", randomVector(dim)));
|
||||
docIndex++;
|
||||
} else {
|
||||
d.add(new StringField("other", "value" + (i % 5), Field.Store.NO));
|
||||
}
|
||||
w.addDocument(d);
|
||||
}
|
||||
|
@ -329,18 +325,18 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
|||
|
||||
// Delete some documents at random, both those with and without vectors
|
||||
Set<Term> toDelete = new HashSet<>();
|
||||
for (int i = 0; i < 20; i++) {
|
||||
int index = random().nextInt(docIndex);
|
||||
for (int i = 0; i < 25; i++) {
|
||||
int index = random().nextInt(numDocs);
|
||||
toDelete.add(new Term("index", String.valueOf(index)));
|
||||
}
|
||||
w.deleteDocuments(toDelete.toArray(new Term[0]));
|
||||
w.deleteDocuments(new Term("other", "value" + random().nextInt(5)));
|
||||
w.commit();
|
||||
|
||||
int hits = 50;
|
||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||
Set<String> allIds = new HashSet<>();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs);
|
||||
KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), hits);
|
||||
TopDocs topDocs = searcher.search(query, numDocs);
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
Document doc = reader.document(scoreDoc.doc, Set.of("index"));
|
||||
|
@ -350,7 +346,7 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
|||
toDelete.contains(new Term("index", index)));
|
||||
allIds.add(index);
|
||||
}
|
||||
assertEquals("search missed some documents", docIndex - toDelete.size(), allIds.size());
|
||||
assertEquals("search missed some documents", hits, allIds.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue