LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)

TestKnnVectorQuery#testDeletes assumes that if there are n total documents, we
can perform a kNN search with k=n and retrieve all documents.

This isn't true with our implementation -- due to randomization we may select
fewer than n entry points and never visit some vectors.
This commit is contained in:
Julie Tibshirani 2021-08-24 11:15:27 -07:00 committed by GitHub
parent 83ba5d859c
commit 782c3cca3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 10 deletions

View File

@ -313,15 +313,11 @@ public class TestKnnVectorQuery extends LuceneTestCase {
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
final int numDocs = atLeast(100); final int numDocs = atLeast(100);
final int dim = 30; final int dim = 30;
int docIndex = 0;
for (int i = 0; i < numDocs; ++i) { for (int i = 0; i < numDocs; ++i) {
Document d = new Document(); Document d = new Document();
d.add(new StringField("index", String.valueOf(i), Field.Store.YES));
if (frequently()) { if (frequently()) {
d.add(new StringField("index", String.valueOf(docIndex), Field.Store.YES));
d.add(new KnnVectorField("vector", randomVector(dim))); d.add(new KnnVectorField("vector", randomVector(dim)));
docIndex++;
} else {
d.add(new StringField("other", "value" + (i % 5), Field.Store.NO));
} }
w.addDocument(d); w.addDocument(d);
} }
@ -329,18 +325,18 @@ public class TestKnnVectorQuery extends LuceneTestCase {
// Delete some documents at random, both those with and without vectors // Delete some documents at random, both those with and without vectors
Set<Term> toDelete = new HashSet<>(); Set<Term> toDelete = new HashSet<>();
for (int i = 0; i < 20; i++) { for (int i = 0; i < 25; i++) {
int index = random().nextInt(docIndex); int index = random().nextInt(numDocs);
toDelete.add(new Term("index", String.valueOf(index))); toDelete.add(new Term("index", String.valueOf(index)));
} }
w.deleteDocuments(toDelete.toArray(new Term[0])); w.deleteDocuments(toDelete.toArray(new Term[0]));
w.deleteDocuments(new Term("other", "value" + random().nextInt(5)));
w.commit(); w.commit();
int hits = 50;
try (IndexReader reader = DirectoryReader.open(dir)) { try (IndexReader reader = DirectoryReader.open(dir)) {
Set<String> allIds = new HashSet<>(); Set<String> allIds = new HashSet<>();
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = new IndexSearcher(reader);
KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs); KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), hits);
TopDocs topDocs = searcher.search(query, numDocs); TopDocs topDocs = searcher.search(query, numDocs);
for (ScoreDoc scoreDoc : topDocs.scoreDocs) { for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document doc = reader.document(scoreDoc.doc, Set.of("index")); Document doc = reader.document(scoreDoc.doc, Set.of("index"));
@ -350,7 +346,7 @@ public class TestKnnVectorQuery extends LuceneTestCase {
toDelete.contains(new Term("index", index))); toDelete.contains(new Term("index", index)));
allIds.add(index); allIds.add(index);
} }
assertEquals("search missed some documents", docIndex - toDelete.size(), allIds.size()); assertEquals("search missed some documents", hits, allIds.size());
} }
} }
} }