mirror of https://github.com/apache/lucene.git
LUCENE-10040: Relax TestKnnVectorQuery#testDeletes assertion (#251)
TestKnnVectorQuery#testDeletes assumes that if there are n total documents, we can perform a kNN search with k=n and retrieve all documents. This isn't true with our implementation -- due to randomization, we may select fewer than n entry points and never visit some vectors.
This commit is contained in:
parent
83ba5d859c
commit
782c3cca3a
|
@ -313,15 +313,11 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
|
||||||
final int numDocs = atLeast(100);
|
final int numDocs = atLeast(100);
|
||||||
final int dim = 30;
|
final int dim = 30;
|
||||||
int docIndex = 0;
|
|
||||||
for (int i = 0; i < numDocs; ++i) {
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
Document d = new Document();
|
Document d = new Document();
|
||||||
|
d.add(new StringField("index", String.valueOf(i), Field.Store.YES));
|
||||||
if (frequently()) {
|
if (frequently()) {
|
||||||
d.add(new StringField("index", String.valueOf(docIndex), Field.Store.YES));
|
|
||||||
d.add(new KnnVectorField("vector", randomVector(dim)));
|
d.add(new KnnVectorField("vector", randomVector(dim)));
|
||||||
docIndex++;
|
|
||||||
} else {
|
|
||||||
d.add(new StringField("other", "value" + (i % 5), Field.Store.NO));
|
|
||||||
}
|
}
|
||||||
w.addDocument(d);
|
w.addDocument(d);
|
||||||
}
|
}
|
||||||
|
@ -329,18 +325,18 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
||||||
|
|
||||||
// Delete some documents at random, both those with and without vectors
|
// Delete some documents at random, both those with and without vectors
|
||||||
Set<Term> toDelete = new HashSet<>();
|
Set<Term> toDelete = new HashSet<>();
|
||||||
for (int i = 0; i < 20; i++) {
|
for (int i = 0; i < 25; i++) {
|
||||||
int index = random().nextInt(docIndex);
|
int index = random().nextInt(numDocs);
|
||||||
toDelete.add(new Term("index", String.valueOf(index)));
|
toDelete.add(new Term("index", String.valueOf(index)));
|
||||||
}
|
}
|
||||||
w.deleteDocuments(toDelete.toArray(new Term[0]));
|
w.deleteDocuments(toDelete.toArray(new Term[0]));
|
||||||
w.deleteDocuments(new Term("other", "value" + random().nextInt(5)));
|
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
|
int hits = 50;
|
||||||
try (IndexReader reader = DirectoryReader.open(dir)) {
|
try (IndexReader reader = DirectoryReader.open(dir)) {
|
||||||
Set<String> allIds = new HashSet<>();
|
Set<String> allIds = new HashSet<>();
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), numDocs);
|
KnnVectorQuery query = new KnnVectorQuery("vector", randomVector(dim), hits);
|
||||||
TopDocs topDocs = searcher.search(query, numDocs);
|
TopDocs topDocs = searcher.search(query, numDocs);
|
||||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||||
Document doc = reader.document(scoreDoc.doc, Set.of("index"));
|
Document doc = reader.document(scoreDoc.doc, Set.of("index"));
|
||||||
|
@ -350,7 +346,7 @@ public class TestKnnVectorQuery extends LuceneTestCase {
|
||||||
toDelete.contains(new Term("index", index)));
|
toDelete.contains(new Term("index", index)));
|
||||||
allIds.add(index);
|
allIds.add(index);
|
||||||
}
|
}
|
||||||
assertEquals("search missed some documents", docIndex - toDelete.size(), allIds.size());
|
assertEquals("search missed some documents", hits, allIds.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue