Avoid wrapping readers without soft-deletes (#13588)

I analyzed a heap dump of Elasticsearch where FixedBitSet uses more than 
1GB of memory. Most of these FixedBitSets are used by soft-deletes 
reader wrappers, even though these segments have no deletes at all. I
believe these segments previously had soft-deletes, but these deletes
were pruned by merges. The reason we wrap these readers is that the
soft-deletes field exists. Since these segments had soft-deletes
previously, we carried the field-infos into the new segment. Ideally, we
should have ways to check whether the returned docValues iterator is
empty or not so that we can avoid allocating FixedBitSet completely, or
we should prune fields without values after merges.
This commit is contained in:
Nhat Nguyen 2024-07-18 22:47:44 -07:00 committed by GitHub
parent 00c9d9a03c
commit b42fd8e479
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 55 additions and 0 deletions

View File

@@ -141,6 +141,9 @@ public final class SoftDeletesDirectoryReaderWrapper extends FilterDirectoryRead
bits.set(0, reader.maxDoc());
}
int numSoftDeletes = PendingSoftDeletes.applySoftDeletes(iterator, bits);
if (numSoftDeletes == 0) {
return reader;
}
int numDeletes = reader.numDeletedDocs() + numSoftDeletes;
int numDocs = reader.maxDoc() - numDeletes;
assert assertDocCounts(numDocs, numSoftDeletes, reader);

View File

@@ -17,6 +17,8 @@
package org.apache.lucene.index;
import static org.hamcrest.Matchers.instanceOf;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
@@ -27,6 +29,7 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
@@ -264,4 +267,53 @@ public class TestSoftDeletesDirectoryReaderWrapper extends LuceneTestCase {
assertEquals(1, leafCalled.get());
IOUtils.close(reader, writer, dir);
}
public void testAvoidWrappingReadersWithoutSoftDeletes() throws Exception {
  // Configure an index with a soft-deletes field and a retention policy that
  // keeps every soft-deleted doc, so the flushed segment retains its deletes.
  IndexWriterConfig config = newIndexWriterConfig();
  String softDeletesField = "soft_deletes";
  config.setSoftDeletesField(softDeletesField);
  MergePolicy basePolicy = config.mergePolicy;
  config.setMergePolicy(
      new SoftDeletesRetentionMergePolicy(softDeletesField, MatchAllDocsQuery::new, basePolicy));
  try (Directory dir = newDirectory();
      IndexWriter writer = new IndexWriter(dir, config)) {
    // Index a handful of docs, then soft-update a few so the segment carries
    // live soft deletes.
    int docCount = 1 + random().nextInt(10);
    for (int i = 0; i < docCount; i++) {
      Document doc = new Document();
      String id = Integer.toString(i);
      doc.add(new StringField("id", id, Field.Store.YES));
      writer.addDocument(doc);
    }
    int softDeleteCount = 1 + random().nextInt(5);
    for (int i = 0; i < softDeleteCount; i++) {
      Document doc = new Document();
      String id = Integer.toString(random().nextInt(docCount));
      doc.add(new StringField("id", id, Field.Store.YES));
      writer.softUpdateDocument(
          new Term("id", id), doc, new NumericDocValuesField(softDeletesField, 0));
    }
    writer.flush();
    // While soft deletes are present, the wrapper must hide them.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
      SoftDeletesDirectoryReaderWrapper wrapped =
          new SoftDeletesDirectoryReaderWrapper(reader, softDeletesField);
      assertEquals(docCount, wrapped.numDocs());
      assertEquals(softDeleteCount, wrapped.numDeletedDocs());
    }
    // Swap in a retention policy that keeps nothing, then force-merge so the
    // soft deletes are pruned while the soft-deletes field info survives.
    writer
        .getConfig()
        .setMergePolicy(
            new SoftDeletesRetentionMergePolicy(
                softDeletesField, MatchNoDocsQuery::new, basePolicy));
    writer.forceMerge(1);
    // With zero soft deletes remaining, the wrapper should hand back the
    // SegmentReader leaves unwrapped instead of allocating FixedBitSets.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
      SoftDeletesDirectoryReaderWrapper wrapped =
          new SoftDeletesDirectoryReaderWrapper(reader, softDeletesField);
      assertEquals(docCount, wrapped.numDocs());
      assertEquals(0, wrapped.numDeletedDocs());
      for (LeafReaderContext leaf : wrapped.leaves()) {
        assertThat(leaf.reader(), instanceOf(SegmentReader.class));
      }
    }
  }
}
}