mirror of https://github.com/apache/lucene.git
Avoid wrapping readers without soft-deletes (#13588)
I analyzed a heap dump of Elasticsearch where FixedBitSet uses more than 1GB of memory. Most of these FixedBitSets are used by soft-deletes reader wrappers, even though these segments have no deletes at all. I believe these segments previously had soft-deletes, but these deletes were pruned by merges. The reason we wrap these readers is that the soft-deletes field still exists: since the segments had soft-deletes previously, the field-infos were carried over into the new segment. Ideally, we should have a way to check whether the returned docValues iterator is empty so that we can avoid allocating the FixedBitSet completely, or we should prune fields without values after merges.
This commit is contained in:
parent
00c9d9a03c
commit
b42fd8e479
|
@ -141,6 +141,9 @@ public final class SoftDeletesDirectoryReaderWrapper extends FilterDirectoryRead
|
|||
bits.set(0, reader.maxDoc());
|
||||
}
|
||||
int numSoftDeletes = PendingSoftDeletes.applySoftDeletes(iterator, bits);
|
||||
if (numSoftDeletes == 0) {
|
||||
return reader;
|
||||
}
|
||||
int numDeletes = reader.numDeletedDocs() + numSoftDeletes;
|
||||
int numDocs = reader.maxDoc() - numDeletes;
|
||||
assert assertDocCounts(numDocs, numSoftDeletes, reader);
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
@ -27,6 +29,7 @@ import org.apache.lucene.document.NumericDocValuesField;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
|
@ -264,4 +267,53 @@ public class TestSoftDeletesDirectoryReaderWrapper extends LuceneTestCase {
|
|||
assertEquals(1, leafCalled.get());
|
||||
IOUtils.close(reader, writer, dir);
|
||||
}
|
||||
|
||||
public void testAvoidWrappingReadersWithoutSoftDeletes() throws Exception {
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
String softDeletesField = "soft_deletes";
|
||||
iwc.setSoftDeletesField(softDeletesField);
|
||||
MergePolicy mergePolicy = iwc.mergePolicy;
|
||||
iwc.setMergePolicy(
|
||||
new SoftDeletesRetentionMergePolicy(softDeletesField, MatchAllDocsQuery::new, mergePolicy));
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, iwc)) {
|
||||
int numDocs = 1 + random().nextInt(10);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
Document doc = new Document();
|
||||
String docId = Integer.toString(i);
|
||||
doc.add(new StringField("id", docId, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
int numDeletes = 1 + random().nextInt(5);
|
||||
for (int i = 0; i < numDeletes; i++) {
|
||||
Document doc = new Document();
|
||||
String docId = Integer.toString(random().nextInt(numDocs));
|
||||
doc.add(new StringField("id", docId, Field.Store.YES));
|
||||
writer.softUpdateDocument(
|
||||
new Term("id", docId), doc, new NumericDocValuesField(softDeletesField, 0));
|
||||
}
|
||||
writer.flush();
|
||||
try (DirectoryReader reader = DirectoryReader.open(writer)) {
|
||||
SoftDeletesDirectoryReaderWrapper wrapped =
|
||||
new SoftDeletesDirectoryReaderWrapper(reader, softDeletesField);
|
||||
assertEquals(numDocs, wrapped.numDocs());
|
||||
assertEquals(numDeletes, wrapped.numDeletedDocs());
|
||||
}
|
||||
writer
|
||||
.getConfig()
|
||||
.setMergePolicy(
|
||||
new SoftDeletesRetentionMergePolicy(
|
||||
softDeletesField, MatchNoDocsQuery::new, mergePolicy));
|
||||
writer.forceMerge(1);
|
||||
try (DirectoryReader reader = DirectoryReader.open(writer)) {
|
||||
SoftDeletesDirectoryReaderWrapper wrapped =
|
||||
new SoftDeletesDirectoryReaderWrapper(reader, softDeletesField);
|
||||
assertEquals(numDocs, wrapped.numDocs());
|
||||
assertEquals(0, wrapped.numDeletedDocs());
|
||||
for (LeafReaderContext leaf : wrapped.leaves()) {
|
||||
assertThat(leaf.reader(), instanceOf(SegmentReader.class));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue