From 234059d2c091f192a1be6393c60f2a10e902dc54 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 9 Jan 2019 23:46:31 +0100 Subject: [PATCH] Enable Bulk-Merge if all source remains (#37269) Today we still wrap recovery source readers on merge even if we keep all documents recovery source. This basically disables bulk merging for stored fields. This change skips wrapping if all docs sources are kept anyway. --- .../RecoverySourcePruneMergePolicy.java | 16 ++++----- .../RecoverySourcePruneMergePolicyTests.java | 36 +++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java b/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java index 7faed37b2fd..42276f4ca21 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java +++ b/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java @@ -33,11 +33,8 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConjunctionDISI; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -68,15 +65,18 @@ final class RecoverySourcePruneMergePolicy extends OneMergeWrappingMergePolicy { if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return reader; // early terminate - nothing to do here since non of the docs has a recovery source anymore. } - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new DocValuesFieldExistsQuery(recoverySourceField), BooleanClause.Occur.FILTER); - builder.add(retainSourceQuerySupplier.get(), BooleanClause.Occur.FILTER); IndexSearcher s = new IndexSearcher(reader); s.setQueryCache(null); - Weight weight = s.createWeight(s.rewrite(builder.build()), ScoreMode.COMPLETE_NO_SCORES, 1.0f); + Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f); Scorer scorer = weight.scorer(reader.getContext()); if (scorer != null) { - return new SourcePruningFilterCodecReader(recoverySourceField, reader, BitSet.of(scorer.iterator(), reader.maxDoc())); + BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc()); + // calculating the cardinality is significantly cheaper than skipping all bulk-merging we might do + // if retentions are high we keep most of it + if (recoverySourceToKeep.cardinality() == reader.maxDoc()) { + return reader; // keep all source + } + return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep); } else { return new SourcePruningFilterCodecReader(recoverySourceField, reader, null); } diff --git a/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java b/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java index c46b47b87d0..ef895e1a4ce 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.StandardDirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; @@ -158,4 +159,39 @@ public class RecoverySourcePruneMergePolicyTests extends ESTestCase { } } } + + public void testPruneNone() throws IOException { + try (Directory dir = newDirectory()) { + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setMergePolicy(new RecoverySourcePruneMergePolicy("extra_source", + () -> new MatchAllDocsQuery(), iwc.getMergePolicy())); + try (IndexWriter writer = new IndexWriter(dir, iwc)) { + for (int i = 0; i < 20; i++) { + if (i > 0 && randomBoolean()) { + writer.flush(); + } + Document doc = new Document(); + doc.add(new StoredField("source", "hello world")); + doc.add(new StoredField("extra_source", "hello world")); + doc.add(new NumericDocValuesField("extra_source", 1)); + writer.addDocument(doc); + } + writer.forceMerge(1); + writer.commit(); + try (DirectoryReader reader = DirectoryReader.open(writer)) { + assertEquals(1, reader.leaves().size()); + NumericDocValues extra_source = reader.leaves().get(0).reader().getNumericDocValues("extra_source"); + assertNotNull(extra_source); + for (int i = 0; i < reader.maxDoc(); i++) { + Document document = reader.document(i); + Set collect = document.getFields().stream().map(IndexableField::name).collect(Collectors.toSet()); + assertTrue(collect.contains("source")); + assertTrue(collect.contains("extra_source")); + assertEquals(i, extra_source.nextDoc()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc()); + } + } + } + } }