Enable Bulk-Merge if all source remains (#37269)

Today we still wrap recovery source readers on merge even if we keep all documents recovery source. This basically disables bulk merging for stored fields. This change skips wrapping if all docs sources are kept anyway.
2019-01-09 23:46:31 +01:00 · 2019-01-09 23:46:31 +01:00 · 234059d2c0
parent 95479f1766
commit 234059d2c0
2 changed files with 44 additions and 8 deletions
--- a/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java
+++ b/server/src/main/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicy.java
@ -33,11 +33,8 @@ import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.StoredFieldVisitor;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
@ -68,15 +65,18 @@ final class RecoverySourcePruneMergePolicy extends OneMergeWrappingMergePolicy {
        if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
            return reader; // early terminate - nothing to do here since non of the docs has a recovery source anymore.
        }
-        BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        builder.add(new DocValuesFieldExistsQuery(recoverySourceField), BooleanClause.Occur.FILTER);
-        builder.add(retainSourceQuerySupplier.get(), BooleanClause.Occur.FILTER);
        IndexSearcher s = new IndexSearcher(reader);
        s.setQueryCache(null);
-        Weight weight = s.createWeight(s.rewrite(builder.build()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
+        Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
        Scorer scorer = weight.scorer(reader.getContext());
        if (scorer != null) {
-            return new SourcePruningFilterCodecReader(recoverySourceField, reader, BitSet.of(scorer.iterator(), reader.maxDoc()));
+            BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
+            // calculating the cardinality is significantly cheaper than skipping all bulk-merging we might do
+            // if retentions are high we keep most of it
+            if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
+                return reader; // keep all source
+            }
+            return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
        } else {
            return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
        }
--- a/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java
+++ b/server/src/test/java/org/elasticsearch/index/engine/RecoverySourcePruneMergePolicyTests.java
@ -37,6 +37,7 @@ import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.StandardDirectoryReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
@ -158,4 +159,39 @@ public class RecoverySourcePruneMergePolicyTests extends ESTestCase {
            }
        }
    }
+
+    public void testPruneNone() throws IOException {
+        try (Directory dir = newDirectory()) {
+            IndexWriterConfig iwc = newIndexWriterConfig();
+            iwc.setMergePolicy(new RecoverySourcePruneMergePolicy("extra_source",
+                () -> new MatchAllDocsQuery(), iwc.getMergePolicy()));
+            try (IndexWriter writer = new IndexWriter(dir, iwc)) {
+                for (int i = 0; i < 20; i++) {
+                    if (i > 0 && randomBoolean()) {
+                        writer.flush();
+                    }
+                    Document doc = new Document();
+                    doc.add(new StoredField("source", "hello world"));
+                    doc.add(new StoredField("extra_source", "hello world"));
+                    doc.add(new NumericDocValuesField("extra_source", 1));
+                    writer.addDocument(doc);
+                }
+                writer.forceMerge(1);
+                writer.commit();
+                try (DirectoryReader reader = DirectoryReader.open(writer)) {
+                    assertEquals(1, reader.leaves().size());
+                    NumericDocValues extra_source = reader.leaves().get(0).reader().getNumericDocValues("extra_source");
+                    assertNotNull(extra_source);
+                    for (int i = 0; i < reader.maxDoc(); i++) {
+                        Document document = reader.document(i);
+                        Set<String> collect = document.getFields().stream().map(IndexableField::name).collect(Collectors.toSet());
+                        assertTrue(collect.contains("source"));
+                        assertTrue(collect.contains("extra_source"));
+                        assertEquals(i, extra_source.nextDoc());
+                    }
+                    assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc());
+                }
+            }
+        }
+    }
 }