From d41cf6ac9f791b431762892b4ce8e85419e44eba Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Sat, 8 Dec 2018 21:24:36 -0500 Subject: [PATCH] Use delCount of SegmentInfos to calculate numDocs (#36323) Today, we iterate the bitset of hardLiveDocs to calculate the number of live docs. This calculation might be expensive if we enable soft-deletes (by default) for old indices that previously had soft-deletes disabled and contain hard-deletes. Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly. We have hard-deletes in two scenarios: (1) from old segments where soft-deletes was disabled, (2) when IndexWriter hits non-aborted exceptions. In both cases, IndexWriter flushes SegmentInfos before exposing the hard-deletes; thus we can use the hard-delete count of SegmentInfos. --- .../elasticsearch/common/lucene/Lucene.java | 28 +++++++++++++------ .../index/engine/InternalEngine.java | 5 ++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index f515db43ab9..fd9d63ea225 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -956,18 +956,17 @@ public class Lucene { super(in, new SubReaderWrapper() { @Override public LeafReader wrap(LeafReader leaf) { - SegmentReader segmentReader = segmentReader(leaf); - Bits hardLiveDocs = segmentReader.getHardLiveDocs(); + final SegmentReader segmentReader = segmentReader(leaf); + final Bits hardLiveDocs = segmentReader.getHardLiveDocs(); if (hardLiveDocs == null) { return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc()); } - // TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458? 
- int numDocs = 0; - for (int i = 0; i < hardLiveDocs.length(); i++) { - if (hardLiveDocs.get(i)) { - numDocs++; - } - } + // Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly. + // We have hard-deletes in two scenarios: (1) from old segments where soft-deletes was disabled, + // (2) when IndexWriter hits non-aborted exceptions. In both cases, IW flushes SegmentInfos + // before exposing the hard-deletes; thus we can use the hard-delete count of SegmentInfos. + final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount(); + assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs); return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs); } }); @@ -984,6 +983,17 @@ public class Lucene { } } + private static int popCount(Bits bits) { + assert bits != null; + int onBits = 0; + for (int i = 0; i < bits.length(); i++) { + if (bits.get(i)) { + onBits++; + } + } + return onBits; + } + /** * Returns a numeric docvalues which can be used to soft-delete documents. */ diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index e29b6ca362d..4ac63dacca2 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -2585,6 +2585,11 @@ public class InternalEngine extends Engine { assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled"; return super.softUpdateDocuments(term, docs, softDeletes); } + @Override + public long tryDeleteDocument(IndexReader readerIn, int docID) { + assert false : "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs"; + throw new UnsupportedOperationException(); + } } /**