Use delCount of SegmentInfos to calculate numDocs (#36323)

Today, we iterate over the hardLiveDocs bitset to calculate the number of
live docs. This calculation can be expensive when soft-deletes is enabled
(the default) on old indices that previously had soft-deletes disabled
and contain hard-deletes.

Once soft-deletes is enabled, we no longer hard-update or hard-delete
documents directly. Hard-deletes arise in two scenarios: (1) in old
segments where soft-deletes was disabled, and (2) when IndexWriter hits
non-aborted exceptions. In both cases, IndexWriter flushes SegmentInfos
before exposing the hard-deletes; thus we can use the hard-delete count of
SegmentInfos instead of counting the bits.
This commit is contained in:
Nhat Nguyen 2018-12-08 21:24:36 -05:00 committed by GitHub
parent cc3872d934
commit d41cf6ac9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 9 deletions

View File

@ -956,18 +956,17 @@ public class Lucene {
super(in, new SubReaderWrapper() { super(in, new SubReaderWrapper() {
@Override @Override
public LeafReader wrap(LeafReader leaf) { public LeafReader wrap(LeafReader leaf) {
SegmentReader segmentReader = segmentReader(leaf); final SegmentReader segmentReader = segmentReader(leaf);
Bits hardLiveDocs = segmentReader.getHardLiveDocs(); final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
if (hardLiveDocs == null) { if (hardLiveDocs == null) {
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc()); return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
} }
// TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458? // Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly.
int numDocs = 0; // Two scenarios that we have hard-deletes: (1) from old segments where soft-deletes was disabled,
for (int i = 0; i < hardLiveDocs.length(); i++) { // (2) when IndexWriter hits non-aborted exceptions. These two cases, IW flushes SegmentInfos
if (hardLiveDocs.get(i)) { // before exposing the hard-deletes, thus we can use the hard-delete count of SegmentInfos.
numDocs++; final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount();
} assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs);
}
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs); return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
} }
}); });
@ -984,6 +983,17 @@ public class Lucene {
} }
} }
/**
 * Counts how many positions in {@code bits} are set, scanning every index
 * from {@code 0} up to {@code bits.length()}.
 *
 * @param bits the bit set to scan; must not be {@code null}
 * @return the number of indices for which {@code bits.get(index)} is true
 */
private static int popCount(Bits bits) {
    assert bits != null;
    int setBits = 0;
    int index = 0;
    // Linear scan: each set position contributes exactly one to the total.
    while (index < bits.length()) {
        setBits += bits.get(index) ? 1 : 0;
        index++;
    }
    return setBits;
}
/** /**
* Returns a numeric docvalues which can be used to soft-delete documents. * Returns a numeric docvalues which can be used to soft-delete documents.
*/ */

View File

@ -2585,6 +2585,11 @@ public class InternalEngine extends Engine {
assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled"; assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled";
return super.softUpdateDocuments(term, docs, softDeletes); return super.softUpdateDocuments(term, docs, softDeletes);
} }
/**
 * Rejects every call: this method is not supported here (the message points
 * to {@code Lucene#DirectoryReaderWithAllLiveDocs} for the reason).
 *
 * <p>With assertions enabled the call trips an {@link AssertionError};
 * otherwise it throws an {@link UnsupportedOperationException} that carries
 * the same explanation, so the failure stays diagnosable when assertions are off.
 *
 * @param readerIn the reader the document id refers to (unused)
 * @param docID the document id that was requested for deletion (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always, when assertions are disabled
 */
@Override
public long tryDeleteDocument(IndexReader readerIn, int docID) {
    final String message = "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs";
    assert false : message;
    // Reuse the assertion text so the exception is self-explanatory without -ea.
    throw new UnsupportedOperationException(message);
}
} }
/** /**