Use delCount of SegmentInfos to calculate numDocs (#36323)
Today, we iterate over the bitset of hardLiveDocs to calculate the number of live docs. This calculation might be expensive if we enable soft-deletes (the default) on old indices that previously had soft-deletes disabled and that contain hard-deletes. Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly. We then have hard-deletes in only two scenarios: (1) from old segments where soft-deletes was disabled, and (2) when IndexWriter hits non-aborted exceptions. In both cases, IW flushes SegmentInfos before exposing the hard-deletes; thus we can use the hard-delete count of SegmentInfos instead of iterating the bitset.
This commit is contained in:
parent
cc3872d934
commit
d41cf6ac9f
|
@ -956,18 +956,17 @@ public class Lucene {
|
||||||
super(in, new SubReaderWrapper() {
|
super(in, new SubReaderWrapper() {
|
||||||
@Override
|
@Override
|
||||||
public LeafReader wrap(LeafReader leaf) {
|
public LeafReader wrap(LeafReader leaf) {
|
||||||
SegmentReader segmentReader = segmentReader(leaf);
|
final SegmentReader segmentReader = segmentReader(leaf);
|
||||||
Bits hardLiveDocs = segmentReader.getHardLiveDocs();
|
final Bits hardLiveDocs = segmentReader.getHardLiveDocs();
|
||||||
if (hardLiveDocs == null) {
|
if (hardLiveDocs == null) {
|
||||||
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
|
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
|
||||||
}
|
}
|
||||||
// TODO: Can we avoid calculate numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
|
// Once soft-deletes is enabled, we no longer hard-update or hard-delete documents directly.
|
||||||
int numDocs = 0;
|
// There are two scenarios in which we have hard-deletes: (1) from old segments where soft-deletes was disabled,
|
||||||
for (int i = 0; i < hardLiveDocs.length(); i++) {
|
// (2) when IndexWriter hits non-aborted exceptions. In both cases, IW flushes SegmentInfos
|
||||||
if (hardLiveDocs.get(i)) {
|
// before exposing the hard-deletes, thus we can use the hard-delete count of SegmentInfos.
|
||||||
numDocs++;
|
final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount();
|
||||||
}
|
assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs);
|
||||||
}
|
|
||||||
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
|
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -984,6 +983,17 @@ public class Lucene {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int popCount(Bits bits) {
|
||||||
|
assert bits != null;
|
||||||
|
int onBits = 0;
|
||||||
|
for (int i = 0; i < bits.length(); i++) {
|
||||||
|
if (bits.get(i)) {
|
||||||
|
onBits++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return onBits;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a numeric docvalues which can be used to soft-delete documents.
|
* Returns a numeric docvalues which can be used to soft-delete documents.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -2585,6 +2585,11 @@ public class InternalEngine extends Engine {
|
||||||
assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled";
|
assert softDeleteEnabled : "Call #softUpdateDocuments but soft-deletes is disabled";
|
||||||
return super.softUpdateDocuments(term, docs, softDeletes);
|
return super.softUpdateDocuments(term, docs, softDeletes);
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public long tryDeleteDocument(IndexReader readerIn, int docID) {
|
||||||
|
assert false : "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs";
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue