Mirror of https://github.com/apache/lucene.git, synced 2025-02-07 02:28:49 +00:00
drop 100% deleted segments before merging
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237038 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 178857bc10
commit e1a808d489
@@ -78,6 +78,7 @@ public final class MappingMultiDocsEnum extends DocsEnum {
        current = subs[upto].docsEnum;
        currentBase = mergeState.docBase[reader];
        currentMap = mergeState.docMaps[reader];
        assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length;
      }
    }
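For context, mergeState.docMaps[reader] remaps a reader's original docIDs around its deletions, and its length is the reader's maxDoc, which appears to be what the new assert compares against the slice length. A rough standalone sketch of that remapping (plain Java, not the Lucene API; the deleted-doc sentinel of -1 is an assumption for illustration):

public class DocMapSketch {
  // Builds a map from old docID to new docID; deleted docs map to -1 in this sketch.
  static int[] buildDocMap(boolean[] liveDocs) {
    final int maxDoc = liveDocs.length;   // docMap.length == maxDoc, matching the assert above
    final int[] docMap = new int[maxDoc];
    int newDocID = 0;
    for (int j = 0; j < maxDoc; j++) {
      docMap[j] = liveDocs[j] ? newDocID++ : -1;
    }
    return docMap;
  }

  public static void main(String[] args) {
    // Docs 1 and 3 are deleted; the survivors are renumbered 0, 1, 2.
    int[] docMap = buildDocMap(new boolean[] {true, false, true, false, true});
    System.out.println(java.util.Arrays.toString(docMap)); // prints [0, -1, 1, -1, 2]
  }
}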
@@ -109,7 +109,7 @@ public class Floats {
                   throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
      arrayTemplate = DocValuesArray.TEMPLATES.get(type);
      assert size == 4 || size == 8;
      assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
    }

    @Override
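The only change here is the assert's diagnostic message, which now reports the offending size, type, and id. The size itself is the byte width of a fixed-size float value, 4 for single precision and 8 for double precision; a minimal sketch of that relationship and of the assert-with-message idiom (plain Java, not the Lucene Floats class, helper name is illustrative):

public class FloatWidthSketch {
  // Hypothetical helper: byte width for single vs. double precision values.
  static int bytesPerValue(boolean doublePrecision) {
    return doublePrecision ? Double.BYTES : Float.BYTES; // 8 or 4
  }

  public static void main(String[] args) {
    int size = bytesPerValue(true);
    // Same style of message the patch adds, so a failure identifies the bad value.
    assert size == 4 || size == 8 : "wrong size=" + size;
    System.out.println("bytes per value: " + size);
  }
}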
@@ -206,7 +206,7 @@ class BufferedDeletesStream {

          delIDX--;
        } else if (packet != null && segGen == packet.delGen()) {
          assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet";
          assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
          //System.out.println(" eq");

  // Lock order: IW -> BD -> RP
@@ -3036,10 +3036,17 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      // newly flushed deletes but mapping them to the new
      // docIDs.

      // Since we copy-on-write, if any new deletes were
      // applied after merging has started, we can just
      // check if the before/after liveDocs have changed.
      // If so, we must carefully merge the liveDocs one
      // doc at a time:
      if (currentLiveDocs != prevLiveDocs) {

        // This means this segment received new deletes
        // since we started the merge, so we
        // must merge them:
        final int startDocUpto = docUpto;
        for(int j=0;j<docCount;j++) {
          if (!prevLiveDocs.get(j)) {
            assert !currentLiveDocs.get(j);
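The added comments spell out the copy-on-write trick: liveDocs is not mutated in place, so a reference comparison (currentLiveDocs != prevLiveDocs) detects whether deletes arrived after the merge started, and only then must the liveDocs be walked doc by doc. A rough standalone sketch of that doc-at-a-time carry-over, using plain arrays instead of Bits and a hypothetical helper name, not IndexWriter's actual method:

import java.util.ArrayList;
import java.util.List;

public class CarryDeletesSketch {
  // prevLive: liveDocs snapshot taken when the merge started.
  // currLive: liveDocs now, possibly with extra deletes applied since then.
  // docBase:  first merged docID assigned to this segment.
  // Returns the merged docIDs that must be deleted in the merged segment.
  static List<Integer> newDeletes(boolean[] prevLive, boolean[] currLive, int docBase) {
    final List<Integer> deletes = new ArrayList<>();
    int docUpto = docBase;
    for (int j = 0; j < prevLive.length; j++) {
      if (!prevLive[j]) {
        // Deleted before the merge started: never copied, consumes no merged docID,
        // and a deleted doc cannot come back to life.
        assert !currLive[j];
      } else {
        if (!currLive[j]) {
          deletes.add(docUpto); // deleted while the merge was running
        }
        docUpto++;              // this doc exists in the merged segment
      }
    }
    return deletes;
  }

  public static void main(String[] args) {
    boolean[] before = {true, false, true, true};
    boolean[] after  = {true, false, false, true}; // doc 2 was deleted during the merge
    System.out.println(newDeletes(before, after, 100)); // prints [101]
  }
}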
@@ -3055,13 +3062,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
            }
          }
        } else {
          final int readerDocCount;
          if (i == sourceSegments.size()-1) {
            readerDocCount = mergeState.mergedDocCount - mergeState.docBase[i];
          } else {
            readerDocCount = mergeState.docBase[i+1] - mergeState.docBase[i];
          }
          docUpto += readerDocCount;
          assert mergeState.readers != null;
          assert mergeState.segmentDocCounts != null;
          docUpto += mergeState.segmentDocCounts.get(info);
        }
      } else if (currentLiveDocs != null) {
        // This segment had no deletes before but now it
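In the unchanged-liveDocs branch, docUpto is now advanced by a per-segment live doc count looked up in mergeState.segmentDocCounts rather than derived from the difference of neighbouring docBase entries, presumably because readers that were 100% deleted can now be dropped from mergeState.readers and the old index arithmetic would no longer line up with the source segments. A small sketch of that lookup pattern, with strings as stand-ins for SegmentInfo keys (illustrative only):

import java.util.HashMap;
import java.util.Map;

public class SegmentDocCountSketch {
  public static void main(String[] args) {
    // Hypothetical segment names standing in for SegmentInfo keys.
    Map<String, Integer> segmentDocCounts = new HashMap<>();
    segmentDocCounts.put("_0", 95); // 100 docs, 5 already deleted when the merge started
    segmentDocCounts.put("_1", 0);  // fully deleted: contributes no merged docs
    segmentDocCounts.put("_2", 40);

    int docUpto = 0;
    for (String seg : new String[] {"_0", "_1", "_2"}) {
      Integer docCount = segmentDocCounts.get(seg);
      assert docCount != null;      // mirrors the asserts added in the patch
      docUpto += docCount;          // only live docs were copied into the merged segment
    }
    System.out.println("docUpto = " + docUpto); // prints docUpto = 135
  }
}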
@@ -3600,10 +3603,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
          }
        }
      }

      merge.readerLiveDocs.add(liveDocs);
      merge.readers.add(reader);

      merger.add(reader, liveDocs);
      segUpto++;
    }
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
 */

import java.util.List;
import java.util.Map;

import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
@@ -41,10 +42,11 @@ public class MergeState {
  }

  public FieldInfos fieldInfos;
  public List<IndexReaderAndLiveDocs> readers;        // Readers & liveDocs being merged
  public int[][] docMaps;                             // Maps docIDs around deletions
  public int[] docBase;                               // New docID base per reader
  public int mergedDocCount;                          // Total # merged docs
  public List<IndexReaderAndLiveDocs> readers;        // Readers & liveDocs being merged
  public int[][] docMaps;                             // Maps docIDs around deletions
  public int[] docBase;                               // New docID base per reader
  public Map<SegmentInfo,Integer> segmentDocCounts;   // Non-deleted docCount per reader
  public int mergedDocCount;                          // Total # merged docs
  public CheckAbort checkAbort;
  public InfoStream infoStream;
@@ -104,12 +104,7 @@ final class SegmentMerger {
    // IndexWriter.close(false) takes to actually stop the
    // threads.

    final int numReaders = mergeState.readers.size();
    // Remap docIDs
    mergeState.docMaps = new int[numReaders][];
    mergeState.docBase = new int[numReaders];
    mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
    mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
    setDocMaps();

    mergeFieldInfos();
    setMatchingSegmentReaders();
@@ -283,37 +278,44 @@ final class SegmentMerger {
    }
  }

  private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
    int docBase = 0;

    final List<Fields> fields = new ArrayList<Fields>();
    final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
  private int[] shrink(int[] in, int size) {
    final int[] newArray = new int[size];
    System.arraycopy(in, 0, newArray, 0, size);
    return newArray;
  }

    for(MergeState.IndexReaderAndLiveDocs r : mergeState.readers) {
      final Fields f = r.reader.fields();
      final int maxDoc = r.reader.maxDoc();
      if (f != null) {
        slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
        fields.add(f);
      }
      docBase += maxDoc;
    }
  private int[][] shrink(int[][] in, int size) {
    final int[][] newArray = new int[size][];
    System.arraycopy(in, 0, newArray, 0, size);
    return newArray;
  }

  // NOTE: removes any "all deleted" readers from mergeState.readers
  private void setDocMaps() throws IOException {
    final int numReaders = mergeState.readers.size();

    docBase = 0;
    // Remap docIDs
    mergeState.docMaps = new int[numReaders][];
    mergeState.docBase = new int[numReaders];
    mergeState.segmentDocCounts = new HashMap<SegmentInfo,Integer>();
    mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
    mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];

    for(int i=0;i<numReaders;i++) {
    int docBase = 0;

    int i = 0;
    while(i < mergeState.readers.size()) {

      final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);

      mergeState.docBase[i] = docBase;
      final int maxDoc = reader.reader.maxDoc();
      if (reader.liveDocs != null) {
      final int docCount;
      final Bits liveDocs = reader.liveDocs;
      final int[] docMap;
      if (liveDocs != null) {
        int delCount = 0;
        final Bits liveDocs = reader.liveDocs;
        assert liveDocs != null;
        final int[] docMap = mergeState.docMaps[i] = new int[maxDoc];
        docMap = new int[maxDoc];
        int newDocID = 0;
        for(int j=0;j<maxDoc;j++) {
          if (!liveDocs.get(j)) {
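The two shrink() overloads introduced above copy the first size entries into a smaller array, which is the same effect as an Arrays.copyOf truncation; setDocMaps() uses them at the end (next hunk) once fully deleted readers have been removed. A tiny usage sketch outside Lucene, with made-up values:

import java.util.Arrays;

public class ShrinkSketch {
  // Same idea as the patch's shrink(int[], int): keep only the first 'size' entries.
  static int[] shrink(int[] in, int size) {
    final int[] newArray = new int[size];
    System.arraycopy(in, 0, newArray, 0, size);
    return newArray;
  }

  public static void main(String[] args) {
    int[] docBase = {0, 25, 60, 0};  // pre-sized for 4 readers, but only 3 survived
    System.out.println(Arrays.toString(shrink(docBase, 3)));        // [0, 25, 60]
    System.out.println(Arrays.toString(Arrays.copyOf(docBase, 3))); // identical result
  }
}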
@@ -323,14 +325,56 @@ final class SegmentMerger {
            docMap[j] = newDocID++;
          }
        }
        docBase += maxDoc - delCount;
        docCount = maxDoc - delCount;
      } else {
        docBase += maxDoc;
        docCount = maxDoc;
        docMap = null;
      }

      if (reader.reader instanceof SegmentReader) {
        mergeState.segmentDocCounts.put(((SegmentReader) reader.reader).getSegmentInfo(), docCount);
      }

      if (docCount == 0) {
        // Skip this reader (all docs are deleted):
        mergeState.readers.remove(i);
        continue;
      }

      mergeState.docMaps[i] = docMap;
      docBase += docCount;

      if (mergeState.payloadProcessorProvider != null) {
        mergeState.dirPayloadProcessor[i] = mergeState.payloadProcessorProvider.getDirProcessor(reader.reader.directory());
      }

      i++;
    }

    final int numReadersLeft = mergeState.readers.size();

    if (numReadersLeft < mergeState.docMaps.length) {
      mergeState.docMaps = shrink(mergeState.docMaps, numReadersLeft);
      mergeState.docBase = shrink(mergeState.docBase, numReadersLeft);
    }
  }

  private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {

    final List<Fields> fields = new ArrayList<Fields>();
    final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();

    int docBase = 0;

    for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
      final MergeState.IndexReaderAndLiveDocs r = mergeState.readers.get(readerIndex);
      final Fields f = r.reader.fields();
      final int maxDoc = r.reader.maxDoc();
      if (f != null) {
        slices.add(new ReaderUtil.Slice(docBase, maxDoc, readerIndex));
        fields.add(f);
      }
      docBase += maxDoc;
    }

    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
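Taken together, the new setDocMaps() walks the readers with a while loop precisely so it can call mergeState.readers.remove(i) and continue when a reader has zero live docs, and the reworked mergeTerms() records readerIndex in each slice, which appears to be what MappingMultiDocsEnum uses to index mergeState.docBase and docMaps in the first hunk. A condensed standalone sketch of that remove-while-iterating pattern, using plain collections instead of the Lucene reader types:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DropEmptyReadersSketch {
  public static void main(String[] args) {
    // Each boolean[] stands in for one reader's liveDocs.
    List<boolean[]> readers = new ArrayList<>(Arrays.asList(
        new boolean[] {true, true, false},   // 2 live docs
        new boolean[] {false, false},        // 100% deleted, will be dropped
        new boolean[] {true}));              // 1 live doc

    int[] docBase = new int[readers.size()]; // pre-sized; shrunk below if readers are dropped
    int base = 0;
    int i = 0;
    while (i < readers.size()) {
      boolean[] liveDocs = readers.get(i);
      docBase[i] = base;
      int docCount = 0;
      for (boolean live : liveDocs) {
        if (live) {
          docCount++;
        }
      }
      if (docCount == 0) {
        readers.remove(i);  // skip this reader entirely: all of its docs are deleted
        continue;           // do not advance i; the next reader slid into slot i
      }
      base += docCount;
      i++;
    }

    if (readers.size() < docBase.length) {
      docBase = Arrays.copyOf(docBase, readers.size()); // the patch uses its shrink() helper here
    }
    System.out.println(Arrays.toString(docBase)); // prints [0, 2]
  }
}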