drop 100% deleted segments before merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237038 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-01-28 10:43:52 +00:00
parent 178857bc10
commit e1a808d489
6 changed files with 91 additions and 43 deletions
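
The change is easiest to see in isolation: before a merge starts, any source segment whose documents are 100% deleted is dropped, since merging it would copy nothing. The following is a minimal sketch of that idea in plain Java; Segment, liveDocCount(), and dropFullyDeleted() are hypothetical stand-ins, not Lucene classes or APIs.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class DropDeletedSegmentsSketch {

  /** Simplified stand-in for a segment being merged. */
  static final class Segment {
    final String name;
    final int maxDoc;
    final BitSet liveDocs; // set bit == live doc; null == no deletions

    Segment(String name, int maxDoc, BitSet liveDocs) {
      this.name = name;
      this.maxDoc = maxDoc;
      this.liveDocs = liveDocs;
    }

    int liveDocCount() {
      return liveDocs == null ? maxDoc : liveDocs.cardinality();
    }
  }

  /** Returns only the segments that still have at least one live doc. */
  static List<Segment> dropFullyDeleted(List<Segment> segments) {
    final List<Segment> kept = new ArrayList<Segment>();
    for (Segment seg : segments) {
      if (seg.liveDocCount() == 0) {
        // 100% deleted: skip this segment entirely.
        continue;
      }
      kept.add(seg);
    }
    return kept;
  }

  public static void main(String[] args) {
    final BitSet allDeleted = new BitSet(3); // no bits set => every doc deleted
    final BitSet someLive = new BitSet(3);
    someLive.set(0);
    someLive.set(2);

    final List<Segment> segments = Arrays.asList(
        new Segment("_0", 3, null),        // no deletions
        new Segment("_1", 3, allDeleted),  // fully deleted -> dropped
        new Segment("_2", 3, someLive));   // partially deleted -> kept

    System.out.println(dropFullyDeleted(segments).size()); // prints 2
  }
}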


@@ -78,6 +78,7 @@ public final class MappingMultiDocsEnum extends DocsEnum {
current = subs[upto].docsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length;
}
}


@@ -109,7 +109,7 @@ public class Floats {
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
arrayTemplate = DocValuesArray.TEMPLATES.get(type);
assert size == 4 || size == 8;
assert size == 4 || size == 8: "wrong size=" + size + " type=" + type + " id=" + id;
}
@Override


@@ -206,7 +206,7 @@ class BufferedDeletesStream {
delIDX--;
} else if (packet != null && segGen == packet.delGen()) {
assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet";
assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
//System.out.println(" eq");
// Lock order: IW -> BD -> RP


@@ -3036,10 +3036,17 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// newly flushed deletes but mapping them to the new
// docIDs.
// Since we copy-on-write, if any new deletes were
// applied after merging has started, we can just
// check if the before/after liveDocs have changed.
// If so, we must carefully merge the liveDocs one
// doc at a time:
if (currentLiveDocs != prevLiveDocs) {
// This means this segment received new deletes
// since we started the merge, so we
// must merge them:
final int startDocUpto = docUpto;
for(int j=0;j<docCount;j++) {
if (!prevLiveDocs.get(j)) {
assert !currentLiveDocs.get(j);
@@ -3055,13 +3062,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
} else {
final int readerDocCount;
if (i == sourceSegments.size()-1) {
readerDocCount = mergeState.mergedDocCount - mergeState.docBase[i];
} else {
readerDocCount = mergeState.docBase[i+1] - mergeState.docBase[i];
}
docUpto += readerDocCount;
assert mergeState.readers != null;
assert mergeState.segmentDocCounts != null;
docUpto += mergeState.segmentDocCounts.get(info);
}
} else if (currentLiveDocs != null) {
// This segment had no deletes before but now it
@@ -3600,10 +3603,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
}
merge.readerLiveDocs.add(liveDocs);
merge.readers.add(reader);
merger.add(reader, liveDocs);
segUpto++;
}
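
The comments in the hunk above rely on IndexWriter's copy-on-write handling of liveDocs: if a segment picked up new deletes while the merge was running, its current liveDocs is a different object than the one captured when the merge started, so a plain reference comparison decides whether the doc-at-a-time walk is needed at all. Below is a sketch of that check using java.util.BitSet as a stand-in for Lucene's live-docs bits; mergeNewDeletes() and its parameters are hypothetical names, not the actual IndexWriter code.

import java.util.BitSet;

final class CarryOverDeletesSketch {

  /**
   * Carries deletes that arrived during the merge over to the merged
   * segment. docMap[j] is doc j's docID in the merged segment, or -1 if
   * doc j was already deleted when the merge started.
   */
  static void mergeNewDeletes(BitSet prevLiveDocs,
                              BitSet currentLiveDocs,
                              int[] docMap,
                              int mergedDocBase,
                              BitSet mergedLiveDocs) {
    if (currentLiveDocs == prevLiveDocs) {
      // Copy-on-write guarantees no new deletes arrived: nothing to do.
      return;
    }
    for (int j = 0; j < docMap.length; j++) {
      if (prevLiveDocs.get(j) && !currentLiveDocs.get(j)) {
        // Doc j was live at merge start but has since been deleted; mark
        // its remapped docID as deleted in the merged segment.
        mergedLiveDocs.clear(mergedDocBase + docMap[j]);
      }
    }
  }
}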


@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
@@ -41,10 +42,11 @@ public class MergeState {
}
public FieldInfos fieldInfos;
public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
public int[][] docMaps; // Maps docIDs around deletions
public int[] docBase; // New docID base per reader
public int mergedDocCount; // Total # merged docs
public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
public int[][] docMaps; // Maps docIDs around deletions
public int[] docBase; // New docID base per reader
public Map<SegmentInfo,Integer> segmentDocCounts; // Non-deleted docCount per reader
public int mergedDocCount; // Total # merged docs
public CheckAbort checkAbort;
public InfoStream infoStream;
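
A small illustration of how the new segmentDocCounts field appears to be meant to be used: because it is keyed by segment rather than by reader position, IndexWriter can still look up the surviving doc count of every source segment after the merger has dropped 100%-deleted readers and the positional arrays (docBase, docMaps) no longer line up with the original segment list. The String keys and counts below are made up for illustration.

import java.util.HashMap;
import java.util.Map;

final class SegmentDocCountsSketch {
  public static void main(String[] args) {
    // Recorded by the merger for every source segment, including dropped ones:
    final Map<String, Integer> segmentDocCounts = new HashMap<String, Integer>();
    segmentDocCounts.put("_0", 120);
    segmentDocCounts.put("_1", 0);   // fully deleted; its reader was dropped
    segmentDocCounts.put("_2", 75);

    // Later, the writer can advance its merged-docID cursor per source
    // segment without caring which readers actually survived:
    int docUpto = 0;
    for (String info : new String[] {"_0", "_1", "_2"}) {
      docUpto += segmentDocCounts.get(info);
    }
    System.out.println(docUpto); // prints 195
  }
}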


@@ -104,12 +104,7 @@ final class SegmentMerger {
// IndexWriter.close(false) takes to actually stop the
// threads.
final int numReaders = mergeState.readers.size();
// Remap docIDs
mergeState.docMaps = new int[numReaders][];
mergeState.docBase = new int[numReaders];
mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
setDocMaps();
mergeFieldInfos();
setMatchingSegmentReaders();
@@ -283,37 +278,44 @@ final class SegmentMerger {
}
}
private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
int docBase = 0;
final List<Fields> fields = new ArrayList<Fields>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
private int[] shrink(int[] in, int size) {
final int[] newArray = new int[size];
System.arraycopy(in, 0, newArray, 0, size);
return newArray;
}
for(MergeState.IndexReaderAndLiveDocs r : mergeState.readers) {
final Fields f = r.reader.fields();
final int maxDoc = r.reader.maxDoc();
if (f != null) {
slices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
fields.add(f);
}
docBase += maxDoc;
}
private int[][] shrink(int[][] in, int size) {
final int[][] newArray = new int[size][];
System.arraycopy(in, 0, newArray, 0, size);
return newArray;
}
// NOTE: removes any "all deleted" readers from mergeState.readers
private void setDocMaps() throws IOException {
final int numReaders = mergeState.readers.size();
docBase = 0;
// Remap docIDs
mergeState.docMaps = new int[numReaders][];
mergeState.docBase = new int[numReaders];
mergeState.segmentDocCounts = new HashMap<SegmentInfo,Integer>();
mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
for(int i=0;i<numReaders;i++) {
int docBase = 0;
int i = 0;
while(i < mergeState.readers.size()) {
final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);
mergeState.docBase[i] = docBase;
final int maxDoc = reader.reader.maxDoc();
if (reader.liveDocs != null) {
final int docCount;
final Bits liveDocs = reader.liveDocs;
final int[] docMap;
if (liveDocs != null) {
int delCount = 0;
final Bits liveDocs = reader.liveDocs;
assert liveDocs != null;
final int[] docMap = mergeState.docMaps[i] = new int[maxDoc];
docMap = new int[maxDoc];
int newDocID = 0;
for(int j=0;j<maxDoc;j++) {
if (!liveDocs.get(j)) {
@@ -323,14 +325,56 @@ final class SegmentMerger {
docMap[j] = newDocID++;
}
}
docBase += maxDoc - delCount;
docCount = maxDoc - delCount;
} else {
docBase += maxDoc;
docCount = maxDoc;
docMap = null;
}
if (reader.reader instanceof SegmentReader) {
mergeState.segmentDocCounts.put(((SegmentReader) reader.reader).getSegmentInfo(), docCount);
}
if (docCount == 0) {
// Skip this reader (all docs are deleted):
mergeState.readers.remove(i);
continue;
}
mergeState.docMaps[i] = docMap;
docBase += docCount;
if (mergeState.payloadProcessorProvider != null) {
mergeState.dirPayloadProcessor[i] = mergeState.payloadProcessorProvider.getDirProcessor(reader.reader.directory());
}
i++;
}
final int numReadersLeft = mergeState.readers.size();
if (numReadersLeft < mergeState.docMaps.length) {
mergeState.docMaps = shrink(mergeState.docMaps, numReadersLeft);
mergeState.docBase = shrink(mergeState.docBase, numReadersLeft);
}
}
private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
final List<Fields> fields = new ArrayList<Fields>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
int docBase = 0;
for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
final MergeState.IndexReaderAndLiveDocs r = mergeState.readers.get(readerIndex);
final Fields f = r.reader.fields();
final int maxDoc = r.reader.maxDoc();
if (f != null) {
slices.add(new ReaderUtil.Slice(docBase, maxDoc, readerIndex));
fields.add(f);
}
docBase += maxDoc;
}
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
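
For reference, here is a compact sketch of what setDocMaps() computes, written against plain Java collections instead of MergeState: per segment, a docMap from old docIDs to merged docIDs (-1 for deleted docs), a surviving doc count, and removal of segments whose count is zero so that later merge stages never see them. All names in the sketch are illustrative, not Lucene's.

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

final class DocMapSketch {

  static final class SegmentState {
    final int maxDoc;
    final BitSet liveDocs; // set bit == live doc; null == no deletions
    int[] docMap;          // null when there are no deletions
    int docBase;           // first docID of this segment in the merged index

    SegmentState(int maxDoc, BitSet liveDocs) {
      this.maxDoc = maxDoc;
      this.liveDocs = liveDocs;
    }
  }

  /** Assigns docBase and docMap to each surviving segment; drops empty ones. */
  static List<SegmentState> assignDocMaps(List<SegmentState> segments) {
    final List<SegmentState> kept = new ArrayList<SegmentState>();
    int docBase = 0;
    for (SegmentState seg : segments) {
      final int docCount;
      if (seg.liveDocs != null) {
        seg.docMap = new int[seg.maxDoc];
        int newDocID = 0;
        for (int j = 0; j < seg.maxDoc; j++) {
          // Deleted docs map to -1; live docs get the next merged docID.
          seg.docMap[j] = seg.liveDocs.get(j) ? newDocID++ : -1;
        }
        docCount = newDocID;
      } else {
        docCount = seg.maxDoc; // no deletions: identity mapping, docMap stays null
      }
      if (docCount == 0) {
        continue; // 100% deleted: this segment contributes nothing to the merge
      }
      seg.docBase = docBase;
      docBase += docCount;
      kept.add(seg);
    }
    return kept;
  }
}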