mirror of https://github.com/apache/lucene.git
LUCENE-2357: use packed ints to hold the docID remapping during merging, to reduce transient RAM usage
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1343946 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f5223505b0
commit
5d3dba2c56
|
@ -937,6 +937,9 @@ Optimizations
|
|||
performance; add float acceptableOverheadRatio to getWriter and
|
||||
getMutable API to give packed ints freedom to pick faster
|
||||
implementations (Adrien Grand via Mike McCandless)
|
||||
|
||||
* LUCENE-2357: Reduce transient RAM usage when merging segments in
|
||||
IndexWriter. (Adrien Grand via Mike McCandless)
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
|
|||
private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
|
||||
int numSubs;
|
||||
int upto;
|
||||
int[] currentMap;
|
||||
MergeState.DocMap currentMap;
|
||||
DocsAndPositionsEnum current;
|
||||
int currentBase;
|
||||
int doc = -1;
|
||||
|
@ -94,12 +94,10 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
|
|||
|
||||
int doc = current.nextDoc();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
if (currentMap != null) {
|
||||
// compact deletions
|
||||
doc = currentMap[doc];
|
||||
if (doc == -1) {
|
||||
continue;
|
||||
}
|
||||
// compact deletions
|
||||
doc = currentMap.get(doc);
|
||||
if (doc == -1) {
|
||||
continue;
|
||||
}
|
||||
return this.doc = currentBase + doc;
|
||||
} else {
|
||||
|
|
|
@ -35,7 +35,7 @@ public final class MappingMultiDocsEnum extends DocsEnum {
|
|||
private MultiDocsEnum.EnumWithSlice[] subs;
|
||||
int numSubs;
|
||||
int upto;
|
||||
int[] currentMap;
|
||||
MergeState.DocMap currentMap;
|
||||
DocsEnum current;
|
||||
int currentBase;
|
||||
int doc = -1;
|
||||
|
@ -88,18 +88,16 @@ public final class MappingMultiDocsEnum extends DocsEnum {
|
|||
current = subs[upto].docsEnum;
|
||||
currentBase = mergeState.docBase[reader];
|
||||
currentMap = mergeState.docMaps[reader];
|
||||
assert currentMap == null || currentMap.length == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.length + " vs " + subs[upto].slice.length;
|
||||
assert currentMap.maxDoc() == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.maxDoc() + " vs " + subs[upto].slice.length;
|
||||
}
|
||||
}
|
||||
|
||||
int doc = current.nextDoc();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
if (currentMap != null) {
|
||||
// compact deletions
|
||||
doc = currentMap[doc];
|
||||
if (doc == -1) {
|
||||
continue;
|
||||
}
|
||||
// compact deletions
|
||||
doc = currentMap.get(doc);
|
||||
if (doc == -1) {
|
||||
continue;
|
||||
}
|
||||
return this.doc = currentBase + doc;
|
||||
} else {
|
||||
|
|
|
@ -3483,7 +3483,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
merge.readers.add(reader);
|
||||
assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
|
||||
if (delCount < info.info.getDocCount()) {
|
||||
merger.add(reader, liveDocs);
|
||||
merger.add(reader, liveDocs, delCount);
|
||||
}
|
||||
segUpto++;
|
||||
}
|
||||
|
|
|
@ -19,11 +19,12 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
|
||||
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
|
||||
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/** Holds common state used during segment merging
|
||||
*
|
||||
|
@ -33,17 +34,171 @@ public class MergeState {
|
|||
public static class IndexReaderAndLiveDocs {
|
||||
public final AtomicReader reader;
|
||||
public final Bits liveDocs;
|
||||
public final int numDeletedDocs;
|
||||
|
||||
public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs) {
|
||||
public IndexReaderAndLiveDocs(AtomicReader reader, Bits liveDocs, int numDeletedDocs) {
|
||||
this.reader = reader;
|
||||
this.liveDocs = liveDocs;
|
||||
this.numDeletedDocs = numDeletedDocs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Remaps the document IDs of one reader taking part in a merge.
 * <p>
 * {@link #get(int)} returns <code>-1</code> for a document that the wrapped
 * live-docs bits report as not live, and otherwise delegates to
 * {@link #remap(int)}. Concrete subclasses decide how the remapped value is
 * stored.
 */
public static abstract class DocMap {
|
||||
// Live-docs bits consulted by get(); null means every document is treated as live.
private final Bits liveDocs;
|
||||

||||
/** Stores the live-docs bits used by {@link #get(int)}; may be null. */
protected DocMap(Bits liveDocs) {
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||

||||
/**
 * Builds a doc map for the given reader, choosing the representation from
 * its deletion count: no deletions gives a NoDelDocMap, fewer deletions than
 * remaining documents gives the deletion-count map, and anything else gives
 * the direct map. Both packed variants are requested with PackedInts.FAST.
 */
public static DocMap build(IndexReaderAndLiveDocs reader) {
|
||||
final int maxDoc = reader.reader.maxDoc();
|
||||
final int numDeletes = reader.numDeletedDocs;
|
||||
final int numDocs = maxDoc - numDeletes;
|
||||
// A reader that reports deletions must also supply live-docs bits.
assert reader.liveDocs != null || numDeletes == 0;
|
||||
if (numDeletes == 0) {
|
||||
return new NoDelDocMap(maxDoc);
|
||||
} else if (numDeletes < numDocs) {
|
||||
return buildDelCountDocmap(maxDoc, numDeletes, reader.liveDocs, PackedInts.FAST);
|
||||
} else {
|
||||
return buildDirectDocMap(maxDoc, numDocs, reader.liveDocs, PackedInts.FAST);
|
||||
}
|
||||
}
|
||||

||||
/**
 * Builds a map that stores, for every index i in [0, maxDoc), the number of
 * non-live documents at indices 0..i inclusive. The packed array is sized
 * with PackedInts.bitsRequired(numDeletes) bits per value.
 */
static DocMap buildDelCountDocmap(int maxDoc, int numDeletes, Bits liveDocs, float acceptableOverheadRatio) {
|
||||
PackedInts.Mutable numDeletesSoFar = PackedInts.getMutable(maxDoc,
|
||||
PackedInts.bitsRequired(numDeletes), acceptableOverheadRatio);
|
||||
int del = 0;
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
if (!liveDocs.get(i)) {
|
||||
++del;
|
||||
}
|
||||
// Written after the increment, so the entry for i already counts doc i itself.
numDeletesSoFar.set(i, del);
|
||||
}
|
||||
// The caller-supplied deletion count must match what the bits actually say.
assert del == numDeletes : "del=" + del + ", numdeletes=" + numDeletes;
|
||||
return new DelCountDocMap(liveDocs, numDeletesSoFar);
|
||||
}
|
||||

||||
/**
 * Builds a map that stores the remapped ID (i minus the number of non-live
 * documents seen before i) for every live document i. Values are sized for
 * numDocs - 1, clamped at 0. Entries of non-live documents are not written
 * by this method.
 */
static DocMap buildDirectDocMap(int maxDoc, int numDocs, Bits liveDocs, float acceptableOverheadRatio) {
|
||||
PackedInts.Mutable docIds = PackedInts.getMutable(maxDoc,
|
||||
PackedInts.bitsRequired(Math.max(0, numDocs - 1)), acceptableOverheadRatio);
|
||||
int del = 0;
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
if (liveDocs.get(i)) {
|
||||
docIds.set(i, i - del);
|
||||
} else {
|
||||
++del;
|
||||
}
|
||||
}
|
||||
// The caller-supplied live count must match what the bits actually say.
assert numDocs + del == maxDoc : "maxDoc=" + maxDoc + ", del=" + del + ", numDocs=" + numDocs;
|
||||
return new DirectDocMap(liveDocs, docIds, del);
|
||||
}
|
||||

||||
/**
 * Returns the remapped ID of docId, or -1 when the live-docs bits are
 * non-null and report docId as not live.
 */
public int get(int docId) {
|
||||
if (liveDocs == null || liveDocs.get(docId)) {
|
||||
return remap(docId);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||

||||
/** Returns the remapped ID for docId; get() only calls this for documents it considers live. */
public abstract int remap(int docId);
|
||||

||||
/** Returns the number of entries covered by this map. */
public abstract int maxDoc();
|
||||

||||
/** Returns maxDoc() minus numDeletedDocs(). */
public final int numDocs() {
|
||||
return maxDoc() - numDeletedDocs();
|
||||
}
|
||||

||||
/** Returns the number of deleted documents this map accounts for. */
public abstract int numDeletedDocs();
|
||||

||||
/** Returns true when numDeletedDocs() is greater than zero. */
public boolean hasDeletions() {
|
||||
return numDeletedDocs() > 0;
|
||||
}
|
||||

||||
}
|
||||
|
||||
/**
 * DocMap for a reader without deletions: IDs map to themselves. It passes
 * null live-docs bits to the superclass, so get() never returns -1.
 */
private static class NoDelDocMap extends DocMap {
|
||||

||||
private final int maxDoc;
|
||||

||||
private NoDelDocMap(int maxDoc) {
|
||||
super(null);
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||

||||
/** Identity mapping. */
@Override
|
||||
public int remap(int docId) {
|
||||
return docId;
|
||||
}
|
||||

||||
/** Returns the maxDoc value given at construction. */
@Override
|
||||
public int maxDoc() {
|
||||
return maxDoc;
|
||||
}
|
||||

||||
/** Always 0 for this implementation. */
@Override
|
||||
public int numDeletedDocs() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * DocMap that looks the remapped ID up directly in a packed array holding
 * one entry per original document.
 */
private static class DirectDocMap extends DocMap {
|
||||

||||
// Remapped ID per original doc ID; read by remap().
private final PackedInts.Mutable docIds;
|
||||
private final int numDeletedDocs;
|
||||

||||
private DirectDocMap(Bits liveDocs, PackedInts.Mutable docIds, int numDeletedDocs) {
|
||||
super(liveDocs);
|
||||
this.docIds = docIds;
|
||||
this.numDeletedDocs = numDeletedDocs;
|
||||
}
|
||||

||||
/** Returns the stored entry for docId, narrowed to int. */
@Override
|
||||
public int remap(int docId) {
|
||||
return (int) docIds.get(docId);
|
||||
}
|
||||

||||
/** Returns the size of the packed array. */
@Override
|
||||
public int maxDoc() {
|
||||
return docIds.size();
|
||||
}
|
||||

||||
/** Returns the deletion count given at construction. */
@Override
|
||||
public int numDeletedDocs() {
|
||||
return numDeletedDocs;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * DocMap that derives the remapped ID by subtracting a per-document
 * deletion count, read from a packed array, from the original ID.
 */
private static class DelCountDocMap extends DocMap {
|
||||

||||
// Per-document deletion count; remap() subtracts the entry for docId from docId.
private final PackedInts.Mutable numDeletesSoFar;
|
||||

||||
private DelCountDocMap(Bits liveDocs, PackedInts.Mutable numDeletesSoFar) {
|
||||
super(liveDocs);
|
||||
this.numDeletesSoFar = numDeletesSoFar;
|
||||
}
|
||||

||||
/** Returns docId minus the stored count for docId. */
@Override
|
||||
public int remap(int docId) {
|
||||
return docId - (int) numDeletesSoFar.get(docId);
|
||||
}
|
||||

||||
/** Returns the size of the packed array. */
@Override
|
||||
public int maxDoc() {
|
||||
return numDeletesSoFar.size();
|
||||
}
|
||||

||||
/**
 * Returns the count stored in the last entry of the packed array.
 * NOTE(review): this reads index maxDoc - 1, so it assumes the map is
 * non-empty; confirm no caller builds this map with maxDoc == 0.
 */
@Override
|
||||
public int numDeletedDocs() {
|
||||
final int maxDoc = maxDoc();
|
||||
return (int) numDeletesSoFar.get(maxDoc - 1);
|
||||
}
|
||||
}
|
||||
|
||||
public SegmentInfo segmentInfo;
|
||||
public FieldInfos fieldInfos;
|
||||
public List<IndexReaderAndLiveDocs> readers; // Readers & liveDocs being merged
|
||||
public int[][] docMaps; // Maps docIDs around deletions
|
||||
public DocMap[] docMaps; // Maps docIDs around deletions
|
||||
public int[] docBase; // New docID base per reader
|
||||
public CheckAbort checkAbort;
|
||||
public InfoStream infoStream;
|
||||
|
@ -65,8 +220,8 @@ public class MergeState {
|
|||
|
||||
public static class CheckAbort {
|
||||
private double workCount;
|
||||
private MergePolicy.OneMerge merge;
|
||||
private Directory dir;
|
||||
private final MergePolicy.OneMerge merge;
|
||||
private final Directory dir;
|
||||
public CheckAbort(MergePolicy.OneMerge merge, Directory dir) {
|
||||
this.merge = merge;
|
||||
this.dir = dir;
|
||||
|
|
|
@ -322,7 +322,7 @@ public class MultiDocValues extends DocValues {
|
|||
final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
|
||||
comp, globalNumDocs);
|
||||
List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
|
||||
docBases(), new int[values.length][], values, ctx);
|
||||
docBases(), new MergeState.DocMap[values.length], values, ctx);
|
||||
RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(
|
||||
type);
|
||||
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer,
|
||||
|
|
|
@ -80,7 +80,7 @@ final class SegmentMerger {
|
|||
new ReaderUtil.Gather(reader) {
|
||||
@Override
|
||||
protected void add(int base, AtomicReader r) {
|
||||
mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs()));
|
||||
mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(r, r.getLiveDocs(), r.numDeletedDocs()));
|
||||
}
|
||||
}.run();
|
||||
} catch (IOException ioe) {
|
||||
|
@ -89,8 +89,8 @@ final class SegmentMerger {
|
|||
}
|
||||
}
|
||||
|
||||
final void add(SegmentReader reader, Bits liveDocs) {
|
||||
mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs));
|
||||
final void add(SegmentReader reader, Bits liveDocs, int delCount) {
|
||||
mergeState.readers.add(new MergeState.IndexReaderAndLiveDocs(reader, liveDocs, delCount));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -287,7 +287,7 @@ final class SegmentMerger {
|
|||
final int numReaders = mergeState.readers.size();
|
||||
|
||||
// Remap docIDs
|
||||
mergeState.docMaps = new int[numReaders][];
|
||||
mergeState.docMaps = new MergeState.DocMap[numReaders];
|
||||
mergeState.docBase = new int[numReaders];
|
||||
mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders];
|
||||
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
|
||||
|
@ -300,30 +300,9 @@ final class SegmentMerger {
|
|||
final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);
|
||||
|
||||
mergeState.docBase[i] = docBase;
|
||||
final int maxDoc = reader.reader.maxDoc();
|
||||
final int docCount;
|
||||
final Bits liveDocs = reader.liveDocs;
|
||||
final int[] docMap;
|
||||
if (liveDocs != null) {
|
||||
int delCount = 0;
|
||||
docMap = new int[maxDoc];
|
||||
int newDocID = 0;
|
||||
for(int j=0;j<maxDoc;j++) {
|
||||
if (!liveDocs.get(j)) {
|
||||
docMap[j] = -1;
|
||||
delCount++;
|
||||
} else {
|
||||
docMap[j] = newDocID++;
|
||||
}
|
||||
}
|
||||
docCount = maxDoc - delCount;
|
||||
} else {
|
||||
docCount = maxDoc;
|
||||
docMap = null;
|
||||
}
|
||||
|
||||
final MergeState.DocMap docMap = MergeState.DocMap.build(reader);
|
||||
mergeState.docMaps[i] = docMap;
|
||||
docBase += docCount;
|
||||
docBase += docMap.numDocs();
|
||||
|
||||
if (mergeState.payloadProcessorProvider != null) {
|
||||
mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader.reader);
|
||||
|
|
|
@ -81,7 +81,8 @@ public final class SortedBytesMergeUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<SortedSourceSlice> buildSlices(int[] docBases, int[][] docMaps,
|
||||
public static List<SortedSourceSlice> buildSlices(
|
||||
int[] docBases, MergeState.DocMap[] docMaps,
|
||||
DocValues[] docValues, MergeContext ctx) throws IOException {
|
||||
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
|
||||
for (int i = 0; i < docValues.length; i++) {
|
||||
|
@ -111,15 +112,15 @@ public final class SortedBytesMergeUtils {
|
|||
* mapping in docIDToRelativeOrd. After the merge SortedSourceSlice#ordMapping
|
||||
* contains the new global ordinals for the relative index.
|
||||
*/
|
||||
private static void createOrdMapping(int[] docBases, int[][] docMaps,
|
||||
private static void createOrdMapping(int[] docBases, MergeState.DocMap[] docMaps,
|
||||
SortedSourceSlice currentSlice) {
|
||||
final int readerIdx = currentSlice.readerIdx;
|
||||
final int[] currentDocMap = docMaps[readerIdx];
|
||||
final MergeState.DocMap currentDocMap = docMaps[readerIdx];
|
||||
final int docBase = currentSlice.docToOrdStart;
|
||||
assert docBase == docBases[readerIdx];
|
||||
if (currentDocMap != null) { // we have deletes
|
||||
for (int i = 0; i < currentDocMap.length; i++) {
|
||||
final int doc = currentDocMap[i];
|
||||
if (currentDocMap != null && currentDocMap.hasDeletions()) { // we have deletes
|
||||
for (int i = 0; i < currentDocMap.maxDoc(); i++) {
|
||||
final int doc = currentDocMap.get(i);
|
||||
if (doc != -1) { // not deleted
|
||||
final int ord = currentSlice.source.ord(i); // collect ords strictly
|
||||
// increasing
|
||||
|
|
|
@ -25,9 +25,11 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
|
||||
public class TestSegmentMerger extends LuceneTestCase {
|
||||
|
@ -139,4 +141,41 @@ public class TestSegmentMerger extends LuceneTestCase {
|
|||
TestSegmentReader.checkNorms(mergedReader);
|
||||
mergedReader.close();
|
||||
}
|
||||
|
||||
/**
 * Returns true when both maps report the same maxDoc() and return the same
 * get(i) for every i in [0, maxDoc).
 */
private static boolean equals(MergeState.DocMap map1, MergeState.DocMap map2) {
|
||||
if (map1.maxDoc() != map2.maxDoc()) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < map1.maxDoc(); ++i) {
|
||||
if (map1.get(i) != map2.get(i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Checks that the deletion-count and direct doc map builders agree (via the
 * equals helper) when given the same live-docs bits, for three inputs: the
 * freshly constructed bit set, a partially set one, and a fully set one.
 */
public void testBuildDocMap() {
|
||||
final int maxDoc = 128;
|
||||
final FixedBitSet liveDocs = new FixedBitSet(maxDoc);
|
||||

||||
// Case 1: the counts passed (maxDoc deletes, 0 live docs) treat every document as deleted.
// presumably a new FixedBitSet starts with all bits clear; verify against FixedBitSet
MergeState.DocMap docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
|
||||
MergeState.DocMap docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
|
||||
assertTrue(equals(docMap1, docMap2));
|
||||

||||
// Case 2: mark docs 1, 7..78, 80 and 88 as live and derive the counts from the bit set.
liveDocs.set(1);
|
||||
for (int i = 7; i < 79; ++i) {
|
||||
liveDocs.set(i);
|
||||
}
|
||||
liveDocs.set(80);
|
||||
liveDocs.set(88);
|
||||
int numDocs = liveDocs.cardinality();
|
||||
docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, maxDoc - numDocs, liveDocs, PackedInts.COMPACT);
|
||||
docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, numDocs, liveDocs, PackedInts.COMPACT);
|
||||
assertTrue(equals(docMap1, docMap2));
|
||||

||||
// Case 3: set the whole range so the counts passed are 0 deletes and maxDoc live docs.
liveDocs.set(0, maxDoc);
|
||||
docMap1 = MergeState.DocMap.buildDelCountDocmap(maxDoc, 0, liveDocs, PackedInts.COMPACT);
|
||||
docMap2 = MergeState.DocMap.buildDirectDocMap(maxDoc, maxDoc, liveDocs, PackedInts.COMPACT);
|
||||
assertTrue(equals(docMap1, docMap2));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue