mirror of https://github.com/apache/lucene.git
LUCENE-4775: nuke SegmentInfo.sizeInBytes; fix OneMerge.totalBytesSize thread safety
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445978 13f79535-47bb-0310-9956-ffa450edef68
commit 447e9fcfee
parent 9e767a667d
lucene/CHANGES.txt

@@ -216,6 +216,10 @@ Bug Fixes
   cases, for example if you had an index with more than 260M documents and a
   VAR_INT field. (Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
 
+* LUCENE-4775: Remove SegmentInfo.sizeInBytes() and make
+  MergePolicy.OneMerge.totalBytesSize thread safe (Josh Bronson via
+  Robert Muir, Mike McCandless)
+
 Documentation
 
 * LUCENE-4718: Fixed documentation of oal.queryparser.classic.
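For context on the thread-safety problem this entry fixes: SegmentInfo cached its size lazily in a volatile field and reset it whenever files were added (see the SegmentInfo.java hunks below). A minimal sketch of that pattern, using hypothetical names rather than Lucene's classes, shows why the check-then-act is racy:

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

// Minimal sketch (hypothetical names, not Lucene code) of the lazy size
// cache this commit removes. The check-then-act on the volatile field is
// not atomic with respect to addFile(), so one thread can re-cache a sum
// that another thread has just invalidated.
class LazySizedFileSet {
  private final List<Long> fileLengths = new CopyOnWriteArrayList<>();
  private volatile long sizeInBytes = -1; // -1 means "not computed yet"

  void addFile(long length) {
    fileLengths.add(length);
    sizeInBytes = -1; // invalidate; races with a concurrent sizeInBytes()
  }

  long sizeInBytes() {
    if (sizeInBytes == -1) {   // check ...
      long sum = 0;
      for (long len : fileLengths) {
        sum += len;
      }
      sizeInBytes = sum;       // ... then act: may overwrite a fresher invalidation
    }
    return sizeInBytes;
  }
}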
lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -520,7 +520,7 @@ class DocumentsWriterPerThread {
     }
 
     if (infoStream.isEnabled("DWPT")) {
-      final double newSegmentSize = segmentInfo.sizeInBytes()/1024./1024.;
+      final double newSegmentSize = segmentInfoPerCommit.sizeInBytes()/1024./1024.;
       infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name +
               " ramUsed=" + nf.format(startMBUsed) + " MB" +
               " newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" +
@@ -557,7 +557,7 @@ class DocumentsWriterPerThread {
 
     IndexWriter.setDiagnostics(newSegment.info, "flush");
 
-    IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.info.sizeInBytes()));
+    IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.sizeInBytes()));
 
     boolean success = false;
     try {
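Both hunks swap the removed SegmentInfo.sizeInBytes() for the per-commit view (SegmentInfoPerCommit.sizeInBytes(), reached here via segmentInfoPerCommit and newSegment), which also accounts for files the commit references beyond the core segment files, such as live-docs. A hedged sketch of that computation, with illustrative names that are not Lucene's API:

import java.io.IOException;
import java.util.Set;

// Illustrative-only sketch: sum directory lengths over the commit's
// complete file set, computed at flush/commit time rather than cached
// in the SegmentInfo and invalidated later.
interface FileLengths {
  long fileLength(String name) throws IOException;
}

final class CommitSize {
  static long sizeInBytes(FileLengths dir, Set<String> commitFiles) throws IOException {
    long sum = 0;
    for (String file : commitFiles) {
      sum += dir.fileLength(file);
    }
    return sum;
  }
}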
lucene/core/src/java/org/apache/lucene/index/IndexWriter.java

@@ -2301,7 +2301,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
         infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
       }
 
-      IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
+      IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1));
 
       for(FieldInfo fi : getFieldInfos(info.info)) {
         globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType());
@@ -3458,7 +3458,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
         final int delCount = numDeletedDocs(info);
         assert delCount <= info.info.getDocCount();
         final double delRatio = ((double) delCount)/info.info.getDocCount();
-        merge.estimatedMergeBytes += info.info.sizeInBytes() * (1.0 - delRatio);
+        merge.estimatedMergeBytes += info.sizeInBytes() * (1.0 - delRatio);
+        merge.totalMergeBytes += info.sizeInBytes();
       }
     }
   }
@@ -3759,7 +3760,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       // lost...
 
       if (infoStream.isEnabled("IW")) {
-        infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
+        infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
       }
 
       final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
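The -3458 hunk is where the new totalMergeBytes gets its value: sizes are accumulated once, inside IndexWriter's merge initialization, instead of being recomputed on demand by merge threads. A simplified sketch of that snapshot-and-publish pattern (names and locking assumptions are illustrative, not Lucene's):

// Sketch only: accumulate once under the writer's lock, publish via
// volatile writes, so merge threads later read a stable total instead of
// re-walking mutable SegmentInfos.
final class MergeSizes {
  volatile long estimatedMergeBytes; // pro-rated by deletions
  volatile long totalMergeBytes;     // raw input size

  // Analogous to IW.mergeInit; assumes the caller holds whatever lock
  // protects the segment metadata while the sums are computed.
  void init(long[] segmentSizes, double[] delRatios) {
    long estimated = 0, total = 0;
    for (int i = 0; i < segmentSizes.length; i++) {
      estimated += (long) (segmentSizes[i] * (1.0 - delRatios[i]));
      total += segmentSizes[i];
    }
    estimatedMergeBytes = estimated; // volatile write publishes the snapshot
    totalMergeBytes = total;
  }
}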
lucene/core/src/java/org/apache/lucene/index/MergePolicy.java

@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
@@ -74,7 +73,11 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
     int maxNumSegments = -1;        // used by IndexWriter
 
     /** Estimated size in bytes of the merged segment. */
-    public long estimatedMergeBytes;       // used by IndexWriter
+    public volatile long estimatedMergeBytes;       // used by IndexWriter
+
+    // Sum of sizeInBytes of all SegmentInfos; set by IW.mergeInit
+    volatile long totalMergeBytes;
 
     List<SegmentReader> readers;        // used by IndexWriter
 
     /** Segments to be merged. */
@@ -187,14 +190,12 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
 
     /**
      * Returns the total size in bytes of this merge. Note that this does not
-     * indicate the size of the merged segment, but the input total size.
-     * */
+     * indicate the size of the merged segment, but the
+     * input total size. This is only set once the merge is
+     * initialized by IndexWriter.
+     */
     public long totalBytesSize() throws IOException {
-      long total = 0;
-      for (SegmentInfoPerCommit info : segments) {
-        total += info.info.sizeInBytes();
-      }
-      return total;
+      return totalMergeBytes;
     }
 
     /**
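With totalBytesSize() now returning the pre-computed snapshot, callers on merge threads no longer touch Directory state or iterate live SegmentInfos. A hedged usage sketch; MyScheduler and its logging are illustrative, only OneMerge.totalBytesSize() is Lucene's API:

import java.io.IOException;

// Illustrative consumer: read the merge's input size as a single
// constant-time volatile read. The signature still declares IOException
// for back-compat, though the new body cannot throw it.
class MyScheduler {
  void beforeMerge(org.apache.lucene.index.MergePolicy.OneMerge merge) throws IOException {
    long inputBytes = merge.totalBytesSize(); // O(1), no directory I/O
    System.out.println(String.format("merge input: %.3f MB", inputBytes / 1024. / 1024.));
  }
}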
lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
-import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -57,8 +56,6 @@ public final class SegmentInfo {
 
   private boolean isCompoundFile;
 
-  private volatile long sizeInBytes = -1;         // total byte size of all files (computed on demand)
-
   private Codec codec;
 
   private Map<String,String> diagnostics;
@@ -100,23 +97,6 @@ public final class SegmentInfo {
     this.attributes = attributes;
   }
 
-  /**
-   * Returns total size in bytes of all of files used by
-   * this segment. Note that this will not include any live
-   * docs for the segment; to include that use {@link
-   * SegmentInfoPerCommit#sizeInBytes()} instead.
-   */
-  public long sizeInBytes() throws IOException {
-    if (sizeInBytes == -1) {
-      long sum = 0;
-      for (final String fileName : files()) {
-        sum += dir.fileLength(fileName);
-      }
-      sizeInBytes = sum;
-    }
-    return sizeInBytes;
-  }
-
   /**
    * Mark whether this segment is stored as a compound file.
    *
@@ -254,7 +234,6 @@ public final class SegmentInfo {
   public void setFiles(Set<String> files) {
     checkFileNames(files);
     setFiles = files;
-    sizeInBytes = -1;
   }
 
   /** Add these files to the set of files written for this
@@ -262,7 +241,6 @@ public final class SegmentInfo {
   public void addFiles(Collection<String> files) {
     checkFileNames(files);
     setFiles.addAll(files);
-    sizeInBytes = -1;
   }
 
   /** Add this file to the set of files written for this
@@ -270,7 +248,6 @@ public final class SegmentInfo {
   public void addFile(String file) {
     checkFileNames(Collections.singleton(file));
     setFiles.add(file);
-    sizeInBytes = -1;
   }
 
   private void checkFileNames(Collection<String> files) {
lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java

@@ -374,7 +374,7 @@ public class TieredMergePolicy extends MergePolicy {
       for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
         final SegmentInfoPerCommit info = infosSorted.get(idx);
         if (merging.contains(info)) {
-          mergingBytes += info.info.sizeInBytes();
+          mergingBytes += info.sizeInBytes();
         } else if (!toBeMerged.contains(info)) {
           eligible.add(info);
         }
@@ -470,7 +470,7 @@ public class TieredMergePolicy extends MergePolicy {
           final long segBytes = size(info);
           totAfterMergeBytes += segBytes;
           totAfterMergeBytesFloored += floorSize(segBytes);
-          totBeforeMergeBytes += info.info.sizeInBytes();
+          totBeforeMergeBytes += info.sizeInBytes();
         }
 
         // Measure "skew" of the merge, which can range
@@ -670,7 +670,7 @@ public class TieredMergePolicy extends MergePolicy {
 
   // Segment size in bytes, pro-rated by % deleted
   private long size(SegmentInfoPerCommit info) throws IOException {
-    final long byteSize = info.info.sizeInBytes();
+    final long byteSize = info.sizeInBytes();
     final int delCount = writer.get().numDeletedDocs(info);
     final double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((double)delCount / (double)info.info.getDocCount()));
     assert delRatio <= 1.0;
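The size() helper in the last hunk pro-rates a segment's byte size by its deletion ratio, so mostly-deleted segments look cheap to merge. A self-contained sketch of the arithmetic (illustrative numbers, not Lucene code):

// Mirrors the logic of size(SegmentInfoPerCommit) above.
public class ProRatedSize {
  static long proRatedSize(long byteSize, int delCount, int docCount) {
    double delRatio = docCount <= 0 ? 0.0 : (double) delCount / (double) docCount;
    assert delRatio <= 1.0;
    return (long) (byteSize * (1.0 - delRatio));
  }

  public static void main(String[] args) {
    // A 100 MB segment with 25% of its docs deleted is scored as 75 MB,
    // so the policy prefers it over an equally sized clean segment.
    System.out.println(proRatedSize(100L << 20, 25, 100)); // 78643200 (= 75 MB)
  }
}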
lucene/core/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java

@@ -324,4 +324,40 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
     w.close(false);
     dir.close();
   }
+
+  private static class TrackingCMS extends ConcurrentMergeScheduler {
+    long totMergedBytes;
+
+    public TrackingCMS() {
+      setMaxMergeCount(5);
+      setMaxThreadCount(5);
+    }
+
+    @Override
+    public void doMerge(MergePolicy.OneMerge merge) throws IOException {
+      totMergedBytes += merge.totalBytesSize();
+      super.doMerge(merge);
+    }
+  }
+
+  public void testTotalBytesSize() throws Exception {
+    Directory d = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMaxBufferedDocs(5);
+    iwc.setMergeScheduler(new TrackingCMS());
+    RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
+    for(int i=0;i<100000;i++) {
+      Document doc = new Document();
+      doc.add(newStringField("id", ""+i, Field.Store.NO));
+      doc.add(newTextField("field", "here is some text", Field.Store.NO));
+      w.addDocument(doc);
+
+      if (random().nextBoolean()) {
+        w.deleteDocuments(new Term("id", ""+random().nextInt(i+1)));
+      }
+    }
+    w.close();
+    d.close();
+  }
 }
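The new test drives 100,000 small documents with random deletes through a ConcurrentMergeScheduler capped at 5 merge threads and 5 queued merges, so doMerge() runs concurrently with indexing; accumulating merge.totalBytesSize() there exercises the new snapshot under the same concurrent conditions that made the old on-demand computation unsafe.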