mirror of https://github.com/apache/lucene.git
LUCENE-4775: nuke SegmentInfo.sizeInBytes; fix OneMerge.totalBytesSize thread safety
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445978 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9e767a667d
commit
447e9fcfee
|
@ -216,6 +216,10 @@ Bug Fixes
|
||||||
cases, for example if you had an index with more than 260M documents and a
|
cases, for example if you had an index with more than 260M documents and a
|
||||||
VAR_INT field. (Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
|
VAR_INT field. (Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4775: Remove SegmentInfo.sizeInBytes() and make
|
||||||
|
MergePolicy.OneMerge.totalBytesSize thread safe (Josh Bronson via
|
||||||
|
Robert Muir, Mike McCandless)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
|
* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
|
||||||
|
|
|
@ -520,7 +520,7 @@ class DocumentsWriterPerThread {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (infoStream.isEnabled("DWPT")) {
|
if (infoStream.isEnabled("DWPT")) {
|
||||||
final double newSegmentSize = segmentInfo.sizeInBytes()/1024./1024.;
|
final double newSegmentSize = segmentInfoPerCommit.sizeInBytes()/1024./1024.;
|
||||||
infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name +
|
infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name +
|
||||||
" ramUsed=" + nf.format(startMBUsed) + " MB" +
|
" ramUsed=" + nf.format(startMBUsed) + " MB" +
|
||||||
" newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" +
|
" newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" +
|
||||||
|
@ -557,7 +557,7 @@ class DocumentsWriterPerThread {
|
||||||
|
|
||||||
IndexWriter.setDiagnostics(newSegment.info, "flush");
|
IndexWriter.setDiagnostics(newSegment.info, "flush");
|
||||||
|
|
||||||
IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.info.sizeInBytes()));
|
IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.sizeInBytes()));
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -2301,7 +2301,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
|
infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info);
|
||||||
}
|
}
|
||||||
|
|
||||||
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
|
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1));
|
||||||
|
|
||||||
for(FieldInfo fi : getFieldInfos(info.info)) {
|
for(FieldInfo fi : getFieldInfos(info.info)) {
|
||||||
globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType());
|
globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType());
|
||||||
|
@ -3458,7 +3458,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
final int delCount = numDeletedDocs(info);
|
final int delCount = numDeletedDocs(info);
|
||||||
assert delCount <= info.info.getDocCount();
|
assert delCount <= info.info.getDocCount();
|
||||||
final double delRatio = ((double) delCount)/info.info.getDocCount();
|
final double delRatio = ((double) delCount)/info.info.getDocCount();
|
||||||
merge.estimatedMergeBytes += info.info.sizeInBytes() * (1.0 - delRatio);
|
merge.estimatedMergeBytes += info.sizeInBytes() * (1.0 - delRatio);
|
||||||
|
merge.totalMergeBytes += info.sizeInBytes();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3759,7 +3760,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||||
// lost...
|
// lost...
|
||||||
|
|
||||||
if (infoStream.isEnabled("IW")) {
|
if (infoStream.isEnabled("IW")) {
|
||||||
infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
|
infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
|
||||||
}
|
}
|
||||||
|
|
||||||
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
|
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -74,7 +73,11 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
|
||||||
int maxNumSegments = -1; // used by IndexWriter
|
int maxNumSegments = -1; // used by IndexWriter
|
||||||
|
|
||||||
/** Estimated size in bytes of the merged segment. */
|
/** Estimated size in bytes of the merged segment. */
|
||||||
public long estimatedMergeBytes; // used by IndexWriter
|
public volatile long estimatedMergeBytes; // used by IndexWriter
|
||||||
|
|
||||||
|
// Sum of sizeInBytes of all SegmentInfos; set by IW.mergeInit
|
||||||
|
volatile long totalMergeBytes;
|
||||||
|
|
||||||
List<SegmentReader> readers; // used by IndexWriter
|
List<SegmentReader> readers; // used by IndexWriter
|
||||||
|
|
||||||
/** Segments to be merged. */
|
/** Segments to be merged. */
|
||||||
|
@ -187,14 +190,12 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the total size in bytes of this merge. Note that this does not
|
* Returns the total size in bytes of this merge. Note that this does not
|
||||||
* indicate the size of the merged segment, but the input total size.
|
* indicate the size of the merged segment, but the
|
||||||
* */
|
* input total size. This is only set once the merge is
|
||||||
|
* initialized by IndexWriter.
|
||||||
|
*/
|
||||||
public long totalBytesSize() throws IOException {
|
public long totalBytesSize() throws IOException {
|
||||||
long total = 0;
|
return totalMergeBytes;
|
||||||
for (SegmentInfoPerCommit info : segments) {
|
|
||||||
total += info.info.sizeInBytes();
|
|
||||||
}
|
|
||||||
return total;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -57,8 +56,6 @@ public final class SegmentInfo {
|
||||||
|
|
||||||
private boolean isCompoundFile;
|
private boolean isCompoundFile;
|
||||||
|
|
||||||
private volatile long sizeInBytes = -1; // total byte size of all files (computed on demand)
|
|
||||||
|
|
||||||
private Codec codec;
|
private Codec codec;
|
||||||
|
|
||||||
private Map<String,String> diagnostics;
|
private Map<String,String> diagnostics;
|
||||||
|
@ -100,23 +97,6 @@ public final class SegmentInfo {
|
||||||
this.attributes = attributes;
|
this.attributes = attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns total size in bytes of all of files used by
|
|
||||||
* this segment. Note that this will not include any live
|
|
||||||
* docs for the segment; to include that use {@link
|
|
||||||
* SegmentInfoPerCommit#sizeInBytes()} instead.
|
|
||||||
*/
|
|
||||||
public long sizeInBytes() throws IOException {
|
|
||||||
if (sizeInBytes == -1) {
|
|
||||||
long sum = 0;
|
|
||||||
for (final String fileName : files()) {
|
|
||||||
sum += dir.fileLength(fileName);
|
|
||||||
}
|
|
||||||
sizeInBytes = sum;
|
|
||||||
}
|
|
||||||
return sizeInBytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mark whether this segment is stored as a compound file.
|
* Mark whether this segment is stored as a compound file.
|
||||||
*
|
*
|
||||||
|
@ -254,7 +234,6 @@ public final class SegmentInfo {
|
||||||
public void setFiles(Set<String> files) {
|
public void setFiles(Set<String> files) {
|
||||||
checkFileNames(files);
|
checkFileNames(files);
|
||||||
setFiles = files;
|
setFiles = files;
|
||||||
sizeInBytes = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Add these files to the set of files written for this
|
/** Add these files to the set of files written for this
|
||||||
|
@ -262,7 +241,6 @@ public final class SegmentInfo {
|
||||||
public void addFiles(Collection<String> files) {
|
public void addFiles(Collection<String> files) {
|
||||||
checkFileNames(files);
|
checkFileNames(files);
|
||||||
setFiles.addAll(files);
|
setFiles.addAll(files);
|
||||||
sizeInBytes = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Add this file to the set of files written for this
|
/** Add this file to the set of files written for this
|
||||||
|
@ -270,7 +248,6 @@ public final class SegmentInfo {
|
||||||
public void addFile(String file) {
|
public void addFile(String file) {
|
||||||
checkFileNames(Collections.singleton(file));
|
checkFileNames(Collections.singleton(file));
|
||||||
setFiles.add(file);
|
setFiles.add(file);
|
||||||
sizeInBytes = -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkFileNames(Collection<String> files) {
|
private void checkFileNames(Collection<String> files) {
|
||||||
|
|
|
@ -374,7 +374,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
||||||
final SegmentInfoPerCommit info = infosSorted.get(idx);
|
final SegmentInfoPerCommit info = infosSorted.get(idx);
|
||||||
if (merging.contains(info)) {
|
if (merging.contains(info)) {
|
||||||
mergingBytes += info.info.sizeInBytes();
|
mergingBytes += info.sizeInBytes();
|
||||||
} else if (!toBeMerged.contains(info)) {
|
} else if (!toBeMerged.contains(info)) {
|
||||||
eligible.add(info);
|
eligible.add(info);
|
||||||
}
|
}
|
||||||
|
@ -470,7 +470,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
final long segBytes = size(info);
|
final long segBytes = size(info);
|
||||||
totAfterMergeBytes += segBytes;
|
totAfterMergeBytes += segBytes;
|
||||||
totAfterMergeBytesFloored += floorSize(segBytes);
|
totAfterMergeBytesFloored += floorSize(segBytes);
|
||||||
totBeforeMergeBytes += info.info.sizeInBytes();
|
totBeforeMergeBytes += info.sizeInBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Measure "skew" of the merge, which can range
|
// Measure "skew" of the merge, which can range
|
||||||
|
@ -670,7 +670,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
// Segment size in bytes, pro-rated by % deleted
|
// Segment size in bytes, pro-rated by % deleted
|
||||||
private long size(SegmentInfoPerCommit info) throws IOException {
|
private long size(SegmentInfoPerCommit info) throws IOException {
|
||||||
final long byteSize = info.info.sizeInBytes();
|
final long byteSize = info.sizeInBytes();
|
||||||
final int delCount = writer.get().numDeletedDocs(info);
|
final int delCount = writer.get().numDeletedDocs(info);
|
||||||
final double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((double)delCount / (double)info.info.getDocCount()));
|
final double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((double)delCount / (double)info.info.getDocCount()));
|
||||||
assert delRatio <= 1.0;
|
assert delRatio <= 1.0;
|
||||||
|
|
|
@ -324,4 +324,40 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase {
|
||||||
w.close(false);
|
w.close(false);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static class TrackingCMS extends ConcurrentMergeScheduler {
|
||||||
|
long totMergedBytes;
|
||||||
|
|
||||||
|
public TrackingCMS() {
|
||||||
|
setMaxMergeCount(5);
|
||||||
|
setMaxThreadCount(5);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doMerge(MergePolicy.OneMerge merge) throws IOException {
|
||||||
|
totMergedBytes += merge.totalBytesSize();
|
||||||
|
super.doMerge(merge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTotalBytesSize() throws Exception {
|
||||||
|
Directory d = newDirectory();
|
||||||
|
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||||
|
iwc.setMaxBufferedDocs(5);
|
||||||
|
iwc.setMergeScheduler(new TrackingCMS());
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), d);
|
||||||
|
for(int i=0;i<100000;i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(newStringField("id", ""+i, Field.Store.NO));
|
||||||
|
doc.add(newTextField("field", "here is some text", Field.Store.NO));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
w.deleteDocuments(new Term("id", ""+random().nextInt(i+1)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.close();
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue