From 447e9fcfeee30661a97af67431b1c6fbd8121ea6 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 13 Feb 2013 23:19:08 +0000 Subject: [PATCH] LUCENE-4775: nuke SegmentInfo.sizeInBytes; fix OneMerge.totalBytesSize thread safety git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1445978 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 +++ .../index/DocumentsWriterPerThread.java | 4 +-- .../org/apache/lucene/index/IndexWriter.java | 7 ++-- .../org/apache/lucene/index/MergePolicy.java | 19 +++++----- .../org/apache/lucene/index/SegmentInfo.java | 23 ------------ .../lucene/index/TieredMergePolicy.java | 6 ++-- .../index/TestConcurrentMergeScheduler.java | 36 +++++++++++++++++++ 7 files changed, 59 insertions(+), 40 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4c4c02588e0..10438e2d5dc 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -216,6 +216,10 @@ Bug Fixes cases, for example if you had an index with more than 260M documents and a VAR_INT field. (Simon Willnauer, Adrien Grand, Mike McCandless, Robert Muir) +* LUCENE-4775: Remove SegmentInfo.sizeInBytes() and make + MergePolicy.OneMerge.totalBytesSize thread safe (Josh Bronson via + Robert Muir, Mike McCandless) + Documentation * LUCENE-4718: Fixed documentation of oal.queryparser.classic. diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 41b38cebdf3..7e1cd406cd9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -520,7 +520,7 @@ class DocumentsWriterPerThread { } if (infoStream.isEnabled("DWPT")) { - final double newSegmentSize = segmentInfo.sizeInBytes()/1024./1024.; + final double newSegmentSize = segmentInfoPerCommit.sizeInBytes()/1024./1024.; infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" + @@ -557,7 +557,7 @@ class DocumentsWriterPerThread { IndexWriter.setDiagnostics(newSegment.info, "flush"); - IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.info.sizeInBytes())); + IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.sizeInBytes())); boolean success = false; try { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 1da8087a382..085070d6d1f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2301,7 +2301,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info); } - IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1)); + IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1)); for(FieldInfo fi : getFieldInfos(info.info)) { globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType()); @@ -3458,7 +3458,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { final int delCount = numDeletedDocs(info); assert delCount <= info.info.getDocCount(); final double delRatio = ((double) delCount)/info.info.getDocCount(); - merge.estimatedMergeBytes += info.info.sizeInBytes() * (1.0 - delRatio); + merge.estimatedMergeBytes += info.sizeInBytes() * (1.0 - delRatio); + merge.totalMergeBytes += info.sizeInBytes(); } } } @@ -3759,7 +3760,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // lost... if (infoStream.isEnabled("IW")) { - infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); + infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); } final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 3b8b4bf2b17..656c7e56fd2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; @@ -74,7 +73,11 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable { int maxNumSegments = -1; // used by IndexWriter /** Estimated size in bytes of the merged segment. */ - public long estimatedMergeBytes; // used by IndexWriter + public volatile long estimatedMergeBytes; // used by IndexWriter + + // Sum of sizeInBytes of all SegmentInfos; set by IW.mergeInit + volatile long totalMergeBytes; + List readers; // used by IndexWriter /** Segments to be merged. */ @@ -187,14 +190,12 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable { /** * Returns the total size in bytes of this merge. Note that this does not - * indicate the size of the merged segment, but the input total size. - * */ + * indicate the size of the merged segment, but the + * input total size. This is only set once the merge is + * initialized by IndexWriter. + */ public long totalBytesSize() throws IOException { - long total = 0; - for (SegmentInfoPerCommit info : segments) { - total += info.info.sizeInBytes(); - } - return total; + return totalMergeBytes; } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java index fd27f7176f8..4d3c9f986cf 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ -import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -57,8 +56,6 @@ public final class SegmentInfo { private boolean isCompoundFile; - private volatile long sizeInBytes = -1; // total byte size of all files (computed on demand) - private Codec codec; private Map diagnostics; @@ -100,23 +97,6 @@ public final class SegmentInfo { this.attributes = attributes; } - /** - * Returns total size in bytes of all of files used by - * this segment. Note that this will not include any live - * docs for the segment; to include that use {@link - * SegmentInfoPerCommit#sizeInBytes()} instead. - */ - public long sizeInBytes() throws IOException { - if (sizeInBytes == -1) { - long sum = 0; - for (final String fileName : files()) { - sum += dir.fileLength(fileName); - } - sizeInBytes = sum; - } - return sizeInBytes; - } - /** * Mark whether this segment is stored as a compound file. * @@ -254,7 +234,6 @@ public final class SegmentInfo { public void setFiles(Set files) { checkFileNames(files); setFiles = files; - sizeInBytes = -1; } /** Add these files to the set of files written for this @@ -262,7 +241,6 @@ public final class SegmentInfo { public void addFiles(Collection files) { checkFileNames(files); setFiles.addAll(files); - sizeInBytes = -1; } /** Add this file to the set of files written for this @@ -270,7 +248,6 @@ public final class SegmentInfo { public void addFile(String file) { checkFileNames(Collections.singleton(file)); setFiles.add(file); - sizeInBytes = -1; } private void checkFileNames(Collection files) { diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java index dccb5cdfe77..745681d5121 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java @@ -374,7 +374,7 @@ public class TieredMergePolicy extends MergePolicy { for(int idx = tooBigCount; idx