LUCENE-2790: IndexWriter should call MP.useCompoundFile and not LogMP.getUseCompoundFile

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1042101 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2010-12-04 05:58:35 +00:00
parent 7249ebe483
commit 30af48bda8
9 changed files with 127 additions and 104 deletions

View File: BalancedSegmentMergePolicy.java

@ -132,11 +132,10 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
// Since we must optimize down to 1 segment, the
// choice is simple:
boolean useCompoundFile = getUseCompoundFile();
if (last > 1 || !isOptimized(infos.info(0))) {
spec = new MergeSpecification();
spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
spec.add(new OneMerge(infos.range(0, last)));
}
} else if (last > maxNumSegments) {
@ -153,7 +152,6 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if (infoLen <= maxNumSegments) return null;
MergeSpecification spec = new MergeSpecification();
boolean useCompoundFile = getUseCompoundFile();
// use Viterbi algorithm to find the best segmentation.
// we will try to minimize the size variance of resulting segments.
@ -194,7 +192,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
prev = backLink[i][prev];
int mergeStart = i + prev;
if((mergeEnd - mergeStart) > 1) {
spec.add(new OneMerge(infos.range(mergeStart, mergeEnd), useCompoundFile));
spec.add(new OneMerge(infos.range(mergeStart, mergeEnd)));
} else {
if(partialExpunge) {
SegmentInfo info = infos.info(mergeStart);
@ -210,7 +208,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(partialExpunge && maxDelCount > 0) {
// expunge deletes
spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), useCompoundFile));
spec.add(new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1)));
}
return spec;
@ -260,7 +258,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
for(int i = 0; i < numLargeSegs; i++) {
SegmentInfo info = infos.info(i);
if(info.hasDeletions()) {
spec.add(new OneMerge(infos.range(i, i + 1), getUseCompoundFile()));
spec.add(new OneMerge(infos.range(i, i + 1)));
}
}
return spec;
@ -298,7 +296,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(totalSmallSegSize < targetSegSize * 2) {
MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
if(spec == null) spec = new MergeSpecification(); // should not happen
spec.add(new OneMerge(infos.range(numLargeSegs, numSegs), getUseCompoundFile()));
spec.add(new OneMerge(infos.range(numLargeSegs, numSegs)));
return spec;
} else {
return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
@ -313,7 +311,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
if(size(info) < sizeThreshold) break;
startSeg++;
}
spec.add(new OneMerge(infos.range(startSeg, numSegs), getUseCompoundFile()));
spec.add(new OneMerge(infos.range(startSeg, numSegs)));
return spec;
} else {
// apply the log merge policy to small segments.
@ -344,7 +342,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
}
}
if (maxDelCount > 0) {
return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1), getUseCompoundFile());
return new OneMerge(infos.range(expungeCandidate, expungeCandidate + 1));
}
return null;
}

View File: TestIndexSplitter.java

@ -34,7 +34,15 @@ public class TestIndexSplitter extends LuceneTestCase {
_TestUtil.rmDir(destDir);
destDir.mkdirs();
FSDirectory fsDir = FSDirectory.open(dir);
IndexWriter iw = new IndexWriter(fsDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setNoCFSRatio(1);
IndexWriter iw = new IndexWriter(
fsDir,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setOpenMode(OpenMode.CREATE).
setMergePolicy(mergePolicy)
);
for (int x=0; x < 100; x++) {
Document doc = TestIndexWriterReader.createDocument(x, "index", 5);
iw.addDocument(doc);

View File: IndexWriter.java

@ -50,13 +50,13 @@ import java.util.Date;
An <code>IndexWriter</code> creates and maintains an index.
<p>The <code>create</code> argument to the {@link
#IndexWriter(Directory, Analyzer, boolean, MaxFieldLength) constructor} determines
#IndexWriter(Directory, IndexWriterConfig) constructor} determines
whether a new index is created, or whether an existing index is
opened. Note that you can open an index with <code>create=true</code>
even while readers are using the index. The old readers will
continue to search the "point in time" snapshot they had opened,
and won't see the newly created index until they re-open. There are
also {@link #IndexWriter(Directory, Analyzer, MaxFieldLength) constructors}
also {@link #IndexWriter(Directory, IndexWriterConfig) constructors}
with no <code>create</code> argument which will create a new index
if there is not already an index at the provided path and otherwise
open the existing index.</p>
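A minimal sketch of the "point in time" behavior described above, written in the same MockAnalyzer/RAMDirectory style as this commit's tests (MockAnalyzer comes from Lucene's test framework; the class name is invented for illustration):

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class PointInTimeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // Build an initial one-document index.
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
    Document d = new Document();
    d.add(new Field("c", "v", Field.Store.YES, Field.Index.ANALYZED));
    w.addDocument(d);
    w.close();

    // Open a reader, then recreate the index underneath it with create=true.
    IndexReader old = IndexReader.open(dir);
    IndexWriter recreate = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()).setOpenMode(OpenMode.CREATE));
    recreate.close();

    System.out.println(old.numDocs());    // 1: the old reader still searches its point-in-time snapshot
    IndexReader fresh = IndexReader.open(dir);
    System.out.println(fresh.numDocs());  // 0: a newly opened reader sees the recreated, empty index
    old.close();
    fresh.close();
  }
}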
@ -72,11 +72,11 @@ import java.util.Date;
<p>These changes are buffered in memory and periodically
flushed to the {@link Directory} (during the above method
calls). A flush is triggered when there are enough
buffered deletes (see {@link #setMaxBufferedDeleteTerms})
buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
or enough added documents since the last flush, whichever
is sooner. For the added documents, flushing is triggered
either by RAM usage of the documents (see {@link
#setRAMBufferSizeMB}) or the number of added documents.
IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents.
The default is to flush when RAM usage hits 16 MB. For
best indexing speed you should flush by RAM usage with a
large RAM buffer. Note that flushing just moves the
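A hedged sketch of configuring the flush triggers mentioned in this paragraph through IndexWriterConfig; the concrete values are illustrative only and are not part of this patch:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class FlushConfigSketch {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
    conf.setRAMBufferSizeMB(64.0);          // flush added documents by RAM usage
    conf.setMaxBufferedDocs(-1);            // -1 disables flushing by document count
    conf.setMaxBufferedDeleteTerms(1000);   // flush after this many buffered delete terms
    IndexWriter writer = new IndexWriter(new RAMDirectory(), conf);
    writer.close();
  }
}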
@ -1248,8 +1248,8 @@ public class IndexWriter implements Closeable {
/**
* Adds a document to this index. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
* {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field,
* the remainder are discarded.
*
* <p> Note that if an Exception is hit (for example disk full)
* then the index will be consistent, but this document
@ -1297,7 +1297,7 @@ public class IndexWriter implements Closeable {
/**
* Adds a document to this index, using the provided analyzer instead of the
* value of {@link #getAnalyzer()}. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* {@link IndexWriterConfig#setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
*
* <p>See {@link #addDocument(Document)} for details on
@ -1603,7 +1603,7 @@ public class IndexWriter implements Closeable {
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @see LogMergePolicy#findMergesForOptimize
* @see MergePolicy#findMergesForOptimize
*/
public void optimize() throws CorruptIndexException, IOException {
optimize(true);
@ -2282,8 +2282,7 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public void addIndexes(IndexReader... readers)
throws CorruptIndexException, IOException {
public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
ensureOpen();
try {
@ -2296,47 +2295,33 @@ public class IndexWriter implements Closeable {
int docCount = merger.merge(); // merge 'em
SegmentInfo info = null;
synchronized(this) {
info = new SegmentInfo(mergedName, docCount, directory, false, -1,
null, false, merger.hasProx(), merger.getSegmentCodecs());
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
false, -1, null, false, merger.hasProx(), merger.getSegmentCodecs());
setDiagnostics(info, "addIndexes(IndexReader...)");
segmentInfos.add(info);
checkpoint();
// Notify DocumentsWriter that the flushed count just increased
docWriter.updateFlushedDocCount(docCount);
boolean useCompoundFile;
synchronized(this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
}
// Now create the compound file if needed
if (mergePolicy instanceof LogMergePolicy && ((LogMergePolicy) mergePolicy).getUseCompoundFile()) {
List<String> files = null;
synchronized(this) {
// Must incRef our files so that if another thread
// is running merge/optimize, it doesn't delete our
// segment's files before we have a chance to
// finish making the compound file.
if (segmentInfos.contains(info)) {
files = info.files();
deleter.incRef(files);
}
}
if (files != null) {
try {
if (useCompoundFile) {
merger.createCompoundFile(mergedName + ".cfs", info);
synchronized(this) {
info.setUseCompoundFile(true);
checkpoint();
// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(merger.getMergedFiles(info));
}
} finally {
// Register the new segment
synchronized(this) {
deleter.decRef(files);
}
}
}
segmentInfos.add(info);
// Notify DocumentsWriter that the flushed count just increased
docWriter.updateFlushedDocCount(docCount);
checkpoint();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "addIndexes(IndexReader...)");
@ -3440,8 +3425,12 @@ public class IndexWriter implements Closeable {
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
merge.info.setHasProx(merger.hasProx());
if (merge.useCompoundFile) {
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}
if (useCompoundFile) {
success = false;
final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);

View File: LogMergePolicy.java

@ -127,8 +127,21 @@ public abstract class LogMergePolicy extends MergePolicy {
// Javadoc inherited
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
return useCompoundFile;
public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totalSize = 0;
for (SegmentInfo info : infos)
totalSize += size(info);
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
}
return doCFS;
}
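The decision above replaces the per-merge CFS flag that makeOneMerge used to compute (that helper is removed further down in this file): a merged segment is packed into a compound file only if its size is at most noCFSRatio times the total size of the index. For example, with noCFSRatio = 0.1 and roughly 100 MB of existing segments, a 5 MB merged segment would use CFS while a 20 MB segment would not. A usage sketch, not part of this patch, with an illustrative ratio:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NoCFSRatioSketch {
  public static void main(String[] args) throws Exception {
    LogMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setUseCompoundFile(true);
    lmp.setNoCFSRatio(0.1);   // only segments up to ~10% of the index go into a .cfs
    // lmp.setNoCFSRatio(1.0) would force CFS for every merged segment, as several
    // tests in this commit do to keep their all-CFS expectations.
    IndexWriter w = new IndexWriter(new RAMDirectory(),
        new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()).setMergePolicy(lmp));
    w.close();
  }
}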
/** Sets whether compound file format should be used for
@ -254,12 +267,12 @@ public abstract class LogMergePolicy extends MergePolicy {
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
// there is more than 1 segment to the right of this one, or an unoptimized single segment.
spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
spec.add(new OneMerge(infos.range(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
spec.add(makeOneMerge(infos, infos.range(start, last)));
spec.add(new OneMerge(infos.range(start, last)));
last = start;
}
--start;
@ -267,7 +280,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Add any left-over segments, unless there is just 1 already optimized.
if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
spec.add(makeOneMerge(infos, infos.range(start, last)));
spec.add(new OneMerge(infos.range(start, last)));
}
return spec.merges.size() == 0 ? null : spec;
@ -284,7 +297,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
spec.add(new OneMerge(infos.range(last - mergeFactor, last)));
last -= mergeFactor;
}
@ -296,7 +309,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// Since we must optimize down to 1 segment, the
// choice is simple:
if (last > 1 || !isOptimized(infos.info(0))) {
spec.add(makeOneMerge(infos, infos.range(0, last)));
spec.add(new OneMerge(infos.range(0, last)));
}
} else if (last > maxNumSegments) {
@ -325,7 +338,7 @@ public abstract class LogMergePolicy extends MergePolicy {
}
}
spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
spec.add(new OneMerge(infos.range(bestStart, bestStart + finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;
@ -413,7 +426,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// deletions, so force a merge now:
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {
@ -422,7 +435,7 @@ public abstract class LogMergePolicy extends MergePolicy {
// mergeFactor segments
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}
@ -430,7 +443,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (firstSegmentWithDeletions != -1) {
if (verbose())
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments)));
}
return spec;
@ -530,7 +543,7 @@ public abstract class LogMergePolicy extends MergePolicy {
spec = new MergeSpecification();
if (verbose())
message(" " + start + " to " + end + ": add this merge");
spec.add(makeOneMerge(infos, infos.range(start, end)));
spec.add(new OneMerge(infos.range(start, end)));
} else if (verbose())
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@ -544,29 +557,6 @@ public abstract class LogMergePolicy extends MergePolicy {
return spec;
}
protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totSize = 0;
for(SegmentInfo info : infos) {
totSize += size(info);
}
long mergeSize = 0;
for(SegmentInfo info : infosToMerge) {
mergeSize += size(info);
}
doCFS = mergeSize <= noCFSRatio * totSize;
}
return new OneMerge(infosToMerge, doCFS);
}
/** <p>Determines the largest segment (measured by
* document count) that may be merged with other segments.
* Small values (e.g., less than 10,000) are best for

View File: MergePolicy.java

@ -76,16 +76,14 @@ public abstract class MergePolicy implements java.io.Closeable {
SegmentReader[] readers; // used by IndexWriter
SegmentReader[] readersClone; // used by IndexWriter
public final SegmentInfos segments;
public final boolean useCompoundFile;
boolean aborted;
Throwable error;
boolean paused;
public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
public OneMerge(SegmentInfos segments) {
if (0 == segments.size())
throw new RuntimeException("segments must include at least one segment");
this.segments = segments;
this.useCompoundFile = useCompoundFile;
}
/** Record that an exception occurred while executing
@ -314,10 +312,9 @@ public abstract class MergePolicy implements java.io.Closeable {
public abstract void close();
/**
* Returns true if a newly flushed (not from merge)
* segment should use the compound file format.
* Returns true if a new segment (regardless of its origin) should use the compound file format.
*/
public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment);
public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException;
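Because useCompoundFile is now an abstract hook that IndexWriter consults for every new segment regardless of its origin, any MergePolicy can take part in the decision, not only LogMergePolicy. A minimal sketch of a custom policy, not part of this patch; the class name and the 1 GB threshold are invented for illustration:

import java.io.IOException;

import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;

public class NoLargeCFSMergePolicy extends LogByteSizeMergePolicy {
  @Override
  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
    // Never pack segments over ~1 GB into a compound file; otherwise defer
    // to LogMergePolicy's useCompoundFile/noCFSRatio logic shown earlier.
    return size(mergedInfo) <= (1L << 30) && super.useCompoundFile(infos, mergedInfo);
  }
}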
/**
* Returns true if the doc store files should use the

View File: TestAddIndexes.java

@ -24,6 +24,9 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
@ -36,6 +39,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
public class TestAddIndexes extends LuceneTestCase {
@ -1015,4 +1019,29 @@ public class TestAddIndexes extends LuceneTestCase {
}
}
// LUCENE-2790: tests that the non CFS files were deleted by addIndexes
public void testNonCFSLeftovers() throws Exception {
Directory[] dirs = new Directory[2];
for (int i = 0; i < dirs.length; i++) {
dirs[i] = new RAMDirectory();
IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer()));
Document d = new Document();
d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES));
w.addDocument(d);
w.close();
}
IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
Directory dir = new RAMDirectory();
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
lmp.setNoCFSRatio(1.0); // Force creation of CFS
IndexWriter w3 = new IndexWriter(dir, conf);
w3.addIndexes(readers);
w3.close();
assertEquals("Only one compound segment should exist", 3, dir.listAll().length);
}
}

View File: TestBackwardsCompatibility.java

@ -527,12 +527,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
try {
Directory dir = FSDirectory.open(new File(fullDir(outputDir)));
LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setMaxBufferedDocs(-1).
setRAMBufferSizeMB(16.0).
setMergePolicy(newLogMergePolicy(true, 10))
setMergePolicy(mergePolicy)
);
for(int i=0;i<35;i++) {
addDoc(writer, i);

View File: TestIndexFileDeleter.java

@ -40,18 +40,23 @@ public class TestIndexFileDeleter extends LuceneTestCase {
public void testDeleteLeftoverFiles() throws IOException {
MockDirectoryWrapper dir = newDirectory();
dir.setPreventDoubleWrite(false);
LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setMaxBufferedDocs(10).
setMergePolicy(newLogMergePolicy(true, 10))
setMergePolicy(mergePolicy)
);
int i;
for(i=0;i<35;i++) {
addDoc(writer, i);
}
((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false);
((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundDocStore(false);
mergePolicy.setUseCompoundFile(false);
mergePolicy.setUseCompoundDocStore(false);
for(;i<45;i++) {
addDoc(writer, i);
}

View File: TestIndexWriter.java

@ -2479,10 +2479,14 @@ public class TestIndexWriter extends LuceneTestCase {
public void testDeleteUnusedFiles() throws Exception {
for(int iter=0;iter<2;iter++) {
Directory dir = newDirectory();
LogMergePolicy mergePolicy = newLogMergePolicy(true);
mergePolicy.setNoCFSRatio(1); // This test expects all of its segments to be in CFS
IndexWriter w = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setMergePolicy(newLogMergePolicy(true))
setMergePolicy(mergePolicy)
);
Document doc = new Document();
doc.add(newField("field", "go", Field.Store.NO, Field.Index.ANALYZED));