LUCENE-1013: fix IndexWriter.setMaxMergeDocs(N) to work out-of-the-box again

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@582384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-10-05 20:51:53 +00:00
parent 2ac108d096
commit 065553bd20
6 changed files with 76 additions and 30 deletions

View File

@ -381,6 +381,12 @@ Bug fixes
23. LUCENE-913: Two consecutive score() calls return different 23. LUCENE-913: Two consecutive score() calls return different
scores for Boolean Queries. (Michael Busch, Doron Cohen) scores for Boolean Queries. (Michael Busch, Doron Cohen)
24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
box", again, by moving set/getMaxMergeDocs up from
LogDocMergePolicy into LogMergePolicy. This fixes the API
breakage (non backwards compatible change) caused by LUCENE-994.
(Yonik Seeley via Mike McCandless)
New features New features
1. LUCENE-759: Added two n-gram-producing TokenFilters. 1. LUCENE-759: Added two n-gram-producing TokenFilters.

View File

@ -327,13 +327,6 @@ public class IndexWriter {
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy"); throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
} }
private LogDocMergePolicy getLogDocMergePolicy() {
if (mergePolicy instanceof LogDocMergePolicy)
return (LogDocMergePolicy) mergePolicy;
else
throw new IllegalArgumentException("this method can only be called when the merge policy is LogDocMergePolicy");
}
/** <p>Get the current setting of whether newly flushed /** <p>Get the current setting of whether newly flushed
* segments will use the compound file format. Note that * segments will use the compound file format. Note that
* this just returns the value previously set with * this just returns the value previously set with
@ -794,7 +787,7 @@ public class IndexWriter {
* Otherwise an IllegalArgumentException is thrown.</p> * Otherwise an IllegalArgumentException is thrown.</p>
*/ */
public void setMaxMergeDocs(int maxMergeDocs) { public void setMaxMergeDocs(int maxMergeDocs) {
getLogDocMergePolicy().setMaxMergeDocs(maxMergeDocs); getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
} }
/** /**
@ -809,7 +802,7 @@ public class IndexWriter {
* @see #setMaxMergeDocs * @see #setMaxMergeDocs
*/ */
public int getMaxMergeDocs() { public int getMaxMergeDocs() {
return getLogDocMergePolicy().getMaxMergeDocs(); return getLogMergePolicy().getMaxMergeDocs();
} }
/** /**

View File

@ -41,7 +41,9 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
/** Sets the maximum size for a segment to be merged. /** Sets the maximum size for a segment to be merged.
* When a segment is this size or larger it will never be * When a segment is this size or larger it will never be
* merged. */ * merged. Note that {@link #setMaxMergeDocs} is also
* used to check whether a segment is too large for
* merging (it's either or). */
public void setMaxMergeMB(double mb) { public void setMaxMergeMB(double mb) {
maxMergeSize = (long) (mb*1024*1024); maxMergeSize = (long) (mb*1024*1024);
} }

View File

@ -26,32 +26,18 @@ public class LogDocMergePolicy extends LogMergePolicy {
/** Default minimum segment size. @see setMinMergeDocs */ /** Default minimum segment size. @see setMinMergeDocs */
public static final int DEFAULT_MIN_MERGE_DOCS = 1000; public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
/** Default maximum segment size. A segment of this size
* or larger will never be merged. @see setMaxMergeDocs */
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
public LogDocMergePolicy() { public LogDocMergePolicy() {
super(); super();
minMergeSize = DEFAULT_MIN_MERGE_DOCS; minMergeSize = DEFAULT_MIN_MERGE_DOCS;
maxMergeSize = DEFAULT_MAX_MERGE_DOCS;
// maxMergeSize is never used by LogDocMergePolicy; set
// it to Long.MAX_VALUE to disable it
maxMergeSize = Long.MAX_VALUE;
} }
protected long size(SegmentInfo info) { protected long size(SegmentInfo info) {
return info.docCount; return info.docCount;
} }
/** Sets the maximum size for a segment to be merged.
* When a segment is this size or larger it will never be
* merged. */
public void setMaxMergeDocs(int maxMergeDocs) {
maxMergeSize = maxMergeDocs;
}
/** Get the maximum size for a segment to be merged.
* @see #setMaxMergeDocs */
public int getMaxMergeDocs() {
return (int) maxMergeSize;
}
/** Sets the minimum size for the lowest level segments. /** Sets the minimum size for the lowest level segments.
* Any segments below this size are considered to be on * Any segments below this size are considered to be on
* the same level (even if they vary drastically in size) * the same level (even if they vary drastically in size)

View File

@ -49,10 +49,15 @@ public abstract class LogMergePolicy implements MergePolicy {
* merged at a time */ * merged at a time */
public static final int DEFAULT_MERGE_FACTOR = 10; public static final int DEFAULT_MERGE_FACTOR = 10;
/** Default maximum segment size. A segment of this size
* or larger will never be merged. @see setMaxMergeDocs */
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
private int mergeFactor = DEFAULT_MERGE_FACTOR; private int mergeFactor = DEFAULT_MERGE_FACTOR;
long minMergeSize; long minMergeSize;
long maxMergeSize; long maxMergeSize;
int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
private boolean useCompoundFile = true; private boolean useCompoundFile = true;
private boolean useCompoundDocStore = true; private boolean useCompoundDocStore = true;
@ -219,6 +224,9 @@ public abstract class LogMergePolicy implements MergePolicy {
long size = size(info); long size = size(info);
// Refuse to import a segment that's too large // Refuse to import a segment that's too large
if (info.docCount > maxMergeDocs && info.dir != directory)
throw new IllegalArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")");
if (size >= maxMergeSize && info.dir != directory) if (size >= maxMergeSize && info.dir != directory)
throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")"); throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
@ -281,8 +289,10 @@ public abstract class LogMergePolicy implements MergePolicy {
int end = start + mergeFactor; int end = start + mergeFactor;
while(end <= 1+upto) { while(end <= 1+upto) {
boolean anyTooLarge = false; boolean anyTooLarge = false;
for(int i=start;i<end;i++) for(int i=start;i<end;i++) {
anyTooLarge |= size(infos.info(i)) >= maxMergeSize; final SegmentInfo info = infos.info(i);
anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
}
if (!anyTooLarge) { if (!anyTooLarge) {
if (spec == null) if (spec == null)
@ -298,4 +308,18 @@ public abstract class LogMergePolicy implements MergePolicy {
return spec; return spec;
} }
/** Sets the maximum docs for a segment to be merged.
* When a segment has this many docs or more it will never be
* merged. */
public void setMaxMergeDocs(int maxMergeDocs) {
this.maxMergeDocs = maxMergeDocs;
}
/** Get the maximum docs for a segment to be merged.
* @see #setMaxMergeDocs */
public int getMaxMergeDocs() {
return maxMergeDocs;
}
} }

View File

@ -1583,4 +1583,39 @@ public class TestIndexWriter extends TestCase
iw.close(); iw.close();
dir.close(); dir.close();
} }
// Just intercepts all merges & verifies that we are never
// merging a segment with >= 20 (maxMergeDocs) docs
private class MyMergeScheduler implements MergeScheduler {
synchronized public void merge(IndexWriter writer)
throws CorruptIndexException, IOException {
while(true) {
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null)
break;
for(int i=0;i<merge.segments.size();i++)
assert merge.segments.info(i).docCount < 20;
writer.merge(merge);
}
}
public void close() {}
}
// LUCENE-1013
public void testSetMaxMergeDocs() throws IOException {
MockRAMDirectory dir = new MockRAMDirectory();
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
iw.setMergeScheduler(new MyMergeScheduler());
iw.setMaxMergeDocs(20);
iw.setMaxBufferedDocs(2);
iw.setMergeFactor(2);
Document document = new Document();
document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
Field.TermVector.YES));
for(int i=0;i<177;i++)
iw.addDocument(document);
iw.close();
}
} }