mirror of https://github.com/apache/lucene.git
LUCENE-1013: fix IndexWriter.setMaxMergeDocs(N) to work out-of-the-box again
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@582384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2ac108d096
commit
065553bd20
|
@ -381,6 +381,12 @@ Bug fixes
|
|||
23. LUCENE-913: Two consecutive score() calls return different
|
||||
scores for Boolean Queries. (Michael Busch, Doron Cohen)
|
||||
|
||||
24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
|
||||
box", again, by moving set/getMaxMergeDocs up from
|
||||
LogDocMergePolicy into LogMergePolicy. This fixes the API
|
||||
breakage (non backwards compatible change) caused by LUCENE-994.
|
||||
(Yonik Seeley via Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-759: Added two n-gram-producing TokenFilters.
|
||||
|
|
|
@ -327,13 +327,6 @@ public class IndexWriter {
|
|||
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
|
||||
}
|
||||
|
||||
private LogDocMergePolicy getLogDocMergePolicy() {
|
||||
if (mergePolicy instanceof LogDocMergePolicy)
|
||||
return (LogDocMergePolicy) mergePolicy;
|
||||
else
|
||||
throw new IllegalArgumentException("this method can only be called when the merge policy is LogDocMergePolicy");
|
||||
}
|
||||
|
||||
/** <p>Get the current setting of whether newly flushed
|
||||
* segments will use the compound file format. Note that
|
||||
* this just returns the value previously set with
|
||||
|
@ -794,7 +787,7 @@ public class IndexWriter {
|
|||
* Otherwise an IllegalArgumentException is thrown.</p>
|
||||
*/
|
||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||
getLogDocMergePolicy().setMaxMergeDocs(maxMergeDocs);
|
||||
getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -809,7 +802,7 @@ public class IndexWriter {
|
|||
* @see #setMaxMergeDocs
|
||||
*/
|
||||
public int getMaxMergeDocs() {
|
||||
return getLogDocMergePolicy().getMaxMergeDocs();
|
||||
return getLogMergePolicy().getMaxMergeDocs();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -41,7 +41,9 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
|
|||
|
||||
/** Sets the maximum size for a segment to be merged.
|
||||
* When a segment is this size or larger it will never be
|
||||
* merged. */
|
||||
* merged. Note that {@link #setMaxMergeDocs} is also
|
||||
* used to check whether a segment is too large for
|
||||
* merging (it's either or). */
|
||||
public void setMaxMergeMB(double mb) {
|
||||
maxMergeSize = (long) (mb*1024*1024);
|
||||
}
|
||||
|
|
|
@ -26,32 +26,18 @@ public class LogDocMergePolicy extends LogMergePolicy {
|
|||
/** Default minimum segment size. @see setMinMergeDocs */
|
||||
public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
|
||||
|
||||
/** Default maximum segment size. A segment of this size
|
||||
* or larger will never be merged. @see setMaxMergeDocs */
|
||||
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
|
||||
|
||||
public LogDocMergePolicy() {
|
||||
super();
|
||||
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
|
||||
maxMergeSize = DEFAULT_MAX_MERGE_DOCS;
|
||||
|
||||
// maxMergeSize is never used by LogDocMergePolicy; set
|
||||
// it to Long.MAX_VALUE to disable it
|
||||
maxMergeSize = Long.MAX_VALUE;
|
||||
}
|
||||
protected long size(SegmentInfo info) {
|
||||
return info.docCount;
|
||||
}
|
||||
|
||||
/** Sets the maximum size for a segment to be merged.
|
||||
* When a segment is this size or larger it will never be
|
||||
* merged. */
|
||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||
maxMergeSize = maxMergeDocs;
|
||||
}
|
||||
|
||||
/** Get the maximum size for a segment to be merged.
|
||||
* @see #setMaxMergeDocs */
|
||||
public int getMaxMergeDocs() {
|
||||
return (int) maxMergeSize;
|
||||
}
|
||||
|
||||
/** Sets the minimum size for the lowest level segments.
|
||||
* Any segments below this size are considered to be on
|
||||
* the same level (even if they vary drastically in size)
|
||||
|
|
|
@ -49,10 +49,15 @@ public abstract class LogMergePolicy implements MergePolicy {
|
|||
* merged at a time */
|
||||
public static final int DEFAULT_MERGE_FACTOR = 10;
|
||||
|
||||
/** Default maximum segment size. A segment of this size
|
||||
* or larger will never be merged. @see setMaxMergeDocs */
|
||||
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
|
||||
|
||||
private int mergeFactor = DEFAULT_MERGE_FACTOR;
|
||||
|
||||
long minMergeSize;
|
||||
long maxMergeSize;
|
||||
int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
|
||||
|
||||
private boolean useCompoundFile = true;
|
||||
private boolean useCompoundDocStore = true;
|
||||
|
@ -219,6 +224,9 @@ public abstract class LogMergePolicy implements MergePolicy {
|
|||
long size = size(info);
|
||||
|
||||
// Refuse to import a segment that's too large
|
||||
if (info.docCount > maxMergeDocs && info.dir != directory)
|
||||
throw new IllegalArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")");
|
||||
|
||||
if (size >= maxMergeSize && info.dir != directory)
|
||||
throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
|
||||
|
||||
|
@ -281,8 +289,10 @@ public abstract class LogMergePolicy implements MergePolicy {
|
|||
int end = start + mergeFactor;
|
||||
while(end <= 1+upto) {
|
||||
boolean anyTooLarge = false;
|
||||
for(int i=start;i<end;i++)
|
||||
anyTooLarge |= size(infos.info(i)) >= maxMergeSize;
|
||||
for(int i=start;i<end;i++) {
|
||||
final SegmentInfo info = infos.info(i);
|
||||
anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
|
||||
}
|
||||
|
||||
if (!anyTooLarge) {
|
||||
if (spec == null)
|
||||
|
@ -298,4 +308,18 @@ public abstract class LogMergePolicy implements MergePolicy {
|
|||
|
||||
return spec;
|
||||
}
|
||||
|
||||
/** Sets the maximum docs for a segment to be merged.
|
||||
* When a segment has this many docs or more it will never be
|
||||
* merged. */
|
||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||
this.maxMergeDocs = maxMergeDocs;
|
||||
}
|
||||
|
||||
/** Get the maximum docs for a segment to be merged.
|
||||
* @see #setMaxMergeDocs */
|
||||
public int getMaxMergeDocs() {
|
||||
return maxMergeDocs;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1583,4 +1583,39 @@ public class TestIndexWriter extends TestCase
|
|||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Just intercepts all merges & verifies that we are never
|
||||
// merging a segment with >= 20 (maxMergeDocs) docs
|
||||
private class MyMergeScheduler implements MergeScheduler {
|
||||
synchronized public void merge(IndexWriter writer)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
while(true) {
|
||||
MergePolicy.OneMerge merge = writer.getNextMerge();
|
||||
if (merge == null)
|
||||
break;
|
||||
for(int i=0;i<merge.segments.size();i++)
|
||||
assert merge.segments.info(i).docCount < 20;
|
||||
writer.merge(merge);
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {}
|
||||
}
|
||||
|
||||
// LUCENE-1013
|
||||
public void testSetMaxMergeDocs() throws IOException {
|
||||
MockRAMDirectory dir = new MockRAMDirectory();
|
||||
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
|
||||
iw.setMergeScheduler(new MyMergeScheduler());
|
||||
iw.setMaxMergeDocs(20);
|
||||
iw.setMaxBufferedDocs(2);
|
||||
iw.setMergeFactor(2);
|
||||
Document document = new Document();
|
||||
document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
|
||||
Field.TermVector.YES));
|
||||
for(int i=0;i<177;i++)
|
||||
iw.addDocument(document);
|
||||
iw.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue