mirror of https://github.com/apache/lucene.git
LUCENE-1013: fix IndexWriter.setMaxMergeDocs(N) to work out-of-the-box again
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@582384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2ac108d096
commit
065553bd20
|
@ -381,6 +381,12 @@ Bug fixes
|
||||||
23. LUCENE-913: Two consecutive score() calls return different
|
23. LUCENE-913: Two consecutive score() calls return different
|
||||||
scores for Boolean Queries. (Michael Busch, Doron Cohen)
|
scores for Boolean Queries. (Michael Busch, Doron Cohen)
|
||||||
|
|
||||||
|
24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
|
||||||
|
box", again, by moving set/getMaxMergeDocs up from
|
||||||
|
LogDocMergePolicy into LogMergePolicy. This fixes the API
|
||||||
|
breakage (a non-backwards-compatible change) caused by LUCENE-994.
|
||||||
|
(Yonik Seeley via Mike McCandless)
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
1. LUCENE-759: Added two n-gram-producing TokenFilters.
|
1. LUCENE-759: Added two n-gram-producing TokenFilters.
|
||||||
|
|
|
@ -327,13 +327,6 @@ public class IndexWriter {
|
||||||
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
|
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
|
||||||
}
|
}
|
||||||
|
|
||||||
private LogDocMergePolicy getLogDocMergePolicy() {
|
|
||||||
if (mergePolicy instanceof LogDocMergePolicy)
|
|
||||||
return (LogDocMergePolicy) mergePolicy;
|
|
||||||
else
|
|
||||||
throw new IllegalArgumentException("this method can only be called when the merge policy is LogDocMergePolicy");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** <p>Get the current setting of whether newly flushed
|
/** <p>Get the current setting of whether newly flushed
|
||||||
* segments will use the compound file format. Note that
|
* segments will use the compound file format. Note that
|
||||||
* this just returns the value previously set with
|
* this just returns the value previously set with
|
||||||
|
@ -794,7 +787,7 @@ public class IndexWriter {
|
||||||
* Otherwise an IllegalArgumentException is thrown.</p>
|
* Otherwise an IllegalArgumentException is thrown.</p>
|
||||||
*/
|
*/
|
||||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||||
getLogDocMergePolicy().setMaxMergeDocs(maxMergeDocs);
|
getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -809,7 +802,7 @@ public class IndexWriter {
|
||||||
* @see #setMaxMergeDocs
|
* @see #setMaxMergeDocs
|
||||||
*/
|
*/
|
||||||
public int getMaxMergeDocs() {
|
public int getMaxMergeDocs() {
|
||||||
return getLogDocMergePolicy().getMaxMergeDocs();
|
return getLogMergePolicy().getMaxMergeDocs();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -41,7 +41,9 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
|
||||||
|
|
||||||
/** Sets the maximum size for a segment to be merged.
|
/** Sets the maximum size for a segment to be merged.
|
||||||
* When a segment is this size or larger it will never be
|
* When a segment is this size or larger it will never be
|
||||||
* merged. */
|
* merged. Note that {@link #setMaxMergeDocs} is also
|
||||||
|
* used to check whether a segment is too large for
|
||||||
|
* merging (a segment that reaches either limit is not merged). */
|
||||||
public void setMaxMergeMB(double mb) {
|
public void setMaxMergeMB(double mb) {
|
||||||
maxMergeSize = (long) (mb*1024*1024);
|
maxMergeSize = (long) (mb*1024*1024);
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,32 +26,18 @@ public class LogDocMergePolicy extends LogMergePolicy {
|
||||||
/** Default minimum segment size. @see setMinMergeDocs */
|
/** Default minimum segment size. @see setMinMergeDocs */
|
||||||
public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
|
public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
|
||||||
|
|
||||||
/** Default maximum segment size. A segment of this size
|
|
||||||
* or larger will never be merged. @see setMaxMergeDocs */
|
|
||||||
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
|
|
||||||
|
|
||||||
public LogDocMergePolicy() {
|
public LogDocMergePolicy() {
|
||||||
super();
|
super();
|
||||||
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
|
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
|
||||||
maxMergeSize = DEFAULT_MAX_MERGE_DOCS;
|
|
||||||
|
// maxMergeSize is never used by LogDocMergePolicy; set
|
||||||
|
// it to Long.MAX_VALUE to disable it
|
||||||
|
maxMergeSize = Long.MAX_VALUE;
|
||||||
}
|
}
|
||||||
protected long size(SegmentInfo info) {
|
protected long size(SegmentInfo info) {
|
||||||
return info.docCount;
|
return info.docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Sets the maximum size for a segment to be merged.
|
|
||||||
* When a segment is this size or larger it will never be
|
|
||||||
* merged. */
|
|
||||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
|
||||||
maxMergeSize = maxMergeDocs;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get the maximum size for a segment to be merged.
|
|
||||||
* @see #setMaxMergeDocs */
|
|
||||||
public int getMaxMergeDocs() {
|
|
||||||
return (int) maxMergeSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Sets the minimum size for the lowest level segments.
|
/** Sets the minimum size for the lowest level segments.
|
||||||
* Any segments below this size are considered to be on
|
* Any segments below this size are considered to be on
|
||||||
* the same level (even if they vary drastically in size)
|
* the same level (even if they vary drastically in size)
|
||||||
|
|
|
@ -49,10 +49,15 @@ public abstract class LogMergePolicy implements MergePolicy {
|
||||||
* merged at a time */
|
* merged at a time */
|
||||||
public static final int DEFAULT_MERGE_FACTOR = 10;
|
public static final int DEFAULT_MERGE_FACTOR = 10;
|
||||||
|
|
||||||
|
/** Default maximum segment size. A segment of this size
|
||||||
|
* or larger will never be merged. See {@link #setMaxMergeDocs}. */
|
||||||
|
public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
|
||||||
|
|
||||||
private int mergeFactor = DEFAULT_MERGE_FACTOR;
|
private int mergeFactor = DEFAULT_MERGE_FACTOR;
|
||||||
|
|
||||||
long minMergeSize;
|
long minMergeSize;
|
||||||
long maxMergeSize;
|
long maxMergeSize;
|
||||||
|
int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
|
||||||
|
|
||||||
private boolean useCompoundFile = true;
|
private boolean useCompoundFile = true;
|
||||||
private boolean useCompoundDocStore = true;
|
private boolean useCompoundDocStore = true;
|
||||||
|
@ -219,6 +224,9 @@ public abstract class LogMergePolicy implements MergePolicy {
|
||||||
long size = size(info);
|
long size = size(info);
|
||||||
|
|
||||||
// Refuse to import a segment that's too large
|
// Refuse to import a segment that's too large
|
||||||
|
if (info.docCount > maxMergeDocs && info.dir != directory)
|
||||||
|
throw new IllegalArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")");
|
||||||
|
|
||||||
if (size >= maxMergeSize && info.dir != directory)
|
if (size >= maxMergeSize && info.dir != directory)
|
||||||
throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
|
throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
|
||||||
|
|
||||||
|
@ -281,8 +289,10 @@ public abstract class LogMergePolicy implements MergePolicy {
|
||||||
int end = start + mergeFactor;
|
int end = start + mergeFactor;
|
||||||
while(end <= 1+upto) {
|
while(end <= 1+upto) {
|
||||||
boolean anyTooLarge = false;
|
boolean anyTooLarge = false;
|
||||||
for(int i=start;i<end;i++)
|
for(int i=start;i<end;i++) {
|
||||||
anyTooLarge |= size(infos.info(i)) >= maxMergeSize;
|
final SegmentInfo info = infos.info(i);
|
||||||
|
anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
|
||||||
|
}
|
||||||
|
|
||||||
if (!anyTooLarge) {
|
if (!anyTooLarge) {
|
||||||
if (spec == null)
|
if (spec == null)
|
||||||
|
@ -298,4 +308,18 @@ public abstract class LogMergePolicy implements MergePolicy {
|
||||||
|
|
||||||
return spec;
|
return spec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sets the maximum docs for a segment to be merged.
|
||||||
|
* When a segment has this many docs or more it will never be
|
||||||
|
* merged. */
|
||||||
|
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||||
|
this.maxMergeDocs = maxMergeDocs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the maximum docs for a segment to be merged.
|
||||||
|
* @see #setMaxMergeDocs */
|
||||||
|
public int getMaxMergeDocs() {
|
||||||
|
return maxMergeDocs;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1583,4 +1583,39 @@ public class TestIndexWriter extends TestCase
|
||||||
iw.close();
|
iw.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Just intercepts all merges & verifies that we are never
|
||||||
|
// merging a segment with >= 20 (maxMergeDocs) docs
|
||||||
|
private class MyMergeScheduler implements MergeScheduler {
|
||||||
|
synchronized public void merge(IndexWriter writer)
|
||||||
|
throws CorruptIndexException, IOException {
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
MergePolicy.OneMerge merge = writer.getNextMerge();
|
||||||
|
if (merge == null)
|
||||||
|
break;
|
||||||
|
for(int i=0;i<merge.segments.size();i++)
|
||||||
|
assert merge.segments.info(i).docCount < 20;
|
||||||
|
writer.merge(merge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LUCENE-1013
|
||||||
|
public void testSetMaxMergeDocs() throws IOException {
|
||||||
|
MockRAMDirectory dir = new MockRAMDirectory();
|
||||||
|
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
|
||||||
|
iw.setMergeScheduler(new MyMergeScheduler());
|
||||||
|
iw.setMaxMergeDocs(20);
|
||||||
|
iw.setMaxBufferedDocs(2);
|
||||||
|
iw.setMergeFactor(2);
|
||||||
|
Document document = new Document();
|
||||||
|
document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
|
||||||
|
Field.TermVector.YES));
|
||||||
|
for(int i=0;i<177;i++)
|
||||||
|
iw.addDocument(document);
|
||||||
|
iw.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue