diff --git a/CHANGES.txt b/CHANGES.txt index e640ad66c72..7f9aced0962 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -80,6 +80,11 @@ Changes in backwards compatibility policy methods in these TokenStreams/-Filters were made final. (Michael Busch, Uwe Schindler) + 5. LUCENE-1763: MergePolicy now requires an IndexWriter instance to + be passed upon instantiation. As a result, IndexWriter was removed + as a method argument from all MergePolicy methods. (Shai Erera via + Mike McCandless) + Changes in runtime behavior 1. LUCENE-1424: QueryParser now by default uses constant score auto diff --git a/common-build.xml b/common-build.xml index 6c7a185701b..e6f69b0f31d 100644 --- a/common-build.xml +++ b/common-build.xml @@ -42,7 +42,7 @@ - + diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java index 501717f6e6d..b174ef3c81f 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java @@ -60,38 +60,19 @@ public class CreateIndexTask extends PerfTask { final String mergeScheduler = config.get("merge.scheduler", "org.apache.lucene.index.ConcurrentMergeScheduler"); - RuntimeException err = null; try { writer.setMergeScheduler((MergeScheduler) Class.forName(mergeScheduler).newInstance()); - } catch (IllegalAccessException iae) { - err = new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler"); - err.initCause(iae); - } catch (InstantiationException ie) { - err = new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler"); - err.initCause(ie); - } catch (ClassNotFoundException cnfe) { - err = new RuntimeException("unable to load class '" + mergeScheduler + "' as merge scheduler"); - err.initCause(cnfe); + } catch (Exception e) { + throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e); } - if (err != null) - throw err; final String mergePolicy = config.get("merge.policy", "org.apache.lucene.index.LogByteSizeMergePolicy"); try { - writer.setMergePolicy((MergePolicy) Class.forName(mergePolicy).newInstance()); - } catch (IllegalAccessException iae) { - err = new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy"); - err.initCause(iae); - } catch (InstantiationException ie) { - err = new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy"); - err.initCause(ie); - } catch (ClassNotFoundException cnfe) { - err = new RuntimeException("unable to load class '" + mergePolicy + "' as merge policy"); - err.initCause(cnfe); + writer.setMergePolicy((MergePolicy) Class.forName(mergePolicy).getConstructor(new Class[] { IndexWriter.class }).newInstance(new Object[] { writer })); + } catch (Exception e) { + throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e); } - if (err != null) - throw err; writer.setUseCompoundFile(config.get("compound",true)); writer.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR)); diff --git a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index cb138af5b57..446710e956b 100755 --- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -612,8 +612,8 @@ public class TestPerfTasksLogic extends TestCase { public static class MyMergePolicy extends LogDocMergePolicy { boolean called; - public MyMergePolicy() { - super(); + public MyMergePolicy(IndexWriter writer) { + super(writer); called = true; } } diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java index c28d74f62bc..dfeac0c4b01 100644 --- a/src/java/org/apache/lucene/index/IndexWriter.java +++ b/src/java/org/apache/lucene/index/IndexWriter.java @@ -354,7 +354,7 @@ public class IndexWriter { // merges private HashSet mergingSegments = new HashSet(); - private MergePolicy mergePolicy = new LogByteSizeMergePolicy(); + private MergePolicy mergePolicy = new LogByteSizeMergePolicy(this); private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); private LinkedList pendingMerges = new LinkedList(); private Set runningMerges = new HashSet(); @@ -2899,7 +2899,7 @@ public class IndexWriter { MergePolicy.MergeSpecification spec; synchronized(this) { - spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos, this); + spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos); if (spec != null) { final int numMerges = spec.merges.size(); for(int i=0;i 0; return !hasDeletions && @@ -208,12 +211,13 @@ public abstract class LogMergePolicy extends MergePolicy { * setting is true. This method returns multiple merges * (mergeFactor at a time) so the {@link MergeScheduler} * in use may make use of concurrency. */ - public MergeSpecification findMergesForOptimize(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException { + public MergeSpecification findMergesForOptimize(SegmentInfos infos, + int maxNumSegments, Set segmentsToOptimize) throws IOException { MergeSpecification spec; assert maxNumSegments > 0; - if (!isOptimized(infos, writer, maxNumSegments, segmentsToOptimize)) { + if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed @@ -245,7 +249,7 @@ public abstract class LogMergePolicy extends MergePolicy { // Since we must optimize down to 1 segment, the // choice is simple: - if (last > 1 || !isOptimized(writer, infos.info(0))) + if (last > 1 || !isOptimized(infos.info(0))) spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); } else if (last > maxNumSegments) { @@ -291,12 +295,8 @@ public abstract class LogMergePolicy extends MergePolicy { * index. We simply merge adjacent segments that have * deletes, up to mergeFactor at a time. */ - public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException - { - this.writer = writer; - + public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) + throws CorruptIndexException, IOException { final int numSegments = segmentInfos.size(); if (verbose()) @@ -347,10 +347,9 @@ public abstract class LogMergePolicy extends MergePolicy { * multiple levels have too many segments, this method * will return multiple merges, allowing the {@link * MergeScheduler} to use concurrency. */ - public MergeSpecification findMerges(SegmentInfos infos, IndexWriter writer) throws IOException { + public MergeSpecification findMerges(SegmentInfos infos) throws IOException { final int numSegments = infos.size(); - this.writer = writer; if (verbose()) message("findMerges: " + numSegments + " segments"); diff --git a/src/java/org/apache/lucene/index/MergePolicy.java b/src/java/org/apache/lucene/index/MergePolicy.java index fccce5ac17a..b43f871c37d 100644 --- a/src/java/org/apache/lucene/index/MergePolicy.java +++ b/src/java/org/apache/lucene/index/MergePolicy.java @@ -206,68 +206,69 @@ public abstract class MergePolicy { } } - /** - * Determine what set of merge operations are now - * necessary on the index. The IndexWriter calls this - * whenever there is a change to the segments. This call - * is always synchronized on the IndexWriter instance so - * only one thread at a time will call this method. - * - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance - */ - abstract MergeSpecification findMerges(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException; - - /** - * Determine what set of merge operations is necessary in - * order to optimize the index. The IndexWriter calls - * this when its optimize() method is called. This call - * is always synchronized on the IndexWriter instance so - * only one thread at a time will call this method. - * - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance - * @param maxSegmentCount requested maximum number of - * segments in the index (currently this is always 1) - * @param segmentsToOptimize contains the specific - * SegmentInfo instances that must be merged away. This - * may be a subset of all SegmentInfos. - */ - abstract MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, - IndexWriter writer, - int maxSegmentCount, - Set segmentsToOptimize) - throws CorruptIndexException, IOException; - - /** - * Determine what set of merge operations is necessary in - * order to expunge all deletes from the index. - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance - */ - MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException - { - throw new RuntimeException("not implemented"); + final protected IndexWriter writer; + + public MergePolicy(IndexWriter writer) { + this.writer = writer; } + /** + * Determine what set of merge operations are now necessary on the index. + * {@link IndexWriter} calls this whenever there is a change to the segments. + * This call is always synchronized on the {@link IndexWriter} instance so + * only one thread at a time will call this method. + * + * @param segmentInfos + * the total set of segments in the index + */ + public abstract MergeSpecification findMerges(SegmentInfos segmentInfos) + throws CorruptIndexException, IOException; + + /** + * Determine what set of merge operations is necessary in order to optimize + * the index. {@link IndexWriter} calls this when its + * {@link IndexWriter#optimize()} method is called. This call is always + * synchronized on the {@link IndexWriter} instance so only one thread at a + * time will call this method. + * + * @param segmentInfos + * the total set of segments in the index + * @param maxSegmentCount + * requested maximum number of segments in the index (currently this + * is always 1) + * @param segmentsToOptimize + * contains the specific SegmentInfo instances that must be merged + * away. This may be a subset of all SegmentInfos. + */ + public abstract MergeSpecification findMergesForOptimize( + SegmentInfos segmentInfos, int maxSegmentCount, Set segmentsToOptimize) + throws CorruptIndexException, IOException; + + /** + * Determine what set of merge operations is necessary in order to expunge all + * deletes from the index. + * + * @param segmentInfos + * the total set of segments in the index + */ + public abstract MergeSpecification findMergesToExpungeDeletes( + SegmentInfos segmentInfos) throws CorruptIndexException, IOException; + /** * Release all resources for the policy. */ - abstract void close(); + public abstract void close(); /** * Returns true if a newly flushed (not from merge) * segment should use the compound file format. */ - abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); /** * Returns true if the doc store files should use the * compound file format. */ - abstract boolean useCompoundDocStore(SegmentInfos segments); + public abstract boolean useCompoundDocStore(SegmentInfos segments); + } diff --git a/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java b/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java index a9354a1db40..adb4e26870a 100755 --- a/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java +++ b/src/test/org/apache/lucene/index/TestAddIndexesNoOptimize.java @@ -426,7 +426,7 @@ public class TestAddIndexesNoOptimize extends LuceneTestCase { private IndexWriter newWriter(Directory dir, boolean create) throws IOException { final IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), create); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); return writer; } @@ -500,7 +500,7 @@ public class TestAddIndexesNoOptimize extends LuceneTestCase { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); - writer.setMergePolicy(new LogByteSizeMergePolicy()); + writer.setMergePolicy(new LogByteSizeMergePolicy(writer)); writer.setMaxBufferedDocs(5); writer.setUseCompoundFile(false); writer.setMergeFactor(100); @@ -526,7 +526,7 @@ public class TestAddIndexesNoOptimize extends LuceneTestCase { Directory dir2 = new MockRAMDirectory(); writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); - LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(); + LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer); lmp.setMinMergeMB(0.0001); writer.setMergePolicy(lmp); writer.setMergeFactor(4); diff --git a/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java index 4a59c239fef..2d07c3cc34b 100644 --- a/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java +++ b/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java @@ -104,7 +104,7 @@ public class TestConcurrentMergeScheduler extends LuceneTestCase { ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler(); writer.setMergeScheduler(cms); - LogDocMergePolicy mp = new LogDocMergePolicy(); + LogDocMergePolicy mp = new LogDocMergePolicy(writer); writer.setMergePolicy(mp); // Force degenerate merging so we can get a mix of diff --git a/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index 204492a75b5..3a22f1b1b59 100644 --- a/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -930,7 +930,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexWriter.unlock(dir); IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); - w.setMergePolicy(new LogDocMergePolicy()); + w.setMergePolicy(new LogDocMergePolicy(w)); for (int i = 0; i < 100; i++) { w.addDocument(createDocument(i, 4)); diff --git a/src/test/org/apache/lucene/index/TestIndexWriter.java b/src/test/org/apache/lucene/index/TestIndexWriter.java index f6162e89110..46156f04c9f 100644 --- a/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -629,7 +629,7 @@ public class TestIndexWriter extends LuceneTestCase for(int numDocs=38;numDocs<500;numDocs += 38) { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); - LogDocMergePolicy ldmp = new LogDocMergePolicy(); + LogDocMergePolicy ldmp = new LogDocMergePolicy(writer); ldmp.setMinMergeDocs(1); writer.setMergePolicy(ldmp); writer.setMergeFactor(5); @@ -666,7 +666,7 @@ public class TestIndexWriter extends LuceneTestCase doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED)); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); - LogDocMergePolicy ldmp = new LogDocMergePolicy(); + LogDocMergePolicy ldmp = new LogDocMergePolicy(writer); ldmp.setMinMergeDocs(1); writer.setMergePolicy(ldmp); writer.setMergeFactor(4); @@ -2813,7 +2813,7 @@ public class TestIndexWriter extends LuceneTestCase writer.setMaxBufferedDocs(2); writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.setMergeScheduler(new SerialMergeScheduler()); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); Document document = new Document(); @@ -2846,7 +2846,7 @@ public class TestIndexWriter extends LuceneTestCase writer.setMaxBufferedDocs(2); writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.setMergeScheduler(new SerialMergeScheduler()); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); Directory[] indexDirs = {new MockRAMDirectory(dir)}; writer.addIndexes(indexDirs); @@ -2865,7 +2865,7 @@ public class TestIndexWriter extends LuceneTestCase writer.setMaxBufferedDocs(2); writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.setMergeScheduler(new SerialMergeScheduler()); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); Document document = new Document(); @@ -2903,7 +2903,7 @@ public class TestIndexWriter extends LuceneTestCase writer.setMaxBufferedDocs(2); writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.setMergeScheduler(new SerialMergeScheduler()); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); Document document = new Document(); @@ -2925,7 +2925,7 @@ public class TestIndexWriter extends LuceneTestCase writer.setMaxBufferedDocs(2); writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH); writer.setMergeScheduler(new SerialMergeScheduler()); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); for(int i=0;i<6;i++) writer.addDocument(document); diff --git a/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java b/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java index 10f92fcd69f..fc229982dec 100755 --- a/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java +++ b/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java @@ -37,7 +37,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.setMaxBufferedDocs(10); writer.setMergeFactor(10); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); for (int i = 0; i < 100; i++) { addDoc(writer); @@ -54,7 +54,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.setMaxBufferedDocs(10); writer.setMergeFactor(10); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); boolean noOverMerge = false; for (int i = 0; i < 100; i++) { @@ -76,7 +76,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.setMaxBufferedDocs(10); writer.setMergeFactor(10); - LogDocMergePolicy mp = new LogDocMergePolicy(); + LogDocMergePolicy mp = new LogDocMergePolicy(writer); mp.setMinMergeDocs(100); writer.setMergePolicy(mp); @@ -102,7 +102,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.setMaxBufferedDocs(10); writer.setMergeFactor(100); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); for (int i = 0; i < 250; i++) { addDoc(writer); @@ -128,7 +128,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true); writer.setMaxBufferedDocs(101); writer.setMergeFactor(101); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); // leftmost* segment has 1 doc // rightmost* segment has 100 docs @@ -142,7 +142,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false); writer.setMaxBufferedDocs(101); writer.setMergeFactor(101); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); } writer.setMaxBufferedDocs(10); @@ -168,7 +168,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); writer.setMaxBufferedDocs(10); writer.setMergeFactor(100); @@ -183,7 +183,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase { reader.close(); writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false); - writer.setMergePolicy(new LogDocMergePolicy()); + writer.setMergePolicy(new LogDocMergePolicy(writer)); writer.setMaxBufferedDocs(10); writer.setMergeFactor(5); diff --git a/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/src/test/org/apache/lucene/index/TestIndexWriterReader.java index 05364f28ab9..e5d31d7625a 100644 --- a/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -539,7 +539,7 @@ public class TestIndexWriterReader extends LuceneTestCase { boolean multiSegment) throws IOException { IndexWriter w = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); - w.setMergePolicy(new LogDocMergePolicy()); + w.setMergePolicy(new LogDocMergePolicy(w)); for (int i = 0; i < 100; i++) { w.addDocument(createDocument(i, indexName, 4)); if (multiSegment && (i % 10) == 0) {