diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2a9553bd9ae..83380041f13 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -280,6 +280,10 @@ API Changes * LUCENE-2953: In addition to changes in 3.x, PriorityQueue#initialize(int) function was moved into the ctor. (Uwe Schindler, Yonik Seeley) +* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public + (though @lucene.experimental), allowing for custom MergeScheduler + implementations. (Shai Erera) + New features * LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 3f7faf62c20..cb4a0bfa8b4 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -30,8 +31,7 @@ import org.apache.lucene.index.DocumentsWriterPerThread.DocState; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer; -import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.store.Directory; +import org.apache.lucene.util.ArrayUtil; /** @@ -262,7 +262,7 @@ final class DocFieldProcessor extends DocConsumer { // sort the subset of fields that have vectors // enabled; we could save [small amount of] CPU // here. - quickSort(fields, 0, fieldCount-1); + ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); for(int i=0;i fieldsComp = new Comparator() { + public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { + return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); + } + }; + @Override void finishDocument() throws IOException { try { @@ -282,66 +288,6 @@ final class DocFieldProcessor extends DocConsumer { } } - void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) { - if (lo >= hi) - return; - else if (hi == 1+lo) { - if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - final DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[hi]; - array[hi] = tmp; - } - return; - } - - int mid = (lo + hi) >>> 1; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp; - } - - if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp = array[mid]; - array[mid] = array[hi]; - array[hi] = tmp; - - if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { - DocFieldProcessorPerField tmp2 = array[lo]; - array[lo] = array[mid]; - array[mid] = tmp2; - } - } - - int left = lo + 1; - int right = hi - 1; - - if (left >= right) - return; - - DocFieldProcessorPerField partition = array[mid]; - - for (; ;) { - while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) - --right; - - while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) - ++left; - - if (left < right) { - DocFieldProcessorPerField tmp = array[left]; - array[left] = array[right]; - array[right] = tmp; - --right; - } else { - break; - } - } - - quickSort(array, lo, left); - quickSort(array, left + 1, hi); - } final private Map docValues = new HashMap(); final private Map perDocConsumers = new HashMap(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 166a6d594dd..826049c997f 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -1813,10 +1813,13 @@ public class IndexWriter implements Closeable { return mergingSegments; } - /** Expert: the {@link MergeScheduler} calls this method - * to retrieve the next merge requested by the - * MergePolicy */ - synchronized MergePolicy.OneMerge getNextMerge() { + /** + * Expert: the {@link MergeScheduler} calls this method to retrieve the next + * merge requested by the MergePolicy + * + * @lucene.experimental + */ + public synchronized MergePolicy.OneMerge getNextMerge() { if (pendingMerges.size() == 0) return null; else { @@ -2936,9 +2939,10 @@ public class IndexWriter implements Closeable { /** * Merges the indicated segments, replacing them in the stack with a * single segment. + * + * @lucene.experimental */ - - final void merge(MergePolicy.OneMerge merge) + public final void merge(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { boolean success = false; diff --git a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java index 6d61cc0a15b..e477ae97b67 100644 --- a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java +++ b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java @@ -19,17 +19,21 @@ package org.apache.lucene; import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.MergePolicy.OneMerge; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; - /** * Holds tests cases to verify external APIs are accessible * while not being in org.apache.lucene.index package. @@ -106,4 +110,40 @@ public class TestMergeSchedulerExternal extends LuceneTestCase { assertTrue(excCalled); dir.close(); } + + private static class ReportingMergeScheduler extends MergeScheduler { + + @Override + public void merge(IndexWriter writer) throws CorruptIndexException, IOException { + OneMerge merge = null; + while ((merge = writer.getNextMerge()) != null) { + if (VERBOSE) { + System.out.println("executing merge " + merge.segString(writer.getDirectory())); + } + writer.merge(merge); + } + } + + @Override + public void close() throws CorruptIndexException, IOException {} + + } + + public void testCustomMergeScheduler() throws Exception { + // we don't really need to execute anything, just to make sure the custom MS + // compiles. But ensure that it can be used as well, e.g., no other hidden + // dependencies or something. Therefore, don't use any random API ! + Directory dir = new RAMDirectory(); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.setMergeScheduler(new ReportingMergeScheduler()); + IndexWriter writer = new IndexWriter(dir, conf); + writer.addDocument(new Document()); + writer.commit(); // trigger flush + writer.addDocument(new Document()); + writer.commit(); // trigger flush + writer.optimize(); + writer.close(); + dir.close(); + } + } diff --git a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java index 442788a389c..e65732a3693 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java +++ b/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java @@ -257,27 +257,6 @@ public abstract class CollationTestBase extends LuceneTestCase { } assertEquals(expectedResult, buff.toString()); } - - private String randomString() { - // ideally we could do this! - // return _TestUtil.randomUnicodeString(random); - // - // http://bugs.icu-project.org/trac/ticket/8060 - // http://bugs.icu-project.org/trac/ticket/7732 - // ... - // - // as a workaround, just test the BMP for now (and avoid 0xFFFF etc) - int length = _TestUtil.nextInt(random, 0, 10); - char chars[] = new char[length]; - for (int i = 0; i < length; i++) { - if (random.nextBoolean()) { - chars[i] = (char) _TestUtil.nextInt(random, 0, 0xD7FF); - } else { - chars[i] = (char) _TestUtil.nextInt(random, 0xE000, 0xFFFD); - } - } - return new String(chars, 0, length); - } public void assertThreadSafe(final Analyzer analyzer) throws Exception { int numTestPoints = 100; @@ -289,7 +268,7 @@ public abstract class CollationTestBase extends LuceneTestCase { // and ensure they are the same as the ones we produced in serial fashion. for (int i = 0; i < numTestPoints; i++) { - String term = randomString(); + String term = _TestUtil.randomSimpleString(random); TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term)); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); diff --git a/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java index ba348d9fe9f..bff441c148d 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyAnalyzer.java @@ -87,8 +87,7 @@ public class TestCollationKeyAnalyzer extends CollationTestBase { public void testThreadSafe() throws Exception { int iters = 20 * RANDOM_MULTIPLIER; for (int i = 0; i < iters; i++) { - Locale locale = randomLocale(random); - Collator collator = Collator.getInstance(locale); + Collator collator = Collator.getInstance(Locale.GERMAN); collator.setStrength(Collator.PRIMARY); assertThreadSafe(new CollationKeyAnalyzer(TEST_VERSION_CURRENT, collator)); } diff --git a/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java b/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java index ad481ee74cb..dc6a3e80300 100644 --- a/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java +++ b/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.collation; import com.ibm.icu.text.Collator; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.util.BytesRef; @@ -88,7 +89,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase { public void testThreadSafe() throws Exception { int iters = 20 * RANDOM_MULTIPLIER; for (int i = 0; i < iters; i++) { - Locale locale = randomLocale(random); + Locale locale = Locale.GERMAN; Collator collator = Collator.getInstance(locale); collator.setStrength(Collator.IDENTICAL); assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));