diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0d6b772a9a5..269e3dda81d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,10 @@ Improvements * LUCENE-9110: Refactor stack analysis in tests to use generalized LuceneTestCase methods that use StackWalker (Uwe Schindler) +* LUCENE-9206: IndexMergeTool gets additional options to control the merging. + This tool no longer forceMerge(1)s to a single segment by default. If you + rely upon this behavior, pass -max-segments 1 instead. (Robert Muir) + Bug fixes * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index b7c17e2df1d..c019260e57c 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -1,5 +1,11 @@ # Apache Lucene Migration Guide +## o.a.l.misc.IndexMergeTool defaults changes (LUCENE-9206) ## + +This command-line tool no longer forceMerges to a single segment. Instead, by +default it just follows (configurable) merge policy. If you really want to merge +to a single segment, you can pass -max-segments 1. + ## o.a.l.util.fst.Builder is renamed FSTCompiler with fluent-style Builder (LUCENE-9089) ## Simply use FSTCompiler instead of the previous Builder. Use either the simple constructor with default settings, or diff --git a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java index 09eb850a3f7..1342a553d89 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java @@ -19,12 +19,12 @@ package org.apache.lucene.misc; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.HardlinkCopyDirectoryWrapper; import org.apache.lucene.util.SuppressForbidden; -import java.io.IOException; import java.nio.file.Paths; /** @@ -34,28 +34,89 @@ import java.nio.file.Paths; @SuppressForbidden(reason = "System.out required: command line tool") public class IndexMergeTool { - public static void main(String[] args) throws IOException { - if (args.length < 3) { - System.err.println("Usage: IndexMergeTool [index3] ..."); - System.exit(1); + static final String USAGE = + "Usage: IndexMergeTool [OPTION...] [index3] ...\n" + + "Merges source indexes 'index1' .. 'indexN' into 'mergedIndex'\n" + + "\n" + + "OPTIONS:\n" + + " -merge-policy ClassName specifies MergePolicy class (must be in CLASSPATH).The default is\n" + + " 'org.apache.lucene.index.TieredMergePolicy.TieredMergePolicy'\n" + + " -max-segments N force-merge's the index to a maximum of N segments. Default is\n" + + " to execute only the merges according to the merge policy.\n" + + " -verbose print additional details.\n"; + + static class Options { + String mergedIndexPath; + String indexPaths[]; + IndexWriterConfig config = new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE); + int maxSegments = 0; + + static Options parse(String args[]) throws ReflectiveOperationException { + Options options = new Options(); + int index = 0; + while (index < args.length) { + if (!args[index].startsWith("-")) { + break; + } + if (args[index] == "--") { + break; + } + switch(args[index]) { + case "-merge-policy": + String clazzName = args[++index]; + Class clazz = Class.forName(clazzName).asSubclass(MergePolicy.class); + options.config.setMergePolicy(clazz.getConstructor().newInstance()); + break; + case "-max-segments": + options.maxSegments = Integer.parseInt(args[++index]); + break; + case "-verbose": + options.config.setInfoStream(System.err); + break; + default: throw new IllegalArgumentException("unrecognized option: '" + args[index] + "'\n" + USAGE); + } + index++; + } + + // process any remaining arguments as the target and source index paths. + int numPaths = args.length - index; + if (numPaths < 3) { + throw new IllegalArgumentException("not enough parameters.\n" + USAGE); + } + + options.mergedIndexPath = args[index]; + options.indexPaths = new String[numPaths - 1]; + System.arraycopy(args, index + 1, options.indexPaths, 0, options.indexPaths.length); + return options; + } + } + + public static void main(String[] args) throws Exception { + Options options = null; + try { + options = Options.parse(args); + } catch (IllegalArgumentException e) { + System.err.println(e.getMessage()); + System.exit(2); } // Try to use hardlinks to source segments, if possible. - Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0]))); + Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(options.mergedIndexPath))); - IndexWriter writer = new IndexWriter(mergedIndex, - new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE)); - - Directory[] indexes = new Directory[args.length - 1]; - for (int i = 1; i < args.length; i++) { - indexes[i - 1] = FSDirectory.open(Paths.get(args[i])); + Directory[] indexes = new Directory[options.indexPaths.length]; + for (int i = 0; i < indexes.length; i++) { + indexes[i] = FSDirectory.open(Paths.get(options.indexPaths[i])); } + IndexWriter writer = new IndexWriter(mergedIndex, options.config); + System.out.println("Merging..."); writer.addIndexes(indexes); - System.out.println("Full merge..."); - writer.forceMerge(1); + if (options.maxSegments > 0) { + System.out.println("Force-merging to " + options.maxSegments + "..."); + writer.forceMerge(options.maxSegments); + } writer.close(); System.out.println("Done."); } diff --git a/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java b/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java new file mode 100644 index 00000000000..b9183ea75ab --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/misc/TestIndexMergeTool.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.misc; + +import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.misc.IndexMergeTool.Options; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.PrintStreamInfoStream; + +public class TestIndexMergeTool extends LuceneTestCase { + + public void testNoParameters() throws Exception { + expectThrows(IllegalArgumentException.class, () -> { + Options.parse(new String[] {}); + }); + } + + public void testOneParameter() throws Exception { + expectThrows(IllegalArgumentException.class, () -> { + Options.parse(new String[] { "target" }); + }); + } + + public void testTwoParameters() throws Exception { + expectThrows(IllegalArgumentException.class, () -> { + Options.parse(new String[] { "target", "source1" }); + }); + } + + public void testThreeParameters() throws Exception { + Options options = Options.parse(new String[] { "target", "source1", "source2" }); + assertEquals("target", options.mergedIndexPath); + assertArrayEquals(new String[] { "source1", "source2" }, options.indexPaths); + } + + public void testVerboseOption() throws Exception { + Options options = Options.parse(new String[] { "-verbose", "target", "source1", "source2" }); + assertEquals(PrintStreamInfoStream.class, options.config.getInfoStream().getClass()); + } + + public void testMergePolicyOption() throws Exception { + Options options = Options.parse(new String[] { "-merge-policy", LogDocMergePolicy.class.getName(), "target", "source1", "source2" }); + assertEquals(LogDocMergePolicy.class, options.config.getMergePolicy().getClass()); + } + + public void testMaxSegmentsOption() throws Exception { + Options options = Options.parse(new String[] { "-max-segments", "42", "target", "source1", "source2" }); + assertEquals(42, options.maxSegments); + } + +}