LUCENE-9206: Improve IndexMergeTool defaults and options

IndexMergeTool previously had no options and always called forceMerge(1)
on the resulting index. This can result in wasted work and confusing
performance (it unbalances the index).

Instead, the default is to do nothing beyond the merges selected by the
merge policy.
This commit is contained in:
Robert Muir 2020-02-05 16:31:07 -05:00
parent 80ed8c281b
commit 93b83f635d
No known key found for this signature in database
GPG Key ID: 817AE1DD322D7ECA
4 changed files with 150 additions and 14 deletions

View File

@ -68,6 +68,10 @@ Improvements
* LUCENE-9110: Refactor stack analysis in tests to use generalized LuceneTestCase
methods that use StackWalker (Uwe Schindler)
* LUCENE-9206: IndexMergeTool gets additional options to control the merging.
This tool no longer forceMerge(1)s to a single segment by default. If you
rely upon this behavior, pass -max-segments 1 instead. (Robert Muir)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -1,5 +1,11 @@
# Apache Lucene Migration Guide
## o.a.l.misc.IndexMergeTool defaults changes (LUCENE-9206) ##
This command-line tool no longer forceMerges to a single segment. Instead, by
default it just follows the (configurable) merge policy. If you really want to merge
to a single segment, you can pass -max-segments 1.
## o.a.l.util.fst.Builder is renamed FSTCompiler with fluent-style Builder (LUCENE-9089) ##
Simply use FSTCompiler instead of the previous Builder. Use either the simple constructor with default settings, or

View File

@ -19,12 +19,12 @@ package org.apache.lucene.misc;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.util.SuppressForbidden;
import java.io.IOException;
import java.nio.file.Paths;
/**
@ -34,28 +34,89 @@ import java.nio.file.Paths;
@SuppressForbidden(reason = "System.out required: command line tool")
public class IndexMergeTool {
public static void main(String[] args) throws IOException {
if (args.length < 3) {
System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
System.exit(1);
/**
 * Help text printed together with argument-parsing error messages. It lists the
 * positional parameters (target index followed by the source indexes) and the
 * supported options.
 */
static final String USAGE =
"Usage: IndexMergeTool [OPTION...] <mergedIndex> <index1> <index2> [index3] ...\n" +
"Merges source indexes 'index1' .. 'indexN' into 'mergedIndex'\n" +
"\n" +
"OPTIONS:\n" +
" -merge-policy ClassName specifies MergePolicy class (must be in CLASSPATH). The default is\n" +
" 'org.apache.lucene.index.TieredMergePolicy'\n" +
" -max-segments N force-merge's the index to a maximum of N segments. Default is\n" +
" to execute only the merges according to the merge policy.\n" +
" -verbose print additional details.\n";
/**
 * Parsed command-line arguments of the tool: the target index path, the source
 * index paths, the {@link IndexWriterConfig} used to open the target, and the
 * optional force-merge segment limit.
 */
static class Options {
  /** Path of the index that receives the merged segments. */
  String mergedIndexPath;
  /** Paths of the source indexes (at least two). */
  String[] indexPaths;
  /** Writer configuration; always opens the target in CREATE mode. */
  IndexWriterConfig config = new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE);
  /** Force-merge limit; values of zero or less mean "do not force-merge". */
  int maxSegments = 0;

  /**
   * Parses the command line. Leading arguments that start with '-' are treated as
   * options; a literal "--" ends option processing. Everything that follows is the
   * target index path and then the source index paths.
   *
   * @param args raw command-line arguments
   * @return the populated options
   * @throws IllegalArgumentException if an option is unknown, an option is missing
   *     its value, or fewer than three paths are given
   * @throws ReflectiveOperationException if the merge policy class cannot be loaded
   *     or instantiated through its no-argument constructor
   */
  static Options parse(String[] args) throws ReflectiveOperationException {
    Options options = new Options();
    int index = 0;
    while (index < args.length) {
      if (!args[index].startsWith("-")) {
        break;
      }
      // Compare by value: '==' on a runtime argument never matches the literal.
      if ("--".equals(args[index])) {
        // Consume the separator so it is not mistaken for the target index path.
        index++;
        break;
      }
      switch (args[index]) {
        case "-merge-policy":
          String clazzName = requireValue(args, ++index);
          Class<? extends MergePolicy> clazz = Class.forName(clazzName).asSubclass(MergePolicy.class);
          options.config.setMergePolicy(clazz.getConstructor().newInstance());
          break;
        case "-max-segments":
          // A malformed number raises NumberFormatException, which is an
          // IllegalArgumentException.
          options.maxSegments = Integer.parseInt(requireValue(args, ++index));
          break;
        case "-verbose":
          options.config.setInfoStream(System.err);
          break;
        default:
          throw new IllegalArgumentException("unrecognized option: '" + args[index] + "'\n" + USAGE);
      }
      index++;
    }

    // process any remaining arguments as the target and source index paths.
    int numPaths = args.length - index;
    if (numPaths < 3) {
      throw new IllegalArgumentException("not enough parameters.\n" + USAGE);
    }

    options.mergedIndexPath = args[index];
    options.indexPaths = new String[numPaths - 1];
    System.arraycopy(args, index + 1, options.indexPaths, 0, options.indexPaths.length);
    return options;
  }

  /**
   * Returns the value that follows an option, or fails with a usage error (rather
   * than an ArrayIndexOutOfBoundsException) when the option is the last argument.
   */
  private static String requireValue(String[] args, int valueIndex) {
    if (valueIndex >= args.length) {
      throw new IllegalArgumentException(
          "missing value for option: '" + args[valueIndex - 1] + "'\n" + USAGE);
    }
    return args[valueIndex];
  }
}
/**
 * Command-line entry point. Parses {@code args} with {@code Options.parse}, opens the
 * target and source directories, adds the source indexes to the target through an
 * {@code IndexWriter}, optionally force-merges, and closes the writer.
 *
 * NOTE(review): this span appears to interleave removed and added lines of a diff
 * (duplicate declarations of mergedIndex and writer, two loops); confirm against the
 * actual file before relying on the exact statement sequence.
 *
 * @param args command-line arguments, see USAGE
 * @throws Exception declared by the method; failures other than the handled
 *     IllegalArgumentException are not caught here
 */
public static void main(String[] args) throws Exception {
Options options = null;
try {
options = Options.parse(args);
} catch (IllegalArgumentException e) {
// Invalid command line: print the exception message to stderr and exit with status 2.
System.err.println(e.getMessage());
System.exit(2);
}
// Try to use hardlinks to source segments, if possible.
// NOTE(review): the next line reads the target path from args[0]; the line after it
// reads it from the parsed options — presumably the old and new versions of one line.
Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[0])));
Directory mergedIndex = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(options.mergedIndexPath)));
IndexWriter writer = new IndexWriter(mergedIndex,
new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE));
Directory[] indexes = new Directory[args.length - 1];
for (int i = 1; i < args.length; i++) {
indexes[i - 1] = FSDirectory.open(Paths.get(args[i]));
// Open one Directory per parsed source index path.
Directory[] indexes = new Directory[options.indexPaths.length];
for (int i = 0; i < indexes.length; i++) {
indexes[i] = FSDirectory.open(Paths.get(options.indexPaths[i]));
}
// The writer uses the configuration built during option parsing.
IndexWriter writer = new IndexWriter(mergedIndex, options.config);
System.out.println("Merging...");
writer.addIndexes(indexes);
System.out.println("Full merge...");
writer.forceMerge(1);
// Force-merge only when a positive segment limit was parsed.
if (options.maxSegments > 0) {
System.out.println("Force-merging to " + options.maxSegments + "...");
writer.forceMerge(options.maxSegments);
}
writer.close();
System.out.println("Done.");
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.misc.IndexMergeTool.Options;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.PrintStreamInfoStream;
/** Tests the command-line argument parsing done by {@code IndexMergeTool.Options.parse}. */
public class TestIndexMergeTool extends LuceneTestCase {

  /** An empty command line is rejected. */
  public void testNoParameters() throws Exception {
    final String[] commandLine = new String[0];
    expectThrows(IllegalArgumentException.class, () -> Options.parse(commandLine));
  }

  /** A target path without any source path is rejected. */
  public void testOneParameter() throws Exception {
    final String[] commandLine = { "target" };
    expectThrows(IllegalArgumentException.class, () -> Options.parse(commandLine));
  }

  /** A target path with only one source path is rejected. */
  public void testTwoParameters() throws Exception {
    final String[] commandLine = { "target", "source1" };
    expectThrows(IllegalArgumentException.class, () -> Options.parse(commandLine));
  }

  /** A target and two sources are accepted and stored in order. */
  public void testThreeParameters() throws Exception {
    final String[] commandLine = { "target", "source1", "source2" };
    Options parsed = Options.parse(commandLine);
    assertEquals("target", parsed.mergedIndexPath);
    String[] expectedSources = { "source1", "source2" };
    assertArrayEquals(expectedSources, parsed.indexPaths);
  }

  /** -verbose installs a PrintStreamInfoStream on the writer configuration. */
  public void testVerboseOption() throws Exception {
    final String[] commandLine = { "-verbose", "target", "source1", "source2" };
    Options parsed = Options.parse(commandLine);
    Class<?> infoStreamClass = parsed.config.getInfoStream().getClass();
    assertEquals(PrintStreamInfoStream.class, infoStreamClass);
  }

  /** -merge-policy instantiates the named class and installs it on the configuration. */
  public void testMergePolicyOption() throws Exception {
    String policyName = LogDocMergePolicy.class.getName();
    final String[] commandLine = { "-merge-policy", policyName, "target", "source1", "source2" };
    Options parsed = Options.parse(commandLine);
    Class<?> policyClass = parsed.config.getMergePolicy().getClass();
    assertEquals(LogDocMergePolicy.class, policyClass);
  }

  /** -max-segments stores the parsed integer. */
  public void testMaxSegmentsOption() throws Exception {
    final String[] commandLine = { "-max-segments", "42", "target", "source1", "source2" };
    Options parsed = Options.parse(commandLine);
    assertEquals(42, parsed.maxSegments);
  }
}