From 74acda4b8c124c0c57e4d9dbf0787faa1f3e578a Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 11 Sep 2024 16:30:00 +0200
Subject: [PATCH] Add factory methods for IndexWriterConfig.

This adds factory methods for `IndexWriterConfig` that are optimized for a few
different use-cases. Configuring an `IndexWriterConfig` is quite an expert
task, the goal is to give sensible defaults for some common use-cases.

Furthermore, javadocs explain how these configs differ from defaults, which
would help users know what to tune if they want to further tune their configs.
---
 .../lucene/misc/index/IndexWriterConfigs.java | 117 +++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 lucene/misc/src/java/org/apache/lucene/misc/index/IndexWriterConfigs.java

diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/IndexWriterConfigs.java b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexWriterConfigs.java
new file mode 100644
index 00000000000..9b411db0799
--- /dev/null
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexWriterConfigs.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.index;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
+import org.apache.lucene.index.SimpleMergedSegmentWarmer;
+import org.apache.lucene.index.TieredMergePolicy;
+import org.apache.lucene.util.InfoStream;
+
+/**
+ * Factory methods for {@link IndexWriterConfig}s with sensible defaults for various use-cases.
+ *
+ * <p>Every method returns a new, independent {@link IndexWriterConfig} built with the no-argument
+ * constructor, so callers may keep tuning the returned instance.
+ */
+public final class IndexWriterConfigs {
+
+  // Prevent instantiation
+  private IndexWriterConfigs() {}
+
+  /**
+   * Create a new {@link IndexWriterConfig} for time-based data. It internally configures a {@link
+   * LogByteSizeMergePolicy}, whose policy of only merging adjacent segments helps keep the time
+   * range overlap between segments very low. This in turn makes filtering on timestamp ranges
+   * faster.
+   *
+   * @return a new config using a default {@link LogByteSizeMergePolicy}
+   */
+  public static IndexWriterConfig forTimeBasedData() {
+    IndexWriterConfig config = new IndexWriterConfig();
+    config.setMergePolicy(new LogByteSizeMergePolicy());
+    return config;
+  }
+
+  /**
+   * Create a new {@link IndexWriterConfig} that is optimized for fast near-realtime search.
+   * Internally it:
+   *
+   * <ul>
+   *   <li>installs a {@link SimpleMergedSegmentWarmer}, constructed with {@link
+   *       InfoStream#NO_OUTPUT}, as the merged segment warmer;
+   *   <li>configures a {@link TieredMergePolicy} with 8 segments per tier, at most 8 segments
+   *       merged at once and a 20MB floor segment size;
+   *   <li>wraps that policy in a {@link BPReorderingMergePolicy} backed by a default {@link
+   *       BPIndexReorderer}, with {@link BPReorderingMergePolicy#setMinNaturalMergeNumDocs(int)
+   *       minNaturalMergeNumDocs} set to 100,000 and {@link
+   *       BPReorderingMergePolicy#setMinNaturalMergeRatioFromBiggestSegment(float)
+   *       minNaturalMergeRatioFromBiggestSegment} set to 0.5.
+   * </ul>
+   *
+   * @return a new config carrying the settings listed above
+   */
+  public static IndexWriterConfig forFastNRTSearch() {
+    IndexWriterConfig config = new IndexWriterConfig();
+    config.setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT));
+
+    TieredMergePolicy tieredPolicy = new TieredMergePolicy();
+    tieredPolicy.setSegmentsPerTier(8);
+    tieredPolicy.setMaxMergeAtOnce(8);
+    tieredPolicy.setFloorSegmentMB(20); // 10x the default of 2MB
+
+    BPReorderingMergePolicy reorderingPolicy =
+        new BPReorderingMergePolicy(tieredPolicy, new BPIndexReorderer());
+    reorderingPolicy.setMinNaturalMergeNumDocs(100_000);
+    reorderingPolicy.setMinNaturalMergeRatioFromBiggestSegment(0.5f);
+    config.setMergePolicy(reorderingPolicy);
+    return config;
+  }
+
+  /**
+   * Create a new {@link IndexWriterConfig} that is optimized for fast search on read-only indexes.
+   * It expects the index to be created and then {@link IndexWriter#forceMerge(int) force-merged}
+   * before serving searches. Internally it:
+   *
+   * <ul>
+   *   <li>sets {@link IndexWriterConfig#setMaxFullFlushMergeWaitMillis(long)
+   *       maxFullFlushMergeWaitMillis} to 0;
+   *   <li>configures a {@link TieredMergePolicy} with 32 segments per tier and at most 32 segments
+   *       merged at once;
+   *   <li>wraps that policy in a {@link BPReorderingMergePolicy} backed by a default {@link
+   *       BPIndexReorderer}, with {@link BPReorderingMergePolicy#setMinNaturalMergeNumDocs(int)
+   *       minNaturalMergeNumDocs} set to {@link Integer#MAX_VALUE}.
+   * </ul>
+   *
+   * @return a new config carrying the settings listed above
+   */
+  public static IndexWriterConfig forFastReadOnlySearch() {
+    IndexWriterConfig config = new IndexWriterConfig();
+    config.setMaxFullFlushMergeWaitMillis(0L);
+
+    TieredMergePolicy tieredPolicy = new TieredMergePolicy();
+    tieredPolicy.setSegmentsPerTier(32);
+    tieredPolicy.setMaxMergeAtOnce(32);
+
+    BPReorderingMergePolicy reorderingPolicy =
+        new BPReorderingMergePolicy(tieredPolicy, new BPIndexReorderer());
+    reorderingPolicy.setMinNaturalMergeNumDocs(Integer.MAX_VALUE);
+    config.setMergePolicy(reorderingPolicy);
+    return config;
+  }
+}