diff --git a/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java b/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java index cf4a4c85d10..3895bbed2c4 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java +++ b/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java @@ -28,6 +28,92 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.index.settings.IndexSettingsService; +/** + * A shard in elasticsearch is a Lucene index, and a Lucene index is broken + * down into segments. Segments are internal storage elements in the index + * where the index data is stored, and are immutable up to delete markers. + * Segments are periodically merged into larger segments to keep the + * index size at bay and expunge deletes. + * + *

+ * Merges select segments of approximately equal size, subject to an allowed + * number of segments per tier. The merge policy is able to merge + * non-adjacent segments, and separates how many segments are merged at once from how many + * segments are allowed per tier. It also does not over-merge (i.e., cascade merges). + * + *

+ * All merge policy settings are dynamic and can be updated on a live index. + * The merge policy has the following settings: + * + *

+ * + *

+ * For normal merging, the policy first computes a "budget" of how many + * segments are allowed to be in the index. If the index is over-budget, + * then the policy sorts segments by decreasing size (proportionally considering percent + * deletes), and then finds the least-cost merge. Merge cost is measured by + * a combination of the "skew" of the merge (size of the largest segment divided by + * the size of the smallest segment), total merge size and percentage of deletes reclaimed, so that + * merges with lower skew, smaller size and more reclaimed deletes + * are favored. + * + *

+ * If a merge will produce a segment that's larger than + * max_merged_segment then the policy will merge fewer segments (down to + * 1 at once, if that one has deletions) to keep the segment size under + * budget. + * + *

+ * Note that for large shards that hold many gigabytes of + * data, the default of max_merged_segment (5gb) can cause many + * segments to be in an index, making searches slower. Use the + * indices segments API to see the segments that an index has, and + * possibly either increase max_merged_segment or issue an optimize + * call for the index (try to issue it at a low-traffic time). + */ + public final class MergePolicyConfig implements IndexSettingsService.Listener{ private final TieredMergePolicy mergePolicy = new TieredMergePolicy(); private final ESLogger logger; @@ -187,4 +273,4 @@ public final class MergePolicyConfig implements IndexSettingsService.Listener{ return Double.toString(ratio); } } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java b/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java index 9c8aba25ee3..f061a95f2af 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java +++ b/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java @@ -24,7 +24,30 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; /** - * + * The merge scheduler (ConcurrentMergeScheduler) controls the execution of + * merge operations once they are needed (according to the merge policy). Merges + * run in separate threads, and when the maximum number of threads is reached, + * further merges will wait until a merge thread becomes available. + * + *

The merge scheduler supports the following dynamic settings: + * + *

*/ public final class MergeSchedulerConfig {