diff --git a/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java b/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java index cf4a4c85d10..3895bbed2c4 100644 --- a/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java +++ b/core/src/main/java/org/elasticsearch/index/shard/MergePolicyConfig.java @@ -28,6 +28,92 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.index.settings.IndexSettingsService; +/** + * A shard in elasticsearch is a Lucene index, and a Lucene index is broken + * down into segments. Segments are internal storage elements in the index + * where the index data is stored, and are immutable up to delete markers. + * Segments are, periodically, merged into larger segments to keep the + * index size at bay and expunge deletes. + * + *
+ * Merges select segments of approximately equal size, subject to an allowed + * number of segments per tier. The merge policy is able to merge + * non-adjacent segments, and separates how many segments are merged at once from how many + * segments are allowed per tier. It also does not over-merge (i.e., cascade merges). + * + *
+ * All merge policy settings are dynamic and can be updated on a live index. + * The merge policy has the following settings: + * + *
index.merge.policy.expunge_deletes_allowed
:
+ *
+ * When expungeDeletes is called, we only merge away a segment if its delete
+ * percentage is over this threshold. Default is 10
.
+ *
+ * index.merge.policy.floor_segment
:
+ *
+ * Segments smaller than this are "rounded up" to this size, i.e. treated as
+ * equal (floor) size for merge selection. This is to prevent frequent
+ * flushing of tiny segments, thus preventing a long tail in the index. Default
+ * is 2mb
.
+ *
+ * index.merge.policy.max_merge_at_once
:
+ *
+ * Maximum number of segments to be merged at a time during "normal" merging.
+ * Default is 10
.
+ *
+ * index.merge.policy.max_merge_at_once_explicit
:
+ *
+ * Maximum number of segments to be merged at a time, during optimize or
+ * expungeDeletes. Default is 30
.
+ *
+ * index.merge.policy.max_merged_segment
:
+ *
+ * Maximum sized segment to produce during normal merging (not explicit
+ * optimize). This setting is approximate: the estimate of the merged segment
+ * size is made by summing sizes of to-be-merged segments (compensating for
+ * percent deleted docs). Default is 5gb
.
+ *
+ * index.merge.policy.segments_per_tier
:
+ *
+ * Sets the allowed number of segments per tier. Smaller values mean more
+ * merging but fewer segments. Default is 10
. Note, this value needs to be
+ * >= than the max_merge_at_once
otherwise you'll force too many merges to
+ * occur.
+ *
+ * index.merge.policy.reclaim_deletes_weight
:
+ *
+ * Controls how aggressively merges that reclaim more deletions are favored.
+ * Higher values favor selecting merges that reclaim deletions. A value of
+ * 0.0
means deletions don't impact merge selection. Defaults to 2.0
.
+ * + * For normal merging, the policy first computes a "budget" of how many + * segments are allowed to be in the index. If the index is over-budget, + * then the policy sorts segments by decreasing size (proportionally considering percent + * deletes), and then finds the least-cost merge. Merge cost is measured by + * a combination of the "skew" of the merge (size of largest seg divided by + * smallest seg), total merge size and pct deletes reclaimed, so that + * merges with lower skew, smaller size and those reclaiming more deletes, + * are favored. + * + *
+ * If a merge will produce a segment that's larger than
+ * max_merged_segment
then the policy will merge fewer segments (down to
+ * 1 at once, if that one has deletions) to keep the segment size under
+ * budget.
+ *
+ *
+ * Note, this can mean that for large shards that holds many gigabytes of
+ * data, the default of max_merged_segment
(5gb
) can cause for many
+ * segments to be in an index, and causing searches to be slower. Use the
+ * indices segments API to see the segments that an index has, and
+ * possibly either increase the max_merged_segment
or issue an optimize
+ * call for the index (try and aim to issue it on a low traffic time).
+ */
+
public final class MergePolicyConfig implements IndexSettingsService.Listener{
private final TieredMergePolicy mergePolicy = new TieredMergePolicy();
private final ESLogger logger;
@@ -187,4 +273,4 @@ public final class MergePolicyConfig implements IndexSettingsService.Listener{
return Double.toString(ratio);
}
}
-}
\ No newline at end of file
+}
diff --git a/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java b/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java
index 9c8aba25ee3..f061a95f2af 100644
--- a/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java
+++ b/core/src/main/java/org/elasticsearch/index/shard/MergeSchedulerConfig.java
@@ -24,7 +24,30 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.EsExecutors;
/**
- *
+ * The merge scheduler (ConcurrentMergeScheduler
) controls the execution of
+ * merge operations once they are needed (according to the merge policy). Merges
+ * run in separate threads, and when the maximum number of threads is reached,
+ * further merges will wait until a merge thread becomes available.
+ *
+ *
The merge scheduler supports the following dynamic settings: + * + *
index.merge.scheduler.max_thread_count
:
+ *
+ * The maximum number of threads that may be merging at once. Defaults to
+ * Math.max(1, Math.min(4, Runtime.getRuntime().availableProcessors() / 2))
+ * which works well for a good solid-state-disk (SSD). If your index is on
+ * spinning platter drives instead, decrease this to 1.
+ *
+ * index.merge.scheduler.auto_throttle
:
+ *
+ * If this is true (the default), then the merge scheduler will rate-limit IO
+ * (writes) for merges to an adaptive value depending on how many merges are
+ * requested over time. An application with a low indexing rate that
+ * unluckily suddenly requires a large merge will see that merge aggressively
+ * throttled, while an application doing heavy indexing will see the throttle
+ * move higher to allow merges to keep up with ongoing indexing.
+ *