From b816ab0c18d98aa4ff586b27d528ce731ab14889 Mon Sep 17 00:00:00 2001
From: Lisa Cawley <lcawley@elastic.co>
Date: Mon, 4 May 2020 10:37:26 -0700
Subject: [PATCH] [DOCS] Synchs and links hyperparameter descriptions (#56131)

---
 .../apis/get-dfanalytics-stats.asciidoc       | 40 ++++-----
 .../apis/put-dfanalytics.asciidoc             | 51 ++---------
 docs/reference/ml/ml-shared.asciidoc          | 87 +++++++++++++------
 3 files changed, 85 insertions(+), 93 deletions(-)

diff --git a/docs/reference/ml/df-analytics/apis/get-dfanalytics-stats.asciidoc b/docs/reference/ml/df-analytics/apis/get-dfanalytics-stats.asciidoc
index e814eb7e3e1..4b3ed526b90 100644
--- a/docs/reference/ml/df-analytics/apis/get-dfanalytics-stats.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/get-dfanalytics-stats.asciidoc
@@ -99,8 +99,7 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-alpha]
 
 `class_assignment_objective`::::
 (string)
-Defines whether class assignment maximizes the accuracy or the minimum recall 
-metric. Possible values are `maximize_accuracy` and `maximize_minimum_recall`.
+include::{docdir}/ml/ml-shared.asciidoc[tag=class-assignment-objective]
 
 `downsample_factor`::::
 (double)
@@ -108,7 +107,7 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-downsample-factor]
 
 `eta`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta]
+include::{docdir}/ml/ml-shared.asciidoc[tag=eta]
 
 `eta_growth_rate_per_tree`::::
 (double)
@@ -116,15 +115,15 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta-growth]
 
 `feature_bag_fraction`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-feature-bag-fraction]
+include::{docdir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
 
 `gamma`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-gamma]
+include::{docdir}/ml/ml-shared.asciidoc[tag=gamma]
 
 `lambda`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-lambda]
+include::{docdir}/ml/ml-shared.asciidoc[tag=lambda]
 
 `max_attempts_to_add_tree`::::
 (integer)
@@ -136,7 +135,7 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-optimization-rounds]
 
 `max_trees`::::
 (integer)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-trees]
+include::{docdir}/ml/ml-shared.asciidoc[tag=max-trees]
 
 `num_folds`::::
 (integer)
@@ -221,32 +220,27 @@ heuristics.
 =======
 `compute_feature_influence`::::
 (boolean)
-If true, feature influence calculation is enabled.
+include::{docdir}/ml/ml-shared.asciidoc[tag=compute-feature-influence]
 
 `feature_influence_threshold`::::
 (double)
-The minimum {olscore} that a document needs to have to calculate its feature 
-influence score.
+include::{docdir}/ml/ml-shared.asciidoc[tag=feature-influence-threshold]
 
 `method`::::
 (string)
-The method that {oldetection} uses. Possible values are `lof`, `ldof`, 
-`distance_kth_nn`, `distance_knn`, and `ensemble`.
+include::{docdir}/ml/ml-shared.asciidoc[tag=method]
 
 `n_neighbors`::::
 (integer)
-The value for how many nearest neighbors each method of {oldetection} uses to 
-calculate its outlier score.
+include::{docdir}/ml/ml-shared.asciidoc[tag=n-neighbors]
 
 `outlier_fraction`::::
 (double)
-The proportion of the data set that is assumed to be outlying prior to 
-{oldetection}.
+include::{docdir}/ml/ml-shared.asciidoc[tag=outlier-fraction]
 
 `standardization_enabled`::::
 (boolean)
-If true, then the following operation is performed on the columns before 
-computing {olscores}: (x_i - mean(x_i)) / sd(x_i).
+include::{docdir}/ml/ml-shared.asciidoc[tag=standardization-enabled]
 =======
 //End parameters
 
@@ -296,7 +290,7 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-downsample-factor]
 
 `eta`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta]
+include::{docdir}/ml/ml-shared.asciidoc[tag=eta]
 
 `eta_growth_rate_per_tree`::::
 (double)
@@ -304,15 +298,15 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-eta-growth]
 
 `feature_bag_fraction`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-feature-bag-fraction]
+include::{docdir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
 
 `gamma`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-gamma]
+include::{docdir}/ml/ml-shared.asciidoc[tag=gamma]
 
 `lambda`::::
 (double)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-lambda]
+include::{docdir}/ml/ml-shared.asciidoc[tag=lambda]
 
 `max_attempts_to_add_tree`::::
 (integer)
@@ -324,7 +318,7 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-optimization-rounds]
 
 `max_trees`::::
 (integer)
-include::{docdir}/ml/ml-shared.asciidoc[tag=dfas-max-trees]
+include::{docdir}/ml/ml-shared.asciidoc[tag=max-trees]
 
 `num_folds`::::
 (integer)
diff --git a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc
index a5681c1c912..064f9fcce49 100644
--- a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc
@@ -47,24 +47,9 @@ indices and stores the outcome in a destination index.
 If the destination index does not exist, it is created automatically when you
 start the job. See <<start-dfanalytics>>.
 
-[[ml-hyperparam-optimization]]
 If you supply only a subset of the {regression} or {classification} parameters,
-_hyperparameter optimization_ occurs. It determines a value for each of the
-undefined parameters.
-
-////
-The starting point is calculated for data dependent parameters by examining the loss
-on the training data. Subject to the size constraint, this operation provides an
-upper bound on the improvement in validation loss.
-
-The optimization starts with random search, then 
-Bayesian optimization is performed that is targeting maximum expected 
-improvement. If you override any parameters by explicitely setting it, the 
-optimization calculates the value of the remaining parameters accordingly and 
-uses the value you provided for the overridden parameter. The number of rounds 
-are reduced respectively. The validation error is estimated in each round by 
-using 4-fold cross validation.
-////
+{ml-docs}/hyperparameters.html[hyperparameter optimization] occurs. It 
+determines a value for each of the undefined parameters.
 
 [[ml-put-dfanalytics-path-params]]
 ==== {api-path-parms-title}
@@ -113,11 +98,7 @@ understand the function of these parameters.
 =====
 `class_assignment_objective`::::
 (Optional, string)
-Defines the objective to optimize when assigning class labels:
-`maximize_accuracy` or `maximize_minimum_recall`. When maximizing accuracy,
-class labels are chosen to maximize the number of correct predictions. When
-maximizing minimum recall, labels are chosen to maximize the minimum recall
-for any class. Defaults to `maximize_minimum_recall`.
+include::{docdir}/ml/ml-shared.asciidoc[tag=class-assignment-objective]
 
 `dependent_variable`::::
 (Required, string)
@@ -184,41 +165,27 @@ The configuration information necessary to perform
 =====
 `compute_feature_influence`::::
 (Optional, boolean)
-If `true`, the feature influence calculation is enabled. Defaults to `true`.
+include::{docdir}/ml/ml-shared.asciidoc[tag=compute-feature-influence]
   
 `feature_influence_threshold`:::: 
 (Optional, double)
-The minimum {olscore} that a document needs to have in order to calculate its 
-{fiscore}. Value range: 0-1 (`0.1` by default).
+include::{docdir}/ml/ml-shared.asciidoc[tag=feature-influence-threshold]
 
 `method`::::
 (Optional, string)
-Sets the method that {oldetection} uses. If the method is not set {oldetection} 
-uses an ensemble of different methods and normalises and combines their 
-individual {olscores} to obtain the overall {olscore}. We recommend to use the 
-ensemble method. Available methods are `lof`, `ldof`, `distance_kth_nn`, 
-`distance_knn`.
+include::{docdir}/ml/ml-shared.asciidoc[tag=method]
   
 `n_neighbors`::::
 (Optional, integer)
-Defines the value for how many nearest neighbors each method of 
-{oldetection} will use to calculate its {olscore}. When the value is not set, 
-different values will be used for different ensemble members. This helps 
-improve diversity in the ensemble. Therefore, only override this if you are 
-confident that the value you choose is appropriate for the data set.
+include::{docdir}/ml/ml-shared.asciidoc[tag=n-neighbors]
   
 `outlier_fraction`::::
 (Optional, double)
-Sets the proportion of the data set that is assumed to be outlying prior to 
-{oldetection}. For example, 0.05 means it is assumed that 5% of values are real 
-outliers and 95% are inliers.
+include::{docdir}/ml/ml-shared.asciidoc[tag=outlier-fraction]
   
 `standardization_enabled`::::
 (Optional, boolean)
-If `true`, then the following operation is performed on the columns before 
-computing outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For 
-more information, see 
-https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[this wiki page about standardization].
+include::{docdir}/ml/ml-shared.asciidoc[tag=standardization-enabled]
 //End outlier_detection
 =====
 //Begin regression
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index 8b9d7d107f7..afe88407e26 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -278,6 +278,19 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=time-span]
 ====
 end::chunking-config[]
 
+tag::class-assignment-objective[]
+Defines the objective to optimize when assigning class labels:
+`maximize_accuracy` or `maximize_minimum_recall`. When maximizing accuracy,
+class labels are chosen to maximize the number of correct predictions. When
+maximizing minimum recall, labels are chosen to maximize the minimum recall
+for any class. Defaults to `maximize_minimum_recall`.
+end::class-assignment-objective[]
+
+tag::compute-feature-influence[]
+Specifies whether the feature influence calculation is enabled. Defaults to
+`true`.
+end::compute-feature-influence[]
+
 tag::custom-rules[]
 An array of custom rule objects, which enable you to customize the way detectors
 operate. For example, a rule may dictate to the detector conditions under which
@@ -479,32 +492,15 @@ tag::dfas-downsample-factor[]
 The value of the downsample factor.
 end::dfas-downsample-factor[]
 
-tag::dfas-eta[]
-The value of the eta hyperparameter.
-end::dfas-eta[]
-
 tag::dfas-eta-growth[]
 Specifies the rate at which the `eta` increases for each new tree that is added to the 
 forest. For example, a rate of `1.05` increases `eta` by 5%.
 end::dfas-eta-growth[]
 
-tag::dfas-feature-bag-fraction[]
-The fraction of features that is used when selecting a random bag for each 
-candidate split.
-end::dfas-feature-bag-fraction[]
-
-tag::dfas-gamma[]
-Regularization factor to penalize trees with large numbers of nodes.
-end::dfas-gamma[]
-
 tag::dfas-iteration[]
 The number of iterations on the analysis.
 end::dfas-iteration[]
 
-tag::dfas-lambda[]
-Regularization factor to penalize large leaf weights.
-end::dfas-lambda[]
-
 tag::dfas-max-attempts[]
 If the algorithm fails to determine a non-trivial tree (more than a single 
 leaf), this parameter determines how many of such consecutive failures are 
@@ -519,10 +515,6 @@ The maximum number of steps is determined based on the number of undefined hyper
 times the maximum optimization rounds per hyperparameter.
 end::dfas-max-optimization-rounds[]
 
-tag::dfas-max-trees[]
-The maximum number of trees in the forest.
-end::dfas-max-trees[]
-
 tag::dfas-num-folds[]
 The maximum number of folds for the cross-validation procedure.
 end::dfas-num-folds[]
@@ -584,10 +576,11 @@ end::empty-bucket-count[]
 
 tag::eta[]
 Advanced configuration option. The shrinkage applied to the weights. Smaller 
-values result in larger forests which have a better generalization error. However, 
-the smaller the value the longer the training will take. For more information, 
-about shrinkage, see 
-https://en.wikipedia.org/wiki/Gradient_boosting#Shrinkage[this wiki article].
+values result in larger forests which have a better generalization error.
+However, the smaller the value the longer the training will take. For more
+information about shrinkage, see 
+https://en.wikipedia.org/wiki/Gradient_boosting#Shrinkage[this wiki article]. By
+default, this value is calculated during hyperparameter optimization.
 end::eta[]
 
 tag::exclude-frequent[]
@@ -612,9 +605,15 @@ end::failed-category-count[]
 
 tag::feature-bag-fraction[]
 Advanced configuration option. Defines the fraction of features that will be 
-used when selecting a random bag for each candidate split. 
+used when selecting a random bag for each candidate split. By default, this
+value is calculated during hyperparameter optimization.
 end::feature-bag-fraction[]
 
+tag::feature-influence-threshold[]
+The minimum {olscore} that a document needs to have to calculate its feature 
+influence score. Value range: 0-1 (`0.1` by default).
+end::feature-influence-threshold[]
+
 tag::filter[]
 One or more <<analysis-tokenfilters,token filters>>. In addition to the built-in 
 token filters, other plugins can provide more token filters. This property is
@@ -660,7 +659,8 @@ Advanced configuration option. Regularization parameter to prevent overfitting
 on the training data set. Multiplies a linear penalty associated with the size of 
 individual trees in the forest. The higher the value the more training will 
 prefer smaller trees. The smaller this parameter the larger individual trees 
-will be and the longer training will take.
+will be and the longer training will take. By default, this value is calculated
+during hyperparameter optimization.
 end::gamma[]
 
 tag::groups[]
@@ -824,6 +824,7 @@ more training will attempt to keep leaf weights small. This makes the prediction
 function smoother at the expense of potentially not being able to capture 
 relevant relationships between the features and the {depvar}. The smaller this 
 parameter the larger individual trees will be and the longer training will take.
+By default, this value is calculated during hyperparameter optimization.
 end::lambda[]
 
 tag::last-data-time[]
@@ -867,9 +868,18 @@ end::max-empty-searches[]
 
 tag::max-trees[]
 Advanced configuration option. Defines the maximum number of trees the forest is
-allowed to contain. The maximum value is 2000.
+allowed to contain. The maximum value is 2000. By default, this value is
+calculated during hyperparameter optimization.
 end::max-trees[]
 
+tag::method[]
+The method that {oldetection} uses. Available methods are `lof`, `ldof`,
+`distance_kth_nn`, `distance_knn`, and `ensemble`. The default value is
+`ensemble`, which means that {oldetection} uses an ensemble of different methods
+and normalizes and combines their individual {olscores} to obtain the overall
+{olscore}.
+end::method[]
+
 tag::missing-field-count[]
 The number of input documents that are missing a field that the {anomaly-job} is
 configured to analyze. Input documents with missing fields are still processed
@@ -1012,6 +1022,14 @@ NOTE: To use the `multivariate_by_fields` property, you must also specify
 --
 end::multivariate-by-fields[]
 
+tag::n-neighbors[]
+Defines the value for how many nearest neighbors each method of {oldetection}
+uses to calculate its {olscore}. When the value is not set, different values are
+used for different ensemble members. This default behavior helps improve the
+diversity in the ensemble; only override it if you are confident that the value
+you choose is appropriate for the data set.
+end::n-neighbors[]
+
 tag::node-address[]
 The network address of the node.
 end::node-address[]
@@ -1046,6 +1064,12 @@ tag::open-time[]
 For open jobs only, the elapsed time for which the job has been open.
 end::open-time[]
 
+tag::outlier-fraction[]
+The proportion of the data set that is assumed to be outlying prior to 
+{oldetection}. For example, 0.05 means it is assumed that 5% of values are real 
+outliers and 95% are inliers.
+end::outlier-fraction[]
+
 tag::out-of-order-timestamp-count[]
 The number of input documents that are out of time sequence and outside
 of the latency window. This information is applicable only when you provide data
@@ -1182,6 +1206,13 @@ number of data points. If your data contains many sparse buckets, consider using
 a longer `bucket_span`.
 end::sparse-bucket-count[]
 
+tag::standardization-enabled[]
+If `true`, the following operation is performed on the columns before computing
+{olscores}: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For 
+more information about this concept, see 
+https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[Wikipedia].
+end::standardization-enabled[]
+
 tag::state-anomaly-job[]
 The status of the {anomaly-job}, which can be one of the following values:
 +