[ML] adding docs + hlrc for data frame analysis feature_processors (#61149)

Adds HLRC and some docs for the new feature_processors field in Data frame analytics.

Co-authored-by: Przemysław Witek <przemyslaw.witek@elastic.co>
Co-authored-by: Lisa Cawley <lcawley@elastic.co>
parent: d05649bfae
commit: 1ae2923632
Changed paths:
  client/rest-high-level/src/main/java/org/elasticsearch/client/ml
  client/rest-high-level/src/test/java/org/elasticsearch/client
  docs/java-rest/high-level/ml
  docs/reference/ml
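Before the file-by-file diff, a minimal sketch of the client-side API this commit adds, assuming Classification exposes its usual static builder(dependentVariable) entry point; the field names (my_dependent_variable, categorical_feature, cat, cat_column) are illustrative, borrowed from the MlClientDocumentationIT snippet further down.

import java.util.Arrays;

import org.elasticsearch.client.ml.dataframe.Classification;
import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncoding;

class FeatureProcessorsSketch {
    // Build a classification analysis with a custom feature processor that
    // one-hot encodes "categorical_feature" into a "cat_column" feature.
    static Classification withFeatureProcessors() {
        return Classification.builder("my_dependent_variable")
            .setNumTopClasses(1)
            .setFeatureProcessors(Arrays.asList(
                OneHotEncoding.builder("categorical_feature")
                    .addOneHot("cat", "cat_column")
                    .build()))
            .build();
    }
}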
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java

@@ -18,6 +18,8 @@
  */
 package org.elasticsearch.client.ml.dataframe;
 
+import org.elasticsearch.client.ml.inference.NamedXContentObjectHelper;
+import org.elasticsearch.client.ml.inference.preprocessing.PreProcessor;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.Strings;
@@ -26,6 +28,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
 
@@ -53,7 +56,9 @@ public class Classification implements DataFrameAnalysis {
     static final ParseField CLASS_ASSIGNMENT_OBJECTIVE = new ParseField("class_assignment_objective");
     static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes");
     static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed");
+    static final ParseField FEATURE_PROCESSORS = new ParseField("feature_processors");
 
+    @SuppressWarnings("unchecked")
     private static final ConstructingObjectParser<Classification, Void> PARSER =
         new ConstructingObjectParser<>(
             NAME.getPreferredName(),
@@ -70,7 +75,8 @@ public class Classification implements DataFrameAnalysis {
                 (Double) a[8],
                 (Integer) a[9],
                 (Long) a[10],
-                (ClassAssignmentObjective) a[11]));
+                (ClassAssignmentObjective) a[11],
+                (List<PreProcessor>) a[12]));
 
     static {
         PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE);
@@ -86,6 +92,10 @@ public class Classification implements DataFrameAnalysis {
         PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED);
         PARSER.declareString(
             ConstructingObjectParser.optionalConstructorArg(), ClassAssignmentObjective::fromString, CLASS_ASSIGNMENT_OBJECTIVE);
+        PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(),
+            (p, c, n) -> p.namedObject(PreProcessor.class, n, c),
+            (classification) -> {},
+            FEATURE_PROCESSORS);
     }
 
     private final String dependentVariable;
@@ -100,12 +110,13 @@ public class Classification implements DataFrameAnalysis {
     private final ClassAssignmentObjective classAssignmentObjective;
     private final Integer numTopClasses;
     private final Long randomizeSeed;
+    private final List<PreProcessor> featureProcessors;
 
     private Classification(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta,
                            @Nullable Integer maxTrees, @Nullable Double featureBagFraction,
                            @Nullable Integer numTopFeatureImportanceValues, @Nullable String predictionFieldName,
                            @Nullable Double trainingPercent, @Nullable Integer numTopClasses, @Nullable Long randomizeSeed,
-                           @Nullable ClassAssignmentObjective classAssignmentObjective) {
+                           @Nullable ClassAssignmentObjective classAssignmentObjective, @Nullable List<PreProcessor> featureProcessors) {
         this.dependentVariable = Objects.requireNonNull(dependentVariable);
         this.lambda = lambda;
         this.gamma = gamma;
@@ -118,6 +129,7 @@ public class Classification implements DataFrameAnalysis {
         this.classAssignmentObjective = classAssignmentObjective;
         this.numTopClasses = numTopClasses;
         this.randomizeSeed = randomizeSeed;
+        this.featureProcessors = featureProcessors;
     }
 
     @Override
@@ -173,6 +185,10 @@ public class Classification implements DataFrameAnalysis {
         return numTopClasses;
     }
 
+    public List<PreProcessor> getFeatureProcessors() {
+        return featureProcessors;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -210,6 +226,9 @@ public class Classification implements DataFrameAnalysis {
         if (numTopClasses != null) {
             builder.field(NUM_TOP_CLASSES.getPreferredName(), numTopClasses);
         }
+        if (featureProcessors != null) {
+            NamedXContentObjectHelper.writeNamedObjects(builder, params, true, FEATURE_PROCESSORS.getPreferredName(), featureProcessors);
+        }
         builder.endObject();
         return builder;
     }
@@ -217,7 +236,7 @@ public class Classification implements DataFrameAnalysis {
     @Override
     public int hashCode() {
         return Objects.hash(dependentVariable, lambda, gamma, eta, maxTrees, featureBagFraction, numTopFeatureImportanceValues,
-            predictionFieldName, trainingPercent, randomizeSeed, numTopClasses, classAssignmentObjective);
+            predictionFieldName, trainingPercent, randomizeSeed, numTopClasses, classAssignmentObjective, featureProcessors);
     }
 
     @Override
@@ -236,7 +255,8 @@ public class Classification implements DataFrameAnalysis {
             && Objects.equals(trainingPercent, that.trainingPercent)
             && Objects.equals(randomizeSeed, that.randomizeSeed)
             && Objects.equals(numTopClasses, that.numTopClasses)
-            && Objects.equals(classAssignmentObjective, that.classAssignmentObjective);
+            && Objects.equals(classAssignmentObjective, that.classAssignmentObjective)
+            && Objects.equals(featureProcessors, that.featureProcessors);
     }
 
     @Override
@@ -270,6 +290,7 @@ public class Classification implements DataFrameAnalysis {
         private Integer numTopClasses;
         private Long randomizeSeed;
        private ClassAssignmentObjective classAssignmentObjective;
+        private List<PreProcessor> featureProcessors;
 
         private Builder(String dependentVariable) {
             this.dependentVariable = Objects.requireNonNull(dependentVariable);
@@ -330,10 +351,15 @@ public class Classification implements DataFrameAnalysis {
             return this;
         }
 
+        public Builder setFeatureProcessors(List<PreProcessor> featureProcessors) {
+            this.featureProcessors = featureProcessors;
+            return this;
+        }
+
         public Classification build() {
             return new Classification(dependentVariable, lambda, gamma, eta, maxTrees, featureBagFraction,
                 numTopFeatureImportanceValues, predictionFieldName, trainingPercent, numTopClasses, randomizeSeed,
-                classAssignmentObjective);
+                classAssignmentObjective, featureProcessors);
         }
     }
 }
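The declareNamedObjects/writeNamedObjects pair above defines the wire format of feature_processors: the true argument to writeNamedObjects wraps the list in an array whose entries are single-key objects named after the processor type. A hedged sketch of what serialization is expected to produce, reusing the FeatureProcessorsSketch helper from the sketch near the top; the keys inside the processor object (field, hot_map) are assumptions based on OneHotEncoding's parser, not shown in this diff.

import org.elasticsearch.common.Strings;

class FeatureProcessorsWireFormat {
    // Expected shape (inner keys are assumptions, not from this diff):
    //   "feature_processors": [
    //     { "one_hot_encoding": { "field": "categorical_feature",
    //                             "hot_map": { "cat": "cat_column" } } }
    //   ]
    static String asJson() {
        return Strings.toString(FeatureProcessorsSketch.withFeatureProcessors());
    }
}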
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java

@@ -18,6 +18,8 @@
  */
 package org.elasticsearch.client.ml.dataframe;
 
+import org.elasticsearch.client.ml.inference.NamedXContentObjectHelper;
+import org.elasticsearch.client.ml.inference.preprocessing.PreProcessor;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.Strings;
@@ -26,6 +28,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
 
@@ -55,7 +58,9 @@ public class Regression implements DataFrameAnalysis {
     static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed");
     static final ParseField LOSS_FUNCTION = new ParseField("loss_function");
     static final ParseField LOSS_FUNCTION_PARAMETER = new ParseField("loss_function_parameter");
+    static final ParseField FEATURE_PROCESSORS = new ParseField("feature_processors");
 
+    @SuppressWarnings("unchecked")
     private static final ConstructingObjectParser<Regression, Void> PARSER =
         new ConstructingObjectParser<>(
             NAME.getPreferredName(),
@@ -72,7 +77,8 @@ public class Regression implements DataFrameAnalysis {
                 (Double) a[8],
                 (Long) a[9],
                 (LossFunction) a[10],
-                (Double) a[11]
+                (Double) a[11],
+                (List<PreProcessor>) a[12]
             ));
 
     static {
@@ -88,6 +94,10 @@ public class Regression implements DataFrameAnalysis {
         PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED);
         PARSER.declareString(optionalConstructorArg(), LossFunction::fromString, LOSS_FUNCTION);
         PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), LOSS_FUNCTION_PARAMETER);
+        PARSER.declareNamedObjects(ConstructingObjectParser.optionalConstructorArg(),
+            (p, c, n) -> p.namedObject(PreProcessor.class, n, c),
+            (regression) -> {},
+            FEATURE_PROCESSORS);
     }
 
     private final String dependentVariable;
@@ -102,12 +112,13 @@ public class Regression implements DataFrameAnalysis {
     private final Long randomizeSeed;
     private final LossFunction lossFunction;
     private final Double lossFunctionParameter;
+    private final List<PreProcessor> featureProcessors;
 
     private Regression(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta,
                        @Nullable Integer maxTrees, @Nullable Double featureBagFraction,
                        @Nullable Integer numTopFeatureImportanceValues, @Nullable String predictionFieldName,
                        @Nullable Double trainingPercent, @Nullable Long randomizeSeed, @Nullable LossFunction lossFunction,
-                       @Nullable Double lossFunctionParameter) {
+                       @Nullable Double lossFunctionParameter, @Nullable List<PreProcessor> featureProcessors) {
         this.dependentVariable = Objects.requireNonNull(dependentVariable);
         this.lambda = lambda;
         this.gamma = gamma;
@@ -120,6 +131,7 @@ public class Regression implements DataFrameAnalysis {
         this.randomizeSeed = randomizeSeed;
         this.lossFunction = lossFunction;
         this.lossFunctionParameter = lossFunctionParameter;
+        this.featureProcessors = featureProcessors;
     }
 
     @Override
@@ -175,6 +187,10 @@ public class Regression implements DataFrameAnalysis {
         return lossFunctionParameter;
     }
 
+    public List<PreProcessor> getFeatureProcessors() {
+        return featureProcessors;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -212,6 +228,9 @@ public class Regression implements DataFrameAnalysis {
         if (lossFunctionParameter != null) {
             builder.field(LOSS_FUNCTION_PARAMETER.getPreferredName(), lossFunctionParameter);
         }
+        if (featureProcessors != null) {
+            NamedXContentObjectHelper.writeNamedObjects(builder, params, true, FEATURE_PROCESSORS.getPreferredName(), featureProcessors);
+        }
         builder.endObject();
         return builder;
     }
@@ -219,7 +238,7 @@ public class Regression implements DataFrameAnalysis {
     @Override
     public int hashCode() {
         return Objects.hash(dependentVariable, lambda, gamma, eta, maxTrees, featureBagFraction, numTopFeatureImportanceValues,
-            predictionFieldName, trainingPercent, randomizeSeed, lossFunction, lossFunctionParameter);
+            predictionFieldName, trainingPercent, randomizeSeed, lossFunction, lossFunctionParameter, featureProcessors);
     }
 
     @Override
@@ -238,7 +257,8 @@ public class Regression implements DataFrameAnalysis {
             && Objects.equals(trainingPercent, that.trainingPercent)
             && Objects.equals(randomizeSeed, that.randomizeSeed)
             && Objects.equals(lossFunction, that.lossFunction)
-            && Objects.equals(lossFunctionParameter, that.lossFunctionParameter);
+            && Objects.equals(lossFunctionParameter, that.lossFunctionParameter)
+            && Objects.equals(featureProcessors, that.featureProcessors);
     }
 
     @Override
@@ -259,6 +279,7 @@ public class Regression implements DataFrameAnalysis {
         private Long randomizeSeed;
         private LossFunction lossFunction;
         private Double lossFunctionParameter;
+        private List<PreProcessor> featureProcessors;
 
         private Builder(String dependentVariable) {
             this.dependentVariable = Objects.requireNonNull(dependentVariable);
@@ -319,9 +340,15 @@ public class Regression implements DataFrameAnalysis {
             return this;
         }
 
+        public Builder setFeatureProcessors(List<PreProcessor> featureProcessors) {
+            this.featureProcessors = featureProcessors;
+            return this;
+        }
+
         public Regression build() {
             return new Regression(dependentVariable, lambda, gamma, eta, maxTrees, featureBagFraction,
-                numTopFeatureImportanceValues, predictionFieldName, trainingPercent, randomizeSeed, lossFunction, lossFunctionParameter);
+                numTopFeatureImportanceValues, predictionFieldName, trainingPercent, randomizeSeed, lossFunction, lossFunctionParameter,
+                featureProcessors);
         }
     }
 }
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/inference/preprocessing/OneHotEncoding.java

@@ -114,7 +114,7 @@ public class OneHotEncoding implements PreProcessor {
         return Objects.hash(field, hotMap, custom);
     }
 
-    public Builder builder(String field) {
+    public static Builder builder(String field) {
         return new Builder(field);
     }
 
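The one-line OneHotEncoding change above is a prerequisite for the new snippets below: builder was an instance method, so the fluent OneHotEncoding.builder(...) call could not compile without already holding an instance. With the static modifier the type itself becomes the entry point:

import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncoding;

class OneHotBuilderFix {
    // Maps the "cat" value of "categorical_feature" to a "cat_column" feature;
    // field names are the illustrative ones used in the IT snippets below.
    static OneHotEncoding encodeCats() {
        return OneHotEncoding.builder("categorical_feature")
            .addOneHot("cat", "cat_column")
            .build();
    }
}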
client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java

@@ -179,6 +179,7 @@ import org.elasticsearch.client.ml.inference.TrainedModelDefinition;
 import org.elasticsearch.client.ml.inference.TrainedModelDefinitionTests;
 import org.elasticsearch.client.ml.inference.TrainedModelInput;
 import org.elasticsearch.client.ml.inference.TrainedModelStats;
+import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncoding;
 import org.elasticsearch.client.ml.inference.trainedmodel.RegressionConfig;
 import org.elasticsearch.client.ml.inference.trainedmodel.TargetType;
 import org.elasticsearch.client.ml.job.config.AnalysisConfig;
@@ -3003,6 +3004,9 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
                 .setRandomizeSeed(1234L) // <10>
                 .setClassAssignmentObjective(Classification.ClassAssignmentObjective.MAXIMIZE_ACCURACY) // <11>
                 .setNumTopClasses(1) // <12>
+                .setFeatureProcessors(Arrays.asList(OneHotEncoding.builder("categorical_feature") // <13>
+                    .addOneHot("cat", "cat_column")
+                    .build()))
                 .build();
             // end::put-data-frame-analytics-classification
 
@@ -3019,6 +3023,9 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
                 .setRandomizeSeed(1234L) // <10>
                 .setLossFunction(Regression.LossFunction.MSE) // <11>
                 .setLossFunctionParameter(1.0) // <12>
+                .setFeatureProcessors(Arrays.asList(OneHotEncoding.builder("categorical_feature") // <13>
+                    .addOneHot("cat", "cat_column")
+                    .build()))
                 .build();
             // end::put-data-frame-analytics-regression
 
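The IT snippets above only build the analysis object. For context, a hedged sketch of how such an analysis is typically wrapped and sent with the high-level client; the surrounding config and request types are standard HLRC classes, but the exact calls here are assumptions rather than lines from this diff.

import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.PutDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;

class PutAnalyticsSketch {
    // Wrap the classification (with feature processors) in a config and PUT it;
    // index and job names are illustrative.
    static void put(RestHighLevelClient client) throws java.io.IOException {
        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
            .setId("my-analytics-job")
            .setSource(DataFrameAnalyticsSource.builder().setIndex("source-index").build())
            .setDest(DataFrameAnalyticsDest.builder().setIndex("dest-index").build())
            .setAnalysis(FeatureProcessorsSketch.withFeatureProcessors())
            .build();
        client.machineLearning().putDataFrameAnalytics(
            new PutDataFrameAnalyticsRequest(config), RequestOptions.DEFAULT);
    }
}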
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java

@@ -18,10 +18,20 @@
  */
 package org.elasticsearch.client.ml.dataframe;
 
+import org.elasticsearch.client.ml.inference.MlInferenceNamedXContentProvider;
+import org.elasticsearch.client.ml.inference.preprocessing.FrequencyEncodingTests;
+import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncodingTests;
+import org.elasticsearch.client.ml.inference.preprocessing.TargetMeanEncodingTests;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.test.AbstractXContentTestCase;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 public class ClassificationTests extends AbstractXContentTestCase<Classification> {
 
@@ -38,9 +48,20 @@ public class ClassificationTests extends AbstractXContentTestCase<Classification
             .setRandomizeSeed(randomBoolean() ? null : randomLong())
             .setClassAssignmentObjective(randomBoolean() ? null : randomFrom(Classification.ClassAssignmentObjective.values()))
             .setNumTopClasses(randomBoolean() ? null : randomIntBetween(0, 10))
+            .setFeatureProcessors(randomBoolean() ? null :
+                Stream.generate(() -> randomFrom(FrequencyEncodingTests.createRandom(),
+                    OneHotEncodingTests.createRandom(),
+                    TargetMeanEncodingTests.createRandom()))
+                    .limit(randomIntBetween(1, 10))
+                    .collect(Collectors.toList()))
             .build();
     }
 
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        return field -> field.startsWith("feature_processors");
+    }
+
     @Override
     protected Classification createTestInstance() {
         return randomClassification();
@@ -55,4 +76,11 @@ public class ClassificationTests extends AbstractXContentTestCase<Classification
     protected boolean supportsUnknownFields() {
         return true;
     }
+
+    @Override
+    protected NamedXContentRegistry xContentRegistry() {
+        List<NamedXContentRegistry.Entry> namedXContent = new ArrayList<>();
+        namedXContent.addAll(new MlInferenceNamedXContentProvider().getNamedXContentParsers());
+        return new NamedXContentRegistry(namedXContent);
+    }
 }
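The xContentRegistry() override added above is load-bearing: feature_processors entries are resolved through NamedXContentRegistry.namedObject(PreProcessor.class, name, ...), so a parser whose registry lacks the ML inference entries cannot resolve names such as one_hot_encoding. A sketch of the registry construction, lifted from the test; the exception named in the comment is an assumption about the standard unknown-named-object failure mode.

import java.util.ArrayList;
import java.util.List;

import org.elasticsearch.client.ml.inference.MlInferenceNamedXContentProvider;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;

class PreProcessorRegistrySketch {
    // Without these entries, parsing a config containing feature_processors is
    // expected to fail (likely a NamedObjectNotFoundException; not verified here).
    static NamedXContentRegistry withMlInferenceEntries() {
        List<NamedXContentRegistry.Entry> entries = new ArrayList<>();
        entries.addAll(new MlInferenceNamedXContentProvider().getNamedXContentParsers());
        return new NamedXContentRegistry(entries);
    }
}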
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/DataFrameAnalyticsConfigTests.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.client.ml.dataframe;
 
 import org.elasticsearch.Version;
+import org.elasticsearch.client.ml.inference.MlInferenceNamedXContentProvider;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
@@ -101,6 +102,7 @@ public class DataFrameAnalyticsConfigTests extends AbstractXContentTestCase<Data
         List<NamedXContentRegistry.Entry> namedXContent = new ArrayList<>();
         namedXContent.addAll(new SearchModule(Settings.EMPTY, false, Collections.emptyList()).getNamedXContents());
         namedXContent.addAll(new MlDataFrameAnalysisNamedXContentProvider().getNamedXContentParsers());
+        namedXContent.addAll(new MlInferenceNamedXContentProvider().getNamedXContentParsers());
         return new NamedXContentRegistry(namedXContent);
     }
 }
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/RegressionTests.java

@@ -18,10 +18,20 @@
  */
 package org.elasticsearch.client.ml.dataframe;
 
+import org.elasticsearch.client.ml.inference.MlInferenceNamedXContentProvider;
+import org.elasticsearch.client.ml.inference.preprocessing.FrequencyEncodingTests;
+import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncodingTests;
+import org.elasticsearch.client.ml.inference.preprocessing.TargetMeanEncodingTests;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.test.AbstractXContentTestCase;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 public class RegressionTests extends AbstractXContentTestCase<Regression> {
 
@@ -37,9 +47,20 @@ public class RegressionTests extends AbstractXContentTestCase<Regression> {
             .setTrainingPercent(randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true))
             .setLossFunction(randomBoolean() ? null : randomFrom(Regression.LossFunction.values()))
             .setLossFunctionParameter(randomBoolean() ? null : randomDoubleBetween(1.0, Double.MAX_VALUE, true))
+            .setFeatureProcessors(randomBoolean() ? null :
+                Stream.generate(() -> randomFrom(FrequencyEncodingTests.createRandom(),
+                    OneHotEncodingTests.createRandom(),
+                    TargetMeanEncodingTests.createRandom()))
+                    .limit(randomIntBetween(1, 10))
+                    .collect(Collectors.toList()))
             .build();
     }
 
+    @Override
+    protected Predicate<String> getRandomFieldsExcludeFilter() {
+        return field -> field.startsWith("feature_processors");
+    }
+
     @Override
     protected Regression createTestInstance() {
         return randomRegression();
@@ -54,4 +75,11 @@ public class RegressionTests extends AbstractXContentTestCase<Regression> {
     protected boolean supportsUnknownFields() {
         return true;
     }
+
+    @Override
+    protected NamedXContentRegistry xContentRegistry() {
+        List<NamedXContentRegistry.Entry> namedXContent = new ArrayList<>();
+        namedXContent.addAll(new MlInferenceNamedXContentProvider().getNamedXContentParsers());
+        return new NamedXContentRegistry(namedXContent);
+    }
 }
docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc

@@ -124,6 +124,8 @@ include-tagged::{doc-tests-file}[{api}-classification]
 <10> The seed to be used by the random generator that picks which rows are used in training.
 <11> The optimization objective to target when assigning class labels. Defaults to maximize_minimum_recall.
 <12> The number of top classes to be reported in the results. Defaults to 2.
+<13> Custom feature processors that will create new features for analysis from the included document
+fields. Note, automatic categorical {ml-docs}/ml-feature-encoding.html[feature encoding] still occurs for all features.
 
 ===== Regression
 
@@ -146,6 +148,8 @@ include-tagged::{doc-tests-file}[{api}-regression]
 <10> The seed to be used by the random generator that picks which rows are used in training.
 <11> The loss function used for regression. Defaults to `mse`.
 <12> An optional parameter to the loss function.
+<13> Custom feature processors that will create new features for analysis from the included document
+fields. Note, automatic categorical {ml-docs}/ml-feature-encoding.html[feature encoding] still occurs for all features.
 
 ==== Analyzed fields
 
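Callout <13> exists for both analyses. A hedged regression-side sketch, assuming Regression exposes the same static builder(dependentVariable) factory used elsewhere in the HLRC; field names are the illustrative ones from the IT snippets:

import java.util.Arrays;

import org.elasticsearch.client.ml.dataframe.Regression;
import org.elasticsearch.client.ml.inference.preprocessing.OneHotEncoding;

class RegressionFeatureProcessorsSketch {
    // The regression analysis accepts the same processor list as classification.
    static Regression withFeatureProcessors() {
        return Regression.builder("my_dependent_variable")
            .setLossFunction(Regression.LossFunction.MSE)
            .setFeatureProcessors(Arrays.asList(
                OneHotEncoding.builder("categorical_feature")
                    .addOneHot("cat", "cat_column")
                    .build()))
            .build();
    }
}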
|
@ -25,7 +25,7 @@ If the {es} {security-features} are enabled, you must have the following built-i
|
|||||||
* `machine_learning_admin`
|
* `machine_learning_admin`
|
||||||
* source indices: `read`, `view_index_metadata`
|
* source indices: `read`, `view_index_metadata`
|
||||||
* destination index: `read`, `create_index`, `manage` and `index`
|
* destination index: `read`, `create_index`, `manage` and `index`
|
||||||
|
|
||||||
For more information, see <<built-in-roles>>, <<security-privileges>>, and
|
For more information, see <<built-in-roles>>, <<security-privileges>>, and
|
||||||
{ml-docs-setup-privileges}.
|
{ml-docs-setup-privileges}.
|
||||||
|
|
||||||
@ -33,20 +33,20 @@ For more information, see <<built-in-roles>>, <<security-privileges>>, and
|
|||||||
NOTE: The {dfanalytics-job} remembers which roles the user who created it had at
|
NOTE: The {dfanalytics-job} remembers which roles the user who created it had at
|
||||||
the time of creation. When you start the job, it performs the analysis using
|
the time of creation. When you start the job, it performs the analysis using
|
||||||
those same roles. If you provide
|
those same roles. If you provide
|
||||||
<<http-clients-secondary-authorization,secondary authorization headers>>,
|
<<http-clients-secondary-authorization,secondary authorization headers>>,
|
||||||
those credentials are used instead.
|
those credentials are used instead.
|
||||||
|
|
||||||
[[ml-put-dfanalytics-desc]]
|
[[ml-put-dfanalytics-desc]]
|
||||||
== {api-description-title}
|
== {api-description-title}
|
||||||
|
|
||||||
This API creates a {dfanalytics-job} that performs an analysis on the source
|
This API creates a {dfanalytics-job} that performs an analysis on the source
|
||||||
indices and stores the outcome in a destination index.
|
indices and stores the outcome in a destination index.
|
||||||
|
|
||||||
If the destination index does not exist, it is created automatically when you
|
If the destination index does not exist, it is created automatically when you
|
||||||
start the job. See <<start-dfanalytics>>.
|
start the job. See <<start-dfanalytics>>.
|
||||||
|
|
||||||
If you supply only a subset of the {regression} or {classification} parameters,
|
If you supply only a subset of the {regression} or {classification} parameters,
|
||||||
{ml-docs}/hyperparameters.html[hyperparameter optimization] occurs. It
|
{ml-docs}/hyperparameters.html[hyperparameter optimization] occurs. It
|
||||||
determines a value for each of the undefined parameters.
|
determines a value for each of the undefined parameters.
|
||||||
|
|
||||||
[[ml-put-dfanalytics-path-params]]
|
[[ml-put-dfanalytics-path-params]]
|
||||||
@ -61,9 +61,9 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=job-id-data-frame-analytics-def
|
|||||||
== {api-request-body-title}
|
== {api-request-body-title}
|
||||||
|
|
||||||
`allow_lazy_start`::
|
`allow_lazy_start`::
|
||||||
(Optional, boolean)
|
(Optional, boolean)
|
||||||
Specifies whether this job can start when there is insufficient {ml} node
|
Specifies whether this job can start when there is insufficient {ml} node
|
||||||
capacity for it to be immediately assigned to a node. The default is `false`; if
|
capacity for it to be immediately assigned to a node. The default is `false`; if
|
||||||
a {ml} node with capacity to run the job cannot immediately be found, the
|
a {ml} node with capacity to run the job cannot immediately be found, the
|
||||||
<<start-dfanalytics>> API returns an error. However, this is also subject to the
|
<<start-dfanalytics>> API returns an error. However, this is also subject to the
|
||||||
cluster-wide `xpack.ml.max_lazy_ml_nodes` setting. See <<advanced-ml-settings>>.
|
cluster-wide `xpack.ml.max_lazy_ml_nodes` setting. See <<advanced-ml-settings>>.
|
||||||
@ -86,7 +86,7 @@ one of the following types of analysis: {classification}, {oldetection}, or
|
|||||||
The configuration information necessary to perform
|
The configuration information necessary to perform
|
||||||
{ml-docs}/dfa-classification.html[{classification}].
|
{ml-docs}/dfa-classification.html[{classification}].
|
||||||
+
|
+
|
||||||
TIP: Advanced parameters are for fine-tuning {classanalysis}. They are set
|
TIP: Advanced parameters are for fine-tuning {classanalysis}. They are set
|
||||||
automatically by hyperparameter optimization to give the minimum validation
|
automatically by hyperparameter optimization to give the minimum validation
|
||||||
error. It is highly recommended to use the default values unless you fully
|
error. It is highly recommended to use the default values unless you fully
|
||||||
understand the function of these parameters.
|
understand the function of these parameters.
|
||||||
@ -108,23 +108,27 @@ categorical (`ip` or `keyword`), or boolean. There must be no more than 30
|
|||||||
different values in this field.
|
different values in this field.
|
||||||
|
|
||||||
`eta`::::
|
`eta`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=eta]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=eta]
|
||||||
|
|
||||||
`feature_bag_fraction`::::
|
`feature_bag_fraction`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
|
||||||
|
|
||||||
|
`feature_processors`::::
|
||||||
|
(Optional, list)
|
||||||
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=dfas-feature-processors]
|
||||||
|
|
||||||
`gamma`::::
|
`gamma`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=gamma]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=gamma]
|
||||||
|
|
||||||
`lambda`::::
|
`lambda`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=lambda]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=lambda]
|
||||||
|
|
||||||
`max_trees`::::
|
`max_trees`::::
|
||||||
(Optional, integer)
|
(Optional, integer)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=max-trees]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=max-trees]
|
||||||
|
|
||||||
`num_top_classes`::::
|
`num_top_classes`::::
|
||||||
@ -136,11 +140,11 @@ categories, the API reports all category probabilities. Defaults to 2.
|
|||||||
`num_top_feature_importance_values`::::
|
`num_top_feature_importance_values`::::
|
||||||
(Optional, integer)
|
(Optional, integer)
|
||||||
Advanced configuration option. Specifies the maximum number of
|
Advanced configuration option. Specifies the maximum number of
|
||||||
{ml-docs}/ml-feature-importance.html[{feat-imp}] values per document to return.
|
{ml-docs}/ml-feature-importance.html[{feat-imp}] values per document to return.
|
||||||
By default, it is zero and no {feat-imp} calculation occurs.
|
By default, it is zero and no {feat-imp} calculation occurs.
|
||||||
|
|
||||||
`prediction_field_name`::::
|
`prediction_field_name`::::
|
||||||
(Optional, string)
|
(Optional, string)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=prediction-field-name]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=prediction-field-name]
|
||||||
|
|
||||||
`randomize_seed`::::
|
`randomize_seed`::::
|
||||||
@ -164,23 +168,23 @@ The configuration information necessary to perform
|
|||||||
`compute_feature_influence`::::
|
`compute_feature_influence`::::
|
||||||
(Optional, boolean)
|
(Optional, boolean)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=compute-feature-influence]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=compute-feature-influence]
|
||||||
|
|
||||||
`feature_influence_threshold`::::
|
`feature_influence_threshold`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-influence-threshold]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-influence-threshold]
|
||||||
|
|
||||||
`method`::::
|
`method`::::
|
||||||
(Optional, string)
|
(Optional, string)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=method]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=method]
|
||||||
|
|
||||||
`n_neighbors`::::
|
`n_neighbors`::::
|
||||||
(Optional, integer)
|
(Optional, integer)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=n-neighbors]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=n-neighbors]
|
||||||
|
|
||||||
`outlier_fraction`::::
|
`outlier_fraction`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=outlier-fraction]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=outlier-fraction]
|
||||||
|
|
||||||
`standardization_enabled`::::
|
`standardization_enabled`::::
|
||||||
(Optional, boolean)
|
(Optional, boolean)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=standardization-enabled]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=standardization-enabled]
|
||||||
@ -192,7 +196,7 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=standardization-enabled]
|
|||||||
The configuration information necessary to perform
|
The configuration information necessary to perform
|
||||||
{ml-docs}/dfa-regression.html[{regression}].
|
{ml-docs}/dfa-regression.html[{regression}].
|
||||||
+
|
+
|
||||||
TIP: Advanced parameters are for fine-tuning {reganalysis}. They are set
|
TIP: Advanced parameters are for fine-tuning {reganalysis}. They are set
|
||||||
automatically by hyperparameter optimization to give minimum validation error.
|
automatically by hyperparameter optimization to give minimum validation error.
|
||||||
It is highly recommended to use the default values unless you fully understand
|
It is highly recommended to use the default values unless you fully understand
|
||||||
the function of these parameters.
|
the function of these parameters.
|
||||||
@ -215,20 +219,24 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=eta]
|
|||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=feature-bag-fraction]
|
||||||
|
|
||||||
|
`feature_processors`::::
|
||||||
|
(Optional, list)
|
||||||
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=dfas-feature-processors]
|
||||||
|
|
||||||
`gamma`::::
|
`gamma`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=gamma]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=gamma]
|
||||||
|
|
||||||
`lambda`::::
|
`lambda`::::
|
||||||
(Optional, double)
|
(Optional, double)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=lambda]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=lambda]
|
||||||
|
|
||||||
`loss_function`::::
|
`loss_function`::::
|
||||||
(Optional, string)
|
(Optional, string)
|
||||||
The loss function used during {regression}. Available options are `mse` (mean
|
The loss function used during {regression}. Available options are `mse` (mean
|
||||||
squared error), `msle` (mean squared logarithmic error), `huber` (Pseudo-Huber
|
squared error), `msle` (mean squared logarithmic error), `huber` (Pseudo-Huber
|
||||||
loss). Defaults to `mse`. Refer to
|
loss). Defaults to `mse`. Refer to
|
||||||
{ml-docs}/dfa-regression.html#dfa-regression-lossfunction[Loss functions for {regression} analyses]
|
{ml-docs}/dfa-regression.html#dfa-regression-lossfunction[Loss functions for {regression} analyses]
|
||||||
to learn more.
|
to learn more.
|
||||||
|
|
||||||
`loss_function_parameter`::::
|
`loss_function_parameter`::::
|
||||||
@ -236,13 +244,13 @@ to learn more.
|
|||||||
A positive number that is used as a parameter to the `loss_function`.
|
A positive number that is used as a parameter to the `loss_function`.
|
||||||
|
|
||||||
`max_trees`::::
|
`max_trees`::::
|
||||||
(Optional, integer)
|
(Optional, integer)
|
||||||
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=max-trees]
|
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=max-trees]
|
||||||
|
|
||||||
`num_top_feature_importance_values`::::
|
`num_top_feature_importance_values`::::
|
||||||
(Optional, integer)
|
(Optional, integer)
|
||||||
Advanced configuration option. Specifies the maximum number of
|
Advanced configuration option. Specifies the maximum number of
|
||||||
{ml-docs}/ml-feature-importance.html[{feat-imp}] values per document to return.
|
{ml-docs}/ml-feature-importance.html[{feat-imp}] values per document to return.
|
||||||
By default, it is zero and no {feat-imp} calculation occurs.
|
By default, it is zero and no {feat-imp} calculation occurs.
|
||||||
|
|
||||||
`prediction_field_name`::::
|
`prediction_field_name`::::
|
||||||
@ -264,31 +272,31 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=training-percent]
|
|||||||
//Begin analyzed_fields
|
//Begin analyzed_fields
|
||||||
`analyzed_fields`::
|
`analyzed_fields`::
|
||||||
(Optional, object)
|
(Optional, object)
|
||||||
Specify `includes` and/or `excludes` patterns to select which fields will be
|
Specify `includes` and/or `excludes` patterns to select which fields will be
|
||||||
included in the analysis. The patterns specified in `excludes` are applied last,
|
included in the analysis. The patterns specified in `excludes` are applied last,
|
||||||
therefore `excludes` takes precedence. In other words, if the same field is
|
therefore `excludes` takes precedence. In other words, if the same field is
|
||||||
specified in both `includes` and `excludes`, then the field will not be included
|
specified in both `includes` and `excludes`, then the field will not be included
|
||||||
in the analysis.
|
in the analysis.
|
||||||
+
|
+
|
||||||
--
|
--
|
||||||
[[dfa-supported-fields]]
|
[[dfa-supported-fields]]
|
||||||
The supported fields for each type of analysis are as follows:
|
The supported fields for each type of analysis are as follows:
|
||||||
|
|
||||||
* {oldetection-cap} requires numeric or boolean data to analyze. The algorithms
|
* {oldetection-cap} requires numeric or boolean data to analyze. The algorithms
|
||||||
don't support missing values therefore fields that have data types other than
|
don't support missing values therefore fields that have data types other than
|
||||||
numeric or boolean are ignored. Documents where included fields contain missing
|
numeric or boolean are ignored. Documents where included fields contain missing
|
||||||
values, null values, or an array are also ignored. Therefore the `dest` index
|
values, null values, or an array are also ignored. Therefore the `dest` index
|
||||||
may contain documents that don't have an {olscore}.
|
may contain documents that don't have an {olscore}.
|
||||||
* {regression-cap} supports fields that are numeric, `boolean`, `text`,
|
* {regression-cap} supports fields that are numeric, `boolean`, `text`,
|
||||||
`keyword`, and `ip`. It is also tolerant of missing values. Fields that are
|
`keyword`, and `ip`. It is also tolerant of missing values. Fields that are
|
||||||
supported are included in the analysis, other fields are ignored. Documents
|
supported are included in the analysis, other fields are ignored. Documents
|
||||||
where included fields contain an array with two or more values are also
|
where included fields contain an array with two or more values are also
|
||||||
ignored. Documents in the `dest` index that don’t contain a results field are
|
ignored. Documents in the `dest` index that don’t contain a results field are
|
||||||
not included in the {reganalysis}.
|
not included in the {reganalysis}.
|
||||||
* {classification-cap} supports fields that are numeric, `boolean`, `text`,
|
* {classification-cap} supports fields that are numeric, `boolean`, `text`,
|
||||||
`keyword`, and `ip`. It is also tolerant of missing values. Fields that are
|
`keyword`, and `ip`. It is also tolerant of missing values. Fields that are
|
||||||
supported are included in the analysis, other fields are ignored. Documents
|
supported are included in the analysis, other fields are ignored. Documents
|
||||||
where included fields contain an array with two or more values are also ignored.
|
where included fields contain an array with two or more values are also ignored.
|
||||||
Documents in the `dest` index that don’t contain a results field are not
|
Documents in the `dest` index that don’t contain a results field are not
|
||||||
included in the {classanalysis}. {classanalysis-cap} can be improved by mapping
|
included in the {classanalysis}. {classanalysis-cap} can be improved by mapping
|
||||||
ordinal variable values to a single number. For example, in case of age ranges,
|
ordinal variable values to a single number. For example, in case of age ranges,
|
||||||
@ -310,7 +318,7 @@ analysis. You do not need to add fields with unsupported data types to
|
|||||||
|
|
||||||
`includes`:::
|
`includes`:::
|
||||||
(Optional, array)
|
(Optional, array)
|
||||||
An array of strings that defines the fields that will be included in the
|
An array of strings that defines the fields that will be included in the
|
||||||
analysis.
|
analysis.
|
||||||
//End analyzed_fields
|
//End analyzed_fields
|
||||||
====
|
====
|
||||||
@ -330,16 +338,16 @@ The default value is `1`. Using more threads may decrease the time
|
|||||||
necessary to complete the analysis at the cost of using more CPU.
|
necessary to complete the analysis at the cost of using more CPU.
|
||||||
Note that the process may use additional threads for operational
|
Note that the process may use additional threads for operational
|
||||||
functionality other than the analysis itself.
|
functionality other than the analysis itself.
|
||||||
|
|
||||||
`model_memory_limit`::
|
`model_memory_limit`::
|
||||||
(Optional, string)
|
(Optional, string)
|
||||||
The approximate maximum amount of memory resources that are permitted for
|
The approximate maximum amount of memory resources that are permitted for
|
||||||
analytical processing. The default value for {dfanalytics-jobs} is `1gb`. If
|
analytical processing. The default value for {dfanalytics-jobs} is `1gb`. If
|
||||||
your `elasticsearch.yml` file contains an `xpack.ml.max_model_memory_limit`
|
your `elasticsearch.yml` file contains an `xpack.ml.max_model_memory_limit`
|
||||||
setting, an error occurs when you try to create {dfanalytics-jobs} that have
|
setting, an error occurs when you try to create {dfanalytics-jobs} that have
|
||||||
`model_memory_limit` values greater than that setting. For more information, see
|
`model_memory_limit` values greater than that setting. For more information, see
|
||||||
<<ml-settings>>.
|
<<ml-settings>>.
|
||||||
|
|
||||||
`source`::
|
`source`::
|
||||||
(object)
|
(object)
|
||||||
The configuration of how to source the analysis data. It requires an `index`.
|
The configuration of how to source the analysis data. It requires an `index`.
|
||||||
@ -353,7 +361,7 @@ Optionally, `query` and `_source` may be specified.
|
|||||||
It can be a single index or index pattern as well as an array of indices or
|
It can be a single index or index pattern as well as an array of indices or
|
||||||
patterns.
|
patterns.
|
||||||
+
|
+
|
||||||
WARNING: If your source indices contain documents with the same IDs, only the
|
WARNING: If your source indices contain documents with the same IDs, only the
|
||||||
document that is indexed last appears in the destination index.
|
document that is indexed last appears in the destination index.
|
||||||
|
|
||||||
`query`:::
|
`query`:::
|
||||||
@ -374,7 +382,7 @@ included in the analysis.
|
|||||||
`includes`::::
|
`includes`::::
|
||||||
(array) An array of strings that defines the fields that will be included in the
|
(array) An array of strings that defines the fields that will be included in the
|
||||||
destination.
|
destination.
|
||||||
|
|
||||||
`excludes`::::
|
`excludes`::::
|
||||||
(array) An array of strings that defines the fields that will be excluded from
|
(array) An array of strings that defines the fields that will be excluded from
|
||||||
the destination.
|
the destination.
|
||||||
@ -390,8 +398,8 @@ the destination.
|
|||||||
[[ml-put-dfanalytics-example-preprocess]]
|
[[ml-put-dfanalytics-example-preprocess]]
|
||||||
=== Preprocessing actions example
|
=== Preprocessing actions example
|
||||||
|
|
||||||
The following example shows how to limit the scope of the analysis to certain
|
The following example shows how to limit the scope of the analysis to certain
|
||||||
fields, specify excluded fields in the destination index, and use a query to
|
fields, specify excluded fields in the destination index, and use a query to
|
||||||
filter your data before analysis.
|
filter your data before analysis.
|
||||||
|
|
||||||
[source,console]
|
[source,console]
|
||||||
@ -404,7 +412,7 @@ PUT _ml/data_frame/analytics/model-flight-delays-pre
|
|||||||
],
|
],
|
||||||
"query": { <2>
|
"query": { <2>
|
||||||
"range": {
|
"range": {
|
||||||
"DistanceKilometers": {
|
"DistanceKilometers": {
|
||||||
"gt": 0
|
"gt": 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -429,7 +437,7 @@ PUT _ml/data_frame/analytics/model-flight-delays-pre
|
|||||||
},
|
},
|
||||||
"analyzed_fields": { <5>
|
"analyzed_fields": { <5>
|
||||||
"includes": [],
|
"includes": [],
|
||||||
"excludes": [
|
"excludes": [
|
||||||
"FlightNum"
|
"FlightNum"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -439,29 +447,29 @@ PUT _ml/data_frame/analytics/model-flight-delays-pre
|
|||||||
// TEST[skip:setup kibana sample data]
|
// TEST[skip:setup kibana sample data]
|
||||||
|
|
||||||
<1> Source index to analyze.
|
<1> Source index to analyze.
|
||||||
<2> This query filters out entire documents that will not be present in the
|
<2> This query filters out entire documents that will not be present in the
|
||||||
destination index.
|
destination index.
|
||||||
<3> The `_source` object defines fields in the dataset that will be included or
|
<3> The `_source` object defines fields in the dataset that will be included or
|
||||||
excluded in the destination index.
|
excluded in the destination index.
|
||||||
<4> Defines the destination index that contains the results of the analysis and
|
<4> Defines the destination index that contains the results of the analysis and
|
||||||
the fields of the source index specified in the `_source` object. Also defines
|
the fields of the source index specified in the `_source` object. Also defines
|
||||||
the name of the `results_field`.
|
the name of the `results_field`.
|
||||||
<5> Specifies fields to be included in or excluded from the analysis. This does
|
<5> Specifies fields to be included in or excluded from the analysis. This does
|
||||||
not affect whether the fields will be present in the destination index, only
|
not affect whether the fields will be present in the destination index, only
|
||||||
affects whether they are used in the analysis.
|
affects whether they are used in the analysis.
|
||||||

In this example, we can see that all the fields of the source index are included
in the destination index except `FlightDelay` and `FlightDelayType` because
these are defined as excluded fields by the `excludes` parameter of the
`_source` object. The `FlightNum` field is included in the destination index;
however, it is not included in the analysis because it is explicitly specified
as an excluded field by the `excludes` parameter of the `analyzed_fields`
object.

[[ml-put-dfanalytics-example-od]]
=== {oldetection-cap} example

The following example creates the `loganalytics` {dfanalytics-job}; the
analysis type is `outlier_detection`:

[source,console]
@@ -525,7 +533,7 @@ The API returns the following result:
[[ml-put-dfanalytics-example-r]]
=== {regression-cap} examples

The following example creates the `house_price_regression_analysis`
{dfanalytics-job}; the analysis type is `regression`:

[source,console]
@@ -538,7 +546,7 @@ PUT _ml/data_frame/analytics/house_price_regression_analysis
  "dest": {
    "index": "house_price_predictions"
  },
  "analysis":
    {
      "regression": {
        "dependent_variable": "price"
@@ -614,7 +622,7 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3
[[ml-put-dfanalytics-example-c]]
=== {classification-cap} example

The following example creates the `loan_classification` {dfanalytics-job}; the
analysis type is `classification`:

[source,console]
@@ -453,10 +453,10 @@ Defaults to `true`.
end::delayed-data-check-config[]

tag::dependent-variable[]
Defines which field of the document is to be predicted.
This parameter is supplied by field name and must match one of the fields in
the index being used to train. If this field is missing from a document, then
that document will not be used for training, but a prediction with the trained
model will be generated for it. It is also known as the continuous target
variable.
end::dependent-variable[]

@@ -513,10 +513,18 @@ The value of the downsample factor.
end::dfas-downsample-factor[]

tag::dfas-eta-growth[]
Specifies the rate at which `eta` increases for each new tree that is added to
the forest. For example, a rate of `1.05` increases `eta` by 5%.
end::dfas-eta-growth[]

tag::dfas-feature-processors[]
A collection of feature preprocessors that modify one or more included fields.
The analysis uses the resulting features instead of the original document
fields. Multiple `feature_processors` entries can refer to the same document
fields. Note that automatic categorical
{ml-docs}/ml-feature-encoding.html[feature encoding] still occurs.
end::dfas-feature-processors[]
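
For orientation, here is a minimal sketch of a `feature_processors` array inside
a classification analysis. The job name, index names, field name, and `hot_map`
entries are illustrative assumptions rather than values taken from the examples
above:

[source,console]
----
PUT _ml/data_frame/analytics/sketch-with-feature-processors
{
  "source": { "index": "my-source-index" },
  "dest": { "index": "my-dest-index" },
  "analysis": {
    "classification": {
      "dependent_variable": "label",
      "feature_processors": [
        {
          "one_hot_encoding": {
            "field": "day_of_week", <1>
            "hot_map": {
              "monday": "is_monday", <2>
              "sunday": "is_sunday"
            }
          }
        }
      ]
    }
  }
}
----
// TEST[skip:illustrative sketch, not a tested example]
<1> The document field that the preprocessor consumes.
<2> Each entry maps a field value to the name of the resulting one-hot feature.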

tag::dfas-iteration[]
The number of iterations on the analysis.
end::dfas-iteration[]
@@ -529,9 +537,9 @@ training stops.
end::dfas-max-attempts[]

tag::dfas-max-optimization-rounds[]
A multiplier responsible for determining the maximum number of
hyperparameter optimization steps in the Bayesian optimization procedure.
The maximum number of steps is determined based on the number of undefined
hyperparameters times the maximum optimization rounds per hyperparameter.
end::dfas-max-optimization-rounds[]

@@ -595,10 +603,10 @@ functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
end::empty-bucket-count[]

tag::eta[]
Advanced configuration option. The shrinkage applied to the weights. Smaller
values result in larger forests which have a better generalization error.
However, the smaller the value, the longer the training will take. For more
information about shrinkage, see
{wikipedia}/Gradient_boosting#Shrinkage[this wiki article]. By
default, this value is calculated during hyperparameter optimization.
end::eta[]
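
As a hedged illustration, here is a sketch that fixes `eta` (and, for contrast,
the `feature_bag_fraction` option described further below) instead of leaving
them to hyperparameter optimization; all names are placeholders:

[source,console]
----
PUT _ml/data_frame/analytics/sketch-fixed-shrinkage
{
  "source": { "index": "my-training-index" },
  "dest": { "index": "my-results-index" },
  "analysis": {
    "regression": {
      "dependent_variable": "target",
      "eta": 0.05, <1>
      "feature_bag_fraction": 0.8 <2>
    }
  }
}
----
// TEST[skip:illustrative sketch, not a tested example]
<1> A small shrinkage value; expect a larger forest and longer training.
<2> Use 80% of the features when selecting the random bag for each candidate
split.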
@@ -624,13 +632,13 @@ this value to determine the number of unique categories that were missed.
end::failed-category-count[]

tag::feature-bag-fraction[]
Advanced configuration option. Defines the fraction of features that will be
used when selecting a random bag for each candidate split. By default, this
value is calculated during hyperparameter optimization.
end::feature-bag-fraction[]

tag::feature-influence-threshold[]
The minimum {olscore} that a document needs to have to calculate its feature
influence score. Value range: 0-1 (`0.1` by default).
end::feature-influence-threshold[]

@@ -675,10 +683,10 @@ The analysis function that is used. For example, `count`, `rare`, `mean`, `min`,
end::function[]

tag::gamma[]
Advanced configuration option. Regularization parameter to prevent overfitting
on the training data set. Multiplies a linear penalty associated with the size
of individual trees in the forest. The higher the value, the more training will
prefer smaller trees. The smaller this parameter, the larger individual trees
will be and the longer training will take. By default, this value is calculated
during hyperparameter optimization.
end::gamma[]
@@ -798,8 +806,8 @@ information for all {anomaly-jobs}.
end::job-id-anomaly-detection-default[]

tag::job-id-anomaly-detection-define[]
Identifier for the {anomaly-job}. This identifier can contain lowercase
alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start
and end with alphanumeric characters.
end::job-id-anomaly-detection-define[]

@@ -843,12 +851,12 @@ For more information, see <<ml-jobstats>>.
end::jobs-stats-anomaly-detection[]

tag::lambda[]
Advanced configuration option. Regularization parameter to prevent overfitting
on the training data set. Multiplies an L2 regularization term which applies to
leaf weights of the individual trees in the forest. The higher the value, the
more training will attempt to keep leaf weights small. This makes the prediction
function smoother at the expense of potentially not being able to capture
relevant relationships between the features and the {depvar}. The smaller this
parameter, the larger individual trees will be and the longer training will
take. By default, this value is calculated during hyperparameter optimization.
end::lambda[]
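
To make the regularization trade-off concrete, here is a brief sketch that pins
both `gamma` and `lambda` rather than letting hyperparameter optimization choose
them; index and field names are placeholders:

[source,console]
----
PUT _ml/data_frame/analytics/sketch-regularized-regression
{
  "source": { "index": "my-training-index" },
  "dest": { "index": "my-results-index" },
  "analysis": {
    "regression": {
      "dependent_variable": "target",
      "gamma": 0.5, <1>
      "lambda": 1.0 <2>
    }
  }
}
----
// TEST[skip:illustrative sketch, not a tested example]
<1> Penalizes tree size; higher values steer training toward smaller trees.
<2> L2 penalty on leaf weights; higher values yield a smoother prediction
function.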
@@ -1098,8 +1106,8 @@ For open jobs only, the elapsed time for which the job has been open.
end::open-time[]

tag::outlier-fraction[]
The proportion of the data set that is assumed to be outlying prior to
{oldetection}. For example, 0.05 means it is assumed that 5% of values are real
outliers and 95% are inliers.
end::outlier-fraction[]

@@ -1185,7 +1193,7 @@ tag::randomize-seed[]
Defines the seed for the random generator that is used to pick
which documents will be used for training. By default, it is randomly
generated. Set it to a specific value to ensure the same documents are used
for training, assuming that other related parameters (for example, `source`
and `analyzed_fields`) are the same.
end::randomize-seed[]
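
Here is a short sketch of pinning the seed so that repeated runs pick the same
training documents; the job, index, and field names are placeholders:

[source,console]
----
PUT _ml/data_frame/analytics/sketch-reproducible-training
{
  "source": { "index": "loans" },
  "dest": { "index": "loan_predictions" },
  "analysis": {
    "classification": {
      "dependent_variable": "loan_status",
      "randomize_seed": 42 <1>
    }
  }
}
----
// TEST[skip:illustrative sketch, not a tested example]
<1> Re-running with the same seed, `source`, and `analyzed_fields` selects the
same training documents.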

@@ -1264,8 +1272,8 @@ end::sparse-bucket-count[]

tag::standardization-enabled[]
If `true`, the following operation is performed on the columns before computing
{olscores}: `(x_i - mean(x_i)) / sd(x_i)`. Defaults to `true`. For
more information about this concept, see
https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[Wikipedia].
end::standardization-enabled[]
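
Pulling together several of the {oldetection} parameters defined above, here is
a hedged sketch; the job and index names are placeholders:

[source,console]
----
PUT _ml/data_frame/analytics/sketch-tuned-outlier-detection
{
  "source": { "index": "logdata" },
  "dest": { "index": "logdata_with_outliers" },
  "analysis": {
    "outlier_detection": {
      "outlier_fraction": 0.05, <1>
      "feature_influence_threshold": 0.2, <2>
      "standardization_enabled": true <3>
    }
  }
}
----
// TEST[skip:illustrative sketch, not a tested example]
<1> Assume 5% of the values are real outliers.
<2> Compute feature influence only for documents whose {olscore} is at least
`0.2`.
<3> Standardize each column to zero mean and unit variance before computing
{olscores}.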

@@ -1340,12 +1348,12 @@ when the mode is set to `manual`. For example: `3h`.
end::time-span[]

tag::timeout-start[]
Controls the amount of time to wait until the {dfanalytics-job} starts. Defaults
to 20 seconds.
end::timeout-start[]

tag::timeout-stop[]
Controls the amount of time to wait until the {dfanalytics-job} stops. Defaults
to 20 seconds.
end::timeout-stop[]
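
As a hedged illustration, both timeouts can be supplied as query parameters on
the start and stop APIs; the job name is a placeholder:

[source,console]
----
POST _ml/data_frame/analytics/loganalytics/_start?timeout=2m <1>

POST _ml/data_frame/analytics/loganalytics/_stop?timeout=90s <2>
----
// TEST[skip:illustrative sketch, not a tested example]
<1> Wait up to two minutes for the job to start instead of the default 20
seconds.
<2> Wait up to 90 seconds for the job to stop.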