[ML] allow datafeeds to run if there are any concrete indices (#62827) (#62965)

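This commit allows a datafeed to be assigned to a node as long as at least one of its index patterns resolves to concrete indices. Previously, every configured index or pattern had to resolve to a concrete index.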
This commit is contained in:
Benjamin Trent 2020-09-28 12:58:07 -04:00 committed by GitHub
parent 2247ab3295
commit a054e62bc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 33 deletions

View File

@ -33,7 +33,6 @@ import org.junit.After;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -80,10 +79,9 @@ public class DatafeedJobsIT extends MlNativeAutodetectIntegTestCase {
openJob(job.getId());
assertBusy(() -> assertEquals(getJobStats(job.getId()).get(0).getState(), JobState.OPENED));
List<String> t = new ArrayList<>(2);
t.add("data-1");
t.add("data-2");
DatafeedConfig datafeedConfig = createDatafeed(job.getId() + "-datafeed", job.getId(), t);
// Having a pattern with missing indices is acceptable
List<String> indices = Arrays.asList("data-*", "missing-*");
DatafeedConfig datafeedConfig = createDatafeed(job.getId() + "-datafeed", job.getId(), indices);
registerDatafeed(datafeedConfig);
putDatafeed(datafeedConfig);

View File

@ -14,6 +14,7 @@ import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.license.RemoteClusterLicenseChecker;
import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
import org.elasticsearch.rest.RestStatus;
@ -120,24 +121,22 @@ public class DatafeedNodeSelector {
@Nullable
private AssignmentFailure verifyIndicesActive() {
for (String index : datafeedIndices) {
if (RemoteClusterLicenseChecker.isRemoteIndex(index)) {
String[] index = datafeedIndices.stream()
// We cannot verify remote indices
continue;
}
.filter(i -> RemoteClusterLicenseChecker.isRemoteIndex(i) == false)
.toArray(String[]::new);
String[] concreteIndices;
final String[] concreteIndices;
try {
concreteIndices = resolver.concreteIndexNames(clusterState, indicesOptions, true, index);
if (concreteIndices.length == 0) {
return new AssignmentFailure("cannot start datafeed [" + datafeedId + "] because index ["
+ index + "] does not exist, is closed, or is still initializing.", true);
+ Strings.arrayToCommaDelimitedString(index) + "] does not exist, is closed, or is still initializing.", true);
}
} catch (Exception e) {
String msg = new ParameterizedMessage("failed resolving indices given [{}] and indices_options [{}]",
index,
Strings.arrayToCommaDelimitedString(index),
indicesOptions).getFormattedMessage();
LOGGER.debug("[" + datafeedId + "] " + msg, e);
return new AssignmentFailure(
@ -152,7 +151,6 @@ public class DatafeedNodeSelector {
+ concreteIndex + "] does not have all primary shards active yet.", false);
}
}
}
return null;
}

View File

@ -40,6 +40,7 @@ import org.junit.Before;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@ -319,6 +320,31 @@ public class DatafeedNodeSelectorTests extends ESTestCase {
"]] with exception [no such index [not_foo]]]"));
}
/**
 * A datafeed configured with one pattern that matches nothing ("missing-*") and one
 * pattern that matches the existing "foo" index must still be assigned to the node
 * running its job, and the task-creation check must complete without throwing.
 */
public void testIndexPatternDoesntExist() {
    Job scheduledJob = createScheduledJob("job_id").build(new Date());
    List<String> indexPatterns = Arrays.asList("missing-*", "foo*");
    DatafeedConfig datafeed = createDatafeed("datafeed_id", scheduledJob.getId(), indexPatterns);

    // The job is opened on "node_id", which is where the datafeed is expected to land.
    PersistentTasksCustomMetadata.Builder jobTasks = PersistentTasksCustomMetadata.builder();
    addJobTask(scheduledJob.getId(), "node_id", JobState.OPENED, jobTasks);
    tasks = jobTasks.build();

    // Only the "foo" index is present in the cluster state; nothing matches "missing-*".
    givenClusterState("foo", 1, 0);

    // Node selection succeeds even though one of the two patterns resolves to no index.
    DatafeedNodeSelector assigningSelector = new DatafeedNodeSelector(
        clusterState,
        resolver,
        datafeed.getId(),
        datafeed.getJobId(),
        datafeed.getIndices(),
        SearchRequest.DEFAULT_INDICES_OPTIONS);
    PersistentTasksCustomMetadata.Assignment assignment = assigningSelector.selectNode();
    assertEquals("node_id", assignment.getExecutorNode());

    // A freshly built selector is used for the creation check, which has no assertion of
    // its own: the test passes only if the call returns normally.
    DatafeedNodeSelector validatingSelector = new DatafeedNodeSelector(
        clusterState,
        resolver,
        datafeed.getId(),
        datafeed.getJobId(),
        datafeed.getIndices(),
        SearchRequest.DEFAULT_INDICES_OPTIONS);
    validatingSelector.checkDatafeedTaskCanBeCreated();
}
public void testRemoteIndex() {
Job job = createScheduledJob("job_id").build(new Date());
DatafeedConfig df = createDatafeed("datafeed_id", job.getId(), Collections.singletonList("remote:foo"));