Delete custom index if the only contained job is deleted (#33788)

* Delete custom index if the only contained job is deleted
This commit is contained in:
Benjamin Trent 2018-09-19 07:42:26 -07:00 committed by GitHub
parent c118581617
commit 4190a9f1e9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 145 additions and 42 deletions

View File

@ -11,8 +11,12 @@ import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesResponse; import org.elasticsearch.action.admin.indices.alias.get.GetAliasesResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchAction;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
@ -23,10 +27,12 @@ import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.reindex.BulkByScrollResponse; import org.elasticsearch.index.reindex.BulkByScrollResponse;
import org.elasticsearch.index.reindex.DeleteByQueryAction; import org.elasticsearch.index.reindex.DeleteByQueryAction;
import org.elasticsearch.index.reindex.DeleteByQueryRequest; import org.elasticsearch.index.reindex.DeleteByQueryRequest;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.Task;
import org.elasticsearch.tasks.TaskId; import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction; import org.elasticsearch.xpack.core.ml.action.GetModelSnapshotsAction;
@ -69,43 +75,103 @@ public class JobStorageDeletionTask extends Task {
final String indexName = AnomalyDetectorsIndex.getPhysicalIndexFromState(state, jobId); final String indexName = AnomalyDetectorsIndex.getPhysicalIndexFromState(state, jobId);
final String indexPattern = indexName + "-*"; final String indexPattern = indexName + "-*";
ActionListener<Boolean> deleteAliasHandler = ActionListener.wrap(finishedHandler, failureHandler); final ActionListener<AcknowledgedResponse> completionHandler = ActionListener.wrap(
response -> finishedHandler.accept(response.isAcknowledged()),
failureHandler);
// Step 5. DBQ state done, delete the aliases // Step 7. If we did not drop the index and after DBQ state done, we delete the aliases
ActionListener<BulkByScrollResponse> dbqHandler = ActionListener.wrap( ActionListener<BulkByScrollResponse> dbqHandler = ActionListener.wrap(
bulkByScrollResponse -> { bulkByScrollResponse -> {
if (bulkByScrollResponse.isTimedOut()) { if (bulkByScrollResponse == null) { // no action was taken by DBQ, assume Index was deleted
logger.warn("[{}] DeleteByQuery for indices [{}, {}] timed out.", jobId, indexName, indexPattern); completionHandler.onResponse(new AcknowledgedResponse(true));
} } else {
if (!bulkByScrollResponse.getBulkFailures().isEmpty()) { if (bulkByScrollResponse.isTimedOut()) {
logger.warn("[{}] {} failures and {} conflicts encountered while running DeleteByQuery on indices [{}, {}].", logger.warn("[{}] DeleteByQuery for indices [{}, {}] timed out.", jobId, indexName, indexPattern);
}
if (!bulkByScrollResponse.getBulkFailures().isEmpty()) {
logger.warn("[{}] {} failures and {} conflicts encountered while running DeleteByQuery on indices [{}, {}].",
jobId, bulkByScrollResponse.getBulkFailures().size(), bulkByScrollResponse.getVersionConflicts(), jobId, bulkByScrollResponse.getBulkFailures().size(), bulkByScrollResponse.getVersionConflicts(),
indexName, indexPattern); indexName, indexPattern);
for (BulkItemResponse.Failure failure : bulkByScrollResponse.getBulkFailures()) { for (BulkItemResponse.Failure failure : bulkByScrollResponse.getBulkFailures()) {
logger.warn("DBQ failure: " + failure); logger.warn("DBQ failure: " + failure);
}
} }
deleteAliases(jobId, client, completionHandler);
} }
deleteAliases(jobId, client, deleteAliasHandler);
}, },
failureHandler); failureHandler);
// Step 4. Delete categorizer state done, DeleteByQuery on the index, matching all docs with the right job_id // Step 6. If we did not delete the index, we run a delete by query
ActionListener<Boolean> deleteCategorizerStateHandler = ActionListener.wrap( ActionListener<Boolean> deleteByQueryExecutor = ActionListener.wrap(
response -> { response -> {
logger.info("Running DBQ on [" + indexName + "," + indexPattern + "] for job [" + jobId + "]"); if (response) {
DeleteByQueryRequest request = new DeleteByQueryRequest(indexName, indexPattern); logger.info("Running DBQ on [" + indexName + "," + indexPattern + "] for job [" + jobId + "]");
ConstantScoreQueryBuilder query = DeleteByQueryRequest request = new DeleteByQueryRequest(indexName, indexPattern);
ConstantScoreQueryBuilder query =
new ConstantScoreQueryBuilder(new TermQueryBuilder(Job.ID.getPreferredName(), jobId)); new ConstantScoreQueryBuilder(new TermQueryBuilder(Job.ID.getPreferredName(), jobId));
request.setQuery(query); request.setQuery(query);
request.setIndicesOptions(MlIndicesUtils.addIgnoreUnavailable(IndicesOptions.lenientExpandOpen())); request.setIndicesOptions(MlIndicesUtils.addIgnoreUnavailable(IndicesOptions.lenientExpandOpen()));
request.setSlices(5); request.setSlices(5);
request.setAbortOnVersionConflict(false); request.setAbortOnVersionConflict(false);
request.setRefresh(true); request.setRefresh(true);
executeAsyncWithOrigin(client, ML_ORIGIN, DeleteByQueryAction.INSTANCE, request, dbqHandler); executeAsyncWithOrigin(client, ML_ORIGIN, DeleteByQueryAction.INSTANCE, request, dbqHandler);
} else { // We did not execute DBQ, no need to delete aliases or check the response
dbqHandler.onResponse(null);
}
}, },
failureHandler); failureHandler);
// Step 5. If we have any hits, that means we are NOT the only job on this index, and should not delete it
// if we do not have any hits, we can drop the index and then skip the DBQ and alias deletion
ActionListener<SearchResponse> customIndexSearchHandler = ActionListener.wrap(
searchResponse -> {
if (searchResponse == null || searchResponse.getHits().totalHits > 0) {
deleteByQueryExecutor.onResponse(true); // We need to run DBQ and alias deletion
} else {
logger.info("Running DELETE Index on [" + indexName + "] for job [" + jobId + "]");
DeleteIndexRequest request = new DeleteIndexRequest(indexName);
request.indicesOptions(IndicesOptions.lenientExpandOpen());
// If we have deleted the index, then we don't need to delete the aliases or run the DBQ
executeAsyncWithOrigin(
client.threadPool().getThreadContext(),
ML_ORIGIN,
request,
ActionListener.<AcknowledgedResponse>wrap(
response -> deleteByQueryExecutor.onResponse(false), // skip DBQ && Alias
failureHandler),
client.admin().indices()::delete);
}
},
failure -> {
if (failure.getClass() == IndexNotFoundException.class) { // assume the index is already deleted
deleteByQueryExecutor.onResponse(false); // skip DBQ && Alias
} else {
failureHandler.accept(failure);
}
}
);
// Step 4. Determine if we are on a shared index by looking at `.ml-anomalies-shared` or the custom index's aliases
ActionListener<Boolean> deleteCategorizerStateHandler = ActionListener.wrap(
response -> {
if (indexName.equals(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX +
AnomalyDetectorsIndexFields.RESULTS_INDEX_DEFAULT)) {
customIndexSearchHandler.onResponse(null); //don't bother searching the index any further, we are on the default shared
} else {
SearchSourceBuilder source = new SearchSourceBuilder()
.size(1)
.query(QueryBuilders.boolQuery().filter(
QueryBuilders.boolQuery().mustNot(QueryBuilders.termQuery(Job.ID.getPreferredName(), jobId))));
SearchRequest searchRequest = new SearchRequest(indexName);
searchRequest.source(source);
executeAsyncWithOrigin(client, ML_ORIGIN, SearchAction.INSTANCE, searchRequest, customIndexSearchHandler);
}
},
failureHandler
);
// Step 3. Delete quantiles done, delete the categorizer state // Step 3. Delete quantiles done, delete the categorizer state
ActionListener<Boolean> deleteQuantilesHandler = ActionListener.wrap( ActionListener<Boolean> deleteQuantilesHandler = ActionListener.wrap(
response -> deleteCategorizerState(jobId, client, 1, deleteCategorizerStateHandler), response -> deleteCategorizerState(jobId, client, 1, deleteCategorizerStateHandler),
@ -189,7 +255,7 @@ public class JobStorageDeletionTask extends Task {
})); }));
} }
private void deleteAliases(String jobId, Client client, ActionListener<Boolean> finishedHandler) { private void deleteAliases(String jobId, Client client, ActionListener<AcknowledgedResponse> finishedHandler) {
final String readAliasName = AnomalyDetectorsIndex.jobResultsAliasedName(jobId); final String readAliasName = AnomalyDetectorsIndex.jobResultsAliasedName(jobId);
final String writeAliasName = AnomalyDetectorsIndex.resultsWriteAlias(jobId); final String writeAliasName = AnomalyDetectorsIndex.resultsWriteAlias(jobId);
@ -204,12 +270,13 @@ public class JobStorageDeletionTask extends Task {
if (removeRequest == null) { if (removeRequest == null) {
// don't error if the job's aliases have already been deleted - carry on and delete the // don't error if the job's aliases have already been deleted - carry on and delete the
// rest of the job's data // rest of the job's data
finishedHandler.onResponse(true); finishedHandler.onResponse(new AcknowledgedResponse(true));
return; return;
} }
executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, removeRequest, executeAsyncWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN, removeRequest,
ActionListener.<AcknowledgedResponse>wrap(removeResponse -> finishedHandler.onResponse(true), ActionListener.<AcknowledgedResponse>wrap(
finishedHandler::onFailure), finishedHandler::onResponse,
finishedHandler::onFailure),
client.admin().indices()::aliases); client.admin().indices()::aliases);
}, },
finishedHandler::onFailure), client.admin().indices()::getAliases); finishedHandler::onFailure), client.admin().indices()::getAliases);

View File

@ -192,33 +192,60 @@ public class MlJobIT extends ESRestTestCase {
assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId1)))); assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId1))));
assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId2)))); assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId2))));
String id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId1, "1234", 300); { //create jobId1 docs
Request createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/doc/" + id); String id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId1, "1234", 300);
createResultRequest.setJsonEntity(String.format(Locale.ROOT, Request createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/doc/" + id);
createResultRequest.setJsonEntity(String.format(Locale.ROOT,
"{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}", "{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}",
jobId1, "1234", 1)); jobId1, "1234", 1));
client().performRequest(createResultRequest); client().performRequest(createResultRequest);
id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId1, "1236", 300); id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId1, "1236", 300);
createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/doc/" + id); createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/doc/" + id);
createResultRequest.setJsonEntity(String.format(Locale.ROOT, createResultRequest.setJsonEntity(String.format(Locale.ROOT,
"{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}", "{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}",
jobId1, "1236", 1)); jobId1, "1236", 1));
client().performRequest(createResultRequest); client().performRequest(createResultRequest);
client().performRequest(new Request("POST", "/_refresh")); client().performRequest(new Request("POST", "/_refresh"));
responseAsString = EntityUtils.toString(client().performRequest( responseAsString = EntityUtils.toString(client().performRequest(
new Request("GET", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId1 + "/results/buckets")).getEntity()); new Request("GET", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId1 + "/results/buckets")).getEntity());
assertThat(responseAsString, containsString("\"count\":2")); assertThat(responseAsString, containsString("\"count\":2"));
responseAsString = EntityUtils.toString(client().performRequest( responseAsString = EntityUtils.toString(client().performRequest(
new Request("GET", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/_search")).getEntity()); new Request("GET", AnomalyDetectorsIndex.jobResultsAliasedName(jobId1) + "/_search")).getEntity());
assertThat(responseAsString, containsString("\"total\":2")); assertThat(responseAsString, containsString("\"total\":2"));
}
{ //create jobId2 docs
String id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId2, "1234", 300);
Request createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId2) + "/doc/" + id);
createResultRequest.setJsonEntity(String.format(Locale.ROOT,
"{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}",
jobId2, "1234", 1));
client().performRequest(createResultRequest);
id = String.format(Locale.ROOT, "%s_bucket_%s_%s", jobId2, "1236", 300);
createResultRequest = new Request("PUT", AnomalyDetectorsIndex.jobResultsAliasedName(jobId2) + "/doc/" + id);
createResultRequest.setJsonEntity(String.format(Locale.ROOT,
"{\"job_id\":\"%s\", \"timestamp\": \"%s\", \"result_type\":\"bucket\", \"bucket_span\": \"%s\"}",
jobId2, "1236", 1));
client().performRequest(createResultRequest);
client().performRequest(new Request("POST", "/_refresh"));
responseAsString = EntityUtils.toString(client().performRequest(
new Request("GET", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId2 + "/results/buckets")).getEntity());
assertThat(responseAsString, containsString("\"count\":2"));
responseAsString = EntityUtils.toString(client().performRequest(
new Request("GET", AnomalyDetectorsIndex.jobResultsAliasedName(jobId2) + "/_search")).getEntity());
assertThat(responseAsString, containsString("\"total\":2"));
}
client().performRequest(new Request("DELETE", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId1)); client().performRequest(new Request("DELETE", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId1));
// check that indices still exist, but are empty and aliases are gone // check that indices still exist, but no longer have job1 entries and aliases are gone
responseAsString = EntityUtils.toString(client().performRequest(new Request("GET", "/_aliases")).getEntity()); responseAsString = EntityUtils.toString(client().performRequest(new Request("GET", "/_aliases")).getEntity());
assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId1)))); assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId1))));
assertThat(responseAsString, containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId2))); //job2 still exists assertThat(responseAsString, containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId2))); //job2 still exists
@ -230,7 +257,16 @@ public class MlJobIT extends ESRestTestCase {
responseAsString = EntityUtils.toString(client().performRequest( responseAsString = EntityUtils.toString(client().performRequest(
new Request("GET", AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + "custom-" + indexName + "/_count")).getEntity()); new Request("GET", AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + "custom-" + indexName + "/_count")).getEntity());
assertThat(responseAsString, containsString("\"count\":0")); assertThat(responseAsString, containsString("\"count\":2"));
// Delete the second job and verify aliases are gone, and original concrete/custom index is gone
client().performRequest(new Request("DELETE", MachineLearning.BASE_PATH + "anomaly_detectors/" + jobId2));
responseAsString = EntityUtils.toString(client().performRequest(new Request("GET", "/_aliases")).getEntity());
assertThat(responseAsString, not(containsString(AnomalyDetectorsIndex.jobResultsAliasedName(jobId2))));
client().performRequest(new Request("POST", "/_refresh"));
responseAsString = EntityUtils.toString(client().performRequest(new Request("GET", "/_cat/indices")).getEntity());
assertThat(responseAsString, not(containsString(AnomalyDetectorsIndexFields.RESULTS_INDEX_PREFIX + "custom-" + indexName)));
} }
public void testCreateJobInSharedIndexUpdatesMapping() throws Exception { public void testCreateJobInSharedIndexUpdatesMapping() throws Exception {