[Security] Reset IndexAuditTrail to INITIALISED before start (elastic/x-pack-elasticsearch#3807)

Calling start() when already in the STARTING state doesn't do anything, so the component
gets stuck in STARTING state forever.

Also: wait for yellow status on the required index, not just on the cluster.

Also: added more logging to help diagnose such issues (either in RemoteIndexAuditTrailStartingTests or production)

Original commit: elastic/x-pack-elasticsearch@fb81214fe7
This commit is contained in:
Tim Vernum 2018-02-07 12:30:23 +11:00 committed by GitHub
parent 092fcd3675
commit 7bb2cba14f
2 changed files with 17 additions and 9 deletions

View File

@ -45,19 +45,17 @@ import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportMessage;
import org.elasticsearch.xpack.core.XPackClientPlugin;
import org.elasticsearch.xpack.core.XPackPlugin;
import org.elasticsearch.xpack.core.security.authc.AuthenticationToken;
import org.elasticsearch.xpack.core.security.index.IndexAuditTrailField;
import org.elasticsearch.xpack.core.security.user.SystemUser;
import org.elasticsearch.xpack.core.security.user.User;
import org.elasticsearch.xpack.core.security.user.XPackUser;
import org.elasticsearch.xpack.security.Security;
import org.elasticsearch.xpack.core.template.TemplateUtils;
import org.elasticsearch.xpack.security.audit.AuditLevel;
import org.elasticsearch.xpack.security.audit.AuditTrail;
import org.elasticsearch.xpack.security.rest.RemoteHostHeader;
import org.elasticsearch.xpack.security.support.IndexLifecycleManager;
import org.elasticsearch.xpack.security.transport.filter.SecurityIpFilterRule;
import org.elasticsearch.xpack.core.template.TemplateUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
@ -275,6 +273,8 @@ public class IndexAuditTrail extends AbstractComponent implements AuditTrail {
client.admin().cluster().prepareState().execute(new ActionListener<ClusterStateResponse>() {
@Override
public void onResponse(ClusterStateResponse clusterStateResponse) {
logger.trace("remote cluster state is [{}] [{}]",
clusterStateResponse.getClusterName(), clusterStateResponse.getState());
if (canStart(clusterStateResponse.getState())) {
innerStart();
} else if (TemplateUtils.checkTemplateExistsAndVersionMatches(INDEX_TEMPLATE_NAME,
@ -291,10 +291,15 @@ public class IndexAuditTrail extends AbstractComponent implements AuditTrail {
// state recovery etc.
String indexName = getIndexName();
// if this index doesn't exist the call will fail with a not_found exception...
client.admin().cluster().prepareHealth().setIndices().setWaitForYellowStatus().execute(ActionListener.wrap(
(x) -> start(),
client.admin().cluster().prepareHealth().setIndices(indexName).setWaitForYellowStatus().execute(
ActionListener.wrap(
(x) -> {
logger.debug("have yellow status on remote index [{}] ", indexName);
transitionStartingToInitialized();
start();
},
(e) -> {
logger.error("failed to get wait for yellow status on index [" + indexName + "]", e);
logger.error("failed to get wait for yellow status on remote index [" + indexName + "]", e);
transitionStartingToInitialized();
}));
}
@ -327,6 +332,8 @@ public class IndexAuditTrail extends AbstractComponent implements AuditTrail {
final String message = "state transition from starting to start ed failed, current value: " + state.get();
assert false : message;
logger.error(message);
} else {
logger.trace("successful state transition from starting to started, current value: [{}]", state.get());
}
}

View File

@ -146,6 +146,7 @@ public class RemoteIndexAuditTrailStartingTests extends SecurityIntegTestCase {
}
public void testThatRemoteAuditInstancesAreStarted() throws Exception {
logger.info("Test configuration: ssl=[{}] localAudit=[{}][{}]", sslEnabled, localAudit, outputs);
// we ensure that all instances present are started otherwise we will have issues
// and race with the shutdown logic
for (InternalTestCluster cluster : Arrays.asList(remoteCluster, internalCluster())) {