Reduce logging noise when stepping down as master before state recovery (#39950)

Reduces the logging noise from the state recovery component when there are duelling elections.

Relates to #32006
This commit is contained in:
Yannick Welsch 2019-03-15 17:14:00 +01:00
parent 4eb3683d65
commit c74111ff8e
1 changed file with 25 additions and 13 deletions

View File

@ -206,10 +206,19 @@ public class GatewayService extends AbstractLifecycleComponent implements Cluste
if (enforceRecoverAfterTime && recoverAfterTime != null) {
if (scheduledRecovery.compareAndSet(false, true)) {
logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
threadPool.schedule(() -> {
if (recovered.compareAndSet(false, true)) {
logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
recoveryRunnable.run();
threadPool.schedule(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.warn("delayed state recovery failed", e);
resetRecoveredFlags();
}
@Override
protected void doRun() {
if (recovered.compareAndSet(false, true)) {
logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
recoveryRunnable.run();
}
}
}, recoverAfterTime, ThreadPool.Names.GENERIC);
}
@ -218,10 +227,8 @@ public class GatewayService extends AbstractLifecycleComponent implements Cluste
threadPool.generic().execute(new AbstractRunnable() {
@Override
public void onFailure(final Exception e) {
logger.warn("Recovery failed", e);
// we reset `recovered` in the listener don't reset it here otherwise there might be a race
// that resets it to false while a new recover is already running?
GatewayService.this.onFailure("state recovery failed: " + e.getMessage());
logger.warn("state recovery failed", e);
resetRecoveredFlags();
}
@Override
@ -233,11 +240,9 @@ public class GatewayService extends AbstractLifecycleComponent implements Cluste
}
}
private void onFailure(final String message) {
private void resetRecoveredFlags() {
recovered.set(false);
scheduledRecovery.set(false);
// don't remove the block here, we don't want to allow anything in such a case
logger.info("metadata state not restored, reason: {}", message);
}
class RecoverStateUpdateTask extends ClusterStateUpdateTask {
@ -257,10 +262,16 @@ public class GatewayService extends AbstractLifecycleComponent implements Cluste
logger.info("recovered [{}] indices into cluster_state", newState.metaData().indices().size());
}
@Override
public void onNoLongerMaster(String source) {
logger.debug("stepped down as master before recovering state [{}]", source);
resetRecoveredFlags();
}
@Override
public void onFailure(final String source, final Exception e) {
logger.info(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
GatewayService.this.onFailure("failed to update cluster state");
resetRecoveredFlags();
}
}
@ -280,7 +291,8 @@ public class GatewayService extends AbstractLifecycleComponent implements Cluste
@Override
public void onFailure(final String msg) {
GatewayService.this.onFailure(msg);
logger.info("state recovery failed: {}", msg);
resetRecoveredFlags();
}
}