mirror of https://github.com/apache/druid.git
autoscaling will clear state if a node takes too long to create
This commit is contained in:
parent
ac2d4e52da
commit
86c7ebe1e2
|
@ -176,6 +176,9 @@ public class IndexerCoordinatorResource
|
|||
if (!configManager.set(WorkerSetupData.CONFIG_KEY, workerSetupData)) {
|
||||
return Response.status(Response.Status.BAD_REQUEST).build();
|
||||
}
|
||||
|
||||
log.info("Updating Worker Setup configs: %s", workerSetupData);
|
||||
|
||||
return Response.ok().build();
|
||||
}
|
||||
|
||||
|
|
|
@ -103,18 +103,21 @@ public class SimpleResourceManagementStrategy implements ResourceManagementStrat
|
|||
}
|
||||
} else {
|
||||
Duration durSinceLastProvision = new Duration(lastProvisionTime, new DateTime());
|
||||
if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration())) {
|
||||
log.makeAlert("Worker node provisioning taking too long")
|
||||
.addData("millisSinceLastProvision", durSinceLastProvision.getMillis())
|
||||
.addData("provisioningCount", currentlyProvisioning.size())
|
||||
.emit();
|
||||
}
|
||||
|
||||
log.info(
|
||||
"%s still provisioning. Wait for all provisioned nodes to complete before requesting new worker. Current wait time: %s",
|
||||
currentlyProvisioning,
|
||||
durSinceLastProvision
|
||||
);
|
||||
|
||||
if (durSinceLastProvision.isLongerThan(config.getMaxScalingDuration())) {
|
||||
log.makeAlert("Worker node provisioning taking too long!")
|
||||
.addData("millisSinceLastProvision", durSinceLastProvision.getMillis())
|
||||
.addData("provisioningCount", currentlyProvisioning.size())
|
||||
.emit();
|
||||
|
||||
currentlyProvisioning.clear();
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -205,17 +208,20 @@ public class SimpleResourceManagementStrategy implements ResourceManagementStrat
|
|||
}
|
||||
} else {
|
||||
Duration durSinceLastTerminate = new Duration(lastTerminateTime, new DateTime());
|
||||
if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration())) {
|
||||
log.makeAlert("Worker node termination taking too long")
|
||||
.addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis())
|
||||
.addData("terminatingCount", currentlyTerminating.size())
|
||||
.emit();
|
||||
}
|
||||
|
||||
log.info(
|
||||
"%s still terminating. Wait for all nodes to terminate before trying again.",
|
||||
currentlyTerminating
|
||||
);
|
||||
|
||||
if (durSinceLastTerminate.isLongerThan(config.getMaxScalingDuration())) {
|
||||
log.makeAlert("Worker node termination taking too long!")
|
||||
.addData("millisSinceLastTerminate", durSinceLastTerminate.getMillis())
|
||||
.addData("terminatingCount", currentlyTerminating.size())
|
||||
.emit();
|
||||
|
||||
currentlyTerminating.clear();
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -91,4 +91,17 @@ public class EC2NodeData
|
|||
{
|
||||
return keyName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "EC2NodeData{" +
|
||||
"amiId='" + amiId + '\'' +
|
||||
", instanceType='" + instanceType + '\'' +
|
||||
", minInstances=" + minInstances +
|
||||
", maxInstances=" + maxInstances +
|
||||
", securityGroupIds=" + securityGroupIds +
|
||||
", keyName='" + keyName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,4 +60,14 @@ public class GalaxyUserData
|
|||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "GalaxyUserData{" +
|
||||
"env='" + env + '\'' +
|
||||
", version='" + version + '\'' +
|
||||
", type='" + type + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,4 +75,15 @@ public class WorkerSetupData
|
|||
{
|
||||
return userData;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "WorkerSetupData{" +
|
||||
"minVersion='" + minVersion + '\'' +
|
||||
", minNumWorkers=" + minNumWorkers +
|
||||
", nodeData=" + nodeData +
|
||||
", userData=" + userData +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue