YARN-6185. Apply SLIDER-1199 to yarn native services for blacklisting nodes. Contributed by Billie Rinaldi

This commit is contained in:
Gour Saha 2017-02-21 08:52:47 -08:00 committed by Jian He
parent 8967a1b812
commit 500695d726
10 changed files with 155 additions and 8 deletions

View File

@ -410,6 +410,12 @@ public abstract class AbstractProviderService
return 0; return 0;
} }
@Override
public void updateBlacklist(List<String> blacklistAdditions,
List<String> blacklistRemovals) {
// no-op
}
@Override @Override
public void execute(List<AbstractRMOperation> operations) { public void execute(List<AbstractRMOperation> operations) {
for (AbstractRMOperation operation : operations) { for (AbstractRMOperation operation : operations) {

View File

@ -1755,7 +1755,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
*/ */
private void scheduleFailureWindowResets(ConfTree resources) throws private void scheduleFailureWindowResets(ConfTree resources) throws
BadConfigException { BadConfigException {
ResetFailureWindow reset = new ResetFailureWindow(); ResetFailureWindow reset = new ResetFailureWindow(rmOperationHandler);
ConfTreeOperations ops = new ConfTreeOperations(resources); ConfTreeOperations ops = new ConfTreeOperations(resources);
MapOperations globals = ops.getGlobalOptions(); MapOperations globals = ops.getGlobalOptions();
long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW, long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW,
@ -1988,6 +1988,12 @@ public class SliderAppMaster extends AbstractSliderLaunchedService
rmOperationHandler.cancelSingleRequest(request); rmOperationHandler.cancelSingleRequest(request);
} }
@Override
public void updateBlacklist(List<String> blacklistAdditions,
List<String> blacklistRemovals) {
rmOperationHandler.updateBlacklist(blacklistAdditions, blacklistRemovals);
}
/* =================================================================== */ /* =================================================================== */
/* END */ /* END */
/* =================================================================== */ /* =================================================================== */

View File

@ -19,21 +19,31 @@
package org.apache.slider.server.appmaster.actions; package org.apache.slider.server.appmaster.actions;
import org.apache.slider.server.appmaster.SliderAppMaster; import org.apache.slider.server.appmaster.SliderAppMaster;
import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
import org.apache.slider.server.appmaster.operations.RMOperationHandlerActions;
import org.apache.slider.server.appmaster.state.AppState; import org.apache.slider.server.appmaster.state.AppState;
/** /**
* Requests the AM to reset the failure window * Requests the AM to reset the failure window
*/ */
public class ResetFailureWindow extends AsyncAction { public class ResetFailureWindow extends AsyncAction {
private final RMOperationHandlerActions operationHandler;
public ResetFailureWindow() { public ResetFailureWindow(RMOperationHandlerActions operationHandler) {
super("ResetFailureWindow"); super("ResetFailureWindow");
this.operationHandler = operationHandler;
} }
@Override @Override
public void execute(SliderAppMaster appMaster, public void execute(SliderAppMaster appMaster,
QueueAccess queueService, QueueAccess queueService,
AppState appState) throws Exception { AppState appState) throws Exception {
synchronized (appMaster) {
appState.resetFailureCounts(); appState.resetFailureCounts();
AbstractRMOperation blacklistOperation = appState.updateBlacklist();
if (blacklistOperation != null) {
blacklistOperation.execute(operationHandler);
}
}
} }
} }

View File

@ -107,4 +107,10 @@ public class AsyncRMOperationHandler extends RMOperationHandler {
public void addContainerRequest(AMRMClient.ContainerRequest req) { public void addContainerRequest(AMRMClient.ContainerRequest req) {
client.addContainerRequest(req); client.addContainerRequest(req);
} }
@Override
public void updateBlacklist(List<String> blacklistAdditions,
List<String> blacklistRemovals) {
client.updateBlacklist(blacklistAdditions, blacklistRemovals);
}
} }

View File

@ -23,6 +23,8 @@ import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.client.api.AMRMClient; import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.slider.providers.ProviderService; import org.apache.slider.providers.ProviderService;
import java.util.List;
public class ProviderNotifyingOperationHandler extends RMOperationHandler { public class ProviderNotifyingOperationHandler extends RMOperationHandler {
private final ProviderService providerService; private final ProviderService providerService;
@ -52,4 +54,10 @@ public class ProviderNotifyingOperationHandler extends RMOperationHandler {
public void cancelSingleRequest(AMRMClient.ContainerRequest request) { public void cancelSingleRequest(AMRMClient.ContainerRequest request) {
providerService.cancelSingleRequest(request); providerService.cancelSingleRequest(request);
} }
@Override
public void updateBlacklist(List<String> blacklistAdditions,
List<String> blacklistRemovals) {
providerService.updateBlacklist(blacklistAdditions, blacklistRemovals);
}
} }

View File

@ -27,25 +27,25 @@ import java.util.List;
public interface RMOperationHandlerActions { public interface RMOperationHandlerActions {
/** /**
* Release an assigned container * Release an assigned container.
* @param containerId container * @param containerId container
*/ */
void releaseAssignedContainer(ContainerId containerId); void releaseAssignedContainer(ContainerId containerId);
/** /**
* Issue a container request * Issue a container request.
* @param request * @param request
*/ */
void addContainerRequest(AMRMClient.ContainerRequest request); void addContainerRequest(AMRMClient.ContainerRequest request);
/** /**
* Cancel a specific request * Cancel a specific request.
* @param request request to cancel * @param request request to cancel
*/ */
void cancelSingleRequest(AMRMClient.ContainerRequest request); void cancelSingleRequest(AMRMClient.ContainerRequest request);
/** /**
* Remove a container request * Remove a container request.
* @param priority1 priority to remove at * @param priority1 priority to remove at
* @param priority2 second priority to target * @param priority2 second priority to target
* @param count number to remove * @param count number to remove
@ -53,7 +53,15 @@ public interface RMOperationHandlerActions {
int cancelContainerRequests(Priority priority1, Priority priority2, int count); int cancelContainerRequests(Priority priority1, Priority priority2, int count);
/** /**
* Execute an entire list of operations * Blacklist resources.
* @param blacklistAdditions resources to add to the blacklist
* @param blacklistRemovals resources to remove from the blacklist
*/
void updateBlacklist(List<String> blacklistAdditions,
List<String> blacklistRemovals);
/**
* Execute an entire list of operations.
* @param operations ops * @param operations ops
*/ */
void execute(List<AbstractRMOperation> operations); void execute(List<AbstractRMOperation> operations);

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.slider.server.appmaster.operations;
import java.util.List;
/**
* Update blacklisted resources for the application.
*/
public class UpdateBlacklistOperation extends AbstractRMOperation {
private final List<String> blacklistAdditions;
private final List<String> blacklistRemovals;
public UpdateBlacklistOperation(List<String> blacklistAdditions,
List<String> blacklistRemovals) {
this.blacklistAdditions = blacklistAdditions;
this.blacklistRemovals = blacklistRemovals;
}
@Override
public void execute(RMOperationHandlerActions handler) {
handler.updateBlacklist(blacklistAdditions, blacklistRemovals);
}
@Override
public String toString() {
return "blacklist additions: " + blacklistAdditions
+ ", blacklist removals: " + blacklistRemovals;
}
}

View File

@ -70,6 +70,7 @@ import org.apache.slider.server.appmaster.management.MetricsConstants;
import org.apache.slider.server.appmaster.operations.AbstractRMOperation; import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation; import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation;
import org.apache.slider.server.appmaster.operations.ContainerRequestOperation; import org.apache.slider.server.appmaster.operations.ContainerRequestOperation;
import org.apache.slider.server.appmaster.operations.UpdateBlacklistOperation;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -1934,6 +1935,15 @@ public class AppState {
return results; return results;
} }
public synchronized AbstractRMOperation updateBlacklist() {
UpdateBlacklistOperation blacklistOperation =
roleHistory.updateBlacklist(getRoleStatusMap().values());
if (blacklistOperation != null) {
log.info("Updating {}", blacklistOperation);
}
return blacklistOperation;
}
/** /**
* Look at where the current node state is -and whether it should be changed * Look at where the current node state is -and whether it should be changed
*/ */
@ -1941,6 +1951,10 @@ public class AppState {
throws SliderInternalStateException, TriggerClusterTeardownException { throws SliderInternalStateException, TriggerClusterTeardownException {
log.debug("in reviewRequestAndReleaseNodes()"); log.debug("in reviewRequestAndReleaseNodes()");
List<AbstractRMOperation> allOperations = new ArrayList<>(); List<AbstractRMOperation> allOperations = new ArrayList<>();
AbstractRMOperation blacklistOperation = updateBlacklist();
if (blacklistOperation != null) {
allOperations.add(blacklistOperation);
}
for (RoleStatus roleStatus : getRoleStatusMap().values()) { for (RoleStatus roleStatus : getRoleStatusMap().values()) {
if (!roleStatus.isExcludeFromFlexing()) { if (!roleStatus.isExcludeFromFlexing()) {
List<AbstractRMOperation> operations = reviewOneRole(roleStatus); List<AbstractRMOperation> operations = reviewOneRole(roleStatus);

View File

@ -42,6 +42,8 @@ public class NodeInstance {
public final String hostname; public final String hostname;
private boolean blacklisted = false;
/** /**
* last state of node. Starts off as {@link NodeState#RUNNING}, * last state of node. Starts off as {@link NodeState#RUNNING},
* on the assumption that it is live. * on the assumption that it is live.
@ -81,6 +83,14 @@ public class NodeInstance {
nodeEntries = new ArrayList<>(roles); nodeEntries = new ArrayList<>(roles);
} }
public synchronized void setBlacklisted(boolean blacklisted) {
this.blacklisted = blacklisted;
}
public boolean isBlacklisted() {
return blacklisted;
}
/** /**
* Update the node status. * Update the node status.
* The return code is true if the node state changed enough to * The return code is true if the node state changed enough to

View File

@ -34,6 +34,7 @@ import org.apache.slider.server.appmaster.management.BoolMetric;
import org.apache.slider.server.appmaster.management.MetricsAndMonitoring; import org.apache.slider.server.appmaster.management.MetricsAndMonitoring;
import org.apache.slider.server.appmaster.management.Timestamp; import org.apache.slider.server.appmaster.management.Timestamp;
import org.apache.slider.server.appmaster.operations.AbstractRMOperation; import org.apache.slider.server.appmaster.operations.AbstractRMOperation;
import org.apache.slider.server.appmaster.operations.UpdateBlacklistOperation;
import org.apache.slider.server.avro.LoadedRoleHistory; import org.apache.slider.server.avro.LoadedRoleHistory;
import org.apache.slider.server.avro.NodeEntryRecord; import org.apache.slider.server.avro.NodeEntryRecord;
import org.apache.slider.server.avro.RoleHistoryHeader; import org.apache.slider.server.avro.RoleHistoryHeader;
@ -49,6 +50,7 @@ import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
/** /**
@ -546,6 +548,38 @@ public class RoleHistory {
} }
} }
public synchronized UpdateBlacklistOperation updateBlacklist(
Collection<RoleStatus> roleStatuses) {
List<String> blacklistAdditions = new ArrayList<>();
List<String> blacklistRemovals = new ArrayList<>();
for (Entry<String, NodeInstance> nodeInstanceEntry : nodemap.entrySet()) {
boolean shouldBeBlacklisted = false;
String nodeHost = nodeInstanceEntry.getKey();
NodeInstance nodeInstance = nodeInstanceEntry.getValue();
for (RoleStatus roleStatus : roleStatuses) {
if (nodeInstance.exceedsFailureThreshold(roleStatus)) {
shouldBeBlacklisted = true;
break;
}
}
if (shouldBeBlacklisted) {
if (!nodeInstance.isBlacklisted()) {
blacklistAdditions.add(nodeHost);
nodeInstance.setBlacklisted(true);
}
} else {
if (nodeInstance.isBlacklisted()) {
blacklistRemovals.add(nodeHost);
nodeInstance.setBlacklisted(false);
}
}
}
if (blacklistAdditions.isEmpty() && blacklistRemovals.isEmpty()) {
return null;
}
return new UpdateBlacklistOperation(blacklistAdditions, blacklistRemovals);
}
/** /**
* Find a node for use * Find a node for use
* @param role role * @param role role