HDFS-9023. When NN is not able to identify DN for replication, reason behind it can be logged.
This commit is contained in:
parent
d31c9d8c49
commit
5bf7e594d7
|
@ -62,6 +62,28 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private static final ThreadLocal<HashMap<NodeNotChosenReason, Integer>>
|
||||||
|
CHOOSE_RANDOM_REASONS = ThreadLocal
|
||||||
|
.withInitial(() -> new HashMap<NodeNotChosenReason, Integer>());
|
||||||
|
|
||||||
|
private enum NodeNotChosenReason {
|
||||||
|
NOT_IN_SERVICE("the node isn't in service"),
|
||||||
|
NODE_STALE("the node is stale"),
|
||||||
|
NODE_TOO_BUSY("the node is too busy"),
|
||||||
|
TOO_MANY_NODES_ON_RACK("the rack has too many chosen nodes"),
|
||||||
|
NOT_ENOUGH_STORAGE_SPACE("no enough storage space to place the block");
|
||||||
|
|
||||||
|
private final String text;
|
||||||
|
|
||||||
|
NodeNotChosenReason(final String logText) {
|
||||||
|
text = logText;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getText() {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected boolean considerLoad;
|
protected boolean considerLoad;
|
||||||
protected double considerLoadFactor;
|
protected double considerLoadFactor;
|
||||||
private boolean preferLocalNode;
|
private boolean preferLocalNode;
|
||||||
|
@ -711,6 +733,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
builder.setLength(0);
|
builder.setLength(0);
|
||||||
builder.append("[");
|
builder.append("[");
|
||||||
}
|
}
|
||||||
|
CHOOSE_RANDOM_REASONS.get().clear();
|
||||||
boolean badTarget = false;
|
boolean badTarget = false;
|
||||||
DatanodeStorageInfo firstChosen = null;
|
DatanodeStorageInfo firstChosen = null;
|
||||||
while (numOfReplicas > 0) {
|
while (numOfReplicas > 0) {
|
||||||
|
@ -781,14 +804,24 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
}
|
}
|
||||||
if (numOfReplicas>0) {
|
if (numOfReplicas>0) {
|
||||||
String detail = enableDebugLogging;
|
String detail = enableDebugLogging;
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled() && builder != null) {
|
||||||
if (badTarget && builder != null) {
|
|
||||||
detail = builder.toString();
|
detail = builder.toString();
|
||||||
|
if (badTarget) {
|
||||||
builder.setLength(0);
|
builder.setLength(0);
|
||||||
} else {
|
} else {
|
||||||
|
if (detail.length() > 1) {
|
||||||
|
// only log if there's more than "[", which is always appended at
|
||||||
|
// the beginning of this method.
|
||||||
|
LOG.debug(detail);
|
||||||
|
}
|
||||||
detail = "";
|
detail = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
final HashMap<NodeNotChosenReason, Integer> reasonMap =
|
||||||
|
CHOOSE_RANDOM_REASONS.get();
|
||||||
|
if (!reasonMap.isEmpty()) {
|
||||||
|
LOG.info("Not enough replicas was chosen. Reason:{}", reasonMap);
|
||||||
|
}
|
||||||
throw new NotEnoughReplicasException(detail);
|
throw new NotEnoughReplicasException(detail);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -834,19 +867,38 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
if (storage != null) {
|
if (storage != null) {
|
||||||
results.add(storage);
|
results.add(storage);
|
||||||
} else {
|
} else {
|
||||||
logNodeIsNotChosen(dnd, "no good storage to place the block ");
|
logNodeIsNotChosen(dnd, NodeNotChosenReason.NOT_ENOUGH_STORAGE_SPACE,
|
||||||
|
" for storage type " + storageType);
|
||||||
}
|
}
|
||||||
return storage;
|
return storage;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void logNodeIsNotChosen(DatanodeDescriptor node,
|
private static void logNodeIsNotChosen(DatanodeDescriptor node,
|
||||||
String reason) {
|
NodeNotChosenReason reason) {
|
||||||
|
logNodeIsNotChosen(node, reason, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void logNodeIsNotChosen(DatanodeDescriptor node,
|
||||||
|
NodeNotChosenReason reason, String reasonDetails) {
|
||||||
|
assert reason != null;
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
// build the error message for later use.
|
// build the error message for later use.
|
||||||
debugLoggingBuilder.get()
|
debugLoggingBuilder.get()
|
||||||
.append("\n Datanode ").append(node)
|
.append("\n Datanode ").append(node)
|
||||||
.append(" is not chosen since ").append(reason).append(".");
|
.append(" is not chosen since ").append(reason.getText());
|
||||||
|
if (reasonDetails != null) {
|
||||||
|
debugLoggingBuilder.get().append(" ").append(reasonDetails);
|
||||||
}
|
}
|
||||||
|
debugLoggingBuilder.get().append(".");
|
||||||
|
}
|
||||||
|
// always populate reason map to log high level reasons.
|
||||||
|
final HashMap<NodeNotChosenReason, Integer> reasonMap =
|
||||||
|
CHOOSE_RANDOM_REASONS.get();
|
||||||
|
Integer base = reasonMap.get(reason);
|
||||||
|
if (base == null) {
|
||||||
|
base = 0;
|
||||||
|
}
|
||||||
|
reasonMap.put(reason, base + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -868,13 +920,13 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
boolean avoidStaleNodes) {
|
boolean avoidStaleNodes) {
|
||||||
// check if the node is (being) decommissioned
|
// check if the node is (being) decommissioned
|
||||||
if (!node.isInService()) {
|
if (!node.isInService()) {
|
||||||
logNodeIsNotChosen(node, "the node isn't in service.");
|
logNodeIsNotChosen(node, NodeNotChosenReason.NOT_IN_SERVICE);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (avoidStaleNodes) {
|
if (avoidStaleNodes) {
|
||||||
if (node.isStale(this.staleInterval)) {
|
if (node.isStale(this.staleInterval)) {
|
||||||
logNodeIsNotChosen(node, "the node is stale ");
|
logNodeIsNotChosen(node, NodeNotChosenReason.NODE_STALE);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -885,8 +937,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
stats.getInServiceXceiverAverage();
|
stats.getInServiceXceiverAverage();
|
||||||
final int nodeLoad = node.getXceiverCount();
|
final int nodeLoad = node.getXceiverCount();
|
||||||
if (nodeLoad > maxLoad) {
|
if (nodeLoad > maxLoad) {
|
||||||
logNodeIsNotChosen(node, "the node is too busy (load: " + nodeLoad
|
logNodeIsNotChosen(node, NodeNotChosenReason.NODE_TOO_BUSY,
|
||||||
+ " > " + maxLoad + ") ");
|
"(load: " + nodeLoad + " > " + maxLoad + ")");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -901,7 +953,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (counter > maxTargetPerRack) {
|
if (counter > maxTargetPerRack) {
|
||||||
logNodeIsNotChosen(node, "the rack has too many chosen nodes ");
|
logNodeIsNotChosen(node, NodeNotChosenReason.TOO_MANY_NODES_ON_RACK);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -764,7 +764,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (requiredSize > remaining - scheduledSize) {
|
if (requiredSize > remaining - scheduledSize) {
|
||||||
LOG.debug(
|
BlockPlacementPolicy.LOG.debug(
|
||||||
"The node {} does not have enough {} space (required={},"
|
"The node {} does not have enough {} space (required={},"
|
||||||
+ " scheduled={}, remaining={}).",
|
+ " scheduled={}, remaining={}).",
|
||||||
this, t, requiredSize, scheduledSize, remaining);
|
this, t, requiredSize, scheduledSize, remaining);
|
||||||
|
|
Loading…
Reference in New Issue