HDFS-16902 Add Namenode status to BPServiceActor metrics and improve logging in offerservice (#5334)
Reviewed-by: Mingliang Liu <liuml07@apache.org>
Reviewed-by: Shilun Fan <slfan1989@apache.org>
Signed-off-by: Tao Li <tomscut@apache.org>
This commit is contained in:
parent
22f6d55b71
commit
bce388fd3f
|
@ -202,6 +202,7 @@ class BPServiceActor implements Runnable {
|
||||||
Map<String, String> getActorInfoMap() {
|
Map<String, String> getActorInfoMap() {
|
||||||
final Map<String, String> info = new HashMap<String, String>();
|
final Map<String, String> info = new HashMap<String, String>();
|
||||||
info.put("NamenodeAddress", getNameNodeAddress());
|
info.put("NamenodeAddress", getNameNodeAddress());
|
||||||
|
info.put("NamenodeHaState", state != null ? state.toString() : "Unknown");
|
||||||
info.put("BlockPoolID", bpos.getBlockPoolId());
|
info.put("BlockPoolID", bpos.getBlockPoolId());
|
||||||
info.put("ActorState", getRunningState());
|
info.put("ActorState", getRunningState());
|
||||||
info.put("LastHeartbeat",
|
info.put("LastHeartbeat",
|
||||||
|
@ -697,6 +698,8 @@ class BPServiceActor implements Runnable {
|
||||||
// Every so often, send heartbeat or block-report
|
// Every so often, send heartbeat or block-report
|
||||||
//
|
//
|
||||||
final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime);
|
final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime);
|
||||||
|
LOG.debug("BP offer service run start time: {}, sendHeartbeat: {}", startTime,
|
||||||
|
sendHeartbeat);
|
||||||
HeartbeatResponse resp = null;
|
HeartbeatResponse resp = null;
|
||||||
if (sendHeartbeat) {
|
if (sendHeartbeat) {
|
||||||
//
|
//
|
||||||
|
@ -709,6 +712,8 @@ class BPServiceActor implements Runnable {
|
||||||
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
|
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
|
||||||
scheduler.isBlockReportDue(startTime);
|
scheduler.isBlockReportDue(startTime);
|
||||||
if (!dn.areHeartbeatsDisabledForTests()) {
|
if (!dn.areHeartbeatsDisabledForTests()) {
|
||||||
|
LOG.debug("Before sending heartbeat to namenode {}, the state of the namenode known"
|
||||||
|
+ " to datanode so far is {}", this.getNameNodeAddress(), state);
|
||||||
resp = sendHeartBeat(requestBlockReportLease);
|
resp = sendHeartBeat(requestBlockReportLease);
|
||||||
assert resp != null;
|
assert resp != null;
|
||||||
if (resp.getFullBlockReportLeaseId() != 0) {
|
if (resp.getFullBlockReportLeaseId() != 0) {
|
||||||
|
@ -733,7 +738,12 @@ class BPServiceActor implements Runnable {
|
||||||
// that we should actually process.
|
// that we should actually process.
|
||||||
bpos.updateActorStatesFromHeartbeat(
|
bpos.updateActorStatesFromHeartbeat(
|
||||||
this, resp.getNameNodeHaState());
|
this, resp.getNameNodeHaState());
|
||||||
state = resp.getNameNodeHaState().getState();
|
HAServiceState stateFromResp = resp.getNameNodeHaState().getState();
|
||||||
|
if (state != stateFromResp) {
|
||||||
|
LOG.info("After receiving heartbeat response, updating state of namenode {} to {}",
|
||||||
|
this.getNameNodeAddress(), stateFromResp);
|
||||||
|
}
|
||||||
|
state = stateFromResp;
|
||||||
|
|
||||||
if (state == HAServiceState.ACTIVE) {
|
if (state == HAServiceState.ACTIVE) {
|
||||||
handleRollingUpgradeStatus(resp);
|
handleRollingUpgradeStatus(resp);
|
||||||
|
@ -794,6 +804,7 @@ class BPServiceActor implements Runnable {
|
||||||
long sleepTime = Math.min(1000, dnConf.heartBeatInterval);
|
long sleepTime = Math.min(1000, dnConf.heartBeatInterval);
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
|
LOG.info("BPServiceActor {} is interrupted", this);
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -995,6 +1006,8 @@ class BPServiceActor implements Runnable {
|
||||||
while (!duplicateQueue.isEmpty()) {
|
while (!duplicateQueue.isEmpty()) {
|
||||||
BPServiceActorAction actionItem = duplicateQueue.remove();
|
BPServiceActorAction actionItem = duplicateQueue.remove();
|
||||||
try {
|
try {
|
||||||
|
LOG.debug("BPServiceActor ( {} ) processing queued messages. Action item: {}", this,
|
||||||
|
actionItem);
|
||||||
actionItem.reportTo(bpNamenode, bpRegistration);
|
actionItem.reportTo(bpNamenode, bpRegistration);
|
||||||
} catch (BPServiceActorActionException baae) {
|
} catch (BPServiceActorActionException baae) {
|
||||||
LOG.warn(baae.getMessage() + nnAddr , baae);
|
LOG.warn(baae.getMessage() + nnAddr , baae);
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.datanode;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.builder.ToStringBuilder;
|
||||||
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
@ -84,4 +85,12 @@ public class ErrorReportAction implements BPServiceActorAction {
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new ToStringBuilder(this)
|
||||||
|
.append("errorCode", errorCode)
|
||||||
|
.append("errorMessage", errorMessage)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.datanode;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.builder.ToStringBuilder;
|
||||||
import org.apache.hadoop.fs.StorageType;
|
import org.apache.hadoop.fs.StorageType;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder;
|
||||||
|
@ -111,4 +112,13 @@ public class ReportBadBlockAction implements BPServiceActorAction {
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return new ToStringBuilder(this)
|
||||||
|
.append("block", block)
|
||||||
|
.append("storageUuid", storageUuid)
|
||||||
|
.append("storageType", storageType)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,6 +81,7 @@
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th>Namenode Address</th>
|
<th>Namenode Address</th>
|
||||||
|
<th>Namenode HA State</th>
|
||||||
<th>Block Pool ID</th>
|
<th>Block Pool ID</th>
|
||||||
<th>Actor State</th>
|
<th>Actor State</th>
|
||||||
<th>Last Heartbeat</th>
|
<th>Last Heartbeat</th>
|
||||||
|
@ -91,6 +92,7 @@
|
||||||
{#dn.BPServiceActorInfo}
|
{#dn.BPServiceActorInfo}
|
||||||
<tr>
|
<tr>
|
||||||
<td>{NamenodeAddress}</td>
|
<td>{NamenodeAddress}</td>
|
||||||
|
<td>{NamenodeHaState}</td>
|
||||||
<td>{BlockPoolID}</td>
|
<td>{BlockPoolID}</td>
|
||||||
<td>{ActorState}</td>
|
<td>{ActorState}</td>
|
||||||
<td>{LastHeartbeat}s</td>
|
<td>{LastHeartbeat}s</td>
|
||||||
|
|
Loading…
Reference in New Issue